diff --git a/etl/customers/aiha/xml_extraction.py b/etl/customers/aiha/xml_extraction.py index 4d4705c9..c246105a 100644 --- a/etl/customers/aiha/xml_extraction.py +++ b/etl/customers/aiha/xml_extraction.py @@ -3,10 +3,10 @@ from io import BytesIO import pandas as pd -from etl.ownership.config import EXCLUDED_UPRNS from etl.xml_survey_extraction.XmlParser import XmlParser SURVEY_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/RESIDENT SURVEYS" +CONTINGENCY_RATE = 0.26 def main(): @@ -274,6 +274,7 @@ def main(): { "measure": "Internal Wall Insulation", "description": "100mm internal wall insulation", + "hlp": 24.13 * 2.63, "sap_points": 5, "ending_sap": 69, }, @@ -292,12 +293,14 @@ def main(): { "measure": "Internal Wall Insulation", "description": "100mm internal wall insulation", + "hlp": (22.35 * 3.24) + (22.13 * 2.53), "sap_points": 8, "ending_sap": 52, }, { "measure": "Cavity Wall Insulation", "description": "CWI to rdSAP default standard", + "hlp": (2.68 * 2.39) + (5.93 * 2.63) + (6.13 * 2.39), # 1st & 2nd extension "sap_points": 1, "ending_sap": 53, }, @@ -328,6 +331,7 @@ def main(): { "measure": "Internal Wall Insulation", "description": "100mm internal wall insulation", + "hlp": (18.50 * 3.12) + (19.00 * 2.75), "sap_points": 5, "ending_sap": 68, }, @@ -346,12 +350,15 @@ def main(): { "measure": "Double Glazing", "description": "Installation of double glazing", + "n_windows": 20, # Counted the bay windows each as 3 + "windows_area": 10.66, "sap_points": 2, "ending_sap": 48, }, { "measure": "Draught Proofing", "description": "Window draught proofing improvements", + "n_windows": 20, # Counted the bay windows each as 3 "sap_points": 1, "ending_sap": 49, }, @@ -390,6 +397,7 @@ def main(): { "measure": "Roof Insulation", "description": "100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)", + "floor_area": 39.75, # based on the floor area of the RIR "sap_points": 6, "ending_sap": 59, }, @@ -403,6 +411,7 @@ def main(): { "measure": "Cavity Wall Insulation", "description": "CWI to rdSAP default standard", + "hlp": (35.40 * 2.65) + (26.70 * 2.73) + (16.30 * 2.71), # 1st & 2nd extension "sap_points": 6, "ending_sap": 67, }, @@ -441,6 +450,7 @@ def main(): { "measure": "Cavity Wall Insulation", "description": "CWI to rdSAP default standard", + "hlp": (11.00 * 2.6) + (11.00 * 2.65) + (4.60 * 2.7), "sap_points": 5, "ending_sap": 68, }, @@ -483,11 +493,11 @@ def main(): {'item': '80mm cylinder insulation', 'unit_price': 50, 'unit': 'unit'}, {'item': '100mm internal wall insulation', 'unit_price': 244.8, 'unit': 'hlp_m2'}, {'item': 'CWI to rdSAP default standard', 'unit_price': 14.21, 'unit': 'hlp_m2'}, - {'item': 'Window draught proofing improvements', 'unit_price': None, 'unit': 'unit'}, - {'item': '100mm flat roof insulation', 'unit_price': None, 'unit': 'floor_m2'}, + {'item': 'Window draught proofing improvements', 'unit_price': 63, 'unit': 'window'}, + {'item': '100mm flat roof insulation', 'unit_price': 195, 'unit': 'floor_m2'}, {'item': 'Switch to 24-hour tariff', 'unit_price': 0, 'unit': None}, {'item': '3.2kWp Solar PV system', 'unit_price': 3686, 'unit': 'unit_needs_scaffolding'}, - {'item': 'Installation of double glazing', 'unit_price': None, 'unit': 'window'}, + {'item': 'Installation of double glazing', 'unit_price': 1074, 'unit': 'window'}, {'item': 'Ecoforest ecoAIR EVI 4-20 20kW air source heat pump', 'unit_price': 21189, 'unit': 'unit'}, {'item': '2kWp Solar PV system', 'unit_price': 3201, 'unit': 'unit_needs_scaffolding'}, {'item': '100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)', 'unit_price': 244.80, @@ -500,51 +510,49 @@ def main(): pricing_data = pd.DataFrame(pricing_data) for recommendation in recommended_measures: - property_data = measures_data[measures_data["survey_key"] == recommendation["survey_key"]].squeeze() + total_cost = 0 for measure in recommendation["recommended_measures"]: measure_pricing = pricing_data[pricing_data["item"] == measure["description"]] measure_unit = measure_pricing["unit"].values[0] - if measure_unit is None: - blah - continue - if measure_unit == "unit": - measure["Total Cost"] = float(measure_pricing["unit_price"].values[0]) - continue - - if measure_unit == "unit_needs_scaffolding": - # We need the number of floors + if measure_unit in ["unit", None]: + measure_cost = float(measure_pricing["unit_price"].values[0]) + elif measure_unit == "unit_needs_scaffolding": n_floors = property_data["number_of_floors"] - cost_of_scalfolding = [x for x in scaffolding_data if x["number_of_floors"] == n_floors][0]["price"] - measure["Total Cost"] = float(measure_pricing["unit_price"].values[0]) + cost_of_scalfolding - continue + scaffolding_cost = [x for x in scaffolding_data if x["number_of_floors"] == n_floors][0]["price"] + measure_cost = float(measure_pricing["unit_price"].values[0]) + scaffolding_cost + elif measure_unit == "floor_m2": + measure_cost = float(measure_pricing["unit_price"].values[0]) * measure["floor_area"] + elif measure_unit == "hlp_m2": + measure_cost = float(measure_pricing["unit_price"].values[0]) * measure["hlp"] + elif measure_unit == "window": + measure_cost = float(measure_pricing["unit_price"].values[0]) * measure["n_windows"] + else: + raise Exception("Unknown unit type") - if measure_unit == "floor_m2": - floor_area = measure["floor_area"] - measure["Total Cost"] = float(measure_pricing["unit_price"].values[0]) * floor_area - continue + measure["Total Cost"] = measure_cost + total_cost += measure_cost - if measure_unit == "hlp_m2": - hlp = measure["hlp"] - measure["Total Cost"] = float(measure_pricing["unit_price"].values[0]) * hlp - - raise Exception("Unknown unit type") + recommendation["total_cost"] = total_cost # Step 1: Normalize the recommended_measures data into a DataFrame. normalized_measures = [] - for survey in recommended_measures: survey_key = survey["survey_key"] starting_sap = survey["starting_sap"] + total_cost = survey.get("total_cost", 0) + for measure in survey.get("recommended_measures", []): normalized_measures.append({ "survey_key": survey_key, "starting_sap": starting_sap, "measure": measure["measure"], "description": measure.get("description", ""), - "sap_points": measure.get("sap_points", 0) + "sap_points": measure.get("sap_points", 0), + "measure_cost": measure.get("Total Cost", 0), + "total_cost": total_cost }) # Convert the normalized list into a DataFrame. @@ -559,12 +567,16 @@ def main(): fill_value=None ).reset_index() - # Step 3: Calculate the total sap points for each survey. - total_sap_points = measures_df.groupby("survey_key")["sap_points"].sum().reset_index() - total_sap_points.columns = ["survey_key", "total_sap_points"] + # Step 3: Calculate the total sap points and total cost for each survey. + sap_cost_totals = measures_df.groupby("survey_key").agg( + total_sap_points=("sap_points", "sum"), + total_cost_of_measures=("measure_cost", "sum") + ).reset_index() # Merge total sap points into the pivoted measures. - pivoted_measures = pd.merge(pivoted_measures, total_sap_points, on="survey_key", how="left") + pivoted_measures = pd.merge(pivoted_measures, sap_cost_totals, on="survey_key", how="left") + pivoted_measures["Cost Contingency"] = pivoted_measures["total_cost_of_measures"] * CONTINGENCY_RATE + pivoted_measures["Total Cost"] = pivoted_measures["total_cost_of_measures"] + pivoted_measures["Cost Contingency"] # Step 4: Extract starting SAP for each survey key. starting_sap_df = measures_df.drop_duplicates(subset=["survey_key"])[["survey_key", "starting_sap"]]