diff --git a/etl/customers/aiha/xml_extraction.py b/etl/customers/aiha/xml_extraction.py index 8c5c9008..7dc516a6 100644 --- a/etl/customers/aiha/xml_extraction.py +++ b/etl/customers/aiha/xml_extraction.py @@ -113,6 +113,7 @@ def main(): # TODO: Need AIH001-02 9C Clapton Common # TODO: Check which properties are in a conservation area + # TODO: AIH001-16 - Is the loft insulation suitable (already has 100mm in the RIR) recommended_measures = [ { @@ -560,6 +561,7 @@ def main(): { "measure": "Loft Insulation", "description": "300mm loft insulation", + "floor_area": 20.92, # Based on floor area of RIR "sap_points": 1, "ending_sap": 65, }, @@ -616,6 +618,27 @@ def main(): "ending_sap": 78, } ] + }, + { + "survey_key": "AIH001-18", + "starting_sap": 58, + "recommended_measures": [], + + }, + { + "survey_key": "AIH001-19", + "starting_sap": 76, + "recommended_measures": [] + }, + { + "survey_key": "AIH001-20", + "starting_sap": 82, + "recommended_measures": [] + }, + { + "survey_key": "AIH001-21", + "starting_sap": 53, + "recommended_measures": [] } ] @@ -648,6 +671,7 @@ def main(): {'item': '2x DMEV fans', 'unit_price': 1070, 'unit': 'unit'}, {'item': '1.6kWp Solar PV system', 'unit_price': 3040, 'unit': 'unit_needs_scaffolding'}, {'item': '2kWp Solar PV system', 'unit_price': 3201, 'unit': 'unit_needs_scaffolding'}, + {'item': '2.4kWp Solar PV system', 'unit_price': 3363, 'unit': 'unit_needs_scaffolding'}, {'item': '3.2kWp Solar PV system', 'unit_price': 3686, 'unit': 'unit_needs_scaffolding'}, {'item': '4kWp Solar PV system', 'unit_price': 4009, 'unit': 'unit_needs_scaffolding'}, {'item': '5.6kWp Solar PV system', 'unit_price': 5015, 'unit': 'unit_needs_scaffolding'}, @@ -690,8 +714,14 @@ def main(): total_cost = survey.get("total_cost", 0) for measure in survey.get("recommended_measures", []): + # Include hlp and floor_area for each measure if available + hlp = measure.get("hlp", None) + floor_area = measure.get("floor_area", None) + normalized_measures.append({ "survey_key": survey_key, + "hlp": hlp, + "floor_area": floor_area, "starting_sap": starting_sap, "measure": measure["measure"], "description": measure.get("description", ""), @@ -712,16 +742,38 @@ def main(): fill_value=None ).reset_index() + measures_columns = [x for x in pivoted_measures.columns if x not in ["survey_key"]] + # We add a "Cost of" column for each measure + for measure in measures_columns: + pivoted_measures[f"Cost of {measure}"] = None + + pivoted_floor_area = measures_df.pivot_table( + index="survey_key", + columns="measure", + values="floor_area", + aggfunc="first" # Use 'first' since each measure should only appear once per survey_key + ).add_prefix("floor_area - ").reset_index() + + pivoted_hlp = measures_df.pivot_table( + index="survey_key", + columns="measure", + values="hlp", + aggfunc="first" + ).add_prefix("hlp - ").reset_index() + + # Merge hlp and floor_area data + pivoted_measures = pivoted_measures.merge(pivoted_hlp, on="survey_key", how="left") + pivoted_measures = pivoted_measures.merge(pivoted_floor_area, on="survey_key", how="left") + # Step 3: Calculate the total sap points and total cost for each survey. - sap_cost_totals = measures_df.groupby("survey_key").agg( + totals = measures_df.groupby("survey_key").agg( total_sap_points=("sap_points", "sum"), - total_cost_of_measures=("measure_cost", "sum") ).reset_index() # Merge total sap points into the pivoted measures. - pivoted_measures = pd.merge(pivoted_measures, sap_cost_totals, on="survey_key", how="left") - pivoted_measures["Cost Contingency"] = pivoted_measures["total_cost_of_measures"] * CONTINGENCY_RATE - pivoted_measures["Total Cost"] = pivoted_measures["total_cost_of_measures"] + pivoted_measures["Cost Contingency"] + pivoted_measures = pd.merge(pivoted_measures, totals, on="survey_key", how="left") + # pivoted_measures["Cost Contingency"] = pivoted_measures["total_cost_of_measures"] * CONTINGENCY_RATE + # pivoted_measures["Total Cost"] = pivoted_measures["total_cost_of_measures"] + pivoted_measures["Cost Contingency"] # Step 4: Extract starting SAP for each survey key. starting_sap_df = measures_df.drop_duplicates(subset=["survey_key"])[["survey_key", "starting_sap"]] @@ -738,5 +790,10 @@ def main(): result_df, how="left", on="survey_key" ) + final_measures.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/Measures packages.csv") + + # Store costs + pricing_data.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/Pricing data.csv") + # if __name__ == "__main__": # main() diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index fa70b6b7..ef8daf51 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -784,6 +784,7 @@ class XmlParser: glazing_type_lookup = { "ND": "Single glazing", + "1": "double glazing installed before 2002", "2": "double glazing installed during or after 2002", "3": "double glazing, unknown install date", "5": "Single glazing",