From ce9b3e5e2014fdeaba52ecf977618a5b16898a29 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 25 Oct 2024 18:13:28 +0100 Subject: [PATCH] creating aiha output --- etl/customers/aiha/xml_extraction.py | 452 ++++++++++++++++++++++++++- 1 file changed, 448 insertions(+), 4 deletions(-) diff --git a/etl/customers/aiha/xml_extraction.py b/etl/customers/aiha/xml_extraction.py index d235be78..416065e7 100644 --- a/etl/customers/aiha/xml_extraction.py +++ b/etl/customers/aiha/xml_extraction.py @@ -1,5 +1,8 @@ import os from io import BytesIO + +import pandas as pd + from etl.xml_survey_extraction.XmlParser import XmlParser SURVEY_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/RESIDENT SURVEYS" @@ -22,7 +25,8 @@ def main(): xml_files = [f for f in os.listdir(subfolder) if f.endswith('.xml')] if not xml_files: - raise FileNotFoundError(f"No XML files found in subfolder: {subfolder}") + print(f"No XML files found in subfolder: {subfolder}") + continue # If any XML files are found, perform the data extraction. We use the subfolder name as the survey key. for xml_file in xml_files: @@ -44,16 +48,456 @@ def main(): # Run the parser to extract the data xml_parser.run() + if not xml_parser.epc: + # If we don't have a lig xml + continue # Store the extracted data for further processing extracted_surveys.append({ - "epc": xml_parser.epc, - "additional_data": xml_parser.additional_data, - "subfolder": subfolder + "survey_key": subfolder.split("/")[-1], + **xml_parser.epc, + **xml_parser.additional_data }) print(f"Extracted {len(extracted_surveys)} surveys.") # Process the extracted_surveys as needed, for example, save to a database or write to a file. 
+ extracted_surveys = pd.DataFrame(extracted_surveys) + + # This is the data we need for the AIHA project + measures_data = extracted_surveys[ + ["survey_key", "address", "postcode", "current-energy-efficiency", "current-energy-rating"] + ] + measures_data = measures_data.sort_values("survey_key", ascending=True) + + # Note: + # The properties will still have "Very poor" ratings for their hot water + + # TODO + # - AIH001-03 has a basement and so we should discount this area from the ground floor + # - AIH001-03 has a loft that is inaccessible - ask Chenai about why this property didn't have access to the loft + # - AIH001-03 instead of cylinder insulation, we could install an air source heat pump but it might not be the + # best option for this property due to it being extremely large and the walls being uninsulated. It might not + # be performant enough in the winter, when COP will be more like 1.5. + # - AIH001-03 - can add additional 1.6kWp solar PV to flat roof to get close to EPC C. How many occupants are + # in the property? Does it make sense to have such a large solar PV system (5.6kWp)? + # - AIH001-04 why couldn't the cylinder be accessed? - treating this could get to the EPC C + # - Generally, should we consider insulated doors? + # - AIH001-08 and AIH001-09, check if it's freehold - could solar work as both of these units are part of the same + # building + # - AIH001-09 - The extension is 1900-1929 but has a cavity wall + # - AIH001-09 - Is it not possible to install a loft hatch? + # - AIH001-09 - Why is there assumed secondary heating? + # - AIH001-09 - Is there definitely an immersion water heater? Is this definitely the case for the other units? + # - AIH001-11 - The layout of this unit is confusing, is there roof access? + # - AIH001-12 - Why was there not access to the cylinder? 
+ # + + recommended_measures = [ + { + "survey_key": "AIH001-01", + "starting_sap": 69, + "recommended_measures": [], + "notes": "Is EPC C" + }, + { + "survey_key": "AIH001-03", + "starting_sap": 43, + "recommended_measures": [ + { + "measure": "Cylinder Insulation", + "description": "80mm cylinder insulation", + "sap_points": 1, + "ending_sap": 44, + }, + { + "measure": "Solar PV", + "description": "Solar PV system with various configurations", + "config": [ + { + "size": "4kWp", + "orientation": "East", + "elavation": 30, + "overshading": "Modest", + }, + { + "size": "1.6kWp", + "orientation": "Horizontal", + "elavation": "Horizontal", + "overshading": "Modest", + } + ], + "sap_points": 7, + "ending_sap": 53 + }, + { + "measure": "Loft Insulation", + "description": "300mm of loft insulation", + "sap_points": 8, + "ending_sap": 61 + }, + { + "measure": "TTZC", + "description": "Thermostatic Time Zone Control", + "sap_points": 3, + "ending_sap": 64 + } + ], + "notes": "There was no access to the loft for this property and so a loft hatch would need to be " + "installed..." 
+ }, + { + "survey_key": "AIH001-04", + "starting_sap": 48, + "recommended_measures": [ + { + "measure": "Flat Roof Insulation", + "description": "100mm flat roof insulation", + "sap_points": 4, + "ending_sap": 52 + }, + { + "measure": "TTZC", + "description": "Thermostatic Time Zone Control", + "sap_points": 3, + "ending_sap": 55 + }, + { + "measure": "Solar PV", + "description": "Solar PV system with 4kW capacity, south-facing", + "config": [ + { + "size": "4kW", + "orientation": "South", + "elavation": 30, + "overshading": "Modest", + } + ], + "sap_points": 12, + "ending_sap": 67 + } + ], + "notes": "" + }, + { + "survey_key": "AIH001-05", + "starting_sap": 54, + "recommended_measures": [ + { + "measure": "Flat Roof Insulation", + "description": "100mm flat roof insulation", + "sap_points": 5, + "ending_sap": 59, + }, + { + "measure": "Cylinder Insulation", + "description": "80mm cylinder insulation", + "sap_points": 2, + "ending_sap": 61, + }, + { + "measure": "Solar PV", + "description": "Solar PV system with 4kW capacity, horizontal orientation", + "config": [ + { + "size": "4kW", + "orientation": "Horizontal", + "elavation": 30, + "overshading": "Modest", + } + ], + "sap_points": 9, + "ending_sap": 70 + }, + { + "measure": "TTZC", + "description": "Thermostatic Time Zone Control", + "sap_points": 3, + "ending_sap": 73 + } + ], + "notes": "" + }, + { + "survey_key": "AIH001-06", + "starting_sap": 62, + "recommended_measures": [ + { + "measure": "Cylinder Insulation", + "description": "80mm cylinder insulation", + "sap_points": 2, + "ending_sap": 64, + }, + { + "measure": "Solar PV", + "description": "Solar PV system with 2kW capacity, south-facing", + "config": [ + { + "size": "2kW", + "orientation": "South", + "elavation": 30, + "overshading": "Modest", + } + ], + "sap_points": 6, + "ending_sap": 70 + } + ] + }, + { + "survey_key": "AIH001-07", + "starting_sap": 74, + "recommended_measures": [], + "notes": "Is EPC C" + }, + { + "survey_key": "AIH001-08", + 
"starting_sap": 56, + "recommended_measures": [ + { + "measure": "Loft Insulation", + "description": "300mm of loft insulation", + "sap_points": 2, + "ending_sap": 58, + }, + { + "measure": "Cylinder Insulation", + "description": "80mm cylinder insulation", + "sap_points": 4, + "ending_sap": 62, + }, + { + "measure": "Internal Wall Insulation", + "description": "100mm internal wall insulation", + "sap_points": 5, + "ending_sap": 69, + }, + { + "measure": "Ventilation", + "description": "Ventilation improvement", + "sap_points": 0, + "ending_sap": 69, + } + ] + }, + { + "survey_key": "AIH001-09", + "starting_sap": 44, + "recommended_measures": [ + { + "measure": "Internal Wall Insulation", + "description": "100mm internal wall insulation", + "sap_points": 8, + "ending_sap": 52, + }, + { + "measure": "Cavity Wall Insulation", + "description": "Cavity wall insulation for extensions", + "sap_points": 1, + "ending_sap": 53, + }, + { + "measure": "Ventilation", + "description": "Ventilation improvement", + "sap_points": 0, + "ending_sap": 53, + }, + { + "measure": "TTZC", + "description": "Thermostatic Time Zone Control", + "sap_points": 3, + "ending_sap": 56, + } + ] + }, + { + "survey_key": "AIH001-11", + "starting_sap": 59, + "recommended_measures": [ + { + "measure": "TTZC", + "description": "Thermostatic Time Zone Control", + "sap_points": 4, + "ending_sap": 63, + }, + { + "measure": "Internal Wall Insulation", + "description": "100mm internal wall insulation", + "sap_points": 5, + "ending_sap": 68, + }, + { + "measure": "Cylinder Insulation", + "description": "80mm cylinder insulation", + "sap_points": 1, + "ending_sap": 69, + } + ] + }, + { + "survey_key": "AIH001-12", + "starting_sap": 46, + "recommended_measures": [ + { + "measure": "Double Glazing", + "description": "Installation of double glazing", + "sap_points": 2, + "ending_sap": 48, + }, + { + "measure": "Draught Proofing", + "description": "Draught proofing improvements", + "sap_points": 1, + 
"ending_sap": 49, + }, + { + "measure": "Solar PV", + "description": "Solar PV system with 3.2kW capacity, east-facing", + "config": [ + { + "size": "3.2W", + "orientation": "East", + "elavation": 30, + "overshading": "Little or none", + } + ], + "sap_points": 9, + "ending_sap": 58 + }, + { + "measure": "Air Source Heat Pump", + "description": "Ecoforest ecoAIR EVI 4-20 20kW air source heat pump", + "sap_points": 15, + "ending_sap": 73 + }, + { + "measure": "Tariff Review", + "description": "Switch to 24-hour tariff", + "sap_points": 15, + "ending_sap": 88 + } + ] + }, + { + "survey_key": "AIH001-13", + "starting_sap": 53, + "recommended_measures": [ + { + "measure": "Roof Insulation", + "description": "100mm+ insulation on all surfaces (ceiling u=0.16, walls u=0.3)", + "sap_points": 6, + "ending_sap": 59, + }, + { + "measure": "Flat Roof Insulation", + "description": "Flat roof insulation", + "sap_points": 2, + "ending_sap": 61, + }, + { + "measure": "Cavity Wall Insulation", + "description": "Cavity wall insulation", + "sap_points": 6, + "ending_sap": 67, + }, + { + "measure": "Ventilation", + "description": "Ventilation improvement", + "sap_points": 0, + "ending_sap": 67, + }, + { + "measure": "TTZC", + "description": "Thermostatic Time Zone Control", + "sap_points": 2, + "ending_sap": 69, + }, + { + "measure": "Solar PV", + "description": "Solar PV system with 4kW capacity, flat roof installation", + "config": [ + { + "size": "4kW", + "orientation": "Horizontal", + "elavation": 30, + "overshading": "None or little", + } + ], + "sap_points": 9, + "ending_sap": 78 + } + ] + }, + { + "survey_key": "AIH001-14", + "starting_sap": 63, + "recommended_measures": [ + { + "measure": "Cavity Wall Insulation", + "description": "Insulation for cavity walls", + "sap_points": 5, + "ending_sap": 68, + }, + { + "measure": "Ventilation", + "description": "Ventilation improvement", + "sap_points": 0, + "ending_sap": 68, + }, + { + "measure": "Loft Insulation", + "description": 
"Installation of loft insulation", + "sap_points": 1, + "ending_sap": 69, + }, + { + "measure": "Solar PV", + "description": "Solar PV system with 10kW capacity", + "sap_points": 10, + "ending_sap": 79, + } + ] + }, + ] + + # Step 1: Normalize the recommended_measures data into a DataFrame. + normalized_measures = [] + + for survey in recommended_measures: + survey_key = survey["survey_key"] + starting_sap = survey["starting_sap"] + for measure in survey.get("recommended_measures", []): + normalized_measures.append({ + "survey_key": survey_key, + "starting_sap": starting_sap, + "measure": measure["measure"], + "description": measure.get("description", "") + }) + + # Convert the normalized list into a DataFrame. + measures_df = pd.DataFrame(normalized_measures) + + # Step 2: Pivot the measures_df to have a column for each measure type, using the description as values. + pivoted_measures = measures_df.pivot_table( + index="survey_key", + columns="measure", + values="description", + aggfunc=lambda x: ' '.join(x), # Concatenate descriptions if there are multiple entries. + fill_value=None + ).reset_index() + + # Step 3: Extract starting SAP for each survey key. + starting_sap_df = measures_df.drop_duplicates(subset=["survey_key"])[["survey_key", "starting_sap"]] + + # Merge starting SAP back onto pivoted measures. + result_df = pd.merge(pivoted_measures, starting_sap_df, on="survey_key", how="left") + + # Step 4: Calculate the ending SAP using the total sap points. + # Note: If you want to use total sap points, you'll need to update the total calculation accordingly. + + # Step 5: Merge the result with the measures_data to get the final DataFrame. + final_measures = measures_data.merge( + result_df, how="left", on="survey_key" + ) if __name__ == "__main__":