creating aiha output

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-25 18:13:28 +01:00
parent bfded2aaf9
commit ce9b3e5e20

View file

@ -1,5 +1,8 @@
import os
from io import BytesIO
import pandas as pd
from etl.xml_survey_extraction.XmlParser import XmlParser
SURVEY_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/RESIDENT SURVEYS"
@ -22,7 +25,8 @@ def main():
xml_files = [f for f in os.listdir(subfolder) if f.endswith('.xml')]
if not xml_files:
raise FileNotFoundError(f"No XML files found in subfolder: {subfolder}")
print(f"No XML files found in subfolder: {subfolder}")
continue
# If any XML files are found, perform the data extraction. We use the subfolder name as the survey key.
for xml_file in xml_files:
@ -44,16 +48,456 @@ def main():
# Run the parser to extract the data
xml_parser.run()
if not xml_parser.epc:
# If we don't have a lig xml
continue
# Store the extracted data for further processing
extracted_surveys.append({
"epc": xml_parser.epc,
"additional_data": xml_parser.additional_data,
"subfolder": subfolder
"survey_key": subfolder.split("/")[-1],
**xml_parser.epc,
**xml_parser.additional_data
})
print(f"Extracted {len(extracted_surveys)} surveys.")
# Process the extracted_surveys as needed, for example, save to a database or write to a file.
extracted_surveys = pd.DataFrame(extracted_surveys)
# This is the data we need for the AIHA project
measures_data = extracted_surveys[
["survey_key", "address", "postcode", "current-energy-efficiency", "current-energy-rating"]
]
measures_data = measures_data.sort_values("survey_key", ascending=True)
# Note:
# The properties will still have "Very poor" ratings for their hot water
# TODO
# - AIH001-03 has a basement and so we should discount this area from the ground floor
# - AIH001-03 has a loft that is inaccessible - ask Chenai about why this property didn't have access to the loft
# - AIH001-03 instead of cylinder insulation, we could install an air source heat pump but it might not be the
# best option for this property due to it being extremely large and the walls being uninsulated. It might not
# be performant enough in the winter, when COP will be more like 1.5.
# - AIH001-03 - can add additional 1.6kWp solar PV to flat roof to get close to EPC C. How many occupants are
# in the property? Does it make sense to have such a large solar PV system (5.6kWp)?
# - AIH001-04 why couldn't the cylinder be accessed? - treating this could get to the EPC C
# - Generally, should we consider insulated doors?
# - AIH001-08 and AIH001-09, check if it's freehold - could solar work as both of these units are part of the same
# building
# - AIH001-09 - The extension is 1900-1929 but has a cavity wall
# - AIH001-09 - Is it not possible to install a loft hatch?
# - AIH001-09 - Why is there assumed secondary heating?
# - AIH001-09 - Is there definitely an immersion water heater? Is this definitely the case for the other units?
# - AIH001-11 - The layout of this unit is confusing, is there roof access?
# - AIH001-12 - Why was there not access to the cylinder?
#
recommended_measures = [
{
"survey_key": "AIH001-01",
"starting_sap": 69,
"recommended_measures": [],
"notes": "Is EPC C"
},
{
"survey_key": "AIH001-03",
"starting_sap": 43,
"recommended_measures": [
{
"measure": "Cylinder Insulation",
"description": "80mm cylinder insulation",
"sap_points": 1,
"ending_sap": 44,
},
{
"measure": "Solar PV",
"description": "Solar PV system with various configurations",
"config": [
{
"size": "4kWp",
"orientation": "East",
"elavation": 30,
"overshading": "Modest",
},
{
"size": "1.6kWp",
"orientation": "Horizontal",
"elavation": "Horizontal",
"overshading": "Modest",
}
],
"sap_points": 7,
"ending_sap": 53
},
{
"measure": "Loft Insulation",
"description": "300mm of loft insulation",
"sap_points": 8,
"ending_sap": 61
},
{
"measure": "TTZC",
"description": "Thermostatic Time Zone Control",
"sap_points": 3,
"ending_sap": 64
}
],
"notes": "There was no access to the loft for this property and so a loft hatch would need to be "
"installed..."
},
{
"survey_key": "AIH001-04",
"starting_sap": 48,
"recommended_measures": [
{
"measure": "Flat Roof Insulation",
"description": "100mm flat roof insulation",
"sap_points": 4,
"ending_sap": 52
},
{
"measure": "TTZC",
"description": "Thermostatic Time Zone Control",
"sap_points": 3,
"ending_sap": 55
},
{
"measure": "Solar PV",
"description": "Solar PV system with 4kW capacity, south-facing",
"config": [
{
"size": "4kW",
"orientation": "South",
"elavation": 30,
"overshading": "Modest",
}
],
"sap_points": 12,
"ending_sap": 67
}
],
"notes": ""
},
{
"survey_key": "AIH001-05",
"starting_sap": 54,
"recommended_measures": [
{
"measure": "Flat Roof Insulation",
"description": "100mm flat roof insulation",
"sap_points": 5,
"ending_sap": 59,
},
{
"measure": "Cylinder Insulation",
"description": "80mm cylinder insulation",
"sap_points": 2,
"ending_sap": 61,
},
{
"measure": "Solar PV",
"description": "Solar PV system with 4kW capacity, horizontal orientation",
"config": [
{
"size": "4kW",
"orientation": "Horizontal",
"elavation": 30,
"overshading": "Modest",
}
],
"sap_points": 9,
"ending_sap": 70
},
{
"measure": "TTZC",
"description": "Thermostatic Time Zone Control",
"sap_points": 3,
"ending_sap": 73
}
],
"notes": ""
},
{
"survey_key": "AIH001-06",
"starting_sap": 62,
"recommended_measures": [
{
"measure": "Cylinder Insulation",
"description": "80mm cylinder insulation",
"sap_points": 2,
"ending_sap": 64,
},
{
"measure": "Solar PV",
"description": "Solar PV system with 2kW capacity, south-facing",
"config": [
{
"size": "2kW",
"orientation": "South",
"elavation": 30,
"overshading": "Modest",
}
],
"sap_points": 6,
"ending_sap": 70
}
]
},
{
"survey_key": "AIH001-07",
"starting_sap": 74,
"recommended_measures": [],
"notes": "Is EPC C"
},
{
"survey_key": "AIH001-08",
"starting_sap": 56,
"recommended_measures": [
{
"measure": "Loft Insulation",
"description": "300mm of loft insulation",
"sap_points": 2,
"ending_sap": 58,
},
{
"measure": "Cylinder Insulation",
"description": "80mm cylinder insulation",
"sap_points": 4,
"ending_sap": 62,
},
{
"measure": "Internal Wall Insulation",
"description": "100mm internal wall insulation",
"sap_points": 5,
"ending_sap": 69,
},
{
"measure": "Ventilation",
"description": "Ventilation improvement",
"sap_points": 0,
"ending_sap": 69,
}
]
},
{
"survey_key": "AIH001-09",
"starting_sap": 44,
"recommended_measures": [
{
"measure": "Internal Wall Insulation",
"description": "100mm internal wall insulation",
"sap_points": 8,
"ending_sap": 52,
},
{
"measure": "Cavity Wall Insulation",
"description": "Cavity wall insulation for extensions",
"sap_points": 1,
"ending_sap": 53,
},
{
"measure": "Ventilation",
"description": "Ventilation improvement",
"sap_points": 0,
"ending_sap": 53,
},
{
"measure": "TTZC",
"description": "Thermostatic Time Zone Control",
"sap_points": 3,
"ending_sap": 56,
}
]
},
{
"survey_key": "AIH001-11",
"starting_sap": 59,
"recommended_measures": [
{
"measure": "TTZC",
"description": "Thermostatic Time Zone Control",
"sap_points": 4,
"ending_sap": 63,
},
{
"measure": "Internal Wall Insulation",
"description": "100mm internal wall insulation",
"sap_points": 5,
"ending_sap": 68,
},
{
"measure": "Cylinder Insulation",
"description": "80mm cylinder insulation",
"sap_points": 1,
"ending_sap": 69,
}
]
},
{
"survey_key": "AIH001-12",
"starting_sap": 46,
"recommended_measures": [
{
"measure": "Double Glazing",
"description": "Installation of double glazing",
"sap_points": 2,
"ending_sap": 48,
},
{
"measure": "Draught Proofing",
"description": "Draught proofing improvements",
"sap_points": 1,
"ending_sap": 49,
},
{
"measure": "Solar PV",
"description": "Solar PV system with 3.2kW capacity, east-facing",
"config": [
{
"size": "3.2W",
"orientation": "East",
"elavation": 30,
"overshading": "Little or none",
}
],
"sap_points": 9,
"ending_sap": 58
},
{
"measure": "Air Source Heat Pump",
"description": "Ecoforest ecoAIR EVI 4-20 20kW air source heat pump",
"sap_points": 15,
"ending_sap": 73
},
{
"measure": "Tariff Review",
"description": "Switch to 24-hour tariff",
"sap_points": 15,
"ending_sap": 88
}
]
},
{
"survey_key": "AIH001-13",
"starting_sap": 53,
"recommended_measures": [
{
"measure": "Roof Insulation",
"description": "100mm+ insulation on all surfaces (ceiling u=0.16, walls u=0.3)",
"sap_points": 6,
"ending_sap": 59,
},
{
"measure": "Flat Roof Insulation",
"description": "Flat roof insulation",
"sap_points": 2,
"ending_sap": 61,
},
{
"measure": "Cavity Wall Insulation",
"description": "Cavity wall insulation",
"sap_points": 6,
"ending_sap": 67,
},
{
"measure": "Ventilation",
"description": "Ventilation improvement",
"sap_points": 0,
"ending_sap": 67,
},
{
"measure": "TTZC",
"description": "Thermostatic Time Zone Control",
"sap_points": 2,
"ending_sap": 69,
},
{
"measure": "Solar PV",
"description": "Solar PV system with 4kW capacity, flat roof installation",
"config": [
{
"size": "4kW",
"orientation": "Horizontal",
"elavation": 30,
"overshading": "None or little",
}
],
"sap_points": 9,
"ending_sap": 78
}
]
},
{
"survey_key": "AIH001-14",
"starting_sap": 63,
"recommended_measures": [
{
"measure": "Cavity Wall Insulation",
"description": "Insulation for cavity walls",
"sap_points": 5,
"ending_sap": 68,
},
{
"measure": "Ventilation",
"description": "Ventilation improvement",
"sap_points": 0,
"ending_sap": 68,
},
{
"measure": "Loft Insulation",
"description": "Installation of loft insulation",
"sap_points": 1,
"ending_sap": 69,
},
{
"measure": "Solar PV",
"description": "Solar PV system with 10kW capacity",
"sap_points": 10,
"ending_sap": 79,
}
]
},
]
# Step 1: Normalize the recommended_measures data into a DataFrame.
normalized_measures = []
for survey in recommended_measures:
survey_key = survey["survey_key"]
starting_sap = survey["starting_sap"]
for measure in survey.get("recommended_measures", []):
normalized_measures.append({
"survey_key": survey_key,
"starting_sap": starting_sap,
"measure": measure["measure"],
"description": measure.get("description", "")
})
# Convert the normalized list into a DataFrame.
measures_df = pd.DataFrame(normalized_measures)
# Step 2: Pivot the measures_df to have a column for each measure type, using the description as values.
pivoted_measures = measures_df.pivot_table(
index="survey_key",
columns="measure",
values="description",
aggfunc=lambda x: ' '.join(x), # Concatenate descriptions if there are multiple entries.
fill_value=None
).reset_index()
# Step 3: Extract starting SAP for each survey key.
starting_sap_df = measures_df.drop_duplicates(subset=["survey_key"])[["survey_key", "starting_sap"]]
# Merge starting SAP back onto pivoted measures.
result_df = pd.merge(pivoted_measures, starting_sap_df, on="survey_key", how="left")
# Step 4 (TODO): Calculate the ending SAP using the total sap points.
# Note: This is not implemented yet - "sap_points" is not carried into normalized_measures, so it would need to be
# added there before a total can be calculated.
# Step 5: Merge the result with the measures_data to get the final DataFrame.
final_measures = measures_data.merge(
result_df, how="left", on="survey_key"
)
if __name__ == "__main__":