Merge pull request #368 from Hestia-Homes/aiha-measures

Aiha measures
This commit is contained in:
KhalimCK 2024-11-05 11:34:38 +00:00 committed by GitHub
commit 1e80656904
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 2593 additions and 16 deletions

View file

@ -256,16 +256,12 @@ class SearchEpc:
else:
params = {"address": self.address1, "postcode": self.postcode}
url = os.path.join(self.client.domestic.host, "search")
for retry in range(self.max_retries):
try:
if "uprn" in params:
# We use the direct call method inside, since we need to implement uprn as a valid
# parameter for the search function
url = os.path.join(self.client.domestic.host, "search")
response = self.client.domestic.call(method="get", url=url, params=params)
else:
response = self.client.domestic.search(params=params, size=size)
response = self.client.domestic.call(method="get", url=url, params=params)
if response:
self.data = response

View file

@ -0,0 +1,984 @@
import os
from io import BytesIO
import pandas as pd
from etl.xml_survey_extraction.XmlParser import XmlParser
SURVEY_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/RESIDENT SURVEYS"
CONTINGENCY_RATE = 0.26
def sap_to_epc(sap_points: int | float):
"""
Simple utility function to convert SAP points to EPC rating.
:param sap_points: numerical value of SAP points, typically between 0 and 100
:return:
"""
if sap_points <= 0:
raise ValueError("SAP points should be above 0.")
if sap_points >= 92:
return "A"
elif sap_points >= 81:
return "B"
elif sap_points >= 69:
return "C"
elif sap_points >= 55:
return "D"
elif sap_points >= 39:
return "E"
elif sap_points >= 21:
return "F"
else:
return "G"
def _measure_cost(measure, pricing_by_item, n_floors, scaffolding_prices):
    """
    Price a single recommended measure.

    :param measure: measure dict; must hold "description" plus whichever quantity its pricing unit
        needs ("floor_area", "hlp" or "n_windows")
    :param pricing_by_item: pricing rows keyed by their "item" (the measure description)
    :param n_floors: number of floors of the property, only used for measures needing scaffolding
    :param scaffolding_prices: scaffolding price keyed by number of floors
    :return: the cost of the measure as a float
    :raises ValueError: if the description has no pricing, the scaffolding price cannot be
        resolved, or the pricing unit is not recognised
    """
    description = measure["description"]
    if description not in pricing_by_item:
        raise ValueError(f"No pricing data for measure description: {description!r}")

    pricing = pricing_by_item[description]
    unit_price = float(pricing["unit_price"])
    unit = pricing["unit"]

    if unit in ("unit", None):
        # Flat price per installation (or free, e.g. a tariff switch).
        return unit_price
    if unit == "unit_needs_scaffolding":
        if n_floors not in scaffolding_prices:
            raise ValueError(
                f"Cannot price scaffolding for {description!r}: no scaffolding price for "
                f"number_of_floors={n_floors!r}"
            )
        return unit_price + scaffolding_prices[n_floors]
    if unit == "floor_m2":
        return unit_price * measure["floor_area"]
    if unit == "hlp_m2":
        return unit_price * measure["hlp"]
    if unit == "window":
        return unit_price * measure["n_windows"]
    raise ValueError(f"Unknown unit type: {unit!r}")


def main():
    """
    This script handles the extraction of data from the XML files in the survey folders.

    It parses every XML file found in the subfolders of SURVEY_FOLDER_PATH, prices the hand-curated
    package of recommended measures for each survey, and writes two CSVs next to the survey folder:
    "Measures packages.csv" (one row per survey) and "Pricing data.csv" (the price list used).

    :return: None
    :raises ValueError: if a recommended measure cannot be priced
    """
    # Step 1: List all subfolders inside SURVEY_FOLDER_PATH.
    subfolders = [f.path for f in os.scandir(SURVEY_FOLDER_PATH) if f.is_dir()]

    # Step 2: Loop through each subfolder and find the XML files.
    extracted_surveys = []
    for subfolder in subfolders:
        print(f"Searching in subfolder: {subfolder}")
        # Find all XML files in the current subfolder.
        xml_files = [f for f in os.listdir(subfolder) if f.endswith('.xml')]
        if not xml_files:
            print(f"No XML files found in subfolder: {subfolder}")
            continue

        # If any XML files are found, perform the data extraction. We use the subfolder name as the survey key.
        survey_key = os.path.basename(subfolder)
        for xml_file in xml_files:
            xml_path = os.path.join(subfolder, xml_file)
            print(f"Processing XML file: {xml_path}")
            # Read in the XML and parse it using the XmlParser class.
            with open(xml_path, 'rb') as file:
                xml_data_io = BytesIO(file.read())

            uprn = None  # Set the UPRN if available.
            # Create an XmlParser instance
            xml_parser = XmlParser(
                file=xml_data_io,
                filekey=xml_path,
                surveyor_company="",
                uprn=uprn,
            )
            # Run the parser to extract the data
            xml_parser.run()
            if not xml_parser.epc:
                # The parser produced no EPC data for this file, so there is nothing to store.
                continue

            # Store the extracted data for further processing
            extracted_surveys.append({
                "survey_key": survey_key,
                **xml_parser.epc,
                **xml_parser.additional_data
            })

    print(f"Extracted {len(extracted_surveys)} surveys.")
    # Process the extracted_surveys as needed, for example, save to a database or write to a file.
    extracted_surveys = pd.DataFrame(extracted_surveys)

    # This is the data we need for the AIHA project
    measures_data = extracted_surveys[
        ["survey_key", "address", "postcode", "current-energy-efficiency", "current-energy-rating", "number_of_floors"]
    ]
    measures_data = measures_data.sort_values("survey_key", ascending=True)

    # Note:
    # The properties will still have "Very poor" ratings for their hot water
    # TODO
    #   - AIH001-03 has a loft that is inaccessible - ask Chenai about why this property didn't have access to the loft
    #     [Can't remember, not clear - Chenai will check]
    #   - AIH001-08 and AIH001-09, check if it's freehold - could solar work as both of these units are part of the same
    #     building [Question for Lewis & Kevin]
    #   - AIH001-09 - Is it not possible to install a loft hatch? [IT IS NOT, NO ACCESS - would need to accessed from
    #     the other unit]
    #   - AIH001-09 - Is there definitely an immersion water heater? Is this definitely the case for the other units?
    #     [Question for Lewis & Kevin] - [YES - ASHP!!!!]
    # TODO: Check which properties are in a conservation area
    # TODO: AIH001-16 - Is the loft insulation suitable (already has 100mm in the RIR)
    # TODO: Adjust Archetype 14 homes to exclude double glazing? Or should we exclude entirely
    #
    # The "ending_sap" values below are a hand-maintained running total for reference only; the
    # Ending SAP written to the output is recomputed from starting_sap + sum(sap_points).
    recommended_measures = [
        {
            "survey_key": "AIH001-01",
            "starting_sap": 69,
            "recommended_measures": [],
            "notes": "Is EPC C"
        },
        {
            "survey_key": "AIH001-02",
            "starting_sap": 65,
            "recommended_measures": [
                {
                    "measure": "Solar PV",
                    "description": "2.4kWp Solar PV system",
                    "config": [
                        {
                            "size": "2.4W",
                            "orientation": "Horizontal",
                            "elavation": 30,
                            "overshading": "None or little",
                        }
                    ],
                    "sap_points": 7,
                    "ending_sap": 72,
                    "notes": "The array can be mounted on the flat roof, so that panels are south facing"
                },
                {
                    "measure": "TTZC",
                    "description": "Smart Thermostat",
                    "sap_points": 4,
                    "ending_sap": 76
                }
            ],
        },
        {
            "survey_key": "AIH001-03",
            "starting_sap": 43,
            "recommended_measures": [
                {
                    "measure": "Cylinder Insulation",
                    "description": "80mm cylinder insulation",
                    "sap_points": 1,
                    "ending_sap": 44,
                },
                {
                    "measure": "Solar PV",
                    "description": "4kWp Solar PV system",
                    "config": [
                        {
                            "size": "4kWp",
                            "orientation": "East",
                            "elavation": 30,
                            "overshading": "None or little",
                        },
                    ],
                    "sap_points": 10,
                    "ending_sap": 54
                },
                {
                    "measure": "Air Source Heat Pump",
                    "description": "Ecoforest ecoAIR EVI 4-20 20kW air source heat pump (+TTZC)",
                    "sap_points": 20,
                    "ending_sap": 74
                },
                {
                    "measure": "Tariff Review",
                    "description": "Switch to 24-hour tariff",
                    "sap_points": 15,
                    "ending_sap": 89
                }
            ],
            "notes": "Unclear if the loft is accessible"
        },
        {
            "survey_key": "AIH001-04",
            "starting_sap": 48,
            "recommended_measures": [
                {
                    "measure": "Flat Roof Insulation",
                    "description": "100mm flat roof insulation",
                    "floor_area": 39.1482,  # based on area of top floor
                    "sap_points": 4,
                    "ending_sap": 52
                },
                {
                    "measure": "TTZC",
                    "description": "Smart Thermostat",
                    "sap_points": 3,
                    "ending_sap": 55
                },
                {
                    "measure": "Solar PV",
                    "description": "4kWp Solar PV system",
                    "config": [
                        {
                            "size": "4kWp",
                            "orientation": "South",
                            "elavation": 30,
                            "overshading": "None or little",
                        }
                    ],
                    "sap_points": 15,
                    "ending_sap": 70
                }
            ],
            "notes": "Roof is flat, PV array should be installed south facing with elevation"
        },
        {
            "survey_key": "AIH001-05",
            "starting_sap": 54,
            "recommended_measures": [
                {
                    "measure": "Flat Roof Insulation",
                    "description": "100mm flat roof insulation",
                    "floor_area": 49.48,  # based on area of top floor
                    "sap_points": 5,
                    "ending_sap": 59,
                },
                {
                    "measure": "Cylinder Insulation",
                    "description": "80mm cylinder insulation",
                    "sap_points": 2,
                    "ending_sap": 61,
                },
                {
                    "measure": "Solar PV",
                    "description": "4kWp Solar PV system",
                    "config": [
                        {
                            "size": "4kW",
                            "orientation": "Horizontal",
                            "elavation": 30,
                            "overshading": "Modest",
                        }
                    ],
                    "sap_points": 9,
                    "ending_sap": 70
                },
                {
                    "measure": "TTZC",
                    "description": "Smart Thermostat",
                    "sap_points": 3,
                    "ending_sap": 73
                }
            ],
            "notes": ""
        },
        {
            "survey_key": "AIH001-06",
            "starting_sap": 62,
            "recommended_measures": [
                {
                    "measure": "Cylinder Insulation",
                    "description": "80mm cylinder insulation",
                    "sap_points": 2,
                    "ending_sap": 64,
                },
                {
                    "measure": "Solar PV",
                    "description": "2kWp Solar PV system",
                    "config": [
                        {
                            "size": "2kW",
                            "orientation": "South",
                            "elavation": 30,
                            "overshading": "Modest",
                        }
                    ],
                    "sap_points": 6,
                    "ending_sap": 70
                }
            ]
        },
        {
            "survey_key": "AIH001-07",
            "starting_sap": 74,
            "recommended_measures": [],
            "notes": "Is EPC C"
        },
        {
            "survey_key": "AIH001-08",
            "starting_sap": 56,
            "recommended_measures": [
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 54.2864,  # Based on area of top floor
                    "sap_points": 2,
                    "ending_sap": 58,
                },
                {
                    "measure": "Cylinder Insulation",
                    "description": "80mm cylinder insulation",
                    "sap_points": 4,
                    "ending_sap": 62,
                },
                {
                    "measure": "Internal Wall Insulation",
                    "description": "100mm internal wall insulation",
                    "hlp": 24.13 * 2.63,
                    "sap_points": 7,
                    "ending_sap": 69,
                },
                {
                    "measure": "Ventilation",
                    "description": "2x DMEV fans",
                    "sap_points": 0,
                    "ending_sap": 69,
                }
            ]
        },
        {
            "survey_key": "AIH001-09",
            "starting_sap": 44,
            "recommended_measures": [
                {
                    "measure": "Internal Wall Insulation",
                    "description": "100mm internal wall insulation",
                    "hlp": (22.35 * 3.24) + (22.13 * 2.53),
                    "sap_points": 8,
                    "ending_sap": 52,
                },
                {
                    "measure": "Cavity Wall Insulation",
                    "description": "CWI to rdSAP default standard",
                    "hlp": (2.68 * 2.39) + (5.93 * 2.63) + (6.13 * 2.39),  # 1st & 2nd extension
                    "sap_points": 1,
                    "ending_sap": 53,
                },
                {
                    "measure": "Ventilation",
                    "description": "2x DMEV fans",
                    "sap_points": 0,
                    "ending_sap": 53,
                },
                {
                    "measure": "TTZC",
                    "description": "Smart Thermostat",
                    "sap_points": 3,
                    "ending_sap": 56,
                },
                {
                    "measure": "Solar PV",
                    "description": "1.6kWp Solar PV system",
                    "config": [
                        {
                            "size": "1.6W",
                            "orientation": "South-East",
                            "elavation": 30,
                            "overshading": "None or little",
                        }
                    ],
                    "sap_points": 6,
                    "ending_sap": 62
                },
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 63.59 + 12.31,  # Based on area of main building and 1st extension
                    "sap_points": 8,
                    "ending_sap": 70,
                    "notes": "Loft is inaccessible from this unit - would need to be accessed from the other unit, "
                             "which is also owned by AIHA"
                }
            ],
            # Adjacent string literals are concatenated as-is, so each fragment ends with a space.
            "notes": "This property is a house split into 2 flats. We can install a PV array for both units (one array "
                     "per unit). Area on south-east part of roof is ~22m2 with no overshadowing. Flat roof area is 8m2 "
                     "with modest overshadowing. We suggest a 3.2kWp system, across two units"
        },
        {
            "survey_key": "AIH001-11",
            "starting_sap": 59,
            "recommended_measures": [
                {
                    "measure": "TTZC",
                    "description": "Smart Thermostat",
                    "sap_points": 4,
                    "ending_sap": 63,
                },
                {
                    "measure": "Internal Wall Insulation",
                    "description": "100mm internal wall insulation",
                    "hlp": (18.50 * 3.12) + (19.00 * 2.75),
                    "sap_points": 5,
                    "ending_sap": 68,
                },
                {
                    "measure": "Cylinder Insulation",
                    "description": "80mm cylinder insulation",
                    "sap_points": 1,
                    "ending_sap": 69,
                }
            ]
        },
        {
            # NOTE(review): the ending_sap running totals below (65, 80) do not match
            # 46 + 3 + 15 + 15 = 79 - confirm the intended sap_points.
            "survey_key": "AIH001-12",
            "starting_sap": 46,
            "recommended_measures": [
                {
                    "measure": "Double Glazing",
                    "description": "Installation of double glazing",
                    "n_windows": 20,  # Counted the bay windows each as 3
                    "windows_area": 10.66,
                    "sap_points": 3,
                    "ending_sap": 49,
                },
                # {
                #     "measure": "Solar PV",
                #     "description": "3.2kWp Solar PV system",
                #     "config": [
                #         {
                #             "size": "3.2W",
                #             "orientation": "East",
                #             "elavation": 30,
                #             "overshading": "Little or none",
                #         }
                #     ],
                #     "sap_points": 9,
                #     "ending_sap": 58
                # },
                {
                    "measure": "Air Source Heat Pump",
                    "description": "Ecoforest ecoAIR EVI 4-20 20kW air source heat pump (+TTZC)",
                    "sap_points": 15,
                    "ending_sap": 65
                },
                {
                    "measure": "Tariff Review",
                    "description": "Switch to 24-hour tariff",
                    "sap_points": 15,
                    "ending_sap": 80
                }
            ]
        },
        {
            "survey_key": "AIH001-13",
            "starting_sap": 53,
            "recommended_measures": [
                {
                    "measure": "Roof Insulation",
                    "description": "100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)",
                    "floor_area": 39.75,  # based on the floor area of the RIR
                    "sap_points": 6,
                    "ending_sap": 59,
                },
                {
                    "measure": "Flat Roof Insulation",
                    "description": "100mm flat roof insulation",
                    "floor_area": 33.06,  # Based on area of the extension
                    "sap_points": 2,
                    "ending_sap": 61,
                },
                {
                    "measure": "Cavity Wall Insulation",
                    "description": "CWI to rdSAP default standard",
                    "hlp": (35.40 * 2.65) + (26.70 * 2.73) + (16.30 * 2.71),  # 1st & 2nd extension
                    "sap_points": 6,
                    "ending_sap": 67,
                },
                {
                    "measure": "Ventilation",
                    "description": "2x DMEV fans",
                    "sap_points": 0,
                    "ending_sap": 67,
                },
                {
                    "measure": "TTZC",
                    "description": "Smart Thermostat",
                    "sap_points": 2,
                    "ending_sap": 69,
                },
                {
                    "measure": "Solar PV",
                    "description": "4kWp Solar PV system",
                    "config": [
                        {
                            "size": "4kW",
                            "orientation": "Horizontal",
                            "elavation": 30,
                            "overshading": "None or little",
                        }
                    ],
                    "sap_points": 9,
                    "ending_sap": 78
                }
            ]
        },
        {
            "survey_key": "AIH001-14",
            "starting_sap": 63,
            "recommended_measures": [
                {
                    "measure": "Cavity Wall Insulation",
                    "description": "CWI to rdSAP default standard",
                    "hlp": (11.00 * 2.6) + (11.00 * 2.65) + (4.60 * 2.7),
                    "sap_points": 5,
                    "ending_sap": 68,
                },
                {
                    "measure": "Ventilation",
                    "description": "2x DMEV fans",
                    "sap_points": 0,
                    "ending_sap": 68,
                },
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",  # Based on area of main building
                    "floor_area": 59.20,
                    "sap_points": 1,
                    "ending_sap": 69,
                },
                {
                    "measure": "Solar PV",
                    "description": "3.2kWp Solar PV system",
                    "sap_points": 10,
                    "ending_sap": 79,
                }
            ]
        },
        {
            "survey_key": "AIH001-15",
            "starting_sap": 60,
            "recommended_measures": [
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 73.81,  # Based on area of main building
                    "sap_points": 1,
                    "ending_sap": 61,
                },
                {
                    "measure": "TTZC",
                    "description": "Smart Thermostat",
                    "sap_points": 3,
                    "ending_sap": 64,
                },
                {
                    "measure": "Solar PV",
                    "description": "3.2kWp Solar PV system",
                    "config": [
                        {
                            "size": "3.2W",
                            "orientation": "North-West",
                            "elavation": 30,
                            "overshading": "None or little",
                        }
                    ],
                    "sap_points": 7,
                    "ending_sap": 71,
                    "notes": "The array is North-west facing and therefore will be slightly less efficient than south "
                             "facing, however the impact is not so severe as to make the installation not worthwhile. "
                             "Ground mounted"
                }
            ]
        },
        {
            "survey_key": "AIH001-16",
            "starting_sap": 60,
            "recommended_measures": [
                {
                    "measure": "Cavity Wall Insulation",
                    "description": "CWI to rdSAP default standard",
                    "hlp": (21.56 * 2.60) + (26.79 * 2.8) + (6.74 * 2.60),
                    "sap_points": 4,
                    "ending_sap": 64,
                },
                {
                    "measure": "Ventilation",
                    "description": "2x DMEV fans",
                    "sap_points": 0,
                    "ending_sap": 64,
                },
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 20.92,  # Based on floor area of RIR
                    "sap_points": 1,
                    "ending_sap": 65,
                },
                {
                    "measure": "Solar PV",
                    "description": "2.4kWp Solar PV system",
                    "config": [
                        {
                            "size": "2.4W",
                            "orientation": "South-East",
                            "elavation": 30,
                            "overshading": "Modest",
                        }
                    ],
                    "sap_points": 5,
                    "ending_sap": 70,
                }
            ]
        },
        {
            "survey_key": "AIH001-17",
            "starting_sap": 62,
            "recommended_measures": [
                {
                    "measure": "Cylinder Insulation",
                    "description": "80mm cylinder insulation",
                    "sap_points": 1,
                    "ending_sap": 63,
                },
                {
                    "measure": "TTZC",
                    "description": "Smart Thermostat",
                    "sap_points": 3,
                    "ending_sap": 66,
                },
                {
                    "measure": "Solar PV",
                    "description": "4kWp Solar PV system",
                    "config": [
                        {
                            "size": "3.2kW",
                            "orientation": "East",
                            "elavation": 30,
                            "overshading": "None or little",
                        },
                        {
                            "size": "0.8kW",
                            "orientation": "West",
                            "elavation": 30,
                            "overshading": "None or little",
                        }
                    ],
                    "sap_points": 12,
                    "ending_sap": 78,
                }
            ]
        },
        {
            "survey_key": "AIH001-18",
            "starting_sap": 58,
            "recommended_measures": [
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 37.52,  # Based on area of main building and 1st extension
                    "sap_points": 7,
                    "ending_sap": 65,
                },
                {
                    "measure": "Cylinder Insulation",
                    "description": "80mm cylinder insulation",
                    "sap_points": 1,
                    "ending_sap": 66,
                },
                {
                    "measure": "TTZC",
                    "description": "Smart Thermostat",
                    "sap_points": 2,
                    "ending_sap": 68,
                },
                {
                    "measure": "Solar PV",
                    "description": "3.2kWp Solar PV system",
                    "config": [
                        {
                            "size": "3.2W",
                            "orientation": "North-East",
                            "elavation": 30,
                            "overshading": "None or little",
                        }
                    ],
                    "sap_points": 7,
                    "ending_sap": 75,
                }
            ],
        },
        {
            "survey_key": "AIH001-19",
            "starting_sap": 76,
            "recommended_measures": []
        },
        {
            "survey_key": "AIH001-20",
            "starting_sap": 82,
            "recommended_measures": []
        },
        {
            "survey_key": "AIH001-21",
            "starting_sap": 53,
            "recommended_measures": [
                {
                    # Must match the spelling used by every other survey, otherwise the pivot below
                    # creates a second, misspelt column for this one property.
                    "measure": "Cylinder Insulation",
                    "description": "80mm cylinder insulation",
                    "sap_points": 2,
                    "ending_sap": 55,
                },
                {
                    "measure": "Roof Insulation",
                    "description": "100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)",
                    "floor_area": 22.80,  # Based on floor area of RIR
                    "sap_points": 7,
                    "ending_sap": 62,
                },
                {
                    "measure": "Solar PV",
                    "description": "2.4kWp Solar PV system",
                    "config": [
                        {
                            "size": "1.6kWp",
                            "orientation": "Horizontal",
                            "elavation": 30,
                            "overshading": "None or little",
                        },
                        {
                            "size": "0.8kWp",
                            "orientation": "South-East",
                            "elavation": 30,
                            "overshading": "None or little",
                        }
                    ],
                    "sap_points": 9,
                    "ending_sap": 71,
                },
                {
                    "measure": "TTZC",
                    "description": "Smart Thermostat",
                    "sap_points": 3,
                    "ending_sap": 74,
                }
            ]
        },
        {
            "survey_key": "AIH001-SIMULATED-01",
            "elmhurst_reference": "000020",
            "starting_sap": None,
            "recommended_measures": [
                {
                    "measure": "Internal Wall Insulation",
                    "description": "100mm internal wall insulation",
                    "hlp": (22.35 * 3.24) + (22.13 * 2.53),
                    "sap_points": 8,
                    "ending_sap": 52,
                },
                {
                    "measure": "Cavity Wall Insulation",
                    "description": "CWI to rdSAP default standard",
                    "hlp": (2.68 * 2.39) + (5.93 * 2.63) + (6.13 * 2.39),  # 1st & 2nd extension
                    "sap_points": 1,
                    "ending_sap": 53,
                },
                {
                    "measure": "Ventilation",
                    "description": "2x DMEV fans",
                    "sap_points": 0,
                    "ending_sap": 53,
                },
                {
                    "measure": "TTZC",
                    "description": "Smart Thermostat",
                    "sap_points": 3,
                    "ending_sap": 56,
                },
                {
                    "measure": "Solar PV",
                    "description": "1.6kWp Solar PV system",
                    "config": [
                        {
                            "size": "1.6W",
                            "orientation": "South-East",
                            "elavation": 30,
                            "overshading": "None or little",
                        }
                    ],
                    "sap_points": 6,
                    "ending_sap": 62
                },
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 63.59 + 12.31,  # Based on area of main building and 1st extension
                    "sap_points": 8,
                    "ending_sap": 70,
                    "notes": "Loft is inaccessible from this unit - would need to be accessed from the other unit, "
                             "which is also owned by AIHA"
                }
            ],
            "notes": "This was cloned from 80A. There is no existing data for 80B"
        },
        {
            "survey_key": "AIH001-SIMULATED-05",
            "starting_sap": 68,
            "recommended_measures": [
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 42.5,
                    "sap_points": 1,
                    "ending_sap": 69,
                },
                {
                    "measure": "Solar PV",
                    "description": "3.2kWp Solar PV system",
                    "config": [
                        {
                            "size": "3.2W",
                            "orientation": "North-East",
                            "elavation": 30,
                            "overshading": "None or little",
                        }
                    ],
                    "sap_points": 8,
                    "ending_sap": 77,
                }
            ]
        }
    ]

    scaffolding_data = [
        {
            "number_of_floors": 2,
            "price": 841,
        },
        {
            "number_of_floors": 3,
            "price": 1077,
        }
    ]
    scaffolding_prices = {row["number_of_floors"]: row["price"] for row in scaffolding_data}

    # TODO - Need an update cost for cylinder insulation
    pricing_rows = [
        {'item': '80mm cylinder insulation', 'unit_price': 50, 'unit': 'unit'},
        {'item': '100mm internal wall insulation', 'unit_price': 244.8, 'unit': 'hlp_m2'},
        {'item': 'CWI to rdSAP default standard', 'unit_price': 14.21, 'unit': 'hlp_m2'},
        {'item': 'Window draught proofing improvements', 'unit_price': 63, 'unit': 'window'},
        {'item': '100mm flat roof insulation', 'unit_price': 195, 'unit': 'floor_m2'},
        {'item': 'Switch to 24-hour tariff', 'unit_price': 0, 'unit': None},
        {'item': 'Installation of double glazing', 'unit_price': 1074, 'unit': 'window'},
        {'item': 'Ecoforest ecoAIR EVI 4-20 20kW air source heat pump (+TTZC)', 'unit_price': 21189 + 1200,
         'unit': 'unit'},
        {'item': '100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)', 'unit_price': 244.80,
         'unit': 'floor_m2'},
        {'item': '300mm loft insulation', 'unit_price': 16.07, 'unit': 'floor_m2'},
        {'item': 'Smart Thermostat', 'unit_price': 1200, 'unit': 'unit'},
        {'item': '2x DMEV fans', 'unit_price': 1070, 'unit': 'unit'},
        {'item': '1.6kWp Solar PV system', 'unit_price': 3040, 'unit': 'unit_needs_scaffolding'},
        {'item': '2kWp Solar PV system', 'unit_price': 3201, 'unit': 'unit_needs_scaffolding'},
        {'item': '2.4kWp Solar PV system', 'unit_price': 3363, 'unit': 'unit_needs_scaffolding'},
        {'item': '3.2kWp Solar PV system', 'unit_price': 3686, 'unit': 'unit_needs_scaffolding'},
        {'item': '4kWp Solar PV system', 'unit_price': 4009, 'unit': 'unit_needs_scaffolding'},
        {'item': '5.6kWp Solar PV system', 'unit_price': 5015, 'unit': 'unit_needs_scaffolding'},
    ]
    pricing_by_item = {row["item"]: row for row in pricing_rows}
    pricing_data = pd.DataFrame(pricing_rows)

    # One number_of_floors per survey key; a folder holding several XML files yields several rows
    # for the same key, in which case the first one is used.
    floors_by_survey = (
        measures_data.drop_duplicates(subset=["survey_key"])
        .set_index("survey_key")["number_of_floors"]
    )

    # Price every measure and store the costs back onto the recommendation dictionaries.
    for recommendation in recommended_measures:
        # None when the survey has no extracted XML data; only an error if scaffolding is needed.
        n_floors = floors_by_survey.get(recommendation["survey_key"])
        total_cost = 0
        for measure in recommendation["recommended_measures"]:
            measure_cost = _measure_cost(
                measure=measure,
                pricing_by_item=pricing_by_item,
                n_floors=n_floors,
                scaffolding_prices=scaffolding_prices,
            )
            measure["Total Cost"] = measure_cost
            total_cost += measure_cost
        recommendation["total_cost"] = total_cost

    # Step 1: Normalize the recommended_measures data into a DataFrame.
    normalized_measures = []
    for survey in recommended_measures:
        survey_key = survey["survey_key"]
        starting_sap = survey["starting_sap"]
        total_cost = survey.get("total_cost", 0)
        for measure in survey.get("recommended_measures", []):
            # Include hlp and floor_area for each measure if available
            hlp = measure.get("hlp", None)
            floor_area = measure.get("floor_area", None)
            normalized_measures.append({
                "survey_key": survey_key,
                "hlp": hlp,
                "floor_area": floor_area,
                "starting_sap": starting_sap,
                "measure": measure["measure"],
                "description": measure.get("description", ""),
                "sap_points": measure.get("sap_points", 0),
                "measure_cost": measure.get("Total Cost", 0),
                "total_cost": total_cost
            })

    # Convert the normalized list into a DataFrame.
    measures_df = pd.DataFrame(normalized_measures)

    # Step 2: Pivot the measures_df to have a column for each measure type, using the description as values.
    pivoted_measures = measures_df.pivot_table(
        index="survey_key",
        columns="measure",
        values="description",
        aggfunc=lambda x: ' '.join(x),  # Concatenate descriptions if there are multiple entries.
        fill_value=None
    ).reset_index()

    measures_columns = [x for x in pivoted_measures.columns if x not in ["survey_key"]]
    # We add a "Cost of" column for each measure
    for measure in measures_columns:
        pivoted_measures[f"Cost of {measure}"] = None

    pivoted_floor_area = measures_df.pivot_table(
        index="survey_key",
        columns="measure",
        values="floor_area",
        aggfunc="first"  # Use 'first' since each measure should only appear once per survey_key
    ).add_prefix("floor_area - ").reset_index()

    pivoted_hlp = measures_df.pivot_table(
        index="survey_key",
        columns="measure",
        values="hlp",
        aggfunc="first"
    ).add_prefix("hlp - ").reset_index()

    # Merge hlp and floor_area data
    pivoted_measures = pivoted_measures.merge(pivoted_hlp, on="survey_key", how="left")
    pivoted_measures = pivoted_measures.merge(pivoted_floor_area, on="survey_key", how="left")

    # Step 3: Calculate the total sap points for each survey.
    totals = measures_df.groupby("survey_key").agg(
        total_sap_points=("sap_points", "sum"),
    ).reset_index()

    # Merge total sap points into the pivoted measures.
    pivoted_measures = pd.merge(pivoted_measures, totals, on="survey_key", how="left")

    # pivoted_measures["Cost Contingency"] = pivoted_measures["total_cost_of_measures"] * CONTINGENCY_RATE
    # pivoted_measures["Total Cost"] = pivoted_measures["total_cost_of_measures"] + pivoted_measures["Cost Contingency"]

    # Step 4: Extract starting SAP for each survey key.
    starting_sap_df = measures_df.drop_duplicates(subset=["survey_key"])[["survey_key", "starting_sap"]]

    # Merge starting SAP back onto pivoted measures.
    result_df = pd.merge(pivoted_measures, starting_sap_df, on="survey_key", how="left")

    # Step 5: Calculate the ending SAP.
    result_df["Ending SAP"] = result_df["starting_sap"] + result_df["total_sap_points"]
    # A survey without a starting SAP has a missing Ending SAP; leave its rating empty rather than
    # letting the missing value fall through every threshold and be reported as a "G".
    result_df["Ending EPC Rating"] = result_df["Ending SAP"].apply(
        lambda sap: sap_to_epc(sap) if pd.notna(sap) else None
    )

    # Step 6: Merge the result with the measures_data to get the final DataFrame.
    final_measures = measures_data.merge(
        result_df, how="left", on="survey_key"
    )

    # Outputs are written to the customer folder, i.e. the parent of the survey folder.
    output_folder = os.path.dirname(SURVEY_FOLDER_PATH)
    final_measures.to_csv(os.path.join(output_folder, "Measures packages.csv"))

    # Store costs
    pricing_data.to_csv(os.path.join(output_folder, "Pricing data.csv"))
# if __name__ == "__main__":
# main()

View file

@ -0,0 +1,225 @@
import os
import time
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.s3 import read_excel_from_s3
from backend.SearchEpc import SearchEpc
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from recommendations.recommendation_utils import (
estimate_perimeter,
estimate_external_wall_area,
estimate_number_of_floors
)
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def get_data(asset_list):
    """
    Look up the newest EPC, and its recommendations, for every property in the asset list.

    :param asset_list: DataFrame with one row per property; the columns "Postcode", "Number",
        "Full Address" and "row_id" are read
    :return: tuple (epc_data, errors) where epc_data is a list of dicts (the newest EPC fields plus
        "row_id" and "recommendations") and errors is the list of row_id values whose lookup raised
    """
    epc_data = []
    errors = []
    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        try:
            postcode = home["Postcode"]
            house_number = home["Number"]
            full_address = home["Full Address"]

            searcher = SearchEpc(
                address1=str(house_number),
                postcode=postcode,
                auth_token=EPC_AUTH_TOKEN,
                os_api_key="",
                property_type=None,
                fast=True,
                full_address=full_address,
                max_retries=5
            )
            # Force the skipping of estimating the EPC
            searcher.ordnance_survey_client.property_type = None
            searcher.ordnance_survey_client.built_form = None
            searcher.find_property(skip_os=True)

            if searcher.newest_epc is None:
                # No EPC found: not an error, the property is simply left out of the results.
                continue

            # Look for EPC recommendations. A failure here is deliberately tolerated (the EPC itself
            # is still kept), but only ordinary exceptions are swallowed so that Ctrl-C and
            # interpreter shutdown still propagate.
            try:
                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
            except Exception:
                property_recommendations = {"rows": []}

            epc = {
                "row_id": home["row_id"],
                **searcher.newest_epc.copy(),
                "recommendations": property_recommendations["rows"]
            }
            epc_data.append(epc)
        except Exception:
            # Record the row so the caller can retry it, then pause before the next request.
            # NOTE(review): the 5 second pause is presumably a back-off for the EPC API - confirm.
            errors.append(home["row_id"])
            time.sleep(5)

    return epc_data, errors
def app():
    """
    This app pulls EPC data for some properties owned by Livewest.

    It reads the asset list spreadsheet, looks up the newest EPC for every row (retrying failed
    rows once), derives a few estimated dimensions and writes the enriched list to an Excel file.

    Data request contents:
    Date of last EPC
    Reason for EPC
    SAP score on register
    Property Type
    Property Area
    Property Age
    Any Dimensions (HLP,PW,RH)
    Property Wall Construction
    Heating Type
    Secondary Heating
    Loft Insulation Depth

    Additional if possible:
    Heat loss calculations
    EPC recommendations
    Property UPRN

    :return: None
    """
    asset_list = pd.read_excel(
        "/Users/khalimconn-kowlessar/Downloads/LIVEWEST 3578 ECO4 ECO PLUS GBIS.xlsx", header=0
    )
    # row_id ties the EPC results back to the original spreadsheet rows.
    asset_list["row_id"] = asset_list.index

    epc_data, errors = get_data(asset_list)

    # We now retrieve any failed properties
    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
    epc_data_failed, _ = get_data(asset_list_failed)

    # Append the failed data to the main data
    epc_data.extend(epc_data_failed)

    epc_df = pd.DataFrame(epc_data)

    # We expand out the recommendations into one boolean column per distinct recommendation text
    recommendations_df = epc_df[["row_id", "recommendations"]]
    unique_recommendations = set()
    for _, row in recommendations_df.iterrows():
        unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])

    # Sorted so that the column order of the output is the same on every run.
    columns = ["row_id"] + sorted(unique_recommendations)

    transformed_data = []
    for _, row in recommendations_df.iterrows():
        # Initialize a dictionary for this row with False for all recommendations
        row_data = {col: False for col in columns}
        row_data["row_id"] = row["row_id"]

        # Set True for each recommendation present in this row
        for rec in row["recommendations"]:
            recommendation_text = rec["improvement-summary-text"]
            row_data[recommendation_text] = True

        # Append the row data to transformed_data
        transformed_data.append(row_data)

    transformed_df = pd.DataFrame(transformed_data)
    # Drop the column that is "" (only present when some recommendation has an empty summary text)
    transformed_df = transformed_df.drop(columns=[""], errors="ignore")

    # Retrieve just the data we need
    epc_df = epc_df[
        [
            "row_id",
            "uprn",
            "property-type",
            "built-form",
            "inspection-date",
            "current-energy-rating",
            "current-energy-efficiency",
            "roof-description",
            "walls-description",
            "transaction-type",
            # New fields needed
            "secondheat-description",
            "total-floor-area",
            "construction-age-band",
            "floor-height",
            "number-habitable-rooms",
            "mainheat-description",
            #
            "energy-consumption-current",  # kwh/m2
        ]
    ]

    asset_list = asset_list.merge(
        epc_df,
        how="left",
        on="row_id"
    ).merge(
        transformed_df,
        how="left",
        on="row_id"
    )

    asset_list = asset_list.drop(columns=["row_id"])
    # Rename the columns
    asset_list = asset_list.rename(columns={
        "inspection-date": "Date of last EPC",
        "current-energy-efficiency": "SAP score on register",
        "current-energy-rating": "EPC rating on register",
        "property-type": "Property Type",
        "built-form": "Archetype",
        "total-floor-area": "Property Floor Area",
        "construction-age-band": "Property Age Band",
        "floor-height": "Property Floor Height",
        "number-habitable-rooms": "Number of Habitable Rooms",
        "walls-description": "Wall Construction",
        "roof-description": "Roof Construction",
        "mainheat-description": "Heating Type",
        "secondheat-description": "Secondary Heating",
        "transaction-type": "Reason for last EPC",
        "energy-consumption-current": "Heat Demand (kWh/m2)"
    })

    asset_list["Estimated Number of Floors"] = asset_list.apply(
        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
            x["Property Type"]) else None, axis=1
    )

    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
    # Turn "" (and anything else that is not a number) into NaN. Series.replace("", None) is
    # avoided because its handling of value=None differs between pandas versions.
    asset_list["Number of Habitable Rooms"] = pd.to_numeric(
        asset_list["Number of Habitable Rooms"], errors="coerce"
    ).astype(float)

    # The perimeter is estimated per floor, so area and room count are split across the floors.
    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
        lambda x: estimate_perimeter(
            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
        ), axis=1
    )

    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
        lambda x: estimate_external_wall_area(
            num_floors=x["Estimated Number of Floors"],
            # Fall back to a 2.5 floor height when the EPC holds no value.
            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
            perimeter=x["Estimated Perimeter (m)"],
            built_form=x["Archetype"]
        ),
        axis=1
    )

    asset_list["Roof Insulation Thickness"] = asset_list.apply(
        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
            x["Roof Construction"]) else None,
        axis=1
    )

    # Store as an excel
    filename = "livewest EPC Data pull - 29 Oct.xlsx"
    asset_list.to_excel(filename, index=False)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,4 @@
PyPDF2
pandas
tqdm
openpyxl

View file

@ -9,7 +9,8 @@ from etl.xml_survey_extraction.pcdb import heating_data
PROPERTY_TYPE_LOOKUP = {
"0": "House",
"House": "House",
"2": "Flat"
"2": "Flat",
"3": "Maisonette",
}
@ -107,11 +108,13 @@ class XmlParser:
BUILT_FORM_MAP = {
"1": "Detached",
"2": "Semi-Detached",
"3": "End-Terrace",
"4": "Mid-Terrace",
}
GLAZED_AREA_MAP = {
"2": "More than Typical",
"4": "Much More Than Typical"
}
@ -120,7 +123,9 @@ class XmlParser:
}
TRANSACTION_TYPE_MAP = {
"13": "ECO assessment"
"5": "Rented (social)",
"13": "ECO assessment",
"14": "Stock condition survey",
}
TENURE_MAP = {
@ -131,7 +136,8 @@ class XmlParser:
TARIFF_MAP = {
"1": "Dual",
"2": "Single"
"2": "Single",
"3": "Unknown"
}
def __init__(self, file, filekey, surveyor_company, uprn=None):
@ -400,8 +406,13 @@ class XmlParser:
]
wall_areas = sum([float(f["heat_loss_perimeter"]) * float(f["room_height"]) for f in main_dwelling_floors])
window_areas = sum([float(w["window_area"]) for w in main_dwelling_windows])
return wall_areas - window_areas
window_areas = [float(w["window_area"]) for w in main_dwelling_windows if w["window_area"] is not None]
if not window_areas:
# We discount 10% of the wall area
insulation_wall_area = wall_areas * 0.9
else:
insulation_wall_area = wall_areas - sum(window_areas)
return insulation_wall_area
def extract_additional_data(self):
@ -415,7 +426,8 @@ class XmlParser:
main_dwelling_windows = [w for w in self.windows if w["window_location"] == "0"]
number_of_windows = len(main_dwelling_windows)
windows_area = sum([float(w["window_area"]) for w in main_dwelling_windows])
windows_area = [float(w["window_area"]) for w in main_dwelling_windows if w["window_area"] is not None]
windows_area = sum(windows_area) if windows_area else None
boolean_lookup = {
"true": True,
@ -427,6 +439,7 @@ class XmlParser:
cylinder_insulation_type = {
None: "",
"1": "Foam",
"2": "Jacket"
}
cylinder_insulation_thickness = int(
@ -461,7 +474,7 @@ class XmlParser:
"cylinder_thermostat": cylinder_thermostat,
"main_dwelling_ground_floor_area": float(main_dwelling_ground_floor_area),
"number_of_windows": int(number_of_windows),
"windows_area": float(windows_area),
"windows_area": float(windows_area) if windows_area is not None else windows_area,
}
def get_node_value(self, tag_name):
@ -769,9 +782,10 @@ class XmlParser:
:return:
"""
sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window")
glazing_type_lookup = {
"ND": "Single glazing",
"1": "double glazing installed before 2002",
"2": "double glazing installed during or after 2002",
"3": "double glazing, unknown install date",
"5": "Single glazing",
}
@ -787,6 +801,40 @@ class XmlParser:
"8": "North West"
}
sap_windows = self.xml.getElementsByTagName("SAP-Windows")
if not sap_windows:
# We look for Multi-Glazed-Proportion
multiple_glazing_type = self.xml.getElementsByTagName("SAP-Property-Details")[0].getElementsByTagName(
"Multiple-Glazing-Type"
)[0].firstChild.nodeValue
pvc_frame = self.xml.getElementsByTagName("SAP-Property-Details")[0].getElementsByTagName(
"PVC-Window-Frames"
)
pvc_frame = pvc_frame[0].firstChild.nodeValue if pvc_frame else None
multple_glazed_proportion = self.xml.getElementsByTagName("SAP-Property-Details")[0].getElementsByTagName(
"Multiple-Glazed-Proportion"
)[0].firstChild.nodeValue
self.windows = [
{
"window_location": "0",
"window_area": None,
"window_type": None,
"glazing_type": glazing_type_lookup[multiple_glazing_type],
"pvc_frame": pvc_frame,
"glazing_gap": None,
"orientation": None,
"multple_glazed_proportion": multple_glazed_proportion
}
]
return
sap_windows = sap_windows[0].getElementsByTagName("SAP-Window")
self.windows = [
self._parse_windows_content(
window=window,