From 325748524817708da77c30c9f1155470d7020e1f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Wed, 24 Jul 2024 18:46:39 +0100
Subject: [PATCH 01/49] xml extraction wip

---
 etl/xml_survey_extraction/XmlParser.py |  540 ++++++++++++
 etl/xml_survey_extraction/app.py       |   43 +-
 etl/xml_survey_extraction/pcdb.py      | 1129 ++++++++++++++++++++++++
 utils/s3.py                            |   83 ++
 4 files changed, 1794 insertions(+), 1 deletion(-)
 create mode 100644 etl/xml_survey_extraction/XmlParser.py
 create mode 100644 etl/xml_survey_extraction/pcdb.py

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
new file mode 100644
index 00000000..de7e35f8
--- /dev/null
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -0,0 +1,540 @@
+import re
+import usaddress
+from xml.dom.minidom import parseString
+from backend.app.utils import sap_to_epc
+from etl.xml_survey_extraction.pcdb import heating_data
+
+PROPERTY_TYPE_LOOKUP = {
+    "0": "House",
+    "House": "House",
+}
+
+
+def get_house_number(address: str) -> str | None:
+    """
+    Extract the house number from an address, trying the usaddress library first and a regex fallback second
+    :return: the house number as a string, or None when neither approach finds one
+    """
+
+    parsed = usaddress.parse(address)
+    parsed_house_number = [x for x in parsed if (x[1] == "AddressNumber")]
+    parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None
+
+    if parsed_house_number is None:
+        # Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
+        # we also add a custom approach
+
+        # Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
+        pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
+
+        match = re.search(pattern, address)
+
+        if match:
+            # Return the first non-None group found
+            return next(g for g in match.groups() if g is not None)
+        else:
+            return None
+
+    # Remove any commas from the parsed house number
+    parsed_house_number = parsed_house_number.replace(",", "")
+
+    return parsed_house_number
+
+
+class XmlParser:
+    uprn = None
+    property_type = None
+    current_energy_efficiency = None
+    current_energy_rating = None
+
+    # heating/emissions information
+    space_heating_kwh = None
+    water_heating_kwh = None
+    co2_emissions_current = None
+    heating_cost_current = None
+    hot_water_cost_current = None
+    lighting_cost_current = None
+    energy_consumption_current = None
+    heating_system = None
+    heating_controls = None
+
+    # Assessor details
+    surveyor_name = None
+
+    # Addresses
+    address1 = None
+    address2 = None
+    address3 = None
+    posttown = None
+    postcode = None
+    address = None
+
+    # Dates
+    survey_date = None
+
+    # Building Fabric
+    # Walls
+    walls_description = None
+    walls_classification = None
+    walls_energy_rating = None
+    # Roof
+    roof_description = None
+    roof_energy_rating = None
+    is_loft = None
+    # Floor
+    floor_description = None
+    floor_energy_rating = None
+    # Windows
+    windows_description = None
+    windows_energy_rating = None
+    # main heating
+    main_heating_description = None
+    main_heating_energy_rating = None
+    # Heating controls
+    main_heating_controls_description = None
+    main_heating_controls_energy_rating = None
+    # Hot water
+    hot_water_description = None
+    hot_water_energy_rating = None
+    # Lighting
+    lighting_description = None
+    lighting_energy_rating = None
+    # Second Heating
+    second_heating_description = None
+    second_heating_energy_rating = None
+
+    number_of_doors = None
+    number_of_insulated_doors = None
+    photo_supply = None
+
+    # Property dimensions
+    number_of_floors = None
+    perimeter = None
+    heat_loss_perimeter = None
+    party_wall_length = None
+    total_floor_area = None
+    ground_floor_area = None
+    is_there_party_wall = None
+    floor_height = None
+    insulation_wall_area = None
+
+    rrn = None
+
+    database_data = None
+
+    # We assume that the insulation wall area is 85% of the total wall area, as a standard estimate
+    INSULATION_WALL_AREA_FACTOR = 0.85
+
+    # The value of the UPRN tells us about the file type that we're parsing
+    UPRN_FILETYPE_MAP = {
+        0: "EPR",
+        -1: "RDSAP_EPR"
+    }
+
+    RATINGS_MAP = {
+        "0": "N/A",
+        "1": "Very Poor",
+        "2": "Poor",
+        "3": "Average",
+        "4": "Good",
+        "5": "Very Good"
+    }
+
+    def __init__(self, file, filekey, uprn=None):
+        file.seek(0)  # Ensure the file pointer is at the beginning
+        xml_string = file.read().decode('utf-8')
+        self.xml = parseString(xml_string)
+        self.filekey = filekey
+
+        # The xml parser is used to parse the EPC and EPR xmls and different file types will contain different
+        # information
+        # In order to identify the file type, we can look for the presence of the 'UPRN' tag
+        # If the UPRN tag is present, we can assume that the file is an EPC
+        # If the UPRN tag is not present, we can assume that the file is an EPR
+        self.get_uprn(uprn)
+
+        self.file_type = self.UPRN_FILETYPE_MAP.get(self.uprn, "EPC")
+
+    @staticmethod
+    def get_node(node):
+        """
+        Utility function to get the node value from the xml, where data might be optional
+        :return: the nodeValue of the node's first child, or None when the node has no children
+        """
+
+        node_first_child = node.firstChild
+        if node_first_child is None:
+            return None
+
+        return node_first_child.nodeValue
+
+    def run(self):
+        if self.file_type == "RDSAP_EPR":
+            # This file type contains just limited information compared to a regular EPR/EPC, and so we just exit
+            # unless we learn something else that determines that we need information from this file
+            return
+        self.get_property_type()
+        self.get_sap()
+        self.get_property_address()
+        self.get_dates()
+        self.get_assessor_details()
+
+        self.get_heating_and_emissions_data()
+        self.get_detailed_heating_specs()
+
+        # Building fabric
+        self.get_walls()
+        self.get_roof()
+        self.get_floor()
+        self.get_windows()
+        self.get_heating()
+        self.get_hot_water()
+        self.get_lighting()
+        self.get_doors()
+        self.get_photo_supply()
+
+        # Property dimensions
+        self.get_property_dimensions()
+
+    def get_uprn(self, uprn=None):
+        """Set self.uprn from the argument when given, else from the UPRN tag (-1 if empty, 0 if all zeros)."""
+        if uprn is not None:
+            self.uprn = uprn
+            return
+
+        uprn_tag = self.xml.getElementsByTagName('UPRN')[0].firstChild
+        if uprn_tag is None:
+            self.uprn = -1
+            return
+
+        self.uprn = uprn_tag.nodeValue
+        # If all of the characters in the UPRN are 0, then no UPRN has been set
+        if self.uprn.count("0") == len(self.uprn):
+            self.uprn = 0
+        else:
+            self.uprn = self.uprn.lower().split("uprn-")[-1]  # [-1] also tolerates a value without the prefix
+
+    def get_property_type(self):
+        if not self.xml:
+            raise ValueError("You need to read the file first")
+
+        property_type = self.xml.getElementsByTagName('Property-Type')
+        if not property_type:
+            property_type = self.xml.getElementsByTagName('PropertyType1')
+
+        self.property_type = PROPERTY_TYPE_LOOKUP[property_type[0].firstChild.nodeValue]
+
+    def get_sap(self):
+        sap_score = self.xml.getElementsByTagName('Energy-Rating-Current')
+        sap_score = int(sap_score[0].firstChild.nodeValue)
+        epc_rating = sap_to_epc(sap_score)
+        self.current_energy_efficiency = str(sap_score)
+        self.current_energy_rating = epc_rating
+
+    def get_heating_and_emissions_data(self):
+        """
+        This method will extract the following pieces of information:
+        1) Space heating requirement
+        2) Water heating requirement
+        3) CO2 emissions
+        4) Heat demand per square meter per year
+        5) Bills
+
+        :return:
+        """
+
+        self.space_heating_kwh = self.xml.getElementsByTagName(
+            'Space-Heating-Existing-Dwelling'
+        )[0].firstChild.nodeValue
+
+        self.water_heating_kwh = self.xml.getElementsByTagName('Water-Heating')[0].firstChild.nodeValue
+
+        self.co2_emissions_current = self.xml.getElementsByTagName('CO2-Emissions-Current')[0].firstChild.nodeValue
+        self.heating_cost_current = self.xml.getElementsByTagName('Heating-Cost-Current')[0].firstChild.nodeValue
+        self.hot_water_cost_current = self.xml.getElementsByTagName('Hot-Water-Cost-Current')[0].firstChild.nodeValue
+        self.lighting_cost_current = self.xml.getElementsByTagName('Lighting-Cost-Current')[0].firstChild.nodeValue
+        self.energy_consumption_current = (
+            self.xml.getElementsByTagName("Energy-Consumption-Current")[0].firstChild.nodeValue
+        )
+
+    def get_detailed_heating_specs(self):
+        """
+        Given the heating data that is found in the SAP-Heating tag, we extract the details about the heating
+        system and its controls, looking each code up in heating_data
+        :return:
+        """
+        sap_main_heating_details = (
+            self.xml.getElementsByTagName('SAP-Heating')[0]
+            .getElementsByTagName("Main-Heating-Details")[0]
+            .getElementsByTagName("Main-Heating")[0]
+        )
+
+        heating_code = sap_main_heating_details.getElementsByTagName("SAP-Main-Heating-Code")[0].firstChild.nodeValue
+
+        # Get the heating system
+        heating_system = heating_data[heating_data["code"] == int(heating_code)]["description"]
+        heating_system = heating_system.values[0] if not heating_system.empty else f"Heating code: {heating_code}"
+
+        # Get the heating controls
+        heating_controls_code = (
+            sap_main_heating_details.getElementsByTagName("Main-Heating-Control")[0].firstChild.nodeValue
+        )
+
+        heating_controls = heating_data[heating_data["code"] == int(heating_controls_code)]["description"]
+        heating_controls = (
+            heating_controls.values[0] if not heating_controls.empty else f"Heating Controls code: {heating_controls_code}"
+        )
+
+        self.heating_system = heating_system
+        self.heating_controls = heating_controls
+
+    def get_walls(self):
+        """Read the wall description and rating, then classify the wall as NON CAVITY, EMPTY, PARTIAL or FULL."""
+        wall_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Wall')[0]
+
+        self.walls_description = (
+            wall_xml_data
+            .getElementsByTagName("Description")[0]
+            .firstChild.nodeValue
+        )
+
+        self.walls_energy_rating = (
+            wall_xml_data
+            .getElementsByTagName("Energy-Efficiency-Rating")[0]
+            .firstChild.nodeValue
+        )
+
+        is_cavity = "cavity wall" in self.walls_description.lower()
+        is_empty = "no insulation" in self.walls_description.lower()
+        is_partial = "partial insulation" in self.walls_description.lower()
+
+        if not is_cavity:
+            self.walls_classification = "NON CAVITY"
+            return
+
+        if is_empty:
+            self.walls_classification = "EMPTY"
+            return
+
+        if is_partial:
+            self.walls_classification = "PARTIAL"
+            return
+
+        if is_cavity and not is_empty and not is_partial:
+            self.walls_classification = "FULL"
+            return
+
+        raise NotImplementedError("Implement me")
+
+    def get_roof(self):
+        """Read the roof description and rating, and flag whether the property appears to have a loft."""
+        roof_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Roof')[0]
+
+        self.roof_description = (
+            roof_xml_data
+            .getElementsByTagName("Description")[0]
+            .firstChild.nodeValue
+        )
+
+        self.roof_energy_rating = (
+            roof_xml_data
+            .getElementsByTagName("Energy-Efficiency-Rating")[0]
+            .firstChild.nodeValue
+        )
+
+        loft_recommendation_tag = self.xml.getElementsByTagName("Impact-Of-Loft-Insulation")
+        description_contains_loft = "loft" in self.roof_description.lower()
+
+        if not loft_recommendation_tag and not description_contains_loft:
+            self.is_loft = "No"
+            return
+
+        self.is_loft = "Yes"
+        return
+
+    def get_floor(self):
+        """Read the floor description and energy rating from the Property-Summary tag."""
+        floor_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Floor')[0]
+
+        self.floor_description = (
+            floor_xml_data
+            .getElementsByTagName("Description")[0]
+            .firstChild.nodeValue
+        )
+
+        self.floor_energy_rating = (
+            floor_xml_data
+            .getElementsByTagName("Energy-Efficiency-Rating")[0]
+            .firstChild.nodeValue
+        )
+
+    def get_windows(self):
+        """Read the window description and energy rating from the Property-Summary tag."""
+        windows_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Window')[0]
+
+        self.windows_description = (
+            windows_xml_data
+            .getElementsByTagName("Description")[0]
+            .firstChild.nodeValue
+        )
+
+        self.windows_energy_rating = (
+            windows_xml_data
+            .getElementsByTagName("Energy-Efficiency-Rating")[0]
+            .firstChild.nodeValue
+        )
+
+    def get_heating(self):
+        """
+        This function will retrieve the main heating, the main heating controls and the secondary heating
+        :return:
+        """
+        mainheating_xml_data = self.xml.getElementsByTagName('Main-Heating')[0]
+
+        self.main_heating_description = (
+            mainheating_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
+        )
+
+        self.main_heating_energy_rating = (
+            mainheating_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
+        )
+
+        mainheating_controls_xml_data = self.xml.getElementsByTagName('Main-Heating-Controls')[0]
+
+        self.main_heating_controls_description = (
+            mainheating_controls_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
+        )
+
+        self.main_heating_controls_energy_rating = (
+            mainheating_controls_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
+        )
+
+        second_heating_xml_data = self.xml.getElementsByTagName('Secondary-Heating')[0]
+
+        self.second_heating_description = (
+            second_heating_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
+        )
+
+        self.second_heating_energy_rating = (
+            second_heating_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
+        )
+
+    def get_hot_water(self):
+        hot_water_xml_data = self.xml.getElementsByTagName('Hot-Water')[0]
+
+        self.hot_water_description = (
+            hot_water_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
+        )
+
+        self.hot_water_energy_rating = (
+            hot_water_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
+        )
+
+    def get_lighting(self):
+        lighting_xml_data = self.xml.getElementsByTagName('Lighting')[0]
+
+        self.lighting_description = (
+            lighting_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
+        )
+
+        self.lighting_energy_rating = (
+            lighting_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
+        )
+
+    def get_doors(self):
+        """Read the total door count and the insulated door count."""
+        # Doors can be found in the SAP-Property-Details tag
+        self.number_of_doors = int(
+            self.xml.getElementsByTagName('SAP-Property-Details')[0]
+            .getElementsByTagName('Door-Count')[0]
+            .firstChild.nodeValue
+        )
+
+        self.number_of_insulated_doors = int(
+            self.xml.getElementsByTagName('SAP-Property-Details')[0]
+            .getElementsByTagName('Insulated-Door-Count')[0]
+            .firstChild.nodeValue
+        )
+
+    def get_photo_supply(self):
+        self.photo_supply = float(
+            self.xml.getElementsByTagName('Photovoltaic-Supply')[0]
+            .getElementsByTagName('Percent-Roof-Area')[0]
+            .firstChild.nodeValue
+        )
+
+    def get_assessor_details(self):
+        """Read the surveyor's name from the Energy-Assessor tag."""
+        energy_assessor_tag = self.xml.getElementsByTagName('Energy-Assessor')[0]
+
+        self.surveyor_name = (
+            energy_assessor_tag.getElementsByTagName("Name")[0].firstChild.nodeValue
+        )
+
+    def get_property_address(self):
+        """Read the address parts from the Property tag and join those that are present into self.address."""
+        property_tag = self.xml.getElementsByTagName("Property")[0]
+
+        self.address1 = self.get_node(property_tag.getElementsByTagName("Address-Line-1")[0])
+        self.address2 = self.get_node(property_tag.getElementsByTagName("Address-Line-2")[0])
+        self.address3 = self.get_node(property_tag.getElementsByTagName("Address-Line-3")[0])
+        self.posttown = self.get_node(property_tag.getElementsByTagName("Post-Town")[0])
+        self.postcode = self.get_node(property_tag.getElementsByTagName("Postcode")[0])
+        self.address = ", ".join(
+            [x for x in [self.address1, self.address2, self.address3, self.posttown, self.postcode] if x is not None]
+        )
+
+    def get_dates(self):
+        self.survey_date = (
+            self.xml.getElementsByTagName("Inspection-Date")[0].firstChild.nodeValue
+        )
+
+    def get_property_dimensions(self):
+        """
+        This function will extract the relevant property dimensions including the floor area,
+        number of floors, perimeter, party wall length and the insulation_wall_area.
+
+        insulation_wall_area is typically simplified down to perimeter * height * 0.85
+        :return:
+        """
+
+        # Each floor has its own SAP-Floor-Dimension tag
+        floor_dimensions = (
+            self.xml.getElementsByTagName("SAP-Floor-Dimensions")[0]
+            .getElementsByTagName("SAP-Floor-Dimension")
+        )
+
+        self.number_of_floors = len(floor_dimensions)
+
+        self.heat_loss_perimeter = float(
+            floor_dimensions[0].getElementsByTagName("Heat-Loss-Perimeter")[0].firstChild.nodeValue
+        )
+
+        self.party_wall_length = float(
+            floor_dimensions[0].getElementsByTagName("Party-Wall-Length")[0].firstChild.nodeValue
+        )
+
+        party_wall_construction_tag = (
+            self.xml.getElementsByTagName("Party-Wall-Construction")[0].firstChild.nodeValue.replace("\n", "").strip()
+        )
+
+        self.is_there_party_wall = (
+            "Yes" if (self.party_wall_length > 0) or (party_wall_construction_tag != "") else "No"
+        )
+
+        # We pull out all of the floor areas
+        floor_areas = [
+            float(x.getElementsByTagName("Total-Floor-Area")[0].firstChild.nodeValue) for x in floor_dimensions
+        ]
+
+        self.total_floor_area = sum(floor_areas)
+        self.ground_floor_area = floor_areas[0]
+
+        self.floor_height = float(
+            floor_dimensions[0]
+            .getElementsByTagName("Room-Height")[0]
+            .firstChild.nodeValue
+        )
+
+        self.insulation_wall_area = self.heat_loss_perimeter * self.floor_height * self.INSULATION_WALL_AREA_FACTOR
+        self.perimeter = self.heat_loss_perimeter + self.party_wall_length
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index 6f53e4e2..9bcbb168 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -1,3 +1,16 @@
+from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder
+from utils.logger import setup_logger
+from etl.xml_survey_extraction.XmlParser import XmlParser
+import posixpath
+from io import BytesIO
+
+logger = setup_logger()
+
+SURVEYORS = "JAFFERSONS ENERGY CONSULTANTS"
+PROJECT_CODE = "VDE001"
+BUCKET = "retrofit-energy-assessments-dev"
+
+
 def main():
     """
     This function executes the main process, which will retrieve data from the specified locations, extract the data
@@ -6,4 +19,32 @@ def main():
     """
 
     # TODO: Build solution to get this data from Onedrive and store what we need in S3
-    # In s3, we have a bucket called retrofit-energy-assessments-{stage} which
+    # In s3, we have a bucket called retrofit-energy-assessments-{stage} which contains the data we need
+    # The data is stored in a folder called {surveyors}/{project_code}/{uprn}
+    # We'll need to get the uprn from the folder name, which we can do with EpcSearcher class
+
+    #
+    energy_assessments = list_files_and_subfolders_in_s3_folder(
+        bucket_name=BUCKET, folder_name=f"{SURVEYORS}/{PROJECT_CODE}/"
+    )
+
+    logger.info(f"Found {len(energy_assessments)} energy assessments for {SURVEYORS} and {PROJECT_CODE}")
+    assessments_map = {}
+    for assessment in energy_assessments:
+        uploaded_xmls = list_xmls_in_s3_folder(
+            bucket_name=BUCKET, folder_name=posixpath.join(assessment, "docs & plans")
+        )
+        uprn = int(assessment.rstrip("/").split("/")[-1])
+        assessments_map[uprn] = uploaded_xmls
+
+    logger.info("Extracted XMLs for the energy assessments")
+
+    # For each property, we download the xmls and extract the data
+    for uprn, xmls in assessments_map.items():
+        extracted_data = {}
+        for xml in xmls:
+            xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
+            xml_data_io = BytesIO(xml_data)
+            xml_parser = XmlParser(file=xml_data_io, filekey=xml, uprn=uprn)
+            xml_parser.run()
+            logger.info(f"Extracted data from {xml}")
diff --git a/etl/xml_survey_extraction/pcdb.py b/etl/xml_survey_extraction/pcdb.py
new file mode 100644
index 00000000..64d65708
--- /dev/null
+++ b/etl/xml_survey_extraction/pcdb.py
@@ -0,0 +1,1129 @@
+"""
+This script contains the systems data, contained in the BRE product characteristics database (PCDB).
+ +For SAP 10.2, this can be found in the following document: +https://files.bregroup.com/SAP/SAP%2010.2%20-%2017-12-2021.pdf + +From page 157 onwards +""" +import pandas as pd + +no_heating_system = [ + { + "category": "No heating system present", + "description": "Electric heaters (assumed)", + "efficiency": 100, + "heating_type": 1, + "responsiveness": 1.0, + "code": 699 + } +] + +boiler_systems_with_radiators_or_underfloor_heating = [ + # Solid fuel boilers + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Solid fuel boiler - Manual feed independent boiler", + "efficiency_A": 65, + "efficiency_B": 60, + "heating_type": 2, + "responsiveness": 0.75, + "code": 151 + }, + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Solid fuel boiler - Auto (gravity) feed independent boiler", + "efficiency_A": 70, + "efficiency_B": 65, + "heating_type": 2, + "responsiveness": 0.75, + "code": 153 + }, + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Solid fuel boiler - Wood chip/pellet independent boiler", + "efficiency_A": 75, + "efficiency_B": 70, + "heating_type": 2, + "responsiveness": 0.75, + "code": 155 + }, + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Solid fuel boiler - Open fire with back boiler to radiators", + "efficiency_A": 63, + "efficiency_B": 55, + "heating_type": 3, + "responsiveness": 0.50, + "code": 156 + }, + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Solid fuel boiler - Closed room heater with boiler to radiators", + "efficiency_A": 67, + "efficiency_B": 65, + "heating_type": 3, + "responsiveness": 0.50, + "code": 158 + }, + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Solid fuel boiler - Stove (pellet-fired) with boiler to radiators", + "efficiency_A": 75, + "efficiency_B": 70, + "heating_type": 2, + 
"responsiveness": 0.75, + "code": 159 + }, + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Solid fuel boiler - Range cooker boiler (integral oven and boiler)", + "efficiency_A": 50, + "efficiency_B": 45, + "heating_type": 3, + "responsiveness": 0.50, + "code": 160 + }, + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Solid fuel boiler - Range cooker boiler (independent oven and boiler)", + "efficiency_A": 60, + "efficiency_B": 55, + "heating_type": 3, + "responsiveness": 0.50, + "code": 161 + }, + # Electric boilers + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Electric boiler - Direct acting electric boiler", + "efficiency": 100, + "heating_type": "From Table 4d", + "responsiveness": None, + "code": 191 + }, + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Electric boiler - CPSU in heated space – radiators or underfloor", + "efficiency": 100, + "heating_type": 1, + "responsiveness": 1.0, + "code": 192 + }, + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Electric boiler - Dry core storage boiler in heated space", + "efficiency": 100, + "heating_type": 2, + "responsiveness": 0.75, + "code": 193 + }, + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Electric boiler - Dry core storage boiler in unheated space", + "efficiency": 85, + "heating_type": 2, + "responsiveness": 0.75, + "code": 194 + }, + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Electric boiler - Water storage boiler in heated space", + "efficiency": 100, + "heating_type": 2, + "responsiveness": 0.75, + "code": 195 + }, + { + "category": "Boiler systems with radiators or underfloor heating", + "description": "Electric boiler - Water storage boiler in unheated space", + "efficiency": 85, + "heating_type": 2, + 
"responsiveness": 0.75, + "code": 196 + } +] + +heat_pumps_with_radiators_or_underfloor_heating = [ + # Electric heat pumps + { + "category": "Heat pumps with radiators or underfloor heating", + "description": "Electric heat pumps - Ground source heat pump with flow temperature <= 35°C", + "space": 230, + "water": 170, + "heating_type": "From Table 4d", # Replace with specific value as needed + "responsiveness": None, # Not provided, assuming 'None' + "code": 211 + }, + { + "category": "Heat pumps with radiators or underfloor heating", + "description": "Electric heat pumps - Water source heat pump with flow temperature <= 35°C", + "space": 230, + "water": 170, + "heating_type": "From Table 4d", # Replace with specific value as needed + "responsiveness": None, # Not provided, assuming 'None' + "code": 213 + }, + { + "category": "Heat pumps with radiators or underfloor heating", + "description": "Electric heat pumps - Air source heat pump with flow temperature <= 35°C", + "space": 170, + "water": 170, + "heating_type": "From Table 4d", # Replace with specific value as needed + "responsiveness": None, # Not provided, assuming 'None' + "code": 214 + }, + { + "category": "Heat pumps with radiators or underfloor heating", + "description": "Electric heat pumps - Ground source heat pump in other cases", + "space": 170, + "water": 170, + "heating_type": "From Table 4d", # Replace with specific value as needed + "responsiveness": None, # Not provided, assuming 'None' + "code": 221 + }, + { + "category": "Heat pumps with radiators or underfloor heating", + "description": "Electric heat pumps - Water source heat pump, in other cases", + "space": 170, + "water": 170, + "heating_type": "From Table 4d", # Replace with specific value as needed + "responsiveness": None, # Not provided, assuming 'None' + "code": 223 + }, + { + "category": "Heat pumps with radiators or underfloor heating", + "description": "Electric heat pumps - Air source heat pump in other cases", + "space": 170, + 
"water": 170, + "heating_type": "From Table 4d", # Replace with specific value as needed + "responsiveness": None, # Not provided, assuming 'None' + "code": 224 + }, + # Gast fired heat pumps + { + "category": "Heat pumps with radiators or underfloor heating", + "description": "Gas-fired heat pumps - Ground source heat pump with flow temperature <= 35°C", + "space": 120, + "water": 84, + "heating_type": "From Table 4d", # Replace with specific value as needed + "responsiveness": None, # Not provided, assuming 'None' + "code": 215 + }, + { + "category": "Heat pumps with radiators or underfloor heating", + "description": "Gas-fired heat pumps - Water source heat pump with flow temperature <= 35°C", + "space": 120, + "water": 84, + "heating_type": "From Table 4d", # Replace with specific value as needed + "responsiveness": None, # Not provided, assuming 'None' + "code": 216 + }, + { + "category": "Heat pumps with radiators or underfloor heating", + "description": "Gas-fired heat pumps - Air source heat pump with flow temperature <= 35°C", + "space": 110, + "water": 77, + "heating_type": "From Table 4d", # Replace with specific value as needed + "responsiveness": None, # Not provided, assuming 'None' + "code": 217 + }, + { + "category": "Heat pumps with radiators or underfloor heating", + "description": "Gas-fired heat pumps - Ground source heat pump in other cases", + "space": 84, + "water": 84, + "heating_type": "From Table 4d", # Replace with specific value as needed + "responsiveness": None, # Not provided, assuming 'None' + "code": 225 + }, + { + "category": "Heat pumps with radiators or underfloor heating", + "description": "Gas-fired heat pumps - Water source heat pump in other cases", + "space": 84, + "water": 84, + "heating_type": "From Table 4d", # Replace with specific value as needed + "responsiveness": None, # Not provided, assuming 'None' + "code": 226 + }, + { + "category": "Heat pumps with radiators or underfloor heating", + "description": "Gas-fired 
heat pumps - Air source heat pump in other cases", + "space": 77, + "water": 77, + "heating_type": "From Table 4d", # Replace with specific value as needed + "responsiveness": None, # Not provided, assuming 'None' + "code": 227 + } +] + +electric_heat_pumps_warm_air_distribution = [ + { + "category": "Heat pumps with warm air distribution", + "description": "Electric heat pumps - Ground source heat pump", + "space": 230, + "water": 170, + "heating_type": 1, + "responsiveness": 1.0, + "code": 521 + }, + { + "category": "Heat pumps with warm air distribution", + "description": "Electric heat pumps - Water source heat pump", + "space": 230, + "water": 170, + "heating_type": 1, + "responsiveness": 1.0, + "code": 523 + }, + { + "category": "Heat pumps with warm air distribution", + "description": "Electric heat pumps - Air source heat pump", + "space": 170, + "water": 170, + "heating_type": 1, + "responsiveness": 1.0, + "code": 524 + } +] + +gas_fired_heat_pumps_warm_air_distribution = [ + { + "category": "Heat pumps with warm air distribution", + "description": "Gas-fired heat pumps - Ground source heat pump", + "space": 120, + "water": 84, + "heating_type": 1, + "responsiveness": 1.0, + "code": 525 + }, + { + "category": "Heat pumps with warm air distribution", + "description": "Gas-fired heat pumps - Water source heat pump", + "space": 120, + "water": 84, + "heating_type": 1, + "responsiveness": 1.0, + "code": 526 + }, + { + "category": "Heat pumps with warm air distribution", + "description": "Gas-fired heat pumps - Air source heat pump", + "space": 110, + "water": 77, + "heating_type": 1, + "responsiveness": 1.0, + "code": 527 + } +] + +heat_networks = [ + { + "category": "Heat networks", + "description": "Boilers (SAP)", + "efficiency": 80, + "heating_type": "From table 4d", # Replace with specific value as needed + "code": 2 + }, + { + "category": "Heat networks", + "description": "CHP (SAP)", + "efficiency": 75, + "heating_type": "From table 4d", # Replace with 
specific value as needed + "code": 1 + }, + { + "category": "Heat networks", + "description": "Waste heat from power station (SAP)", + "efficiency": 100, + "heating_type": "From table 4d", # Replace with specific value as needed + "code": 4 + }, + { + "category": "Heat networks", + "description": "Heat pump (SAP)", + "efficiency": 300, + "heating_type": "From table 4d", # Replace with specific value as needed + "code": 3 + }, + { + "category": "Heat networks", + "description": "Geothermal heat source (SAP)", + "efficiency": 100, + "heating_type": "From table 4d", # Replace with specific value as needed + "code": 5 + }, + { + "category": "Heat networks", + "description": "Boilers only (RdSAP)", + "efficiency": 80, + "heating_type": "From table 4d", # Replace with specific value as needed + "code": 301 + }, + { + "category": "Heat networks", + "description": "CHP and boilers (RdSAP)", + "efficiency": 75, + "heating_type": "From table 4d", # Replace with specific value as needed + "code": 302 + }, + { + "category": "Heat networks", + "description": "Heat pump (RdSAP)", + "efficiency": 300, + "heating_type": "From table 4d", # Replace with specific value as needed + "code": 304 + } +] + +electric_storage_systems = [ + { + "category": "Electric Storage Systems", + "description": "Old (large volume) storage heaters", + "efficiency": 100, + "heating_type": 6, + "responsiveness": 0.0, + "code": 401 + }, + { + "category": "Electric Storage Systems", + "description": "Slimline storage heaters", + "code": 402, + "options": [ + {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 5, "responsiveness": 0.2}, + {"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 4, "responsiveness": 0.4} + ] + }, + { + "category": "Electric Storage Systems", + "description": "Convector storage heaters", + "code": 403, + "options": [ + {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 5, "responsiveness": 0.2}, + 
{"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 4, "responsiveness": 0.4} + ] + }, + { + "category": "Electric Storage Systems", + "description": "Fan storage heaters", + "code": 404, + "options": [ + {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 4, "responsiveness": 0.4}, + {"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 4, "responsiveness": 0.4} + ] + }, + { + "category": "Electric Storage Systems", + "description": "Slimline storage heaters with Celect-type control", + "code": 405, + "options": [ + {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 4, "responsiveness": 0.4}, + {"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 3, "responsiveness": 0.6} + ] + }, + { + "category": "Electric Storage Systems", + "description": "Convector storage heaters with Celect-type control", + "code": 406, + "options": [ + {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 4, "responsiveness": 0.4}, + {"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 3, "responsiveness": 0.6} + ] + }, + { + "category": "Electric Storage Systems", + "description": "Fan storage heaters with Celect-type control", + "code": 407, + "options": [ + {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 3, "responsiveness": 0.6}, + {"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 3, "responsiveness": 0.6} + ] + }, + { + "category": "Electric Storage Systems", + "description": "Integrated storage + direct-acting heater", + "efficiency": 100, + "heating_type": 3, + "responsiveness": 0.6, + "code": 408 + }, + { + "category": "Electric Storage Systems", + "description": "High heat retention storage heaters", + "code": 409, + "options": [ + {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 2, "responsiveness": 0.8}, + 
{"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 2, "responsiveness": 0.8} + ] + } +] + +off_peak_tariffs_electric_underfloor_heating = [ + { + "category": "Electric Underfloor Heating", + "description": "Off-peak tariffs - In concrete slab (off-peak only)", + "efficiency": 100, + "heating_type": 5, + "responsiveness": 0.0, + "code": 421 + }, + { + "category": "Electric Underfloor Heating", + "description": "Off-peak tariffs - Integrated (storage+direct-acting)", + "efficiency": 100, + "heating_type": 4, + "responsiveness": 0.25, + "code": 422 + }, + { + "category": "Electric Underfloor Heating", + "description": "Off-peak tariffs - Integrated (storage+direct-acting) with low (off-peak) tariff control", + "efficiency": 100, + "heating_type": 3, + "responsiveness": 0.50, + "code": 423 + } +] + +standard_or_off_peak_tariff_electric_underfloor_heating = [ + { + "category": "Electric Underfloor Heating", + "description": "Standard or off-peak tariff - In screed above insulation", + "efficiency": 100, + "heating_type": 2, + "responsiveness": 0.75, + "code": 424 + }, + { + "category": "Electric Underfloor Heating", + "description": "Standard or off-peak tariff - In timber floor, or immediately below floor covering", + "efficiency": 100, + "heating_type": 1, + "responsiveness": 1.0, + "code": 425 + } +] + +gas_fired_warm_air_fan_assisted = [ + { + "category": "Warm Air Systems", + "description": "Gas-fired warm air with fan-assisted flue - Ducted, on-off control, pre 1998", "efficiency": 70, + "heating_type": 1, + "responsiveness": 1.0, + "code": 501 + }, + { + "category": "Warm Air Systems", + "description": "Gas-fired warm air with fan-assisted flue - Ducted, on-off control, 1998 or later", + "efficiency": 76, + "heating_type": 1, + "responsiveness": 1.0, + "code": 502 + }, + { + "category": "Warm Air Systems", + "description": "Gas-fired warm air with fan-assisted flue - Ducted, modulating control, pre 1998", + "efficiency": 72, + 
"heating_type": 1, + "responsiveness": 1.0, + "code": 503 + }, + { + "category": "Warm Air Systems", + "description": "Gas-fired warm air with fan-assisted flue - Ducted, modulating control, 1998 or later", + "efficiency": 78, + "heating_type": 1, + "responsiveness": 1.0, + "code": 504 + }, + { + "category": "Warm Air Systems", + "description": "Gas-fired warm air with fan-assisted flue - Room heater with in-floor ducts", + "efficiency": 69, + "heating_type": 1, + "responsiveness": 1.0, + "code": 505 + }, + { + "category": "Warm Air Systems", + "description": "Gas-fired warm air with fan-assisted flue - Condensing", + "efficiency": 81, + "heating_type": 1, + "responsiveness": 1.0, + "code": 520 + } +] + +gas_fired_warm_air_balanced_or_open_flue = [ + {"category": "Warm Air Systems", + "description": "Gas-fired warm air with balanced or open flue - Ducted or stub-ducted, on-off control, pre 1998", + "efficiency": 70, "heating_type": 1, "responsiveness": 1.0, "code": 506}, + {"category": "Warm Air Systems", + "description": "Gas-fired warm air with balanced or open flue - Ducted or stub-ducted, on-off control, " + "1998 or later", + "efficiency": 76, "heating_type": 1, "responsiveness": 1.0, "code": 507}, + {"category": "Warm Air Systems", + "description": "Gas-fired warm air with balanced or open flue - Ducted or stub-ducted, modulating control, " + "pre 1998", + "efficiency": 72, "heating_type": 1, "responsiveness": 1.0, "code": 508}, + {"category": "Warm Air Systems", + "description": "Gas-fired warm air with balanced or open flue - Ducted or stub-ducted, modulating control, " + "1998 or later", + "efficiency": 78, "heating_type": 1, "responsiveness": 1.0, "code": 509}, + {"category": "Warm Air Systems", + "description": "Gas-fired warm air with balanced or open flue - Ducted or stub-ducted with flue heat recovery", + "efficiency": 85, "heating_type": 1, "responsiveness": 1.0, "code": 510}, + {"category": "Warm Air Systems", "description": "Gas-fired warm air with 
balanced or open flue - Condensing", + "efficiency": 81, "heating_type": 1, "responsiveness": 1.0, "code": 511} +] + +liquid_fired_warm_air = [ + {"category": "Warm Air Systems", "description": "Liquid-fired warm air - Ducted output (on/off control)", + "efficiency": 70, "heating_type": 1, "responsiveness": 1.0, "code": 512}, + {"category": "Warm Air Systems", "description": "Liquid-fired warm air - Ducted output (modulating control)", + "efficiency": 72, "heating_type": 1, "responsiveness": 1.0, "code": 513}, + {"category": "Warm Air Systems", "description": "Liquid-fired warm air - Stub duct system", "efficiency": 70, + "heating_type": 1, "responsiveness": 1.0, "code": 514} +] + +electric_warm_air_systems = [ + { + "category": "Warm Air Systems", + "description": "Electric warm air - Electricaire system", + "efficiency": 100, + "heating_type": 2, + "responsiveness": 0.75, + "code": 515 + } +] + +room_heaters = [ + # Gas (including LPG and biogas) room heaters + {"category": "Room Heaters", + "description": "Gas (including LPG and biogas) room heaters - Gas fire, open flue, pre-1980 (open fronted)", + "flue": "OF", "efficiency_A": 50, "efficiency_B": 50, "heating_type": 1, "responsiveness": 1.0, "code": 601}, + {"category": "Room Heaters", + "description": "Gas (including LPG and biogas) room heaters - Gas fire, open flue, pre-1980 (open fronted), " + "with back boiler unit", + "flue": "OF*", "efficiency_A": 50, "efficiency_B": 50, "heating_type": 1, "responsiveness": 1.0, "code": 602}, + {"category": "Room Heaters", + "description": "Gas (including LPG and biogas) room heaters - Gas fire, open flue, 1980 or later (open fronted), " + "sitting proud of, and sealed to, fireplace opening", + "flue": "OF", "efficiency_A": 63, "efficiency_B": 64, "heating_type": 1, "responsiveness": 1.0, "code": 603}, + {"category": "Room Heaters", + "description": "Gas (including LPG and biogas) room heaters - Gas fire, open flue, 1980 or later (open fronted), " + "sitting proud of, 
and sealed to, fireplace opening, with back boiler unit", + "flue": "OF*", "efficiency_A": 63, "efficiency_B": 64, "heating_type": 1, "responsiveness": 1.0, "code": 604}, + {"category": "Room Heaters", + "description": "Gas (including LPG and biogas) room heaters - Flush fitting Live Fuel Effect gas fire (open " + "fronted), sealed to fireplace opening", + "flue": "OF", "efficiency_A": 40, "efficiency_B": 41, "heating_type": 1, "responsiveness": 1.0, "code": 605}, + {"category": "Room Heaters", + "description": "Gas (including LPG and biogas) room heaters - Flush fitting Live Fuel Effect gas fire (open " + "fronted), sealed to fireplace opening, with back boiler unit", + "flue": "OF*", "efficiency_A": 40, "efficiency_B": 41, "heating_type": 1, "responsiveness": 1.0, "code": 606}, + {"category": "Room Heaters", + "description": "Gas (including LPG and biogas) room heaters - Flush fitting Live Fuel Effect gas fire (open " + "fronted), fan assisted, sealed to fireplace opening", + "flue": "OF", "efficiency_A": 45, "efficiency_B": 46, "heating_type": 1, "responsiveness": 1.0, "code": 607}, + {"category": "Room Heaters", + "description": "Gas (including LPG and biogas) room heaters - Gas fire or wall heater, balanced flue", + "flue": "RS", "efficiency_A": 58, "efficiency_B": 60, "heating_type": 1, "responsiveness": 1.0, "code": 609}, + {"category": "Room Heaters", + "description": "Gas (including LPG and biogas) room heaters - Gas fire, closed fronted, fan assisted", + "flue": "RS", "efficiency_A": 72, "efficiency_B": 73, "heating_type": 1, "responsiveness": 1.0, "code": 610}, + {"category": "Room Heaters", "description": "Gas (including LPG and biogas) room heaters - Condensing gas fire", + "flue": "RS", "efficiency_A": 85, "efficiency_B": 85, "heating_type": 1, "responsiveness": 1.0, "code": 611}, + {"category": "Room Heaters", + "description": "Gas (including LPG and biogas) room heaters - Decorative Fuel Effect gas fire, open to chimney", + "flue": "C", 
"efficiency_A": 20, "efficiency_B": 20, "heating_type": 1, "responsiveness": 1.0, "code": 612}, + {"category": "Room Heaters", + "description": "Gas (including LPG and biogas) room heaters - Flueless gas fire, secondary heating only", + "flue": "none", "efficiency_A": 90, "efficiency_B": 92, "heating_type": 1, "responsiveness": 1.0, "code": 613}, + + # Liquid fuel room heaters + {"category": "Room Heaters", "description": "Liquid fuel room heaters - Room heater, pre 2000", "efficiency": 55, + "heating_type": 1, "responsiveness": 1.0, "code": 621}, + {"category": "Room Heaters", + "description": "Liquid fuel room heaters - Room heater, pre 2000, with boiler (no radiators)", "efficiency": 65, + "heating_type": 1, "responsiveness": 1.0, "code": 622}, + {"category": "Room Heaters", "description": "Liquid fuel room heaters - Room heater, 2000 or later", + "efficiency": 60, "heating_type": 1, "responsiveness": 1.0, "code": 623}, + {"category": "Room Heaters", + "description": "Liquid fuel room heaters - Room heater, 2000 or later with boiler (no radiators)", + "efficiency": 70, "heating_type": 1, "responsiveness": 1.0, "code": 624}, + {"category": "Room Heaters", "description": "Liquid fuel room heaters - Bioethanol heater, secondary heating only", + "efficiency": 94, "heating_type": 1, "responsiveness": 1.0, "code": 625}, + + # Solid fuel room heaters + {"category": "Room Heaters", "description": "Solid fuel room heaters - Open fire in grate", "efficiency_A": 37, + "efficiency_B": 32, "heating_type": 3, "responsiveness": 0.5, "code": 631}, + {"category": "Room Heaters", "description": "Solid fuel room heaters - Open fire with back boiler (no radiators)", + "efficiency_A": 50, "efficiency_B": 50, "heating_type": 3, "responsiveness": 0.5, "code": 632}, + {"category": "Room Heaters", "description": "Solid fuel room heaters - Closed room heater", "efficiency_A": 65, + "efficiency_B": 60, "heating_type": 3, "responsiveness": 0.5, "code": 633}, + {"category": "Room Heaters", 
+ "description": "Solid fuel room heaters - Closed room heater with boiler (no radiators)", "efficiency_A": 67, + "efficiency_B": 65, "heating_type": 3, "responsiveness": 0.5, "code": 634}, + {"category": "Room Heaters", "description": "Solid fuel room heaters - Stove (pellet fired)", "efficiency_A": 70, + "efficiency_B": 65, "heating_type": 2, "responsiveness": 0.75, "code": 635}, + {"category": "Room Heaters", + "description": "Solid fuel room heaters - Stove (pellet fired) with boiler (no radiators)", "efficiency_A": 75, + "efficiency_B": 70, "heating_type": 2, "responsiveness": 0.75, "code": 636}, + + # Electric (direct acting) room heaters + {"category": "Room Heaters", + "description": "Electric (direct acting) room heaters - Panel, convector or radiant heaters", "efficiency": 100, + "heating_type": 1, "responsiveness": 1.0, "code": 691}, + {"category": "Room Heaters", + "description": "Electric (direct acting) room heaters - Water- or oil-filled radiators", "efficiency": 100, + "heating_type": 1, "responsiveness": 1.0, "code": 694}, + {"category": "Room Heaters", "description": "Electric (direct acting) room heaters - Fan heaters", + "efficiency": 100, "heating_type": 1, "responsiveness": 1.0, "code": 692}, + {"category": "Room Heaters", "description": "Electric (direct acting) room heaters - Portable electric heaters", + "efficiency": 100, "heating_type": 1, "responsiveness": 1.0, "code": 693} +] + +other_space_heating_systems = [ + { + "category": "Other Space Heating Systems", + "description": "Electric ceiling heating", + "efficiency": 100, + "heating_type": 2, + "responsiveness": 0.75, + "code": 701 + } +] + +hot_water_systems = [ + {"category": "Hot Water Systems", "description": "No hot water system present - electric immersion assumed", + "efficiency": 100, "code": 999}, + { + "category": "Hot Water Systems", + "description": "HWP from the primary heating system", + "code": 901, + "options": [ + {"sub_description": "Back boiler (hot water only), 
gas*", "efficiency": 65}, + {"sub_description": "Circulator built into a gas warm air system, pre 1998", "efficiency": 65}, + {"sub_description": "Circulator built into a gas warm air system, 1998 or later", "efficiency": 73}, + {"sub_description": "Heat exchanger in a gas warm air system, condensing unit", "efficiency": 74}, + ] + }, + {"category": "Hot Water Systems", + "description": "From second main system", "efficiency": None, + "code": 914}, + {"category": "Hot Water Systems", "description": "From secondary system", + "efficiency": None, "code": 902}, + {"category": "Hot Water Systems", "description": "Electric immersion", "efficiency": 100, "code": 903}, + {"category": "Hot Water Systems", + "description": "Single-point gas-fired water heater (instantaneous at point of use)", "efficiency": 70, + "code": 907}, + {"category": "Hot Water Systems", + "description": "Multi-point gas-fired water heater (instantaneous serving several taps)", "efficiency": 65, + "code": 908}, + {"category": "Hot Water Systems", "description": "Electric instantaneous at point of use", "efficiency": 100, + "code": 909}, + {"category": "Hot Water Systems", "description": "Gas boiler/circulator for water heating only*", "efficiency": 65, + "code": 911}, + {"category": "Hot Water Systems", "description": "Liquid fuel boiler/circulator for water heating only*", + "efficiency": 70, "code": 912}, + {"category": "Hot Water Systems", "description": "Solid fuel boiler/circulator for water heating only", + "efficiency": 55, "code": 913}, + # Range cookers with boiler for water heating only + {"category": "Hot Water Systems", + "description": "Range cooker with boiler for water heating only: Gas, single burner with permanent pilot", + "efficiency": 46, "code": 921}, + {"category": "Hot Water Systems", + "description": "Range cooker with boiler for water heating only: Gas, single burner with automatic ignition", + "efficiency": 50, + "code": 922}, + {"category": "Hot Water Systems", + 
"description": "Range cooker with boiler for water heating only: Gas, twin burner with permanent pilot pre 1998", + "efficiency": 60, + "code": 923}, + {"category": "Hot Water Systems", + "description": "Range cooker with boiler for water heating only: Gas, twin burner with automatic ignition pre " + "1998", + "efficiency": 65, "code": 924}, + {"category": "Hot Water Systems", + "description": "Range cooker with boiler for water heating only: Gas, twin burner with permanent pilot 1998 or " + "later", + "efficiency": 65, "code": 925}, + {"category": "Hot Water Systems", + "description": "Range cooker with boiler for water heating only: Gas, twin burner with automatic ignition 1998 " + "or later", + "efficiency": 70, "code": 926}, + {"category": "Hot Water Systems", + "description": "Range cooker with boiler for water heating only: Liquid fuel, single burner", "efficiency": 60, + "code": 927}, + {"category": "Hot Water Systems", + "description": "Range cooker with boiler for water heating only: Liquid fuel, twin burner pre 1998", + "efficiency": 70, + "code": 928}, + {"category": "Hot Water Systems", + "description": "Range cooker with boiler for water heating only: Liquid fuel, twin burner 1998 or later", + "efficiency": 75, + "code": 929}, + {"category": "Hot Water Systems", + "description": "Range cooker with boiler for water heating only: Solid fuel, integral oven and boiler", + "efficiency": 45, + "code": 930}, + {"category": "Hot Water Systems", + "description": "Range cooker with boiler for water heating only: Solid fuel, independent oven and boiler", + "efficiency": 55, + "code": 931}, + # Electric heat pump for water heating only + {"category": "Hot Water Systems", "description": "Electric heat pump for water heating only*", "efficiency": 170, + "code": 941}, + # Hot-water only heat network + # Remove the SAP version + # {"category": "Hot Water Systems", + # "description": "Hot-water only heat network (SAP)", "efficiency": None, + # "code": 950}, + 
{"category": "Hot Water Systems", "description": "Hot-water only heat network (RdSAP) - boilers", "efficiency": 80, + "code": 950}, + {"category": "Hot Water Systems", "description": "Hot-water only heat network (RdSAP) - CHP", "efficiency": 75, + "code": 951}, + {"category": "Hot Water Systems", "description": "Hot-water only heat network (RdSAP) - heat pump", + "efficiency": 300, "code": 952} +] + +boilers_seasonal = [ + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Regular non-condensing with " + "automatic ignition", + "efficiency_winter": 74, "efficiency_summer": 64, "code": 101}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Regular condensing with " + "automatic ignition", + "efficiency_winter": 84, "efficiency_summer": 74, "code": 102}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Non-condensing combi with " + "automatic ignition", + "efficiency_winter": 74, "efficiency_summer": 65, "code": 103}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Condensing combi with " + "automatic ignition", + "efficiency_winter": 84, "efficiency_summer": 75, "code": 104}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Regular non-condensing with " + "permanent pilot light", + "efficiency_winter": 70, "efficiency_summer": 60, "code": 105}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Regular condensing with " + "permanent pilot light", + "efficiency_winter": 80, "efficiency_summer": 70, "code": 106}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Non-condensing 
combi with " + "permanent pilot light", + "efficiency_winter": 70, "efficiency_summer": 61, "code": 107}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Condensing combi with " + "permanent pilot light", + "efficiency_winter": 80, "efficiency_summer": 71, "code": 108}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Back boiler to radiators", + "efficiency_winter": 66, "efficiency_summer": 56, "code": 109}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with fan-assisted flue - Regular, " + "low thermal capacity", + "efficiency_winter": 73, "efficiency_summer": 63, "code": 110}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with fan-assisted flue - Regular, " + "high or unknown thermal capacity", + "efficiency_winter": 69, "efficiency_summer": 59, "code": 111}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with fan-assisted flue - Combi", + "efficiency_winter": 71, "efficiency_summer": 62, "code": 112}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with fan-assisted flue - Condensing " + "combi", + "efficiency_winter": 84, "efficiency_summer": 75, "code": 113}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with fan-assisted flue - Regular, " + "condensing", + "efficiency_winter": 84, "efficiency_summer": 74, "code": 114}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with balanced or open flue - " + "Regular, wall mounted", + "efficiency_winter": 66, "efficiency_summer": 56, "code": 115}, + {"category": "Boilers - 
seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with balanced or open flue - " + "Regular, floor mounted, pre 1979", + "efficiency_winter": 56, "efficiency_summer": 46, "code": 116}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with balanced or open flue - " + "Regular, floor mounted, 1979 to 1997", + "efficiency_winter": 66, "efficiency_summer": 56, "code": 117}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with balanced or open flue - Combi", + "efficiency_winter": 66, "efficiency_summer": 57, "code": 118}, + {"category": "Boilers - seasonal", + "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with balanced or open flue - Back " + "boiler to radiators", + "efficiency_winter": 66, "efficiency_summer": 56, "code": 119}, + {"category": "Boilers - seasonal", + "description": "Combined Primary Storage Units (CPSU) (mains gas, LPG and biogas) - With automatic ignition (" + "non-condensing)", + "efficiency_winter": 74, "efficiency_summer": 72, "code": 120}, + {"category": "Boilers - seasonal", + "description": "Combined Primary Storage Units (CPSU) (mains gas, LPG and biogas) - With automatic ignition (" + "condensing)", + "efficiency_winter": 83, "efficiency_summer": 81, "code": 121}, + {"category": "Boilers - seasonal", + "description": "Combined Primary Storage Units (CPSU) (mains gas, LPG and biogas) - With permanent pilot (" + "non-condensing)", + "efficiency_winter": 70, "efficiency_summer": 68, "code": 122}, + {"category": "Boilers - seasonal", + "description": "Combined Primary Storage Units (CPSU) (mains gas, LPG and biogas) - With permanent pilot (" + "condensing)", + "efficiency_winter": 79, "efficiency_summer": 77, "code": 123}, + {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Standard oil boiler pre-1985", + 
"efficiency_winter": 66, "efficiency_summer": 54, "code": 124}, + {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Standard oil boiler 1985 to 1997", + "efficiency_winter": 71, "efficiency_summer": 59, "code": 125}, + {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Standard oil boiler, 1998 or later", + "efficiency_winter": 80, "efficiency_summer": 68, "code": 126}, + {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Condensing oil boiler", + "efficiency_winter": 84, "efficiency_summer": 72, "code": 127}, + {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Combi oil boiler, pre-1998", + "efficiency_winter": 71, "efficiency_summer": 62, "code": 128}, + {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Combi oil boiler, 1998 or later", + "efficiency_winter": 77, "efficiency_summer": 68, "code": 129}, + {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Condensing combi oil boiler", + "efficiency_winter": 82, "efficiency_summer": 73, "code": 130}, + {"category": "Boilers - seasonal", + "description": "Liquid fuel boilers - Oil room heater with boiler to radiators, pre 2000", "efficiency_winter": 66, + "efficiency_summer": 54, "code": 131}, + {"category": "Boilers - seasonal", + "description": "Liquid fuel boilers - Oil room heater with boiler to radiators, 2000 or later", + "efficiency_winter": 71, "efficiency_summer": 59, "code": 132}, + {"category": "Boilers - seasonal", + "description": "Range cooker boilers (mains gas, LPG and biogas) - Single burner with permanent pilot", + "efficiency_winter": 47, "efficiency_summer": 37, "code": 133}, + {"category": "Boilers - seasonal", + "description": "Range cooker boilers (mains gas, LPG and biogas) - Single burner with automatic ignition", + "efficiency_winter": 51, "efficiency_summer": 41, "code": 134}, + {"category": "Boilers - seasonal", + "description": "Range cooker boilers (mains 
gas, LPG and biogas) - Twin burner with permanent pilot (" + "non-condensing) pre 1998", + "efficiency_winter": 61, "efficiency_summer": 51, "code": 135}, + {"category": "Boilers - seasonal", + "description": "Range cooker boilers (mains gas, LPG and biogas) - Twin burner with automatic ignition (" + "non-condensing) pre 1998", + "efficiency_winter": 66, "efficiency_summer": 56, "code": 136}, + {"category": "Boilers - seasonal", + "description": "Range cooker boilers (mains gas, LPG and biogas) - Twin burner with permanent pilot (" + "non-condensing) 1998 or later", + "efficiency_winter": 66, "efficiency_summer": 56, "code": 137}, + {"category": "Boilers - seasonal", + "description": "Range cooker boilers (mains gas, LPG and biogas) - Twin burner with automatic ignition (" + "non-condensing) 1998 or later", + "efficiency_winter": 71, "efficiency_summer": 61, "code": 138}, + {"category": "Boilers - seasonal", "description": "Range cooker boilers (liquid fuel) - Single burner", + "efficiency_winter": 61, "efficiency_summer": 49, "code": 139}, + {"category": "Boilers - seasonal", + "description": "Range cooker boilers (liquid fuel) - Twin burner (non-condensing) pre 1998", + "efficiency_winter": 71, "efficiency_summer": 59, "code": 140}, + {"category": "Boilers - seasonal", + "description": "Range cooker boilers (liquid fuel) - Twin burner (non-condensing) 1998 or later", + "efficiency_winter": 76, "efficiency_summer": 64, "code": 141}, +] + +# Heating controls +no_heating_system_controls = [ + { + "category": "No heating system present", + "description": "None", + "control": 2, + "temperature_adjustment_c": "+0.3", + "code": 2699 + } +] + +boiler_system_controls = [ + {"category": "Boiler Systems with Radiators or Underfloor Heating", + "description": "Not applicable", "control": None, "temperature_adjustment_c": None, + "code": 2100}, + {"category": "Boiler Systems with Radiators or Underfloor Heating", + "description": "No time or thermostatic control of room 
temperature", "control": 1, + "temperature_adjustment_c": "+0.6", "code": 2101}, + {"category": "Boiler Systems with Radiators or Underfloor Heating", "description": "Programmer, no room thermostat", + "control": 1, "temperature_adjustment_c": "+0.6", "code": 2102}, + {"category": "Boiler Systems with Radiators or Underfloor Heating", "description": "Room thermostat only", + "control": 1, "temperature_adjustment_c": "0", "code": 2103}, + {"category": "Boiler Systems with Radiators or Underfloor Heating", "description": "Programmer and room thermostat", + "control": 1, "temperature_adjustment_c": "0", "code": 2104}, + {"category": "Boiler Systems with Radiators or Underfloor Heating", + "description": "Programmer and at least two room thermostats", "control": 2, "temperature_adjustment_c": "0", + "code": 2105}, + {"category": "Boiler Systems with Radiators or Underfloor Heating", "description": "Room thermostat and TRVs", + "control": 2, "temperature_adjustment_c": "0", "code": 2113}, + {"category": "Boiler Systems with Radiators or Underfloor Heating", + "description": "Programmer, room thermostat and TRVs", "control": 2, "temperature_adjustment_c": "0", + "code": 2106}, + {"category": "Boiler Systems with Radiators or Underfloor Heating", "description": "TRVs and bypass", "control": 2, + "temperature_adjustment_c": "0", "code": 2111}, + {"category": "Boiler Systems with Radiators or Underfloor Heating", "description": "Programmer, TRVs and bypass", + "control": 2, "temperature_adjustment_c": "0", "code": 2107}, + {"category": "Boiler Systems with Radiators or Underfloor Heating", + "description": "Programmer, TRVs and flow switch", "control": 2, "temperature_adjustment_c": "0", "code": 2108}, + {"category": "Boiler Systems with Radiators or Underfloor Heating", + "description": "Programmer, TRVs and boiler energy manager", "control": 2, "temperature_adjustment_c": "0", + "code": 2109}, + {"category": "Boiler Systems with Radiators or Underfloor Heating", + 
"description": "Time and temperature zone control by arrangement of plumbing and electrical services", + "control": 3, "temperature_adjustment_c": "0", "code": 2110}, + {"category": "Boiler Systems with Radiators or Underfloor Heating", + "description": "Time and temperature zone control by device in PCDB", "control": 3, + "temperature_adjustment_c": "0", "code": 2112}, +] + +heat_pump_controls = [ + # We have a previous 2100 code for not applicable + # {"category": "Heat Pumps with Radiators or Underfloor Heating", + # "description": "Not applicable (heat pump provides DHW only)", "control": None, "temperature_adjustment_c": None, + # "code": 2100}, + {"category": "Heat Pumps with Radiators or Underfloor Heating", + "description": "No time or thermostatic control of room temperature", "control": 1, + "temperature_adjustment_c": "+0.3", "code": 2201}, + {"category": "Heat Pumps with Radiators or Underfloor Heating", "description": "Programmer, no room thermostat", + "control": 1, "temperature_adjustment_c": "+0.3", "code": 2202}, + {"category": "Heat Pumps with Radiators or Underfloor Heating", "description": "Room thermostat only", "control": 1, + "temperature_adjustment_c": "0", "code": 2203}, + {"category": "Heat Pumps with Radiators or Underfloor Heating", "description": "Programmer and room thermostat", + "control": 1, "temperature_adjustment_c": "0", "code": 2204}, + {"category": "Heat Pumps with Radiators or Underfloor Heating", + "description": "Programmer and at least two room thermostats", "control": 2, "temperature_adjustment_c": "0", + "code": 2205}, + {"category": "Heat Pumps with Radiators or Underfloor Heating", "description": "Room thermostat and TRVs", + "control": 2, "temperature_adjustment_c": "0", "code": 2209}, + {"category": "Heat Pumps with Radiators or Underfloor Heating", + "description": "Programmer, room thermostat and TRVs", "control": 2, "temperature_adjustment_c": "0", + "code": 2210}, + {"category": "Heat Pumps with Radiators or 
Underfloor Heating", "description": "Programmer, TRVs and bypass", + "control": 2, "temperature_adjustment_c": "0", "code": 2206}, + {"category": "Heat Pumps with Radiators or Underfloor Heating", + "description": "Time and temperature zone control by arrangement of plumbing and electrical services", + "control": 3, "temperature_adjustment_c": "0", "code": 2207}, + {"category": "Heat Pumps with Radiators or Underfloor Heating", + "description": "Time and temperature zone control by device in PCDB", "control": 3, + "temperature_adjustment_c": "0", "code": 2208}, +] + +heat_network_controls = [ + {"category": "Heat Networks", "description": "Flat rate charging*, no thermostatic control of room temperature", + "control": 1, "temperature_adjustment_c": "+0.3", "code": 2301}, + {"category": "Heat Networks", "description": "Flat rate charging*, programmer, no room thermostat", "control": 1, + "temperature_adjustment_c": "+0.3", "code": 2302}, + {"category": "Heat Networks", "description": "Flat rate charging*, room thermostat only", "control": 1, + "temperature_adjustment_c": "0", "code": 2303}, + {"category": "Heat Networks", "description": "Flat rate charging*, programmer and room thermostat", "control": 1, + "temperature_adjustment_c": "0", "code": 2304}, + {"category": "Heat Networks", "description": "Flat rate charging*, room thermostat and TRVs", "control": 2, + "temperature_adjustment_c": "0", "code": 2313}, + {"category": "Heat Networks", "description": "Flat rate charging*, TRVs", "control": 2, + "temperature_adjustment_c": "0", "code": 2307}, + {"category": "Heat Networks", "description": "Flat rate charging*, programmer and TRVs", "control": 2, + "temperature_adjustment_c": "0", "code": 2305}, + {"category": "Heat Networks", "description": "Flat rate charging*, programmer and at least two room thermostats", + "control": 2, "temperature_adjustment_c": "0", "code": 2311}, + {"category": "Heat Networks", "description": "Charging system linked to use of heating, 
room thermostat only", + "control": 2, "temperature_adjustment_c": "0", "code": 2308}, + {"category": "Heat Networks", + "description": "Charging system linked to use of heating, programmer and room thermostat", "control": 2, + "temperature_adjustment_c": "0", "code": 2309}, + {"category": "Heat Networks", "description": "Charging system linked to use of heating, room thermostat and TRVs", + "control": 3, "temperature_adjustment_c": "0", "code": 2314}, + {"category": "Heat Networks", "description": "Charging system linked to use of heating, TRVs", "control": 3, + "temperature_adjustment_c": "0", "code": 2310}, + {"category": "Heat Networks", "description": "Charging system linked to use of heating, programmer and TRVs", + "control": 3, "temperature_adjustment_c": "0", "code": 2306}, + {"category": "Heat Networks", + "description": "Charging system linked to use of heating, programmer and at least two room thermostats", + "control": 3, "temperature_adjustment_c": "0", "code": 2312}, +] + +electric_storage_systems_controls = [ + {"category": "Electric Storage Systems", "description": "Manual charge control", "control": 3, + "temperature_adjustment_c": "+0.7", "code": 2401}, + {"category": "Electric Storage Systems", "description": "Automatic charge control", "control": 3, + "temperature_adjustment_c": "+0.4", "code": 2402}, + {"category": "Electric Storage Systems", "description": "Celect-type controls", "control": 3, + "temperature_adjustment_c": "+0.4", "code": 2403}, + {"category": "Electric Storage Systems", "description": "Controls for high heat retention storage heaters §", + "control": 3, "temperature_adjustment_c": "0", "code": 2404}, +] + +warm_air_systems_controls = [ + {"category": "Warm Air Systems", "description": "No time or thermostatic control of room temperature", "control": 1, + "temperature_adjustment_c": "+0.3", "code": 2501}, + {"category": "Warm Air Systems", "description": "Programmer, no room thermostat", "control": 1, + 
"temperature_adjustment_c": "+0.3", "code": 2502}, + {"category": "Warm Air Systems", "description": "Room thermostat only", "control": 1, + "temperature_adjustment_c": "0", "code": 2503}, + {"category": "Warm Air Systems", "description": "Programmer and room thermostat", "control": 1, + "temperature_adjustment_c": "0", "code": 2504}, + {"category": "Warm Air Systems", "description": "Programmer and at least two room thermostats", "control": 2, + "temperature_adjustment_c": "0", "code": 2505}, + {"category": "Warm Air Systems", "description": "Time and temperature zone control", "control": 3, + "temperature_adjustment_c": "0", "code": 2506}, +] + +room_heater_systems_controls = [ + {"category": "Room Heater Systems", "description": "No thermostatic control of room temperature", "control": 2, + "temperature_adjustment_c": "+0.3", "code": 2601}, + {"category": "Room Heater Systems", "description": "Appliance thermostats", "control": 3, + "temperature_adjustment_c": "0", "code": 2602}, + {"category": "Room Heater Systems", "description": "Programmer and appliance thermostats", "control": 3, + "temperature_adjustment_c": "0", "code": 2603}, + {"category": "Room Heater Systems", "description": "Room thermostats only", "control": 3, + "temperature_adjustment_c": "0", "code": 2604}, + {"category": "Room Heater Systems", "description": "Programmer and room thermostats", "control": 3, + "temperature_adjustment_c": "0", "code": 2605}, +] + +other_systems_controls = [ + {"category": "Other Systems", "description": "No time or thermostatic control of room temperature", "control": 1, + "temperature_adjustment_c": "+0.3", "code": 2701}, + {"category": "Other Systems", "description": "Programmer, no room thermostat", "control": 1, + "temperature_adjustment_c": "+0.3", "code": 2702}, + {"category": "Other Systems", "description": "Room thermostat only", "control": 1, "temperature_adjustment_c": "0", + "code": 2703}, + {"category": "Other Systems", "description": "Programmer and 
room thermostat", "control": 1, + "temperature_adjustment_c": "0", "code": 2704}, + {"category": "Other Systems", "description": "Temperature zone control", "control": 2, + "temperature_adjustment_c": "0", "code": 2705}, + {"category": "Other Systems", "description": "Time and temperature zone control", "control": 3, + "temperature_adjustment_c": "0", "code": 2706}, +] + +heating_data = ( + no_heating_system + + boiler_systems_with_radiators_or_underfloor_heating + + heat_pumps_with_radiators_or_underfloor_heating + + electric_heat_pumps_warm_air_distribution + + gas_fired_heat_pumps_warm_air_distribution + + heat_networks + + electric_storage_systems + + off_peak_tariffs_electric_underfloor_heating + + standard_or_off_peak_tariff_electric_underfloor_heating + + gas_fired_warm_air_fan_assisted + + gas_fired_warm_air_balanced_or_open_flue + + liquid_fired_warm_air + + electric_warm_air_systems + + room_heaters + + other_space_heating_systems + + hot_water_systems + + boilers_seasonal + + no_heating_system_controls + + boiler_system_controls + + heat_pump_controls + + heat_network_controls + + electric_storage_systems_controls + + warm_air_systems_controls + + room_heater_systems_controls + + other_systems_controls +) + +heating_data = pd.DataFrame(heating_data) diff --git a/utils/s3.py b/utils/s3.py index 1b14ca97..b3553824 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -276,3 +276,86 @@ def list_files_in_s3_folder(bucket_name, folder_name): except Exception as e: logger.error(f'Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}') return [] + + +def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name): + """ + List all files and immediate subfolders in a given folder in an S3 bucket. + + E.g. 
if we have a folder structure in S3 like this: + - folder1/ + - file1.csv + - file2.csv + - subfolder1/ + - file3.csv + + Then calling list_files_and_subfolders_in_s3_folder(bucket_name='my-bucket', folder_name='folder1/') + would return ['folder1/file1.csv', 'folder1/file2.csv', 'folder1/subfolder1/']. + + Namely, the nested files are not included in the list, only the immediate files and subfolders. + + :param bucket_name: The name of the S3 bucket. + :param folder_name: The folder name within the S3 bucket. + :return: A list of file keys and subfolder prefixes in the specified S3 folder. + """ + + # For this function, folder_name should end with a forward slash + if not folder_name.endswith('/'): + folder_name += '/' + + try: + s3 = boto3.client('s3') + response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name, Delimiter='/') + + items = [] + + # Add files to the list + if 'Contents' in response: + items.extend([content['Key'] for content in response['Contents'] if content['Key'] != folder_name]) + + # Add immediate subfolders to the list + if 'CommonPrefixes' in response: + items.extend([prefix['Prefix'] for prefix in response['CommonPrefixes']]) + + return items + + except NoCredentialsError: + logger.error("Credentials not available.") + return [] + except PartialCredentialsError: + logger.error("Incomplete credentials provided.") + return [] + except Exception as e: + logger.error(f'Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}') + return [] + + +def list_xmls_in_s3_folder(bucket_name, folder_name): + """ + List all XML files in a given folder in an S3 bucket. + + :param bucket_name: The name of the S3 bucket. + :param folder_name: The folder name within the S3 bucket. + :return: A list of XML file keys in the specified S3 folder. 
+ """ + try: + s3 = boto3.client('s3') + response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name) + + if 'Contents' not in response: + logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.") + return [] + + # Filter XML files + xml_files = [content['Key'] for content in response['Contents'] if content['Key'].endswith('.xml')] + return xml_files + + except NoCredentialsError: + logger.error("Credentials not available.") + return [] + except PartialCredentialsError: + logger.error("Incomplete credentials provided.") + return [] + except Exception as e: + logger.error(f'Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}') + return [] From 791e22146e6354291ebf56b61aeee3423286a609 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 12:18:48 +0100 Subject: [PATCH 02/49] set up fundamental epc extraction --- etl/bill_savings/data_collection.py | 8 +- etl/bill_savings/data_combining.py | 2 +- etl/xml_survey_extraction/XmlParser.py | 200 ++++++++++++++++++++++++- 3 files changed, 198 insertions(+), 12 deletions(-) diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py index d2283ac4..6095741f 100644 --- a/etl/bill_savings/data_collection.py +++ b/etl/bill_savings/data_collection.py @@ -133,8 +133,8 @@ def app(): energy_consumption_data = [] for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)): # Skip the first 50 - if i < 250: - continue + # if i < 344: + # continue data = pd.read_csv(directory / "certificates.csv", low_memory=False) # Rename the columns to the same format as the api returns @@ -146,12 +146,12 @@ def app(): # Take just the newest EPC per uprn, based on lodgement-date data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn") - data = data.sample(sample_size) + data = data.sample(sample_size, replace=False) # We use the addreess data to find the related information collected_data = [] 
for _, property_data in data.iterrows(): - time.sleep(np.random.uniform(0.3, 2)) + time.sleep(np.random.uniform(0.2, 1.5)) uprn = int(property_data["uprn"]) address = property_data["address1"] diff --git a/etl/bill_savings/data_combining.py b/etl/bill_savings/data_combining.py index 11366360..d3a8d679 100644 --- a/etl/bill_savings/data_combining.py +++ b/etl/bill_savings/data_combining.py @@ -94,7 +94,7 @@ def app(): # We also estimate the energy consumption reduction from this data, by band df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"] - consumption_averages = df.groupby("current-energy-rating")["total_consumption"].meam().reset_index() + consumption_averages = df.groupby("current-energy-rating")["total_consumption"].mean().reset_index() # Save the consumption averages back to s3 save_dataframe_to_s3_parquet( diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index de7e35f8..973ea5e8 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -55,6 +55,7 @@ class XmlParser: hot_water_cost_current = None lighting_cost_current = None energy_consumption_current = None + energy_consumption_potential = None heating_system = None heating_controls = None @@ -140,6 +141,30 @@ class XmlParser: "5": "Very Good" } + MECHANICAL_VENTILATION_MAP = { + "0": "natural" + } + + BUILT_FORM_MAP = { + "1": "Detached", + } + + GLAZED_AREA_MAP = { + "4": "Much More Than Typical" + } + + FUEL_TYPE_MAP = { + "26": "mains gas (not community)" + } + + TRANSACTION_TYPE_MAP = { + "13": "ECO assessment" + } + + TENURE_MAP = { + '1': "Owner-occupied" + } + def __init__(self, file, filekey, uprn=None): file.seek(0) # Ensure the file pointer is at the beginning xml_string = file.read().decode('utf-8') @@ -151,7 +176,7 @@ class XmlParser: # In order to identify the file type, we can look for the presence of the 'UPRN' tag # If the UPRN tag is present, we can assume that the file is an EPC # If the 
UPRN tag is not present, we can assume that the file is an EPR - self.get_uprn() + self.get_uprn(uprn) self.file_type = self.UPRN_FILETYPE_MAP.get(self.uprn, "EPC") @@ -180,6 +205,7 @@ class XmlParser: self.get_assessor_details() self.get_heating_and_emissions_data() + self.get_detailed_heating_specs() # Building fabric @@ -191,11 +217,160 @@ class XmlParser: self.get_hot_water() self.get_lighting() self.get_doors() - self.get_photo_supply() # Property dimensions self.get_property_dimensions() + # Get all of the EPC data + self.extract_epc() + + def extract_epc(self): + # Property Summary + low_energy_fixed_light_count = None + construction_age_band = None + self.epc = { + "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'), + # TODO: Needs to be done more carefully + # "floor-height" = self.get_node_value_from_floor_dimensions('Room-Height'), + "construction-age-band": self.get_node_value('Construction-Age-Band'), + "mainheat-energy-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Main-Heating', 'Energy-Efficiency-Rating') + ], + "windows-env-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Window', 'Environmental-Efficiency-Rating') + ], + "lighting-energy-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Lighting', 'Energy-Efficiency-Rating') + ], + "environment-impact-potential": self.get_energy_assessment_value('Environmental-Impact-Potential'), + # TODO: Needs to be done more careully since we have multiple windows + # "glazed-type": self.get_node_value('Glazing-Type'), + "mainheatcont-description": + self.get_property_summary_value('Main-Heating-Controls', 'Description'), + "sheating-energy-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Secondary-Heating', 'Energy-Efficiency-Rating'), + ], + # TODO: Doesn't seem to be included in the xml + # "local-authority": self.get_node_value('Local-Authority'), + "local-authority-label": self.get_node_value('Local-Authority-Label'), + 
"fixed-lighting-outlets-count": self.get_node_value('Fixed-Lighting-Outlets-Count'), + # TODO: Doesn't seem to be included in the xml + # "energy-tariff": self.get_node_value('Energy-Tariff'), + "mechanical-ventilation": self.MECHANICAL_VENTILATION_MAP[self.get_node_value('Mechanical-Ventilation')], + "solar-water-heating-flag": self.get_node_value('Solar-Water-Heating'), + "co2-emissions-potential": self.get_energy_assessment_value('CO2-Emissions-Potential'), + "number-heated-rooms": self.get_node_value('Heated-Room-Count'), + "floor-description": self.get_property_summary_value('Floor', 'Description'), + "energy-consumption-potential": self.get_energy_assessment_value('Energy-Consumption-Potential'), + "built-form": self.BUILT_FORM_MAP[self.get_node_value('Built-Form')], + "number-open-fireplaces": self.get_node_value('Open-Fireplaces-Count'), + "windows-description": self.get_property_summary_value('Window', 'Description'), + "glazed-area": self.GLAZED_AREA_MAP[self.get_node_value('Glazed-Area')], + "inspection-date": self.get_node_value('Inspection-Date'), + "mains-gas-flag": self.get_node_value('Mains-Gas'), + "co2-emiss-curr-per-floor-area": self.get_energy_assessment_value('CO2-Emissions-Current-Per-Floor-Area'), + # TODO: Not included in the xml for houses - need an example of flats + # "heat-loss-corridor": self.get_node_value('Heat-Loss-Perimeter'), + # TODO: Need an example of flats + # "flat-storey-count": self.get_node_value('Flat-Storey-Count'), + "roof-energy-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Roof', 'Energy-Efficiency-Rating') + ], + "total-floor-area": self.get_node_value('Total-Floor-Area'), + "environment-impact-current": self.get_energy_assessment_value('Environmental-Impact-Current'), + "roof-description": self.get_property_summary_value('Roof', 'Description'), + "floor-energy-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Floor', 'Energy-Efficiency-Rating') + ], + "number-habitable-rooms": 
self.get_node_value('Habitable-Room-Count'), + "hot-water-env-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Hot-Water', 'Environmental-Efficiency-Rating') + ], + "mainheatc-energy-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Main-Heating-Controls', 'Energy-Efficiency-Rating') + ], + "main-fuel": self.FUEL_TYPE_MAP[self.get_node_value('Main-Fuel-Type')], + "lighting-env-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Lighting', 'Environmental-Efficiency-Rating') + ], + "windows-energy-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Window', 'Energy-Efficiency-Rating') + ], + "floor-env-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Floor', 'Environmental-Efficiency-Rating') + ], + "sheating-env-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Secondary-Heating', 'Environmental-Efficiency-Rating') + ], + "lighting_description": self.get_property_summary_value('Lighting', 'Description'), + "roof-env-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Roof', 'Environmental-Efficiency-Rating') + ], + "walls-energy-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Wall', 'Energy-Efficiency-Rating') + ], + "photo-supply": self.get_photo_supply(), + "lighting-cost-potential": self.get_energy_assessment_value('Lighting-Cost-Potential'), + "mainheat-env-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Main-Heating', 'Environmental-Efficiency-Rating') + ], + "multi-glaze-proportion": self.get_node_value('Multiple-Glazed-Proportion'), + "main-heating-controls": self.get_property_summary_value('Main-Heating-Controls', 'Description'), + # TODO: NEdd an example of flats + # "flat-top-storey": self.get_node_value('Flat-Top-Storey'), + "secondheat-description": self.get_property_summary_value('Secondary-Heating', 'Description'), + "walls-env-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Wall', 'Environmental-Efficiency-Rating') + ], + "transaction-type": 
self.TRANSACTION_TYPE_MAP[self.get_node_value('Transaction-Type')], + "extension-count": self.get_node_value('Extensions-Count'), + "mainheatc-env-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Main-Heating-Controls', 'Environmental-Efficiency-Rating') + ], + "lmk-key": "", # Doesn't exist for non-EPC xmls + "wind-turbines-count": self.get_node_value('Wind-Turbines-Count'), + "tenure": self.TENURE_MAP[self.get_node_value('Tenure')], + # TODO: Need an example of flats + # "floor-level": self.get_node_value('Floor-Level'), + "potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'), + "hot-water-energy-eff": self.RATINGS_MAP[ + self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating') + ], + "low-energy-lighting": self.get_node_value('Low-Energy-Lighting'), + "walls-description": self.get_property_summary_value('Wall', 'Description'), + "hotwater-description": self.get_property_summary_value('Hot-Water', 'Description'), + } + + def get_node_value(self, tag_name): + nodes = self.xml.getElementsByTagName(tag_name) + if nodes and nodes[0].firstChild: + return nodes[0].firstChild.nodeValue + return None + + def get_node_value_from_floor_dimensions(self, tag_name): + nodes = self.xml.getElementsByTagName('SAP-Floor-Dimension') + if nodes: + tag = nodes[0].getElementsByTagName(tag_name) + if tag and tag[0].firstChild: + return tag[0].firstChild.nodeValue + return None + + def get_property_summary_value(self, section, tag_name): + nodes = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName(section) + if nodes: + tag = nodes[0].getElementsByTagName(tag_name) + if tag and tag[0].firstChild: + return tag[0].firstChild.nodeValue + return None + + def get_energy_assessment_value(self, tag_name): + nodes = self.xml.getElementsByTagName('Energy-Assessment')[0] + if nodes: + tag = nodes.getElementsByTagName(tag_name) + if tag and tag[0].firstChild: + return tag[0].firstChild.nodeValue + return None + 
def get_uprn(self, uprn): if uprn is not None: @@ -253,9 +428,14 @@ class XmlParser: self.heating_cost_current = self.xml.getElementsByTagName('Heating-Cost-Current')[0].firstChild.nodeValue self.hot_water_cost_current = self.xml.getElementsByTagName('Hot-Water-Cost-Current')[0].firstChild.nodeValue self.lighting_cost_current = self.xml.getElementsByTagName('Lighting-Cost-Current')[0].firstChild.nodeValue + + # Energy consumption self.energy_consumption_current = ( self.xml.getElementsByTagName("Energy-Consumption-Current")[0].firstChild.nodeValue ) + self.energy_consumption_potential = ( + self.xml.getElementsByTagName("Energy-Consumption-Potential")[0].firstChild.nodeValue + ) def get_detailed_heating_specs(self): """ @@ -457,11 +637,17 @@ class XmlParser: ) def get_photo_supply(self): - self.photo_supply = float( - self.xml.getElementsByTagName('Photovoltaic-Supply')[0] - .getElementsByTagName('Percent-Roof-Area')[0] - .firstChild.nodeValue - ) + photo_supply_tag = self.xml.getElementsByTagName("Photovoltaic-Supply")[0] + # Check if the "None-Or-No-Details" tag is present + if photo_supply_tag.getElementsByTagName("None-Or-No-Details"): + return ( + photo_supply_tag. + getElementsByTagName("None-Or-No-Details")[0]. + getElementsByTagName("Percent-Roof-Area")[0]. 
+ firstChild.nodeValue + ) + else: + raise NotImplementedError("Implement me") def get_assessor_details(self): From 9a343db93bcc66aa15142d1e839b33e90a672349 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 12:25:53 +0100 Subject: [PATCH 03/49] handled flats for the moment --- etl/xml_survey_extraction/XmlParser.py | 33 ++++++++++++++++---------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 973ea5e8..dccc0a9f 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -198,7 +198,6 @@ class XmlParser: # This file type contains just limited information compared to a regular EPR/EPC, and so we just exit # unless we learn something else that determines that we need information from this file return - self.get_property_type() self.get_sap() self.get_property_address() self.get_dates() @@ -225,9 +224,20 @@ class XmlParser: self.extract_epc() def extract_epc(self): - # Property Summary - low_energy_fixed_light_count = None - construction_age_band = None + + property_type = self.get_property_type() + + if property_type == "Flat": + raise NotImplementedError( + "Need to handle: heat-loss-corridor, unheated-corridor-length, flat-storey-count, flat-top-storey, " + "floor-level" + ) + heat_loss_corridor = "NO DATA!" + unheated_corridor_length = "" + flat_storey_count = "" + flat_top_storey = "" + floor_level = "NO DATA!" 
+ self.epc = { "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'), # TODO: Needs to be done more carefully @@ -269,10 +279,9 @@ class XmlParser: "inspection-date": self.get_node_value('Inspection-Date'), "mains-gas-flag": self.get_node_value('Mains-Gas'), "co2-emiss-curr-per-floor-area": self.get_energy_assessment_value('CO2-Emissions-Current-Per-Floor-Area'), - # TODO: Not included in the xml for houses - need an example of flats - # "heat-loss-corridor": self.get_node_value('Heat-Loss-Perimeter'), - # TODO: Need an example of flats - # "flat-storey-count": self.get_node_value('Flat-Storey-Count'), + "heat-loss-corridor": heat_loss_corridor, + "unheated-corridor-length": unheated_corridor_length, + "flat-storey-count": flat_storey_count, "roof-energy-eff": self.RATINGS_MAP[ self.get_property_summary_value('Roof', 'Energy-Efficiency-Rating') ], @@ -316,8 +325,7 @@ class XmlParser: ], "multi-glaze-proportion": self.get_node_value('Multiple-Glazed-Proportion'), "main-heating-controls": self.get_property_summary_value('Main-Heating-Controls', 'Description'), - # TODO: NEdd an example of flats - # "flat-top-storey": self.get_node_value('Flat-Top-Storey'), + "flat-top-storey": flat_top_storey, "secondheat-description": self.get_property_summary_value('Secondary-Heating', 'Description'), "walls-env-eff": self.RATINGS_MAP[ self.get_property_summary_value('Wall', 'Environmental-Efficiency-Rating') @@ -330,8 +338,7 @@ class XmlParser: "lmk-key": "", # Doesn't exist for non-EPC xmls "wind-turbines-count": self.get_node_value('Wind-Turbines-Count'), "tenure": self.TENURE_MAP[self.get_node_value('Tenure')], - # TODO: Need an example of flats - # "floor-level": self.get_node_value('Floor-Level'), + "floor-level": floor_level, "potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'), "hot-water-energy-eff": self.RATINGS_MAP[ self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating') @@ 
-397,7 +404,7 @@ class XmlParser: if not property_type: property_type = self.xml.getElementsByTagName('PropertyType1') - self.property_type = PROPERTY_TYPE_LOOKUP[property_type[0].firstChild.nodeValue] + return PROPERTY_TYPE_LOOKUP[property_type[0].firstChild.nodeValue] def get_sap(self): sap_score = self.xml.getElementsByTagName('Energy-Rating-Current') From d3e7c60009fabf486999f26fa9b39a7f87479586 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 12:47:41 +0100 Subject: [PATCH 04/49] Added missing variables --- etl/xml_survey_extraction/XmlParser.py | 63 +++++++++++++------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index dccc0a9f..ef18c6db 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -198,9 +198,6 @@ class XmlParser: # This file type contains just limited information compared to a regular EPR/EPC, and so we just exit # unless we learn something else that determines that we need information from this file return - self.get_sap() - self.get_property_address() - self.get_dates() self.get_assessor_details() self.get_heating_and_emissions_data() @@ -239,6 +236,10 @@ class XmlParser: floor_level = "NO DATA!" 
self.epc = { + "uprn": self.uprn, + "property-type": property_type, + **self.get_sap(), + **self.get_property_address(), "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'), # TODO: Needs to be done more carefully # "floor-height" = self.get_node_value_from_floor_dimensions('Room-Height'), @@ -260,9 +261,8 @@ class XmlParser: "sheating-energy-eff": self.RATINGS_MAP[ self.get_property_summary_value('Secondary-Heating', 'Energy-Efficiency-Rating'), ], - # TODO: Doesn't seem to be included in the xml - # "local-authority": self.get_node_value('Local-Authority'), - "local-authority-label": self.get_node_value('Local-Authority-Label'), + "local-authority": "", # Not included in the xml + "local-authority-label": "", "fixed-lighting-outlets-count": self.get_node_value('Fixed-Lighting-Outlets-Count'), # TODO: Doesn't seem to be included in the xml # "energy-tariff": self.get_node_value('Energy-Tariff'), @@ -346,6 +346,13 @@ class XmlParser: "low-energy-lighting": self.get_node_value('Low-Energy-Lighting'), "walls-description": self.get_property_summary_value('Wall', 'Description'), "hotwater-description": self.get_property_summary_value('Hot-Water', 'Description'), + "co2-emissions-current": self.get_node_value('CO2-Emissions-Current'), + "heating-cost-current": self.get_node_value('Heating-Cost-Current'), + "hot-water-cost-current": self.get_node_value('Hot-Water-Cost-Current'), + "lighting-cost-current": self.get_node_value('Lighting-Cost-Current'), + "energy-consumption-current": self.get_node_value('Energy-Consumption-Current'), + "lodgement-date": self.get_node_value('Inspection-Date'), + } def get_node_value(self, tag_name): @@ -410,8 +417,11 @@ class XmlParser: sap_score = self.xml.getElementsByTagName('Energy-Rating-Current') sap_score = int(sap_score[0].firstChild.nodeValue) epc_rating = sap_to_epc(sap_score) - self.current_energy_efficiency = str(sap_score) - self.current_energy_rating = epc_rating + + return { + 
"current-energy-efficiency": str(sap_score), + "current-energy-rating": epc_rating + } def get_heating_and_emissions_data(self): """ @@ -431,19 +441,6 @@ class XmlParser: self.water_heating_kwh = self.xml.getElementsByTagName('Water-Heating')[0].firstChild.nodeValue - self.co2_emissions_current = self.xml.getElementsByTagName('CO2-Emissions-Current')[0].firstChild.nodeValue - self.heating_cost_current = self.xml.getElementsByTagName('Heating-Cost-Current')[0].firstChild.nodeValue - self.hot_water_cost_current = self.xml.getElementsByTagName('Hot-Water-Cost-Current')[0].firstChild.nodeValue - self.lighting_cost_current = self.xml.getElementsByTagName('Lighting-Cost-Current')[0].firstChild.nodeValue - - # Energy consumption - self.energy_consumption_current = ( - self.xml.getElementsByTagName("Energy-Consumption-Current")[0].firstChild.nodeValue - ) - self.energy_consumption_potential = ( - self.xml.getElementsByTagName("Energy-Consumption-Potential")[0].firstChild.nodeValue - ) - def get_detailed_heating_specs(self): """ Given the heating data that is found in the tag, we extract the detailed about the heating @@ -668,19 +665,23 @@ class XmlParser: property_tag = self.xml.getElementsByTagName("Property")[0] - self.address1 = self.get_node(property_tag.getElementsByTagName("Address-Line-1")[0]) - self.address2 = self.get_node(property_tag.getElementsByTagName("Address-Line-2")[0]) - self.address3 = self.get_node(property_tag.getElementsByTagName("Address-Line-3")[0]) - self.posttown = self.get_node(property_tag.getElementsByTagName("Post-Town")[0]) - self.postcode = self.get_node(property_tag.getElementsByTagName("Postcode")[0]) - self.address = ", ".join( + address1 = self.get_node(property_tag.getElementsByTagName("Address-Line-1")[0]) + address2 = self.get_node(property_tag.getElementsByTagName("Address-Line-2")[0]) + address3 = self.get_node(property_tag.getElementsByTagName("Address-Line-3")[0]) + posttown = 
self.get_node(property_tag.getElementsByTagName("Post-Town")[0]) + postcode = self.get_node(property_tag.getElementsByTagName("Postcode")[0]) + address = ", ".join( [x for x in [self.address1, self.address2, self.address3, self.posttown, self.postcode] if x is not None] ) - def get_dates(self): - self.survey_date = ( - self.xml.getElementsByTagName("Inspection-Date")[0].firstChild.nodeValue - ) + return { + "address1": address1, + "address2": address2, + "address3": address3, + "posttown": posttown, + "postcode": postcode, + "address": address + } def get_property_dimensions(self): """ From a32f479e10634983fe578247bb4f4bbb1e419c9a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 12:50:13 +0100 Subject: [PATCH 05/49] removed basic attributes that are in the epc --- etl/xml_survey_extraction/XmlParser.py | 160 ------------------------- 1 file changed, 160 deletions(-) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index ef18c6db..4ca1eb50 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -205,13 +205,6 @@ class XmlParser: self.get_detailed_heating_specs() # Building fabric - self.get_walls() - self.get_roof() - self.get_floor() - self.get_windows() - self.get_heating() - self.get_hot_water() - self.get_lighting() self.get_doors() # Property dimensions @@ -472,159 +465,6 @@ class XmlParser: self.heating_system = heating_system self.heating_controls = heating_controls - def get_walls(self): - - wall_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Wall')[0] - - self.walls_description = ( - wall_xml_data - .getElementsByTagName("Description")[0] - .firstChild.nodeValue - ) - - self.walls_energy_rating = ( - wall_xml_data - .getElementsByTagName("Energy-Efficiency-Rating")[0] - .firstChild.nodeValue - ) - - is_cavity = "cavity wall" in self.walls_description.lower() - is_empty = "no insulation" in 
self.walls_description.lower() - is_partial = "partial insulation" in self.walls_description.lower() - - if not is_cavity: - self.walls_classification = "NON CAVITY" - return - - if is_empty: - self.walls_classification = "EMPTY" - return - - if is_partial: - self.walls_classification = "PARTIAL" - return - - if is_cavity and not is_empty and not is_partial: - self.walls_classification = "FULL" - return - - raise NotImplementedError("Implement me") - - def get_roof(self): - - room_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Roof')[0] - - self.roof_description = ( - room_xml_data - .getElementsByTagName("Description")[0] - .firstChild.nodeValue - ) - - self.roof_energy_rating = ( - room_xml_data - .getElementsByTagName("Energy-Efficiency-Rating")[0] - .firstChild.nodeValue - ) - - loft_recommendation_tag = self.xml.getElementsByTagName("Impact-Of-Loft-Insulation") - description_contains_loft = "loft" in self.roof_description.lower() - - if not loft_recommendation_tag and not description_contains_loft: - self.is_loft = "No" - return - - self.is_loft = "Yes" - return - - def get_floor(self): - - floor_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Floor')[0] - - self.floor_description = ( - floor_xml_data - .getElementsByTagName("Description")[0] - .firstChild.nodeValue - ) - - self.floor_energy_rating = ( - floor_xml_data - .getElementsByTagName("Energy-Efficiency-Rating")[0] - .firstChild.nodeValue - ) - - def get_windows(self): - - windows_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Window')[0] - - self.windows_description = ( - windows_xml_data - .getElementsByTagName("Description")[0] - .firstChild.nodeValue - ) - - self.windows_energy_rating = ( - windows_xml_data - .getElementsByTagName("Energy-Efficiency-Rating")[0] - .firstChild.nodeValue - ) - - def get_heating(self): - """ - This function will retrieve the main heating and the main 
heating controls - :return: - """ - mainheating_xml_data = self.xml.getElementsByTagName('Main-Heating')[0] - - self.main_heating_description = ( - mainheating_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue - ) - - self.main_heating_energy_rating = ( - mainheating_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue - ) - - mainheating_controls_xml_data = self.xml.getElementsByTagName('Main-Heating-Controls')[0] - - self.main_heating_controls_description = ( - mainheating_controls_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue - ) - - self.main_heating_controls_energy_rating = ( - mainheating_controls_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue - ) - - second_heating_xml_data = self.xml.getElementsByTagName('Secondary-Heating')[0] - - self.second_heating_description = ( - second_heating_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue - ) - - self.second_heating_energy_rating = ( - second_heating_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue - ) - - def get_hot_water(self): - hot_water_xml_data = self.xml.getElementsByTagName('Hot-Water')[0] - - self.hot_water_description = ( - hot_water_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue - ) - - self.hot_water_energy_rating = ( - hot_water_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue - ) - - def get_lighting(self): - lighting_xml_data = self.xml.getElementsByTagName('Lighting')[0] - - self.lighting_description = ( - lighting_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue - ) - - self.lighting_energy_rating = ( - lighting_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue - ) - def get_doors(self): # Doors can be found in the SAP-Property-Details tag From 445b76d50a2277d21d92ef77d9ec657d5b0b7531 Mon Sep 17 00:00:00 2001 From: Khalim 
Conn-Kowlessar Date: Thu, 25 Jul 2024 14:46:11 +0100 Subject: [PATCH 06/49] Added full extraction of floor dimensions --- etl/xml_survey_extraction/XmlParser.py | 69 ++++++++++++++++++++++++-- etl/xml_survey_extraction/app.py | 3 ++ 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 4ca1eb50..7f317f29 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -1,5 +1,6 @@ import re import usaddress +from datetime import datetime from xml.dom.minidom import parseString from backend.app.utils import sap_to_epc from etl.xml_survey_extraction.pcdb import heating_data @@ -119,6 +120,8 @@ class XmlParser: floor_height = None insulation_wall_area = None + floor_dimensions = None + rrn = None database_data = None @@ -230,7 +233,9 @@ class XmlParser: self.epc = { "uprn": self.uprn, + "uprn-source": "Address Matched", "property-type": property_type, + "building-reference-number": "", **self.get_sap(), **self.get_property_address(), "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'), @@ -252,7 +257,7 @@ class XmlParser: "mainheatcont-description": self.get_property_summary_value('Main-Heating-Controls', 'Description'), "sheating-energy-eff": self.RATINGS_MAP[ - self.get_property_summary_value('Secondary-Heating', 'Energy-Efficiency-Rating'), + self.get_property_summary_value('Secondary-Heating', 'Energy-Efficiency-Rating') ], "local-authority": "", # Not included in the xml "local-authority-label": "", @@ -304,7 +309,7 @@ class XmlParser: "sheating-env-eff": self.RATINGS_MAP[ self.get_property_summary_value('Secondary-Heating', 'Environmental-Efficiency-Rating') ], - "lighting_description": self.get_property_summary_value('Lighting', 'Description'), + "lighting-description": self.get_property_summary_value('Lighting', 'Description'), "roof-env-eff": self.RATINGS_MAP[ 
self.get_property_summary_value('Roof', 'Environmental-Efficiency-Rating') ], @@ -329,10 +334,11 @@ class XmlParser: self.get_property_summary_value('Main-Heating-Controls', 'Environmental-Efficiency-Rating') ], "lmk-key": "", # Doesn't exist for non-EPC xmls - "wind-turbines-count": self.get_node_value('Wind-Turbines-Count'), + "wind-turbine-count": self.get_node_value('Wind-Turbines-Count'), "tenure": self.TENURE_MAP[self.get_node_value('Tenure')], "floor-level": floor_level, "potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'), + "potentual-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))), "hot-water-energy-eff": self.RATINGS_MAP[ self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating') ], @@ -341,10 +347,15 @@ class XmlParser: "hotwater-description": self.get_property_summary_value('Hot-Water', 'Description'), "co2-emissions-current": self.get_node_value('CO2-Emissions-Current'), "heating-cost-current": self.get_node_value('Heating-Cost-Current'), + "heating-cost-potential": self.get_energy_assessment_value('Heating-Cost-Potential'), "hot-water-cost-current": self.get_node_value('Hot-Water-Cost-Current'), + "hot-water-cost-potential": self.get_energy_assessment_value('Hot-Water-Cost-Potential'), "lighting-cost-current": self.get_node_value('Lighting-Cost-Current'), "energy-consumption-current": self.get_node_value('Energy-Consumption-Current'), "lodgement-date": self.get_node_value('Inspection-Date'), + "lodgement-datetime": + datetime.strptime(self.get_node_value('Inspection-Date'), "%Y-%m-%d").isoformat(), + "mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'), } @@ -511,8 +522,15 @@ class XmlParser: posttown = self.get_node(property_tag.getElementsByTagName("Post-Town")[0]) postcode = self.get_node(property_tag.getElementsByTagName("Postcode")[0]) address = ", ".join( - [x for x in [self.address1, self.address2, self.address3, 
self.posttown, self.postcode] if x is not None] + [x for x in [address1, address2, address3] if x is not None] ) + county = property_tag.getElementsByTagName("County") + if county: + county = county[0].firstChild.nodeValue + + # Seems to be unavailable in the xml + constituency = None + constituency_label = None return { "address1": address1, @@ -520,7 +538,10 @@ class XmlParser: "address3": address3, "posttown": posttown, "postcode": postcode, - "address": address + "address": address, + "county": county, + "constituency": constituency, + "constituency-label": constituency_label } def get_property_dimensions(self): @@ -572,3 +593,41 @@ class XmlParser: self.insulation_wall_area = self.heat_loss_perimeter * self.floor_height * self.INSULATION_WALL_AREA_FACTOR self.perimeter = self.heat_loss_perimeter + self.party_wall_length + + def get_floor_dimensions(self): + + """ + Extracts physical measurements of the property such as the floor area, room height, etc. + across the main dwelling and any extensions. 
+ :return: + """ + + def get_part_value(node, tag_name): + element = node.getElementsByTagName(tag_name) + if element and element[0].firstChild: + return element[0].firstChild.nodeValue + return None + + # Each part will correspond to the main + sap_building_parts = self.xml.getElementsByTagName("SAP-Building-Part") + + floor_dimensions = [] + for building_part in sap_building_parts: + building_part_identifier = building_part.getElementsByTagName("Identifier")[0].firstChild.nodeValue + sap_floor_dimensions = building_part.getElementsByTagName("SAP-Floor-Dimension") + + data = [ + { + 'building_part_identifier': building_part_identifier, + 'floor': get_part_value(floor_dimension, 'Floor'), + 'floor_construction': get_part_value(floor_dimension, 'Floor-Construction'), + 'floor_insulation': get_part_value(floor_dimension, 'Floor-Insulation'), + 'heat_loss-perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'), + 'party_wall-length': get_part_value(floor_dimension, 'Party-Wall-Length'), + 'total_floor-area': get_part_value(floor_dimension, 'Total-Floor-Area'), + 'room_height': get_part_value(floor_dimension, 'Room-Height') + } for floor_dimension in sap_floor_dimensions + ] + floor_dimensions.extend(data) + + self.floor_dimensions = floor_dimensions diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index 9bcbb168..c70097d4 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -9,6 +9,7 @@ logger = setup_logger() SURVEYORS = "JAFFERSONS ENERGY CONSULTANTS" PROJECT_CODE = "VDE001" BUCKET = "retrofit-energy-assessments-dev" +PORTFOLIO_ID = None def main(): @@ -48,3 +49,5 @@ def main(): xml_parser = XmlParser(file=xml_data_io, filekey=xml, uprn=uprn) xml_parser.run() logger.info(f"Extracted data from {xml}") + + # TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio From eaa1c3bca4e97c88b1908a0ba329043ac9cfc0cd Mon Sep 17 00:00:00 2001 
From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 14:47:02 +0100 Subject: [PATCH 07/49] etracting floor dimensions --- etl/xml_survey_extraction/XmlParser.py | 52 -------------------------- 1 file changed, 52 deletions(-) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 7f317f29..0d9dc512 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -44,70 +44,18 @@ def get_house_number(address: str) -> str | None: class XmlParser: uprn = None - property_type = None - current_energy_efficiency = None - current_energy_rating = None # heating/emissions information space_heating_kwh = None water_heating_kwh = None - co2_emissions_current = None - heating_cost_current = None - hot_water_cost_current = None - lighting_cost_current = None - energy_consumption_current = None - energy_consumption_potential = None heating_system = None heating_controls = None # Assessor details surveyor_name = None - # Addresses - address1 = None - address2 = None - address3 = None - posttown = None - postcode = None - address = None - - # Dates - survey_date = None - - # Building Fabric - # Walls - walls_description = None - walls_classification = None - walls_energy_rating = None - # Roof - roof_description = None - roof_energy_rating = None - is_loft = None - # Floor - floor_description = None - floor_energy_rating = None - # Windows - windows_description = None - windows_energy_rating = None - # main heating - main_heating_description = None - main_heating_energy_rating = None - # Heating controls - main_heating_controls_description = None - main_heating_controls_energy_rating = None - # Hot water - hot_water_description = None - hot_water_energy_rating = None - # Lighting - lighting_description = None - lighting_energy_rating = None - # Second Heating - second_heating_description = None - second_heating_energy_rating = None - number_of_doors = None number_of_insulated_doors = None - photo_supply = 
None # Property dimensions number_of_floors = None From a3c2ff06a8a2cf4317fc8e89285756fa5a49d398 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 15:55:13 +0100 Subject: [PATCH 08/49] retrieved all epc fields --- etl/xml_survey_extraction/XmlParser.py | 74 ++++++++++++++++++++++++-- etl/xml_survey_extraction/app.py | 8 +++ 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 0d9dc512..1533d4c7 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -1,4 +1,5 @@ import re +import numpy as np import usaddress from datetime import datetime from xml.dom.minidom import parseString @@ -43,6 +44,7 @@ def get_house_number(address: str) -> str | None: class XmlParser: + epc = None uprn = None # heating/emissions information @@ -56,6 +58,7 @@ class XmlParser: number_of_doors = None number_of_insulated_doors = None + windows = None # Property dimensions number_of_floors = None @@ -153,7 +156,7 @@ class XmlParser: self.get_heating_and_emissions_data() - self.get_detailed_heating_specs() + # self.get_detailed_heating_specs() # Building fabric self.get_doors() @@ -161,11 +164,21 @@ class XmlParser: # Property dimensions self.get_property_dimensions() + self.get_floor_dimensions() + + self.get_windows() + # Get all of the EPC data self.extract_epc() def extract_epc(self): + if self.floor_dimensions is None: + raise ValueError("Run get_floor_dimensions() first") + + if self.windows is None: + raise ValueError("Run get_windows() first") + property_type = self.get_property_type() if property_type == "Flat": @@ -178,6 +191,15 @@ class XmlParser: flat_storey_count = "" flat_top_storey = "" floor_level = "NO DATA!" + energy_tariff = "NO DATA!" 
+ + floor_height = np.mean([ + float(x['room_height']) for x in self.floor_dimensions if x['building_part_identifier'] == 'Main Dwelling' + ]) + + # Take the most prevelant glazing type + glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0'] + glazed_type = max(glazed_type, key=glazed_type.count) self.epc = { "uprn": self.uprn, @@ -286,7 +308,7 @@ class XmlParser: "tenure": self.TENURE_MAP[self.get_node_value('Tenure')], "floor-level": floor_level, "potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'), - "potentual-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))), + "potential-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))), "hot-water-energy-eff": self.RATINGS_MAP[ self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating') ], @@ -304,7 +326,9 @@ class XmlParser: "lodgement-datetime": datetime.strptime(self.get_node_value('Inspection-Date'), "%Y-%m-%d").isoformat(), "mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'), - + "floor-height": floor_height, + "glazed-type": glazed_type, + "energy-tariff": energy_tariff, } def get_node_value(self, tag_name): @@ -405,7 +429,7 @@ class XmlParser: .getElementsByTagName("Main-Heating")[0] ) - heating_code = sap_main_heating_details.getElementsByTagName("SAP-Main-Heating-Code")[0].firstChild.nodeValue + heating_code = sap_main_heating_details.getElementsByTagName("Main-Heating-Number")[0].firstChild.nodeValue # Get the heating system heating_system = heating_data[heating_data["code"] == int(heating_code)]["description"] @@ -579,3 +603,45 @@ class XmlParser: floor_dimensions.extend(data) self.floor_dimensions = floor_dimensions + + def get_windows(self): + """ + Extracts data about the windows in the property, including the number of windows and the window type. 
+ :return: + """ + + sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window") + + # This is the data in each sap window: + # 2 + # 1.55 + # 1 + # 3 + # true + # 16+ + # 7 + + glazing_type_lookup = { + "3": "double glazing, unknown install date" + } + + orientation_lookup = { + "3": "East", + "5": "South", + "1": "North", + "7": "West", + } + + self.windows = [ + { + "window_location": window.getElementsByTagName("Window-Location")[0].firstChild.nodeValue, + "window_area": window.getElementsByTagName("Window-Area")[0].firstChild.nodeValue, + "window_type": window.getElementsByTagName("Window-Type")[0].firstChild.nodeValue, + "glazing_type": glazing_type_lookup[ + window.getElementsByTagName("Glazing-Type")[0].firstChild.nodeValue + ], + "pvc_frame": window.getElementsByTagName("PVC-Frame")[0].firstChild.nodeValue, + "glazing_gap": window.getElementsByTagName("Glazing-Gap")[0].firstChild.nodeValue, + "orientation": orientation_lookup[window.getElementsByTagName("Orientation")[0].firstChild.nodeValue] + } for window in sap_windows + ] diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index c70097d4..c32bd787 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -1,3 +1,5 @@ +import pandas as pd + from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder from utils.logger import setup_logger from etl.xml_survey_extraction.XmlParser import XmlParser @@ -51,3 +53,9 @@ def main(): logger.info(f"Extracted data from {xml}") # TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio + + # TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which + # can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat + # https://www.ncm-pcdb.org.uk/sap/download + # However retrieving this data is not a priority, so we can leave 
this for now as parsing the database + # is a non-trivial task From bc84ed2c2a95dad1926e632bee8e9e6406f7e115 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 18:04:09 +0100 Subject: [PATCH 09/49] cleaning up epc data and adding additional: --- etl/xml_survey_extraction/XmlParser.py | 184 ++++++++++++++----------- etl/xml_survey_extraction/app.py | 2 +- 2 files changed, 101 insertions(+), 85 deletions(-) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 1533d4c7..53f7e859 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -45,6 +45,7 @@ def get_house_number(address: str) -> str | None: class XmlParser: epc = None + additional_data = None uprn = None # heating/emissions information @@ -66,20 +67,11 @@ class XmlParser: heat_loss_perimeter = None party_wall_length = None total_floor_area = None - ground_floor_area = None - is_there_party_wall = None floor_height = None insulation_wall_area = None floor_dimensions = None - rrn = None - - database_data = None - - # We assume that the insulation wall area is 85% of the total wall area, as a standard estimate - INSULATION_WALL_AREA_FACTOR = 0.85 - # The value of the URPN tells us about the file type that we're parsing UPRN_FILETYPE_MAP = { 0: "EPR", @@ -119,6 +111,10 @@ class XmlParser: '1': "Owner-occupied" } + TARIFF_MAP = { + "2": "Single" + } + def __init__(self, file, filekey, uprn=None): file.seek(0) # Ensure the file pointer is at the beginning xml_string = file.read().decode('utf-8') @@ -161,9 +157,6 @@ class XmlParser: # Building fabric self.get_doors() - # Property dimensions - self.get_property_dimensions() - self.get_floor_dimensions() self.get_windows() @@ -171,6 +164,9 @@ class XmlParser: # Get all of the EPC data self.extract_epc() + # Put together all of the additional data we capture + self.extract_additional_data() + def extract_epc(self): if self.floor_dimensions is None: @@ 
-191,16 +187,23 @@ class XmlParser: flat_storey_count = "" flat_top_storey = "" floor_level = "NO DATA!" - energy_tariff = "NO DATA!" floor_height = np.mean([ - float(x['room_height']) for x in self.floor_dimensions if x['building_part_identifier'] == 'Main Dwelling' + float(x['room_height']) for x in self.floor_dimensions if + x['building_part_identifier'] == 'Main Dwelling' and not x['room_roof'] ]) # Take the most prevelant glazing type glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0'] glazed_type = max(glazed_type, key=glazed_type.count) + energy_tariff = ( + self.xml.getElementsByTagName("SAP-Energy-Source")[0] + .getElementsByTagName("Meter-Type")[0] + .firstChild.nodeValue + ) + energy_tariff = self.TARIFF_MAP[energy_tariff] + self.epc = { "uprn": self.uprn, "uprn-source": "Address Matched", @@ -209,8 +212,6 @@ class XmlParser: **self.get_sap(), **self.get_property_address(), "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'), - # TODO: Needs to be done more carefully - # "floor-height" = self.get_node_value_from_floor_dimensions('Room-Height'), "construction-age-band": self.get_node_value('Construction-Age-Band'), "mainheat-energy-eff": self.RATINGS_MAP[ self.get_property_summary_value('Main-Heating', 'Energy-Efficiency-Rating') @@ -222,8 +223,6 @@ class XmlParser: self.get_property_summary_value('Lighting', 'Energy-Efficiency-Rating') ], "environment-impact-potential": self.get_energy_assessment_value('Environmental-Impact-Potential'), - # TODO: Needs to be done more careully since we have multiple windows - # "glazed-type": self.get_node_value('Glazing-Type'), "mainheatcont-description": self.get_property_summary_value('Main-Heating-Controls', 'Description'), "sheating-energy-eff": self.RATINGS_MAP[ @@ -232,8 +231,7 @@ class XmlParser: "local-authority": "", # Not included in the xml "local-authority-label": "", "fixed-lighting-outlets-count": 
self.get_node_value('Fixed-Lighting-Outlets-Count'), - # TODO: Doesn't seem to be included in the xml - # "energy-tariff": self.get_node_value('Energy-Tariff'), + "energy-tariff": energy_tariff, "mechanical-ventilation": self.MECHANICAL_VENTILATION_MAP[self.get_node_value('Mechanical-Ventilation')], "solar-water-heating-flag": self.get_node_value('Solar-Water-Heating'), "co2-emissions-potential": self.get_energy_assessment_value('CO2-Emissions-Potential'), @@ -328,7 +326,47 @@ class XmlParser: "mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'), "floor-height": floor_height, "glazed-type": glazed_type, - "energy-tariff": energy_tariff, + } + + def get_insulation_wall_area(self): + """ + Extracts the insulation wall area for the main dwelling + :return: + """ + + main_dwelling_floors = [ + f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"] + ] + main_dwelling_windows = [ + w for w in self.windows if w["window_location"] == "0" + ] + + wall_areas = sum([float(f["heat_loss_perimeter"]) * float(f["room_height"]) for f in main_dwelling_floors]) + window_areas = sum([float(w["window_area"]) for w in main_dwelling_windows]) + return wall_areas - window_areas + + def extract_additional_data(self): + + self.insulation_wall_area = self.get_insulation_wall_area() + + self.additional_data = { + "file_location": self.filekey, + "surveyor_name": self.surveyor_name, + "space_heating_kwh": self.space_heating_kwh, + "water_heating_kwh": self.water_heating_kwh, + # "heating_system": self.heating_system, + # "heating_controls": self.heating_controls, + "number_of_doors": self.number_of_doors, + "number_of_insulated_doors": self.number_of_insulated_doors, + "number_of_floors": self.number_of_floors, + "insulation_wall_area": self.insulation_wall_area, + "heat_loss_perimeter": self.heat_loss_perimeter, + "party_wall_length": self.party_wall_length, + "perimeter": self.perimeter, + 
"rooms_with_bath_and_or_shower": self.get_node_value('Rooms-With-Bath-And-Or-Shower'), + "rooms_with_mixer_shower_no_bath": self.get_node_value('Rooms-With-Mixer-Shower-No-Bath'), + "room_with_bath_and_mixer_shower": self.get_node_value('Rooms-With-Bath-And-Mixer-Shower'), + "percent_draftproofed": self.get_node_value('Percent-Draughtproofed'), } def get_node_value(self, tag_name): @@ -516,56 +554,6 @@ class XmlParser: "constituency-label": constituency_label } - def get_property_dimensions(self): - """ - This function will extract the relevant property dimensions including the floor area, - number of floors, perimeter, party wall length and the insulation_wall_area. - - insulation_wall_area is typically simplified down to perimeter * height * 0.85 - :return: - """ - - # Each floor has its own SAP-Floor-Dimension tag - floor_dimensions = ( - self.xml.getElementsByTagName("SAP-Floor-Dimensions")[0] - .getElementsByTagName("SAP-Floor-Dimension") - ) - - self.number_of_floors = len(floor_dimensions) - - self.heat_loss_perimeter = float( - floor_dimensions[0].getElementsByTagName("Heat-Loss-Perimeter")[0].firstChild.nodeValue - ) - - self.party_wall_length = float( - floor_dimensions[0].getElementsByTagName("Party-Wall-Length")[0].firstChild.nodeValue - ) - - party_wall_construction_tag = ( - self.xml.getElementsByTagName("Party-Wall-Construction")[0].firstChild.nodeValue.replace("\n", "").strip() - ) - - self.is_there_party_wall = ( - "Yes" if (self.party_wall_length > 0) or (party_wall_construction_tag != "") else "No" - ) - - # We pull out all of the floor areas - floor_areas = [ - float(x.getElementsByTagName("Total-Floor-Area")[0].firstChild.nodeValue) for x in floor_dimensions - ] - - self.total_floor_area = sum(floor_areas) - self.ground_floor_area = floor_areas[0] - - self.floor_height = float( - floor_dimensions[0] - .getElementsByTagName("Room-Height")[0] - .firstChild.nodeValue - ) - - self.insulation_wall_area = self.heat_loss_perimeter * self.floor_height 
* self.INSULATION_WALL_AREA_FACTOR - self.perimeter = self.heat_loss_perimeter + self.party_wall_length - def get_floor_dimensions(self): """ @@ -594,16 +582,53 @@ class XmlParser: 'floor': get_part_value(floor_dimension, 'Floor'), 'floor_construction': get_part_value(floor_dimension, 'Floor-Construction'), 'floor_insulation': get_part_value(floor_dimension, 'Floor-Insulation'), - 'heat_loss-perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'), - 'party_wall-length': get_part_value(floor_dimension, 'Party-Wall-Length'), - 'total_floor-area': get_part_value(floor_dimension, 'Total-Floor-Area'), - 'room_height': get_part_value(floor_dimension, 'Room-Height') + 'heat_loss_perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'), + 'party_wall_length': get_part_value(floor_dimension, 'Party-Wall-Length'), + 'total_floor_area': get_part_value(floor_dimension, 'Total-Floor-Area'), + 'room_height': get_part_value(floor_dimension, 'Room-Height'), + "room_roof": False } for floor_dimension in sap_floor_dimensions ] + + room_roofs = building_part.getElementsByTagName("SAP-Room-In-Roof") + room_roof_data = [ + { + "building_part_identifier": building_part_identifier, + "floor": str(max([int(d["floor"]) for d in data]) + 1), + "floor_construction": "", + "floor_insulation": rr.getElementsByTagName("Insulation")[0].firstChild.nodeValue, + "heat_loss_perimeter": "", + "party_wall_length": "", + "total_floor_area": rr.getElementsByTagName("Floor-Area")[0].firstChild.nodeValue, + "room_height": "", + "room_roof": True + } for rr in room_roofs + ] + floor_dimensions.extend(data) + floor_dimensions.extend(room_roof_data) self.floor_dimensions = floor_dimensions + self.number_of_floors = len( + [f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling"] + ) + self.heat_loss_perimeter = max( + [ + float(f["heat_loss_perimeter"]) for f in self.floor_dimensions + if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"] 
+ ] + ) + + self.party_wall_length = max( + [ + float(f["party_wall_length"]) for f in self.floor_dimensions + if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"] + ] + ) + + self.perimeter = self.heat_loss_perimeter + self.party_wall_length + def get_windows(self): """ Extracts data about the windows in the property, including the number of windows and the window type. @@ -612,15 +637,6 @@ class XmlParser: sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window") - # This is the data in each sap window: - # 2 - # 1.55 - # 1 - # 3 - # true - # 16+ - # 7 - glazing_type_lookup = { "3": "double glazing, unknown install date" } diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index c32bd787..b3500e71 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -48,7 +48,7 @@ def main(): for xml in xmls: xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml) xml_data_io = BytesIO(xml_data) - xml_parser = XmlParser(file=xml_data_io, filekey=xml, uprn=uprn) + xml_parser = XmlParser(file=xml_data_io, filekey=os.path.join(f"s3://{BUCKET}", xml), uprn=uprn) xml_parser.run() logger.info(f"Extracted data from {xml}") From c9d3bb6eec1dde40a136b01ff7efefb1d51f811c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 18:09:37 +0100 Subject: [PATCH 10/49] completed extraction of data --- etl/xml_survey_extraction/XmlParser.py | 19 ++++++++++++++++++- etl/xml_survey_extraction/app.py | 7 ++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 53f7e859..d14dafc4 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -115,11 +115,12 @@ class XmlParser: "2": "Single" } - def __init__(self, file, filekey, uprn=None): + def __init__(self, file, filekey, surveyor_company, uprn=None): file.seek(0) 
# Ensure the file pointer is at the beginning xml_string = file.read().decode('utf-8') self.xml = parseString(xml_string) self.filekey = filekey + self.surveyor_company = surveyor_company # The xml parser is use to parse the EPC and EPR xmls and different file types will contain different # information @@ -349,9 +350,21 @@ class XmlParser: self.insulation_wall_area = self.get_insulation_wall_area() + boolean_lookup = { + "true": True, + "false": False, + "Y": True, + "N": False + } + + cylinder_insulation_type = { + "1": "Foam", + } + self.additional_data = { "file_location": self.filekey, "surveyor_name": self.surveyor_name, + "surveyor_company": self.surveyor_company, "space_heating_kwh": self.space_heating_kwh, "water_heating_kwh": self.water_heating_kwh, # "heating_system": self.heating_system, @@ -367,6 +380,10 @@ class XmlParser: "rooms_with_mixer_shower_no_bath": self.get_node_value('Rooms-With-Mixer-Shower-No-Bath'), "room_with_bath_and_mixer_shower": self.get_node_value('Rooms-With-Bath-And-Mixer-Shower'), "percent_draftproofed": self.get_node_value('Percent-Draughtproofed'), + "has_hot_water_cylinder": boolean_lookup[self.get_node_value('Has-Hot-Water-Cylinder')], + "cylinder_insulation_type": cylinder_insulation_type[self.get_node_value('Cylinder-Insulation-Type')], + "cylinder_insulation_thickness": self.get_node_value('Cylinder-Insulation-Thickness'), + "cylinder_thermostat": boolean_lookup[self.get_node_value('Cylinder-Thermostat')], } def get_node_value(self, tag_name): diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index b3500e71..92048a68 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -48,7 +48,12 @@ def main(): for xml in xmls: xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml) xml_data_io = BytesIO(xml_data) - xml_parser = XmlParser(file=xml_data_io, filekey=os.path.join(f"s3://{BUCKET}", xml), uprn=uprn) + xml_parser = XmlParser( + file=xml_data_io, + 
filekey=os.path.join(f"s3://{BUCKET}", xml), + uprn=uprn, + surveyor_company=SURVEYORS, + ) xml_parser.run() logger.info(f"Extracted data from {xml}") From 7b04e1edc72a2e255fbc359fbbec3c1c72a37206 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 18:13:50 +0100 Subject: [PATCH 11/49] preparing for data extraction --- etl/xml_survey_extraction/XmlParser.py | 27 +++++++++----------------- 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index d14dafc4..76fa5612 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -44,8 +44,8 @@ def get_house_number(address: str) -> str | None: class XmlParser: - epc = None - additional_data = None + epc = {} + additional_data = {} uprn = None # heating/emissions information @@ -72,12 +72,6 @@ class XmlParser: floor_dimensions = None - # The value of the URPN tells us about the file type that we're parsing - UPRN_FILETYPE_MAP = { - 0: "EPR", - -1: "RDSAP_EPR" - } - RATINGS_MAP = { "0": "N/A", "1": "Very Poor", @@ -122,14 +116,11 @@ class XmlParser: self.filekey = filekey self.surveyor_company = surveyor_company - # The xml parser is use to parse the EPC and EPR xmls and different file types will contain different - # information - # In order to identify the file type, we can look for the presence of the 'UPRN' tag - # If the UPRN tag is present, we can assume that the file is an EPC - # If the UPRN tag is not present, we can assume that the file is an EPR - self.get_uprn(uprn) + # We check if we have a lig xml or rdsap xml + # We look for the presence of the Schema-Version-Original tag + self.is_lig = len(self.xml.getElementsByTagName("Schema-Version-Original")) > 0 - self.file_type = self.UPRN_FILETYPE_MAP.get(self.uprn, "EPC") + self.get_uprn(uprn) @staticmethod def get_node(node): @@ -145,10 +136,10 @@ class XmlParser: return node_first_child.nodeValue def run(self): - 
if self.file_type == "RDSAP_EPR": - # This file type contains just limited information compared to a regular EPR/EPC, and so we just exit - # unless we learn something else that determines that we need information from this file + + if not self.is_lig: return + self.get_assessor_details() self.get_heating_and_emissions_data() From 6702eb65b06419b1459b1226915d73aded06110b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 18:35:00 +0100 Subject: [PATCH 12/49] energy assessment model --- backend/app/db/models/energy_assessments.py | 121 ++++++++++++++++++++ etl/xml_survey_extraction/XmlParser.py | 10 +- etl/xml_survey_extraction/app.py | 2 + 3 files changed, 128 insertions(+), 5 deletions(-) create mode 100644 backend/app/db/models/energy_assessments.py diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py new file mode 100644 index 00000000..b6f7dd73 --- /dev/null +++ b/backend/app/db/models/energy_assessments.py @@ -0,0 +1,121 @@ +from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base() + + +class EnergyAssessment(Base): + __tablename__ = 'energy_assessments' + id = Column(BigInteger, primary_key=True, autoincrement=True) + uprn = Column(BigInteger, nullable=False) + uprn_source = Column(Text, nullable=False) + property_type = Column(Text, nullable=False) + building_reference_number = Column(Text) + current_energy_efficiency = Column(Text, nullable=False) + current_energy_rating = Column(Text, nullable=False) + address1 = Column(Text, nullable=False) + address2 = Column(Text, nullable=False) + address3 = Column(Text) + posttown = Column(Text, nullable=False) + postcode = Column(Text, nullable=False) + address = Column(Text, nullable=False) + county = Column(Text) + constituency = Column(Text) + constituency_label = Column(Text) + low_energy_fixed_light_count = Column(Text, 
nullable=False) + construction_age_band = Column(Text, nullable=False) + mainheat_energy_eff = Column(Text, nullable=False) + windows_env_eff = Column(Text, nullable=False) + lighting_energy_eff = Column(Text, nullable=False) + environment_impact_potential = Column(Text, nullable=False) + mainheatcont_description = Column(Text, nullable=False) + sheating_energy_eff = Column(Text, nullable=False) + local_authority = Column(Text, nullable=False) + local_authority_label = Column(Text, nullable=False) + fixed_lighting_outlets_count = Column(Text, nullable=False) + energy_tariff = Column(Text, nullable=False) + mechanical_ventilation = Column(Text, nullable=False) + solar_water_heating_flag = Column(Text, nullable=False) + co2_emissions_potential = Column(Text, nullable=False) + number_heated_rooms = Column(Text, nullable=False) + floor_description = Column(Text, nullable=False) + energy_consumption_potential = Column(Text, nullable=False) + built_form = Column(Text, nullable=False) + number_open_fireplaces = Column(Text, nullable=False) + windows_description = Column(Text, nullable=False) + glazed_area = Column(Text, nullable=False) + inspection_date = Column(DateTime(timezone=True), nullable=False) + mains_gas_flag = Column(Text, nullable=False) + co2_emiss_curr_per_floor_area = Column(Text, nullable=False) + heat_loss_corridor = Column(Text, nullable=False) + unheated_corridor_length = Column(Text) + flat_storey_count = Column(Text) + roof_energy_eff = Column(Text, nullable=False) + total_floor_area = Column(Text, nullable=False) + environment_impact_current = Column(Text, nullable=False) + roof_description = Column(Text, nullable=False) + floor_energy_eff = Column(Text, nullable=False) + number_habitable_rooms = Column(Text, nullable=False) + hot_water_env_eff = Column(Text, nullable=False) + mainheatc_energy_eff = Column(Text, nullable=False) + main_fuel = Column(Text, nullable=False) + lighting_env_eff = Column(Text, nullable=False) + windows_energy_eff = 
Column(Text, nullable=False) + floor_env_eff = Column(Text, nullable=False) + sheating_env_eff = Column(Text, nullable=False) + lighting_description = Column(Text, nullable=False) + roof_env_eff = Column(Text, nullable=False) + walls_energy_eff = Column(Text, nullable=False) + photo_supply = Column(Text, nullable=False) + lighting_cost_potential = Column(Text, nullable=False) + mainheat_env_eff = Column(Text, nullable=False) + multi_glaze_proportion = Column(Text, nullable=False) + main_heating_controls = Column(Text, nullable=False) + flat_top_storey = Column(Text) + secondheat_description = Column(Text, nullable=False) + walls_env_eff = Column(Text, nullable=False) + transaction_type = Column(Text, nullable=False) + extension_count = Column(Text, nullable=False) + mainheatc_env_eff = Column(Text, nullable=False) + lmk_key = Column(Text) + wind_turbine_count = Column(Text, nullable=False) + tenure = Column(Text, nullable=False) + floor_level = Column(Text, nullable=False) + potential_energy_efficiency = Column(Text, nullable=False) + potential_energy_rating = Column(Text, nullable=False) + hot_water_energy_eff = Column(Text, nullable=False) + low_energy_lighting = Column(Text, nullable=False) + walls_description = Column(Text, nullable=False) + hotwater_description = Column(Text, nullable=False) + co2_emissions_current = Column(Text, nullable=False) + heating_cost_current = Column(Text, nullable=False) + heating_cost_potential = Column(Text, nullable=False) + hot_water_cost_current = Column(Text, nullable=False) + hot_water_cost_potential = Column(Text, nullable=False) + lighting_cost_current = Column(Text, nullable=False) + energy_consumption_current = Column(Text, nullable=False) + lodgement_date = Column(DateTime(timezone=True), nullable=False) + lodgement_datetime = Column(DateTime(timezone=True), nullable=False) + mainheat_description = Column(Text, nullable=False) + floor_height = Column(Float, nullable=False) + glazed_type = Column(Text, nullable=False) + 
file_location = Column(Text, nullable=False) + surveyor_name = Column(Text, nullable=False) + surveyor_company = Column(Text, nullable=False) + space_heating_kwh = Column(Text, nullable=False) + water_heating_kwh = Column(Text, nullable=False) + number_of_doors = Column(Integer, nullable=False) + number_of_insulated_doors = Column(Integer, nullable=False) + number_of_floors = Column(Integer, nullable=False) + insulation_wall_area = Column(Float, nullable=False) + heat_loss_perimeter = Column(Float, nullable=False) + party_wall_length = Column(Float, nullable=False) + perimeter = Column(Float, nullable=False) + rooms_with_bath_and_or_shower = Column(Integer) + rooms_with_mixer_shower_no_bath = Column(Integer) + room_with_bath_and_mixer_shower = Column(Integer) + percent_draftproofed = Column(Integer) + has_hot_water_cylinder = Column(Boolean) + cylinder_insulation_type = Column(Text) + cylinder_insulation_thickness = Column(Integer) + cylinder_thermostat = Column(Boolean) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 76fa5612..3f277bad 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -367,13 +367,13 @@ class XmlParser: "heat_loss_perimeter": self.heat_loss_perimeter, "party_wall_length": self.party_wall_length, "perimeter": self.perimeter, - "rooms_with_bath_and_or_shower": self.get_node_value('Rooms-With-Bath-And-Or-Shower'), - "rooms_with_mixer_shower_no_bath": self.get_node_value('Rooms-With-Mixer-Shower-No-Bath'), - "room_with_bath_and_mixer_shower": self.get_node_value('Rooms-With-Bath-And-Mixer-Shower'), - "percent_draftproofed": self.get_node_value('Percent-Draughtproofed'), + "rooms_with_bath_and_or_shower": int(self.get_node_value('Rooms-With-Bath-And-Or-Shower')), + "rooms_with_mixer_shower_no_bath": int(self.get_node_value('Rooms-With-Mixer-Shower-No-Bath')), + "room_with_bath_and_mixer_shower": 
int(self.get_node_value('Rooms-With-Bath-And-Mixer-Shower')), + "percent_draftproofed": int(self.get_node_value('Percent-Draughtproofed')), "has_hot_water_cylinder": boolean_lookup[self.get_node_value('Has-Hot-Water-Cylinder')], "cylinder_insulation_type": cylinder_insulation_type[self.get_node_value('Cylinder-Insulation-Type')], - "cylinder_insulation_thickness": self.get_node_value('Cylinder-Insulation-Thickness'), + "cylinder_insulation_thickness": int(self.get_node_value('Cylinder-Insulation-Thickness')), "cylinder_thermostat": boolean_lookup[self.get_node_value('Cylinder-Thermostat')], } diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index 92048a68..3e41b5fb 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -56,6 +56,8 @@ def main(): ) xml_parser.run() logger.info(f"Extracted data from {xml}") + extracted_epc = xml_parser.epc + extracted_additional_data = xml_parser.additional_data # TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio From 4192ee7d690b1a74a5e1a3e361abe5c08a48bc43 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 18:43:14 +0100 Subject: [PATCH 13/49] putting together data upload to db --- etl/xml_survey_extraction/app.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index 3e41b5fb..c6e16e3b 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -1,5 +1,5 @@ -import pandas as pd - +from sqlalchemy.orm import sessionmaker +from backend.app.db.connection import db_engine from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder from utils.logger import setup_logger from etl.xml_survey_extraction.XmlParser import XmlParser @@ -43,6 +43,7 @@ def main(): logger.info(f"Exatracted XMLS for the energy assessments") # For 
each property, we download the xmls and extract the data + database_data = [] for uprn, xmls in assessments_map.items(): extracted_data = {} for xml in xmls: @@ -59,6 +60,16 @@ def main(): extracted_epc = xml_parser.epc extracted_additional_data = xml_parser.additional_data + data_to_update = { + **extracted_epc, **extracted_additional_data + } + extracted_data.update(data_to_update) + + database_data.append(extracted_data) + + logger.info("Uploading data to the database") + session = sessionmaker(bind=db_engine)() + # TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio # TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which From 78066563cea4b328952707fe3d60aba367ef88db Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 18:47:43 +0100 Subject: [PATCH 14/49] Added missing orientations --- etl/xml_survey_extraction/XmlParser.py | 8 ++++++-- etl/xml_survey_extraction/app.py | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 3f277bad..c65173dd 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -650,10 +650,14 @@ class XmlParser: } orientation_lookup = { - "3": "East", - "5": "South", "1": "North", + "2": "North East", + "3": "East", + "4": "South East", + "5": "South", + "6": "South West", "7": "West", + "8": "North West" } self.windows = [ diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index c6e16e3b..6fe02e2d 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -42,6 +42,9 @@ def main(): logger.info(f"Exatracted XMLS for the energy assessments") + # TODO: IF we have many uploads, we can do them in a batch so we don't try and upload huge amounts of data to + # the database at onece + # For each property, 
we download the xmls and extract the data database_data = [] for uprn, xmls in assessments_map.items(): From 81a77b26af003bc3fb94b619470f661125ce3329 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 18:49:29 +0100 Subject: [PATCH 15/49] Updated tariff map --- etl/xml_survey_extraction/XmlParser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index c65173dd..478891bf 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -106,6 +106,7 @@ class XmlParser: } TARIFF_MAP = { + "1": "Dual", "2": "Single" } From b60112d75b2362a0bed394bba215d486c8fe9a9c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 19:03:57 +0100 Subject: [PATCH 16/49] setting up push to db --- .../functions/energy_assessment_functions.py | 27 +++++++++++++ etl/xml_survey_extraction/app.py | 38 +++++++++++++++++-- 2 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 backend/app/db/functions/energy_assessment_functions.py diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py new file mode 100644 index 00000000..8befe903 --- /dev/null +++ b/backend/app/db/functions/energy_assessment_functions.py @@ -0,0 +1,27 @@ +from backend.app.db.models.energy_assessments import EnergyAssessment +from sqlalchemy.orm import Session +from sqlalchemy.exc import IntegrityError + + +def bulk_insert_energy_assessments(session: Session, data_list): + """ + This function inserts multiple energy assessment records into the database. + + :param session: The database session + :param data_list: A list of dictionaries containing energy assessment data. 
+ """ + + try: + # Map dictionaries to EnergyAssessment instances + assessments = [EnergyAssessment(**data) for data in data_list] + + # Add all instances to the session + session.add_all(assessments) + # Commit the transaction + session.commit() + print("All records inserted successfully.") + + except IntegrityError as e: + # Rollback the session in case of error + session.rollback() + print(f"Error occurred: {e}") diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index 6fe02e2d..eea030e5 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -1,9 +1,11 @@ +from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder from utils.logger import setup_logger from etl.xml_survey_extraction.XmlParser import XmlParser import os +import pandas as pd from io import BytesIO logger = setup_logger() @@ -11,7 +13,8 @@ logger = setup_logger() SURVEYORS = "JAFFERSONS ENERGY CONSULTANTS" PROJECT_CODE = "VDE001" BUCKET = "retrofit-energy-assessments-dev" -PORTFOLIO_ID = None +PORTFOLIO_ID = 86 +USER_ID = 8 def main(): @@ -59,7 +62,8 @@ def main(): surveyor_company=SURVEYORS, ) xml_parser.run() - logger.info(f"Extracted data from {xml}") + if xml_parser.is_lig: + logger.info(f"Extracted data from {xml}") extracted_epc = xml_parser.epc extracted_additional_data = xml_parser.additional_data @@ -72,8 +76,36 @@ def main(): logger.info("Uploading data to the database") session = sessionmaker(bind=db_engine)() + bulk_insert_energy_assessments(session, database_data) + session.close() - # TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio + # Create the asset list + asset_list = [ + {"uprn": x["uprn"], "address": x["address1"], "postcode": 
x["postcode"]} for x in database_data + ] + asset_list = pd.DataFrame(asset_list) + + # Store the asset list in s3 + filename = f"{USER_ID}/{PORTFOLIO_ID}/non_intrusives.csv" + save_csv_to_s3( + dataframe=asset_list, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + body = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "A", + "trigger_file_path": filename, + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "exclusions": "", + "budget": None, + } + print(body) # TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which # can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat From c90c6d860b668f4d1960e4380ec170be1b95ddb1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 25 Jul 2024 23:56:36 +0100 Subject: [PATCH 17/49] starting looking at sfr --- .../functions/energy_assessment_functions.py | 30 ++++++-- etl/bill_savings/data_collection.py | 4 +- etl/customers/goldman/property_ownership.py | 75 +++++++++++++++---- etl/xml_survey_extraction/XmlParser.py | 2 + etl/xml_survey_extraction/app.py | 7 +- 5 files changed, 94 insertions(+), 24 deletions(-) diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py index 8befe903..0970b71f 100644 --- a/backend/app/db/functions/energy_assessment_functions.py +++ b/backend/app/db/functions/energy_assessment_functions.py @@ -5,21 +5,35 @@ from sqlalchemy.exc import IntegrityError def bulk_insert_energy_assessments(session: Session, data_list): """ - This function inserts multiple energy assessment records into the database. + This function inserts or updates multiple energy assessment records into the database. - :param session: The database session + :param session: The SQLAlchemy session. 
:param data_list: A list of dictionaries containing energy assessment data. """ - try: - # Map dictionaries to EnergyAssessment instances - assessments = [EnergyAssessment(**data) for data in data_list] + for data in data_list: + uprn = data.get('uprn') + lodgement_date = data.get('lodgement_date') + + # Check if a record with the same uprn and lodgement_date exists + existing_record = session.query(EnergyAssessment).filter_by( + uprn=uprn, + lodgement_date=lodgement_date + ).first() + + if existing_record: + # Update the existing record with new data + for key, value in data.items(): + setattr(existing_record, key, value) + session.add(existing_record) + else: + # Insert a new record + new_assessment = EnergyAssessment(**data) + session.add(new_assessment) - # Add all instances to the session - session.add_all(assessments) # Commit the transaction session.commit() - print("All records inserted successfully.") + print("All records inserted or updated successfully.") except IntegrityError as e: # Rollback the session in case of error diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py index 6095741f..e6f6de6f 100644 --- a/etl/bill_savings/data_collection.py +++ b/etl/bill_savings/data_collection.py @@ -133,8 +133,8 @@ def app(): energy_consumption_data = [] for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)): # Skip the first 50 - # if i < 344: - # continue + if i < 57: + continue data = pd.read_csv(directory / "certificates.csv", low_memory=False) # Rename the columns to the same format as the api returns diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index 500963a1..1b1cf014 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -73,7 +73,7 @@ def find_f_g_properties(paths): epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str) # Get the newest EPC for each UPRN. 
We use LODGEMENT_DATE as a proxy for this - epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed') + epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed', errors="coerce") epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN") @@ -84,7 +84,7 @@ def find_f_g_properties(paths): data = pd.concat(data) # Save as an excel - data.to_excel("EPC F & G Properties.xlsx", index=False) + data.to_excel("EPC F & G Properties - V2.xlsx", index=False) def remove_text_in_brackets(address: str) -> str: @@ -196,7 +196,7 @@ def remove_duplicate_matches(matching_lookup, properties, company_ownership): matches_to_drop[["UPRN", "Title Number"]].copy() ) - to_drop = pd.concat(to_drop) + to_drop = pd.concat(to_drop) if to_drop else pd.DataFrame() if not to_drop.empty: merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True) @@ -245,6 +245,44 @@ def remove_duplicate_uprn_matches(matching_lookup, properties, company_ownership return matching_lookup +def filter_land_registry(properties): + column_names = [ + "transaction_id", + "price", + "date_of_transfer", + "postcode", + "property_type", + "old_new", + "duration", + "paon", + "saon", + "street", + "locality", + "town_city", + "district", + "county", + "ppd_category_type", + "record_status", + ] + land_registry = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/pp-complete.csv", header=None) + land_registry.columns = column_names + land_registry = land_registry[ + land_registry["postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique()) + ] + land_registry["date_of_transfer"] = pd.to_datetime( + land_registry["date_of_transfer"], format="%Y-%m-%d", errors="coerce" + ) + # Take data from the last 5 years + land_registry = land_registry[ + (land_registry["date_of_transfer"] >= "2019-01-01") + ] + + # Filter this + land_registry.to_csv( + 
"/Users/khalimconn-kowlessar/Downloads/land_registry_prices_paid_filtered.csv", index=False + ) + + def app(): """ This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs @@ -293,17 +331,22 @@ def app(): # paths = list(set(paths)) # find_f_g_properties(paths) - properties = pd.read_excel("EPC F & G Properties.xlsx") - company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv") + properties = pd.read_excel("EPC F & G Properties - V2.xlsx") + # filter_land_registry(properties) + company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv") company_ownership["is_overseas"] = False - overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_04 2.csv") + overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv") overseas_company_ownership["is_overseas"] = True company_ownership = pd.concat([company_ownership, overseas_company_ownership]) # FIlter on relevant postcodes company_ownership = company_ownership[ - company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())] + company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique()) + ] + + # Read in land registry + land_registry = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/land_registry_prices_paid_filtered.csv") # Now we filter properties the other way around properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())] @@ -316,6 +359,8 @@ def app(): # Take the newest UPRN properties = properties.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN") + # TODO: Do we want to filter properties based on lodgement dates? 
+ # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the # the property itself starting_terms = [ @@ -414,8 +459,8 @@ def app(): freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup) leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup) - shared_leasehold_match = pd.concat(shared_leasehold_match) - shared_freehold_match = pd.concat(shared_freehold_match) + # shared_leasehold_match = pd.concat(shared_leasehold_match) + # shared_freehold_match = pd.concat(shared_freehold_match) # freehold_matching_lookup.to_excel("freehold_matching_lookup_new.xlsx") # leasehold_matching_lookup.to_excel("leasehold_matching_lookup_new.xlsx") @@ -429,7 +474,9 @@ def app(): # Combine combined_matching_lookup = pd.concat([freehold_matching_lookup, leasehold_matching_lookup]) # Remove duplicates - combined_matching_lookup = remove_duplicate_matches(combined_matching_lookup, properties, company_ownership) + combined_matching_lookup = remove_duplicate_matches( + matching_lookup=combined_matching_lookup, properties=properties, company_ownership=company_ownership + ) # We also have duplicates at a UPRN level combined_matching_lookup = remove_duplicate_uprn_matches(combined_matching_lookup, properties, company_ownership) @@ -457,11 +504,13 @@ def app(): # leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup.xlsx") # shared_leasehold_match = pd.read_excel("shared_leasehold_match.xlsx") - freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties) - leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties) + # freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties) + # leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties) combined_aggregate = aggregate_matches( - combined_matching_lookup, company_ownership, properties + 
matching_lookup=combined_matching_lookup, + company_ownership=company_ownership, + properties=properties ) investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000] diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 478891bf..90a51ae6 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -546,6 +546,8 @@ class XmlParser: county = property_tag.getElementsByTagName("County") if county: county = county[0].firstChild.nodeValue + else: + county = "" # Seems to be unavailable in the xml constituency = None diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index eea030e5..0cb95332 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -1,7 +1,7 @@ from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder +from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder, save_csv_to_s3 from utils.logger import setup_logger from etl.xml_survey_extraction.XmlParser import XmlParser import os @@ -70,6 +70,11 @@ def main(): data_to_update = { **extracted_epc, **extracted_additional_data } + + # We need to update the keys to match the database schema - i.e. 
we should replace all hyphens with + # underscores + data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()} + extracted_data.update(data_to_update) database_data.append(extracted_data) From b42d2c7750af60a2f869da9134f18fc6302bdf57 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 26 Jul 2024 14:03:24 +0100 Subject: [PATCH 18/49] Added serialization method --- .../functions/energy_assessment_functions.py | 26 ++++++++++++++++--- backend/app/db/models/energy_assessments.py | 6 +++++ backend/app/plan/router.py | 6 +++++ etl/xml_survey_extraction/app.py | 2 +- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py index 0970b71f..2d6a611e 100644 --- a/backend/app/db/functions/energy_assessment_functions.py +++ b/backend/app/db/functions/energy_assessment_functions.py @@ -1,6 +1,8 @@ from backend.app.db.models.energy_assessments import EnergyAssessment from sqlalchemy.orm import Session from sqlalchemy.exc import IntegrityError +from typing import Optional +from sqlalchemy import desc def bulk_insert_energy_assessments(session: Session, data_list): @@ -13,12 +15,12 @@ def bulk_insert_energy_assessments(session: Session, data_list): try: for data in data_list: uprn = data.get('uprn') - lodgement_date = data.get('lodgement_date') + inspection_date = data.get('inspection_date') - # Check if a record with the same uprn and lodgement_date exists + # Check if a record with the same uprn and inspection_date exists existing_record = session.query(EnergyAssessment).filter_by( uprn=uprn, - lodgement_date=lodgement_date + inspection_date=inspection_date ).first() if existing_record: @@ -39,3 +41,21 @@ def bulk_insert_energy_assessments(session: Session, data_list): # Rollback the session in case of error session.rollback() print(f"Error occurred: {e}") + + +def get_latest_assessment_by_uprn(session: Session, uprn: int) -> 
Optional[EnergyAssessment]: + """ + Retrieve the latest energy assessment for a given UPRN based on the inspection date. + + :param session: The database session + :param uprn: The unique property reference number + :return: The latest EnergyAssessment object or None if not found + """ + try: + # Query the EnergyAssessment model, filter by uprn, order by inspection_date in descending order + latest_assessment = session.query(EnergyAssessment).filter_by(uprn=uprn).order_by( + desc(EnergyAssessment.inspection_date)).first() + return latest_assessment.to_dict() if latest_assessment else {} + except Exception as e: + print(f"An error occurred: {e}") + return None diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py index b6f7dd73..86230c00 100644 --- a/backend/app/db/models/energy_assessments.py +++ b/backend/app/db/models/energy_assessments.py @@ -119,3 +119,9 @@ class EnergyAssessment(Base): cylinder_insulation_type = Column(Text) cylinder_insulation_thickness = Column(Integer) cylinder_thermostat = Column(Boolean) + + def to_dict(self): + """ + Convert the SQLAlchemy object to a dictionary. 
+ """ + return {column.name: getattr(self, column.name) for column in self.__table__.columns} diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 00e73b56..c73aff7e 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -21,6 +21,7 @@ from backend.app.db.functions.property_functions import ( from backend.app.db.functions.recommendations_functions import ( create_plan, create_plan_recommendations, upload_recommendations ) +from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn from backend.app.db.models.portfolio import rating_lookup from backend.app.dependencies import validate_token from backend.app.plan.schemas import PlanTriggerRequest, MdsRequest @@ -265,6 +266,7 @@ async def trigger_plan(body: PlanTriggerRequest): input_properties = [] for config in tqdm(plan_input): + # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly uprn = config.get("uprn", None) if uprn: @@ -281,6 +283,10 @@ async def trigger_plan(body: PlanTriggerRequest): epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) # For the moment, our OS API access is unavailable, so we skip and interpolate epc_searcher.find_property(skip_os=True) + + # We check for an energy assessment we have performed on this property: + energy_assessment = get_latest_assessment_by_uprn(session, uprn) + # Create a record in db property_id, is_new = create_property( session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index 0cb95332..beb47454 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -107,7 +107,7 @@ def main(): "already_installed_file_path": "", "patches_file_path": "", "non_invasive_recommendations_file_path": "", - "exclusions": "", + # "exclusions": [], "budget": None, } 
print(body) From bdd6171626e85689d430180520e84f507b6010e2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 26 Jul 2024 15:07:23 +0100 Subject: [PATCH 19/49] Added mapping of age band --- .../functions/energy_assessment_functions.py | 2 +- backend/app/db/models/energy_assessments.py | 37 +++++++- backend/app/plan/router.py | 85 +++++++++++++------ etl/xml_survey_extraction/XmlParser.py | 23 ++++- 4 files changed, 117 insertions(+), 30 deletions(-) diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py index 2d6a611e..45fb2b8b 100644 --- a/backend/app/db/functions/energy_assessment_functions.py +++ b/backend/app/db/functions/energy_assessment_functions.py @@ -55,7 +55,7 @@ def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[Energ # Query the EnergyAssessment model, filter by uprn, order by inspection_date in descending order latest_assessment = session.query(EnergyAssessment).filter_by(uprn=uprn).order_by( desc(EnergyAssessment.inspection_date)).first() - return latest_assessment.to_dict() if latest_assessment else {} + return latest_assessment.to_dict() if latest_assessment else latest_assessment.empty_response() except Exception as e: print(f"An error occurred: {e}") return None diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py index 86230c00..efcbc26c 100644 --- a/backend/app/db/models/energy_assessments.py +++ b/backend/app/db/models/energy_assessments.py @@ -120,8 +120,43 @@ class EnergyAssessment(Base): cylinder_insulation_thickness = Column(Integer) cylinder_thermostat = Column(Boolean) + EPC_KEYS = [ + 'low_energy_fixed_light_count', 'address', 'uprn_source', 'floor_height', 'heating_cost_potential', + 'unheated_corridor_length', 'hot_water_cost_potential', 'construction_age_band', 'potential_energy_rating', + 'mainheat_energy_eff', 'windows_env_eff', 'lighting_energy_eff', 
'environment_impact_potential', 'glazed_type', + 'heating_cost_current', 'address3', 'mainheatcont_description', 'sheating_energy_eff', 'property_type', + 'local_authority_label', 'fixed_lighting_outlets_count', 'energy_tariff', 'mechanical_ventilation', + 'hot_water_cost_current', 'county', 'postcode', 'solar_water_heating_flag', 'constituency', + 'co2_emissions_potential', 'number_heated_rooms', 'floor_description', 'energy_consumption_potential', + 'local_authority', 'built_form', 'number_open_fireplaces', 'windows_description', 'glazed_area', + 'inspection_date', 'mains_gas_flag', 'co2_emiss_curr_per_floor_area', 'address1', 'heat_loss_corridor', + 'flat_storey_count', 'constituency_label', 'roof_energy_eff', 'total_floor_area', 'building_reference_number', + 'environment_impact_current', 'co2_emissions_current', 'roof_description', 'floor_energy_eff', + 'number_habitable_rooms', 'address2', 'hot_water_env_eff', 'posttown', 'mainheatc_energy_eff', 'main_fuel', + 'lighting_env_eff', 'windows_energy_eff', 'floor_env_eff', 'sheating_env_eff', 'lighting_description', + 'roof_env_eff', 'walls_energy_eff', 'photo_supply', 'lighting_cost_potential', 'mainheat_env_eff', + 'multi_glaze_proportion', 'main_heating_controls', 'lodgement_datetime', 'flat_top_storey', + 'current_energy_rating', 'secondheat_description', 'walls_env_eff', 'transaction_type', 'uprn', + 'current_energy_efficiency', 'energy_consumption_current', 'mainheat_description', 'lighting_cost_current', + 'lodgement_date', 'extension_count', 'mainheatc_env_eff', 'lmk_key', 'wind_turbine_count', 'tenure', + 'floor_level', 'potential_energy_efficiency', 'hot_water_energy_eff', 'low_energy_lighting', + 'walls_description', 'hotwater_description' + ] + def to_dict(self): """ Convert the SQLAlchemy object to a dictionary. 
""" - return {column.name: getattr(self, column.name) for column in self.__table__.columns} + + epc = {key.replace("_", "-"): getattr(self, key) for key in self.EPC_KEYS} + # Get everything else + additional = { + column.name: getattr(self, column.name) + for column in self.__table__.columns if column.name not in self.EPC_KEYS + } + + return {"epc": epc, "additional": additional} + + @staticmethod + def empty_response(): + return {"epc": {}, "additional": {}} diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index c73aff7e..175561e4 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -220,6 +220,60 @@ def extract_portfolio_aggregation_data( return aggregation_data +def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict): + """ + This function will set up with epc_records dictionary with the newest EPC, the full SAP EPC and the older EPCs + and will factor in an energy assessment that we have performed for a client. + :param epc_searcher: An instance of the SearchEpc class + :param energy_assessment: The energy assessment we have performed. 
If we have not performed an energy assessment, + this should be an empty response as defined by the model's + EnergyAssessment.empty_response() method + """ + + if not energy_assessment["epc"]: + return { + 'original_epc': epc_searcher.newest_epc.copy(), + 'full_sap_epc': epc_searcher.full_sap_epc.copy(), + 'old_data': epc_searcher.older_epcs.copy(), + } + + epc = energy_assessment["epc"] + energy_assessment_date = epc["inspection-date"].strftime("%Y-%m-%d") + + # We check if the energy assessment is newer than the newest EPC + if pd.to_datetime(energy_assessment_date) > pd.to_datetime(epc_searcher.newest_epc["inspection-date"]): + # In this case, our energy assessment is newer than the EPCs available for this property + return { + "original_epc": epc, + "full_sap_epc": epc_searcher.full_sap_epc.copy(), + "old_data": epc_searcher.older_epcs.copy() + [epc_searcher.newest_epc.copy()] + } + + # We check if the EPC we have produced is contained in the set of EPCs done for the property + # We do this based on inspection-date and SAP + epc_in_historicals = [ + x for x in epc_searcher.older_epcs + [epc_searcher.newest_epc] + if x["inspection-date"] == energy_assessment_date and + x["current-energy-efficiency"] == epc["current-energy-efficiency"] + ] + + if epc_in_historicals: + # Then the EPC we have produced is already in the set of EPCs, and our EPC is older than the newest + return { + "original_epc": epc_searcher.newest_epc.copy(), + "full_sap_epc": epc_searcher.full_sap_epc.copy(), + "old_data": epc_searcher.older_epcs.copy() + } + + # In this case, our EPC is older than the newest publicly available one, but is not contained in + # the historicals, so it can't have been lodged, so we include it in the old data + return { + 'original_epc': epc_searcher.newest_epc.copy(), + 'full_sap_epc': epc_searcher.full_sap_epc.copy(), + 'old_data': epc_searcher.older_epcs.copy() + [epc], + } + + + router = APIRouter( prefix="/plan", tags=["plan"], @@ -285,7 +339,7 @@ async def 
trigger_plan(body: PlanTriggerRequest): epc_searcher.find_property(skip_os=True) # We check for an energy assessment we have performed on this property: - energy_assessment = get_latest_assessment_by_uprn(session, uprn) + energy_assessment = get_latest_assessment_by_uprn(session, uprn if uprn is not None else epc_searcher.uprn) # Create a record in db property_id, is_new = create_property( @@ -302,32 +356,9 @@ async def trigger_plan(body: PlanTriggerRequest): heat_demand_target=None ) - epc_records = { - 'original_epc': epc_searcher.newest_epc.copy(), - 'full_sap_epc': epc_searcher.full_sap_epc.copy(), - 'old_data': epc_searcher.older_epcs.copy(), - } - - patch = next(( - x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), {}) - epc_records = patch_epc(patch, epc_records) - - prepared_epc = EPCRecord( - epc_records=epc_records, - run_mode="newdata", - cleaning_data=cleaning_data - ) - - property_already_installed = next(( - x for x in already_installed if - (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), {}) - - property_non_invasive_recommendations = next(( - x for x in non_invasive_recommendations if - (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), {}) + # If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that. 
+ # Otherwise, we use the newest EPC + epc_records = create_epc_records(epc_searcher, energy_assessment) input_properties.append( Property( diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 90a51ae6..522cb899 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -72,6 +72,25 @@ class XmlParser: floor_dimensions = None + # The age band lookup is based on the country code + AGE_BAND_LOOKUP = { + # England & Wales + "EAW": { + "A": "England and Wales: before 1900", + "B": "England and Wales: 1900-1929", + "C": "England and Wales: 1930-1949", + "D": "England and Wales: 1950-1966", + "E": "England and Wales: 1967-1975", + "F": "England and Wales: 1976-1982", + "G": "England and Wales: 1983-1990", + "H": "England and Wales: 1991-1995", + "I": "England and Wales: 1996-2002", + "J": "England and Wales: 2003-2006", + "K": "England and Wales: 2007-2011", + "L": "England and Wales: 2012 onwards", + } + } + RATINGS_MAP = { "0": "N/A", "1": "Very Poor", @@ -205,7 +224,9 @@ class XmlParser: **self.get_sap(), **self.get_property_address(), "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'), - "construction-age-band": self.get_node_value('Construction-Age-Band'), + "construction-age-band": self.AGE_BAND_LOOKUP[ + self.get_node_value('Country-Code') + ][self.get_node_value('Construction-Age-Band')], "mainheat-energy-eff": self.RATINGS_MAP[ self.get_property_summary_value('Main-Heating', 'Energy-Efficiency-Rating') ], From 2c931b438367f63997760b56de3b64913727d530 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 26 Jul 2024 15:39:47 +0100 Subject: [PATCH 20/49] Updating logic for extracting heat loss perimeter and party walls from xml data --- backend/Property.py | 21 ++++++++++++--- backend/app/db/models/energy_assessments.py | 6 ++--- backend/app/plan/router.py | 24 ++++++++++++++++- etl/xml_survey_extraction/XmlParser.py | 30 
++++++++++++--------- etl/xml_survey_extraction/app.py | 12 +++++++++ 5 files changed, 74 insertions(+), 19 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 4d5a93a7..4f508b9a 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -76,6 +76,7 @@ class Property: already_installed=None, non_invasive_recommendations=None, measures=None, + energy_assessment=None, **kwargs ): @@ -178,6 +179,11 @@ class Property: self.recommendations_scoring_data = [] self.simulation_epcs = {} + # This additional condition data should change how we pass kwargs to this. We should no longer need to pass + # kwargs to this class, but instead, we should pass the energy assessment condition data + self.energy_assessment_condition_data = energy_assessment["condition"] + + # TODO: We keep this but only temporarily until we add bathrooms, bedrooms, building id to the condition data self.parse_kwargs(kwargs) @classmethod @@ -188,6 +194,10 @@ class Property: :param kwargs: :return: """ + + # Note - none of this data is contained in an energy asssessment, but we should consider how this is done + # as we collect more data from the energy assessment + n_bathrooms = kwargs.get("n_bathrooms", None) if n_bathrooms not in [None, ""]: # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5 @@ -1034,9 +1044,14 @@ class Property: # TODO: These functions should work on an EPCRecord object, so that the format is more standardised. 
# They could also be added as attributes to the EPC Record - self.perimeter = estimate_perimeter( - self.floor_area / self.number_of_floors, - self.number_of_rooms / self.number_of_floors, + # Many of these pieces of information are now contained in the condition data + condition_data = self.energy_assessment_condition_data.copy() + + self.perimeter = float(self.energy_assessment_condition_data["perimeter"]) \ + if condition_data["perimeter"] is not None \ + else estimate_perimeter( + floor_area=self.floor_area / self.number_of_floors, + num_rooms=self.number_of_rooms / self.number_of_floors ) self.insulation_wall_area = estimate_external_wall_area( diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py index efcbc26c..f89cccb7 100644 --- a/backend/app/db/models/energy_assessments.py +++ b/backend/app/db/models/energy_assessments.py @@ -150,13 +150,13 @@ class EnergyAssessment(Base): epc = {key.replace("_", "-"): getattr(self, key) for key in self.EPC_KEYS} # Get everything else - additional = { + condition = { column.name: getattr(self, column.name) for column in self.__table__.columns if column.name not in self.EPC_KEYS } - return {"epc": epc, "additional": additional} + return {"epc": epc, "condition": condition} @staticmethod def empty_response(): - return {"epc": {}, "additional": {}} + return {"epc": {}, "condition": {}} diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 175561e4..2ed19880 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -360,6 +360,27 @@ async def trigger_plan(body: PlanTriggerRequest): # Otherwise, we use the newest EPC epc_records = create_epc_records(epc_searcher, energy_assessment) + patch = next(( + x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + ), {}) + epc_records = patch_epc(patch, epc_records) + + prepared_epc = EPCRecord( + epc_records=epc_records, + run_mode="newdata", + 
cleaning_data=cleaning_data + ) + + property_already_installed = next(( + x for x in already_installed if + (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + ), {}) + + property_non_invasive_recommendations = next(( + x for x in non_invasive_recommendations if + (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) + ), {}) + input_properties.append( Property( id=property_id, @@ -368,7 +389,8 @@ async def trigger_plan(body: PlanTriggerRequest): epc_record=prepared_epc, already_installed=property_already_installed, non_invasive_recommendations=property_non_invasive_recommendations, - **Property.extract_kwargs(config) + energy_assessment=energy_assessment, + **Property.extract_kwargs(config), # TODO: Depraecate this ) ) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 522cb899..3301b0be 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -645,19 +645,25 @@ class XmlParser: self.number_of_floors = len( [f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling"] ) - self.heat_loss_perimeter = max( - [ - float(f["heat_loss_perimeter"]) for f in self.floor_dimensions - if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"] - ] - ) - self.party_wall_length = max( - [ - float(f["party_wall_length"]) for f in self.floor_dimensions - if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"] - ] - ) + # We extract the maximum heat loss perimeter, per building part + max_heat_loss_perimeters = {d['building_part_identifier']: max( + (float(x['heat_loss_perimeter']) for x in self.floor_dimensions if + x['building_part_identifier'] == d['building_part_identifier'] and x['heat_loss_perimeter']), + default=float('-inf') + ) for d in self.floor_dimensions} + + self.heat_loss_perimeter = sum(max_heat_loss_perimeters.values()) + + max_party_walls = { + 
d['building_part_identifier']: max( + (float(x['party_wall_length']) for x in self.floor_dimensions if + x['building_part_identifier'] == d['building_part_identifier'] and x['party_wall_length']), + default=float('-inf') + ) for d in self.floor_dimensions + } + + self.party_wall_length = sum(max_party_walls.values()) self.perimeter = self.heat_loss_perimeter + self.party_wall_length diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index beb47454..7f4e679c 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -48,6 +48,9 @@ def main(): # TODO: IF we have many uploads, we can do them in a batch so we don't try and upload huge amounts of data to # the database at onece + # TODO: We now have detailed information about primary and secondary walls, so we should use this information + # in our recommendations when we have it + # For each property, we download the xmls and extract the data database_data = [] for uprn, xmls in assessments_map.items(): @@ -117,3 +120,12 @@ def main(): # https://www.ncm-pcdb.org.uk/sap/download # However retrieving this data is not a priority, so we can leave this for now as parsing the database # is a non-trivial task + + # TODO: The condition report contains additional data such as the number of bedrooms and the number of bathrooms + # We can extract this data and store it in the database as well. 
We can then update our kwargs methodology + # that is passed to the property class, where instead we store this additional data in our database (it could + # be stored in the energy assessment table, or in a separate table) and then when we're passed additional data + # we can query the database for this data and use it to update the property object, instead of storing it + # in the asset list and pulling it out of the asset list + # 1) Bathrooms + # 2) Bedrooms From beb09df342081ea358240efb3048935d39930874 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 26 Jul 2024 15:42:36 +0100 Subject: [PATCH 21/49] Making a note on the recommendations --- etl/xml_survey_extraction/app.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index 7f4e679c..edebbece 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -50,6 +50,11 @@ def main(): # TODO: We now have detailed information about primary and secondary walls, so we should use this information # in our recommendations when we have it + # For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions, where + # the physical dimensions and the fabric of each building is constructed in a way as if each building is + # separate. We should use this information to make recommendations that are specific to each building + # part, though the problem here is that while the fabric and dimensions are separate, the actual SAP, CO2, etc + # figures span across the entire property. 
# For each property, we download the xmls and extract the data database_data = [] From 73b6fb2b70727532edec9d1a37e5210a27e23d8d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 26 Jul 2024 15:54:07 +0100 Subject: [PATCH 22/49] notes on extension recommendations --- etl/xml_survey_extraction/XmlParser.py | 5 ++++- etl/xml_survey_extraction/app.py | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 3301b0be..8391314a 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -345,7 +345,10 @@ class XmlParser: def get_insulation_wall_area(self): """ Extracts the insulation wall area for the main dwelling - :return: + + Note that this doesn't include any extensions. We don't have recommendations for extensions right now, so we + don't currently calculate the insulation wall area for them, since it's not used in the recommendations. + """ main_dwelling_floors = [ diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index edebbece..9a813216 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -55,6 +55,9 @@ def main(): # separate. We should use this information to make recommendations that are specific to each building # part, though the problem here is that while the fabric and dimensions are separate, the actual SAP, CO2, etc # figures span across the entire property. + # Idea: We can collect all of this information by building part and store it separately in the database + # against the uprn. We can have key data for the EPC, but then also additional data for each building + # part. 
We can then use this data to make recommendations that are specific to each building part # For each property, we download the xmls and extract the data database_data = [] From 24508b2a84cbbcb33bf5f7feff5ba217d69fe3b1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 26 Jul 2024 16:41:32 +0100 Subject: [PATCH 23/49] added condition data to router --- backend/Property.py | 56 +++++++++++++++------ backend/app/db/models/energy_assessments.py | 3 ++ backend/app/plan/router.py | 2 + etl/bill_savings/EnergyConsumptionModel.py | 1 + etl/xml_survey_extraction/XmlParser.py | 13 +++++ etl/xml_survey_extraction/app.py | 5 ++ recommendations/WindowsRecommendations.py | 4 ++ 7 files changed, 68 insertions(+), 16 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 4f508b9a..6365bb0b 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -166,6 +166,7 @@ class Property: ) self.floor_level = None self.number_of_windows = None + self.windows_area = None self.solar_pv_percentage = None self.current_adjusted_energy = None @@ -707,17 +708,20 @@ class Property: # Today's costs todays_heating_cost = energy_consumption_client.convert_cost_to_today( original_cost=float(self.data["heating-cost-current"]), - lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]) + lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None) ) todays_hot_water_cost = energy_consumption_client.convert_cost_to_today( original_cost=float(self.data["hot-water-cost-current"]), - lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]) + lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None) ) todays_lighting_cost = energy_consumption_client.convert_cost_to_today( original_cost=float(self.data["lighting-cost-current"]), - lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]) + 
lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None) ) + # If we have the kwh figures, we don't need to predict them + condition_data = self.energy_assessment_condition_data.copy() + scoring_df = pd.DataFrame([self.epc_record.prepared_epc]) # Change columns from underscores to hyphens scoring_df.columns = [ @@ -727,13 +731,20 @@ class Property: scoring_df[col] = None energy_consumption_client.data = None - heating_prediction = energy_consumption_client.score_new_data( - new_data=scoring_df, target="heating_kwh" - )[0] - hot_water_prediction = energy_consumption_client.score_new_data( - new_data=scoring_df, target="hot_water_kwh" - )[0] + heating_prediction = ( + float(condition_data["space_heating_kwh"]) if condition_data["space_heating_kwh"] + else energy_consumption_client.score_new_data( + new_data=scoring_df, target="heating_kwh" + )[0] + ) + + hot_water_prediction = ( + float(condition_data["water_heating_kwh"]) if condition_data["water_heating_kwh"] + else energy_consumption_client.score_new_data( + new_data=scoring_df, target="hot_water_kwh" + )[0] + ) # We convert the lighting cost into kwh, just using the price cap lighting_kwh = float(self.data["lighting-cost-current"]) / AnnualBillSavings.ELECTRICITY_PRICE_CAP @@ -1040,13 +1051,14 @@ class Property: medians across the EPC data :return: """ - - # TODO: These functions should work on an EPCRecord object, so that the format is more standardised. 
- # They could also be added as attributes to the EPC Record - # Many of these pieces of information are now contained in the condition data condition_data = self.energy_assessment_condition_data.copy() + # We can update the number of floors if we have this information in the condition data + self.number_of_floors = int(self.energy_assessment_condition_data["number_of_floors"]) \ + if condition_data["number_of_floors"] is not None \ + else self.number_of_floors + self.perimeter = float(self.energy_assessment_condition_data["perimeter"]) \ if condition_data["perimeter"] is not None \ else estimate_perimeter( @@ -1054,14 +1066,18 @@ class Property: num_rooms=self.number_of_rooms / self.number_of_floors ) - self.insulation_wall_area = estimate_external_wall_area( + self.insulation_wall_area = float(self.energy_assessment_condition_data["insulation_wall_area"]) \ + if condition_data["insulation_wall_area"] is not None \ + else estimate_external_wall_area( num_floors=self.number_of_floors, floor_height=self.floor_height, perimeter=self.perimeter, built_form=self.data["built-form"], ) - self.insulation_floor_area = self.floor_area / self.number_of_floors + self.insulation_floor_area = float(self.energy_assessment_condition_data["main_dwelling_ground_floor_area"]) \ + if condition_data["main_dwelling_ground_floor_area"] is not None \ + else self.floor_area / self.number_of_floors self.pitched_roof_area = esimtate_pitched_roof_area( floor_area=self.insulation_floor_area, floor_height=self.floor_height @@ -1163,7 +1179,11 @@ class Property: :return: """ - self.number_of_windows = estimate_windows( + condition_data = self.energy_assessment_condition_data.copy() + + self.number_of_windows = int(condition_data["number_of_windows"]) \ + if condition_data["number_of_windows"] is not None \ + else estimate_windows( property_type=self.data["property-type"], built_form=self.data["built-form"], construction_age_band=self.construction_age_band, @@ -1171,6 +1191,10 @@ class Property: 
number_habitable_rooms=self.number_of_rooms, ) + self.windows_area = float(condition_data["windows_area"]) \ + if condition_data["windows_area"] is not None \ + else None + def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds): """ Sets the approximate area of the solar panels diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py index f89cccb7..2c3cc144 100644 --- a/backend/app/db/models/energy_assessments.py +++ b/backend/app/db/models/energy_assessments.py @@ -119,6 +119,9 @@ class EnergyAssessment(Base): cylinder_insulation_type = Column(Text) cylinder_insulation_thickness = Column(Integer) cylinder_thermostat = Column(Boolean) + main_dwelling_ground_floor_area = Column(Float) + number_of_windows = Column(Integer) + windows_area = Column(Float) EPC_KEYS = [ 'low_energy_fixed_light_count', 'address', 'uprn_source', 'floor_height', 'heating_cost_potential', diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 2ed19880..e76d4430 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -515,6 +515,8 @@ async def trigger_plan(body: PlanTriggerRequest): # ) print("Implement me") + # TODO: We can set the pitched roof area based on the results of the solar api! 
+ logger.info("Getting components and epc recommendations") recommendations = {} recommendations_scoring_data = [] diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py index 9a7d6523..dfb0e574 100644 --- a/etl/bill_savings/EnergyConsumptionModel.py +++ b/etl/bill_savings/EnergyConsumptionModel.py @@ -102,6 +102,7 @@ class EnergyConsumptionModel: # We also retrieve the newest retail price comparison data which comes from Ofgem: # https://www.ofgem.gov.uk/energy-data-and-research/data-portal/retail-market-indicators # We use the detail price comparison by company and tariff type data + print("Reading retail price comparison - make sure this is up-to-date") self.read_retail_price_comparison() def read_retail_price_comparison(self): diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 8391314a..0bc3d56b 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -366,6 +366,16 @@ class XmlParser: self.insulation_wall_area = self.get_insulation_wall_area() + # We pull this out which is used as the insulation floor area + main_dwelling_ground_floor_area = [ + f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling" and f["floor"] == "0" + ][0]["total_floor_area"] + + main_dwelling_windows = [w for w in self.windows if w["window_location"] == "0"] + + number_of_windows = len(main_dwelling_windows) + windows_area = sum([float(w["window_area"]) for w in main_dwelling_windows]) + boolean_lookup = { "true": True, "false": False, @@ -400,6 +410,9 @@ class XmlParser: "cylinder_insulation_type": cylinder_insulation_type[self.get_node_value('Cylinder-Insulation-Type')], "cylinder_insulation_thickness": int(self.get_node_value('Cylinder-Insulation-Thickness')), "cylinder_thermostat": boolean_lookup[self.get_node_value('Cylinder-Thermostat')], + "main_dwelling_ground_floor_area": float(main_dwelling_ground_floor_area), + 
"number_of_windows": int(number_of_windows), + "windows_area": float(windows_area), } def get_node_value(self, tag_name): diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index 9a813216..c4f6091f 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -58,6 +58,11 @@ def main(): # Idea: We can collect all of this information by building part and store it separately in the database # against the uprn. We can have key data for the EPC, but then also additional data for each building # part. We can then use this data to make recommendations that are specific to each building part + # We should probably re-think this data model, so we break up the data in a more considered fashion and produce + # the underlying EPC data as a summary of the building parts. Not only do we have data against the main + # dwelling and extensions, but we also have multiple windows with individual pieces of information that + # we can use to make recommendations. We should store this data in a way that we can easily access it and + # use it to make recommendations (e.g. 
we should have a Windows table) # For each property, we download the xmls and extract the data database_data = [] diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py index 29c75989..9a30cd2e 100644 --- a/recommendations/WindowsRecommendations.py +++ b/recommendations/WindowsRecommendations.py @@ -48,10 +48,14 @@ class WindowsRecommendations: is_secondary_glazing = self.property.restricted_measures or ( self.property.windows["glazing_type"] == "secondary" ) + windows_area = self.property.windows_area if not number_of_windows: raise ValueError("Number of windows not specified") + if windows_area is not None: + raise Exception("We have windows area, we should use this data for our recommendations!!!") + if self.property.windows["has_glazing"] & ( self.property.windows["glazing_coverage"] == "full" ): From 971a74017e97f0699138b8712aaa3c64cbf160b6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 26 Jul 2024 17:50:29 +0100 Subject: [PATCH 24/49] working on unit level solar api integration - need to make adjustments to the energy consumption --- backend/app/plan/router.py | 46 ++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index e76d4430..4796cd9f 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -434,9 +434,11 @@ async def trigger_plan(body: PlanTriggerRequest): "longitude": p.spatial["longitude"], "latitude": p.spatial["latitude"], # Energy consumption is adjusted for the property's expected post retrofit state + # We set the target rating to EPC C, which is the typical EPC rating we would expect the + # property to achieve post retrofit of just the fabric "energy_consumption": energy_consumption_client.estimate_new_consumption( current_rating=p.data["current-energy-rating"], - target_rating=body.goal_value, + target_rating="C", current_consumption=p.current_adjusted_energy ), 
"property_id": p.id, @@ -507,12 +509,42 @@ async def trigger_plan(body: PlanTriggerRequest): p.set_solar_panel_configuration(unit_solar_panel_configuration) else: - # # Model the solar potential at the property level - # for p in input_properties: - # # TODO: Complete me! - we probably won't do this for individual flats - # solar_performance = solar_api_client.get( - # longitude=p.spatial["longitude"], latitude=p.spatial["latitude"] - # ) + # Model the solar potential at the property level + for p in input_properties: + # TODO: Complete me! - we probably won't do this for individual flats - IGNORE FLATS FROM THIS WITHOUT + # BUILDING IDS + + # if the property is already very close to an EPC C, we don't adjust the energy consumption based on + # expected movement to EPC C. + # To extend this, what we could do is adjust the consumption based on the expected movement from the current SAP + # rating to the target SAP rating (ie 69C) + # TODO: Update this! + energy_consumption = energy_consumption_client.estimate_new_consumption( + current_rating=p.data["current-energy-rating"], + target_rating="C", + current_consumption=p.current_adjusted_energy + ) + + # TODO: Should energy_consumption be adjusted to just electricity requirement? + # We should align our calculation of required energy consumption with expectations around decarbonising + # heating and hot water, so worst case we should take just the electrical consumption of the property + # if the property is currently using gas for heating and hot water, then we should adjust the kwh demand + # to reflect the 200-400% efficiency of an ASHP with electrified heating, so that the solar panel can + # cover heating generation. While + # If the main fuel is electricity (not community) then we don't need to change the kwh demand, if it's + # gas we should adjust on the suitability of an ashp! 
+ + solar_performance = solar_api_client.get( + longitude=p.spatial["longitude"], + latitude=p.spatial["latitude"], + energy_consumption=energy_consumption, + is_building=False, + session=session, + uprn=p.uprn + ) + + # TODO: Insert the pitched roof area into the property class as we store the solar performance + # in the property class print("Implement me") # TODO: We can set the pitched roof area based on the results of the solar api! From a2a5094b01a93ef73f68e546549303ea320706c6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 27 Jul 2024 22:37:13 +0100 Subject: [PATCH 25/49] working on land registry matches --- etl/customers/goldman/property_ownership.py | 162 +++++++++++++++++++- 1 file changed, 156 insertions(+), 6 deletions(-) diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index 1b1cf014..7958e93b 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -345,9 +345,6 @@ def app(): company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique()) ] - # Read in land registry - land_registry = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/land_registry_prices_paid_filtered.csv") - # Now we filter properties the other way around properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())] # We end up with 7.4k entires on a postcode match, however we need to now do a direct address match @@ -485,14 +482,167 @@ def app(): # leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership) matched_addresses = combined_matching_lookup.merge( - properties[["UPRN", "ADDRESS", "CURRENT_ENERGY_EFFICIENCY", "CURRENT_ENERGY_RATING"]].rename( - columns={"ADDRESS": "epc_address"}), + properties[ + [ + "UPRN", + "ADDRESS", + "ADDRESS1", + "CURRENT_ENERGY_EFFICIENCY", + "CURRENT_ENERGY_RATING", + "POSTCODE" + ] + ].rename( 
+ columns={ + "ADDRESS": "epc_address", + "ADDRESS1": "epc_address1", + "POSTCODE": "epc_postcode" + } + ), how="left", on="UPRN" ).merge( - company_ownership[["Title Number", "Property Address", "Company Registration No. (1)", "Proprietor Name (1)"]], + company_ownership[ + [ + "Title Number", + "Property Address", + "Postcode", + "Company Registration No. (1)", + "Proprietor Name (1)", + + ] + ], how="left", on="Title Number" ) + # Let's try and get the house number + matched_addresses["house_number"] = ( + matched_addresses["epc_address"] + .apply(remove_text_in_brackets) + .apply(SearchEpc.get_house_number) + .str.lower() + .str.replace(",", "") + ) + + # Read in land registry + land_registry = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/land_registry_prices_paid_filtered.csv", + ) + + # We now perform a match between the land registry data and the matched address, in an attempt to find + # out when these properties last sold. The land registry data has been pre filtered on the postcodes in this + # data, and for sales within the last 5 years, to ensure the file isn't too large. 
+ + land_registry["postcode"] = land_registry["postcode"].str.lower().str.strip() + land_registry["street"] = land_registry["street"].str.lower().str.strip() + land_registry["paon"] = land_registry["paon"].str.lower().str.strip() + land_registry["date_of_transfer"] = pd.to_datetime(land_registry["date_of_transfer"]) + + def is_substring(x, match_string): + + if pd.isnull(x): + return False + + return x in match_string.lower() + + def house_number_match(paon, house_number): + # Firstly try and convert to numberic + try: + paon_numeric = int(paon) + house_number_numeric = int(house_number) + return paon_numeric == house_number_numeric + except Exception as e: # noqa + # If we can't convert both to numeric, we do an equality + + return paon == house_number + + def check_equalities(lr_filtered): + all_paon_equal = all(lr_filtered["paon"] == lr_filtered["paon"].values[0]) + if pd.isnull(lr_filtered["saon"].values[0]): + all_saon_equal = all(pd.isnull(lr_filtered["saon"])) + else: + all_saon_equal = all(lr_filtered["saon"] == lr_filtered["saon"].values[0]) + + all_street_equal = all(lr_filtered["street"] == lr_filtered["street"].values[0]) + + return all_paon_equal, all_saon_equal, all_street_equal + + land_registry_matches = [] + for _, match in tqdm(matched_addresses.iterrows(), total=len(matched_addresses)): + + # Filter land registry on the postcode + lr_filtered = land_registry[ + (land_registry["postcode"] == match["epc_postcode"].lower().strip()) + ] + + # Filter further, when the street is in in the address + # street should be contained in epc_address + lr_filtered = lr_filtered[ + lr_filtered["street"].apply(lambda x: is_substring(x, match["epc_address"].lower())) + ] + + if lr_filtered.empty: + continue + + # We now check if paon is in address 1 + lr_filtered["paon_match"] = lr_filtered["paon"].apply(lambda x: house_number_match(x, match["house_number"])) + # We also try the secondary match + lr_filtered["saon_match"] = lr_filtered["saon"].apply( + lambda x: 
False if pd.isnull(x) else is_substring(x, match["epc_address1"]) + ) + + # We fileter where we have a primary or secondary match + lr_filtered = lr_filtered[ + lr_filtered["paon_match"] | lr_filtered["saon_match"] + ] + + if lr_filtered.empty: + continue + elif lr_filtered.shape[0] == 1: + land_registry_matches.append( + { + "transaction_id": lr_filtered['transaction_id'].values[0], + "price": lr_filtered["price"].values[0], + "date_of_transfer": lr_filtered["date_of_transfer"].values[0], + } + ) + continue + elif lr_filtered.shape[0] > 1: + # We make sure all records are the same and take the newest + all_paon_equal, all_saon_equal, all_street_equal = check_equalities(lr_filtered) + has_paon_match = any(lr_filtered["paon_match"]) + + if all_paon_equal and all_street_equal and all_saon_equal: + # Take the newest record, append and continue + lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False) + lr_filtered = lr_filtered.head(1) + land_registry_matches.append( + { + "transaction_id": lr_filtered['transaction_id'].values[0], + "price": lr_filtered["price"].values[0], + "date_of_transfer": lr_filtered["date_of_transfer"].values[0], + } + ) + elif has_paon_match and all_street_equal: + # Peform filter on paon + lr_filtered = lr_filtered[lr_filtered["paon_match"]] + # Do an addtiioanl equality check + all_paon_equal, all_saon_equal, all_street_equal = check_equalities(lr_filtered) + if all_paon_equal and all_street_equal and all_saon_equal: + lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False) + lr_filtered = lr_filtered.head(1) + land_registry_matches.append( + { + "transaction_id": lr_filtered['transaction_id'].values[0], + "price": lr_filtered["price"].values[0], + "date_of_transfer": lr_filtered["date_of_transfer"].values[0], + } + ) + else: + raise NotImplementedError("wtf") + else: + raise NotImplementedError("wtf") + else: + raise NotImplementedError("What happened here?") + # shared_freehold_match = 
pd.DataFrame(shared_freehold_match) # Strore these files # freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx") From 2174a85a8bc79bd696e1b814c81b7d609d45b680 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 28 Jul 2024 15:21:05 +0100 Subject: [PATCH 26/49] adding to land registry matching logic --- etl/customers/goldman/property_ownership.py | 111 +++++++++++++++++--- 1 file changed, 94 insertions(+), 17 deletions(-) diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index 7958e93b..f1f0de38 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -357,6 +357,8 @@ def app(): properties = properties.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN") # TODO: Do we want to filter properties based on lodgement dates? + # E.g. we might want to filter properties that have had a sale EPC lodged in the last x months, because + # this could be indicative of a sale happening, and the land registry data may not have caught up yet # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the # the property itself @@ -456,13 +458,9 @@ def app(): freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup) leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup) - # shared_leasehold_match = pd.concat(shared_leasehold_match) - # shared_freehold_match = pd.concat(shared_freehold_match) - # freehold_matching_lookup.to_excel("freehold_matching_lookup_new.xlsx") - # leasehold_matching_lookup.to_excel("leasehold_matching_lookup_new.xlsx") - # shared_leasehold_match.to_excel("shared_leasehold_match_new.xlsx") - # shared_freehold_match.to_excel("shared_freehold_match_new.xlsx") + # freehold_matching_lookup.to_excel("freehold_matching_lookup V2.xlsx") + # leasehold_matching_lookup.to_excel("leasehold_matching_lookup V2.xlsx") # The approximate matches aren't very good 
freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"] @@ -477,10 +475,6 @@ def app(): # We also have duplicates at a UPRN level combined_matching_lookup = remove_duplicate_uprn_matches(combined_matching_lookup, properties, company_ownership) - # There are some cases where we have duplicates - # freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership) - # leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership) - matched_addresses = combined_matching_lookup.merge( properties[ [ @@ -534,6 +528,7 @@ def app(): land_registry["postcode"] = land_registry["postcode"].str.lower().str.strip() land_registry["street"] = land_registry["street"].str.lower().str.strip() land_registry["paon"] = land_registry["paon"].str.lower().str.strip() + land_registry["saon"] = land_registry["saon"].str.lower().str.strip() land_registry["date_of_transfer"] = pd.to_datetime(land_registry["date_of_transfer"]) def is_substring(x, match_string): @@ -576,8 +571,9 @@ def app(): # Filter further, when the street is in in the address # street should be contained in epc_address lr_filtered = lr_filtered[ - lr_filtered["street"].apply(lambda x: is_substring(x, match["epc_address"].lower())) - ] + lr_filtered["street"].apply(lambda x: is_substring(x, match["epc_address"].lower())) | + lr_filtered["street"].apply(lambda x: is_substring(x, match["Property Address"].lower())) + ] if lr_filtered.empty: continue @@ -585,10 +581,11 @@ def app(): # We now check if paon is in address 1 lr_filtered["paon_match"] = lr_filtered["paon"].apply(lambda x: house_number_match(x, match["house_number"])) # We also try the secondary match - lr_filtered["saon_match"] = lr_filtered["saon"].apply( - lambda x: False if pd.isnull(x) else is_substring(x, match["epc_address1"]) + lr_filtered["saon_match"] = ( + lr_filtered["saon"].apply( + lambda x: False if pd.isnull(x) else 
is_substring(x, match["epc_address1"]) + ) ) - # We fileter where we have a primary or secondary match lr_filtered = lr_filtered[ lr_filtered["paon_match"] | lr_filtered["saon_match"] @@ -599,6 +596,7 @@ def app(): elif lr_filtered.shape[0] == 1: land_registry_matches.append( { + "uprn": match["UPRN"], "transaction_id": lr_filtered['transaction_id'].values[0], "price": lr_filtered["price"].values[0], "date_of_transfer": lr_filtered["date_of_transfer"].values[0], @@ -616,11 +614,13 @@ def app(): lr_filtered = lr_filtered.head(1) land_registry_matches.append( { + "uprn": match["UPRN"], "transaction_id": lr_filtered['transaction_id'].values[0], "price": lr_filtered["price"].values[0], "date_of_transfer": lr_filtered["date_of_transfer"].values[0], } ) + continue elif has_paon_match and all_street_equal: # Peform filter on paon lr_filtered = lr_filtered[lr_filtered["paon_match"]] @@ -631,15 +631,92 @@ def app(): lr_filtered = lr_filtered.head(1) land_registry_matches.append( { + "uprn": match["UPRN"], "transaction_id": lr_filtered['transaction_id'].values[0], "price": lr_filtered["price"].values[0], "date_of_transfer": lr_filtered["date_of_transfer"].values[0], } ) else: - raise NotImplementedError("wtf") + # We do a match on saon + lr_filtered["saon_match2"] = lr_filtered["saon"].apply( + lambda x: False if pd.isnull(x) else is_substring(x, match["epc_address"]) + ) + + lr_filtered = lr_filtered[lr_filtered["saon_match2"]] + + if lr_filtered.empty: + continue + elif lr_filtered.shape[0] == 1: + land_registry_matches.append( + { + "uprn": match["UPRN"], + "transaction_id": lr_filtered['transaction_id'].values[0], + "price": lr_filtered["price"].values[0], + "date_of_transfer": lr_filtered["date_of_transfer"].values[0], + } + ) + continue + else: + raise NotImplementedError("wtf") else: - raise NotImplementedError("wtf") + # We have a final check, based on an observed case + lr_address_1 = " ".join([x.lower().strip() for x in match["Property Address"].split(",")[0:2]]) + 
+ lr_filtered["paon_match2"] = lr_filtered["paon"].apply( + lambda x: False if pd.isnull(x) else is_substring(x, lr_address_1) + ) + + lr_filtered = lr_filtered[lr_filtered["paon_match2"]] + + if lr_filtered.empty: + continue + elif lr_filtered.shape[0] == 1: + land_registry_matches.append( + { + "uprn": match["UPRN"], + "transaction_id": lr_filtered['transaction_id'].values[0], + "price": lr_filtered["price"].values[0], + "date_of_transfer": lr_filtered["date_of_transfer"].values[0], + } + ) + continue + else: + # Check all the same + all_paon_equal, all_saon_equal, all_street_equal = check_equalities(lr_filtered) + + # Check saon is house number with exact match + lr_filtered["saon_match2"] = lr_filtered["saon"].apply( + lambda x: False if pd.isnull(x) else house_number_match(x, match["house_number"]) + ) + + if all_paon_equal and all_saon_equal and all_street_equal: + # Take the newest record + lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False) + lr_filtered = lr_filtered.head(1) + land_registry_matches.append( + { + "uprn": match["UPRN"], + "transaction_id": lr_filtered['transaction_id'].values[0], + "price": lr_filtered["price"].values[0], + "date_of_transfer": lr_filtered["date_of_transfer"].values[0], + } + ) + continue + elif any(lr_filtered["saon_match2"]): + lr_filtered = lr_filtered[lr_filtered["saon_match2"]] + if lr_filtered.shape[0] == 1: + land_registry_matches.append( + { + "uprn": match["UPRN"], + "transaction_id": lr_filtered['transaction_id'].values[0], + "price": lr_filtered["price"].values[0], + "date_of_transfer": lr_filtered["date_of_transfer"].values[0], + } + ) + continue + + raise NotImplementedError("wtf") else: raise NotImplementedError("What happened here?") From b63ab89e8973b4e97ec7a8d17b37b887f4f0b270 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 28 Jul 2024 15:33:57 +0100 Subject: [PATCH 27/49] adding new logic to land registry match --- etl/customers/goldman/property_ownership.py | 26 
+++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index f1f0de38..71c53a74 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -689,6 +689,15 @@ def app(): lr_filtered["saon_match2"] = lr_filtered["saon"].apply( lambda x: False if pd.isnull(x) else house_number_match(x, match["house_number"]) ) + # We check if we have a flat + match_flat_number = re.match("flat (\d+)", match["epc_address1"].lower()) + lr_filtered["saon_match3"] = False + if match_flat_number is not None: + # Get out the match + match_flat_number = "flat " + match_flat_number.group(1) + lr_filtered["saon_match3"] = lr_filtered["saon"].apply( + lambda x: False if pd.isnull(x) else x == match_flat_number + ) if all_paon_equal and all_saon_equal and all_street_equal: # Take the newest record @@ -705,6 +714,23 @@ def app(): continue elif any(lr_filtered["saon_match2"]): lr_filtered = lr_filtered[lr_filtered["saon_match2"]] + all_saon_equal, all_paon_equal, all_street_equal = check_equalities(lr_filtered) + if all_paon_equal and all_saon_equal and all_street_equal: + # Filter on the newest record + lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False) + lr_filtered = lr_filtered.head(1) + if lr_filtered.shape[0] == 1: + land_registry_matches.append( + { + "uprn": match["UPRN"], + "transaction_id": lr_filtered['transaction_id'].values[0], + "price": lr_filtered["price"].values[0], + "date_of_transfer": lr_filtered["date_of_transfer"].values[0], + } + ) + continue + elif any(lr_filtered["saon_match3"]): + lr_filtered = lr_filtered[lr_filtered["saon_match3"]] if lr_filtered.shape[0] == 1: land_registry_matches.append( { From e85936ae97f61b2fc64a41cbf1fd2435927832d9 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 28 Jul 2024 15:36:48 +0100 Subject: [PATCH 28/49] apartment string match --- 
etl/customers/goldman/property_ownership.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index 71c53a74..5d39f139 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -691,6 +691,7 @@ def app(): ) # We check if we have a flat match_flat_number = re.match("flat (\d+)", match["epc_address1"].lower()) + match_apartment_number = re.match("apartment (\d+)", match["epc_address1"].lower()) lr_filtered["saon_match3"] = False if match_flat_number is not None: # Get out the match @@ -699,6 +700,13 @@ def app(): lambda x: False if pd.isnull(x) else x == match_flat_number ) + if match_apartment_number is not None: + # Get out the match + match_apartment_number = "apartment " + match_apartment_number.group(1) + lr_filtered["saon_match3"] = lr_filtered["saon"].apply( + lambda x: False if pd.isnull(x) else x == match_apartment_number + ) + if all_paon_equal and all_saon_equal and all_street_equal: # Take the newest record lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False) From d07e54ce8829408722084023281b9b50fc455da3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 28 Jul 2024 16:01:49 +0100 Subject: [PATCH 29/49] merged --- etl/customers/goldman/property_ownership.py | 56 ++++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index 5d39f139..1c828566 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -754,6 +754,39 @@ def app(): else: raise NotImplementedError("What happened here?") + land_registry_matches = pd.DataFrame(land_registry_matches) + # land_registry_matches.to_excel("land_registry_matches.xlsx") + + # Check the matches against the addresses + # lr_to_addresses = matched_addresses[ + # ["UPRN", 
"epc_address", "epc_postcode", "Property Address", "Postcode"] + # ].merge( + # land_registry_matches, + # how="inner", + # left_on="UPRN", + # right_on="uprn" + # ).drop(columns=["uprn"]).merge( + # land_registry[["transaction_id", "paon", "saon", "street", "postcode"]], + # how="left", on="transaction_id" + # ) + + # Merge onto matched addresses + matched_addresses = matched_addresses.merge( + land_registry_matches, + how="left", + left_on="UPRN", + right_on="uprn" + ).drop(columns=["uprn"]) + + # Flat anything that sold in the last year + # TODO: Decide on what this logic should be! + matched_addresses["sold_recently"] = ( + matched_addresses["date_of_transfer"] >= pd.Timestamp.now() - pd.DateOffset(years=1) + ) + + # Drop anything that sold recently + matched_addresses = matched_addresses[~matched_addresses["sold_recently"]] + # shared_freehold_match = pd.DataFrame(shared_freehold_match) # Strore these files # freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx") @@ -785,11 +818,30 @@ def app(): matched_addresses["Company Registration No. (1)"].isin(investment_50m["Company Registration No. 
(1)"]) ] + # Merge on the owner + al_rayan = investment_50m_properties[ + investment_50m_properties["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC")] + portfolio_epc_data_50m = properties[properties["UPRN"].isin(investment_50m_properties["UPRN"])] portfolio_epc_data_20m = properties[properties["UPRN"].isin(investment_20m_properties["UPRN"])] - investment_20m_properties.to_excel("investment_20m_properties 28th May.xlsx", index=False) - investment_50m_properties.to_excel("investment_50m_properties 28th May.xlsx", index=False) + # investment_20m_properties.to_excel("investment_20m_properties 28th July.xlsx", index=False) + # investment_50m_properties.to_excel("investment_50m_properties 28th July.xlsx", index=False) + + z = pd.read_excel("investment_50m_properties 28th May.xlsx") + new = investment_50m_properties[~investment_50m_properties["UPRN"].isin(z["UPRN"])] + new_al_rayan = new[ + new["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC") + ] + new_al_rayan = new_al_rayan.merge( + properties[["UPRN", "LODGEMENT_DATE"]], + how="left", + on="UPRN" + ).merge( + company_ownership[["Title Number", "Date Proprietor Added"]], + how="left", + on="Title Number", + ) # Store the EPC data portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False) From bd610c88813cbbb2e2d4534d7352302f4602f522 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 29 Jul 2024 14:29:07 +0100 Subject: [PATCH 30/49] working on electrical consumption estimates --- backend/Property.py | 13 ++ backend/app/plan/router.py | 30 +++- etl/bill_savings/EnergyConsumptionModel.py | 21 ++- etl/bill_savings/data_combining.py | 2 +- etl/customers/goldman/property_ownership.py | 170 ++++++++++++-------- recommendations/HeatingRecommender.py | 15 +- 6 files changed, 157 insertions(+), 94 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 6365bb0b..2098a2a4 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1321,3 +1321,16 @@ class 
Property: self.hot_water_energy_source = self.heating_energy_source else: raise Exception("Investiage me") + + def is_ashp_valid(self, exclusions): + + if "air_source_heat_pump" in self.non_invasive_recommendations: + return True + + if "air_source_heat_pump" in exclusions: + return False + + suitable_property_type = self.data["property-type"] in ["House", "Bungalow"] + has_air_source_heat_pump = self.main_heating["has_air_source_heat_pump"] + + return suitable_property_type and not has_air_source_heat_pump diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 4796cd9f..e21226fa 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -520,11 +520,37 @@ async def trigger_plan(body: PlanTriggerRequest): # rating to the target SAP rating (ie 69C) # TODO: Update this! energy_consumption = energy_consumption_client.estimate_new_consumption( - current_rating=p.data["current-energy-rating"], - target_rating="C", + current_energy_efficiency=p.data["current-energy-efficiency"], + target_efficiency="69", current_consumption=p.current_adjusted_energy ) + def convert_to_electric_consumption(self, p, energy_consumption, assumed_ashp_efficiency, exclusions): + if (p.main_fuel["fuel_type"] == "electricity") or ( + p.main_fuel["fuel_type"] == "mains gas" and not p.is_ashp_valid(exclusions=exclusions) + ): + # if the primary fuel is already electricity, we don't need to adjust the consumpion + return energy_consumption + + if p.main_fuel["fuel_type"] == "mains gas" and p.is_ashp_valid(exclusions=exclusions): + # if the primary fuel is gas, we need to adjust the consumption to reflect the expected + # efficiency of an ASHP. + # We should adjust the energy consumption to reflect the 200-400% efficiency of an ASHP with + # electrified heating, so that the solar panel can cover heating generation. 
+ heating_consumption = p.energy_consumption_estimates["adjusted"]["heating"] + hot_water_consumption = p.energy_consumption_estimates["adjusted"]["hot_water"] + + systems_consumptions = heating_consumption + hot_water_consumption + + adjusted_consumption = systems_consumptions / (assumed_ashp_efficiency / 100) + electric_consumption = ( + adjusted_consumption + + p.energy_consumption_estimates["adjusted"]["lighting"] + + p.energy_consumption_estimates["adjusted"]["appliances"] + ) + + return electric_consumption + # TODO: Should energy_consumption to adjusted to just electricity requirement? # We should align our calculation of required energy consumption with expectations around decarbonising # heating and hot water, so worse case we should take just the electrical consumption of the property diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py index dfb0e574..ff225073 100644 --- a/etl/bill_savings/EnergyConsumptionModel.py +++ b/etl/bill_savings/EnergyConsumptionModel.py @@ -507,31 +507,36 @@ class EnergyConsumptionModel: return prediction @staticmethod - def calculate_percentage_decrease(start_rating, end_rating, consumption_averages): + def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages): start_consumption = consumption_averages.loc[ - consumption_averages["current-energy-rating"] == start_rating, "total_consumption" + consumption_averages["current-energy-efficiency"].astype(str) == str(start_efficiency), "total_consumption" ].values[0] + end_consumption = consumption_averages.loc[ - consumption_averages["current-energy-rating"] == end_rating, "total_consumption" + consumption_averages["current-energy-efficiency"].astype(str) == str(end_efficiency), "total_consumption" ].values[0] percentage_decrease = ((start_consumption - end_consumption) / start_consumption) * 100 + # percentage_decrease cannot be nehative + if percentage_decrease < 0: + percentage_decrease = 0 return 
percentage_decrease - def estimate_new_consumption(self, current_rating, target_rating, current_consumption): + def estimate_new_consumption(self, current_energy_efficiency, target_efficiency, current_consumption): """ Given then consumption_averages dataset, which is produced as a result of the data_combining.py script, for the energy kwh models, this function will estimate the new consumption based on the current consumption, based on the expected reduction in consumption from the current rating to the target rating. - :param current_rating: - :param target_rating: + :param current_energy_efficiency: + :param target_efficiency: :param current_consumption: - :param df: :return: """ percentage_decrease = self.calculate_percentage_decrease( - current_rating, target_rating, self.consumption_averages + start_efficiency=current_energy_efficiency, + end_efficiency=target_efficiency, + consumption_averages=self.consumption_averages ) new_consumption = current_consumption * (1 - percentage_decrease / 100) return new_consumption diff --git a/etl/bill_savings/data_combining.py b/etl/bill_savings/data_combining.py index d3a8d679..dece3834 100644 --- a/etl/bill_savings/data_combining.py +++ b/etl/bill_savings/data_combining.py @@ -94,7 +94,7 @@ def app(): # We also estimate the energy consumption reduction from this data, by band df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"] - consumption_averages = df.groupby("current-energy-rating")["total_consumption"].mean().reset_index() + consumption_averages = df.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index() # Save the consumption averages back to s3 save_dataframe_to_s3_parquet( diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index 1c828566..ebd72732 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -11,7 +11,10 @@ from utils.s3 import read_dataframe_from_s3_parquet # 
The mode EPC rating is D, so we associate the £238k valuation with an EPC D property # Therefore value_of_F * 1.15 = value_of_D * 1.03 # Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) = 213165 -PROPERTY_VALUE_ESTIMATE = 213_165 +PROPERTY_VALUE_ESTIMATE = 200_000 + +# UPRNs of properties we need +MANUAL_EXCLUSIONS = [] def aggregate_matches(matching_lookup, company_ownership, properties): @@ -283,6 +286,36 @@ def filter_land_registry(properties): ) +def is_substring(x, match_string): + if pd.isnull(x): + return False + return x in match_string.lower() + + +def house_number_match(paon, house_number): + # Firstly try and convert to numberic + try: + paon_numeric = int(paon) + house_number_numeric = int(house_number) + return paon_numeric == house_number_numeric + except Exception as e: # noqa + # If we can't convert both to numeric, we do an equality + + return paon == house_number + + +def check_equalities(lr_filtered): + all_paon_equal = all(lr_filtered["paon"] == lr_filtered["paon"].values[0]) + if pd.isnull(lr_filtered["saon"].values[0]): + all_saon_equal = all(pd.isnull(lr_filtered["saon"])) + else: + all_saon_equal = all(lr_filtered["saon"] == lr_filtered["saon"].values[0]) + + all_street_equal = all(lr_filtered["street"] == lr_filtered["street"].values[0]) + + return all_paon_equal, all_saon_equal, all_street_equal + + def app(): """ This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs @@ -292,8 +325,8 @@ def app(): # https://epc.opendatacommunities.org/domestic/search?address=&postcode=&local-authority=&constituency # =&uprn=100031179243&from-month=1&from-year=2008&to-month=12&to-year=2024 # is actually listed in two local authorities causing us to think it's an EPC F & G property, but it's - # it's actually EPC E. 
Need to handle this, probably by reading in all of the EPC data, concatenating together - # and performing a singular filter for most recent EPC by UPRN + # it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating + # together and performing a singular filter for most recent EPC by UPRN # paths = [ # "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv", # "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv", @@ -356,10 +389,6 @@ def app(): # Take the newest UPRN properties = properties.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN") - # TODO: Do we want to filter properties based on lodgement dates? - # E.g. we might want to filter properties that have had a sale EPC lodged in the last x months, because - # this could be indicative of a sale happening, and the land registry data may not have caught up yet - # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the # the property itself starting_terms = [ @@ -461,6 +490,8 @@ def app(): # freehold_matching_lookup.to_excel("freehold_matching_lookup V2.xlsx") # leasehold_matching_lookup.to_excel("leasehold_matching_lookup V2.xlsx") + # freehold_matching_lookup = pd.read_excel("freehold_matching_lookup V2.xlsx") + # leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup V2.xlsx") # The approximate matches aren't very good freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"] @@ -483,7 +514,9 @@ def app(): "ADDRESS1", "CURRENT_ENERGY_EFFICIENCY", "CURRENT_ENERGY_RATING", - "POSTCODE" + "POSTCODE", + "LODGEMENT_DATE", + "TRANSACTION_TYPE" ] ].rename( columns={ @@ -501,7 +534,7 @@ def app(): "Postcode", "Company Registration No. 
(1)", "Proprietor Name (1)", - + "Date Proprietor Added", ] ], how="left", on="Title Number" @@ -531,35 +564,6 @@ def app(): land_registry["saon"] = land_registry["saon"].str.lower().str.strip() land_registry["date_of_transfer"] = pd.to_datetime(land_registry["date_of_transfer"]) - def is_substring(x, match_string): - - if pd.isnull(x): - return False - - return x in match_string.lower() - - def house_number_match(paon, house_number): - # Firstly try and convert to numberic - try: - paon_numeric = int(paon) - house_number_numeric = int(house_number) - return paon_numeric == house_number_numeric - except Exception as e: # noqa - # If we can't convert both to numeric, we do an equality - - return paon == house_number - - def check_equalities(lr_filtered): - all_paon_equal = all(lr_filtered["paon"] == lr_filtered["paon"].values[0]) - if pd.isnull(lr_filtered["saon"].values[0]): - all_saon_equal = all(pd.isnull(lr_filtered["saon"])) - else: - all_saon_equal = all(lr_filtered["saon"] == lr_filtered["saon"].values[0]) - - all_street_equal = all(lr_filtered["street"] == lr_filtered["street"].values[0]) - - return all_paon_equal, all_saon_equal, all_street_equal - land_registry_matches = [] for _, match in tqdm(matched_addresses.iterrows(), total=len(matched_addresses)): @@ -779,13 +783,25 @@ def app(): ).drop(columns=["uprn"]) # Flat anything that sold in the last year - # TODO: Decide on what this logic should be! 
matched_addresses["sold_recently"] = ( matched_addresses["date_of_transfer"] >= pd.Timestamp.now() - pd.DateOffset(years=1) ) - # Drop anything that sold recently - matched_addresses = matched_addresses[~matched_addresses["sold_recently"]] + matched_addresses["sale_lodged_recently"] = ( + (pd.to_datetime(matched_addresses["LODGEMENT_DATE"]) >= pd.Timestamp.now() - pd.DateOffset(months=12)) & + (matched_addresses["TRANSACTION_TYPE"].isin(["marketed sale", "non marketed sale"])) + ) + + # Drop rows on the booleans + matched_addresses = matched_addresses[ + ~matched_addresses["sold_recently"] & + ~matched_addresses["sale_lodged_recently"] + ] + + # Filter combined_matching_lookup accordingly + combined_matching_lookup = combined_matching_lookup[ + combined_matching_lookup["UPRN"].isin(matched_addresses["UPRN"]) + ] # shared_freehold_match = pd.DataFrame(shared_freehold_match) # Strore these files @@ -807,45 +823,19 @@ def app(): properties=properties ) - investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000] investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000] - investment_20m_properties = matched_addresses[ - matched_addresses["Company Registration No. (1)"].isin(investment_20m["Company Registration No. (1)"]) - ] - investment_50m_properties = matched_addresses[ matched_addresses["Company Registration No. (1)"].isin(investment_50m["Company Registration No. 
(1)"]) ] - # Merge on the owner - al_rayan = investment_50m_properties[ - investment_50m_properties["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC")] - portfolio_epc_data_50m = properties[properties["UPRN"].isin(investment_50m_properties["UPRN"])] - portfolio_epc_data_20m = properties[properties["UPRN"].isin(investment_20m_properties["UPRN"])] - # investment_20m_properties.to_excel("investment_20m_properties 28th July.xlsx", index=False) + # Storing data # investment_50m_properties.to_excel("investment_50m_properties 28th July.xlsx", index=False) - z = pd.read_excel("investment_50m_properties 28th May.xlsx") - new = investment_50m_properties[~investment_50m_properties["UPRN"].isin(z["UPRN"])] - new_al_rayan = new[ - new["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC") - ] - new_al_rayan = new_al_rayan.merge( - properties[["UPRN", "LODGEMENT_DATE"]], - how="left", - on="UPRN" - ).merge( - company_ownership[["Title Number", "Date Proprietor Added"]], - how="left", - on="Title Number", - ) - # Store the EPC data - portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False) - portfolio_epc_data_20m.to_excel("portfolio_epc_data_20m 28th May.xlsx", index=False) + # portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 29th July.xlsx", index=False) # We check if any of these properties are in a conservation area valuations = pd.read_excel("property value.xlsx") @@ -891,6 +881,48 @@ def company_aggregation(): aggregation.to_excel("Company ownership aggregation.xlsx") +def extract_price_info(text): + # Use regex to find the relevant price information + match = re.search(r'Estimated price\n\nLow£([\d,]+)k\n\n£([\d,]+)k\n\nHigh£([\d,]+)k', text) + if match: + low_price = int(match.group(1).replace(',', '')) * 1000 + est_price = int(match.group(2).replace(',', '')) * 1000 + high_price = int(match.group(3).replace(',', '')) * 1000 + + price_info = { + 'Zoopla Valuation': est_price, + 'Zoopla Lower Bound': low_price, + 'Zoopla Upper 
Bound': high_price + } + + return price_info + + return None + + +def get_valuations(portfolio_epc_data_50m): + # This gets blocked pretty quickly by Zoopla + import requests + import time + from tqdm import tqdm + valuation_data = [] + for _, property_data in tqdm(portfolio_epc_data_50m.iterrows(), total=len(portfolio_epc_data_50m)): + uprn = property_data["UPRN"] + response = requests.get( + f"https://r.jina.ai/https://www.zoopla.co.uk/property/uprn/{uprn}/" + ) + + pricing = extract_price_info(response.text) + valuation_data.append( + { + "UPRN": uprn, + **pricing + } + ) + + time.sleep(2) + + def prepare_anonymised_data(): investment_50m_properties = pd.read_excel("investment_50m_properties 28th May.xlsx", header=0) investment_epc_data = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx", header=0) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 07bac2cd..1d409be6 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -116,7 +116,7 @@ class HeatingRecommender: # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions # and either allow or prevent the recommendation of an air source heat pump - if self.is_ashp_valid(exclusions=exclusions): + if self.property.is_ashp_valid(exclusions=exclusions): self.recommend_air_source_heat_pump( phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations ) @@ -186,19 +186,6 @@ class HeatingRecommender: description = ("Replace the existing boiler and cylinder without a thermostat with a new electric combi " "boiler") - def is_ashp_valid(self, exclusions): - - if "air_source_heat_pump" in self.property.non_invasive_recommendations: - return True - - if "air_source_heat_pump" in exclusions: - return False - - suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"] - has_air_source_heat_pump = 
self.property.main_heating["has_air_source_heat_pump"] - - return suitable_property_type and not has_air_source_heat_pump - def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False): """ This method will implement the recommendation for an air source heat pump From 87de0ce3c9e62d03898fe4374ee3b9ba55b90e3d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 29 Jul 2024 14:43:43 +0100 Subject: [PATCH 31/49] implemented electric consumption --- backend/Property.py | 46 ++++++++++++++++++++++++++++ backend/app/plan/router.py | 63 ++++++++++---------------------------- 2 files changed, 62 insertions(+), 47 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 2098a2a4..f5123b96 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1334,3 +1334,49 @@ class Property: has_air_source_heat_pump = self.main_heating["has_air_source_heat_pump"] return suitable_property_type and not has_air_source_heat_pump + + def estimate_electrical_consumption(self, assumed_ashp_efficiency, exclusions): + """ + Given a property, this method estimates the electrical consumption of the property, based on the energy + consumption, the assumed efficiency of an ASHP and the exclusions. + + What we're trying to do here is size up the future electricicty demand of the property, assuming that the + home is eligible for an ASHP. If the property is not eligible for an ASHP, we don't need to adjust the + consumption. 
+ + This figure is used to size up solar panels, so they can cover heat generation, even if the property + today doesn't generate its heat from electricity + + :param assumed_ashp_efficiency: + :param exclusions: + :return: + """ + + exclusions = [] if exclusions is None else exclusions + + if (self.main_fuel["fuel_type"] == "electricity") or ( + self.main_fuel["fuel_type"] == "mains gas" and not self.is_ashp_valid(exclusions=exclusions) + ): + # if the primary fuel is already electricity, we don't need to adjust the consumpion + return self.current_adjusted_energy + + if self.main_fuel["fuel_type"] == "mains gas" and self.is_ashp_valid(exclusions=exclusions): + # if the primary fuel is gas, we need to adjust the consumption to reflect the expected + # efficiency of an ASHP. + # We should adjust the energy consumption to reflect the 200-400% efficiency of an ASHP with + # electrified heating, so that the solar panel can cover heating generation. + heating_consumption = self.energy_consumption_estimates["adjusted"]["heating"] + hot_water_consumption = self.energy_consumption_estimates["adjusted"]["hot_water"] + + systems_consumptions = heating_consumption + hot_water_consumption + + adjusted_consumption = systems_consumptions / (assumed_ashp_efficiency / 100) + electric_consumption = ( + adjusted_consumption + + self.energy_consumption_estimates["adjusted"]["lighting"] + + self.energy_consumption_estimates["adjusted"]["appliances"] + ) + + return electric_consumption + + raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type") diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index e21226fa..68dcb916 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -437,9 +437,11 @@ async def trigger_plan(body: PlanTriggerRequest): # We set the target rating to EPC C, which is the typical EPC rating we would expect the # property to achieve post retrofit of just the fabric 
"energy_consumption": energy_consumption_client.estimate_new_consumption( - current_rating=p.data["current-energy-rating"], - target_rating="C", - current_consumption=p.current_adjusted_energy + current_energy_efficiency=p.data["current-energy-efficiency"], + target_efficiency="C", + current_consumption=p.estimate_electrical_consumption( + assumed_ashp_efficiency=300, exclusions=body.exclusions + ) ), "property_id": p.id, "uprn": p.uprn @@ -514,56 +516,23 @@ async def trigger_plan(body: PlanTriggerRequest): # TODO: Complete me! - we probably won't do this for individual flats - IGNORE FLATS FROM THIS WITHOUT # BUILDING IDS - # if the property is already very close to an EPC C, we don't adjust the energy consumption based on - # expected movement to EPC C. - # To extend this, what we could do is adjust the based on the expected movement from the current SAP - # rating to the target SAP rating (ie 69C) - # TODO: Update this! - energy_consumption = energy_consumption_client.estimate_new_consumption( - current_energy_efficiency=p.data["current-energy-efficiency"], - target_efficiency="69", - current_consumption=p.current_adjusted_energy + electric_consumption = p.estimate_electrical_consumption( + assumed_ashp_efficiency=300, exclusions=body.exclusions ) - def convert_to_electric_consumption(self, p, energy_consumption, assumed_ashp_efficiency, exclusions): - if (p.main_fuel["fuel_type"] == "electricity") or ( - p.main_fuel["fuel_type"] == "mains gas" and not p.is_ashp_valid(exclusions=exclusions) - ): - # if the primary fuel is already electricity, we don't need to adjust the consumpion - return energy_consumption - - if p.main_fuel["fuel_type"] == "mains gas" and p.is_ashp_valid(exclusions=exclusions): - # if the primary fuel is gas, we need to adjust the consumption to reflect the expected - # efficiency of an ASHP. 
- # We should adjust the energy consumption to reflect the 200-400% efficiency of an ASHP with - # electrified heating, so that the solar panel can cover heating generation. - heating_consumption = p.energy_consumption_estimates["adjusted"]["heating"] - hot_water_consumption = p.energy_consumption_estimates["adjusted"]["hot_water"] - - systems_consumptions = heating_consumption + hot_water_consumption - - adjusted_consumption = systems_consumptions / (assumed_ashp_efficiency / 100) - electric_consumption = ( - adjusted_consumption + - p.energy_consumption_estimates["adjusted"]["lighting"] + - p.energy_consumption_estimates["adjusted"]["appliances"] - ) - - return electric_consumption - - # TODO: Should energy_consumption to adjusted to just electricity requirement? - # We should align our calculation of required energy consumption with expectations around decarbonising - # heating and hot water, so worse case we should take just the electrical consumption of the property - # if the property is current using gas for heating and hot water, then we should adjust the kwh demand - # to reflect the 200-400% efficiency of an ASHP with electrified heating, so that the solar panel can - # cover heating generation. While - # If the main fuel is electricity (not community) then we don't need to change the kwh demand, if it's - # gas we should adjust on the suitability of an ashp! 
+ # We now decrease this, based on the expected energy efficiency of the property post retrofit to a C, + # which is the common level we would expect the property to reach when treating the fabric of the + # home + electric_consumption = energy_consumption_client.estimate_new_consumption( + current_energy_efficiency=p.data["current-energy-efficiency"], + target_efficiency="69", + current_consumption=electric_consumption + ) solar_performance = solar_api_client.get( longitude=p.spatial["longitude"], latitude=p.spatial["latitude"], - energy_consumption=energy_consumption, + energy_consumption=electric_consumption, is_building=False, session=session, uprn=p.uprn From eec453670ceb3105d1d041f737db2125518bd27c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 29 Jul 2024 14:56:17 +0100 Subject: [PATCH 32/49] tweaked solar ranking algorithm --- backend/apis/GoogleSolarApi.py | 8 +++++++- backend/app/assumptions.py | 3 +++ backend/app/plan/router.py | 7 ++++--- 3 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 backend/app/assumptions.py diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index 8d08b083..074a9ece 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -311,12 +311,19 @@ class GoogleSolarApi: ) # Now that we know the lifetime cnsumption of ac kwh, we can estimate the roi + # Key things we estimate: + # - generation_value: this is the gbp value of the electricity generated + # - roi: the return on investment, calcualated as generation_value / total_cost + # - surplus: this is the amount of additional energy generated, and therefore how much will be exported + # - surplus_value: the value of the surplus energy - this feeds into generation_value, when relevant + # - expected_payback_years: the number of years it will take to pay back the initial investment lifetime_energy_consumption = energy_consumption * self.installation_life_span roi_results = [] for _, panel_config in 
panel_performance.iterrows(): lifetime_ac_kwh = panel_config["lifetime_ac_kwh"] surplus = 0 + generation_deficit = 0 if lifetime_ac_kwh < lifetime_energy_consumption: # We estimate the amount of electricity generated, based on the price cap generation_value = lifetime_ac_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP @@ -329,7 +336,6 @@ class GoogleSolarApi: surplus_value = surplus * AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT generation_value = lifetime_energy_consumption * AnnualBillSavings.ELECTRICITY_PRICE_CAP roi = (generation_value + surplus_value) / panel_config["total_cost"] - generation_deficit = surplus_value # Calculate expected payback years if generation_value > 0: diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py new file mode 100644 index 00000000..13bd913f --- /dev/null +++ b/backend/app/assumptions.py @@ -0,0 +1,3 @@ +# Assumes that the average efficiency of an air source heat pump is 300%, taking the median of the 200-400% range, +# which is often quoted as a sensible efficiency range for air source heat pumps. 
+AVERAGE_ASHP_EFFICIENCY = 300 diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 68dcb916..c1e0b981 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -10,6 +10,7 @@ from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.orm import sessionmaker from starlette.responses import Response +import backend.app.assumptions as assumptions from backend.app.config import get_settings, get_prediction_buckets from backend.app.db.connection import db_engine from backend.app.db.functions.materials_functions import get_materials @@ -440,7 +441,7 @@ async def trigger_plan(body: PlanTriggerRequest): current_energy_efficiency=p.data["current-energy-efficiency"], target_efficiency="C", current_consumption=p.estimate_electrical_consumption( - assumed_ashp_efficiency=300, exclusions=body.exclusions + assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions ) ), "property_id": p.id, @@ -517,7 +518,7 @@ async def trigger_plan(body: PlanTriggerRequest): # BUILDING IDS electric_consumption = p.estimate_electrical_consumption( - assumed_ashp_efficiency=300, exclusions=body.exclusions + assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions ) # We now decrease this, based on the expected energy efficiency of the property post retrofit to a C, @@ -529,7 +530,7 @@ async def trigger_plan(body: PlanTriggerRequest): current_consumption=electric_consumption ) - solar_performance = solar_api_client.get( + solar_api_client.get( longitude=p.spatial["longitude"], latitude=p.spatial["latitude"], energy_consumption=electric_consumption, From 754d46073e5715449c2f251db44c54bbc74d034a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 29 Jul 2024 15:01:39 +0100 Subject: [PATCH 33/49] added db save --- backend/app/plan/router.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 
c1e0b981..221075f9 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -539,6 +539,16 @@ async def trigger_plan(body: PlanTriggerRequest): uprn=p.uprn ) + # Store the data in the database + # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it exists + solar_api_client.save_to_db( + session=session, + uprns_to_location=[ + {"uprn": p.uprn, "longitude": p.spatial["longitude"], "latitude": p.spatial["latitude"]} + ], + scenario_type="unit" + ) + # TODO: Insert the pitched roof area into the property class as we store the solar performance # in the property class print("Implement me") From b85fde1b21742bb5edc5e0c5c1f678d3502e2602 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 29 Jul 2024 15:16:48 +0100 Subject: [PATCH 34/49] implemented unit level solar api --- backend/Property.py | 84 ++++++++--------------- backend/app/plan/router.py | 74 +++++++++++--------- recommendations/SolarPvRecommendations.py | 19 +---- 3 files changed, 71 insertions(+), 106 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index f5123b96..f15a0d7b 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -159,7 +159,7 @@ class Property: self.floor_height = epc_record.prepared_epc.get("floor_height") self.insulation_wall_area = None self.floor_area = epc_record.prepared_epc.get("total_floor_area") - self.pitched_roof_area = None + self.roof_area = None self.insulation_floor_area = None self.number_lighting_outlets = epc_record.prepared_epc.get( "fixed_lighting_outlets_count" @@ -604,18 +604,12 @@ class Property: def get_components( self, cleaned, - photo_supply_lookup, - floor_area_decile_thresholds, energy_consumption_client ): """ Given the cleaning that has been performed, we'll use this to identify the property components, from roof to walls to windows, heating and hot water :param cleaned: This is the dictionary of components found in cleaner.cleaned - :param photo_supply_lookup: 
This is the lookup table for the photo supply, used to estimate the percentage - of the roof that is suitable for solar panels - :param floor_area_decile_thresholds: This is the decile thresholds for the floor area, used in estimating the - solar pv roof area :param energy_consumption_client: Contains the heating and hot water kwh models - used to predict current energy annual consumption in kWh :return: @@ -680,20 +674,21 @@ class Property: self.set_floor_type() self.set_floor_level() self.set_windows_count() - self.set_solar_panel_area( - photo_supply_lookup=photo_supply_lookup, - floor_area_decile_thresholds=floor_area_decile_thresholds, - ) self.set_energy_source() self.find_energy_sources() self.set_current_energy_bill(energy_consumption_client) - def set_solar_panel_configuration(self, solar_panel_configuration): + def set_solar_panel_configuration( + self, solar_panel_configuration, roof_area + ): """ This funtion inserts the solar panel configuration into the property object """ self.solar_panel_configuration = solar_panel_configuration + # We also set the roof area + self.roof_area = roof_area + def set_current_energy_bill(self, energy_consumption_client): """ Given what we know about the property now, estimates the current energy consumption using the UCL paper @@ -1079,9 +1074,9 @@ class Property: if condition_data["main_dwelling_ground_floor_area"] is not None \ else self.floor_area / self.number_of_floors - self.pitched_roof_area = esimtate_pitched_roof_area( - floor_area=self.insulation_floor_area, floor_height=self.floor_height - ) + # self.pitched_roof_area = esimtate_pitched_roof_area( + # floor_area=self.insulation_floor_area, floor_height=self.floor_height + # ) def set_floor_level(self): self.floor_level = ( @@ -1195,48 +1190,6 @@ class Property: if condition_data["windows_area"] is not None \ else None - def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds): - """ - Sets the approximate area of the solar panels - 
:return: - """ - - if (self.insulation_floor_area is None) and (self.pitched_roof_area is None): - raise ValueError( - "Need to set insulation floor area and pitched roof area before setting solar pv roof area" - ) - - photo_supply_matched = SolarPhotoSupply.filter_photo_supply_lookup( - photo_supply_lookup=photo_supply_lookup, - floor_area_decile_thresholds=floor_area_decile_thresholds, - tenure=self.data["tenure"], - built_form=self.data["built-form"], - property_type=self.data["property-type"], - construction_age_band=self.construction_age_band, - is_flat=self.roof["is_flat"], - is_pitched=self.roof["is_pitched"], - is_roof_room=self.roof["is_roof_room"], - floor_area=self.floor_area, - ) - - percentage_of_roof = photo_supply_matched["photo_supply_median"].mean() - percentage_of_roof = percentage_of_roof / 100 - - self.solar_pv_percentage = percentage_of_roof - - def get_solar_pv_roof_area(self, percentage_of_roof): - """ - Given a percentage of the roof, this method will return the estimated area of the solar panels - :param percentage_of_roof: - :return: - """ - - return ( - self.insulation_floor_area * percentage_of_roof - if self.roof["is_flat"] - else self.pitched_roof_area * percentage_of_roof - ) - def set_energy_source(self): """ This method sets the energy source of the property, based on the mains gas flag and energy tariff. 
@@ -1335,6 +1288,23 @@ class Property: return suitable_property_type and not has_air_source_heat_pump + def is_solar_pv_valid(self): + + # If the property is a flat but we are looking at building solar potential, we can include this + if (self.building_id is not None) and (self.solar_panel_configuration is not None): + return True + + is_valid_property_type = self.data["property-type"] in ["House", "Bungalow", "Maisonette"] + is_valid_roof_type = ( + self.roof["is_flat"] or self.roof["is_pitched"] or self.roof["is_roof_room"] + ) + # If there is no existing solar PV, the photo-supply field will be None or a missing value + has_no_existing_solar_pv = self.data["photo-supply"] in [ + None, 0, self.DATA_ANOMALY_MATCHES + ] + + return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv + def estimate_electrical_consumption(self, assumed_ashp_efficiency, exclusions): """ Given a property, this method estimates the electrical consumption of the property, based on the energy diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 221075f9..563134ea 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -408,7 +408,6 @@ async def trigger_plan(body: PlanTriggerRequest): uprn_filenames = read_dataframe_from_s3_parquet( bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet" ) - photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET) solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY) dataset_version = "2024-07-08" @@ -425,10 +424,10 @@ async def trigger_plan(body: PlanTriggerRequest): logger.info("Getting spatial data") for p in input_properties: - p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds, energy_consumption_client) + p.get_components(cleaned=cleaned, energy_consumption_client=energy_consumption_client) p.get_spatial_data(uprn_filenames) - # TODO: Handle the case of modelling 
some units as buildings and some as properties individually + # TODO: Tidy this up building_ids = [ { "building_id": p.building_id, @@ -439,7 +438,7 @@ async def trigger_plan(body: PlanTriggerRequest): # property to achieve post retrofit of just the fabric "energy_consumption": energy_consumption_client.estimate_new_consumption( current_energy_efficiency=p.data["current-energy-efficiency"], - target_efficiency="C", + target_efficiency="69", current_consumption=p.estimate_electrical_consumption( assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions ) @@ -448,6 +447,24 @@ async def trigger_plan(body: PlanTriggerRequest): "uprn": p.uprn } for p in input_properties if p.building_id is not None ] + individual_units = [ + { + "longitude": p.spatial["longitude"], + "latitude": p.spatial["latitude"], + # Energy consumption is adjusted for the property's expected post retrofit state + # We set the target rating to EPC C, which is the typical EPC rating we would expect the + # property to achieve post retrofit of just the fabric + "energy_consumption": energy_consumption_client.estimate_new_consumption( + current_energy_efficiency=p.data["current-energy-efficiency"], + target_efficiency="69", + current_consumption=p.estimate_electrical_consumption( + assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions + ), + ), + "property_id": p.id, + "uprn": p.uprn + } for p in input_properties if p.building_id is None + ] if building_ids: # Find the unique longitude and latitude pairs for each building id unique_coordinates = {} @@ -511,32 +528,21 @@ async def trigger_plan(body: PlanTriggerRequest): ) p.set_solar_panel_configuration(unit_solar_panel_configuration) - else: + if individual_units: # Model the solar potential at the property level - for p in input_properties: - # TODO: Complete me! 
- we probably won't do this for individual flats - IGNORE FLATS FROM THIS WITHOUT - # BUILDING IDS - - electric_consumption = p.estimate_electrical_consumption( - assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions - ) - - # We now decrease this, based on the expected energy efficiency of the property post retrofit to a C, - # which is the common level we would expect the property to reach when treating the fabric of the - # home - electric_consumption = energy_consumption_client.estimate_new_consumption( - current_energy_efficiency=p.data["current-energy-efficiency"], - target_efficiency="69", - current_consumption=electric_consumption - ) + for unit in individual_units: + property_instance = [p for p in input_properties if p.id == unit["property_id"]][0] + # At this level, we check if the property is suitable for solar and if now, skip + if not property_instance.is_solar_pv_valid(): + continue solar_api_client.get( - longitude=p.spatial["longitude"], - latitude=p.spatial["latitude"], - energy_consumption=electric_consumption, + longitude=unit["longitude"], + latitude=unit["latitude"], + energy_consumption=unit["energy_consumption"], is_building=False, session=session, - uprn=p.uprn + uprn=unit["uprn"] ) # Store the data in the database @@ -544,16 +550,22 @@ async def trigger_plan(body: PlanTriggerRequest): solar_api_client.save_to_db( session=session, uprns_to_location=[ - {"uprn": p.uprn, "longitude": p.spatial["longitude"], "latitude": p.spatial["latitude"]} + { + "uprn": property_instance.uprn, + "longitude": property_instance.spatial["longitude"], + "latitude": property_instance.spatial["latitude"] + } ], scenario_type="unit" ) - # TODO: Insert the pitched roof area into the property class as we store the solar performance - # in the property class - print("Implement me") - - # TODO: We can set the pitched roof area based on the results of the solar api! 
+ property_instance.set_solar_panel_configuration( + solar_panel_configuration={ + "insights_data": solar_api_client.insights_data, + "panel_performance": solar_api_client.panel_performance + }, + roof_area=solar_api_client.roof_area + ) logger.info("Getting components and epc recommendations") recommendations = {} diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 276573ec..4eece985 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -78,23 +78,6 @@ class SolarPvRecommendations: } ] - def is_solar_pv_valid(self): - - # If the property is a flat but we are looking at building solar potential, we can include this - if (self.property.building_id is not None) and (self.property.solar_panel_configuration is not None): - return True - - is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"] - is_valid_roof_type = ( - self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"] - ) - # If there is no existing solar PV, the photo-supply field will be None or a missing value - has_no_existing_solar_pv = self.property.data["photo-supply"] in [ - None, 0, self.property.DATA_ANOMALY_MATCHES - ] - - return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv - def recommend_building_analysis(self, phase): """ This recommendation approach handles the case of producing solar PV recommendations at the building level, @@ -159,7 +142,7 @@ class SolarPvRecommendations: :return: """ - if not self.is_solar_pv_valid(): + if not self.property.is_solar_pv_valid(): return # If we have a buiilding level analysis, we implement separate logic From 53b2ca05b628f656855ccb119aa0d6079fa8284f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 29 Jul 2024 15:31:19 +0100 Subject: [PATCH 35/49] inserting county --- backend/app/plan/router.py | 4 ++++ 
etl/xml_survey_extraction/app.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 563134ea..ced67bfe 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -241,6 +241,10 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict): epc = energy_assessment["epc"] energy_assessment_date = epc["inspection-date"].strftime("%Y-%m-%d") + # We insert county into the epc, since right now this isn't something that we pull out from the energy + # assessment + epc["county"] = epc_searcher.newest_epc["county"] + # We check if the energy assessment is newer than the newest EPC if pd.to_datetime(energy_assessment_date) > pd.to_datetime(epc_searcher.newest_epc["inspection-date"]): # In this case, our energy assessment is newer than the EPCs available for this property diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index c4f6091f..18f84ba2 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -29,6 +29,8 @@ def main(): # The data is stored in a folder called {surveyors}/{project_code}/{uprn} # We'll need to get the uprn from the folder name, which we can do with EpcSearcher class + # TODO: Pull out county, as in create_epc_records in the router, we pull it from the latest EPC, but we should + # be able to deduce it from just the address # energy_assessments = list_files_and_subfolders_in_s3_folder( bucket_name=BUCKET, folder_name=f"{SURVEYORS}/{PROJECT_CODE}/" From d45059e40df1b266f26ef2046380ba3128353fd2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 29 Jul 2024 16:28:18 +0100 Subject: [PATCH 36/49] updating solar recommender --- backend/apis/GoogleSolarApi.py | 36 +++++-- backend/app/plan/router.py | 3 +- recommendations/SolarPvRecommendations.py | 118 ++++++++-------------- recommendations/WindowsRecommendations.py | 6 +- 4 files changed, 73 insertions(+), 90 deletions(-) diff --git 
a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index 074a9ece..c6bb3dde 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -8,6 +8,7 @@ import time from backend.app.db.functions.solar_functions import get_solar_data, store_batch_data from utils.logger import setup_logger from sklearn.preprocessing import MinMaxScaler +from recommendations.Costs import Costs logger = setup_logger() @@ -107,7 +108,14 @@ class GoogleSolarApi: @lru_cache(maxsize=128) def get( - self, longitude, latitude, energy_consumption, required_quality="MEDIUM", is_building=False, session=None, + self, + longitude, + latitude, + energy_consumption, + property_instance=None, + required_quality="MEDIUM", + is_building=False, + session=None, uprn=None ): """ @@ -116,6 +124,7 @@ class GoogleSolarApi: :param longitude: The longitude of the location. :param latitude: The latitude of the location. :param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude. + :param property_instance: The property instance associated to the longitude and latitude. :param required_quality: The required quality of the data (default is "MEDIUM"). :param is_building: Whether the energy consumption is for a building or a unit. :param session: The database session to use for the query (default is None). 
@@ -158,7 +167,9 @@ class GoogleSolarApi: self.roof_segment_indexes = [segment['segmentIndex'] for segment in self.roof_segments] # We now start finding the solar panel configurations - self.optimise_solar_configuration(energy_consumption=energy_consumption, is_building=is_building) + self.optimise_solar_configuration( + energy_consumption=energy_consumption, is_building=is_building, property_instance=property_instance + ) def save_to_db(self, session, uprns_to_location, scenario_type): if self.insights_data is None: @@ -178,7 +189,7 @@ class GoogleSolarApi: "yearly_dc_energy", "total_cost", "panneled_roof_area", - "array_warrage", + "array_wattage", "initial_ac_kwh_per_year", "lifetime_ac_kwh", "roi", @@ -191,7 +202,7 @@ class GoogleSolarApi: "yearly_dc_energy": "yearly_dc_kwh", "total_cost": "cost", "panneled_roof_area": "panelled_roof_area", - "array_warrage": "array_kwhp", + "array_wattage": "array_kwhp", "initial_ac_kwh_per_year": "yearly_ac_kwh", } ) @@ -226,12 +237,14 @@ class GoogleSolarApi: installation_life_span)) / (1 - efficiency_depreciation_factor)) - def optimise_solar_configuration(self, energy_consumption, is_building=False): + def optimise_solar_configuration(self, energy_consumption, is_building=False, property_instance=None): """ Optimise the solar panel configuration for the building. 
:return: """ + cost_instance = Costs(property_instance=property_instance) if property_instance is not None else None + # Remove any north facing roof segments panel_performance = [] for config in self.insights_data["solarPotential"]["solarPanelConfigs"]: @@ -246,7 +259,14 @@ class GoogleSolarApi: wattage = segment["panelsCount"] * self.insights_data["solarPotential"]["panelCapacityWatts"] generated_dc_energy = segment["yearlyEnergyDcKwh"] ratio = generated_dc_energy / wattage - cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (wattage / 1000) + + if cost_instance is None: + cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (wattage / 1000) + else: + cost = cost_instance.solar_pv( + wattage=wattage, has_battery=False + )["total"] + roi_summary.append( { "segmentIndex": segment["segmentIndex"], @@ -274,7 +294,7 @@ class GoogleSolarApi: "total_cost": total_cost, "weighted_ratio": weighted_ratio, "panneled_roof_area": roi_summary["panneled_roof_area"].sum(), - "array_warrage": roi_summary["n_panels"].sum() * self.panel_wattage + "array_wattage": roi_summary["n_panels"].sum() * self.panel_wattage } ) @@ -290,7 +310,7 @@ class GoogleSolarApi: # Remove anything where the total ac energy is less than half of the array wattage panel_performance = panel_performance[ - (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5 + (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_wattage"]) >= 0.5 ] # 2) Calculate the liftime solar energy production diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index ced67bfe..f5eba1de 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -546,7 +546,8 @@ async def trigger_plan(body: PlanTriggerRequest): energy_consumption=unit["energy_consumption"], is_building=False, session=session, - uprn=unit["uprn"] + uprn=unit["uprn"], + property_instance=property_instance ) # Store the data in the database diff --git 
a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 4eece985..18a170e2 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -100,7 +100,7 @@ class SolarPvRecommendations: roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100) # Spread the cost to the individual units - adding a 20% contingency total_cost = recommendation_config["total_cost"] / n_units - kw = np.floor(recommendation_config["array_warrage"] / 100) / 10 + kw = np.floor(recommendation_config["array_wattage"] / 100) / 10 # Default to a weeks work for a team of 3 people doing 8 hour days labour_days = 5 labour_hours = 3 * 8 * labour_days @@ -150,84 +150,46 @@ class SolarPvRecommendations: self.recommend_building_analysis(phase) return - solar_pv_percentage = self.property.solar_pv_percentage - # We round up to the neaest 10% - solar_pv_percentage = np.ceil(solar_pv_percentage * 10) / 10 + panel_performance = self.property.solar_panel_configuration["panel_performance"] + roof_area = self.property.roof_area - # For the solar recommendations, we produce the following scenarios: - # 1) Solar panels only, we present a high, medium and low coverage - # 2) With and without battery - roof_coverage_scenarios = [ - solar_pv_percentage - 0.1, solar_pv_percentage, - ] - if solar_pv_percentage <= 0.4: - roof_coverage_scenarios.append(solar_pv_percentage + 0.1) - # We make sure we haven't gone too low or high - we allow no more than 60% coverage - roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6] - # If we only have two scenarios, we add a coverage scenario 10% less than the smallest - if len(roof_coverage_scenarios) == 2: - roof_coverage_scenarios.insert(0, roof_coverage_scenarios[0] - 0.1) - battery_scenarios = [False, True] + solar_configurations = panel_performance.head(3).reset_index(drop=True) - scenarios_with_wattage = [] - for roof_coverage in 
roof_coverage_scenarios: - # We now have a property which is potentially suitable for solar PV - solar_pv_roof_area = self.property.get_solar_pv_roof_area(roof_coverage) + # We combine each of these configurations with estimates with and without a battery + for rank, recommendation_config in solar_configurations.iterrows(): + roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / roof_area * 100) + for has_battery in [False, True]: + cost_result = self.costs.solar_pv( + wattage=recommendation_config["array_wattage"], has_battery=has_battery + ) + kw = np.floor(recommendation_config["array_wattage"] / 100) / 10 + if has_battery: + description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on " + f"{round(roof_coverage_percent)}% the roof, with a battery storage system.") + else: + description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p" + f"anel system on {round(roof_coverage_percent)}% the roof.") - number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA) - solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE + already_installed = "solar_pv" in self.property.already_installed + if already_installed: + cost_result = override_costs(cost_result) - if solar_panel_wattage < self.MIN_SYSTEM_WATTAGE: - continue - - solar_panel_wattage = np.clip( - a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE - ) - scenarios_with_wattage.append((roof_coverage, solar_panel_wattage)) - - # We trim the scenarios, so that we don't have duplicate wattages - scenarios_with_wattage = self.trim_solar_wattage_options(scenarios_with_wattage) - - # Produce the cross product of the scenarios - scenarios = [ - (roof, wattage, battery) for roof, wattage in scenarios_with_wattage for battery in battery_scenarios - ] - # We deduce the wattage of the solar panels based on the roof coverage - - for roof_coverage, solar_panel_wattage, has_battery in scenarios: - # 
We now have a property which is potentially suitable for solar PV - roof_coverage_percent = round(roof_coverage * 100) - # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database - # of solar PV installations - cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=has_battery) - kw = np.floor(solar_panel_wattage / 100) / 10 - - if has_battery: - description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on " - f"{round(roof_coverage_percent)}% the roof, with a battery storage system.") - else: - description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p" - f"anel system on {round(roof_coverage_percent)}% the roof.") - - already_installed = "solar_pv" in self.property.already_installed - if already_installed: - cost_result = override_costs(cost_result) - - self.recommendation.append( - { - "phase": phase, - "parts": [], - "type": "solar_pv", - "description": description, - "starting_u_value": None, - "new_u_value": None, - "sap_points": None, - "already_installed": already_installed, - **cost_result, - # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale - # back up here - "photo_supply": 100 * roof_coverage, - "has_battery": has_battery, - "description_simulation": {"photo-supply": 100 * roof_coverage}, - } - ) + self.recommendation.append( + { + "phase": phase, + "parts": [], + "type": "solar_pv", + "description": description, + "starting_u_value": None, + "new_u_value": None, + "sap_points": None, + "already_installed": already_installed, + **cost_result, + # This is required for simulating the SAP impact. 
solar_pv_percentage is between 0 & 1 so we + # scale + # back up here + "photo_supply": roof_coverage_percent, + "has_battery": has_battery, + "description_simulation": {"photo-supply": roof_coverage_percent}, + } + ) diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py index 9a30cd2e..3826a470 100644 --- a/recommendations/WindowsRecommendations.py +++ b/recommendations/WindowsRecommendations.py @@ -53,14 +53,14 @@ class WindowsRecommendations: if not number_of_windows: raise ValueError("Number of windows not specified") - if windows_area is not None: - raise Exception("We have windows area, we should use this data for our recommendations!!!") - if self.property.windows["has_glazing"] & ( self.property.windows["glazing_coverage"] == "full" ): return + if windows_area is not None: + raise Exception("We have windows area, we should use this data for our recommendations!!!") + # We scale the number of windows based on the proportion of existing glazing if self.property.data["multi-glaze-proportion"] != "": n_windows_scalar = 1 - ( From cf3b603a367cac9a04e372cba834d5dd8944e5d1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 29 Jul 2024 18:07:16 +0100 Subject: [PATCH 37/49] filling constituency --- backend/Property.py | 20 ++++++++----------- .../functions/energy_assessment_functions.py | 3 ++- backend/app/db/models/energy_assessments.py | 6 +++--- backend/app/plan/router.py | 1 + etl/xml_survey_extraction/app.py | 4 +++- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index f15a0d7b..1586835a 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -728,14 +728,14 @@ class Property: energy_consumption_client.data = None heating_prediction = ( - float(condition_data["space_heating_kwh"]) if condition_data["space_heating_kwh"] + float(condition_data["space_heating_kwh"]) if condition_data.get("space_heating_kwh") is not None else 
energy_consumption_client.score_new_data( new_data=scoring_df, target="heating_kwh" )[0] ) hot_water_prediction = ( - float(condition_data["water_heating_kwh"]) if condition_data["water_heating_kwh"] + float(condition_data["water_heating_kwh"]) if condition_data.get("water_heating_kwh") is not None else energy_consumption_client.score_new_data( new_data=scoring_df, target="hot_water_kwh" )[0] @@ -1051,18 +1051,18 @@ class Property: # We can update the number of floors if we have this information in the condition data self.number_of_floors = int(self.energy_assessment_condition_data["number_of_floors"]) \ - if condition_data["number_of_floors"] is not None \ + if condition_data.get("number_of_floors") is not None \ else self.number_of_floors self.perimeter = float(self.energy_assessment_condition_data["perimeter"]) \ - if condition_data["perimeter"] is not None \ + if condition_data.get("perimeter") is not None \ else estimate_perimeter( floor_area=self.floor_area / self.number_of_floors, num_rooms=self.number_of_rooms / self.number_of_floors ) self.insulation_wall_area = float(self.energy_assessment_condition_data["insulation_wall_area"]) \ - if condition_data["insulation_wall_area"] is not None \ + if condition_data.get("insulation_wall_area") is not None \ else estimate_external_wall_area( num_floors=self.number_of_floors, floor_height=self.floor_height, @@ -1071,13 +1071,9 @@ class Property: ) self.insulation_floor_area = float(self.energy_assessment_condition_data["main_dwelling_ground_floor_area"]) \ - if condition_data["main_dwelling_ground_floor_area"] is not None \ + if condition_data.get("main_dwelling_ground_floor_area") is not None \ else self.floor_area / self.number_of_floors - # self.pitched_roof_area = esimtate_pitched_roof_area( - # floor_area=self.insulation_floor_area, floor_height=self.floor_height - # ) - def set_floor_level(self): self.floor_level = ( FLOOR_LEVEL_MAP[self.data["floor-level"]] @@ -1177,7 +1173,7 @@ class Property: condition_data 
= self.energy_assessment_condition_data.copy() self.number_of_windows = int(condition_data["number_of_windows"]) \ - if condition_data["number_of_windows"] is not None \ + if condition_data.get("number_of_windows") is not None \ else estimate_windows( property_type=self.data["property-type"], built_form=self.data["built-form"], @@ -1187,7 +1183,7 @@ class Property: ) self.windows_area = float(condition_data["windows_area"]) \ - if condition_data["windows_area"] is not None \ + if condition_data.get("windows_area") is not None \ else None def set_energy_source(self): diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py index 45fb2b8b..b223d2f5 100644 --- a/backend/app/db/functions/energy_assessment_functions.py +++ b/backend/app/db/functions/energy_assessment_functions.py @@ -55,7 +55,8 @@ def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[Energ # Query the EnergyAssessment model, filter by uprn, order by inspection_date in descending order latest_assessment = session.query(EnergyAssessment).filter_by(uprn=uprn).order_by( desc(EnergyAssessment.inspection_date)).first() - return latest_assessment.to_dict() if latest_assessment else latest_assessment.empty_response() + + return latest_assessment.to_dict() if latest_assessment else EnergyAssessment.empty_response() except Exception as e: print(f"An error occurred: {e}") return None diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py index 2c3cc144..3928f9fa 100644 --- a/backend/app/db/models/energy_assessments.py +++ b/backend/app/db/models/energy_assessments.py @@ -1,4 +1,4 @@ -from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean +from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date from sqlalchemy.ext.declarative import declarative_base Base = declarative_base() @@ -94,8 +94,8 @@ class 
EnergyAssessment(Base): hot_water_cost_potential = Column(Text, nullable=False) lighting_cost_current = Column(Text, nullable=False) energy_consumption_current = Column(Text, nullable=False) - lodgement_date = Column(DateTime(timezone=True), nullable=False) - lodgement_datetime = Column(DateTime(timezone=True), nullable=False) + lodgement_date = Column(Date, nullable=False) + lodgement_datetime = Column(DateTime(timezone=False), nullable=False) mainheat_description = Column(Text, nullable=False) floor_height = Column(Float, nullable=False) glazed_type = Column(Text, nullable=False) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index f5eba1de..2d024f21 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -244,6 +244,7 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict): # We insert county into the epc, since right now this isn't something that we pull out from the energy # assessment epc["county"] = epc_searcher.newest_epc["county"] + epc["constituency"] = epc_searcher.newest_epc["constituency"] # We check if the energy assessment is newer than the newest EPC if pd.to_datetime(energy_assessment_date) > pd.to_datetime(epc_searcher.newest_epc["inspection-date"]): diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index 18f84ba2..73551d09 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -30,7 +30,9 @@ def main(): # We'll need to get the uprn from the folder name, which we can do with EpcSearcher class # TODO: Pull out county, as in create_epc_records in the router, we pull it from the latest EPC, but we should - # be able to deduce it from just the address + # be able to deduce it from just the address. 
Same for constituency and constituency_label + + # TODO: Store the project code in the database # energy_assessments = list_files_and_subfolders_in_s3_folder( bucket_name=BUCKET, folder_name=f"{SURVEYORS}/{PROJECT_CODE}/" From 45d74b0d8c8d047ee1b5cf1872876d02f62e38f0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 29 Jul 2024 18:13:37 +0100 Subject: [PATCH 38/49] added description simulation to fireplace recommendations --- recommendations/FireplaceRecommendations.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/recommendations/FireplaceRecommendations.py b/recommendations/FireplaceRecommendations.py index 601a8eb0..9a9d7f76 100644 --- a/recommendations/FireplaceRecommendations.py +++ b/recommendations/FireplaceRecommendations.py @@ -50,5 +50,8 @@ class FireplaceRecommendations(Definitions): # Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal "labour_hours": 6 * number_open_fireplaces, "labour_days": 6 * number_open_fireplaces / 8, # Assume 8 hour day + "description_simulation": { + "number-open-fireplaces": 0 + } } ] From b73860c742414b51dd28a31a78456f6739608123 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 29 Jul 2024 18:51:15 +0100 Subject: [PATCH 39/49] debugging unit share of energy --- backend/app/plan/router.py | 3 ++- recommendations/SolarPvRecommendations.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 2d024f21..811ec4a3 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -568,7 +568,8 @@ async def trigger_plan(body: PlanTriggerRequest): property_instance.set_solar_panel_configuration( solar_panel_configuration={ "insights_data": solar_api_client.insights_data, - "panel_performance": solar_api_client.panel_performance + "panel_performance": solar_api_client.panel_performance, + "unit_share_of_energy": 1 }, roof_area=solar_api_client.roof_area ) diff --git 
a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 18a170e2..63519d02 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -190,6 +190,7 @@ class SolarPvRecommendations: # back up here "photo_supply": roof_coverage_percent, "has_battery": has_battery, + "initial_ac_kwh_per_year": recommendation_config["initial_ac_kwh_per_year"], "description_simulation": {"photo-supply": roof_coverage_percent}, } ) From 54d2dce05da102c09804c4010d9391f1b9d8e3e2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 30 Jul 2024 10:23:22 +0100 Subject: [PATCH 40/49] Added check for room roof insulated --- recommendations/RoofRecommendations.py | 27 ++++++++++++++++++++++--- recommendations/recommendation_utils.py | 13 +++++++++++- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index a1f8c67c..615289de 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -87,6 +87,17 @@ class RoofRecommendations: return (self.insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"] + def is_room_roof_insulated(self): + + """ + Check if the room roof is already insulated + """ + + return ( + self.property.roof["is_roof_room"] and + self.property.roof["insulation_thickness"] in ["average", "above_average"] + ) + def recommend(self, phase): if self.property.roof["has_dwelling_above"]: @@ -105,8 +116,8 @@ class RoofRecommendations: if (self.insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]: return - if self.property.roof["is_roof_room"]: - raise ValueError("Update convert_thickness_to_numeric for room roof and implement") + if self.is_room_roof_insulated(): + return # If we have a u-value already, need to implement this if u_value: @@ -118,7 +129,17 @@ class RoofRecommendations: 
return raise NotImplementedError("Implement me") - u_value = get_roof_u_value(**{**self.property.roof, "age_band": self.property.age_band}) + u_value = get_roof_u_value( + insulation_thickness=self.property.roof["insulation_thickness"], + has_dwelling_above=self.property.roof["has_dwelling_above"], + is_loft=self.property.roof["is_loft"], + is_roof_room=self.property.roof["is_roof_room"], + is_thatched=self.property.roof["is_thatched"], + age_band=self.property.age_band, + is_flat=self.property.roof["is_flat"], + is_pitched=self.property.roof["is_pitched"], + is_at_rafters=self.property.roof["is_at_rafters"], + ) self.estimated_u_value = u_value if (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and ( diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index 9b5e22d1..4980f30a 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -207,6 +207,17 @@ def get_wall_u_value( def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched): """Get the U-value from table S9 based on the insulation thickness.""" + + if is_roof_room: + # We re-map the thickness + thickness_map = { + "below average": 50, + "average": 100, + "above average": 270, + "none": 0, + } + thickness = thickness_map[thickness] + if thickness in ["below average", "average", "above average", "none", None] or ( not is_loft and not is_roof_room ): @@ -676,7 +687,7 @@ def estimate_windows( property_type, built_form, construction_age_band, floor_area, number_habitable_rooms ): # If there is an extension, that will boost the number of habitable rooms - + # Base window count based on habitable rooms window_count = number_habitable_rooms From d4d9b8e518ae81d307d81657533b4edf4d616840 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 30 Jul 2024 11:17:21 +0100 Subject: [PATCH 41/49] VE recommendations pushed to front end --- backend/Property.py | 6 ++++- backend/app/plan/router.py | 30 
++++++++++++++++--------- backend/ml_models/Valuation.py | 3 +++ etl/xml_survey_extraction/app.py | 2 +- recommendations/recommendation_utils.py | 8 +++---- 5 files changed, 33 insertions(+), 16 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 1586835a..f82c03a7 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -183,6 +183,7 @@ class Property: # This additional condition data should change how we pass kwargs to this. We should no longer need to pass # kwargs to this class, but instead, we should pass the energy assessment condition data self.energy_assessment_condition_data = energy_assessment["condition"] + self.energy_assessment_is_newer = energy_assessment["energy_assessment_is_newer"] # TODO: We keep this but only temporarily until we add bathrooms, bedrooms, building id to the condition data self.parse_kwargs(kwargs) @@ -877,7 +878,10 @@ class Property: property_data = { "creation_status": "READY", "uprn": int(self.data["uprn"]), - "building_reference_number": int(self.data["building-reference-number"]), + "building_reference_number": ( + int(self.data["building-reference-number"]) if + self.data["building-reference-number"] is not None else None + ), "has_pre_condition_report": True, "has_recommendations": True, "property_type": self.data["property-type"], diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 811ec4a3..e28b4d27 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -232,11 +232,12 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict): """ if not energy_assessment["epc"]: + energy_assessment_is_newer = False return { 'original_epc': epc_searcher.newest_epc.copy(), 'full_sap_epc': epc_searcher.full_sap_epc.copy(), 'old_data': epc_searcher.older_epcs.copy(), - } + }, energy_assessment_is_newer epc = energy_assessment["epc"] energy_assessment_date = epc["inspection-date"].strftime("%Y-%m-%d") @@ -249,11 +250,12 @@ def 
create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict): # We check if the energy assessment is newer than the newest EPC if pd.to_datetime(energy_assessment_date) > pd.to_datetime(epc_searcher.newest_epc["inspection-date"]): # In this case, our energy assessment is newer than the EPCs available for this property + energy_assessment_is_newer = True return { "original_epc": epc, "full_sap_epc": epc_searcher.full_sap_epc.copy(), "old_data": epc_searcher.older_epcs.copy() + [epc_searcher.newest_epc.copy()] - } + }, energy_assessment_is_newer # We check if the EPC we have produced is contained in the set of EPCs done for the property # We do this based on inspection-date and SAP @@ -262,6 +264,7 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict): if x["inspection-date"] == energy_assessment_date and x["current-energy-efficiency"] == epc["current-energy-efficiency"] ] + energy_assessment_is_newer = False if epc_in_historicals: # Then the EPC we have produced is already in the set of EPCs, and our EPC is older than the newest @@ -269,7 +272,7 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict): "original_epc": epc_searcher.newest_epc.copy(), "full_sap_epc": epc_searcher.full_sap_epc.copy(), "old_data": epc_searcher.older_epcs.copy() - } + }, energy_assessment_is_newer # In this case, our EPC is older than the newest publically avaible one, but is not contained in # the historicals, so it can't have been lodged, so we include it in the old data @@ -277,7 +280,7 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict): 'original_epc': epc_searcher.newest_epc.copy(), 'full_sap_epc': epc_searcher.full_sap_epc.copy(), 'old_data': epc_searcher.older_epcs.copy() + [epc], - } + }, energy_assessment_is_newer router = APIRouter( @@ -364,8 +367,11 @@ async def trigger_plan(body: PlanTriggerRequest): # If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that. 
# Otherwise, we use the newest EPC - epc_records = create_epc_records(epc_searcher, energy_assessment) - + # energy_assessment_is_newer will tell us if the energy assessment is newer than the newest EPC that + # has been publically lodged + epc_records, energy_assessment["energy_assessment_is_newer"] = create_epc_records( + epc_searcher, energy_assessment + ) patch = next(( x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) ), {}) @@ -432,6 +438,7 @@ async def trigger_plan(body: PlanTriggerRequest): p.get_components(cleaned=cleaned, energy_consumption_client=energy_consumption_client) p.get_spatial_data(uprn_filenames) + logger.info("Performing solar analysis") # TODO: Tidy this up building_ids = [ { @@ -729,10 +736,13 @@ async def trigger_plan(body: PlanTriggerRequest): property_value_increase_ranges[p.id] = valuations # Your existing operations - property_details_epc = p.get_property_details_epc( - portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, - ) - create_property_details_epc(session, property_details_epc) + # If we have an energy assessment, which is more recent than the EPC, we don't need to store + # the EPC details in the database + if not p.energy_assessment_is_newer: + property_details_epc = p.get_property_details_epc( + portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, + ) + create_property_details_epc(session, property_details_epc) update_or_create_property_spatial_details(session, p.uprn, p.spatial) diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index b87f156b..cbcebb9f 100644 --- a/backend/ml_models/Valuation.py +++ b/backend/ml_models/Valuation.py @@ -100,6 +100,9 @@ class PropertyValuation: 200140647: 481_000, 200140648: 373_000, 200140649: 373_000, + # Vander Elliot Intrusive surveys + 12103116: 1_537_000, + 12103117: 1_404_000, } # We base our valuation uplifts on a number of sources diff --git a/etl/xml_survey_extraction/app.py 
b/etl/xml_survey_extraction/app.py index 73551d09..ab9eae2d 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -127,7 +127,7 @@ def main(): "already_installed_file_path": "", "patches_file_path": "", "non_invasive_recommendations_file_path": "", - # "exclusions": [], + "exclusions": ["floor_insulation", "fireplace"], "budget": None, } print(body) diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index 4980f30a..d14a0d4c 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -211,10 +211,10 @@ def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched): if is_roof_room: # We re-map the thickness thickness_map = { - "below average": 50, - "average": 100, - "above average": 270, - "none": 0, + "below average": "50", + "average": "100", + "above average": "270", + "none": "0", } thickness = thickness_map[thickness] From c948c240611130313724511318deb29400db2fec Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 30 Jul 2024 11:37:02 +0100 Subject: [PATCH 42/49] Adding back storage of property details epc --- backend/app/plan/router.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index e28b4d27..099d0827 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -734,15 +734,11 @@ async def trigger_plan(body: PlanTriggerRequest): valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc) property_value_increase_ranges[p.id] = valuations - - # Your existing operations - # If we have an energy assessment, which is more recent than the EPC, we don't need to store - # the EPC details in the database - if not p.energy_assessment_is_newer: - property_details_epc = p.get_property_details_epc( - portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, - ) - create_property_details_epc(session, 
property_details_epc) + + property_details_epc = p.get_property_details_epc( + portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, + ) + create_property_details_epc(session, property_details_epc) update_or_create_property_spatial_details(session, p.uprn, p.spatial) From e50d82ac24d61296302195419005f0021d9af96d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 30 Jul 2024 11:45:24 +0100 Subject: [PATCH 43/49] rounding photo supply to the nearest 5 --- recommendations/SolarPvRecommendations.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 63519d02..66d2ac78 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -174,6 +174,10 @@ class SolarPvRecommendations: if already_installed: cost_result = override_costs(cost_result) + # We calculate the photo_supply we're going to simulate the impact with and we round this to the + # nearest 5 + photo_supply = round(roof_coverage_percent / 5) * 5 + self.recommendation.append( { "phase": phase, @@ -188,9 +192,9 @@ class SolarPvRecommendations: # This is required for simulating the SAP impact. 
solar_pv_percentage is between 0 & 1 so we # scale # back up here - "photo_supply": roof_coverage_percent, + "photo_supply": photo_supply, "has_battery": has_battery, "initial_ac_kwh_per_year": recommendation_config["initial_ac_kwh_per_year"], - "description_simulation": {"photo-supply": roof_coverage_percent}, + "description_simulation": {"photo-supply": photo_supply}, } ) From 996c71c1893deac7df89fa50bc76b5cc11c18880 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 30 Jul 2024 15:52:38 +0100 Subject: [PATCH 44/49] handle recommendations for insulated room roof --- backend/apis/GoogleSolarApi.py | 5 +++-- recommendations/RoofRecommendations.py | 10 +++++++++- recommendations/SolarPvRecommendations.py | 8 ++------ recommendations/recommendation_utils.py | 8 +++++--- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index c6bb3dde..579e985d 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -123,7 +123,8 @@ class GoogleSolarApi: :param longitude: The longitude of the location. :param latitude: The latitude of the location. - :param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude. + :param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude, + that we wish to size the solar panels up against :param property_instance: The property instance associated to the longitude and latitude. :param required_quality: The required quality of the data (default is "MEDIUM"). :param is_building: Whether the energy consumption is for a building or a unit. 
@@ -266,7 +267,7 @@ class GoogleSolarApi: cost = cost_instance.solar_pv( wattage=wattage, has_battery=False )["total"] - + roi_summary.append( { "segmentIndex": segment["segmentIndex"], diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index 615289de..56f3721a 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -93,11 +93,19 @@ class RoofRecommendations: Check if the room roof is already insulated """ - return ( + full_insulated_room_roof = ( self.property.roof["is_roof_room"] and self.property.roof["insulation_thickness"] in ["average", "above_average"] ) + room_roof_insulated_at_rafters = ( + self.property.roof["is_pitched"] and + self.property.roof["is_at_rafters"] and + self.property.roof["insulation_thickness"] in ["average", "above_average"] + ) + + return full_insulated_room_roof or room_roof_insulated_at_rafters + def recommend(self, phase): if self.property.roof["has_dwelling_above"]: diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 66d2ac78..63519d02 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -174,10 +174,6 @@ class SolarPvRecommendations: if already_installed: cost_result = override_costs(cost_result) - # We calculate the photo_supply we're going to simulate the impact with and we round this to the - # nearest 5 - photo_supply = round(roof_coverage_percent / 5) * 5 - self.recommendation.append( { "phase": phase, @@ -192,9 +188,9 @@ class SolarPvRecommendations: # This is required for simulating the SAP impact. 
solar_pv_percentage is between 0 & 1 so we # scale # back up here - "photo_supply": photo_supply, + "photo_supply": roof_coverage_percent, "has_battery": has_battery, "initial_ac_kwh_per_year": recommendation_config["initial_ac_kwh_per_year"], - "description_simulation": {"photo-supply": photo_supply}, + "description_simulation": {"photo-supply": roof_coverage_percent}, } ) diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index d14a0d4c..ce32e061 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -205,10 +205,11 @@ def get_wall_u_value( return float(mapped_value) -def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched): +def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched, is_at_rafters): """Get the U-value from table S9 based on the insulation thickness.""" - if is_roof_room: + # If the roof as pitched & insulated at the rafters, it's a room roof + if is_roof_room or is_at_rafters: # We re-map the thickness thickness_map = { "below average": "50", @@ -219,7 +220,7 @@ def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched): thickness = thickness_map[thickness] if thickness in ["below average", "average", "above average", "none", None] or ( - not is_loft and not is_roof_room + not is_loft and not is_roof_room and not is_at_rafters ): return None elif thickness.endswith("+"): @@ -291,6 +292,7 @@ def get_roof_u_value( is_loft=is_loft, is_roof_room=is_roof_room, is_thatched=is_thatched, + is_at_rafters=is_at_rafters ) if u_value is not None: From cadbd4f48a038eaa9774ef7efd374ee990376909 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 30 Jul 2024 17:42:36 +0100 Subject: [PATCH 45/49] Adding scenario structure to backend --- backend/Property.py | 2 + .../app/db/functions/portfolio_functions.py | 2 +- .../db/functions/recommendations_functions.py | 26 +- backend/app/db/models/recommendations.py | 19 ++ 
backend/app/plan/router.py | 66 +++-- backend/app/plan/schemas.py | 4 + etl/xml_survey_extraction/app.py | 261 ++++++++++++------ 7 files changed, 268 insertions(+), 112 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index f82c03a7..a1bfe265 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -77,12 +77,14 @@ class Property: non_invasive_recommendations=None, measures=None, energy_assessment=None, + is_new=True, **kwargs ): self.epc_record = epc_record self.id = id + self.is_new = is_new self.address = address self.postcode = postcode diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py index 402675e8..008c4b8b 100644 --- a/backend/app/db/functions/portfolio_functions.py +++ b/backend/app/db/functions/portfolio_functions.py @@ -30,7 +30,7 @@ def aggregate_portfolio_recommendations( **aggregated_data } - # Get the portfolio and update the fields + # Get the portfolio and update the fields. This data needs to be stored against the plan, not the portfolio portfolio = session.query(Portfolio).filter_by(id=portfolio_id).one() # Update the data for key, value in aggregates_dict.items(): diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 365829e4..cfb3d570 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,8 +1,11 @@ from sqlalchemy import insert, delete from sqlalchemy.orm import Session -from backend.app.db.models.recommendations import Plan, Recommendation, RecommendationMaterials, PlanRecommendations -from backend.app.db.models.portfolio import PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, \ - PropertyDetailsEpcModel +from backend.app.db.models.recommendations import ( + Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario +) +from backend.app.db.models.portfolio import ( + 
PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, PropertyDetailsEpcModel +) def create_plan(session: Session, plan): @@ -19,6 +22,20 @@ def create_plan(session: Session, plan): return new_plan.id +def create_scenario(session: Session, scenario): + """ + This function will create a record for the scenario in the database if it does not exist. + :param session: The database session + :param scenario: dictionary of data representing a scenario to be created + """ + + new_scenario = Scenario(**scenario) + session.add(new_scenario) + session.flush() + + return new_scenario.id + + def create_recommendation(session: Session, recommendation): """ This function will create a record for the recommendation in the database if it does not exist. @@ -148,6 +165,9 @@ def clear_portfolio(session: Session, portfolio_id: int): # Delete all Plans associated with the portfolio session.execute(delete(Plan).where(Plan.portfolio_id == portfolio_id)) + # Delete all Scenarios associated with the portfolio + session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id)) + # Delete all Recommendations associated with the properties session.execute(delete(Recommendation).where(Recommendation.property_id.in_(property_ids))) diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 8ab7908f..6eddae1f 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -50,6 +50,7 @@ class Plan(Base): __tablename__ = 'plan' id = Column(BigInteger, primary_key=True, autoincrement=True) + name = Column(String, nullable=True, default="") portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False) property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False) created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) @@ -65,3 +66,21 @@ class PlanRecommendations(Base): id = Column(BigInteger, primary_key=True, autoincrement=True) plan_id = 
Column(BigInteger, ForeignKey('plan.id'), nullable=False) recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False) + + +class Scenario(Base): + __tablename__ = 'scenario' + + id = Column(BigInteger, primary_key=True, autoincrement=True) + name = Column(String, nullable=False) + created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) + budget = Column(Float) + portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False) + housing_type = Column(String, nullable=False) + goal = Column(String, nullable=False) + trigger_file_path = Column(String, nullable=False) + already_installed_file_path = Column(String) + patches_file_path = Column(String) + non_invasive_recommendations_file_path = Column(String) + exclusions = Column(String) + multi_plan = Column(Boolean, default=False) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 099d0827..1340bae3 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -20,7 +20,7 @@ from backend.app.db.functions.property_functions import ( update_or_create_property_spatial_details ) from backend.app.db.functions.recommendations_functions import ( - create_plan, create_plan_recommendations, upload_recommendations + create_plan, create_plan_recommendations, upload_recommendations, create_scenario ) from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn from backend.app.db.models.portfolio import rating_lookup @@ -354,16 +354,17 @@ async def trigger_plan(body: PlanTriggerRequest): property_id, is_new = create_property( session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn ) - if not is_new: + if not is_new and not body.multi_plan: continue - create_property_targets( - session, - property_id=property_id, - portfolio_id=body.portfolio_id, - epc_target=body.goal_value, - heat_demand_target=None - ) + if is_new: + create_property_targets( + session, + 
property_id=property_id, + portfolio_id=body.portfolio_id, + epc_target=body.goal_value, + heat_demand_target=None + ) # If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that. # Otherwise, we use the newest EPC @@ -396,6 +397,7 @@ async def trigger_plan(body: PlanTriggerRequest): input_properties.append( Property( id=property_id, + is_new=is_new, address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, @@ -409,6 +411,25 @@ async def trigger_plan(body: PlanTriggerRequest): if not input_properties: return Response(status_code=204) + # If we have any work to do, we create a new scenario + scenario = create_scenario( + session=session, + scenario={ + "name": body.scenario_name, + "created_at": created_at, + "budget": body.budget, + "portfolio_id": body.portfolio_id, + "housing_type": body.housing_type, + "goal": body.goal, + "trigger_file_path": body.trigger_file_path, + "already_installed_file_path": body.already_installed_file_path, + "patches_file_path": body.patches_file_path, + "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path, + "exclusions": body.exclusions, + "multi_plan": body.multi_plan + } + ) + # The materials data could be cached or local so we don't need to make # consistent requests to the backend for # the same data @@ -734,18 +755,19 @@ async def trigger_plan(body: PlanTriggerRequest): valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc) property_value_increase_ranges[p.id] = valuations - - property_details_epc = p.get_property_details_epc( - portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, - ) - create_property_details_epc(session, property_details_epc) - update_or_create_property_spatial_details(session, p.uprn, p.spatial) + if p.is_new: + property_details_epc = p.get_property_details_epc( + portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, + ) + 
create_property_details_epc(session, property_details_epc) - property_data = p.get_full_property_data(current_valuation=valuations["current_value"]) - update_property_data( - session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data - ) + update_or_create_property_spatial_details(session, p.uprn, p.spatial) + + property_data = p.get_full_property_data(current_valuation=valuations["current_value"]) + update_property_data( + session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data + ) if not recommendations_to_upload: continue @@ -753,7 +775,8 @@ async def trigger_plan(body: PlanTriggerRequest): new_plan_id = create_plan(session, { "portfolio_id": body.portfolio_id, "property_id": p.id, - "is_default": True, + "is_default": True if p.is_new else False, + "name": body.scenario_name, "valuation_increase_lower_bound": ( valuations["lower_bound_increased_value"] - valuations["current_value"] ), @@ -807,6 +830,8 @@ async def trigger_plan(body: PlanTriggerRequest): aggregate_portfolio_recommendations( session, portfolio_id=body.portfolio_id, + multi_plan=body.multi_plan, + total_valuation_increase=total_valuation_increase, labour_days=labour_days, aggregated_data=aggregated_data @@ -941,6 +966,7 @@ async def build_mds(body: MdsRequest): # already_installed=property_already_installed, # non_invasive_recommendations=property_non_invasive_recommendations, measures=measures, + is_new=is_new, **Property.extract_kwargs(config) ) ) diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 77ac4217..263115af 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -13,6 +13,10 @@ class PlanTriggerRequest(BaseModel): patches_file_path: Optional[str] = None non_invasive_recommendations_file_path: Optional[str] = None exclusions: Optional[conlist(str, min_items=1)] = None + scenario_name: Optional[str] = "" + # If true, will allow us to create multiple plans for the same portfolio, 
whereas if this is false, if this property + # exists in the portfolio, it will be ignored + multi_plan: Optional[bool] = False # Pre-defined list of possibilities for exclusions _allowed_exclusions = { diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index ab9eae2d..aeaf8abe 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -10,11 +10,97 @@ from io import BytesIO logger = setup_logger() -SURVEYORS = "JAFFERSONS ENERGY CONSULTANTS" -PROJECT_CODE = "VDE001" BUCKET = "retrofit-energy-assessments-dev" -PORTFOLIO_ID = 86 USER_ID = 8 +SCENARIOS = { + 86: { + "project_code": "VDE001", + "surveyor": "JAFFERSONS ENERGY CONSULTANTS", + "bodies": [ + # Scenario A: Cavity wall insulation + { + "portfolio_id": str(86), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "A", + "trigger_file_path": "", + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "exclusions": ["floor_insulation", "fireplace", "solar_pv", "heating"], + "budget": None, + "scenario_name": "Low Hanging Fruit", + "multi_plan": True, + }, + # Scenario B: CWI, Solar PV, AHSP + { + "portfolio_id": str(86), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "A", + "trigger_file_path": "", + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "exclusions": ["floor_insulation", "fireplace"], + "budget": None, + "Scenario Name": "Deep Retrofit", + "multi_plan": True, + }, + # Scenario C, CWI, floor insulation, PV, AHSP + { + "portfolio_id": str(86), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "A", + "trigger_file_path": "", + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "exclusions": ["fireplace"], + "budget": None, + "Scenario Name": "Whole House Retrofit", + "multi_plan": True, + } + ] + }, + 
87: { + "project_code": "VDE002", + "surveyor": "JAFFERSONS ENERGY CONSULTANTS", + "bodies": [ + # Scenario A: Solar PV, AHSP + { + "portfolio_id": str(87), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "A", + "trigger_file_path": "", + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "exclusions": ["floor_insulation", "fireplace"], + "budget": None, + "Scenario Name": "Deep Retrofit", + "multi_plan": True, + }, + # Scenario B, floor insulation, PV, AHSP + { + "portfolio_id": str(87), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "A", + "trigger_file_path": "", + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "exclusions": ["fireplace"], + "budget": None, + "Scenario Name": "Whole House Retrofit", + "multi_plan": True, + } + ] + } +} def main(): @@ -34,103 +120,102 @@ def main(): # TODO: Store the project code in the database # - energy_assessments = list_files_and_subfolders_in_s3_folder( - bucket_name=BUCKET, folder_name=f"{SURVEYORS}/{PROJECT_CODE}/" - ) - logger.info(f"Found {len(energy_assessments)} energy assessments for {SURVEYORS} and {PROJECT_CODE}") - assessments_map = {} - for assessment in energy_assessments: - uploaded_xmls = list_xmls_in_s3_folder( - bucket_name=BUCKET, folder_name=os.path.join(assessment, "docs & plans") + for scenario_config in SCENARIOS.values(): + energy_assessments = list_files_and_subfolders_in_s3_folder( + bucket_name=BUCKET, folder_name=f"{scenario_config['surveyor']}/{scenario_config['project_code']}/" ) - uprn = int(assessment.rstrip("/").split("/")[-1]) - assessments_map[uprn] = uploaded_xmls - logger.info(f"Exatracted XMLS for the energy assessments") - - # TODO: IF we have many uploads, we can do them in a batch so we don't try and upload huge amounts of data to - # the database at onece - - # TODO: We now have detailed information about primary 
and secondary walls, so we should use this information - # in our recommendations when we have it - # For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions, where - # the physical dimensions and the fabric of each building is constructed in a way as if each building is - # separate. We should use this information to make recommendations that are specific to each building - # part, though the problem here is that while the fabric and dimensions are separate, the actual SAP, CO2, etc - # figures span across the entire property. - # Idea: We can collect all of this information by building part and store it separately in the database - # against the uprn. We can have key data for the EPC, but then also additional data for each building - # part. We can then use this data to make recommendations that are specific to each building part - # We should probably re-think this data model, so we break up the data in a more considered fasion and produce - # the underlying EPC data as a summary of the building parts. Not only do we have data against the main - # dwelling and extensions, but we also have multiple windows with individiaul pieces of information that - # we can use to make recommendations. We should store this data in a way that we can easily access it and - # use it to make recommendations (e.g. 
we should have a Windows table) - - # For each property, we download the xmls and extract the data - database_data = [] - for uprn, xmls in assessments_map.items(): - extracted_data = {} - for xml in xmls: - xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml) - xml_data_io = BytesIO(xml_data) - xml_parser = XmlParser( - file=xml_data_io, - filekey=os.path.join(f"s3://{BUCKET}", xml), - uprn=uprn, - surveyor_company=SURVEYORS, + logger.info( + f"Found {len(energy_assessments)} energy assessments for {scenario_config['surveyor']} and " + f"{scenario_config['project_code']}" + ) + assessments_map = {} + for assessment in energy_assessments: + uploaded_xmls = list_xmls_in_s3_folder( + bucket_name=BUCKET, folder_name=os.path.join(assessment, "docs & plans") ) - xml_parser.run() - if xml_parser.is_lig: - logger.info(f"Extracted data from {xml}") - extracted_epc = xml_parser.epc - extracted_additional_data = xml_parser.additional_data + uprn = int(assessment.rstrip("/").split("/")[-1]) + assessments_map[uprn] = uploaded_xmls - data_to_update = { - **extracted_epc, **extracted_additional_data - } + logger.info(f"Exatracted XMLS for the energy assessments") - # We need to update the keys to match the database schema - i.e. we should replace all hyphens with - # underscores - data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()} + # TODO: IF we have many uploads, we can do them in a batch so we don't try and upload huge amounts of data to + # the database at onece - extracted_data.update(data_to_update) + # TODO: We now have detailed information about primary and secondary walls, so we should use this information + # in our recommendations when we have it + # For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions, + # where + # the physical dimensions and the fabric of each building is constructed in a way as if each building is + # separate. 
We should use this information to make recommendations that are specific to each building + # part, though the problem here is that while the fabric and dimensions are separate, the actual SAP, + # CO2, etc + # figures span across the entire property. + # Idea: We can collect all of this information by building part and store it separately in the database + # against the uprn. We can have key data for the EPC, but then also additional data for each + # building + # part. We can then use this data to make recommendations that are specific to each building part + # We should probably re-think this data model, so we break up the data in a more considered fasion and + # produce + # the underlying EPC data as a summary of the building parts. Not only do we have data against the main + # dwelling and extensions, but we also have multiple windows with individiaul pieces of information that + # we can use to make recommendations. We should store this data in a way that we can easily access it and + # use it to make recommendations (e.g. 
we should have a Windows table) - database_data.append(extracted_data) + # For each property, we download the xmls and extract the data + database_data = [] + for uprn, xmls in assessments_map.items(): + extracted_data = {} + for xml in xmls: + xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml) + xml_data_io = BytesIO(xml_data) + xml_parser = XmlParser( + file=xml_data_io, + filekey=os.path.join(f"s3://{BUCKET}", xml), + uprn=uprn, + surveyor_company=scenario_config["surveyor"], + ) + xml_parser.run() + if xml_parser.is_lig: + logger.info(f"Extracted data from {xml}") + extracted_epc = xml_parser.epc + extracted_additional_data = xml_parser.additional_data - logger.info("Uploading data to the database") - session = sessionmaker(bind=db_engine)() - bulk_insert_energy_assessments(session, database_data) - session.close() + data_to_update = { + **extracted_epc, **extracted_additional_data + } - # Create the asset list - asset_list = [ - {"uprn": x["uprn"], "address": x["address1"], "postcode": x["postcode"]} for x in database_data - ] - asset_list = pd.DataFrame(asset_list) + # We need to update the keys to match the database schema - i.e. 
we should replace all hyphens with + # underscores + data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()} - # Store the asset list in s3 - filename = f"{USER_ID}/{PORTFOLIO_ID}/non_intrusives.csv" - save_csv_to_s3( - dataframe=asset_list, - bucket_name="retrofit-plan-inputs-dev", - file_name=filename - ) + extracted_data.update(data_to_update) - body = { - "portfolio_id": str(PORTFOLIO_ID), - "housing_type": "Private", - "goal": "Increase EPC", - "goal_value": "A", - "trigger_file_path": filename, - "already_installed_file_path": "", - "patches_file_path": "", - "non_invasive_recommendations_file_path": "", - "exclusions": ["floor_insulation", "fireplace"], - "budget": None, - } - print(body) + database_data.append(extracted_data) + + logger.info("Uploading data to the database") + session = sessionmaker(bind=db_engine)() + bulk_insert_energy_assessments(session, database_data) + session.close() + + # Create the asset list + asset_list = [ + {"uprn": x["uprn"], "address": x["address1"], "postcode": x["postcode"]} for x in database_data + ] + asset_list = pd.DataFrame(asset_list) + + # Store the asset list in s3 + filename = f"{USER_ID}/{scenario_config['bodies'][0]['portfolio_id']}/non_intrusives.csv" + save_csv_to_s3( + dataframe=asset_list, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + for body in scenario_config["bodies"]: + body["trigger_file_path"] = filename + print(body) # TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which # can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat From ccacdaac65865bdff15a0225a05f845ade8130a1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 30 Jul 2024 17:50:55 +0100 Subject: [PATCH 46/49] adding try except for some db functions --- .../db/functions/recommendations_functions.py | 46 +++++++++++-------- backend/app/plan/router.py | 5 +- backend/app/plan/schemas.py | 1 + 
etl/xml_survey_extraction/app.py | 10 ++-- .../optimiser/optimiser_functions.py | 2 +- 5 files changed, 36 insertions(+), 28 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index cfb3d570..c7765039 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,5 +1,6 @@ from sqlalchemy import insert, delete from sqlalchemy.orm import Session +from sqlalchemy.exc import SQLAlchemyError from backend.app.db.models.recommendations import ( Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario ) @@ -14,12 +15,15 @@ def create_plan(session: Session, plan): :param session: The database session :param plan: dictionary of data representing a plan to be created """ - - new_plan = Plan(**plan) - session.add(new_plan) - session.flush() - - return new_plan.id + try: + new_plan = Plan(**plan) + session.add(new_plan) + session.flush() + session.commit() + return new_plan.id + except SQLAlchemyError as e: + session.rollback() + raise e def create_scenario(session: Session, scenario): @@ -28,12 +32,15 @@ def create_scenario(session: Session, scenario): :param session: The database session :param scenario: dictionary of data representing a scenario to be created """ - - new_scenario = Scenario(**scenario) - session.add(new_scenario) - session.flush() - - return new_scenario.id + try: + new_scenario = Scenario(**scenario) + session.add(new_scenario) + session.flush() + session.commit() + return new_scenario + except SQLAlchemyError as e: + session.rollback() + raise e def create_recommendation(session: Session, recommendation): @@ -42,12 +49,15 @@ def create_recommendation(session: Session, recommendation): :param session: The database session :param recommendation: dictionary of data representing a recommendation to be created """ - - new_recommendation = Recommendation(**recommendation) - 
session.add(new_recommendation) - session.flush() - - return new_recommendation.id + try: + new_recommendation = Recommendation(**recommendation) + session.add(new_recommendation) + session.flush() + session.commit() + return new_recommendation.id + except SQLAlchemyError as e: + session.rollback() + raise e def create_recommendation_material(session: Session, recommendation_id, material_id, depth): diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 1340bae3..4d73778e 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -297,9 +297,6 @@ async def trigger_plan(body: PlanTriggerRequest): session = sessionmaker(bind=db_engine)() created_at = datetime.now().isoformat() - # TODO: We should store the trigger file path in the database with the plan so we can track the file that - # triggered the plan - # TODO: if the measure is already installed, it should actually be the very first phase try: @@ -412,7 +409,7 @@ async def trigger_plan(body: PlanTriggerRequest): return Response(status_code=204) # If we have any work to do, we create a new scenario - scenario = create_scenario( + engine_scenario = create_scenario( session=session, scenario={ "name": body.scenario_name, diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 263115af..b1e3a43a 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -4,6 +4,7 @@ from typing import Optional class PlanTriggerRequest(BaseModel): budget: Optional[float] = None + # This can only have a fixed set of values goal: str housing_type: str goal_value: str diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index aeaf8abe..ed2d20b6 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -21,7 +21,7 @@ SCENARIOS = { { "portfolio_id": str(86), "housing_type": "Private", - "goal": "Increase EPC", + "goal": "Increasing EPC", "goal_value": "A", "trigger_file_path": "", 
"already_installed_file_path": "", @@ -36,7 +36,7 @@ SCENARIOS = { { "portfolio_id": str(86), "housing_type": "Private", - "goal": "Increase EPC", + "goal": "Increasing EPC", "goal_value": "A", "trigger_file_path": "", "already_installed_file_path": "", @@ -51,7 +51,7 @@ SCENARIOS = { { "portfolio_id": str(86), "housing_type": "Private", - "goal": "Increase EPC", + "goal": "Increasing EPC", "goal_value": "A", "trigger_file_path": "", "already_installed_file_path": "", @@ -72,7 +72,7 @@ SCENARIOS = { { "portfolio_id": str(87), "housing_type": "Private", - "goal": "Increase EPC", + "goal": "Increasing EPC", "goal_value": "A", "trigger_file_path": "", "already_installed_file_path": "", @@ -87,7 +87,7 @@ SCENARIOS = { { "portfolio_id": str(87), "housing_type": "Private", - "goal": "Increase EPC", + "goal": "Increasing EPC", "goal_value": "A", "trigger_file_path": "", "already_installed_file_path": "", diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 083a7c25..c1123e3d 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -9,7 +9,7 @@ def prepare_input_measures(property_recommendations, goal): """ goal_map = { - "Increase EPC": "sap_points" + "Increasing EPC": "sap_points" } goal_key = goal_map[goal] From b1f4f154ddb9371faa7cd49e9fbc52f02963bcbc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 30 Jul 2024 20:00:32 +0100 Subject: [PATCH 47/49] Refactored recommendation uploading to return ids explicitly on upload --- .../app/db/functions/portfolio_functions.py | 30 +++-- .../db/functions/recommendations_functions.py | 104 +++++++++--------- backend/app/db/models/recommendations.py | 1 + backend/app/plan/router.py | 10 +- backend/app/plan/schemas.py | 3 +- etl/xml_survey_extraction/app.py | 8 +- 6 files changed, 84 insertions(+), 72 deletions(-) diff --git a/backend/app/db/functions/portfolio_functions.py 
b/backend/app/db/functions/portfolio_functions.py index 008c4b8b..ffdabfb6 100644 --- a/backend/app/db/functions/portfolio_functions.py +++ b/backend/app/db/functions/portfolio_functions.py @@ -1,10 +1,14 @@ from sqlalchemy import func -from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation -from backend.app.db.models.portfolio import Portfolio +from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario def aggregate_portfolio_recommendations( - session, portfolio_id: int, total_valuation_increase: float, labour_days: float, aggregated_data: dict + session, + portfolio_id: int, + scenario_id: int, + total_valuation_increase: float, + labour_days: float, + aggregated_data: dict ): # Aggregate multiple fields aggregates = ( @@ -17,7 +21,11 @@ def aggregate_portfolio_recommendations( ) .join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id) .join(Plan, Plan.id == PlanRecommendations.plan_id) - .filter(Plan.portfolio_id == portfolio_id, Plan.is_default == True, Recommendation.default == True) + .filter( + Plan.portfolio_id == portfolio_id, + Plan.scenario_id == scenario_id, + Recommendation.default == True + ) .one() ) @@ -30,16 +38,16 @@ def aggregate_portfolio_recommendations( **aggregated_data } - # Get the portfolio and update the fields. This data needs to be stored against the plan, not the portfolio - portfolio = session.query(Portfolio).filter_by(id=portfolio_id).one() + # Get the scenario and update the fields. 
This data needs to be stored against the scenario, not the portfolio + portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one() # Update the data for key, value in aggregates_dict.items(): - setattr(portfolio, key, value) + setattr(portfolio_scenario, key, value) # Insert total valuation increase and labour days - portfolio.property_valuation_increase = total_valuation_increase - portfolio.labour_days = labour_days + portfolio_scenario.property_valuation_increase = total_valuation_increase + portfolio_scenario.labour_days = labour_days - # Merge the updated portfolio back into the session - session.merge(portfolio) + # Merge the updated portfolio plan back into the session + session.merge(portfolio_scenario) session.flush() diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index c7765039..7ff09f22 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -95,62 +95,68 @@ def create_plan_recommendations(session: Session, plan_id, recommendation_ids): session.execute(insert(PlanRecommendations).values(data)) -def upload_recommendations(session: Session, recommendations_to_upload, property_id): - # Prepare data for bulk insert for Recommendation - recommendations_data = [ - { - "property_id": property_id, - "type": rec["type"], - "description": rec["description"], - "estimated_cost": rec["total"], - "default": rec["default"], - "starting_u_value": rec.get("starting_u_value"), - "new_u_value": rec.get("new_u_value"), - "sap_points": rec["sap_points"], - "energy_savings": rec["heat_demand"], - "kwh_savings": rec["kwh_savings"], - "co2_equivalent_savings": rec["co2_equivalent_savings"], - "total_work_hours": rec["labour_hours"], - "energy_cost_savings": rec["energy_cost_savings"], - "labour_days": rec["labour_days"], - "already_installed": rec["already_installed"], - } - for rec in recommendations_to_upload - 
] +def upload_recommendations(session: Session, recommendations_to_upload, property_id, new_plan_id): + try: + # Prepare data for bulk insert for Recommendation + recommendations_data = [ + { + "property_id": property_id, + "type": rec["type"], + "description": rec["description"], + "estimated_cost": rec["total"], + "default": rec["default"], + "starting_u_value": rec.get("starting_u_value"), + "new_u_value": rec.get("new_u_value"), + "sap_points": rec["sap_points"], + "energy_savings": rec["heat_demand"], + "kwh_savings": rec["kwh_savings"], + "co2_equivalent_savings": rec["co2_equivalent_savings"], + "total_work_hours": rec["labour_hours"], + "energy_cost_savings": rec["energy_cost_savings"], + "labour_days": rec["labour_days"], + "already_installed": rec["already_installed"], + } + for rec in recommendations_to_upload + ] - session.bulk_insert_mappings(Recommendation, recommendations_data) + # Insert the recommendations, get back the IDs + stmt = insert(Recommendation).returning(Recommendation.id).values(recommendations_data) + result = session.execute(stmt) + uploaded_recommendation_ids = [row[0] for row in result] - # To get the IDs of the newly inserted recommendations, we need to flush the session - session.flush() + # Prepare data for bulk insert for RecommendationMaterials + recommendation_materials_data = [ + { + "recommendation_id": recommendation_id, + "material_id": part["id"], + "depth": int(part["depth"]) if part["depth"] else None, + "quantity": part["quantity"], + "quantity_unit": part["quantity_unit"], + "estimated_cost": part["total"], + } + for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids) + for part in rec["parts"] + ] - # Map the uploaded_recommendation_ids with the original data for reference - uploaded_recommendation_ids = [rec.id for rec in session.query(Recommendation).filter( - Recommendation.property_id == property_id, - Recommendation.description.in_([rec["description"] for rec in 
recommendations_to_upload]) - )] + session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data) - # Prepare data for bulk insert for RecommendationMaterials - # We can have multiple materials per recommendation. The aggregation of the materials will total the - # recommendation figures - recommendation_materials_data = [ - { - "recommendation_id": recommendation_id, - "material_id": part["id"], - "depth": int(part["depth"]) if part["depth"] else None, - "quantity": part["quantity"], - "quantity_unit": part["quantity_unit"], - "estimated_cost": part["total"], - } - for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids) - for part in rec["parts"] - ] + # flush the changes to get the newly created IDs + session.flush() - session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data) + create_plan_recommendations( + session, plan_id=new_plan_id, recommendation_ids=uploaded_recommendation_ids + ) - # flush the changes to get the newly created IDs - session.flush() + # Commit the transaction + session.commit() - return uploaded_recommendation_ids + return True + + except SQLAlchemyError as e: + # Rollback the transaction in case of an error + session.rollback() + print(f"An error occurred: {e}") + return False def clear_portfolio(session: Session, portfolio_id: int): diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 6eddae1f..6ccfe7f7 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -53,6 +53,7 @@ class Plan(Base): name = Column(String, nullable=True, default="") portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False) property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False) + scenario_id = Column(BigInteger, ForeignKey('scenario.id')) # Doesn't have to be linked to a scenario created_at = Column(TIMESTAMP, nullable=False, 
server_default=func.now()) is_default = Column(Boolean, nullable=False) valuation_increase_lower_bound = Column(Float) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 4d73778e..a0d4e585 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -772,6 +772,7 @@ async def trigger_plan(body: PlanTriggerRequest): new_plan_id = create_plan(session, { "portfolio_id": body.portfolio_id, "property_id": p.id, + "scenario_id": engine_scenario.id, "is_default": True if p.is_new else False, "name": body.scenario_name, "valuation_increase_lower_bound": ( @@ -785,10 +786,8 @@ async def trigger_plan(body: PlanTriggerRequest): ), }) - uploaded_recommendation_ids = upload_recommendations(session, recommendations_to_upload, p.id) - - create_plan_recommendations( - session, plan_id=new_plan_id, recommendation_ids=uploaded_recommendation_ids + upload_recommendations( + session, recommendations_to_upload, p.id, new_plan_id ) property_valuation_increases.append( @@ -827,8 +826,7 @@ async def trigger_plan(body: PlanTriggerRequest): aggregate_portfolio_recommendations( session, portfolio_id=body.portfolio_id, - multi_plan=body.multi_plan, - + scenario_id=engine_scenario.id, total_valuation_increase=total_valuation_increase, labour_days=labour_days, aggregated_data=aggregated_data diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index b1e3a43a..108eb1ae 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -4,7 +4,6 @@ from typing import Optional class PlanTriggerRequest(BaseModel): budget: Optional[float] = None - # This can only have a fixed set of values goal: str housing_type: str goal_value: str @@ -36,7 +35,7 @@ class PlanTriggerRequest(BaseModel): "air_source_heat_pump", } - _allowed_goals = {"Increase EPC"} + _allowed_goals = {"Increasing EPC"} _allowed_housing_types = {"Social", "Private"} diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index 
ed2d20b6..a8bffc73 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -44,7 +44,7 @@ SCENARIOS = { "non_invasive_recommendations_file_path": "", "exclusions": ["floor_insulation", "fireplace"], "budget": None, - "Scenario Name": "Deep Retrofit", + "scenario_name": "Deep Retrofit", "multi_plan": True, }, # Scenario C, CWI, floor insulation, PV, AHSP @@ -59,7 +59,7 @@ SCENARIOS = { "non_invasive_recommendations_file_path": "", "exclusions": ["fireplace"], "budget": None, - "Scenario Name": "Whole House Retrofit", + "scenario_name": "Whole House Retrofit", "multi_plan": True, } ] @@ -80,7 +80,7 @@ SCENARIOS = { "non_invasive_recommendations_file_path": "", "exclusions": ["floor_insulation", "fireplace"], "budget": None, - "Scenario Name": "Deep Retrofit", + "scenario_name": "Deep Retrofit", "multi_plan": True, }, # Scenario B, floor insulation, PV, AHSP @@ -95,7 +95,7 @@ SCENARIOS = { "non_invasive_recommendations_file_path": "", "exclusions": ["fireplace"], "budget": None, - "Scenario Name": "Whole House Retrofit", + "scenario_name": "Whole House Retrofit", "multi_plan": True, } ] From 096915bf336b54c547cc80200cbe400abee31acc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 30 Jul 2024 21:11:04 +0100 Subject: [PATCH 48/49] added missing fields to scenarios model --- .../app/db/functions/portfolio_functions.py | 1 + backend/app/db/models/recommendations.py | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py index ffdabfb6..ac340ab5 100644 --- a/backend/app/db/functions/portfolio_functions.py +++ b/backend/app/db/functions/portfolio_functions.py @@ -40,6 +40,7 @@ def aggregate_portfolio_recommendations( # Get the scenario and update the fields. 
This data needs to be stored against the scenario, not the portfolio portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one() + # Update the data for key, value in aggregates_dict.items(): setattr(portfolio_scenario, key, value) diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 6ccfe7f7..ed3f326e 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -85,3 +85,25 @@ class Scenario(Base): non_invasive_recommendations_file_path = Column(String) exclusions = Column(String) multi_plan = Column(Boolean, default=False) + + # Add in the fields we need, which were previously sitting at the portfolio level + cost = Column(Float) + total_work_hours = Column(Float) + energy_savings = Column(Float) + co2_equivalent_savings = Column(Float) + energy_cost_savings = Column(Float) + epc_breakdown_pre_retrofit = Column(String) + epc_breakdown_post_retrofit = Column(String) + number_of_properties = Column(BigInteger) + n_units_to_retrofit = Column(BigInteger) + co2_per_unit_pre_retrofit = Column(String) + co2_per_unit_post_retrofit = Column(String) + energy_bill_per_unit_pre_retrofit = Column(String) + energy_bill_per_unit_post_retrofit = Column(String) + energy_consumption_per_unit_pre_retrofit = Column(String) + energy_consumption_per_unit_post_retrofit = Column(String) + valuation_improvement_per_unit = Column(String) + cost_per_unit = Column(String) + cost_per_co2_saved = Column(String) + cost_per_sap_point = Column(String) + valuation_return_on_investment = Column(String) From 8596878fc010991c647695b13fa3de1abddb9ff3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 31 Jul 2024 11:59:29 +0100 Subject: [PATCH 49/49] recommendations process working --- backend/app/db/functions/recommendations_functions.py | 6 ++++++ backend/app/db/models/recommendations.py | 3 +++ 2 files changed, 9 insertions(+) diff --git
a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 7ff09f22..b03909ee 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -33,6 +33,12 @@ def create_scenario(session: Session, scenario): :param scenario: dictionary of data representing a scenario to be created """ try: + + # Before creating a new scenario, we check if there is a scenario for this portfolio id already + # If there is, it means that any new scenario created will NOT be the default scenario + existing_scenario = session.query(Scenario).filter_by(portfolio_id=scenario["portfolio_id"]).first() + scenario["is_default"] = True if not existing_scenario else False + new_scenario = Scenario(**scenario) session.add(new_scenario) session.flush() diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index ed3f326e..a1743436 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -85,6 +85,7 @@ class Scenario(Base): non_invasive_recommendations_file_path = Column(String) exclusions = Column(String) multi_plan = Column(Boolean, default=False) + is_default = Column(Boolean, default=False, nullable=False) # Add in the fields we need, which were previously sitting at the portfolio level cost = Column(Float) @@ -107,3 +108,5 @@ class Scenario(Base): cost_per_co2_saved = Column(String) cost_per_sap_point = Column(String) valuation_return_on_investment = Column(String) + property_valuation_increase = Column(Float) + labour_days = Column(Float)