import re
from datetime import datetime
from xml.dom.minidom import parseString

import numpy as np
import usaddress

from backend.app.utils import sap_to_epc
from etl.xml_survey_extraction.pcdb import heating_data

# Maps the raw property type value found in the xml to the label used in the EPC record
PROPERTY_TYPE_LOOKUP = {
    "0": "House",
    "House": "House",
    "2": "Flat",
    "3": "Maisonette",
}


def get_house_number(address: str) -> str | None:
    """
    Extract the house number from a free-text address.

    The address is first parsed with the usaddress library. If that does not yield an
    "AddressNumber" component, a regular expression is used instead which looks for a number
    following 'Flat' / 'Apartment' (case-insensitive), or a number at the start of the address.

    :param address: the address to parse
    :return: the house number as a string (with any commas removed when it came from usaddress),
        or None if no number could be found
    """
    parsed = usaddress.parse(address)
    address_numbers = [value for value, label in parsed if label == "AddressNumber"]

    if not address_numbers:
        # Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
        # we also add a custom approach
        # Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
        pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
        match = re.search(pattern, address)
        if match is None:
            return None
        # Return the first non-None group found
        return next(g for g in match.groups() if g is not None)

    # Remove trailing commas
    return address_numbers[0].replace(",", "")


class XmlParser:
    """
    Parses a survey xml file into an EPC-style record (``self.epc``) and a dictionary of
    additional survey data (``self.additional_data``).

    Typical usage is to construct the parser with an open binary file and then call ``run()``.
    Most lookups are strict: a missing mandatory tag raises IndexError/AttributeError and a code
    that is not present in one of the lookup tables below raises KeyError.
    """

    # Class-level defaults; __init__ gives every instance its own dictionaries
    epc = {}
    additional_data = {}
    uprn = None

    # heating/emissions information
    space_heating_kwh = None
    water_heating_kwh = None
    heating_system = None
    heating_controls = None

    # Assessor details
    surveyor_name = None

    number_of_doors = None
    number_of_insulated_doors = None
    windows = None

    # Property dimensions
    number_of_floors = None
    perimeter = None
    heat_loss_perimeter = None
    party_wall_length = None
    total_floor_area = None
    floor_height = None
    insulation_wall_area = None
    floor_dimensions = None

    # The age band lookup is based on the country code
    AGE_BAND_LOOKUP = {
        # England & Wales
        "EAW": {
            "A": "England and Wales: before 1900",
            "B": "England and Wales: 1900-1929",
            "C": "England and Wales: 1930-1949",
            "D": "England and Wales: 1950-1966",
            "E": "England and Wales: 1967-1975",
            "F": "England and Wales: 1976-1982",
            "G": "England and Wales: 1983-1990",
            "H": "England and Wales: 1991-1995",
            "I": "England and Wales: 1996-2002",
            "J": "England and Wales: 2003-2006",
            "K": "England and Wales: 2007-2011",
            "L": "England and Wales: 2012 onwards",
        }
    }

    RATINGS_MAP = {
        "0": "N/A",
        "1": "Very Poor",
        "2": "Poor",
        "3": "Average",
        "4": "Good",
        "5": "Very Good"
    }

    MECHANICAL_VENTILATION_MAP = {
        "0": "natural"
    }

    BUILT_FORM_MAP = {
        "1": "Detached",
        "2": "Semi-Detached",
        "3": "End-Terrace",
        "4": "Mid-Terrace",
    }

    GLAZED_AREA_MAP = {
        "2": "More than Typical",
        "4": "Much More Than Typical"
    }

    FUEL_TYPE_MAP = {
        "26": "mains gas (not community)"
    }

    TRANSACTION_TYPE_MAP = {
        "5": "Rented (social)",
        "13": "ECO assessment",
        "14": "Stock condition survey",
    }

    TENURE_MAP = {
        "1": "Owner-occupied",
        "2": "Rented (social)",
        "3": "Rented (private)",
    }

    TARIFF_MAP = {
        "1": "Dual",
        "2": "Single",
        "3": "Unknown"
    }

    def __init__(self, file, filekey, surveyor_company, uprn=None):
        """
        Read and parse the xml file and resolve the UPRN.

        :param file: a seekable binary file-like object holding utf-8 encoded xml
        :param filekey: stored as-is and reported as "file_location" in the additional data
        :param surveyor_company: stored as-is and reported in the additional data
        :param uprn: optional UPRN; when given it is used instead of the one in the xml
        """
        file.seek(0)  # Ensure the file pointer is at the beginning
        xml_string = file.read().decode('utf-8')
        self.xml = parseString(xml_string)
        self.filekey = filekey
        self.surveyor_company = surveyor_company

        # Give each instance its own result containers, rather than sharing the class-level dicts
        self.epc = {}
        self.additional_data = {}

        # We check if we have a lig xml or rdsap xml
        # We look for the presence of the Schema-Version-Original tag
        self.is_lig = len(self.xml.getElementsByTagName("Schema-Version-Original")) > 0

        self.get_uprn(uprn)

    @staticmethod
    def get_node(node):
        """
        Utility function to get the node value from the xml, where data might be optional

        :param node: a DOM element
        :return: the value of the element's first child, or None if the element is empty
        """
        node_first_child = node.firstChild
        if node_first_child is None:
            return None
        return node_first_child.nodeValue

    def run(self):
        """
        Run the full extraction, populating the instance attributes, ``self.epc`` and
        ``self.additional_data``.

        Nothing is extracted when the xml was not detected as a lig xml in ``__init__``.
        """
        if not self.is_lig:
            return

        self.get_assessor_details()
        self.get_heating_and_emissions_data()
        # self.get_detailed_heating_specs()

        # Building fabric
        self.get_doors()
        self.get_floor_dimensions()
        self.get_windows()

        # Get all of the EPC data
        self.extract_epc()

        # Put together all of the additional data we capture
        self.extract_additional_data()

    def _parse_heat_loss_corridor(self):
        """
        Return the heat loss corridor description for a flat.

        :raises KeyError: if the value in the xml is missing or is not in the lookup
        """
        hlc_lookup = {"2": "unheated corridor", "Unheated": "unheated corridor"}
        if self.is_lig:
            heat_loss_corridor = self.get_node_value('Heat-Loss-Corridor')
        else:
            # For some reason, this tag is spelt incorrectly in the rdsap xml
            heat_loss_corridor = self.get_node_value('FlatCoridor')

        return hlc_lookup[heat_loss_corridor]

    def _parse_heat_loss_corridor_length(self):
        """
        Return the unheated corridor length as found in the xml, or None if the tag is absent.
        The tag name depends on whether this is a lig xml.
        """
        if self.is_lig:
            return self.get_node_value('Unheated-Corridor-Length')
        return self.get_node_value('FlatShelteredWallLength')

    def _parse_flat_storey_count(self):
        """
        Return the flat storey count. This is only read for non-lig xmls; for lig xmls it is None.
        """
        # in the EPR the tag is Storeys
        if self.is_lig:
            storeys = None
        else:
            storeys = self.get_node_value('Storeys')
        return storeys

    def _parse_flat_top_storey(self):
        """
        Return the value of the Top-Storey tag for lig xmls, otherwise None.
        """
        if self.is_lig:
            return self.get_node_value('Top-Storey')
        return None

    def _parse_floor_level(self):
        """
        Return the value of the Level tag inside SAP-Flat-Details for lig xmls, otherwise None.
        """
        if self.is_lig:
            flat_details = self.xml.getElementsByTagName('SAP-Flat-Details')[0]
            return flat_details.getElementsByTagName("Level")[0].firstChild.nodeValue
        return None

    def extract_epc(self):
        """
        Build the EPC record and store it in ``self.epc``.

        ``get_floor_dimensions()`` and ``get_windows()`` must have been run first.

        :raises ValueError: if the floor dimensions or windows have not been extracted yet
        :raises KeyError: if a coded value in the xml is not present in the relevant lookup table
        """
        if self.floor_dimensions is None:
            raise ValueError("Run get_floor_dimensions() first")

        if self.windows is None:
            raise ValueError("Run get_windows() first")

        property_type = self.get_property_type()

        if property_type == "Flat":
            heat_loss_corridor = self._parse_heat_loss_corridor()
            unheated_corridor_length = self._parse_heat_loss_corridor_length()
            flat_storey_count = self._parse_flat_storey_count()
            flat_top_storey = self._parse_flat_top_storey()
            floor_level = self._parse_floor_level()
        else:
            heat_loss_corridor = "NO DATA!"
            unheated_corridor_length = ""
            flat_storey_count = ""
            flat_top_storey = ""
            floor_level = "NO DATA!"

        # Average room height across the main dwelling floors (rooms in roof have no height)
        floor_height = np.mean([
            float(x['room_height']) for x in self.floor_dimensions
            if x['building_part_identifier'] == 'Main Dwelling' and not x['room_roof']
        ])

        # Take the most prevalent glazing type
        glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0']
        glazed_type = max(glazed_type, key=glazed_type.count)

        energy_tariff = (
            self.xml.getElementsByTagName("SAP-Energy-Source")[0]
            .getElementsByTagName("Meter-Type")[0]
            .firstChild.nodeValue
        )
        energy_tariff = self.TARIFF_MAP[energy_tariff]

        self.epc = {
            "uprn": self.uprn,
            "uprn-source": "Address Matched",
            "property-type": property_type,
            "building-reference-number": "",
            **self.get_sap(),
            **self.get_property_address(),
            "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'),
            "construction-age-band": self.AGE_BAND_LOOKUP[
                self.get_node_value('Country-Code')
            ][self.get_node_value('Construction-Age-Band')],
            "mainheat-energy-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Main-Heating', 'Energy-Efficiency-Rating')
            ],
            "windows-env-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Window', 'Environmental-Efficiency-Rating')
            ],
            "lighting-energy-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Lighting', 'Energy-Efficiency-Rating')
            ],
            "environment-impact-potential": self.get_energy_assessment_value('Environmental-Impact-Potential'),
            "mainheatcont-description": self.get_property_summary_value('Main-Heating-Controls', 'Description'),
            "sheating-energy-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Secondary-Heating', 'Energy-Efficiency-Rating')
            ],
            "local-authority": "",  # Not included in the xml
            "local-authority-label": "",
            "fixed-lighting-outlets-count": self.get_node_value('Fixed-Lighting-Outlets-Count'),
            "energy-tariff": energy_tariff,
            "mechanical-ventilation": self.MECHANICAL_VENTILATION_MAP[self.get_node_value('Mechanical-Ventilation')],
            "solar-water-heating-flag": self.get_node_value('Solar-Water-Heating'),
            "co2-emissions-potential": self.get_energy_assessment_value('CO2-Emissions-Potential'),
            "number-heated-rooms": self.get_node_value('Heated-Room-Count'),
            "floor-description": self.get_property_summary_value('Floor', 'Description'),
            "energy-consumption-potential": self.get_energy_assessment_value('Energy-Consumption-Potential'),
            "built-form": self.BUILT_FORM_MAP[self.get_node_value('Built-Form')],
            "number-open-fireplaces": self.get_node_value('Open-Fireplaces-Count'),
            "windows-description": self.get_property_summary_value('Window', 'Description'),
            "glazed-area": self.GLAZED_AREA_MAP[self.get_node_value('Glazed-Area')],
            "inspection-date": self.get_node_value('Inspection-Date'),
            "mains-gas-flag": self.get_node_value('Mains-Gas'),
            "co2-emiss-curr-per-floor-area": self.get_energy_assessment_value('CO2-Emissions-Current-Per-Floor-Area'),
            "heat-loss-corridor": heat_loss_corridor,
            "unheated-corridor-length": unheated_corridor_length,
            "flat-storey-count": flat_storey_count,
            "roof-energy-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Roof', 'Energy-Efficiency-Rating')
            ],
            "total-floor-area": self.get_node_value('Total-Floor-Area'),
            "environment-impact-current": self.get_energy_assessment_value('Environmental-Impact-Current'),
            "roof-description": self.get_property_summary_value('Roof', 'Description'),
            "floor-energy-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Floor', 'Energy-Efficiency-Rating')
            ],
            "number-habitable-rooms": self.get_node_value('Habitable-Room-Count'),
            "hot-water-env-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Hot-Water', 'Environmental-Efficiency-Rating')
            ],
            "mainheatc-energy-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Main-Heating-Controls', 'Energy-Efficiency-Rating')
            ],
            "main-fuel": self.FUEL_TYPE_MAP[self.get_node_value('Main-Fuel-Type')],
            "lighting-env-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Lighting', 'Environmental-Efficiency-Rating')
            ],
            "windows-energy-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Window', 'Energy-Efficiency-Rating')
            ],
            "floor-env-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Floor', 'Environmental-Efficiency-Rating')
            ],
            "sheating-env-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Secondary-Heating', 'Environmental-Efficiency-Rating')
            ],
            "lighting-description": self.get_property_summary_value('Lighting', 'Description'),
            "roof-env-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Roof', 'Environmental-Efficiency-Rating')
            ],
            "walls-energy-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Wall', 'Energy-Efficiency-Rating')
            ],
            "photo-supply": self.get_photo_supply(),
            "lighting-cost-potential": self.get_energy_assessment_value('Lighting-Cost-Potential'),
            "mainheat-env-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Main-Heating', 'Environmental-Efficiency-Rating')
            ],
            "multi-glaze-proportion": self.get_node_value('Multiple-Glazed-Proportion'),
            "main-heating-controls": self.get_property_summary_value('Main-Heating-Controls', 'Description'),
            "flat-top-storey": flat_top_storey,
            "secondheat-description": self.get_property_summary_value('Secondary-Heating', 'Description'),
            "walls-env-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Wall', 'Environmental-Efficiency-Rating')
            ],
            "transaction-type": self.TRANSACTION_TYPE_MAP[self.get_node_value('Transaction-Type')],
            "extension-count": self.get_node_value('Extensions-Count'),
            "mainheatc-env-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Main-Heating-Controls', 'Environmental-Efficiency-Rating')
            ],
            "lmk-key": "",  # Doesn't exist for non-EPC xmls
            "wind-turbine-count": self.get_node_value('Wind-Turbines-Count'),
            "tenure": self.TENURE_MAP[self.get_node_value('Tenure')],
            "floor-level": floor_level,
            "potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'),
            "potential-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))),
            "hot-water-energy-eff": self.RATINGS_MAP[
                self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating')
            ],
            "low-energy-lighting": self.get_node_value('Low-Energy-Lighting'),
            "walls-description": self.get_property_summary_value('Wall', 'Description'),
            "hotwater-description": self.get_property_summary_value('Hot-Water', 'Description'),
            "co2-emissions-current": self.get_node_value('CO2-Emissions-Current'),
            "heating-cost-current": self.get_node_value('Heating-Cost-Current'),
            "heating-cost-potential": self.get_energy_assessment_value('Heating-Cost-Potential'),
            "hot-water-cost-current": self.get_node_value('Hot-Water-Cost-Current'),
            "hot-water-cost-potential": self.get_energy_assessment_value('Hot-Water-Cost-Potential'),
            "lighting-cost-current": self.get_node_value('Lighting-Cost-Current'),
            "energy-consumption-current": self.get_node_value('Energy-Consumption-Current'),
            # The inspection date is also used for the lodgement date fields
            "lodgement-date": self.get_node_value('Inspection-Date'),
            "lodgement-datetime": datetime.strptime(self.get_node_value('Inspection-Date'), "%Y-%m-%d").isoformat(),
            "mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'),
            "floor-height": floor_height,
            "glazed-type": glazed_type,
        }

    def get_insulation_wall_area(self):
        """
        Extracts the insulation wall area for the main dwelling

        Note that this doesn't include any extensions. We don't have recommendations for extensions right now,
        so we don't currently calculate the insulation wall area for them, since it's not used in the
        recommendations.

        The wall area is the sum of heat loss perimeter x room height over the main dwelling floors
        (excluding rooms in roof). Known window areas are subtracted; when no window area is known,
        10% of the wall area is discounted instead.

        :return: the insulation wall area as a float
        """
        main_dwelling_floors = [
            f for f in self.floor_dimensions
            if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"]
        ]
        main_dwelling_windows = [
            w for w in self.windows if w["window_location"] == "0"
        ]

        wall_areas = sum(
            float(f["heat_loss_perimeter"]) * float(f["room_height"]) for f in main_dwelling_floors
        )
        window_areas = [
            float(w["window_area"]) for w in main_dwelling_windows if w["window_area"] is not None
        ]

        if not window_areas:
            # We discount 10% of the wall area
            return wall_areas * 0.9

        return wall_areas - sum(window_areas)

    def extract_additional_data(self):
        """
        Build the dictionary of additional survey data and store it in ``self.additional_data``.

        Also sets ``self.insulation_wall_area``. Requires the floor dimensions and windows to have
        been extracted already.
        """
        self.insulation_wall_area = self.get_insulation_wall_area()

        # We pull this out which is used as the insulation floor area
        main_dwelling_ground_floor_area = [
            f for f in self.floor_dimensions
            if f["building_part_identifier"] == "Main Dwelling" and f["floor"] == "0"
        ][0]["total_floor_area"]

        main_dwelling_windows = [w for w in self.windows if w["window_location"] == "0"]
        number_of_windows = len(main_dwelling_windows)
        windows_area = [float(w["window_area"]) for w in main_dwelling_windows if w["window_area"] is not None]
        windows_area = sum(windows_area) if windows_area else None

        boolean_lookup = {
            "true": True,
            "false": False,
            "Y": True,
            "N": False
        }

        cylinder_insulation_type = {
            None: "",
            "1": "Foam",
            "2": "Jacket"
        }

        # Both of these tags are optional, so each is read once and only converted when present
        raw_cylinder_thickness = self.get_node_value('Cylinder-Insulation-Thickness')
        cylinder_insulation_thickness = int(raw_cylinder_thickness) if raw_cylinder_thickness else None

        raw_cylinder_thermostat = self.get_node_value('Cylinder-Thermostat')
        cylinder_thermostat = boolean_lookup[raw_cylinder_thermostat] if raw_cylinder_thermostat else None

        self.additional_data = {
            "file_location": self.filekey,
            "surveyor_name": self.surveyor_name,
            "surveyor_company": self.surveyor_company,
            "space_heating_kwh": self.space_heating_kwh,
            "water_heating_kwh": self.water_heating_kwh,
            # "heating_system": self.heating_system,
            # "heating_controls": self.heating_controls,
            "number_of_doors": self.number_of_doors,
            "number_of_insulated_doors": self.number_of_insulated_doors,
            "number_of_floors": self.number_of_floors,
            "insulation_wall_area": self.insulation_wall_area,
            "heat_loss_perimeter": self.heat_loss_perimeter,
            "party_wall_length": self.party_wall_length,
            "perimeter": self.perimeter,
            "rooms_with_bath_and_or_shower": int(self.get_node_value('Rooms-With-Bath-And-Or-Shower')),
            "rooms_with_mixer_shower_no_bath": int(self.get_node_value('Rooms-With-Mixer-Shower-No-Bath')),
            "room_with_bath_and_mixer_shower": int(self.get_node_value('Rooms-With-Bath-And-Mixer-Shower')),
            "percent_draftproofed": int(self.get_node_value('Percent-Draughtproofed')),
            "has_hot_water_cylinder": boolean_lookup[self.get_node_value('Has-Hot-Water-Cylinder')],
            "cylinder_insulation_type": cylinder_insulation_type[self.get_node_value('Cylinder-Insulation-Type')],
            "cylinder_insulation_thickness": cylinder_insulation_thickness,
            "cylinder_thermostat": cylinder_thermostat,
            "main_dwelling_ground_floor_area": float(main_dwelling_ground_floor_area),
            "number_of_windows": int(number_of_windows),
            "windows_area": float(windows_area) if windows_area is not None else windows_area,
        }

    def get_node_value(self, tag_name):
        """
        Return the value of the first element in the document with the given tag name.

        :param tag_name: the tag to look for anywhere in the document
        :return: the value of the element's first child, or None if the tag is absent or empty
        """
        nodes = self.xml.getElementsByTagName(tag_name)
        if nodes and nodes[0].firstChild:
            return nodes[0].firstChild.nodeValue
        return None

    def get_node_value_from_floor_dimensions(self, tag_name):
        """
        Return the value of the given tag inside the first SAP-Floor-Dimension element.

        :param tag_name: the tag to look for inside the first SAP-Floor-Dimension element
        :return: the value, or None if either tag is absent or the tag is empty
        """
        nodes = self.xml.getElementsByTagName('SAP-Floor-Dimension')
        if nodes:
            tag = nodes[0].getElementsByTagName(tag_name)
            if tag and tag[0].firstChild:
                return tag[0].firstChild.nodeValue
        return None

    def get_property_summary_value(self, section, tag_name):
        """
        Return the value of a tag inside a section of the first Property-Summary element.

        :param section: the section tag inside Property-Summary, e.g. 'Roof'
        :param tag_name: the tag to read inside that section, e.g. 'Description'
        :return: the value, or None if the section or tag is absent or the tag is empty
        :raises IndexError: if the document has no Property-Summary element
        """
        nodes = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName(section)
        if nodes:
            tag = nodes[0].getElementsByTagName(tag_name)
            if tag and tag[0].firstChild:
                return tag[0].firstChild.nodeValue
        return None

    def get_energy_assessment_value(self, tag_name):
        """
        Return the value of a tag inside the first Energy-Assessment element.

        :param tag_name: the tag to read inside Energy-Assessment
        :return: the value, or None if the tag is absent or empty
        :raises IndexError: if the document has no Energy-Assessment element
        """
        nodes = self.xml.getElementsByTagName('Energy-Assessment')[0]
        if nodes:
            tag = nodes.getElementsByTagName(tag_name)
            if tag and tag[0].firstChild:
                return tag[0].firstChild.nodeValue
        return None

    def get_uprn(self, uprn):
        """
        Resolve the UPRN and store it in ``self.uprn``.

        - If ``uprn`` is given, it is used as-is.
        - If the UPRN tag is missing or empty, ``self.uprn`` is set to -1.
        - If the UPRN in the xml consists only of zeros, ``self.uprn`` is set to 0.
        - Otherwise the text following the (case-insensitive) "uprn-" prefix is used. A value
          without that prefix is kept unchanged rather than raising an IndexError.

        :param uprn: an externally supplied UPRN, or None to read it from the xml
        """
        if uprn is not None:
            self.uprn = uprn
            return

        uprn_nodes = self.xml.getElementsByTagName('UPRN')
        uprn_tag = uprn_nodes[0].firstChild if uprn_nodes else None
        if uprn_tag is None:
            self.uprn = -1
            return

        raw_uprn = uprn_tag.nodeValue
        # If all of the characters in the UPRN are 0, then there is no set UPRN
        if raw_uprn.count("0") == len(raw_uprn):
            self.uprn = 0
            return

        parts = raw_uprn.lower().split("uprn-")
        # Without the prefix, split() returns a single element, so fall back to the raw value
        self.uprn = parts[1] if len(parts) > 1 else raw_uprn

    def get_property_type(self):
        """
        Return the property type label, looked up from the Property-Type tag (or PropertyType1
        when Property-Type is absent).

        :raises ValueError: if the xml has not been read, or if the document contains more than
            one distinct property type
        :raises KeyError: if the value is not in PROPERTY_TYPE_LOOKUP
        """
        if not self.xml:
            raise ValueError("You need to read the file first")

        property_type = self.xml.getElementsByTagName('Property-Type')
        if not property_type:
            property_type = self.xml.getElementsByTagName('PropertyType1')

        if len(property_type) > 1:
            # Several tags are fine as long as they all agree on the same type
            property_types = {PROPERTY_TYPE_LOOKUP[p.firstChild.nodeValue] for p in property_type}
            if len(property_types) > 1:
                raise ValueError("Multiple property types found")
            return property_types.pop()

        return PROPERTY_TYPE_LOOKUP[property_type[0].firstChild.nodeValue]

    def get_sap(self):
        """
        Read the current SAP score from the Energy-Rating-Current tag and convert it to a rating
        using sap_to_epc.

        :return: a dictionary with "current-energy-efficiency" (the score as a string) and
            "current-energy-rating"
        """
        sap_score = self.xml.getElementsByTagName('Energy-Rating-Current')
        sap_score = int(sap_score[0].firstChild.nodeValue)
        epc_rating = sap_to_epc(sap_score)

        return {
            "current-energy-efficiency": str(sap_score),
            "current-energy-rating": epc_rating
        }

    def get_heating_and_emissions_data(self):
        """
        This method will extract the following pieces of information:
        1) Space heating requirement (Space-Heating-Existing-Dwelling tag)
        2) Water heating requirement (Water-Heating tag)

        The values are stored, as found in the xml, in ``self.space_heating_kwh`` and
        ``self.water_heating_kwh``.
        """
        self.space_heating_kwh = self.xml.getElementsByTagName(
            'Space-Heating-Existing-Dwelling'
        )[0].firstChild.nodeValue
        self.water_heating_kwh = self.xml.getElementsByTagName('Water-Heating')[0].firstChild.nodeValue

    def get_detailed_heating_specs(self):
        """
        Given the heating data that is found in the SAP-Heating tag, we extract the details about
        the heating system and its controls.

        The codes are looked up in heating_data; when a code is not found, a placeholder string
        containing the code is stored instead. Results are stored in ``self.heating_system`` and
        ``self.heating_controls``.
        """
        sap_main_heating_details = (
            self.xml.getElementsByTagName('SAP-Heating')[0]
            .getElementsByTagName("Main-Heating-Details")[0]
            .getElementsByTagName("Main-Heating")[0]
        )

        heating_code = sap_main_heating_details.getElementsByTagName("Main-Heating-Number")[0].firstChild.nodeValue
        # Get the heating system
        heating_system = heating_data[heating_data["code"] == int(heating_code)]["description"]
        heating_system = heating_system.values[0] if not heating_system.empty else f"Heating code: {heating_code}"

        # Get the heating controls
        heating_controls_code = (
            sap_main_heating_details.getElementsByTagName("Main-Heating-Control")[0].firstChild.nodeValue
        )
        heating_controls = heating_data[heating_data["code"] == int(heating_controls_code)]["description"]
        # The fallback reports the controls code (not the heating system code)
        heating_controls = (
            heating_controls.values[0] if not heating_controls.empty
            else f"Heating Controls code: {heating_controls_code}"
        )

        self.heating_system = heating_system
        self.heating_controls = heating_controls

    def get_doors(self):
        """
        Read the door counts and store them as integers in ``self.number_of_doors`` and
        ``self.number_of_insulated_doors``.
        """
        # Doors can be found in the SAP-Property-Details tag
        property_details = self.xml.getElementsByTagName('SAP-Property-Details')[0]

        self.number_of_doors = int(
            property_details.getElementsByTagName('Door-Count')[0].firstChild.nodeValue
        )
        self.number_of_insulated_doors = int(
            property_details.getElementsByTagName('Insulated-Door-Count')[0].firstChild.nodeValue
        )

    def get_photo_supply(self):
        """
        Return the Percent-Roof-Area value found under Photovoltaic-Supply / None-Or-No-Details.

        :raises NotImplementedError: if the Photovoltaic-Supply tag has no None-Or-No-Details child
        """
        photo_supply_tag = self.xml.getElementsByTagName("Photovoltaic-Supply")[0]

        # Check if the "None-Or-No-Details" tag is present
        no_details = photo_supply_tag.getElementsByTagName("None-Or-No-Details")
        if not no_details:
            raise NotImplementedError("Implement me")

        return no_details[0].getElementsByTagName("Percent-Roof-Area")[0].firstChild.nodeValue

    def get_assessor_details(self):
        """
        Read the assessor's name from the Energy-Assessor tag into ``self.surveyor_name``.
        """
        energy_assessor_tag = self.xml.getElementsByTagName('Energy-Assessor')[0]
        self.surveyor_name = (
            energy_assessor_tag.getElementsByTagName("Name")[0].firstChild.nodeValue
        )

    def get_property_address(self):
        """
        Read the address fields from the first Property tag.

        Address lines, post town and postcode are None when their tag is missing or empty; the
        county is an empty string in that case. "address" is the non-empty address lines joined
        with ", ".

        :return: a dictionary with the keys address1, address2, address3, posttown, postcode,
            address, county, constituency and constituency-label
        """
        property_tag = self.xml.getElementsByTagName("Property")[0]

        def first_text(tag_name):
            # Tolerate both a missing tag and an empty tag
            nodes = property_tag.getElementsByTagName(tag_name)
            return self.get_node(nodes[0]) if nodes else None

        address1 = first_text("Address-Line-1")
        address2 = first_text("Address-Line-2")
        address3 = first_text("Address-Line-3")
        posttown = first_text("Post-Town")
        postcode = first_text("Postcode")

        address = ", ".join(
            [x for x in [address1, address2, address3] if x is not None]
        )

        county = first_text("County") or ""

        # Seems to be unavailable in the xml
        constituency = None
        constituency_label = None

        return {
            "address1": address1,
            "address2": address2,
            "address3": address3,
            "posttown": posttown,
            "postcode": postcode,
            "address": address,
            "county": county,
            "constituency": constituency,
            "constituency-label": constituency_label
        }

    def get_floor_dimensions(self):
        """
        Extracts physical measurements of the property such as the floor area, room height, etc. across the
        main dwelling and any extensions.

        Sets ``self.floor_dimensions`` (one dictionary per floor / room in roof, values as found in
        the xml), ``self.number_of_floors`` (entries belonging to the "Main Dwelling" part,
        including rooms in roof), ``self.heat_loss_perimeter``, ``self.party_wall_length`` and
        ``self.perimeter``.
        """

        def get_part_value(node, tag_name):
            element = node.getElementsByTagName(tag_name)
            if element and element[0].firstChild:
                return element[0].firstChild.nodeValue
            return None

        # Each building part corresponds to the main dwelling or an extension
        sap_building_parts = self.xml.getElementsByTagName("SAP-Building-Part")

        floor_dimensions = []
        for building_part in sap_building_parts:
            building_part_identifier = building_part.getElementsByTagName("Identifier")[0].firstChild.nodeValue
            sap_floor_dimensions = building_part.getElementsByTagName("SAP-Floor-Dimension")
            data = [
                {
                    'building_part_identifier': building_part_identifier,
                    'floor': get_part_value(floor_dimension, 'Floor'),
                    'floor_construction': get_part_value(floor_dimension, 'Floor-Construction'),
                    'floor_insulation': get_part_value(floor_dimension, 'Floor-Insulation'),
                    'heat_loss_perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'),
                    'party_wall_length': get_part_value(floor_dimension, 'Party-Wall-Length'),
                    'total_floor_area': get_part_value(floor_dimension, 'Total-Floor-Area'),
                    'room_height': get_part_value(floor_dimension, 'Room-Height'),
                    "room_roof": False
                }
                for floor_dimension in sap_floor_dimensions
            ]

            room_roofs = building_part.getElementsByTagName("SAP-Room-In-Roof")
            room_roof_data = [
                {
                    "building_part_identifier": building_part_identifier,
                    # A room in roof is recorded as the floor above the highest floor of the part
                    "floor": str(max([int(d["floor"]) for d in data]) + 1),
                    "floor_construction": "",
                    "floor_insulation": rr.getElementsByTagName("Insulation")[0].firstChild.nodeValue,
                    "heat_loss_perimeter": "",
                    "party_wall_length": "",
                    "total_floor_area": rr.getElementsByTagName("Floor-Area")[0].firstChild.nodeValue,
                    "room_height": "",
                    "room_roof": True
                }
                for rr in room_roofs
            ]

            floor_dimensions.extend(data)
            floor_dimensions.extend(room_roof_data)

        self.floor_dimensions = floor_dimensions
        self.number_of_floors = len(
            [f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling"]
        )

        def sum_of_part_maxima(field):
            # For each building part take the largest value of the field, then add the parts up.
            # A part without any value contributes 0.0, so a single missing measurement can no
            # longer turn the whole total into -inf.
            parts = dict.fromkeys(row['building_part_identifier'] for row in floor_dimensions)
            return sum(
                max(
                    (
                        float(row[field]) for row in floor_dimensions
                        if row['building_part_identifier'] == part and row[field]
                    ),
                    default=0.0
                )
                for part in parts
            )

        # We extract the maximum heat loss perimeter, per building part
        self.heat_loss_perimeter = sum_of_part_maxima('heat_loss_perimeter')
        self.party_wall_length = sum_of_part_maxima('party_wall_length')
        self.perimeter = self.heat_loss_perimeter + self.party_wall_length

    @staticmethod
    def _parse_windows_content(window, glazing_type_lookup, orientation_lookup):
        """
        Convert a single SAP-Window element into a dictionary.

        :param window: the SAP-Window DOM element
        :param glazing_type_lookup: maps the Glazing-Type value to a description
        :param orientation_lookup: maps the Orientation value to a compass direction
        :return: a dictionary with the keys window_location, window_area, window_type,
            glazing_type, pvc_frame, glazing_gap and orientation
        :raises KeyError: if the glazing type or orientation is not in the given lookups
        """
        # There may not be a pvc frame
        pvc_frame = window.getElementsByTagName("PVC-Frame")
        pvc_frame = pvc_frame[0].firstChild.nodeValue if pvc_frame else None

        # There may not be a glazing gap for single glazed windows
        glazing_gap = window.getElementsByTagName("Glazing-Gap")
        glazing_gap = glazing_gap[0].firstChild.nodeValue if glazing_gap else None

        parsed = {
            "window_location": window.getElementsByTagName("Window-Location")[0].firstChild.nodeValue,
            "window_area": window.getElementsByTagName("Window-Area")[0].firstChild.nodeValue,
            "window_type": window.getElementsByTagName("Window-Type")[0].firstChild.nodeValue,
            "glazing_type": glazing_type_lookup[
                window.getElementsByTagName("Glazing-Type")[0].firstChild.nodeValue
            ],
            "pvc_frame": pvc_frame,
            "glazing_gap": glazing_gap,
            "orientation": orientation_lookup[window.getElementsByTagName("Orientation")[0].firstChild.nodeValue]
        }

        return parsed

    def get_windows(self):
        """
        Extracts data about the windows in the property, including the number of windows and the window type.

        When the document has a SAP-Windows tag, one dictionary per SAP-Window is stored in
        ``self.windows``. Otherwise a single summary entry is built from the glazing information
        in SAP-Property-Details, with no area, type, glazing gap or orientation.
        """
        glazing_type_lookup = {
            "ND": "Single glazing",
            "1": "double glazing installed before 2002",
            "2": "double glazing installed during or after 2002",
            "3": "double glazing, unknown install date",
            "5": "Single glazing",
        }

        orientation_lookup = {
            "1": "North",
            "2": "North East",
            "3": "East",
            "4": "South East",
            "5": "South",
            "6": "South West",
            "7": "West",
            "8": "North West"
        }

        sap_windows = self.xml.getElementsByTagName("SAP-Windows")

        if not sap_windows:
            # We look for Multi-Glazed-Proportion
            property_details = self.xml.getElementsByTagName("SAP-Property-Details")[0]

            multiple_glazing_type = property_details.getElementsByTagName(
                "Multiple-Glazing-Type"
            )[0].firstChild.nodeValue

            pvc_frame = property_details.getElementsByTagName("PVC-Window-Frames")
            pvc_frame = pvc_frame[0].firstChild.nodeValue if pvc_frame else None

            multple_glazed_proportion = property_details.getElementsByTagName(
                "Multiple-Glazed-Proportion"
            )[0].firstChild.nodeValue

            self.windows = [
                {
                    "window_location": "0",
                    "window_area": None,
                    "window_type": None,
                    "glazing_type": glazing_type_lookup[multiple_glazing_type],
                    "pvc_frame": pvc_frame,
                    "glazing_gap": None,
                    "orientation": None,
                    # NOTE: the misspelt key is kept, since it is part of the produced data
                    "multple_glazed_proportion": multple_glazed_proportion
                }
            ]
            return

        sap_windows = sap_windows[0].getElementsByTagName("SAP-Window")

        self.windows = [
            self._parse_windows_content(
                window=window,
                glazing_type_lookup=glazing_type_lookup,
                orientation_lookup=orientation_lookup
            )
            for window in sap_windows
        ]