Model/etl/xml_survey_extraction/XmlParser.py
2024-11-04 11:24:02 +00:00

844 lines
34 KiB
Python

import re
import numpy as np
import usaddress
from datetime import datetime
from xml.dom.minidom import parseString
from backend.app.utils import sap_to_epc
from etl.xml_survey_extraction.pcdb import heating_data
# Translates the raw property-type value read from the XML into a property-type label.
# Both a numeric code ("0") and the literal label ("House") resolve to "House".
PROPERTY_TYPE_LOOKUP = {
    "0": "House",
    "House": "House",
    "2": "Flat",
    "3": "Maisonette",
}
def get_house_number(address: str) -> str | None:
    """
    Extract the house number from a free-text address.

    The address is first parsed with the usaddress library; the first token it labels as
    "AddressNumber" is returned with any commas removed. If usaddress finds no such token, a
    regular-expression fallback is used instead.

    :param address: the address to parse
    :return: the house number as a string, or None if neither approach finds one
    """
    usaddress_number = next(
        (token for token, label in usaddress.parse(address) if label == "AddressNumber"),
        None,
    )
    if usaddress_number is not None:
        # Remove commas picked up with the token (e.g. "12," -> "12")
        return usaddress_number.replace(",", "")

    # usaddress isn't optimal for addresses with prefixes such as 'Flat', so fall back to a custom
    # pattern: 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
    fallback = re.search(r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)', address)
    if fallback is None:
        return None
    # Exactly one of the two capture groups matched; return whichever it was
    return next(g for g in fallback.groups() if g is not None)
class XmlParser:
    """
    Parses an energy-survey XML document into an EPC-style record (`epc`) plus a dictionary of
    additional survey measurements (`additional_data`).

    Construct the parser with an open binary file and call run(). run() only processes documents that
    contain a <Schema-Version-Original> tag (referred to in this file as "lig" XMLs); for any other
    document it returns without populating anything.
    """

    # Results; kept at class level for backwards compatibility, but __init__ gives every instance its own.
    epc = {}
    additional_data = {}
    uprn = None
    # heating/emissions information
    space_heating_kwh = None
    water_heating_kwh = None
    heating_system = None
    heating_controls = None
    # Assessor details
    surveyor_name = None
    number_of_doors = None
    number_of_insulated_doors = None
    windows = None
    # Property dimensions
    number_of_floors = None
    perimeter = None
    heat_loss_perimeter = None
    party_wall_length = None
    total_floor_area = None
    floor_height = None
    insulation_wall_area = None
    floor_dimensions = None

    # The age band lookup is based on the country code
    AGE_BAND_LOOKUP = {
        # England & Wales
        "EAW": {
            "A": "England and Wales: before 1900",
            "B": "England and Wales: 1900-1929",
            "C": "England and Wales: 1930-1949",
            "D": "England and Wales: 1950-1966",
            "E": "England and Wales: 1967-1975",
            "F": "England and Wales: 1976-1982",
            "G": "England and Wales: 1983-1990",
            "H": "England and Wales: 1991-1995",
            "I": "England and Wales: 1996-2002",
            "J": "England and Wales: 2003-2006",
            "K": "England and Wales: 2007-2011",
            "L": "England and Wales: 2012 onwards",
        }
    }
    # The lookups below are indexed directly (no default), so a code that is not listed raises KeyError.
    RATINGS_MAP = {
        "0": "N/A",
        "1": "Very Poor",
        "2": "Poor",
        "3": "Average",
        "4": "Good",
        "5": "Very Good"
    }
    MECHANICAL_VENTILATION_MAP = {
        "0": "natural"
    }
    BUILT_FORM_MAP = {
        "1": "Detached",
        "2": "Semi-Detached",
        "3": "End-Terrace",
        "4": "Mid-Terrace",
    }
    GLAZED_AREA_MAP = {
        "2": "More than Typical",
        "4": "Much More Than Typical"
    }
    FUEL_TYPE_MAP = {
        "26": "mains gas (not community)"
    }
    TRANSACTION_TYPE_MAP = {
        "5": "Rented (social)",
        "13": "ECO assessment",
        "14": "Stock condition survey",
    }
    TENURE_MAP = {
        "1": "Owner-occupied",
        "2": "Rented (social)",
        "3": "Rented (private)",
    }
    TARIFF_MAP = {
        "1": "Dual",
        "2": "Single",
        "3": "Unknown"
    }

    def __init__(self, file, filekey, surveyor_company, uprn=None):
        """
        Read and parse the XML document and resolve the UPRN.

        :param file: a seekable binary file object containing UTF-8 encoded XML
        :param filekey: stored as-is and reported as "file_location" in additional_data
        :param surveyor_company: stored as-is and reported in additional_data
        :param uprn: optional UPRN; when given it is used instead of the one in the document
        """
        file.seek(0)  # Ensure the file pointer is at the beginning
        xml_string = file.read().decode('utf-8')
        self.xml = parseString(xml_string)
        self.filekey = filekey
        self.surveyor_company = surveyor_company
        # Per-instance result containers, so that instances never share the class-level dictionaries
        self.epc = {}
        self.additional_data = {}
        # We check if we have a lig xml or rdsap xml
        # We look for the presence of the Schema-Version-Original tag
        self.is_lig = len(self.xml.getElementsByTagName("Schema-Version-Original")) > 0
        self.get_uprn(uprn)

    @staticmethod
    def get_node(node):
        """
        Utility function to get the node value from the xml, where data might be optional

        :param node: an XML element
        :return: the value of the element's first child, or None if the element has no children
        """
        node_first_child = node.firstChild
        if node_first_child is None:
            return None
        return node_first_child.nodeValue

    def run(self):
        """
        Run every extraction step in order and populate `epc` and `additional_data`.

        Does nothing when the document is not a lig XML (see __init__).
        """
        if not self.is_lig:
            return
        self.get_assessor_details()
        self.get_heating_and_emissions_data()
        # Detailed heating specs are currently disabled
        # self.get_detailed_heating_specs()
        # Building fabric
        self.get_doors()
        self.get_floor_dimensions()
        self.get_windows()
        # Get all of the EPC data (requires floor dimensions and windows to have been extracted)
        self.extract_epc()
        # Put together all of the additional data we capture
        self.extract_additional_data()

    def _parse_heat_loss_corridor(self):
        """
        Return the heat-loss corridor label for a flat.

        :raises KeyError: if the value in the document is missing or not in the lookup
        """
        hlc_lookup = {"2": "unheated corridor", "Unheated": "unheated corridor"}
        if self.is_lig:
            heat_loss_corridor = self.get_node_value('Heat-Loss-Corridor')
        else:
            # For some reason, this tag is spelt incorrectly in the rdsap xml
            heat_loss_corridor = self.get_node_value('FlatCoridor')
        return hlc_lookup[heat_loss_corridor]

    def _parse_heat_loss_corridor_length(self):
        """Return the unheated corridor length as found in the document (None if absent)."""
        if self.is_lig:
            return self.get_node_value('Unheated-Corridor-Length')
        return self.get_node_value('FlatShelteredWallLength')

    def _parse_flat_storey_count(self):
        """Return the storey count; it is only read for non-lig documents, otherwise None."""
        # in the EPR the tag is Storeys
        if self.is_lig:
            storeys = None
        else:
            storeys = self.get_node_value('Storeys')
        return storeys

    def _parse_flat_top_storey(self):
        """Return the <Top-Storey> value for lig documents, otherwise None."""
        if self.is_lig:
            return self.get_node_value('Top-Storey')
        return None

    def _parse_floor_level(self):
        """Return the <Level> found under <SAP-Flat-Details> for lig documents, otherwise None."""
        if self.is_lig:
            flat_details = self.xml.getElementsByTagName('SAP-Flat-Details')[0]
            return flat_details.getElementsByTagName("Level")[0].firstChild.nodeValue
        return None

    def extract_epc(self):
        """
        Build the EPC-style record and store it in `self.epc`.

        get_floor_dimensions() and get_windows() must have been run first.

        :raises ValueError: if floor dimensions or windows have not been extracted yet
        :raises KeyError: if a coded value in the document is not present in the matching lookup
        """
        if self.floor_dimensions is None:
            raise ValueError("Run get_floor_dimensions() first")
        if self.windows is None:
            raise ValueError("Run get_windows() first")

        property_type = self.get_property_type()
        if property_type == "Flat":
            heat_loss_corridor = self._parse_heat_loss_corridor()
            unheated_corridor_length = self._parse_heat_loss_corridor_length()
            flat_storey_count = self._parse_flat_storey_count()
            flat_top_storey = self._parse_flat_top_storey()
            floor_level = self._parse_floor_level()
        else:
            # Flat-only fields get placeholder values for every other property type
            heat_loss_corridor = "NO DATA!"
            unheated_corridor_length = ""
            flat_storey_count = ""
            flat_top_storey = ""
            floor_level = "NO DATA!"

        # Average room height across the main dwelling's floors (rooms in the roof carry no height)
        floor_height = np.mean([
            float(x['room_height']) for x in self.floor_dimensions if
            x['building_part_identifier'] == 'Main Dwelling' and not x['room_roof']
        ])

        # Take the most prevalent glazing type among windows whose location is "0"
        glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0']
        glazed_type = max(glazed_type, key=glazed_type.count)

        energy_tariff = (
            self.xml.getElementsByTagName("SAP-Energy-Source")[0]
            .getElementsByTagName("Meter-Type")[0]
            .firstChild.nodeValue
        )
        energy_tariff = self.TARIFF_MAP[energy_tariff]

        # Short local accessors to keep the record definition below readable
        node = self.get_node_value
        summary = self.get_property_summary_value
        assessment = self.get_energy_assessment_value

        def rating(section, tag_name):
            # Translate a Property-Summary rating code into its label
            return self.RATINGS_MAP[summary(section, tag_name)]

        self.epc = {
            "uprn": self.uprn,
            "uprn-source": "Address Matched",
            "property-type": property_type,
            "building-reference-number": "",
            **self.get_sap(),
            **self.get_property_address(),
            "low-energy-fixed-light-count": node('Low-Energy-Fixed-Lighting-Outlets-Count'),
            "construction-age-band": self.AGE_BAND_LOOKUP[node('Country-Code')][node('Construction-Age-Band')],
            "mainheat-energy-eff": rating('Main-Heating', 'Energy-Efficiency-Rating'),
            "windows-env-eff": rating('Window', 'Environmental-Efficiency-Rating'),
            "lighting-energy-eff": rating('Lighting', 'Energy-Efficiency-Rating'),
            "environment-impact-potential": assessment('Environmental-Impact-Potential'),
            "mainheatcont-description": summary('Main-Heating-Controls', 'Description'),
            "sheating-energy-eff": rating('Secondary-Heating', 'Energy-Efficiency-Rating'),
            "local-authority": "",  # Not included in the xml
            "local-authority-label": "",
            "fixed-lighting-outlets-count": node('Fixed-Lighting-Outlets-Count'),
            "energy-tariff": energy_tariff,
            "mechanical-ventilation": self.MECHANICAL_VENTILATION_MAP[node('Mechanical-Ventilation')],
            "solar-water-heating-flag": node('Solar-Water-Heating'),
            "co2-emissions-potential": assessment('CO2-Emissions-Potential'),
            "number-heated-rooms": node('Heated-Room-Count'),
            "floor-description": summary('Floor', 'Description'),
            "energy-consumption-potential": assessment('Energy-Consumption-Potential'),
            "built-form": self.BUILT_FORM_MAP[node('Built-Form')],
            "number-open-fireplaces": node('Open-Fireplaces-Count'),
            "windows-description": summary('Window', 'Description'),
            "glazed-area": self.GLAZED_AREA_MAP[node('Glazed-Area')],
            "inspection-date": node('Inspection-Date'),
            "mains-gas-flag": node('Mains-Gas'),
            "co2-emiss-curr-per-floor-area": assessment('CO2-Emissions-Current-Per-Floor-Area'),
            "heat-loss-corridor": heat_loss_corridor,
            "unheated-corridor-length": unheated_corridor_length,
            "flat-storey-count": flat_storey_count,
            "roof-energy-eff": rating('Roof', 'Energy-Efficiency-Rating'),
            "total-floor-area": node('Total-Floor-Area'),
            "environment-impact-current": assessment('Environmental-Impact-Current'),
            "roof-description": summary('Roof', 'Description'),
            "floor-energy-eff": rating('Floor', 'Energy-Efficiency-Rating'),
            "number-habitable-rooms": node('Habitable-Room-Count'),
            "hot-water-env-eff": rating('Hot-Water', 'Environmental-Efficiency-Rating'),
            "mainheatc-energy-eff": rating('Main-Heating-Controls', 'Energy-Efficiency-Rating'),
            "main-fuel": self.FUEL_TYPE_MAP[node('Main-Fuel-Type')],
            "lighting-env-eff": rating('Lighting', 'Environmental-Efficiency-Rating'),
            "windows-energy-eff": rating('Window', 'Energy-Efficiency-Rating'),
            "floor-env-eff": rating('Floor', 'Environmental-Efficiency-Rating'),
            "sheating-env-eff": rating('Secondary-Heating', 'Environmental-Efficiency-Rating'),
            "lighting-description": summary('Lighting', 'Description'),
            "roof-env-eff": rating('Roof', 'Environmental-Efficiency-Rating'),
            "walls-energy-eff": rating('Wall', 'Energy-Efficiency-Rating'),
            "photo-supply": self.get_photo_supply(),
            "lighting-cost-potential": assessment('Lighting-Cost-Potential'),
            "mainheat-env-eff": rating('Main-Heating', 'Environmental-Efficiency-Rating'),
            "multi-glaze-proportion": node('Multiple-Glazed-Proportion'),
            "main-heating-controls": summary('Main-Heating-Controls', 'Description'),
            "flat-top-storey": flat_top_storey,
            "secondheat-description": summary('Secondary-Heating', 'Description'),
            "walls-env-eff": rating('Wall', 'Environmental-Efficiency-Rating'),
            "transaction-type": self.TRANSACTION_TYPE_MAP[node('Transaction-Type')],
            "extension-count": node('Extensions-Count'),
            "mainheatc-env-eff": rating('Main-Heating-Controls', 'Environmental-Efficiency-Rating'),
            "lmk-key": "",  # Doesn't exist for non-EPC xmls
            "wind-turbine-count": node('Wind-Turbines-Count'),
            "tenure": self.TENURE_MAP[node('Tenure')],
            "floor-level": floor_level,
            "potential-energy-efficiency": assessment('Energy-Rating-Potential'),
            "potential-energy-rating": sap_to_epc(float(assessment('Energy-Rating-Potential'))),
            "hot-water-energy-eff": rating('Hot-Water', 'Energy-Efficiency-Rating'),
            "low-energy-lighting": node('Low-Energy-Lighting'),
            "walls-description": summary('Wall', 'Description'),
            "hotwater-description": summary('Hot-Water', 'Description'),
            "co2-emissions-current": node('CO2-Emissions-Current'),
            "heating-cost-current": node('Heating-Cost-Current'),
            "heating-cost-potential": assessment('Heating-Cost-Potential'),
            "hot-water-cost-current": node('Hot-Water-Cost-Current'),
            "hot-water-cost-potential": assessment('Hot-Water-Cost-Potential'),
            "lighting-cost-current": node('Lighting-Cost-Current'),
            "energy-consumption-current": node('Energy-Consumption-Current'),
            # The inspection date is used for the lodgement date and datetime as well
            "lodgement-date": node('Inspection-Date'),
            "lodgement-datetime": datetime.strptime(node('Inspection-Date'), "%Y-%m-%d").isoformat(),
            "mainheat-description": summary('Main-Heating', 'Description'),
            "floor-height": floor_height,
            "glazed-type": glazed_type,
        }

    def get_insulation_wall_area(self):
        """
        Extracts the insulation wall area for the main dwelling
        Note that this doesn't include any extensions. We don't have recommendations for extensions right now, so we
        don't currently calculate the insulation wall area for them, since it's not used in the recommendations.

        The wall area is the sum of heat-loss perimeter x room height over the main dwelling's floors
        (excluding rooms in the roof). Known window areas are subtracted; when no window area is known,
        10% of the wall area is discounted instead.

        :return: the insulation wall area as a float
        """
        main_dwelling_floors = [
            f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"]
        ]
        main_dwelling_windows = [
            w for w in self.windows if w["window_location"] == "0"
        ]
        wall_areas = sum([float(f["heat_loss_perimeter"]) * float(f["room_height"]) for f in main_dwelling_floors])
        window_areas = [float(w["window_area"]) for w in main_dwelling_windows if w["window_area"] is not None]
        if not window_areas:
            # We discount 10% of the wall area
            insulation_wall_area = wall_areas * 0.9
        else:
            insulation_wall_area = wall_areas - sum(window_areas)
        return insulation_wall_area

    def extract_additional_data(self):
        """
        Assemble the survey data that is captured in addition to the EPC record and store it in
        `self.additional_data`. Also sets `self.insulation_wall_area`.

        Relies on `floor_dimensions` and `windows` having been extracted already.
        """
        self.insulation_wall_area = self.get_insulation_wall_area()
        # We pull this out which is used as the insulation floor area
        main_dwelling_ground_floor_area = [
            f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling" and f["floor"] == "0"
        ][0]["total_floor_area"]
        main_dwelling_windows = [w for w in self.windows if w["window_location"] == "0"]
        number_of_windows = len(main_dwelling_windows)
        windows_area = [float(w["window_area"]) for w in main_dwelling_windows if w["window_area"] is not None]
        windows_area = sum(windows_area) if windows_area else None
        boolean_lookup = {
            "true": True,
            "false": False,
            "Y": True,
            "N": False
        }
        cylinder_insulation_type = {
            None: "",
            "1": "Foam",
            "2": "Jacket"
        }
        # Both cylinder values are optional: absent (or empty) tags are reported as None
        raw_cylinder_thickness = self.get_node_value('Cylinder-Insulation-Thickness')
        cylinder_insulation_thickness = int(raw_cylinder_thickness) if raw_cylinder_thickness else None
        raw_cylinder_thermostat = self.get_node_value('Cylinder-Thermostat')
        cylinder_thermostat = boolean_lookup[raw_cylinder_thermostat] if raw_cylinder_thermostat else None
        self.additional_data = {
            "file_location": self.filekey,
            "surveyor_name": self.surveyor_name,
            "surveyor_company": self.surveyor_company,
            "space_heating_kwh": self.space_heating_kwh,
            "water_heating_kwh": self.water_heating_kwh,
            # "heating_system": self.heating_system,
            # "heating_controls": self.heating_controls,
            "number_of_doors": self.number_of_doors,
            "number_of_insulated_doors": self.number_of_insulated_doors,
            "number_of_floors": self.number_of_floors,
            "insulation_wall_area": self.insulation_wall_area,
            "heat_loss_perimeter": self.heat_loss_perimeter,
            "party_wall_length": self.party_wall_length,
            "perimeter": self.perimeter,
            "rooms_with_bath_and_or_shower": int(self.get_node_value('Rooms-With-Bath-And-Or-Shower')),
            "rooms_with_mixer_shower_no_bath": int(self.get_node_value('Rooms-With-Mixer-Shower-No-Bath')),
            "room_with_bath_and_mixer_shower": int(self.get_node_value('Rooms-With-Bath-And-Mixer-Shower')),
            "percent_draftproofed": int(self.get_node_value('Percent-Draughtproofed')),
            "has_hot_water_cylinder": boolean_lookup[self.get_node_value('Has-Hot-Water-Cylinder')],
            "cylinder_insulation_type": cylinder_insulation_type[self.get_node_value('Cylinder-Insulation-Type')],
            "cylinder_insulation_thickness": cylinder_insulation_thickness,
            "cylinder_thermostat": cylinder_thermostat,
            "main_dwelling_ground_floor_area": float(main_dwelling_ground_floor_area),
            "number_of_windows": int(number_of_windows),
            "windows_area": float(windows_area) if windows_area is not None else windows_area,
        }

    def get_node_value(self, tag_name):
        """Return the text of the first <tag_name> element in the document, or None if absent or empty."""
        nodes = self.xml.getElementsByTagName(tag_name)
        if nodes and nodes[0].firstChild:
            return nodes[0].firstChild.nodeValue
        return None

    def get_node_value_from_floor_dimensions(self, tag_name):
        """Return the text of <tag_name> inside the first <SAP-Floor-Dimension>, or None if absent or empty."""
        nodes = self.xml.getElementsByTagName('SAP-Floor-Dimension')
        if nodes:
            tag = nodes[0].getElementsByTagName(tag_name)
            if tag and tag[0].firstChild:
                return tag[0].firstChild.nodeValue
        return None

    def get_property_summary_value(self, section, tag_name):
        """
        Return the text of <tag_name> inside the first <section> of <Property-Summary>.

        :return: the text, or None if the section or tag is absent or empty
        :raises IndexError: if the document has no <Property-Summary> element
        """
        nodes = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName(section)
        if nodes:
            tag = nodes[0].getElementsByTagName(tag_name)
            if tag and tag[0].firstChild:
                return tag[0].firstChild.nodeValue
        return None

    def get_energy_assessment_value(self, tag_name):
        """
        Return the text of <tag_name> inside the first <Energy-Assessment> element.

        :return: the text, or None if <Energy-Assessment> or the tag is absent or empty
        """
        # Check the node list before indexing it, so a missing <Energy-Assessment> yields None
        # (like the other getters) rather than an IndexError
        assessments = self.xml.getElementsByTagName('Energy-Assessment')
        if assessments:
            tag = assessments[0].getElementsByTagName(tag_name)
            if tag and tag[0].firstChild:
                return tag[0].firstChild.nodeValue
        return None

    def get_uprn(self, uprn):
        """
        Resolve the UPRN and store it in `self.uprn`.

        :param uprn: when not None, it is stored as-is and the document is not consulted

        Otherwise the first <UPRN> element is read:
        - missing or empty element -> -1
        - a value made up only of zeros -> 0
        - a value of the form "UPRN-<number>" (case-insensitive) -> "<number>"
        - any other value -> the value unchanged
        """
        if uprn is not None:
            self.uprn = uprn
            return
        uprn_nodes = self.xml.getElementsByTagName('UPRN')
        uprn_tag = uprn_nodes[0].firstChild if uprn_nodes else None
        if uprn_tag is None:
            self.uprn = -1
            return
        raw_uprn = uprn_tag.nodeValue
        # If all of the characters in the UPRN are 0, then there is no UPRN set
        if raw_uprn.count("0") == len(raw_uprn):
            self.uprn = 0
            return
        parts = raw_uprn.lower().split("uprn-")
        # Values without the "uprn-" prefix are kept as they are instead of failing on the split
        self.uprn = parts[1] if len(parts) > 1 else raw_uprn

    def get_property_type(self):
        """
        Return the property type label for the document.

        Reads <Property-Type>, falling back to <PropertyType1>. When the tag occurs more than once, all
        occurrences must map to the same label.

        :raises ValueError: if the occurrences map to more than one property type
        :raises KeyError: if a value is not in PROPERTY_TYPE_LOOKUP
        """
        if not self.xml:
            raise ValueError("You need to read the file first")
        property_type = self.xml.getElementsByTagName('Property-Type')
        if not property_type:
            property_type = self.xml.getElementsByTagName('PropertyType1')
        if len(property_type) > 1:
            property_types = {PROPERTY_TYPE_LOOKUP[p.firstChild.nodeValue] for p in property_type}
            if len(property_types) > 1:
                raise ValueError("Multiple property types found")
            return property_types.pop()
        return PROPERTY_TYPE_LOOKUP[property_type[0].firstChild.nodeValue]

    def get_sap(self):
        """Return the current SAP score (as a string) and the EPC rating derived from it via sap_to_epc."""
        sap_score = self.xml.getElementsByTagName('Energy-Rating-Current')
        sap_score = int(sap_score[0].firstChild.nodeValue)
        epc_rating = sap_to_epc(sap_score)
        return {
            "current-energy-efficiency": str(sap_score),
            "current-energy-rating": epc_rating
        }

    def get_heating_and_emissions_data(self):
        """
        This method will extract the following pieces of information:
        1) Space heating requirement (<Space-Heating-Existing-Dwelling>) -> `space_heating_kwh`
        2) Water heating requirement (<Water-Heating>) -> `water_heating_kwh`

        Both values are stored as the raw text found in the document.
        :return:
        """
        self.space_heating_kwh = self.xml.getElementsByTagName(
            'Space-Heating-Existing-Dwelling'
        )[0].firstChild.nodeValue
        self.water_heating_kwh = self.xml.getElementsByTagName('Water-Heating')[0].firstChild.nodeValue

    def get_detailed_heating_specs(self):
        """
        Given the heating data that is found in the <SAP-Heating> tag, we extract the details about the heating
        system and its controls, storing them in `heating_system` and `heating_controls`.

        Codes are looked up in `heating_data`; a code with no match is reported as a placeholder string
        containing that code.
        :return:
        """
        sap_main_heating_details = (
            self.xml.getElementsByTagName('SAP-Heating')[0]
            .getElementsByTagName("Main-Heating-Details")[0]
            .getElementsByTagName("Main-Heating")[0]
        )
        heating_code = sap_main_heating_details.getElementsByTagName("Main-Heating-Number")[0].firstChild.nodeValue
        # Get the heating system
        heating_system = heating_data[heating_data["code"] == int(heating_code)]["description"]
        heating_system = heating_system.values[0] if not heating_system.empty else f"Heating code: {heating_code}"
        # Get the heating controls
        heating_controls_code = (
            sap_main_heating_details.getElementsByTagName("Main-Heating-Control")[0].firstChild.nodeValue
        )
        heating_controls = heating_data[heating_data["code"] == int(heating_controls_code)]["description"]
        # The fallback must report the controls code (not the heating system code)
        heating_controls = (
            heating_controls.values[0]
            if not heating_controls.empty
            else f"Heating Controls code: {heating_controls_code}"
        )
        self.heating_system = heating_system
        self.heating_controls = heating_controls

    def get_doors(self):
        """Read the door count and insulated door count from <SAP-Property-Details>."""
        # Doors can be found in the SAP-Property-Details tag
        self.number_of_doors = int(
            self.xml.getElementsByTagName('SAP-Property-Details')[0]
            .getElementsByTagName('Door-Count')[0]
            .firstChild.nodeValue
        )
        self.number_of_insulated_doors = int(
            self.xml.getElementsByTagName('SAP-Property-Details')[0]
            .getElementsByTagName('Insulated-Door-Count')[0]
            .firstChild.nodeValue
        )

    def get_photo_supply(self):
        """
        Return the <Percent-Roof-Area> recorded under <Photovoltaic-Supply>/<None-Or-No-Details>.

        :raises NotImplementedError: if <Photovoltaic-Supply> has no <None-Or-No-Details> child
        """
        photo_supply_tag = self.xml.getElementsByTagName("Photovoltaic-Supply")[0]
        # Check if the "None-Or-No-Details" tag is present
        if photo_supply_tag.getElementsByTagName("None-Or-No-Details"):
            return (
                photo_supply_tag.
                getElementsByTagName("None-Or-No-Details")[0].
                getElementsByTagName("Percent-Roof-Area")[0].
                firstChild.nodeValue
            )
        else:
            raise NotImplementedError("Implement me")

    def get_assessor_details(self):
        """Store the <Name> found under <Energy-Assessor> in `surveyor_name`."""
        energy_assessor_tag = self.xml.getElementsByTagName('Energy-Assessor')[0]
        self.surveyor_name = (
            energy_assessor_tag.getElementsByTagName("Name")[0].firstChild.nodeValue
        )

    def get_property_address(self):
        """
        Read the address fields from the first <Property> element.

        Address lines 2 and 3 and the county may be missing or empty: the address lines are then
        reported as None and the county as "". The combined "address" joins the non-empty address
        lines with ", ".

        :return: a dictionary of address fields
        :raises IndexError: if <Property>, <Address-Line-1>, <Post-Town> or <Postcode> is missing
        """
        property_tag = self.xml.getElementsByTagName("Property")[0]

        def optional_text(tag_name):
            # Text of the first <tag_name> under <Property>, or None when the tag is absent or empty
            matches = property_tag.getElementsByTagName(tag_name)
            return self.get_node(matches[0]) if matches else None

        address1 = self.get_node(property_tag.getElementsByTagName("Address-Line-1")[0])
        address2 = optional_text("Address-Line-2")
        address3 = optional_text("Address-Line-3")
        posttown = self.get_node(property_tag.getElementsByTagName("Post-Town")[0])
        postcode = self.get_node(property_tag.getElementsByTagName("Postcode")[0])
        address = ", ".join(
            [x for x in [address1, address2, address3] if x is not None]
        )
        county = optional_text("County") or ""
        # Seems to be unavailable in the xml
        constituency = None
        constituency_label = None
        return {
            "address1": address1,
            "address2": address2,
            "address3": address3,
            "posttown": posttown,
            "postcode": postcode,
            "address": address,
            "county": county,
            "constituency": constituency,
            "constituency-label": constituency_label
        }

    def get_floor_dimensions(self):
        """
        Extracts physical measurements of the property such as the floor area, room height, etc.
        across the main dwelling and any extensions.

        Populates `floor_dimensions` (one dictionary per floor / room in roof), `number_of_floors`
        (entries belonging to the "Main Dwelling"), and `heat_loss_perimeter`, `party_wall_length` and
        `perimeter`. The perimeter figures take the largest value per building part and sum those
        across building parts; a building part without any value contributes 0.
        :return:
        """
        def get_part_value(node, tag_name):
            element = node.getElementsByTagName(tag_name)
            if element and element[0].firstChild:
                return element[0].firstChild.nodeValue
            return None

        def sum_of_part_maxima(key):
            # Largest `key` value per building part, summed over building parts
            values_by_part = {}
            for row in floor_dimensions:
                values = values_by_part.setdefault(row['building_part_identifier'], [])
                if row[key]:
                    values.append(float(row[key]))
            # default=0.0: a part with no measurement must not poison the sum (previously -inf)
            return sum(max(values, default=0.0) for values in values_by_part.values())

        # Each part will correspond to the main dwelling or an extension
        sap_building_parts = self.xml.getElementsByTagName("SAP-Building-Part")
        floor_dimensions = []
        for building_part in sap_building_parts:
            building_part_identifier = building_part.getElementsByTagName("Identifier")[0].firstChild.nodeValue
            sap_floor_dimensions = building_part.getElementsByTagName("SAP-Floor-Dimension")
            data = [
                {
                    'building_part_identifier': building_part_identifier,
                    'floor': get_part_value(floor_dimension, 'Floor'),
                    'floor_construction': get_part_value(floor_dimension, 'Floor-Construction'),
                    'floor_insulation': get_part_value(floor_dimension, 'Floor-Insulation'),
                    'heat_loss_perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'),
                    'party_wall_length': get_part_value(floor_dimension, 'Party-Wall-Length'),
                    'total_floor_area': get_part_value(floor_dimension, 'Total-Floor-Area'),
                    'room_height': get_part_value(floor_dimension, 'Room-Height'),
                    "room_roof": False
                } for floor_dimension in sap_floor_dimensions
            ]
            room_roofs = building_part.getElementsByTagName("SAP-Room-In-Roof")
            room_roof_data = [
                {
                    "building_part_identifier": building_part_identifier,
                    # A room in the roof is recorded as one floor above the highest floor of the part
                    "floor": str(max([int(d["floor"]) for d in data]) + 1),
                    "floor_construction": "",
                    "floor_insulation": rr.getElementsByTagName("Insulation")[0].firstChild.nodeValue,
                    "heat_loss_perimeter": "",
                    "party_wall_length": "",
                    "total_floor_area": rr.getElementsByTagName("Floor-Area")[0].firstChild.nodeValue,
                    "room_height": "",
                    "room_roof": True
                } for rr in room_roofs
            ]
            floor_dimensions.extend(data)
            floor_dimensions.extend(room_roof_data)
        self.floor_dimensions = floor_dimensions
        self.number_of_floors = len(
            [f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling"]
        )
        # We extract the maximum heat loss perimeter / party wall length, per building part
        self.heat_loss_perimeter = sum_of_part_maxima('heat_loss_perimeter')
        self.party_wall_length = sum_of_part_maxima('party_wall_length')
        self.perimeter = self.heat_loss_perimeter + self.party_wall_length

    @staticmethod
    def _parse_windows_content(window, glazing_type_lookup, orientation_lookup):
        """
        Convert one <SAP-Window> element into a dictionary.

        :param window: the <SAP-Window> element
        :param glazing_type_lookup: maps the <Glazing-Type> code to a description
        :param orientation_lookup: maps the <Orientation> code to a compass direction
        :raises KeyError: if the glazing type or orientation code is not in its lookup
        """
        # There may not be a pvc frame
        pvc_frame = window.getElementsByTagName("PVC-Frame")
        pvc_frame = pvc_frame[0].firstChild.nodeValue if pvc_frame else None
        # There may not be a glazing gap for single glazed windows
        glazing_gap = window.getElementsByTagName("Glazing-Gap")
        glazing_gap = glazing_gap[0].firstChild.nodeValue if glazing_gap else None
        parsed = {
            "window_location": window.getElementsByTagName("Window-Location")[0].firstChild.nodeValue,
            "window_area": window.getElementsByTagName("Window-Area")[0].firstChild.nodeValue,
            "window_type": window.getElementsByTagName("Window-Type")[0].firstChild.nodeValue,
            "glazing_type": glazing_type_lookup[
                window.getElementsByTagName("Glazing-Type")[0].firstChild.nodeValue
            ],
            "pvc_frame": pvc_frame,
            "glazing_gap": glazing_gap,
            "orientation": orientation_lookup[window.getElementsByTagName("Orientation")[0].firstChild.nodeValue]
        }
        return parsed

    def get_windows(self):
        """
        Extracts data about the windows in the property, including the number of windows and the window type.

        When the document has a <SAP-Windows> element, every <SAP-Window> inside it is parsed. Otherwise
        a single summary entry is built from <SAP-Property-Details>, with no area, type or orientation.
        The result is stored in `self.windows`.
        :return:
        """
        glazing_type_lookup = {
            "ND": "Single glazing",
            "1": "double glazing installed before 2002",
            "2": "double glazing installed during or after 2002",
            "3": "double glazing, unknown install date",
            "5": "Single glazing",
        }
        orientation_lookup = {
            "1": "North",
            "2": "North East",
            "3": "East",
            "4": "South East",
            "5": "South",
            "6": "South West",
            "7": "West",
            "8": "North West"
        }
        sap_windows = self.xml.getElementsByTagName("SAP-Windows")
        if not sap_windows:
            # No per-window data: fall back to the glazing summary in SAP-Property-Details
            property_details = self.xml.getElementsByTagName("SAP-Property-Details")[0]
            multiple_glazing_type = (
                property_details.getElementsByTagName("Multiple-Glazing-Type")[0].firstChild.nodeValue
            )
            pvc_frame = property_details.getElementsByTagName("PVC-Window-Frames")
            pvc_frame = pvc_frame[0].firstChild.nodeValue if pvc_frame else None
            multple_glazed_proportion = (
                property_details.getElementsByTagName("Multiple-Glazed-Proportion")[0].firstChild.nodeValue
            )
            self.windows = [
                {
                    "window_location": "0",
                    "window_area": None,
                    "window_type": None,
                    "glazing_type": glazing_type_lookup[multiple_glazing_type],
                    "pvc_frame": pvc_frame,
                    "glazing_gap": None,
                    "orientation": None,
                    "multple_glazed_proportion": multple_glazed_proportion
                }
            ]
            return
        sap_windows = sap_windows[0].getElementsByTagName("SAP-Window")
        self.windows = [
            self._parse_windows_content(
                window=window,
                glazing_type_lookup=glazing_type_lookup,
                orientation_lookup=orientation_lookup
            ) for window in sap_windows
        ]