# Mirror of https://github.com/Hestia-Homes/Model.git
# Synced 2026-06-08 11:17:27 +00:00
# 844 lines, 34 KiB, Python
import re
|
|
import numpy as np
|
|
import usaddress
|
|
from datetime import datetime
|
|
from xml.dom.minidom import parseString
|
|
from backend.app.utils import sap_to_epc
|
|
from etl.xml_survey_extraction.pcdb import heating_data
|
|
|
|
# Maps the raw property-type value read from a survey xml to a readable property type.
# Keys are the values as they appear in the xml text; "House" maps to itself.
PROPERTY_TYPE_LOOKUP = {
    "0": "House",
    "House": "House",
    "2": "Flat",
    "3": "Maisonette",
}
|
|
|
|
|
|
def get_house_number(address: str) -> str | None:
    """
    Extract the house number from a free-text address.

    The address is first tagged with the usaddress library; the first token labelled
    "AddressNumber" is returned with any commas removed. When usaddress finds no such token,
    a regular expression is used instead, looking for a number that follows 'Flat' or 'Apartment',
    or a number at the very start of the address.

    :param address: the address to parse
    :return: the house number as a string, or None when no number could be found
    """

    tagged_tokens = usaddress.parse(address)
    house_number = next(
        (tagged[0] for tagged in tagged_tokens if tagged[1] == "AddressNumber"),
        None,
    )

    if house_number is not None:
        # Remove trailing commas
        return house_number.replace(",", "")

    # Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
    # we also add a custom approach

    # Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
    pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
    match = re.search(pattern, address)
    if not match:
        return None

    # Only one of the two alternatives can match, so return whichever group captured something
    return next(group for group in match.groups() if group is not None)
|
|
|
|
|
|
class XmlParser:
    """
    Parses a survey xml document into an EPC-style record plus additional survey data.

    Construct the parser with an open binary file and call run(). Afterwards ``epc`` holds the
    EPC-style record and ``additional_data`` holds the supplementary values (doors, dimensions,
    windows, hot water cylinder details, ...). run() only extracts data for "lig" xmls, which are
    recognised by the presence of a ``Schema-Version-Original`` tag.

    The lookup tables below translate raw xml codes into descriptions. They only contain the codes
    listed here; an unmapped code raises KeyError when it is looked up.
    """

    # Outputs, populated by extract_epc() and extract_additional_data()
    epc = {}
    additional_data = {}
    uprn = None

    # heating/emissions information
    space_heating_kwh = None
    water_heating_kwh = None
    heating_system = None
    heating_controls = None

    # Assessor details
    surveyor_name = None

    number_of_doors = None
    number_of_insulated_doors = None
    windows = None

    # Property dimensions
    number_of_floors = None
    perimeter = None
    heat_loss_perimeter = None
    party_wall_length = None
    total_floor_area = None
    floor_height = None
    insulation_wall_area = None

    floor_dimensions = None

    # The age band lookup is based on the country code
    AGE_BAND_LOOKUP = {
        # England & Wales
        "EAW": {
            "A": "England and Wales: before 1900",
            "B": "England and Wales: 1900-1929",
            "C": "England and Wales: 1930-1949",
            "D": "England and Wales: 1950-1966",
            "E": "England and Wales: 1967-1975",
            "F": "England and Wales: 1976-1982",
            "G": "England and Wales: 1983-1990",
            "H": "England and Wales: 1991-1995",
            "I": "England and Wales: 1996-2002",
            "J": "England and Wales: 2003-2006",
            "K": "England and Wales: 2007-2011",
            "L": "England and Wales: 2012 onwards",
        }
    }

    RATINGS_MAP = {
        "0": "N/A",
        "1": "Very Poor",
        "2": "Poor",
        "3": "Average",
        "4": "Good",
        "5": "Very Good"
    }

    MECHANICAL_VENTILATION_MAP = {
        "0": "natural"
    }

    BUILT_FORM_MAP = {
        "1": "Detached",
        "2": "Semi-Detached",
        "3": "End-Terrace",
        "4": "Mid-Terrace",
    }

    GLAZED_AREA_MAP = {
        "2": "More than Typical",
        "4": "Much More Than Typical"
    }

    FUEL_TYPE_MAP = {
        "26": "mains gas (not community)"
    }

    TRANSACTION_TYPE_MAP = {
        "5": "Rented (social)",
        "13": "ECO assessment",
        "14": "Stock condition survey",
    }

    TENURE_MAP = {
        "1": "Owner-occupied",
        "2": "Rented (social)",
        "3": "Rented (private)",
    }

    TARIFF_MAP = {
        "1": "Dual",
        "2": "Single",
        "3": "Unknown"
    }

    def __init__(self, file, filekey, surveyor_company, uprn=None):
        """
        Read and parse the xml, then resolve the UPRN.

        :param file: seekable binary file object containing the utf-8 encoded xml
        :param filekey: stored as-is and reported as "file_location" in the additional data
        :param surveyor_company: stored as-is and reported in the additional data
        :param uprn: optional UPRN; when given it is used instead of the value in the xml
        """
        file.seek(0)  # Ensure the file pointer is at the beginning
        xml_string = file.read().decode('utf-8')
        self.xml = parseString(xml_string)
        self.filekey = filekey
        self.surveyor_company = surveyor_company

        # Give every instance its own output containers rather than the shared class-level dicts
        self.epc = {}
        self.additional_data = {}

        # We check if we have a lig xml or rdsap xml
        # We look for the presence of the Schema-Version-Original tag
        self.is_lig = len(self.xml.getElementsByTagName("Schema-Version-Original")) > 0

        self.get_uprn(uprn)

    @staticmethod
    def get_node(node):
        """
        Utility function to get the node value from the xml, where data might be optional

        :param node: a DOM node
        :return: the value of the node's first child, or None when the node has no children
        """
        first_child = node.firstChild
        return None if first_child is None else first_child.nodeValue

    @staticmethod
    def _optional_text(parent, tag_name):
        """
        Return the value of the first ``tag_name`` element found under ``parent``.

        :return: None when there is no such element or the element is empty
        """
        matches = parent.getElementsByTagName(tag_name)
        if not matches:
            return None
        return XmlParser.get_node(matches[0])

    @staticmethod
    def _required_text(parent, tag_name):
        """
        Return the value of the first ``tag_name`` element found under ``parent``.

        Unlike _optional_text this fails loudly: IndexError when the element is missing and
        AttributeError when it is empty.
        """
        return parent.getElementsByTagName(tag_name)[0].firstChild.nodeValue

    def run(self):
        """
        Run the full extraction, populating ``epc`` and ``additional_data``.

        Does nothing when the document is not a lig xml.
        """
        if not self.is_lig:
            return

        self.get_assessor_details()
        self.get_heating_and_emissions_data()
        # NOTE: get_detailed_heating_specs() is intentionally not called here (it was disabled
        # in the original pipeline).

        # Building fabric
        self.get_doors()
        self.get_floor_dimensions()
        self.get_windows()

        # Get all of the EPC data (needs the floor dimensions and windows extracted above)
        self.extract_epc()

        # Put together all of the additional data we capture
        self.extract_additional_data()

    def _parse_heat_loss_corridor(self):
        """
        Return the heat loss corridor description for a flat.

        :raises KeyError: when the xml value is missing or is not one of the mapped values
        """
        hlc_lookup = {"2": "unheated corridor", "Unheated": "unheated corridor"}
        # For some reason, this tag is spelt incorrectly in the rdsap xml
        tag_name = 'Heat-Loss-Corridor' if self.is_lig else 'FlatCoridor'
        return hlc_lookup[self.get_node_value(tag_name)]

    def _parse_heat_loss_corridor_length(self):
        """Return the unheated corridor length as found in the xml, or None when absent."""
        if self.is_lig:
            return self.get_node_value('Unheated-Corridor-Length')
        return self.get_node_value('FlatShelteredWallLength')

    def _parse_flat_storey_count(self):
        """Return the storey count; only read (from the Storeys tag) for non-lig xmls."""
        # in the EPR the tag is Storeys
        if self.is_lig:
            return None
        return self.get_node_value('Storeys')

    def _parse_flat_top_storey(self):
        """Return the Top-Storey value for lig xmls, None otherwise."""
        if self.is_lig:
            return self.get_node_value('Top-Storey')
        return None

    def _parse_floor_level(self):
        """Return the Level found under SAP-Flat-Details for lig xmls, None otherwise."""
        if self.is_lig:
            flat_details = self.xml.getElementsByTagName('SAP-Flat-Details')[0]
            return self._required_text(flat_details, "Level")
        return None

    def extract_epc(self):
        """
        Build the EPC-style record and store it in ``self.epc``.

        get_floor_dimensions() and get_windows() must have been called first.

        :raises ValueError: when the floor dimensions or windows have not been extracted yet
        :raises KeyError: when the xml holds a code that is missing from one of the lookup tables
        """
        if self.floor_dimensions is None:
            raise ValueError("Run get_floor_dimensions() first")

        if self.windows is None:
            raise ValueError("Run get_windows() first")

        node = self.get_node_value
        summary = self.get_property_summary_value
        assessment = self.get_energy_assessment_value

        def rating(section, tag_name):
            # Translate a property summary rating code into its description
            return self.RATINGS_MAP[summary(section, tag_name)]

        property_type = self.get_property_type()

        if property_type == "Flat":
            heat_loss_corridor = self._parse_heat_loss_corridor()
            unheated_corridor_length = self._parse_heat_loss_corridor_length()
            flat_storey_count = self._parse_flat_storey_count()
            flat_top_storey = self._parse_flat_top_storey()
            floor_level = self._parse_floor_level()
        else:
            heat_loss_corridor = "NO DATA!"
            unheated_corridor_length = ""
            flat_storey_count = ""
            flat_top_storey = ""
            floor_level = "NO DATA!"

        # Average room height across the main dwelling floors, ignoring any room in roof
        floor_height = np.mean([
            float(x['room_height']) for x in self.floor_dimensions if
            x['building_part_identifier'] == 'Main Dwelling' and not x['room_roof']
        ])

        # Take the most prevalent glazing type of the main dwelling windows
        glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0']
        glazed_type = max(glazed_type, key=glazed_type.count)

        energy_tariff = self.TARIFF_MAP[
            self._required_text(self.xml.getElementsByTagName("SAP-Energy-Source")[0], "Meter-Type")
        ]

        inspection_date = node('Inspection-Date')
        potential_rating = assessment('Energy-Rating-Potential')

        self.epc = {
            "uprn": self.uprn,
            "uprn-source": "Address Matched",
            "property-type": property_type,
            "building-reference-number": "",
            **self.get_sap(),
            **self.get_property_address(),
            "low-energy-fixed-light-count": node('Low-Energy-Fixed-Lighting-Outlets-Count'),
            "construction-age-band": self.AGE_BAND_LOOKUP[node('Country-Code')][node('Construction-Age-Band')],
            "mainheat-energy-eff": rating('Main-Heating', 'Energy-Efficiency-Rating'),
            "windows-env-eff": rating('Window', 'Environmental-Efficiency-Rating'),
            "lighting-energy-eff": rating('Lighting', 'Energy-Efficiency-Rating'),
            "environment-impact-potential": assessment('Environmental-Impact-Potential'),
            "mainheatcont-description": summary('Main-Heating-Controls', 'Description'),
            "sheating-energy-eff": rating('Secondary-Heating', 'Energy-Efficiency-Rating'),
            "local-authority": "",  # Not included in the xml
            "local-authority-label": "",
            "fixed-lighting-outlets-count": node('Fixed-Lighting-Outlets-Count'),
            "energy-tariff": energy_tariff,
            "mechanical-ventilation": self.MECHANICAL_VENTILATION_MAP[node('Mechanical-Ventilation')],
            "solar-water-heating-flag": node('Solar-Water-Heating'),
            "co2-emissions-potential": assessment('CO2-Emissions-Potential'),
            "number-heated-rooms": node('Heated-Room-Count'),
            "floor-description": summary('Floor', 'Description'),
            "energy-consumption-potential": assessment('Energy-Consumption-Potential'),
            "built-form": self.BUILT_FORM_MAP[node('Built-Form')],
            "number-open-fireplaces": node('Open-Fireplaces-Count'),
            "windows-description": summary('Window', 'Description'),
            "glazed-area": self.GLAZED_AREA_MAP[node('Glazed-Area')],
            "inspection-date": inspection_date,
            "mains-gas-flag": node('Mains-Gas'),
            "co2-emiss-curr-per-floor-area": assessment('CO2-Emissions-Current-Per-Floor-Area'),
            "heat-loss-corridor": heat_loss_corridor,
            "unheated-corridor-length": unheated_corridor_length,
            "flat-storey-count": flat_storey_count,
            "roof-energy-eff": rating('Roof', 'Energy-Efficiency-Rating'),
            "total-floor-area": node('Total-Floor-Area'),
            "environment-impact-current": assessment('Environmental-Impact-Current'),
            "roof-description": summary('Roof', 'Description'),
            "floor-energy-eff": rating('Floor', 'Energy-Efficiency-Rating'),
            "number-habitable-rooms": node('Habitable-Room-Count'),
            "hot-water-env-eff": rating('Hot-Water', 'Environmental-Efficiency-Rating'),
            "mainheatc-energy-eff": rating('Main-Heating-Controls', 'Energy-Efficiency-Rating'),
            "main-fuel": self.FUEL_TYPE_MAP[node('Main-Fuel-Type')],
            "lighting-env-eff": rating('Lighting', 'Environmental-Efficiency-Rating'),
            "windows-energy-eff": rating('Window', 'Energy-Efficiency-Rating'),
            "floor-env-eff": rating('Floor', 'Environmental-Efficiency-Rating'),
            "sheating-env-eff": rating('Secondary-Heating', 'Environmental-Efficiency-Rating'),
            "lighting-description": summary('Lighting', 'Description'),
            "roof-env-eff": rating('Roof', 'Environmental-Efficiency-Rating'),
            "walls-energy-eff": rating('Wall', 'Energy-Efficiency-Rating'),
            "photo-supply": self.get_photo_supply(),
            "lighting-cost-potential": assessment('Lighting-Cost-Potential'),
            "mainheat-env-eff": rating('Main-Heating', 'Environmental-Efficiency-Rating'),
            "multi-glaze-proportion": node('Multiple-Glazed-Proportion'),
            "main-heating-controls": summary('Main-Heating-Controls', 'Description'),
            "flat-top-storey": flat_top_storey,
            "secondheat-description": summary('Secondary-Heating', 'Description'),
            "walls-env-eff": rating('Wall', 'Environmental-Efficiency-Rating'),
            "transaction-type": self.TRANSACTION_TYPE_MAP[node('Transaction-Type')],
            "extension-count": node('Extensions-Count'),
            "mainheatc-env-eff": rating('Main-Heating-Controls', 'Environmental-Efficiency-Rating'),
            "lmk-key": "",  # Doesn't exist for non-EPC xmls
            "wind-turbine-count": node('Wind-Turbines-Count'),
            "tenure": self.TENURE_MAP[node('Tenure')],
            "floor-level": floor_level,
            "potential-energy-efficiency": potential_rating,
            "potential-energy-rating": sap_to_epc(float(potential_rating)),
            "hot-water-energy-eff": rating('Hot-Water', 'Energy-Efficiency-Rating'),
            "low-energy-lighting": node('Low-Energy-Lighting'),
            "walls-description": summary('Wall', 'Description'),
            "hotwater-description": summary('Hot-Water', 'Description'),
            "co2-emissions-current": node('CO2-Emissions-Current'),
            "heating-cost-current": node('Heating-Cost-Current'),
            "heating-cost-potential": assessment('Heating-Cost-Potential'),
            "hot-water-cost-current": node('Hot-Water-Cost-Current'),
            "hot-water-cost-potential": assessment('Hot-Water-Cost-Potential'),
            "lighting-cost-current": node('Lighting-Cost-Current'),
            "energy-consumption-current": node('Energy-Consumption-Current'),
            # The inspection date is also used as the lodgement date
            "lodgement-date": inspection_date,
            "lodgement-datetime": datetime.strptime(inspection_date, "%Y-%m-%d").isoformat(),
            "mainheat-description": summary('Main-Heating', 'Description'),
            "floor-height": floor_height,
            "glazed-type": glazed_type,
        }

    def get_insulation_wall_area(self):
        """
        Extracts the insulation wall area for the main dwelling

        The wall area is the sum of heat loss perimeter x room height over the main dwelling floors
        (rooms in roof excluded), minus the area of the main dwelling windows. When no window areas
        are known, 10% of the wall area is discounted instead.

        Note that this doesn't include any extensions. We don't have recommendations for extensions right now, so we
        don't currently calculate the insulation wall area for them, since it's not used in the recommendations.

        :return: the insulation wall area as a number
        """
        main_dwelling_floors = [
            f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"]
        ]
        main_dwelling_windows = [w for w in self.windows if w["window_location"] == "0"]

        wall_areas = sum([float(f["heat_loss_perimeter"]) * float(f["room_height"]) for f in main_dwelling_floors])
        window_areas = [float(w["window_area"]) for w in main_dwelling_windows if w["window_area"] is not None]

        if not window_areas:
            # We discount 10% of the wall area
            return wall_areas * 0.9
        return wall_areas - sum(window_areas)

    def extract_additional_data(self):
        """
        Collect the data captured outside of the EPC record and store it in ``self.additional_data``.

        Relies on the assessor, heating, doors, floor dimension and window extraction having run.
        """
        node = self.get_node_value

        self.insulation_wall_area = self.get_insulation_wall_area()

        # We pull this out which is used as the insulation floor area
        main_dwelling_ground_floor_area = [
            f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling" and f["floor"] == "0"
        ][0]["total_floor_area"]

        main_dwelling_windows = [w for w in self.windows if w["window_location"] == "0"]
        number_of_windows = len(main_dwelling_windows)
        known_window_areas = [float(w["window_area"]) for w in main_dwelling_windows if w["window_area"] is not None]
        # None (rather than 0) signals that no window areas were recorded
        windows_area = float(sum(known_window_areas)) if known_window_areas else None

        boolean_lookup = {
            "true": True,
            "false": False,
            "Y": True,
            "N": False
        }

        cylinder_insulation_type = {
            None: "",
            "1": "Foam",
            "2": "Jacket"
        }

        # The cylinder details are optional in the xml
        raw_thickness = node('Cylinder-Insulation-Thickness')
        cylinder_insulation_thickness = int(raw_thickness) if raw_thickness else None

        raw_thermostat = node('Cylinder-Thermostat')
        cylinder_thermostat = boolean_lookup[raw_thermostat] if raw_thermostat else None

        self.additional_data = {
            "file_location": self.filekey,
            "surveyor_name": self.surveyor_name,
            "surveyor_company": self.surveyor_company,
            "space_heating_kwh": self.space_heating_kwh,
            "water_heating_kwh": self.water_heating_kwh,
            # "heating_system": self.heating_system,
            # "heating_controls": self.heating_controls,
            "number_of_doors": self.number_of_doors,
            "number_of_insulated_doors": self.number_of_insulated_doors,
            "number_of_floors": self.number_of_floors,
            "insulation_wall_area": self.insulation_wall_area,
            "heat_loss_perimeter": self.heat_loss_perimeter,
            "party_wall_length": self.party_wall_length,
            "perimeter": self.perimeter,
            "rooms_with_bath_and_or_shower": int(node('Rooms-With-Bath-And-Or-Shower')),
            "rooms_with_mixer_shower_no_bath": int(node('Rooms-With-Mixer-Shower-No-Bath')),
            "room_with_bath_and_mixer_shower": int(node('Rooms-With-Bath-And-Mixer-Shower')),
            "percent_draftproofed": int(node('Percent-Draughtproofed')),
            "has_hot_water_cylinder": boolean_lookup[node('Has-Hot-Water-Cylinder')],
            "cylinder_insulation_type": cylinder_insulation_type[node('Cylinder-Insulation-Type')],
            "cylinder_insulation_thickness": cylinder_insulation_thickness,
            "cylinder_thermostat": cylinder_thermostat,
            "main_dwelling_ground_floor_area": float(main_dwelling_ground_floor_area),
            "number_of_windows": int(number_of_windows),
            "windows_area": windows_area,
        }

    def get_node_value(self, tag_name):
        """Return the value of the first ``tag_name`` element in the document, or None."""
        return self._optional_text(self.xml, tag_name)

    def get_node_value_from_floor_dimensions(self, tag_name):
        """Return the value of ``tag_name`` within the first SAP-Floor-Dimension element, or None."""
        floor_dimension_nodes = self.xml.getElementsByTagName('SAP-Floor-Dimension')
        if not floor_dimension_nodes:
            return None
        return self._optional_text(floor_dimension_nodes[0], tag_name)

    def get_property_summary_value(self, section, tag_name):
        """
        Return the value of ``tag_name`` within ``section`` of the Property-Summary element.

        :return: None when the Property-Summary, the section or the tag is missing or empty
        """
        summaries = self.xml.getElementsByTagName('Property-Summary')
        if not summaries:
            return None
        sections = summaries[0].getElementsByTagName(section)
        if not sections:
            return None
        return self._optional_text(sections[0], tag_name)

    def get_energy_assessment_value(self, tag_name):
        """
        Return the value of ``tag_name`` within the Energy-Assessment element.

        :return: None when the Energy-Assessment element or the tag is missing or empty
        """
        assessments = self.xml.getElementsByTagName('Energy-Assessment')
        if not assessments:
            return None
        return self._optional_text(assessments[0], tag_name)

    def get_uprn(self, uprn):
        """
        Set ``self.uprn`` from the given value or, failing that, from the UPRN tag in the xml.

        The result is:
          * ``uprn`` itself when it is not None
          * -1 when the xml has no UPRN tag or the tag is empty
          * 0 when the xml value consists only of zeros (no UPRN has been set)
          * otherwise the lower-cased text following the "uprn-" prefix, or the value unchanged
            when it carries no such prefix

        :param uprn: an externally supplied UPRN, or None to read it from the xml
        """
        if uprn is not None:
            self.uprn = uprn
            return

        raw_uprn = self._optional_text(self.xml, 'UPRN')
        if raw_uprn is None:
            self.uprn = -1
            return

        # If all of the characters in the UPRN are 0, then there is no set UPRN
        if raw_uprn.count("0") == len(raw_uprn):
            self.uprn = 0
            return

        pieces = raw_uprn.lower().split("uprn-")
        self.uprn = pieces[1] if len(pieces) > 1 else raw_uprn

    def get_property_type(self):
        """
        Return the property type, translated through PROPERTY_TYPE_LOOKUP.

        The Property-Type tag is used when present, otherwise PropertyType1.

        :raises ValueError: when the xml has not been read, or several tags disagree on the type
        """
        if not self.xml:
            raise ValueError("You need to read the file first")

        property_type = self.xml.getElementsByTagName('Property-Type')
        if not property_type:
            property_type = self.xml.getElementsByTagName('PropertyType1')

        if len(property_type) > 1:
            # The tag can occur several times; this is fine as long as they all agree
            property_types = {PROPERTY_TYPE_LOOKUP[p.firstChild.nodeValue] for p in property_type}
            if len(property_types) > 1:
                raise ValueError("Multiple property types found")

            return property_types.pop()

        return PROPERTY_TYPE_LOOKUP[property_type[0].firstChild.nodeValue]

    def get_sap(self):
        """Return the current SAP score (as a string) and the EPC rating derived from it."""
        sap_score = int(self._required_text(self.xml, 'Energy-Rating-Current'))

        return {
            "current-energy-efficiency": str(sap_score),
            "current-energy-rating": sap_to_epc(sap_score)
        }

    def get_heating_and_emissions_data(self):
        """
        Read the space heating and water heating requirements from the xml.

        Sets ``space_heating_kwh`` (Space-Heating-Existing-Dwelling tag) and ``water_heating_kwh``
        (Water-Heating tag) to the raw text values.
        """
        self.space_heating_kwh = self._required_text(self.xml, 'Space-Heating-Existing-Dwelling')
        self.water_heating_kwh = self._required_text(self.xml, 'Water-Heating')

    def get_detailed_heating_specs(self):
        """
        Given the heating data that is found in the <SAP-Heating> tag, we extract the details about the heating
        system and its controls.

        Both codes are looked up in ``heating_data``; when a code has no entry, a placeholder text
        containing the code is stored instead. Sets ``heating_system`` and ``heating_controls``.
        """
        main_heating = (
            self.xml.getElementsByTagName('SAP-Heating')[0]
            .getElementsByTagName("Main-Heating-Details")[0]
            .getElementsByTagName("Main-Heating")[0]
        )

        # Get the heating system
        heating_code = self._required_text(main_heating, "Main-Heating-Number")
        system_matches = heating_data[heating_data["code"] == int(heating_code)]["description"]
        heating_system = system_matches.values[0] if not system_matches.empty else f"Heating code: {heating_code}"

        # Get the heating controls
        heating_controls_code = self._required_text(main_heating, "Main-Heating-Control")
        controls_matches = heating_data[heating_data["code"] == int(heating_controls_code)]["description"]
        heating_controls = (
            controls_matches.values[0] if not controls_matches.empty
            # Report the controls code here (not the heating system code)
            else f"Heating Controls code: {heating_controls_code}"
        )

        self.heating_system = heating_system
        self.heating_controls = heating_controls

    def get_doors(self):
        """Read the door count and insulated door count from the SAP-Property-Details tag."""
        # Doors can be found in the SAP-Property-Details tag
        property_details = self.xml.getElementsByTagName('SAP-Property-Details')[0]

        self.number_of_doors = int(self._required_text(property_details, 'Door-Count'))
        self.number_of_insulated_doors = int(self._required_text(property_details, 'Insulated-Door-Count'))

    def get_photo_supply(self):
        """
        Return the Percent-Roof-Area recorded under Photovoltaic-Supply / None-Or-No-Details.

        :raises NotImplementedError: when the Photovoltaic-Supply has no None-Or-No-Details tag
        """
        photo_supply_tag = self.xml.getElementsByTagName("Photovoltaic-Supply")[0]

        # Check if the "None-Or-No-Details" tag is present
        no_details = photo_supply_tag.getElementsByTagName("None-Or-No-Details")
        if not no_details:
            raise NotImplementedError("Implement me")

        return self._required_text(no_details[0], "Percent-Roof-Area")

    def get_assessor_details(self):
        """Read the surveyor name from the Energy-Assessor tag into ``surveyor_name``."""
        energy_assessor_tag = self.xml.getElementsByTagName('Energy-Assessor')[0]
        self.surveyor_name = self._required_text(energy_assessor_tag, "Name")

    def get_property_address(self):
        """
        Read the address of the property from the first Property tag.

        Address lines, post town and postcode are None when the tag is missing or empty; the county
        is an empty string in that case. "address" joins the available address lines with ", ".

        :return: dict with the address fields used in the EPC record
        """
        property_tag = self.xml.getElementsByTagName("Property")[0]

        address1 = self._optional_text(property_tag, "Address-Line-1")
        address2 = self._optional_text(property_tag, "Address-Line-2")
        address3 = self._optional_text(property_tag, "Address-Line-3")
        posttown = self._optional_text(property_tag, "Post-Town")
        postcode = self._optional_text(property_tag, "Postcode")
        address = ", ".join(line for line in (address1, address2, address3) if line is not None)
        county = self._optional_text(property_tag, "County") or ""

        # Seems to be unavailable in the xml
        constituency = None
        constituency_label = None

        return {
            "address1": address1,
            "address2": address2,
            "address3": address3,
            "posttown": posttown,
            "postcode": postcode,
            "address": address,
            "county": county,
            "constituency": constituency,
            "constituency-label": constituency_label
        }

    def get_floor_dimensions(self):
        """
        Extracts physical measurements of the property such as the floor area, room height, etc.
        across the main dwelling and any extensions.

        Sets:
          * ``floor_dimensions``: one dict per SAP-Floor-Dimension, plus one per SAP-Room-In-Roof
            (flagged with room_roof=True and numbered one above the highest floor of its part)
          * ``number_of_floors``: number of entries belonging to the "Main Dwelling"
          * ``heat_loss_perimeter`` / ``party_wall_length``: the largest value per building part,
            summed over the parts; a part without any value contributes 0
          * ``perimeter``: heat loss perimeter + party wall length
        """
        get_part_value = self._optional_text

        # Each part corresponds to the main dwelling or an extension
        sap_building_parts = self.xml.getElementsByTagName("SAP-Building-Part")

        floor_dimensions = []
        for building_part in sap_building_parts:
            building_part_identifier = self._required_text(building_part, "Identifier")

            storeys = [
                {
                    'building_part_identifier': building_part_identifier,
                    'floor': get_part_value(floor_dimension, 'Floor'),
                    'floor_construction': get_part_value(floor_dimension, 'Floor-Construction'),
                    'floor_insulation': get_part_value(floor_dimension, 'Floor-Insulation'),
                    'heat_loss_perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'),
                    'party_wall_length': get_part_value(floor_dimension, 'Party-Wall-Length'),
                    'total_floor_area': get_part_value(floor_dimension, 'Total-Floor-Area'),
                    'room_height': get_part_value(floor_dimension, 'Room-Height'),
                    "room_roof": False
                } for floor_dimension in building_part.getElementsByTagName("SAP-Floor-Dimension")
            ]
            floor_dimensions.extend(storeys)

            room_roofs = building_part.getElementsByTagName("SAP-Room-In-Roof")
            if room_roofs:
                # A room in roof sits one floor above the highest numbered floor of this part
                # (floor 0 when the part has no numbered floors)
                roof_floor = str(
                    max((int(s["floor"]) for s in storeys if s["floor"] is not None), default=-1) + 1
                )
                floor_dimensions.extend(
                    {
                        "building_part_identifier": building_part_identifier,
                        "floor": roof_floor,
                        "floor_construction": "",
                        "floor_insulation": self._required_text(rr, "Insulation"),
                        "heat_loss_perimeter": "",
                        "party_wall_length": "",
                        "total_floor_area": self._required_text(rr, "Floor-Area"),
                        "room_height": "",
                        "room_roof": True
                    } for rr in room_roofs
                )

        self.floor_dimensions = floor_dimensions

        self.number_of_floors = len(
            [f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling"]
        )

        # We extract the maximum heat loss perimeter and party wall length, per building part.
        # Every part starts at 0 so that a part without measurements does not distort the totals.
        max_heat_loss_perimeters = {}
        max_party_walls = {}
        for entry in self.floor_dimensions:
            part = entry['building_part_identifier']
            max_heat_loss_perimeters.setdefault(part, 0.0)
            max_party_walls.setdefault(part, 0.0)
            if entry['heat_loss_perimeter']:
                max_heat_loss_perimeters[part] = max(
                    max_heat_loss_perimeters[part], float(entry['heat_loss_perimeter'])
                )
            if entry['party_wall_length']:
                max_party_walls[part] = max(max_party_walls[part], float(entry['party_wall_length']))

        self.heat_loss_perimeter = sum(max_heat_loss_perimeters.values())
        self.party_wall_length = sum(max_party_walls.values())
        self.perimeter = self.heat_loss_perimeter + self.party_wall_length

    @staticmethod
    def _parse_windows_content(window, glazing_type_lookup, orientation_lookup):
        """
        Convert a single SAP-Window element into a dict.

        :param window: the SAP-Window DOM element
        :param glazing_type_lookup: maps the Glazing-Type code to a description
        :param orientation_lookup: maps the Orientation code to a compass direction
        :raises KeyError: when the glazing type or orientation code is not in its lookup
        """
        required = XmlParser._required_text
        optional = XmlParser._optional_text

        return {
            "window_location": required(window, "Window-Location"),
            "window_area": required(window, "Window-Area"),
            "window_type": required(window, "Window-Type"),
            "glazing_type": glazing_type_lookup[required(window, "Glazing-Type")],
            # There may not be a pvc frame
            "pvc_frame": optional(window, "PVC-Frame"),
            # There may not be a glazing gap for single glazed windows
            "glazing_gap": optional(window, "Glazing-Gap"),
            "orientation": orientation_lookup[required(window, "Orientation")]
        }

    def get_windows(self):
        """
        Extracts data about the windows in the property, including the number of windows and the window type.

        When the xml has a SAP-Windows tag, ``windows`` gets one dict per SAP-Window. Otherwise a
        single summary entry is built from the glazing information in SAP-Property-Details; that
        entry has no window area, type, glazing gap or orientation.
        """
        glazing_type_lookup = {
            "ND": "Single glazing",
            "1": "double glazing installed before 2002",
            "2": "double glazing installed during or after 2002",
            "3": "double glazing, unknown install date",
            "5": "Single glazing",
        }

        orientation_lookup = {
            "1": "North",
            "2": "North East",
            "3": "East",
            "4": "South East",
            "5": "South",
            "6": "South West",
            "7": "West",
            "8": "North West"
        }

        sap_windows = self.xml.getElementsByTagName("SAP-Windows")

        if not sap_windows:
            # No individual windows were recorded, so fall back to the property level glazing data
            property_details = self.xml.getElementsByTagName("SAP-Property-Details")[0]

            multiple_glazing_type = self._required_text(property_details, "Multiple-Glazing-Type")
            pvc_frame = self._optional_text(property_details, "PVC-Window-Frames")
            multiple_glazed_proportion = self._required_text(property_details, "Multiple-Glazed-Proportion")

            self.windows = [
                {
                    "window_location": "0",
                    "window_area": None,
                    "window_type": None,
                    "glazing_type": glazing_type_lookup[multiple_glazing_type],
                    "pvc_frame": pvc_frame,
                    "glazing_gap": None,
                    "orientation": None,
                    # NOTE: the key is misspelt, but it is kept as-is since consumers may rely on it
                    "multple_glazed_proportion": multiple_glazed_proportion
                }
            ]
            return

        self.windows = [
            self._parse_windows_content(
                window=window,
                glazing_type_lookup=glazing_type_lookup,
                orientation_lookup=orientation_lookup
            ) for window in sap_windows[0].getElementsByTagName("SAP-Window")
        ]
|