Cleaning up EPC data and adding additional data

This commit is contained in:
Khalim Conn-Kowlessar 2024-07-25 18:04:09 +01:00
parent a3c2ff06a8
commit bc84ed2c2a
2 changed files with 101 additions and 85 deletions

View file

@ -45,6 +45,7 @@ def get_house_number(address: str) -> str | None:
class XmlParser:
epc = None
additional_data = None
uprn = None
# heating/emissions information
@ -66,20 +67,11 @@ class XmlParser:
heat_loss_perimeter = None
party_wall_length = None
total_floor_area = None
ground_floor_area = None
is_there_party_wall = None
floor_height = None
insulation_wall_area = None
floor_dimensions = None
rrn = None
database_data = None
# We assume that the insulation wall area is 85% of the total wall area, as a standard estimate
INSULATION_WALL_AREA_FACTOR = 0.85
# The value of the UPRN tells us about the file type that we're parsing
UPRN_FILETYPE_MAP = {
0: "EPR",
@ -119,6 +111,10 @@ class XmlParser:
'1': "Owner-occupied"
}
TARIFF_MAP = {
"2": "Single"
}
def __init__(self, file, filekey, uprn=None):
file.seek(0) # Ensure the file pointer is at the beginning
xml_string = file.read().decode('utf-8')
@ -161,9 +157,6 @@ class XmlParser:
# Building fabric
self.get_doors()
# Property dimensions
self.get_property_dimensions()
self.get_floor_dimensions()
self.get_windows()
@ -171,6 +164,9 @@ class XmlParser:
# Get all of the EPC data
self.extract_epc()
# Put together all of the additional data we capture
self.extract_additional_data()
def extract_epc(self):
if self.floor_dimensions is None:
@ -191,16 +187,23 @@ class XmlParser:
flat_storey_count = ""
flat_top_storey = ""
floor_level = "NO DATA!"
energy_tariff = "NO DATA!"
floor_height = np.mean([
float(x['room_height']) for x in self.floor_dimensions if x['building_part_identifier'] == 'Main Dwelling'
float(x['room_height']) for x in self.floor_dimensions if
x['building_part_identifier'] == 'Main Dwelling' and not x['room_roof']
])
# Take the most prevalent glazing type
glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0']
glazed_type = max(glazed_type, key=glazed_type.count)
energy_tariff = (
self.xml.getElementsByTagName("SAP-Energy-Source")[0]
.getElementsByTagName("Meter-Type")[0]
.firstChild.nodeValue
)
energy_tariff = self.TARIFF_MAP[energy_tariff]
self.epc = {
"uprn": self.uprn,
"uprn-source": "Address Matched",
@ -209,8 +212,6 @@ class XmlParser:
**self.get_sap(),
**self.get_property_address(),
"low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'),
# TODO: Needs to be done more carefully
# "floor-height" = self.get_node_value_from_floor_dimensions('Room-Height'),
"construction-age-band": self.get_node_value('Construction-Age-Band'),
"mainheat-energy-eff": self.RATINGS_MAP[
self.get_property_summary_value('Main-Heating', 'Energy-Efficiency-Rating')
@ -222,8 +223,6 @@ class XmlParser:
self.get_property_summary_value('Lighting', 'Energy-Efficiency-Rating')
],
"environment-impact-potential": self.get_energy_assessment_value('Environmental-Impact-Potential'),
# TODO: Needs to be done more carefully since we have multiple windows
# "glazed-type": self.get_node_value('Glazing-Type'),
"mainheatcont-description":
self.get_property_summary_value('Main-Heating-Controls', 'Description'),
"sheating-energy-eff": self.RATINGS_MAP[
@ -232,8 +231,7 @@ class XmlParser:
"local-authority": "", # Not included in the xml
"local-authority-label": "",
"fixed-lighting-outlets-count": self.get_node_value('Fixed-Lighting-Outlets-Count'),
# TODO: Doesn't seem to be included in the xml
# "energy-tariff": self.get_node_value('Energy-Tariff'),
"energy-tariff": energy_tariff,
"mechanical-ventilation": self.MECHANICAL_VENTILATION_MAP[self.get_node_value('Mechanical-Ventilation')],
"solar-water-heating-flag": self.get_node_value('Solar-Water-Heating'),
"co2-emissions-potential": self.get_energy_assessment_value('CO2-Emissions-Potential'),
@ -328,7 +326,47 @@ class XmlParser:
"mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'),
"floor-height": floor_height,
"glazed-type": glazed_type,
"energy-tariff": energy_tariff,
}
def get_insulation_wall_area(self):
    """
    Compute the net wall area of the main dwelling.

    Every entry in ``self.floor_dimensions`` whose ``building_part_identifier`` is
    "Main Dwelling" and which is not flagged as ``room_roof`` contributes
    ``heat_loss_perimeter * room_height``. The areas of all windows in ``self.windows``
    whose ``window_location`` is "0" are then subtracted from that total.

    NOTE(review): window location "0" is presumably the main dwelling - confirm against
    the XML schema.

    :return: summed wall area minus summed window area
    """
    storeys = []
    for entry in self.floor_dimensions:
        # Skip other building parts and room-in-roof entries
        if entry["building_part_identifier"] != "Main Dwelling" or entry["room_roof"]:
            continue
        storeys.append(entry)

    openings = list(filter(lambda window: window["window_location"] == "0", self.windows))

    gross_wall_area = sum(
        float(storey["heat_loss_perimeter"]) * float(storey["room_height"]) for storey in storeys
    )
    glazed_area = sum(float(opening["window_area"]) for opening in openings)

    return gross_wall_area - glazed_area
def extract_additional_data(self):
    """
    Assemble the additional (non-EPC) data captured for this file.

    The insulation wall area is computed first and stored on ``self.insulation_wall_area``.
    ``self.additional_data`` is then populated with the file location, values already held
    on the parser, and a handful of values read via ``get_node_value``.

    :return: None
    """
    self.insulation_wall_area = self.get_insulation_wall_area()

    collected = {"file_location": self.filekey}

    # Values already held on the parser are exported under their own attribute name.
    # NOTE: heating_system and heating_controls are not exported yet.
    exported_attributes = (
        "surveyor_name",
        "space_heating_kwh",
        "water_heating_kwh",
        "number_of_doors",
        "number_of_insulated_doors",
        "number_of_floors",
        "insulation_wall_area",
        "heat_loss_perimeter",
        "party_wall_length",
        "perimeter",
    )
    for attribute in exported_attributes:
        collected[attribute] = getattr(self, attribute)

    # Values looked up by XML tag name at export time
    tag_by_key = (
        ("rooms_with_bath_and_or_shower", 'Rooms-With-Bath-And-Or-Shower'),
        ("rooms_with_mixer_shower_no_bath", 'Rooms-With-Mixer-Shower-No-Bath'),
        ("room_with_bath_and_mixer_shower", 'Rooms-With-Bath-And-Mixer-Shower'),
        ("percent_draftproofed", 'Percent-Draughtproofed'),
    )
    for key, tag_name in tag_by_key:
        collected[key] = self.get_node_value(tag_name)

    self.additional_data = collected
def get_node_value(self, tag_name):
@ -516,56 +554,6 @@ class XmlParser:
"constituency-label": constituency_label
}
def get_property_dimensions(self):
    """
    Extract the headline property dimensions from the XML and store them on the parser.

    The ``SAP-Floor-Dimension`` entries of the first ``SAP-Floor-Dimensions`` element are
    read (one entry per floor) and the following attributes are set:

    - ``number_of_floors``: number of ``SAP-Floor-Dimension`` entries
    - ``heat_loss_perimeter``, ``party_wall_length``, ``floor_height``: values of the first entry
    - ``total_floor_area``: sum of ``Total-Floor-Area`` over all entries
    - ``ground_floor_area``: ``Total-Floor-Area`` of the first entry
      (assumes the first entry is the ground floor - TODO confirm ordering)
    - ``is_there_party_wall``: "Yes" if the party wall length is positive or a
      ``Party-Wall-Construction`` value is recorded, otherwise "No"
    - ``insulation_wall_area``: ``heat_loss_perimeter * floor_height * INSULATION_WALL_AREA_FACTOR``
    - ``perimeter``: ``heat_loss_perimeter + party_wall_length``

    An absent or empty ``Party-Wall-Construction`` element is treated as "nothing recorded"
    instead of raising.

    :return: None
    :raises IndexError: if there are no floor dimension entries or a numeric tag is missing
    :raises AttributeError: if a numeric tag is present but has no content
    """

    def read_float(node, tag_name):
        # Numeric value of the first ``tag_name`` element found beneath ``node``
        return float(node.getElementsByTagName(tag_name)[0].firstChild.nodeValue)

    # Each floor has its own SAP-Floor-Dimension tag
    floor_dimensions = (
        self.xml.getElementsByTagName("SAP-Floor-Dimensions")[0]
        .getElementsByTagName("SAP-Floor-Dimension")
    )
    first_floor_entry = floor_dimensions[0]

    self.number_of_floors = len(floor_dimensions)
    self.heat_loss_perimeter = read_float(first_floor_entry, "Heat-Loss-Perimeter")
    self.party_wall_length = read_float(first_floor_entry, "Party-Wall-Length")

    # The element may be missing or empty (firstChild is None), which must not crash the parse
    party_wall_construction_tag = ""
    party_wall_nodes = self.xml.getElementsByTagName("Party-Wall-Construction")
    if party_wall_nodes and party_wall_nodes[0].firstChild is not None:
        raw_value = party_wall_nodes[0].firstChild.nodeValue or ""
        party_wall_construction_tag = raw_value.replace("\n", "").strip()

    self.is_there_party_wall = (
        "Yes" if (self.party_wall_length > 0) or (party_wall_construction_tag != "") else "No"
    )

    # We pull out all of the floor areas
    floor_areas = [read_float(entry, "Total-Floor-Area") for entry in floor_dimensions]
    self.total_floor_area = sum(floor_areas)
    self.ground_floor_area = floor_areas[0]

    self.floor_height = read_float(first_floor_entry, "Room-Height")

    self.insulation_wall_area = self.heat_loss_perimeter * self.floor_height * self.INSULATION_WALL_AREA_FACTOR
    self.perimeter = self.heat_loss_perimeter + self.party_wall_length
def get_floor_dimensions(self):
"""
@ -594,16 +582,53 @@ class XmlParser:
'floor': get_part_value(floor_dimension, 'Floor'),
'floor_construction': get_part_value(floor_dimension, 'Floor-Construction'),
'floor_insulation': get_part_value(floor_dimension, 'Floor-Insulation'),
'heat_loss-perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'),
'party_wall-length': get_part_value(floor_dimension, 'Party-Wall-Length'),
'total_floor-area': get_part_value(floor_dimension, 'Total-Floor-Area'),
'room_height': get_part_value(floor_dimension, 'Room-Height')
'heat_loss_perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'),
'party_wall_length': get_part_value(floor_dimension, 'Party-Wall-Length'),
'total_floor_area': get_part_value(floor_dimension, 'Total-Floor-Area'),
'room_height': get_part_value(floor_dimension, 'Room-Height'),
"room_roof": False
} for floor_dimension in sap_floor_dimensions
]
room_roofs = building_part.getElementsByTagName("SAP-Room-In-Roof")
room_roof_data = [
{
"building_part_identifier": building_part_identifier,
"floor": str(max([int(d["floor"]) for d in data]) + 1),
"floor_construction": "",
"floor_insulation": rr.getElementsByTagName("Insulation")[0].firstChild.nodeValue,
"heat_loss_perimeter": "",
"party_wall_length": "",
"total_floor_area": rr.getElementsByTagName("Floor-Area")[0].firstChild.nodeValue,
"room_height": "",
"room_roof": True
} for rr in room_roofs
]
floor_dimensions.extend(data)
floor_dimensions.extend(room_roof_data)
self.floor_dimensions = floor_dimensions
self.number_of_floors = len(
[f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling"]
)
self.heat_loss_perimeter = max(
[
float(f["heat_loss_perimeter"]) for f in self.floor_dimensions
if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"]
]
)
self.party_wall_length = max(
[
float(f["party_wall_length"]) for f in self.floor_dimensions
if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"]
]
)
self.perimeter = self.heat_loss_perimeter + self.party_wall_length
def get_windows(self):
"""
Extracts data about the windows in the property, including the number of windows and the window type.
@ -612,15 +637,6 @@ class XmlParser:
sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window")
# This is the data in each sap window:
# <Window-Location>2</Window-Location>
# <Window-Area quantity="square metres">1.55</Window-Area>
# <Window-Type>1</Window-Type>
# <Glazing-Type>3</Glazing-Type>
# <PVC-Frame>true</PVC-Frame>
# <Glazing-Gap>16+</Glazing-Gap>
# <Orientation>7</Orientation>
glazing_type_lookup = {
"3": "double glazing, unknown install date"
}

View file

@ -48,7 +48,7 @@ def main():
for xml in xmls:
xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
xml_data_io = BytesIO(xml_data)
xml_parser = XmlParser(file=xml_data_io, filekey=xml, uprn=uprn)
xml_parser = XmlParser(file=xml_data_io, filekey=os.path.join(f"s3://{BUCKET}", xml), uprn=uprn)
xml_parser.run()
logger.info(f"Extracted data from {xml}")