mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
cleaning up epc data and adding additional:
This commit is contained in:
parent
a3c2ff06a8
commit
bc84ed2c2a
2 changed files with 101 additions and 85 deletions
|
|
@ -45,6 +45,7 @@ def get_house_number(address: str) -> str | None:
|
|||
|
||||
class XmlParser:
|
||||
epc = None
|
||||
additional_data = None
|
||||
uprn = None
|
||||
|
||||
# heating/emissions information
|
||||
|
|
@ -66,20 +67,11 @@ class XmlParser:
|
|||
heat_loss_perimeter = None
|
||||
party_wall_length = None
|
||||
total_floor_area = None
|
||||
ground_floor_area = None
|
||||
is_there_party_wall = None
|
||||
floor_height = None
|
||||
insulation_wall_area = None
|
||||
|
||||
floor_dimensions = None
|
||||
|
||||
rrn = None
|
||||
|
||||
database_data = None
|
||||
|
||||
# We assume that the insulation wall area is 85% of the total wall area, as a standard estimate
|
||||
INSULATION_WALL_AREA_FACTOR = 0.85
|
||||
|
||||
# The value of the UPRN tells us about the file type that we're parsing
|
||||
UPRN_FILETYPE_MAP = {
|
||||
0: "EPR",
|
||||
|
|
@ -119,6 +111,10 @@ class XmlParser:
|
|||
'1': "Owner-occupied"
|
||||
}
|
||||
|
||||
TARIFF_MAP = {
|
||||
"2": "Single"
|
||||
}
|
||||
|
||||
def __init__(self, file, filekey, uprn=None):
|
||||
file.seek(0) # Ensure the file pointer is at the beginning
|
||||
xml_string = file.read().decode('utf-8')
|
||||
|
|
@ -161,9 +157,6 @@ class XmlParser:
|
|||
# Building fabric
|
||||
self.get_doors()
|
||||
|
||||
# Property dimensions
|
||||
self.get_property_dimensions()
|
||||
|
||||
self.get_floor_dimensions()
|
||||
|
||||
self.get_windows()
|
||||
|
|
@ -171,6 +164,9 @@ class XmlParser:
|
|||
# Get all of the EPC data
|
||||
self.extract_epc()
|
||||
|
||||
# Put together all of the additional data we capture
|
||||
self.extract_additional_data()
|
||||
|
||||
def extract_epc(self):
|
||||
|
||||
if self.floor_dimensions is None:
|
||||
|
|
@ -191,16 +187,23 @@ class XmlParser:
|
|||
flat_storey_count = ""
|
||||
flat_top_storey = ""
|
||||
floor_level = "NO DATA!"
|
||||
energy_tariff = "NO DATA!"
|
||||
|
||||
floor_height = np.mean([
|
||||
float(x['room_height']) for x in self.floor_dimensions if x['building_part_identifier'] == 'Main Dwelling'
|
||||
float(x['room_height']) for x in self.floor_dimensions if
|
||||
x['building_part_identifier'] == 'Main Dwelling' and not x['room_roof']
|
||||
])
|
||||
|
||||
# Take the most prevalent glazing type
|
||||
glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0']
|
||||
glazed_type = max(glazed_type, key=glazed_type.count)
|
||||
|
||||
energy_tariff = (
|
||||
self.xml.getElementsByTagName("SAP-Energy-Source")[0]
|
||||
.getElementsByTagName("Meter-Type")[0]
|
||||
.firstChild.nodeValue
|
||||
)
|
||||
energy_tariff = self.TARIFF_MAP[energy_tariff]
|
||||
|
||||
self.epc = {
|
||||
"uprn": self.uprn,
|
||||
"uprn-source": "Address Matched",
|
||||
|
|
@ -209,8 +212,6 @@ class XmlParser:
|
|||
**self.get_sap(),
|
||||
**self.get_property_address(),
|
||||
"low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'),
|
||||
# TODO: Needs to be done more carefully
|
||||
# "floor-height" = self.get_node_value_from_floor_dimensions('Room-Height'),
|
||||
"construction-age-band": self.get_node_value('Construction-Age-Band'),
|
||||
"mainheat-energy-eff": self.RATINGS_MAP[
|
||||
self.get_property_summary_value('Main-Heating', 'Energy-Efficiency-Rating')
|
||||
|
|
@ -222,8 +223,6 @@ class XmlParser:
|
|||
self.get_property_summary_value('Lighting', 'Energy-Efficiency-Rating')
|
||||
],
|
||||
"environment-impact-potential": self.get_energy_assessment_value('Environmental-Impact-Potential'),
|
||||
# TODO: Needs to be done more carefully since we have multiple windows
|
||||
# "glazed-type": self.get_node_value('Glazing-Type'),
|
||||
"mainheatcont-description":
|
||||
self.get_property_summary_value('Main-Heating-Controls', 'Description'),
|
||||
"sheating-energy-eff": self.RATINGS_MAP[
|
||||
|
|
@ -232,8 +231,7 @@ class XmlParser:
|
|||
"local-authority": "", # Not included in the xml
|
||||
"local-authority-label": "",
|
||||
"fixed-lighting-outlets-count": self.get_node_value('Fixed-Lighting-Outlets-Count'),
|
||||
# TODO: Doesn't seem to be included in the xml
|
||||
# "energy-tariff": self.get_node_value('Energy-Tariff'),
|
||||
"energy-tariff": energy_tariff,
|
||||
"mechanical-ventilation": self.MECHANICAL_VENTILATION_MAP[self.get_node_value('Mechanical-Ventilation')],
|
||||
"solar-water-heating-flag": self.get_node_value('Solar-Water-Heating'),
|
||||
"co2-emissions-potential": self.get_energy_assessment_value('CO2-Emissions-Potential'),
|
||||
|
|
@ -328,7 +326,47 @@ class XmlParser:
|
|||
"mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'),
|
||||
"floor-height": floor_height,
|
||||
"glazed-type": glazed_type,
|
||||
"energy-tariff": energy_tariff,
|
||||
}
|
||||
|
||||
def get_insulation_wall_area(self):
    """
    Work out the net wall area of the main dwelling.

    For every entry in ``self.floor_dimensions`` that belongs to the
    "Main Dwelling" and is not flagged as a room-in-roof, the wall area is
    taken as heat loss perimeter * room height. The areas of the windows
    whose ``window_location`` is "0" (treated here as the main dwelling's
    windows) are then subtracted from that total.

    :return: summed wall area minus summed window area
    """

    # Room-in-roof entries are skipped: they carry no perimeter/height values
    storeys = []
    for floor in self.floor_dimensions:
        if floor["building_part_identifier"] == "Main Dwelling" and not floor["room_roof"]:
            storeys.append(floor)

    glazing = []
    for window in self.windows:
        if window["window_location"] == "0":
            glazing.append(window)

    # Values are held as strings, so convert before multiplying
    gross_wall_area = 0
    for storey in storeys:
        gross_wall_area += float(storey["heat_loss_perimeter"]) * float(storey["room_height"])

    glazed_area = 0
    for pane in glazing:
        glazed_area += float(pane["window_area"])

    return gross_wall_area - glazed_area
|
||||
|
||||
def extract_additional_data(self):
    """
    Gather the supplementary fields into ``self.additional_data``.

    The insulation wall area is calculated first (and stored on the
    instance), then a dictionary is assembled from the file key, a set of
    attributes already held on the instance, and a few values looked up by
    tag name through ``get_node_value``.

    :return: None, the result is stored in ``self.additional_data``
    """

    self.insulation_wall_area = self.get_insulation_wall_area()

    # Fields whose key is the same as the attribute they are read from.
    # heating_system and heating_controls are currently left out of the
    # export (they were commented out in the original mapping).
    mirrored_attributes = (
        "surveyor_name",
        "space_heating_kwh",
        "water_heating_kwh",
        "number_of_doors",
        "number_of_insulated_doors",
        "number_of_floors",
        "insulation_wall_area",
        "heat_loss_perimeter",
        "party_wall_length",
        "perimeter",
    )

    # Fields fetched with get_node_value, as (output key, tag name) pairs
    node_backed_fields = (
        ("rooms_with_bath_and_or_shower", 'Rooms-With-Bath-And-Or-Shower'),
        ("rooms_with_mixer_shower_no_bath", 'Rooms-With-Mixer-Shower-No-Bath'),
        ("room_with_bath_and_mixer_shower", 'Rooms-With-Bath-And-Mixer-Shower'),
        ("percent_draftproofed", 'Percent-Draughtproofed'),
    )

    collected = {"file_location": self.filekey}
    for attribute in mirrored_attributes:
        collected[attribute] = getattr(self, attribute)
    for key, tag_name in node_backed_fields:
        collected[key] = self.get_node_value(tag_name)

    self.additional_data = collected
|
||||
|
||||
def get_node_value(self, tag_name):
|
||||
|
|
@ -516,56 +554,6 @@ class XmlParser:
|
|||
"constituency-label": constituency_label
|
||||
}
|
||||
|
||||
def get_property_dimensions(self):
|
||||
"""
|
||||
This function will extract the relevant property dimensions including the floor area,
|
||||
number of floors, perimeter, party wall length and the insulation_wall_area.
|
||||
|
||||
insulation_wall_area is typically simplified down to perimeter * height * 0.85
|
||||
:return:
|
||||
"""
|
||||
|
||||
# Each floor has its own SAP-Floor-Dimension tag
|
||||
floor_dimensions = (
|
||||
self.xml.getElementsByTagName("SAP-Floor-Dimensions")[0]
|
||||
.getElementsByTagName("SAP-Floor-Dimension")
|
||||
)
|
||||
|
||||
self.number_of_floors = len(floor_dimensions)
|
||||
|
||||
self.heat_loss_perimeter = float(
|
||||
floor_dimensions[0].getElementsByTagName("Heat-Loss-Perimeter")[0].firstChild.nodeValue
|
||||
)
|
||||
|
||||
self.party_wall_length = float(
|
||||
floor_dimensions[0].getElementsByTagName("Party-Wall-Length")[0].firstChild.nodeValue
|
||||
)
|
||||
|
||||
party_wall_construction_tag = (
|
||||
self.xml.getElementsByTagName("Party-Wall-Construction")[0].firstChild.nodeValue.replace("\n", "").strip()
|
||||
)
|
||||
|
||||
self.is_there_party_wall = (
|
||||
"Yes" if (self.party_wall_length > 0) or (party_wall_construction_tag != "") else "No"
|
||||
)
|
||||
|
||||
# We pull out all of the floor areas
|
||||
floor_areas = [
|
||||
float(x.getElementsByTagName("Total-Floor-Area")[0].firstChild.nodeValue) for x in floor_dimensions
|
||||
]
|
||||
|
||||
self.total_floor_area = sum(floor_areas)
|
||||
self.ground_floor_area = floor_areas[0]
|
||||
|
||||
self.floor_height = float(
|
||||
floor_dimensions[0]
|
||||
.getElementsByTagName("Room-Height")[0]
|
||||
.firstChild.nodeValue
|
||||
)
|
||||
|
||||
self.insulation_wall_area = self.heat_loss_perimeter * self.floor_height * self.INSULATION_WALL_AREA_FACTOR
|
||||
self.perimeter = self.heat_loss_perimeter + self.party_wall_length
|
||||
|
||||
def get_floor_dimensions(self):
|
||||
|
||||
"""
|
||||
|
|
@ -594,16 +582,53 @@ class XmlParser:
|
|||
'floor': get_part_value(floor_dimension, 'Floor'),
|
||||
'floor_construction': get_part_value(floor_dimension, 'Floor-Construction'),
|
||||
'floor_insulation': get_part_value(floor_dimension, 'Floor-Insulation'),
|
||||
'heat_loss-perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'),
|
||||
'party_wall-length': get_part_value(floor_dimension, 'Party-Wall-Length'),
|
||||
'total_floor-area': get_part_value(floor_dimension, 'Total-Floor-Area'),
|
||||
'room_height': get_part_value(floor_dimension, 'Room-Height')
|
||||
'heat_loss_perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'),
|
||||
'party_wall_length': get_part_value(floor_dimension, 'Party-Wall-Length'),
|
||||
'total_floor_area': get_part_value(floor_dimension, 'Total-Floor-Area'),
|
||||
'room_height': get_part_value(floor_dimension, 'Room-Height'),
|
||||
"room_roof": False
|
||||
} for floor_dimension in sap_floor_dimensions
|
||||
]
|
||||
|
||||
room_roofs = building_part.getElementsByTagName("SAP-Room-In-Roof")
|
||||
room_roof_data = [
|
||||
{
|
||||
"building_part_identifier": building_part_identifier,
|
||||
"floor": str(max([int(d["floor"]) for d in data]) + 1),
|
||||
"floor_construction": "",
|
||||
"floor_insulation": rr.getElementsByTagName("Insulation")[0].firstChild.nodeValue,
|
||||
"heat_loss_perimeter": "",
|
||||
"party_wall_length": "",
|
||||
"total_floor_area": rr.getElementsByTagName("Floor-Area")[0].firstChild.nodeValue,
|
||||
"room_height": "",
|
||||
"room_roof": True
|
||||
} for rr in room_roofs
|
||||
]
|
||||
|
||||
floor_dimensions.extend(data)
|
||||
floor_dimensions.extend(room_roof_data)
|
||||
|
||||
self.floor_dimensions = floor_dimensions
|
||||
|
||||
self.number_of_floors = len(
|
||||
[f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling"]
|
||||
)
|
||||
self.heat_loss_perimeter = max(
|
||||
[
|
||||
float(f["heat_loss_perimeter"]) for f in self.floor_dimensions
|
||||
if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"]
|
||||
]
|
||||
)
|
||||
|
||||
self.party_wall_length = max(
|
||||
[
|
||||
float(f["party_wall_length"]) for f in self.floor_dimensions
|
||||
if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"]
|
||||
]
|
||||
)
|
||||
|
||||
self.perimeter = self.heat_loss_perimeter + self.party_wall_length
|
||||
|
||||
def get_windows(self):
|
||||
"""
|
||||
Extracts data about the windows in the property, including the number of windows and the window type.
|
||||
|
|
@ -612,15 +637,6 @@ class XmlParser:
|
|||
|
||||
sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window")
|
||||
|
||||
# This is the data in each sap window:
|
||||
# <Window-Location>2</Window-Location>
|
||||
# <Window-Area quantity="square metres">1.55</Window-Area>
|
||||
# <Window-Type>1</Window-Type>
|
||||
# <Glazing-Type>3</Glazing-Type>
|
||||
# <PVC-Frame>true</PVC-Frame>
|
||||
# <Glazing-Gap>16+</Glazing-Gap>
|
||||
# <Orientation>7</Orientation>
|
||||
|
||||
glazing_type_lookup = {
|
||||
"3": "double glazing, unknown install date"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ def main():
|
|||
for xml in xmls:
|
||||
xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
|
||||
xml_data_io = BytesIO(xml_data)
|
||||
xml_parser = XmlParser(file=xml_data_io, filekey=xml, uprn=uprn)
|
||||
xml_parser = XmlParser(file=xml_data_io, filekey=os.path.join(f"s3://{BUCKET}", xml), uprn=uprn)
|
||||
xml_parser.run()
|
||||
logger.info(f"Extracted data from {xml}")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue