retrieved all epc fields

This commit is contained in:
Khalim Conn-Kowlessar 2024-07-25 15:55:13 +01:00
parent eaa1c3bca4
commit a3c2ff06a8
2 changed files with 78 additions and 4 deletions

View file

@ -1,4 +1,5 @@
import re
import numpy as np
import usaddress
from datetime import datetime
from xml.dom.minidom import parseString
@ -43,6 +44,7 @@ def get_house_number(address: str) -> str | None:
class XmlParser:
epc = None
uprn = None
# heating/emissions information
@ -56,6 +58,7 @@ class XmlParser:
number_of_doors = None
number_of_insulated_doors = None
windows = None
# Property dimensions
number_of_floors = None
@ -153,7 +156,7 @@ class XmlParser:
self.get_heating_and_emissions_data()
self.get_detailed_heating_specs()
# self.get_detailed_heating_specs()
# Building fabric
self.get_doors()
@ -161,11 +164,21 @@ class XmlParser:
# Property dimensions
self.get_property_dimensions()
self.get_floor_dimensions()
self.get_windows()
# Get all of the EPC data
self.extract_epc()
def extract_epc(self):
if self.floor_dimensions is None:
raise ValueError("Run get_floor_dimensions() first")
if self.windows is None:
raise ValueError("Run get_windows() first")
property_type = self.get_property_type()
if property_type == "Flat":
@ -178,6 +191,15 @@ class XmlParser:
flat_storey_count = ""
flat_top_storey = ""
floor_level = "NO DATA!"
energy_tariff = "NO DATA!"
floor_height = np.mean([
float(x['room_height']) for x in self.floor_dimensions if x['building_part_identifier'] == 'Main Dwelling'
])
# Take the most prevalent glazing type
glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0']
glazed_type = max(glazed_type, key=glazed_type.count)
self.epc = {
"uprn": self.uprn,
@ -286,7 +308,7 @@ class XmlParser:
"tenure": self.TENURE_MAP[self.get_node_value('Tenure')],
"floor-level": floor_level,
"potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'),
"potentual-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))),
"potential-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))),
"hot-water-energy-eff": self.RATINGS_MAP[
self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating')
],
@ -304,7 +326,9 @@ class XmlParser:
"lodgement-datetime":
datetime.strptime(self.get_node_value('Inspection-Date'), "%Y-%m-%d").isoformat(),
"mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'),
"floor-height": floor_height,
"glazed-type": glazed_type,
"energy-tariff": energy_tariff,
}
def get_node_value(self, tag_name):
@ -405,7 +429,7 @@ class XmlParser:
.getElementsByTagName("Main-Heating")[0]
)
heating_code = sap_main_heating_details.getElementsByTagName("SAP-Main-Heating-Code")[0].firstChild.nodeValue
heating_code = sap_main_heating_details.getElementsByTagName("Main-Heating-Number")[0].firstChild.nodeValue
# Get the heating system
heating_system = heating_data[heating_data["code"] == int(heating_code)]["description"]
@ -579,3 +603,45 @@ class XmlParser:
floor_dimensions.extend(data)
self.floor_dimensions = floor_dimensions
def get_windows(self):
    """
    Extracts data about the windows in the property and stores it on ``self.windows``.

    Every <SAP-Window> under the first <SAP-Windows> element becomes one dict with the keys
    ``window_location``, ``window_area``, ``window_type``, ``glazing_type``, ``pvc_frame``,
    ``glazing_gap`` and ``orientation``. All values are the raw element text, except
    ``glazing_type`` and ``orientation``, which are translated to a description when the code is known.
    A code with no known description is kept as its raw text instead of raising KeyError, so a single
    unmapped window does not abort the extraction of the whole survey.

    :raises IndexError: if there is no <SAP-Windows> element, or a window lacks an expected child element
    :raises AttributeError: if an expected child element is present but empty
    :return: None
    """
    sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window")

    # This is the data in each sap window:
    # <Window-Location>2</Window-Location>
    # <Window-Area quantity="square metres">1.55</Window-Area>
    # <Window-Type>1</Window-Type>
    # <Glazing-Type>3</Glazing-Type>
    # <PVC-Frame>true</PVC-Frame>
    # <Glazing-Gap>16+</Glazing-Gap>
    # <Orientation>7</Orientation>

    # Only the glazing code seen so far is mapped; anything else falls through as the raw code.
    glazing_type_lookup = {
        "3": "double glazing, unknown install date",
    }

    # Codes run clockwise from North. The even (intercardinal) codes extend the original
    # 1/3/5/7 entries - NOTE(review): confirm against the SAP schema orientation code list.
    orientation_lookup = {
        "1": "North",
        "2": "North East",
        "3": "East",
        "4": "South East",
        "5": "South",
        "6": "South West",
        "7": "West",
        "8": "North West",
    }

    def element_text(window, tag_name):
        # Text of the first <tag_name> element inside this window
        return window.getElementsByTagName(tag_name)[0].firstChild.nodeValue

    windows = []
    for window in sap_windows:
        glazing_code = element_text(window, "Glazing-Type")
        orientation_code = element_text(window, "Orientation")
        windows.append(
            {
                "window_location": element_text(window, "Window-Location"),
                "window_area": element_text(window, "Window-Area"),
                "window_type": element_text(window, "Window-Type"),
                # Fall back to the raw code so unknown values stay visible downstream
                "glazing_type": glazing_type_lookup.get(glazing_code, glazing_code),
                "pvc_frame": element_text(window, "PVC-Frame"),
                "glazing_gap": element_text(window, "Glazing-Gap"),
                "orientation": orientation_lookup.get(orientation_code, orientation_code),
            }
        )

    self.windows = windows

View file

@ -1,3 +1,5 @@
import pandas as pd
from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder
from utils.logger import setup_logger
from etl.xml_survey_extraction.XmlParser import XmlParser
@ -51,3 +53,9 @@ def main():
logger.info(f"Extracted data from {xml}")
# TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio
# TODO: To get the full data associated with the heating system, we need to download and parse the PCDB, which
# can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat
# https://www.ncm-pcdb.org.uk/sap/download
# However, retrieving this data is not a priority, so we can leave this for now, as parsing the database
# is a non-trivial task