mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
retrieved all epc fields
This commit is contained in:
parent
eaa1c3bca4
commit
a3c2ff06a8
2 changed files with 78 additions and 4 deletions
|
|
@ -1,4 +1,5 @@
|
|||
import re
|
||||
import numpy as np
|
||||
import usaddress
|
||||
from datetime import datetime
|
||||
from xml.dom.minidom import parseString
|
||||
|
|
@ -43,6 +44,7 @@ def get_house_number(address: str) -> str | None:
|
|||
|
||||
|
||||
class XmlParser:
|
||||
epc = None
|
||||
uprn = None
|
||||
|
||||
# heating/emissions information
|
||||
|
|
@ -56,6 +58,7 @@ class XmlParser:
|
|||
|
||||
number_of_doors = None
|
||||
number_of_insulated_doors = None
|
||||
windows = None
|
||||
|
||||
# Property dimensions
|
||||
number_of_floors = None
|
||||
|
|
@ -153,7 +156,7 @@ class XmlParser:
|
|||
|
||||
self.get_heating_and_emissions_data()
|
||||
|
||||
self.get_detailed_heating_specs()
|
||||
# self.get_detailed_heating_specs()
|
||||
|
||||
# Building fabric
|
||||
self.get_doors()
|
||||
|
|
@ -161,11 +164,21 @@ class XmlParser:
|
|||
# Property dimensions
|
||||
self.get_property_dimensions()
|
||||
|
||||
self.get_floor_dimensions()
|
||||
|
||||
self.get_windows()
|
||||
|
||||
# Get all of the EPC data
|
||||
self.extract_epc()
|
||||
|
||||
def extract_epc(self):
|
||||
|
||||
if self.floor_dimensions is None:
|
||||
raise ValueError("Run get_floor_dimensions() first")
|
||||
|
||||
if self.windows is None:
|
||||
raise ValueError("Run get_windows() first")
|
||||
|
||||
property_type = self.get_property_type()
|
||||
|
||||
if property_type == "Flat":
|
||||
|
|
@ -178,6 +191,15 @@ class XmlParser:
|
|||
flat_storey_count = ""
|
||||
flat_top_storey = ""
|
||||
floor_level = "NO DATA!"
|
||||
energy_tariff = "NO DATA!"
|
||||
|
||||
floor_height = np.mean([
|
||||
float(x['room_height']) for x in self.floor_dimensions if x['building_part_identifier'] == 'Main Dwelling'
|
||||
])
|
||||
|
||||
# Take the most prevalent glazing type
|
||||
glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0']
|
||||
glazed_type = max(glazed_type, key=glazed_type.count)
|
||||
|
||||
self.epc = {
|
||||
"uprn": self.uprn,
|
||||
|
|
@ -286,7 +308,7 @@ class XmlParser:
|
|||
"tenure": self.TENURE_MAP[self.get_node_value('Tenure')],
|
||||
"floor-level": floor_level,
|
||||
"potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'),
|
||||
"potentual-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))),
|
||||
"potential-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))),
|
||||
"hot-water-energy-eff": self.RATINGS_MAP[
|
||||
self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating')
|
||||
],
|
||||
|
|
@ -304,7 +326,9 @@ class XmlParser:
|
|||
"lodgement-datetime":
|
||||
datetime.strptime(self.get_node_value('Inspection-Date'), "%Y-%m-%d").isoformat(),
|
||||
"mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'),
|
||||
|
||||
"floor-height": floor_height,
|
||||
"glazed-type": glazed_type,
|
||||
"energy-tariff": energy_tariff,
|
||||
}
|
||||
|
||||
def get_node_value(self, tag_name):
|
||||
|
|
@ -405,7 +429,7 @@ class XmlParser:
|
|||
.getElementsByTagName("Main-Heating")[0]
|
||||
)
|
||||
|
||||
heating_code = sap_main_heating_details.getElementsByTagName("SAP-Main-Heating-Code")[0].firstChild.nodeValue
|
||||
heating_code = sap_main_heating_details.getElementsByTagName("Main-Heating-Number")[0].firstChild.nodeValue
|
||||
|
||||
# Get the heating system
|
||||
heating_system = heating_data[heating_data["code"] == int(heating_code)]["description"]
|
||||
|
|
@ -579,3 +603,45 @@ class XmlParser:
|
|||
floor_dimensions.extend(data)
|
||||
|
||||
self.floor_dimensions = floor_dimensions
|
||||
|
||||
def get_windows(self):
    """
    Extract per-window data from the ``SAP-Windows`` section of the survey XML.

    One dict is built for every ``SAP-Window`` element and the resulting list is
    stored on ``self.windows``. Each dict has the keys ``window_location``,
    ``window_area``, ``window_type``, ``glazing_type``, ``pvc_frame``,
    ``glazing_gap`` and ``orientation``. All values are the raw element text,
    except ``glazing_type`` and ``orientation`` which are translated through the
    lookup tables below.

    Codes that are not present in a lookup table are passed through unchanged,
    and a field whose element is missing or empty is stored as ``None``, so a
    single unexpected window record does not abort parsing of the whole survey.

    :raises IndexError: if the document has no ``SAP-Windows`` element.
    :return: None (the result is stored on ``self.windows``)
    """

    def _text(node, tag_name):
        # Text of the first <tag_name> descendant, or None when absent/empty.
        elements = node.getElementsByTagName(tag_name)
        if not elements or elements[0].firstChild is None:
            return None
        return elements[0].firstChild.nodeValue

    sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window")

    # This is the data in each sap window:
    # <Window-Location>2</Window-Location>
    # <Window-Area quantity="square metres">1.55</Window-Area>
    # <Window-Type>1</Window-Type>
    # <Glazing-Type>3</Glazing-Type>
    # <PVC-Frame>true</PVC-Frame>
    # <Glazing-Gap>16+</Glazing-Gap>
    # <Orientation>7</Orientation>

    # NOTE(review): only one glazing code is mapped so far; other codes are
    # passed through as the raw code until their descriptions are confirmed.
    glazing_type_lookup = {
        "3": "double glazing, unknown install date",
    }

    # NOTE(review): only the four cardinal directions are mapped; any other
    # orientation code is passed through as the raw code - confirm the
    # remaining codes against the schema before extending this table.
    orientation_lookup = {
        "1": "North",
        "3": "East",
        "5": "South",
        "7": "West",
    }

    windows = []
    for window in sap_windows:
        glazing_code = _text(window, "Glazing-Type")
        orientation_code = _text(window, "Orientation")
        windows.append(
            {
                "window_location": _text(window, "Window-Location"),
                "window_area": _text(window, "Window-Area"),
                "window_type": _text(window, "Window-Type"),
                # .get(code, code): fall back to the raw code rather than raising KeyError
                "glazing_type": glazing_type_lookup.get(glazing_code, glazing_code),
                "pvc_frame": _text(window, "PVC-Frame"),
                "glazing_gap": _text(window, "Glazing-Gap"),
                "orientation": orientation_lookup.get(orientation_code, orientation_code),
            }
        )

    self.windows = windows
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import pandas as pd
|
||||
|
||||
from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder
|
||||
from utils.logger import setup_logger
|
||||
from etl.xml_survey_extraction.XmlParser import XmlParser
|
||||
|
|
@ -51,3 +53,9 @@ def main():
|
|||
logger.info(f"Extracted data from {xml}")
|
||||
|
||||
# TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio
|
||||
|
||||
# TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which
|
||||
# can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat
|
||||
# https://www.ncm-pcdb.org.uk/sap/download
|
||||
# However retrieving this data is not a priority, so we can leave this for now as parsing the database
|
||||
# is a non-trivial task
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue