mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
retrieved all epc fields
This commit is contained in:
parent
eaa1c3bca4
commit
a3c2ff06a8
2 changed files with 78 additions and 4 deletions
|
|
@ -1,4 +1,5 @@
|
||||||
import re
|
import re
|
||||||
|
import numpy as np
|
||||||
import usaddress
|
import usaddress
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from xml.dom.minidom import parseString
|
from xml.dom.minidom import parseString
|
||||||
|
|
@ -43,6 +44,7 @@ def get_house_number(address: str) -> str | None:
|
||||||
|
|
||||||
|
|
||||||
class XmlParser:
|
class XmlParser:
|
||||||
|
epc = None
|
||||||
uprn = None
|
uprn = None
|
||||||
|
|
||||||
# heating/emissions information
|
# heating/emissions information
|
||||||
|
|
@ -56,6 +58,7 @@ class XmlParser:
|
||||||
|
|
||||||
number_of_doors = None
|
number_of_doors = None
|
||||||
number_of_insulated_doors = None
|
number_of_insulated_doors = None
|
||||||
|
windows = None
|
||||||
|
|
||||||
# Property dimensions
|
# Property dimensions
|
||||||
number_of_floors = None
|
number_of_floors = None
|
||||||
|
|
@ -153,7 +156,7 @@ class XmlParser:
|
||||||
|
|
||||||
self.get_heating_and_emissions_data()
|
self.get_heating_and_emissions_data()
|
||||||
|
|
||||||
self.get_detailed_heating_specs()
|
# self.get_detailed_heating_specs()
|
||||||
|
|
||||||
# Building fabric
|
# Building fabric
|
||||||
self.get_doors()
|
self.get_doors()
|
||||||
|
|
@ -161,11 +164,21 @@ class XmlParser:
|
||||||
# Property dimensions
|
# Property dimensions
|
||||||
self.get_property_dimensions()
|
self.get_property_dimensions()
|
||||||
|
|
||||||
|
self.get_floor_dimensions()
|
||||||
|
|
||||||
|
self.get_windows()
|
||||||
|
|
||||||
# Get all of the EPC data
|
# Get all of the EPC data
|
||||||
self.extract_epc()
|
self.extract_epc()
|
||||||
|
|
||||||
def extract_epc(self):
|
def extract_epc(self):
|
||||||
|
|
||||||
|
if self.floor_dimensions is None:
|
||||||
|
raise ValueError("Run get_floor_dimensions() first")
|
||||||
|
|
||||||
|
if self.windows is None:
|
||||||
|
raise ValueError("Run get_windows() first")
|
||||||
|
|
||||||
property_type = self.get_property_type()
|
property_type = self.get_property_type()
|
||||||
|
|
||||||
if property_type == "Flat":
|
if property_type == "Flat":
|
||||||
|
|
@ -178,6 +191,15 @@ class XmlParser:
|
||||||
flat_storey_count = ""
|
flat_storey_count = ""
|
||||||
flat_top_storey = ""
|
flat_top_storey = ""
|
||||||
floor_level = "NO DATA!"
|
floor_level = "NO DATA!"
|
||||||
|
energy_tariff = "NO DATA!"
|
||||||
|
|
||||||
|
floor_height = np.mean([
|
||||||
|
float(x['room_height']) for x in self.floor_dimensions if x['building_part_identifier'] == 'Main Dwelling'
|
||||||
|
])
|
||||||
|
|
||||||
|
# Take the most prevalent glazing type
|
||||||
|
glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0']
|
||||||
|
glazed_type = max(glazed_type, key=glazed_type.count)
|
||||||
|
|
||||||
self.epc = {
|
self.epc = {
|
||||||
"uprn": self.uprn,
|
"uprn": self.uprn,
|
||||||
|
|
@ -286,7 +308,7 @@ class XmlParser:
|
||||||
"tenure": self.TENURE_MAP[self.get_node_value('Tenure')],
|
"tenure": self.TENURE_MAP[self.get_node_value('Tenure')],
|
||||||
"floor-level": floor_level,
|
"floor-level": floor_level,
|
||||||
"potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'),
|
"potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'),
|
||||||
"potentual-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))),
|
"potential-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))),
|
||||||
"hot-water-energy-eff": self.RATINGS_MAP[
|
"hot-water-energy-eff": self.RATINGS_MAP[
|
||||||
self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating')
|
self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating')
|
||||||
],
|
],
|
||||||
|
|
@ -304,7 +326,9 @@ class XmlParser:
|
||||||
"lodgement-datetime":
|
"lodgement-datetime":
|
||||||
datetime.strptime(self.get_node_value('Inspection-Date'), "%Y-%m-%d").isoformat(),
|
datetime.strptime(self.get_node_value('Inspection-Date'), "%Y-%m-%d").isoformat(),
|
||||||
"mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'),
|
"mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'),
|
||||||
|
"floor-height": floor_height,
|
||||||
|
"glazed-type": glazed_type,
|
||||||
|
"energy-tariff": energy_tariff,
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_node_value(self, tag_name):
|
def get_node_value(self, tag_name):
|
||||||
|
|
@ -405,7 +429,7 @@ class XmlParser:
|
||||||
.getElementsByTagName("Main-Heating")[0]
|
.getElementsByTagName("Main-Heating")[0]
|
||||||
)
|
)
|
||||||
|
|
||||||
heating_code = sap_main_heating_details.getElementsByTagName("SAP-Main-Heating-Code")[0].firstChild.nodeValue
|
heating_code = sap_main_heating_details.getElementsByTagName("Main-Heating-Number")[0].firstChild.nodeValue
|
||||||
|
|
||||||
# Get the heating system
|
# Get the heating system
|
||||||
heating_system = heating_data[heating_data["code"] == int(heating_code)]["description"]
|
heating_system = heating_data[heating_data["code"] == int(heating_code)]["description"]
|
||||||
|
|
@ -579,3 +603,45 @@ class XmlParser:
|
||||||
floor_dimensions.extend(data)
|
floor_dimensions.extend(data)
|
||||||
|
|
||||||
self.floor_dimensions = floor_dimensions
|
self.floor_dimensions = floor_dimensions
|
||||||
|
|
||||||
|
def get_windows(self):
    """
    Extract per-window data from the first <SAP-Windows> element of the survey XML.

    Populates ``self.windows`` with one dict per <SAP-Window> element. Each dict has the
    keys ``window_location``, ``window_area``, ``window_type``, ``glazing_type``,
    ``pvc_frame``, ``glazing_gap`` and ``orientation``. Values are the raw text of the
    corresponding XML element (strings), except:

    - ``glazing_type`` and ``orientation`` are translated to a description when the code
      is present in the lookup tables below; codes that are not in the tables are kept
      as the raw code instead of raising ``KeyError``.
    - any field whose element is missing or empty is stored as ``None``.

    When the document has no <SAP-Windows> element, ``self.windows`` is set to an empty
    list.

    :return: None (the result is stored on ``self.windows``)
    """

    def child_text(node, tag_name):
        # Text of the first <tag_name> descendant of node, or None when absent/empty.
        matches = node.getElementsByTagName(tag_name)
        if not matches or matches[0].firstChild is None:
            return None
        return matches[0].firstChild.nodeValue

    # This is the data in each sap window:
    # <Window-Location>2</Window-Location>
    # <Window-Area quantity="square metres">1.55</Window-Area>
    # <Window-Type>1</Window-Type>
    # <Glazing-Type>3</Glazing-Type>
    # <PVC-Frame>true</PVC-Frame>
    # <Glazing-Gap>16+</Glazing-Gap>
    # <Orientation>7</Orientation>

    # NOTE(review): both tables only cover the codes seen so far; extend them from the
    # schema documentation rather than guessing. Unknown codes pass through unchanged.
    glazing_type_lookup = {
        "3": "double glazing, unknown install date",
    }

    orientation_lookup = {
        "1": "North",
        "3": "East",
        "5": "South",
        "7": "West",
    }

    containers = self.xml.getElementsByTagName("SAP-Windows")
    sap_windows = containers[0].getElementsByTagName("SAP-Window") if containers else []

    windows = []
    for window in sap_windows:
        glazing_code = child_text(window, "Glazing-Type")
        orientation_code = child_text(window, "Orientation")
        windows.append(
            {
                "window_location": child_text(window, "Window-Location"),
                "window_area": child_text(window, "Window-Area"),
                "window_type": child_text(window, "Window-Type"),
                "glazing_type": glazing_type_lookup.get(glazing_code, glazing_code),
                "pvc_frame": child_text(window, "PVC-Frame"),
                "glazing_gap": child_text(window, "Glazing-Gap"),
                "orientation": orientation_lookup.get(orientation_code, orientation_code),
            }
        )

    self.windows = windows
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,5 @@
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder
|
from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder
|
||||||
from utils.logger import setup_logger
|
from utils.logger import setup_logger
|
||||||
from etl.xml_survey_extraction.XmlParser import XmlParser
|
from etl.xml_survey_extraction.XmlParser import XmlParser
|
||||||
|
|
@ -51,3 +53,9 @@ def main():
|
||||||
logger.info(f"Extracted data from {xml}")
|
logger.info(f"Extracted data from {xml}")
|
||||||
|
|
||||||
# TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio
|
# TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio
|
||||||
|
|
||||||
|
# TODO: In order to get the full data associated with the heating system, we need to download and parse the PCDB, which
|
||||||
|
# can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat
|
||||||
|
# https://www.ncm-pcdb.org.uk/sap/download
|
||||||
|
# However retrieving this data is not a priority, so we can leave this for now as parsing the database
|
||||||
|
# is a non-trivial task
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue