mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
set up fundamental epc extraction
This commit is contained in:
parent
3257485248
commit
791e22146e
3 changed files with 198 additions and 12 deletions
|
|
@ -133,8 +133,8 @@ def app():
|
|||
energy_consumption_data = []
|
||||
for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
|
||||
# Skip the first 50
|
||||
if i < 250:
|
||||
continue
|
||||
# if i < 344:
|
||||
# continue
|
||||
|
||||
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
|
||||
# Rename the columns to the same format as the api returns
|
||||
|
|
@ -146,12 +146,12 @@ def app():
|
|||
# Take just the newest EPC per uprn, based on lodgement-date
|
||||
data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
|
||||
|
||||
data = data.sample(sample_size)
|
||||
data = data.sample(sample_size, replace=False)
|
||||
# We use the address data to find the related information
|
||||
|
||||
collected_data = []
|
||||
for _, property_data in data.iterrows():
|
||||
time.sleep(np.random.uniform(0.3, 2))
|
||||
time.sleep(np.random.uniform(0.2, 1.5))
|
||||
|
||||
uprn = int(property_data["uprn"])
|
||||
address = property_data["address1"]
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ def app():
|
|||
|
||||
# We also estimate the energy consumption reduction from this data, by band
|
||||
df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
|
||||
consumption_averages = df.groupby("current-energy-rating")["total_consumption"].meam().reset_index()
|
||||
consumption_averages = df.groupby("current-energy-rating")["total_consumption"].mean().reset_index()
|
||||
|
||||
# Save the consumption averages back to s3
|
||||
save_dataframe_to_s3_parquet(
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ class XmlParser:
|
|||
hot_water_cost_current = None
|
||||
lighting_cost_current = None
|
||||
energy_consumption_current = None
|
||||
energy_consumption_potential = None
|
||||
heating_system = None
|
||||
heating_controls = None
|
||||
|
||||
|
|
@ -140,6 +141,30 @@ class XmlParser:
|
|||
"5": "Very Good"
|
||||
}
|
||||
|
||||
# Lookup tables translating coded XML values into descriptive labels.
# Keys are strings because they are looked up with node text read from the XML.
# NOTE: every table below holds a single entry so far; any other code will
# raise KeyError wherever the table is indexed directly.

# Code found in the 'Mechanical-Ventilation' node -> label
MECHANICAL_VENTILATION_MAP = {
    "0": "natural",
}

# Code found in the 'Built-Form' node -> label
BUILT_FORM_MAP = {
    "1": "Detached",
}

# Code found in the 'Glazed-Area' node -> label
GLAZED_AREA_MAP = {
    "4": "Much More Than Typical",
}

# Code found in the 'Main-Fuel-Type' node -> label
FUEL_TYPE_MAP = {
    "26": "mains gas (not community)",
}

# Code found in the 'Transaction-Type' node -> label
TRANSACTION_TYPE_MAP = {
    "13": "ECO assessment",
}

# Code found in the 'Tenure' node -> label
TENURE_MAP = {
    "1": "Owner-occupied",
}
|
||||
|
||||
def __init__(self, file, filekey, uprn=None):
|
||||
file.seek(0) # Ensure the file pointer is at the beginning
|
||||
xml_string = file.read().decode('utf-8')
|
||||
|
|
@ -151,7 +176,7 @@ class XmlParser:
|
|||
# In order to identify the file type, we can look for the presence of the 'UPRN' tag
|
||||
# If the UPRN tag is present, we can assume that the file is an EPC
|
||||
# If the UPRN tag is not present, we can assume that the file is an EPR
|
||||
self.get_uprn()
|
||||
self.get_uprn(uprn)
|
||||
|
||||
self.file_type = self.UPRN_FILETYPE_MAP.get(self.uprn, "EPC")
|
||||
|
||||
|
|
@ -180,6 +205,7 @@ class XmlParser:
|
|||
self.get_assessor_details()
|
||||
|
||||
self.get_heating_and_emissions_data()
|
||||
|
||||
self.get_detailed_heating_specs()
|
||||
|
||||
# Building fabric
|
||||
|
|
@ -191,11 +217,160 @@ class XmlParser:
|
|||
self.get_hot_water()
|
||||
self.get_lighting()
|
||||
self.get_doors()
|
||||
self.get_photo_supply()
|
||||
|
||||
# Property dimensions
|
||||
self.get_property_dimensions()
|
||||
|
||||
# Get all of the EPC data
|
||||
self.extract_epc()
|
||||
|
||||
def extract_epc(self):
    """
    Build ``self.epc``, a dict of EPC fields keyed by hyphenated field names.

    Plain values are read with ``get_node_value``, ``get_property_summary_value``
    and ``get_energy_assessment_value``; coded values are translated through the
    class-level ``*_MAP`` lookup tables; the photovoltaic value comes from
    ``get_photo_supply``.

    A field whose node is missing from the XML is stored as None. A code that
    is present in the XML but has no entry in its lookup table still raises
    KeyError, so unmapped codes are noticed rather than silently dropped.
    Exceptions raised by the getter methods propagate unchanged.

    Returns nothing; the result is stored on ``self.epc``.
    """

    def translate(mapping, code):
        # The getters return None when a node is absent. Indexing the table
        # with None used to raise KeyError(None); a missing node now yields
        # None (or whatever the table explicitly maps None to, if anything).
        if code is None:
            return mapping.get(None)
        return mapping[code]

    node = self.get_node_value
    summary = self.get_property_summary_value
    assessment = self.get_energy_assessment_value

    def rating(section, tag_name):
        # Property-Summary ratings are stored as codes; RATINGS_MAP labels them.
        return translate(self.RATINGS_MAP, summary(section, tag_name))

    self.epc = {
        "low-energy-fixed-light-count": node('Low-Energy-Fixed-Lighting-Outlets-Count'),
        # TODO: "floor-height" needs to be done more carefully
        # (candidate: get_node_value_from_floor_dimensions('Room-Height'))
        "construction-age-band": node('Construction-Age-Band'),
        "mainheat-energy-eff": rating('Main-Heating', 'Energy-Efficiency-Rating'),
        "windows-env-eff": rating('Window', 'Environmental-Efficiency-Rating'),
        "lighting-energy-eff": rating('Lighting', 'Energy-Efficiency-Rating'),
        "environment-impact-potential": assessment('Environmental-Impact-Potential'),
        # TODO: "glazed-type" ('Glazing-Type') needs to be done more carefully
        # since we have multiple windows
        "mainheatcont-description": summary('Main-Heating-Controls', 'Description'),
        "sheating-energy-eff": rating('Secondary-Heating', 'Energy-Efficiency-Rating'),
        # TODO: "local-authority" ('Local-Authority') doesn't seem to be included in the xml
        "local-authority-label": node('Local-Authority-Label'),
        "fixed-lighting-outlets-count": node('Fixed-Lighting-Outlets-Count'),
        # TODO: "energy-tariff" ('Energy-Tariff') doesn't seem to be included in the xml
        "mechanical-ventilation": translate(self.MECHANICAL_VENTILATION_MAP, node('Mechanical-Ventilation')),
        "solar-water-heating-flag": node('Solar-Water-Heating'),
        "co2-emissions-potential": assessment('CO2-Emissions-Potential'),
        "number-heated-rooms": node('Heated-Room-Count'),
        "floor-description": summary('Floor', 'Description'),
        "energy-consumption-potential": assessment('Energy-Consumption-Potential'),
        "built-form": translate(self.BUILT_FORM_MAP, node('Built-Form')),
        "number-open-fireplaces": node('Open-Fireplaces-Count'),
        "windows-description": summary('Window', 'Description'),
        "glazed-area": translate(self.GLAZED_AREA_MAP, node('Glazed-Area')),
        "inspection-date": node('Inspection-Date'),
        "mains-gas-flag": node('Mains-Gas'),
        "co2-emiss-curr-per-floor-area": assessment('CO2-Emissions-Current-Per-Floor-Area'),
        # TODO: "heat-loss-corridor" ('Heat-Loss-Perimeter') is not included in the
        # xml for houses - need an example of flats
        # TODO: "flat-storey-count" ('Flat-Storey-Count') - need an example of flats
        "roof-energy-eff": rating('Roof', 'Energy-Efficiency-Rating'),
        "total-floor-area": node('Total-Floor-Area'),
        "environment-impact-current": assessment('Environmental-Impact-Current'),
        "roof-description": summary('Roof', 'Description'),
        "floor-energy-eff": rating('Floor', 'Energy-Efficiency-Rating'),
        "number-habitable-rooms": node('Habitable-Room-Count'),
        "hot-water-env-eff": rating('Hot-Water', 'Environmental-Efficiency-Rating'),
        "mainheatc-energy-eff": rating('Main-Heating-Controls', 'Energy-Efficiency-Rating'),
        "main-fuel": translate(self.FUEL_TYPE_MAP, node('Main-Fuel-Type')),
        "lighting-env-eff": rating('Lighting', 'Environmental-Efficiency-Rating'),
        "windows-energy-eff": rating('Window', 'Energy-Efficiency-Rating'),
        "floor-env-eff": rating('Floor', 'Environmental-Efficiency-Rating'),
        "sheating-env-eff": rating('Secondary-Heating', 'Environmental-Efficiency-Rating'),
        # Hyphenated like every other key (was mistakenly "lighting_description")
        "lighting-description": summary('Lighting', 'Description'),
        "roof-env-eff": rating('Roof', 'Environmental-Efficiency-Rating'),
        "walls-energy-eff": rating('Wall', 'Energy-Efficiency-Rating'),
        "photo-supply": self.get_photo_supply(),
        "lighting-cost-potential": assessment('Lighting-Cost-Potential'),
        "mainheat-env-eff": rating('Main-Heating', 'Environmental-Efficiency-Rating'),
        "multi-glaze-proportion": node('Multiple-Glazed-Proportion'),
        "main-heating-controls": summary('Main-Heating-Controls', 'Description'),
        # TODO: "flat-top-storey" ('Flat-Top-Storey') - need an example of flats
        "secondheat-description": summary('Secondary-Heating', 'Description'),
        "walls-env-eff": rating('Wall', 'Environmental-Efficiency-Rating'),
        "transaction-type": translate(self.TRANSACTION_TYPE_MAP, node('Transaction-Type')),
        "extension-count": node('Extensions-Count'),
        "mainheatc-env-eff": rating('Main-Heating-Controls', 'Environmental-Efficiency-Rating'),
        "lmk-key": "",  # Doesn't exist for non-EPC xmls
        "wind-turbines-count": node('Wind-Turbines-Count'),
        "tenure": translate(self.TENURE_MAP, node('Tenure')),
        # TODO: "floor-level" ('Floor-Level') - need an example of flats
        "potential-energy-efficiency": assessment('Energy-Rating-Potential'),
        "hot-water-energy-eff": rating('Hot-Water', 'Energy-Efficiency-Rating'),
        "low-energy-lighting": node('Low-Energy-Lighting'),
        "walls-description": summary('Wall', 'Description'),
        "hotwater-description": summary('Hot-Water', 'Description'),
    }
|
||||
|
||||
def get_node_value(self, tag_name):
    """
    Return the node value of the first child of the first ``tag_name`` element
    found anywhere in ``self.xml``.

    Returns None when no such element exists or when it has no child node.
    """
    matches = self.xml.getElementsByTagName(tag_name)
    if not matches:
        return None

    first_child = matches[0].firstChild
    if not first_child:
        return None

    return first_child.nodeValue
|
||||
|
||||
def get_node_value_from_floor_dimensions(self, tag_name):
    """
    Return the node value of the first child of the first ``tag_name`` element
    inside the first 'SAP-Floor-Dimension' element of ``self.xml``.

    Only the first floor dimension is inspected. Returns None when there is no
    'SAP-Floor-Dimension' element, no ``tag_name`` inside it, or the tag has no
    child node.
    """
    floor_dimensions = self.xml.getElementsByTagName('SAP-Floor-Dimension')
    if not floor_dimensions:
        return None

    matches = floor_dimensions[0].getElementsByTagName(tag_name)
    if not matches or not matches[0].firstChild:
        return None

    return matches[0].firstChild.nodeValue
|
||||
|
||||
def get_property_summary_value(self, section, tag_name):
    """
    Return the node value of the first child of the first ``tag_name`` element
    inside the first ``section`` element of the first 'Property-Summary'
    element of ``self.xml``.

    Returns None when any of those elements is missing or when the tag has no
    child node. A document without a 'Property-Summary' element previously
    raised IndexError; it now returns None like every other "not found" case.
    """
    summaries = self.xml.getElementsByTagName('Property-Summary')
    if not summaries:
        return None

    sections = summaries[0].getElementsByTagName(section)
    if not sections:
        return None

    tags = sections[0].getElementsByTagName(tag_name)
    if tags and tags[0].firstChild:
        return tags[0].firstChild.nodeValue
    return None
|
||||
|
||||
def get_energy_assessment_value(self, tag_name):
    """
    Return the node value of the first child of the first ``tag_name`` element
    inside the first 'Energy-Assessment' element of ``self.xml``.

    Returns None when the 'Energy-Assessment' element or the tag is missing, or
    when the tag has no child node.
    """
    # Check the node list before indexing it: indexing first raised IndexError
    # when the element was absent, and the later truthiness test on the
    # element itself could never fail (minidom nodes are always truthy).
    assessments = self.xml.getElementsByTagName('Energy-Assessment')
    if not assessments:
        return None

    tags = assessments[0].getElementsByTagName(tag_name)
    if tags and tags[0].firstChild:
        return tags[0].firstChild.nodeValue
    return None
|
||||
|
||||
def get_uprn(self, uprn):
|
||||
|
||||
if uprn is not None:
|
||||
|
|
@ -253,9 +428,14 @@ class XmlParser:
|
|||
self.heating_cost_current = self.xml.getElementsByTagName('Heating-Cost-Current')[0].firstChild.nodeValue
|
||||
self.hot_water_cost_current = self.xml.getElementsByTagName('Hot-Water-Cost-Current')[0].firstChild.nodeValue
|
||||
self.lighting_cost_current = self.xml.getElementsByTagName('Lighting-Cost-Current')[0].firstChild.nodeValue
|
||||
|
||||
# Energy consumption
|
||||
self.energy_consumption_current = (
|
||||
self.xml.getElementsByTagName("Energy-Consumption-Current")[0].firstChild.nodeValue
|
||||
)
|
||||
self.energy_consumption_potential = (
|
||||
self.xml.getElementsByTagName("Energy-Consumption-Potential")[0].firstChild.nodeValue
|
||||
)
|
||||
|
||||
def get_detailed_heating_specs(self):
|
||||
"""
|
||||
|
|
@ -457,11 +637,17 @@ class XmlParser:
|
|||
)
|
||||
|
||||
def get_photo_supply(self):
    """
    Read the photovoltaic roof-area percentage from the first
    'Photovoltaic-Supply' element of ``self.xml``.

    Stores the first 'Percent-Roof-Area' found under that element on
    ``self.photo_supply`` as a float, then returns the raw text of the
    'Percent-Roof-Area' nested in its 'None-Or-No-Details' child.

    Raises NotImplementedError when there is no 'None-Or-No-Details' child;
    that case is not handled yet.
    """
    # NOTE(review): this block appears in a diff view that shows both the
    # attribute-setting form and the value-returning form; both are kept here
    # in the order shown - confirm which one callers rely on.
    supply = self.xml.getElementsByTagName('Photovoltaic-Supply')[0]

    first_percentage = supply.getElementsByTagName('Percent-Roof-Area')[0]
    self.photo_supply = float(first_percentage.firstChild.nodeValue)

    # Check if the "None-Or-No-Details" tag is present
    no_details = supply.getElementsByTagName("None-Or-No-Details")
    if not no_details:
        raise NotImplementedError("Implement me")

    percentage = no_details[0].getElementsByTagName("Percent-Roof-Area")[0]
    return percentage.firstChild.nodeValue
|
||||
|
||||
def get_assessor_details(self):
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue