set up fundamental epc extraction

This commit is contained in:
Khalim Conn-Kowlessar 2024-07-25 12:18:48 +01:00
parent 3257485248
commit 791e22146e
3 changed files with 198 additions and 12 deletions

View file

@ -133,8 +133,8 @@ def app():
energy_consumption_data = []
for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
# Skip the first 50
if i < 250:
continue
# if i < 344:
# continue
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
# Rename the columns to the same format as the api returns
@ -146,12 +146,12 @@ def app():
# Take just the newest EPC per uprn, based on lodgement-date
data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
data = data.sample(sample_size)
data = data.sample(sample_size, replace=False)
# We use the address data to find the related information
collected_data = []
for _, property_data in data.iterrows():
time.sleep(np.random.uniform(0.3, 2))
time.sleep(np.random.uniform(0.2, 1.5))
uprn = int(property_data["uprn"])
address = property_data["address1"]

View file

@ -94,7 +94,7 @@ def app():
# We also estimate the energy consumption reduction from this data, by band
df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
consumption_averages = df.groupby("current-energy-rating")["total_consumption"].meam().reset_index()
consumption_averages = df.groupby("current-energy-rating")["total_consumption"].mean().reset_index()
# Save the consumption averages back to s3
save_dataframe_to_s3_parquet(

View file

@ -55,6 +55,7 @@ class XmlParser:
hot_water_cost_current = None
lighting_cost_current = None
energy_consumption_current = None
energy_consumption_potential = None
heating_system = None
heating_controls = None
@ -140,6 +141,30 @@ class XmlParser:
"5": "Very Good"
}
# Lookup tables translating coded XML node values into the labels stored in self.epc.
# NOTE(review): each table currently lists a single code. extract_epc indexes these
# tables directly, so a code that is not listed here raises KeyError until it is added.

# Keyed by the text of the 'Mechanical-Ventilation' node
MECHANICAL_VENTILATION_MAP = {
"0": "natural"
}
# Keyed by the text of the 'Built-Form' node
BUILT_FORM_MAP = {
"1": "Detached",
}
# Keyed by the text of the 'Glazed-Area' node
GLAZED_AREA_MAP = {
"4": "Much More Than Typical"
}
# Keyed by the text of the 'Main-Fuel-Type' node
FUEL_TYPE_MAP = {
"26": "mains gas (not community)"
}
# Keyed by the text of the 'Transaction-Type' node
TRANSACTION_TYPE_MAP = {
"13": "ECO assessment"
}
# Keyed by the text of the 'Tenure' node
TENURE_MAP = {
'1': "Owner-occupied"
}
def __init__(self, file, filekey, uprn=None):
file.seek(0) # Ensure the file pointer is at the beginning
xml_string = file.read().decode('utf-8')
@ -151,7 +176,7 @@ class XmlParser:
# In order to identify the file type, we can look for the presence of the 'UPRN' tag
# If the UPRN tag is present, we can assume that the file is an EPC
# If the UPRN tag is not present, we can assume that the file is an EPR
self.get_uprn()
self.get_uprn(uprn)
self.file_type = self.UPRN_FILETYPE_MAP.get(self.uprn, "EPC")
@ -180,6 +205,7 @@ class XmlParser:
self.get_assessor_details()
self.get_heating_and_emissions_data()
self.get_detailed_heating_specs()
# Building fabric
@ -191,11 +217,160 @@ class XmlParser:
self.get_hot_water()
self.get_lighting()
self.get_doors()
self.get_photo_supply()
# Property dimensions
self.get_property_dimensions()
# Get all of the EPC data
self.extract_epc()
def extract_epc(self):
    """
    Build ``self.epc``, a dictionary of EPC fields read from the parsed XML.

    Plain values are stored exactly as the getter methods return them (node text,
    or None when the node is missing). Coded values are translated to labels
    through the class lookup tables (RATINGS_MAP, BUILT_FORM_MAP, ...).

    A missing node is stored as None rather than being looked up, because the
    getters use None to signal "not present in this file" (for example a property
    without secondary heating).

    :raises KeyError: if a code is present in the XML but absent from its lookup
        table, so that unmapped codes are noticed rather than silently dropped.
    """

    def lookup(mapping, code):
        # None means the node was absent; only real codes are translated
        return None if code is None else mapping[code]

    def rating(section, tag_name):
        # Efficiency ratings all live under Property-Summary and share RATINGS_MAP
        return lookup(self.RATINGS_MAP, self.get_property_summary_value(section, tag_name))

    self.epc = {
        "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'),
        # TODO: Needs to be done more carefully
        # "floor-height": self.get_node_value_from_floor_dimensions('Room-Height'),
        "construction-age-band": self.get_node_value('Construction-Age-Band'),
        "mainheat-energy-eff": rating('Main-Heating', 'Energy-Efficiency-Rating'),
        "windows-env-eff": rating('Window', 'Environmental-Efficiency-Rating'),
        "lighting-energy-eff": rating('Lighting', 'Energy-Efficiency-Rating'),
        "environment-impact-potential": self.get_energy_assessment_value('Environmental-Impact-Potential'),
        # TODO: Needs to be done more carefully since we have multiple windows
        # "glazed-type": self.get_node_value('Glazing-Type'),
        "mainheatcont-description": self.get_property_summary_value('Main-Heating-Controls', 'Description'),
        "sheating-energy-eff": rating('Secondary-Heating', 'Energy-Efficiency-Rating'),
        # TODO: Doesn't seem to be included in the xml
        # "local-authority": self.get_node_value('Local-Authority'),
        "local-authority-label": self.get_node_value('Local-Authority-Label'),
        "fixed-lighting-outlets-count": self.get_node_value('Fixed-Lighting-Outlets-Count'),
        # TODO: Doesn't seem to be included in the xml
        # "energy-tariff": self.get_node_value('Energy-Tariff'),
        "mechanical-ventilation": lookup(
            self.MECHANICAL_VENTILATION_MAP, self.get_node_value('Mechanical-Ventilation')
        ),
        "solar-water-heating-flag": self.get_node_value('Solar-Water-Heating'),
        "co2-emissions-potential": self.get_energy_assessment_value('CO2-Emissions-Potential'),
        "number-heated-rooms": self.get_node_value('Heated-Room-Count'),
        "floor-description": self.get_property_summary_value('Floor', 'Description'),
        "energy-consumption-potential": self.get_energy_assessment_value('Energy-Consumption-Potential'),
        "built-form": lookup(self.BUILT_FORM_MAP, self.get_node_value('Built-Form')),
        "number-open-fireplaces": self.get_node_value('Open-Fireplaces-Count'),
        "windows-description": self.get_property_summary_value('Window', 'Description'),
        "glazed-area": lookup(self.GLAZED_AREA_MAP, self.get_node_value('Glazed-Area')),
        "inspection-date": self.get_node_value('Inspection-Date'),
        "mains-gas-flag": self.get_node_value('Mains-Gas'),
        "co2-emiss-curr-per-floor-area": self.get_energy_assessment_value('CO2-Emissions-Current-Per-Floor-Area'),
        # TODO: Not included in the xml for houses - need an example of flats
        # "heat-loss-corridor": self.get_node_value('Heat-Loss-Perimeter'),
        # TODO: Need an example of flats
        # "flat-storey-count": self.get_node_value('Flat-Storey-Count'),
        "roof-energy-eff": rating('Roof', 'Energy-Efficiency-Rating'),
        "total-floor-area": self.get_node_value('Total-Floor-Area'),
        "environment-impact-current": self.get_energy_assessment_value('Environmental-Impact-Current'),
        "roof-description": self.get_property_summary_value('Roof', 'Description'),
        "floor-energy-eff": rating('Floor', 'Energy-Efficiency-Rating'),
        "number-habitable-rooms": self.get_node_value('Habitable-Room-Count'),
        "hot-water-env-eff": rating('Hot-Water', 'Environmental-Efficiency-Rating'),
        "mainheatc-energy-eff": rating('Main-Heating-Controls', 'Energy-Efficiency-Rating'),
        "main-fuel": lookup(self.FUEL_TYPE_MAP, self.get_node_value('Main-Fuel-Type')),
        "lighting-env-eff": rating('Lighting', 'Environmental-Efficiency-Rating'),
        "windows-energy-eff": rating('Window', 'Energy-Efficiency-Rating'),
        "floor-env-eff": rating('Floor', 'Environmental-Efficiency-Rating'),
        "sheating-env-eff": rating('Secondary-Heating', 'Environmental-Efficiency-Rating'),
        # NOTE(review): this key uses an underscore while every other key is
        # hyphenated - confirm with consumers before renaming
        "lighting_description": self.get_property_summary_value('Lighting', 'Description'),
        "roof-env-eff": rating('Roof', 'Environmental-Efficiency-Rating'),
        "walls-energy-eff": rating('Wall', 'Energy-Efficiency-Rating'),
        "photo-supply": self.get_photo_supply(),
        "lighting-cost-potential": self.get_energy_assessment_value('Lighting-Cost-Potential'),
        "mainheat-env-eff": rating('Main-Heating', 'Environmental-Efficiency-Rating'),
        "multi-glaze-proportion": self.get_node_value('Multiple-Glazed-Proportion'),
        "main-heating-controls": self.get_property_summary_value('Main-Heating-Controls', 'Description'),
        # TODO: Need an example of flats
        # "flat-top-storey": self.get_node_value('Flat-Top-Storey'),
        "secondheat-description": self.get_property_summary_value('Secondary-Heating', 'Description'),
        "walls-env-eff": rating('Wall', 'Environmental-Efficiency-Rating'),
        "transaction-type": lookup(self.TRANSACTION_TYPE_MAP, self.get_node_value('Transaction-Type')),
        "extension-count": self.get_node_value('Extensions-Count'),
        "mainheatc-env-eff": rating('Main-Heating-Controls', 'Environmental-Efficiency-Rating'),
        "lmk-key": "",  # Doesn't exist for non-EPC xmls
        "wind-turbines-count": self.get_node_value('Wind-Turbines-Count'),
        "tenure": lookup(self.TENURE_MAP, self.get_node_value('Tenure')),
        # TODO: Need an example of flats
        # "floor-level": self.get_node_value('Floor-Level'),
        "potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'),
        "hot-water-energy-eff": rating('Hot-Water', 'Energy-Efficiency-Rating'),
        "low-energy-lighting": self.get_node_value('Low-Energy-Lighting'),
        "walls-description": self.get_property_summary_value('Wall', 'Description'),
        "hotwater-description": self.get_property_summary_value('Hot-Water', 'Description'),
    }
def get_node_value(self, tag_name):
    """
    Return the text of the first element named ``tag_name`` anywhere in ``self.xml``.

    :param tag_name: XML tag name to search for.
    :return: ``nodeValue`` of the first match's first child, or None when no element
        matches or the first match has no children.
    """
    matches = self.xml.getElementsByTagName(tag_name)
    if not matches:
        return None
    first_child = matches[0].firstChild
    if not first_child:
        return None
    return first_child.nodeValue
def get_node_value_from_floor_dimensions(self, tag_name):
    """
    Return the text of ``tag_name`` inside the first 'SAP-Floor-Dimension' element.

    Only the first floor-dimension element is inspected; later ones are ignored.

    :param tag_name: XML tag name to search for within that element.
    :return: ``nodeValue`` of the first match's first child, or None when there is no
        floor-dimension element, no matching tag, or the tag has no children.
    """
    floor_dimensions = self.xml.getElementsByTagName('SAP-Floor-Dimension')
    if not floor_dimensions:
        return None
    matches = floor_dimensions[0].getElementsByTagName(tag_name)
    if not matches or not matches[0].firstChild:
        return None
    return matches[0].firstChild.nodeValue
def get_property_summary_value(self, section, tag_name):
    """
    Return the text of ``tag_name`` within ``section`` of the first 'Property-Summary'.

    :param section: tag name of the summary section, e.g. 'Roof' or 'Main-Heating'.
    :param tag_name: tag name to read inside the first matching section.
    :return: ``nodeValue`` of the tag's first child, or None when the document has no
        'Property-Summary', the section is absent, the tag is absent, or the tag has
        no children.
    """
    summaries = self.xml.getElementsByTagName('Property-Summary')
    # Return None for a missing summary, like the other getters, instead of
    # failing on the [0] index
    if not summaries:
        return None
    sections = summaries[0].getElementsByTagName(section)
    if not sections:
        return None
    matches = sections[0].getElementsByTagName(tag_name)
    if matches and matches[0].firstChild:
        return matches[0].firstChild.nodeValue
    return None
def get_energy_assessment_value(self, tag_name):
    """
    Return the text of ``tag_name`` inside the first 'Energy-Assessment' element.

    :param tag_name: tag name to read within the energy assessment.
    :return: ``nodeValue`` of the first match's first child, or None when the document
        has no 'Energy-Assessment', the tag is absent, or the tag has no children.
    """
    assessments = self.xml.getElementsByTagName('Energy-Assessment')
    # Test the list before indexing: a minidom element is always truthy, so checking
    # the element itself can never detect a missing section
    if not assessments:
        return None
    matches = assessments[0].getElementsByTagName(tag_name)
    if matches and matches[0].firstChild:
        return matches[0].firstChild.nodeValue
    return None
def get_uprn(self, uprn):
if uprn is not None:
@ -253,9 +428,14 @@ class XmlParser:
self.heating_cost_current = self.xml.getElementsByTagName('Heating-Cost-Current')[0].firstChild.nodeValue
self.hot_water_cost_current = self.xml.getElementsByTagName('Hot-Water-Cost-Current')[0].firstChild.nodeValue
self.lighting_cost_current = self.xml.getElementsByTagName('Lighting-Cost-Current')[0].firstChild.nodeValue
# Energy consumption
self.energy_consumption_current = (
self.xml.getElementsByTagName("Energy-Consumption-Current")[0].firstChild.nodeValue
)
self.energy_consumption_potential = (
self.xml.getElementsByTagName("Energy-Consumption-Potential")[0].firstChild.nodeValue
)
def get_detailed_heating_specs(self):
"""
@ -457,11 +637,17 @@ class XmlParser:
)
def get_photo_supply(self):
self.photo_supply = float(
self.xml.getElementsByTagName('Photovoltaic-Supply')[0]
.getElementsByTagName('Percent-Roof-Area')[0]
.firstChild.nodeValue
)
photo_supply_tag = self.xml.getElementsByTagName("Photovoltaic-Supply")[0]
# Check if the "None-Or-No-Details" tag is present
if photo_supply_tag.getElementsByTagName("None-Or-No-Details"):
return (
photo_supply_tag.
getElementsByTagName("None-Or-No-Details")[0].
getElementsByTagName("Percent-Roof-Area")[0].
firstChild.nodeValue
)
else:
raise NotImplementedError("Implement me")
def get_assessor_details(self):