Model/etl/epc_clean/epc_attributes/FloorAttributes.py
2026-01-22 22:56:58 +00:00

160 lines
6.5 KiB
Python

import re
from typing import Dict, Union
from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import (
extract_thermal_transmittance,
extract_component_types,
handle_mixed_translation
)
class FloorAttributes(Definitions):
    """Parse a free-text EPC floor description into structured attributes.

    The constructor normalises the description (lower-casing, Welsh to English
    translation, known remaps) and validates it; :meth:`process` then turns
    the normalised text into a flat dictionary of attributes.
    """

    DWELLING_BELOW = ["another dwelling below", "other premises below"]
    FLOOR_TYPES = [
        "assumed",
        "to unheated space",
        "to external air",
        "suspended",
        "solid",
    ]
    # For the short term, while we are still exploring the data, we maintain a
    # list of error cases which we want to ignore and consider as no data.
    OBSERVED_ERRORS = ["Conservatory", "insulated", "Basement"]
    # Welsh description -> English description. Lookups are exact matches
    # against the lower-cased input. Several keys contain mis-encoded
    # characters; presumably they mirror the raw data, so they must be kept
    # byte-for-byte or the lookups will stop matching.
    WELSH_TEXT = {
        "(anheddiad arall islaw)": "(another dwelling below)",
        "solet, dim inswleiddio (rhagdybiaeth)": "solid, no insulation (assumed)",
        "solet, dim inswleiddio": "solid, no insulation",
        "crog, dim inswleiddio (rhagdybiaeth)": "suspended, no insulation (assumed)",
        "crog, dim inswleiddio": "suspended, no insulation",
        "(eiddo arall islaw)": "(other premises below)",
        "solet, inswleiddio cyfyngedig (rhagdybiaeth)": "solid, limited insulation (assumed)",
        "solet, inswleiddio cyfyngedig": "solid, limited insulation",
        "crog, wedigçöi inswleiddio (rhagdybiaeth)": "suspended, insulated (assumed)",
        "crog, wedigçöi inswleiddio": "suspended, insulated",
        "igçör awyr y tu allan, dim inswleiddio (rhagdybiaeth)": "to external air, no insulation (assumed)",
        "igçör awyr y tu allan, dim inswleiddio": "to external air, no insulation",
        "i ofod heb ei wresogi, wedigçöi inswleiddio (rhagdybiaeth)": "to unheated space, insulated (assumed)",
        "i ofod heb ei wresogi, wedigçöi inswleiddio": "to unheated space, insulated",
        "solet, wedigçöi inswleiddio (rhagdybiaeth)": "solid, insulated (assumed)",
        "solet, wedigçöi inswleiddio": "solid, insulated",
        "solet, wedi???i inswleiddio (rhagdybiaeth)": "solid, insulated (assumed)",
        "i ofod heb ei wresogi, dim inswleiddio (rhagdybiaeth)": "to unheated space, no insulation (assumed)",
        "i ofod heb ei wresogi, heb ei inswleiddio (rhagdybiaeth)": "to unheated space, no insulation (assumed)",
        "i ofod heb ei wresogi, dim inswleiddio": "to unheated space, no insulation",
        "igçör awyr y tu allan, wedigçöi inswleiddio (rhagdybiaeth)": "to external air, insulated (assumed)",
        "crog, inswleiddio cyfyngedig (rhagdybiaeth)": "suspended, limited insulation (assumed)",
    }
    REMAP = {
        # Only seen once so far, though older EPCs may need investigating when
        # producing EPC clean. A newer EPC for the same case had been
        # re-assessed as having another dwelling below.
        "above unheated space or full exposed": "(another dwelling below)",
    }
    # Welsh "average thermal transmittance <value> <unit>" patterns, tried in
    # order. The first expects the exact mis-encoded unit seen in the data; the
    # second tolerates any other rendering of the characters between "w/m" and
    # "k".
    _WELSH_UVALUE_PATTERNS = (
        re.compile(r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k"),
        re.compile(r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k"),
    )
    # Insulation phrase -> thickness category. Insertion order is the match
    # priority: the first phrase found in the description wins.
    _INSULATION_THICKNESS = {
        "external insulation": "average",
        "internal insulation": "average",
        "limited insulation": "below average",
        "partial insulation": "below average",
        "no insulation": "none",
        "additional insulation": "above average",
        "insulated": "average",
    }

    def __init__(self, description: str):
        """Normalise and validate a raw floor description.

        Args:
            description: Raw description text. Falsy values (``None``, ``""``)
                are accepted and flagged as no-data.

        Raises:
            ValueError: If the description carries data but mentions none of
                the known floor types, "dwelling below" phrases, or an
                average thermal transmittance.
        """
        # Guard before lower-casing so that None is reported as no-data
        # instead of raising AttributeError.
        self.description: str = description.lower() if description else ""
        # Compare case-insensitively so that e.g. "conservatory" is ignored in
        # the same way as "Conservatory".
        observed_errors = {error.lower() for error in self.OBSERVED_ERRORS}
        self.nodata = (
            not self.description
            or description in self.DATA_ANOMALY_MATCHES
            or self.description in observed_errors
            or self.description == "sap05:floor"
        )
        # Try to perform a translation, in case the description is in Welsh.
        self.translate_welsh_text()
        # Remap known issues.
        if self.description in self.REMAP:
            self.description = self.REMAP[self.description]
        # Handle seeming occurrences of mixed (part Welsh, part English) text.
        self.description = handle_mixed_translation(self.description)
        if self.nodata:
            return
        recognised_phrases = (
            self.FLOOR_TYPES
            + self.DWELLING_BELOW
            + ["average thermal transmittance"]
        )
        if not any(phrase in self.description for phrase in recognised_phrases):
            raise ValueError("Invalid description")

    def translate_welsh_text(self):
        """Translate a Welsh description to English in place.

        A Welsh average-thermal-transmittance sentence is rewritten to its
        English form, carrying the numeric value over. Otherwise the whole
        description is looked up in ``WELSH_TEXT``; a hit replaces the
        description and clears the ``nodata`` flag. Descriptions matching
        neither are left untouched.
        """
        for pattern in self._WELSH_UVALUE_PATTERNS:
            match = pattern.search(self.description)
            if match is not None:
                uvalue = match.group(1)
                # The unit is emitted in the same (mis-encoded) form used
                # elsewhere in this class.
                self.description = f"average thermal transmittance {uvalue} w/m-¦k"
                return
        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            self.nodata = False
            self.description = translation

    def process(self) -> Dict[str, Union[str, bool, int, float, None]]:
        """Convert the normalised description into a dictionary of attributes.

        Returns:
            A fixed all-empty record with ``no_data`` set to ``True`` when the
            description carries no data. Otherwise a dictionary built by the
            thermal-transmittance and component-type helpers, extended with
            ``another_property_below`` and ``insulation_thickness``.
        """
        if self.nodata:
            return {
                'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': False,
                'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False,
                'another_property_below': False, 'insulation_thickness': 'none', 'no_data': True
            }
        result: Dict[str, Union[float, str, bool, None]] = {}
        description = self.description
        # Thermal transmittance.
        result, description = extract_thermal_transmittance(result, description)
        # Floor type.
        result, description = extract_component_types(
            result, description, list_of_components=self.FLOOR_TYPES
        )
        # Check whether there is another dwelling below. The parenthesised
        # variants contain the bare phrases, so substring checks on the bare
        # phrases cover both.
        result["another_property_below"] = any(
            phrase in description for phrase in self.DWELLING_BELOW
        )
        # The first matching phrase (in map order) decides the thickness;
        # None means that no insulation phrase was found.
        result["insulation_thickness"] = next(
            (
                thickness
                for phrase, thickness in self._INSULATION_THICKNESS.items()
                if phrase in description
            ),
            None,
        )
        if result["another_property_below"]:
            # A floor over another property is given a transmittance of zero.
            result["thermal_transmittance"] = 0
            result["thermal_transmittance_unit"] = "w/m-¦k"
        return result