mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
160 lines
6.5 KiB
Python
160 lines
6.5 KiB
Python
import re
|
|
from typing import Dict, Union
|
|
from BaseUtility import Definitions
|
|
from etl.epc_clean.epc_attributes.attribute_utils import (
|
|
extract_thermal_transmittance,
|
|
extract_component_types,
|
|
handle_mixed_translation
|
|
)
|
|
|
|
|
|
class FloorAttributes(Definitions):
    """Turn a free-text EPC floor description into structured attributes.

    On construction the description is lower-cased, translated from Welsh
    when it matches a known phrase, remapped when it is a known one-off
    wording, passed through ``handle_mixed_translation`` and finally
    validated. ``process`` then converts the cleaned text into a flat dict.
    """

    # Phrases indicating that the floor sits above another property.
    DWELLING_BELOW = ["another dwelling below", "other premises below"]

    # Keywords recognised as floor types; also handed to
    # ``extract_component_types`` in ``process``.
    FLOOR_TYPES = [
        "assumed",
        "to unheated space",
        "to external air",
        "suspended",
        "solid",
    ]

    # For the short term, while the data is still being explored, this is a
    # list of erroneous descriptions that are ignored and treated as no data.
    # They are compared against the raw (not lower-cased) description.
    OBSERVED_ERRORS = ["Conservatory", "insulated", "Basement"]

    # Exact-match Welsh -> English phrases. Keys are lower-case and kept
    # exactly as they appear in the data, including mis-encoded characters.
    WELSH_TEXT = {
        "(anheddiad arall islaw)": "(another dwelling below)",
        "solet, dim inswleiddio (rhagdybiaeth)": "solid, no insulation (assumed)",
        "solet, dim inswleiddio": "solid, no insulation",
        "crog, dim inswleiddio (rhagdybiaeth)": "suspended, no insulation (assumed)",
        "crog, dim inswleiddio": "suspended, no insulation",
        "(eiddo arall islaw)": "(other premises below)",
        "solet, inswleiddio cyfyngedig (rhagdybiaeth)": "solid, limited insulation (assumed)",
        "solet, inswleiddio cyfyngedig": "solid, limited insulation",
        "crog, wedigçöi inswleiddio (rhagdybiaeth)": "suspended, insulated (assumed)",
        "crog, wedigçöi inswleiddio": "suspended, insulated",
        "igçör awyr y tu allan, dim inswleiddio (rhagdybiaeth)": "to external air, no insulation (assumed)",
        "igçör awyr y tu allan, dim inswleiddio": "to external air, no insulation",
        "i ofod heb ei wresogi, wedigçöi inswleiddio (rhagdybiaeth)": "to unheated space, insulated (assumed)",
        "i ofod heb ei wresogi, wedigçöi inswleiddio": "to unheated space, insulated",
        "solet, wedigçöi inswleiddio (rhagdybiaeth)": "solid, insulated (assumed)",
        "solet, wedigçöi inswleiddio": "solid, insulated",
        "solet, wedi???i inswleiddio (rhagdybiaeth)": "solid, insulated (assumed)",
        "i ofod heb ei wresogi, dim inswleiddio (rhagdybiaeth)": "to unheated space, no insulation (assumed)",
        "i ofod heb ei wresogi, heb ei inswleiddio (rhagdybiaeth)": "to unheated space, no insulation (assumed)",
        "i ofod heb ei wresogi, dim inswleiddio": "to unheated space, no insulation",
        "igçör awyr y tu allan, wedigçöi inswleiddio (rhagdybiaeth)": "to external air, insulated (assumed)",
        "crog, inswleiddio cyfyngedig (rhagdybiaeth)": "suspended, limited insulation (assumed)",
    }

    REMAP = {
        # Seen only once so far, though older EPCs may need investigating in
        # the production of EPC clean. A newer EPC for the same case had been
        # re-assessed as "another dwelling below".
        "above unheated space or full exposed": "(another dwelling below)",
    }

    def __init__(self, description: Union[str, None]):
        """Clean and validate a floor description.

        Args:
            description: Raw floor description. ``None`` or an empty string
                is accepted and flagged as no data.

        Raises:
            ValueError: If the description is not flagged as no data and
                contains none of the recognised floor keywords.
        """
        # Treat a missing description as empty so it is flagged as no data
        # instead of failing on ``.lower()``.
        raw = description or ""
        self.description: str = raw.lower()

        # NOTE(review): DATA_ANOMALY_MATCHES is not defined in this class;
        # presumably inherited from Definitions - confirm.
        self.nodata = (
            not raw
            or raw in self.DATA_ANOMALY_MATCHES
            or raw in self.OBSERVED_ERRORS
            or self.description == "sap05:floor"
        )

        # Try to perform a translation, in case the text is in Welsh.
        self.translate_welsh_text()

        # Remap known one-off wordings.
        if self.description in self.REMAP:
            self.description = self.REMAP[self.description]

        # Handle seeming occurrences of mixed translations.
        self.description = handle_mixed_translation(self.description)

        recognised = (
            self.FLOOR_TYPES
            + self.DWELLING_BELOW
            + ["average thermal transmittance"]
        )
        if not self.nodata and not any(
            keyword in self.description for keyword in recognised
        ):
            raise ValueError("Invalid description")

    def translate_welsh_text(self):
        """Replace ``self.description`` with its English form when it is Welsh.

        A Welsh average-thermal-transmittance phrase is rebuilt in English
        around the captured U-value. Otherwise the whole description is
        looked up in ``WELSH_TEXT``; a hit also clears ``self.nodata``.
        Descriptions matching neither are left unchanged.
        """
        # Prefer the exact unit spelling; fall back to the looser pattern,
        # which accepts any characters between "w/m" and "k".
        uvalue_match = re.search(
            r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k",
            self.description,
        ) or re.search(
            r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k",
            self.description,
        )

        if uvalue_match is not None:
            # Generalised translation: re-emit the English phrase with the
            # numeric value carried over.
            uvalue = uvalue_match.group(1)
            self.description = f"average thermal transmittance {uvalue} w/m-¦k"
            return

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            self.nodata = False
            self.description = translation

    def process(self) -> Dict[str, Union[float, str, bool, int, None]]:
        """Build the structured attribute dict for this floor description.

        Returns:
            A fixed placeholder dict with ``no_data`` set to ``True`` when the
            description was flagged as no data. Otherwise, the dict populated
            by ``extract_thermal_transmittance`` and
            ``extract_component_types``, plus ``another_property_below`` and
            ``insulation_thickness``.
        """
        if self.nodata:
            return {
                'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': False,
                'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False,
                'another_property_below': False, 'insulation_thickness': 'none', 'no_data': True
            }

        result: Dict[str, Union[float, str, bool, None]] = {}
        description = self.description

        # Thermal transmittance.
        result, description = extract_thermal_transmittance(result, description)

        # Floor type.
        result, description = extract_component_types(
            result, description, list_of_components=self.FLOOR_TYPES
        )

        # Check whether there is another dwelling below. The bracketed
        # variants contain the bare phrases, so testing the bare phrases
        # covers both forms.
        result["another_property_below"] = any(
            phrase in description for phrase in self.DWELLING_BELOW
        )

        # Order matters: the first matching phrase wins, so the specific
        # phrases must come before the generic "insulated".
        thickness_map = {
            "external insulation": "average",
            "internal insulation": "average",
            "limited insulation": "below average",
            "partial insulation": "below average",
            "no insulation": "none",
            "additional insulation": "above average",
            "insulated": "average",
        }
        result["insulation_thickness"] = next(
            (
                thickness
                for phrase, thickness in thickness_map.items()
                if phrase in description
            ),
            None,
        )

        # With another property below, the transmittance is recorded as zero,
        # overriding any value extracted above.
        if result["another_property_below"]:
            result["thermal_transmittance"] = 0
            result["thermal_transmittance_unit"] = "w/m-¦k"

        return result
|