Model/etl/epc_clean/epc_attributes/WallAttributes.py
2026-01-22 22:56:58 +00:00

208 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
from typing import Dict, Union
from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import (
extract_component_types,
extract_thermal_transmittance,
handle_mixed_translation
)
# Turns a free-text EPC wall description into a flat dictionary of wall
# attributes (see process()). Descriptions may arrive in Welsh; those are
# translated to English in __init__ before any parsing happens.
class WallAttributes(Definitions):
# Wall-type phrases passed to extract_component_types() by process() as
# list_of_components. process() later reads flags such as "is_cavity_wall",
# "is_as_built" and "is_filled_cavity" from the result, so each phrase
# presumably becomes an "is_<phrase_with_underscores>" key - confirm in
# attribute_utils.
# NOTE(review): list order may matter to the extractor ("cavity wall" is listed
# before "filled cavity") - confirm before reordering.
WALL_TYPES = [
"cavity wall",
"filled cavity",
"solid brick",
"system built",
"timber frame",
"granite or whinstone",
"as built",
"cob",
"assumed",
"sandstone or limestone",
"park home",
]
# Exact-match lookup from a Welsh wall description to its English equivalent,
# used by welsh_translation_search() via WELSH_TEXT.get(self.description).
# Long English values are split across adjacent string literals, which Python
# concatenates into one string.
# NOTE(review): many keys contain character sequences such as "GÇÖ", "+ó" and
# "+¦" - these look like mis-encoded apostrophes/accented letters and presumably
# mirror the incoming data byte-for-byte. Do not "correct" them without
# confirming how the source data is decoded, or the lookups will stop matching.
WELSH_TEXT = {
"Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Solid brick, as built, no insulation "
"(assumed)",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Cavity wall, as built, "
"partial insulation (assumed)",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "Cavity wall, as built, partial insulation",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Cavity wall, as built, no insulation"
" (assumed)",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Cavity wall, as built, no insulation",
"Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Sandstone or limestone, as built, "
"no insulation (assumed)",
"Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Sandstone or limestone, as built, no insulation",
"Waliau ceudod, ceudod wediGÇÖi lenwi": "Cavity wall, filled cavity",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Cavity wall, as built, "
"insulated (assumed)",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Cavity wall, as built, insulated",
# NOTE(review): same phrase as the "(rhagdybiaeth)" entry two above but without
# the "GÇÖ" sequences; it may contain non-printing characters - inspect the raw
# bytes before editing.
"Waliau ceudod, fel yu hadeiladwyd, wediu hinswleiddio (rhagdybiaeth)": "Cavity wall, as built, insulated ("
"assumed)",
"Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Granite or whinstone, "
"as built, no insulation ("
"assumed)",
# NOTE(review): a bare "Waliau ceudod," is mapped to the uninsulated as-built
# case - confirm this default is intended.
"Waliau ceudod,": "Cavity wall, as built, no insulation",
"Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Timber frame, as built, "
"insulated (assumed)",
"Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Timber frame, as built, insulated",
"Gwenithfaen neu risgraig, gydag inswleiddio allanol": "Granite or whinstone, with external insulation",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "System built, "
"as built, "
"no insulation ("
"assumed)",
"Tywodfaen, gydag inswleiddio mewnol": "Sandstone or limestone, with internal insulation",
"Waliau ceudod, ynysydd allanol a llenwi ceudod": "Cavity wall, filled cavity and external insulation",
"Gwenithfaen neu risgraig, gydag inswleiddio mewnol": "Granite or whinstone, with internal insulation",
"Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Timber frame, as built, "
"partial insulation (assumed)",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "System "
"built, "
"as built, "
"insulated ("
"assumed)",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "System built, as built, "
"insulated",
"WediGÇÖu hadeiladu yn +¦l system, gydag inswleiddio allanol": "System built, with external insulation",
"Briciau solet, gydag inswleiddio mewnol": "Solid brick, with internal insulation",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "System built, "
"as built, "
"partial "
"insulation ("
"assumed)",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "System built, as built, "
"partial insulation",
"Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Timber frame, as built, no insulation "
"(assumed)",
"Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Timber frame, as built, no insulation",
"Tywodfaen, gydag inswleiddio allanol": "Sandstone or limestone, with external insulation",
"Waliau ceudod, gydag inswleiddio allanol": "Cavity wall, with external insulation",
"Briciau solet, gydag inswleiddio allanol": "Solid brick, with external insulation",
# Not translations: English descriptions with a truncated "Cob," prefix are
# repaired through the same exact-match lookup.
"Co with external insulation": "Cob, with external insulation",
"Cowith external insulation": "Cob, with external insulation",
}
# Keys that process() fills with defaults when the description carries no data:
# each is first set to False, then "thermal_transmittance" and
# "thermal_transmittance_unit" are overwritten with None and
# "insulation_thickness" with the string "none".
# "is_park_home" is not listed here; process() adds it separately on that path.
DEFAULT_KEYS = [
"thermal_transmittance",
"thermal_transmittance_unit",
"is_cavity_wall",
"is_filled_cavity",
"is_solid_brick",
"is_system_built",
"is_timber_frame",
"is_granite_or_whinstone",
"is_as_built",
"is_cob",
"is_assumed",
"is_sandstone_or_limestone",
"insulation_thickness",
"external_insulation",
"internal_insulation",
]
# Exact-match replacements applied to the raw description in __init__, before
# the Welsh translation step. Keys are known-bad descriptions, values are the
# corrected text.
CORRECTIONS = {
"Granite or whin, as built, no insulation (assumed)": "Granite or whinstone, as built, no insulation (assumed)",
}
def __init__(self, description: str):
    """
    Normalise a raw wall description and record whether it carries any data.

    A string description goes through three steps, in order: an exact-match
    lookup in ``CORRECTIONS``, the Welsh-to-English translation, and
    ``handle_mixed_translation``. ``nodata`` is then decided from the
    *original* argument (empty, or listed in ``DATA_ANOMALY_MATCHES``), not
    from the normalised text.

    :param description: Description of the walls. A value that is not a string
        (for example ``None``) is treated as missing data: ``nodata`` is set to
        True and no translation is attempted.
    """
    self.description: str = description
    if not isinstance(description, str):
        # The translation step runs re.search() on the description, which
        # raises TypeError for non-strings. There is nothing to translate
        # anyway, so flag the record as empty and stop here.
        self.nodata = True
        return
    # Exact-match fix-ups for known truncated descriptions.
    self.description = self.CORRECTIONS.get(description, description)
    self.welsh_translation_search()
    # We handle seeming occurrences of mixed translations
    self.description = handle_mixed_translation(self.description)
    self.nodata = not description or description in self.DATA_ANOMALY_MATCHES
def welsh_translation_search(self):
    """
    Swap a Welsh description for its English equivalent, in place.

    A description containing the Welsh average-thermal-transmittance phrase
    followed by a number is rebuilt as an English sentence around that number.
    Anything else is looked up verbatim in ``WELSH_TEXT``; on a hit the
    description is replaced and ``nodata`` is set to False. A description that
    matches neither is left untouched.
    :return: None
    """
    match = re.search(
        r"Trawsyriannedd thermol cyfartalog (\d+\.?\d*)", self.description
    )
    if match is not None:
        number = match.group(1)
        self.description = f"Average thermal transmittance {number} W/m-¦K"
        return

    english = self.WELSH_TEXT.get(self.description)
    if not english:
        return
    self.nodata = False
    self.description = english
def process(self) -> Dict[str, Union[float, str, bool, None]]:
    """
    Convert the normalised wall description into a flat attribute dictionary.

    :return: When ``nodata`` is set: every key of ``DEFAULT_KEYS`` mapped to
        False, except ``thermal_transmittance`` and
        ``thermal_transmittance_unit`` (None) and ``insulation_thickness``
        (the string "none"), plus ``is_park_home`` set to False.
        Otherwise: whatever ``extract_thermal_transmittance`` and
        ``extract_component_types`` put into the dictionary, plus
        ``insulation_thickness``, ``external_insulation`` and
        ``internal_insulation``, with ``is_sandstone_or_limestone`` and
        ``is_filled_cavity`` possibly switched on by the rules below.
    """
    if self.nodata:
        # No usable description: emit the fixed default record.
        defaults: Dict[str, Union[float, str, bool, None]] = dict.fromkeys(
            self.DEFAULT_KEYS, False
        )
        defaults["thermal_transmittance"] = None
        defaults["thermal_transmittance_unit"] = None
        defaults["insulation_thickness"] = "none"
        defaults["is_park_home"] = False
        return defaults

    result: Dict[str, Union[float, str, bool, None]] = {}
    description = self.description.lower()

    # thermal transmittance - it can be negative, which is erroneous; however we'll still pull it out
    result, description = extract_thermal_transmittance(result, description)

    # wall type (the helper hands back the description it wants us to keep using)
    result, description = extract_component_types(
        result, description, list_of_components=self.WALL_TYPES
    )

    # Edge case: the word "sandstone" is present but the type flag was not set
    if "sandstone" in description and not result["is_sandstone_or_limestone"]:
        result["is_sandstone_or_limestone"] = True

    # insulation thickness - this is far from a perfect approach and we'd likely need to use nlp to do this
    # generally however this is sufficient for mvp.
    # The first phrase found wins, so the order of this mapping matters.
    thickness_map = {
        "external insulation": "average",
        "internal insulation": "average",
        "partial insulation": "below average",
        "no insulation": "none",
        "additional insulation": "above average",
        "insulated": "average",
    }
    result["insulation_thickness"] = next(
        (rating for phrase, rating in thickness_map.items() if phrase in description),
        None,
    )

    # insulation type
    result["external_insulation"] = "external insulation" in description
    result["internal_insulation"] = "internal insulation" in description

    if result["is_filled_cavity"]:
        # A filled cavity plus internal/external insulation is deemed above
        # average; a filled cavity on its own is average.
        has_added_insulation = (
            result["external_insulation"] or result["internal_insulation"]
        )
        result["insulation_thickness"] = (
            "above average" if has_added_insulation else "average"
        )

    # An as-built cavity wall rated "average" is treated as having a filled
    # cavity. This runs after the adjustment above, so it does not re-rate the
    # thickness. Boolean `and` is used so the test short-circuits and follows
    # truthiness instead of integer bitwise arithmetic.
    if (
        result["is_cavity_wall"]
        and result["is_as_built"]
        and result["insulation_thickness"] == "average"
    ):
        result["is_filled_cavity"] = True

    return result