mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
102 lines
4.2 KiB
Python
102 lines
4.2 KiB
Python
from typing import Dict, Union
|
|
from BaseUtility import Definitions
|
|
from etl.epc_clean.epc_attributes.attribute_utils import clean_description, handle_mixed_translation
|
|
|
|
|
|
class WindowAttributes(Definitions):
    """Extract glazing attributes from a free-text window description.

    The description is normalised in ``__init__`` (lower-cased, cleaned,
    translated from Welsh when it matches a known phrase) and ``process``
    turns it into a dictionary with the keys ``has_glazing``,
    ``glazing_coverage``, ``glazing_type`` and ``no_data``.

    NOTE(review): ``DATA_ANOMALY_MATCHES`` is not defined in this class; it is
    presumably inherited from ``Definitions`` -- confirm.
    """

    # Substrings indicating that the description talks about glazing at all.
    GLAZING_KEYWORDS = ["glazing", "glazed", "glaze"]

    # Coverage terms, checked in this order; the first match within a part wins.
    # "thoughout" is a misspelling that was previously the only spelling listed;
    # it is kept next to the correct "throughout" so both are recognised.
    GLAZING_COVERAGE = [
        "fully",
        "mostly",
        "partial",
        "some",
        "full",
        "throughout",
        "thoughout",
    ]

    # Glazing types, checked in this order; the first match within a part wins.
    GLAZING_TYPES = ["double", "triple", "secondary", "multiple", "high performance", "single"]

    # Maps every term in GLAZING_COVERAGE to its normalised coverage value.
    # Every entry of GLAZING_COVERAGE must have a key here, otherwise
    # ``process`` raises KeyError on lookup.
    coverage_map = {
        "full": "full",
        "fully": "full",
        "mostly": "most",
        "partial": "partial",
        "some": "partial",
        "throughout": "full",
        "thoughout": "full",
    }

    # Known Welsh descriptions and the English text they are replaced with.
    WELSH_TEXT = {
        "gwydrau dwbl llawn": "full double glazing",
        "gwydrau dwbl rhannol": "partial double glazing",
        "gwydrau dwbl gan mwyaf": "mostly double glazing",
        "rhai gwydrau dwbl": "some double glazing",
        "gwydrau sengl": "single glazed",
        "ffenestri perfformiad uchel": "high performance glazing",
        "gwydrau triphlyg llawn": "fully triple glazed",
        "gwydrau triphlyg rhannol": "partial triple glazed",
        "gwydrau triphlyg mwyaf": "mostly triple glazed",
        "gwydrau triphlyg gan mwyaf": "mostly triple glazed",
        "gwydrau eilaidd llawn": "full secondary glazing",
        "gwydrau eilaidd mwyaf": "mostly secondary glazing",
        "gwydrau eilaidd rhannol": "partial secondary glazing",
        "gwydrau lluosog ym mhobman": "multiple glazing throughout",
    }

    # These are observed data anomalies that we want to ignore
    NO_DATA_CASES = [
        "SAP05:Windows",
        "Solid, no insulation (assumed)",  # A description typically associated with floors, not windows
        "Suspended, no insulation (assumed)",  # A description typically associated with floors, not windows
    ]

    def __init__(self, description: str):
        """Normalise ``description`` and decide whether it carries usable data.

        Args:
            description: Raw window description. An empty or ``None`` value is
                treated as "no data" instead of raising.

        Raises:
            ValueError: If the description is considered to carry data but
                contains none of the known glazing keywords, coverage terms
                or glazing types.
        """
        # ``description or ""`` keeps a None description from crashing on
        # ``.lower()``; it is flagged as no-data just below.
        self.description: str = clean_description((description or "").lower())

        # In the case of an empty description, we want to return a dictionary with all values set to False
        # and indicate there was no data. The anomaly lists are matched against
        # the raw (un-normalised) description.
        self.nodata = (
            not description
            or description in self.DATA_ANOMALY_MATCHES
            or description in self.NO_DATA_CASES
        )

        # An exact match on a known Welsh phrase replaces the description with
        # its English equivalent and always counts as data.
        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            self.nodata = False
            self.description = translation

        # We handle seeming occurrences of mixed translations
        self.description = handle_mixed_translation(self.description)

        if not self.nodata:
            known_terms = self.GLAZING_KEYWORDS + self.GLAZING_COVERAGE + self.GLAZING_TYPES
            if not any(term in self.description for term in known_terms):
                raise ValueError('Invalid description')

    def process(self) -> Dict[str, Union[str, bool, None]]:
        """Return the glazing attributes found in the description.

        Returns:
            A dictionary with the keys:

            * ``has_glazing`` -- True when a glazing keyword is present and the
              glazing type is not ``"single"``.
            * ``glazing_coverage`` -- normalised coverage value from
              ``coverage_map``, or None.
            * ``glazing_type`` -- the first matching entry of
              ``GLAZING_TYPES``, or None.
            * ``no_data`` -- True when the description carried no usable data;
              the other values are then left at their defaults.
        """
        result: Dict[str, Union[str, bool, None]] = {
            "has_glazing": False,
            "glazing_coverage": None,
            "glazing_type": None,
            "no_data": self.nodata,
        }

        if self.nodata:
            return result

        # We consolidate GLAZING_KEYWORDS into a single attribute
        result["has_glazing"] = any(keyword in self.description for keyword in self.GLAZING_KEYWORDS)

        # For coverage and type, we will only store the first one we find
        for part in self.description.split(','):
            part = part.strip()  # remove leading/trailing white spaces

            if not result["glazing_coverage"]:
                for coverage in self.GLAZING_COVERAGE:
                    if coverage in part:
                        result["glazing_coverage"] = self.coverage_map[coverage]
                        break

            if not result["glazing_type"]:
                for glazing_type in self.GLAZING_TYPES:
                    if glazing_type in part:
                        result["glazing_type"] = glazing_type
                        break

            # Both values are settled; later parts cannot change them.
            if result["glazing_coverage"] and result["glazing_type"]:
                break

        is_single = result["glazing_type"] == "single"

        # If no coverage was found we assume full coverage, except for single
        # glazing where the coverage is left unset.
        if not result["glazing_coverage"] and not is_single:
            result["glazing_coverage"] = "full"

        # We reset some values if the glazing is single
        if is_single:
            result["has_glazing"] = False

        return result
|