# Model/model_data/epc_attributes/RoofAttributes.py
# 2023-09-07 16:16:23 +03:00

# 100 lines
# 3.8 KiB
# Python

import re
from typing import Dict, Union
from model_data.BaseUtility import Definitions
from model_data.epc_attributes.attribute_utils import extract_component_types, extract_thermal_transmittance
class RoofAttributes(Definitions):
    """Turn a free-text roof description into a dictionary of attributes.

    The description is lower-cased, a small set of Welsh phrasings is
    translated to English, and the text is checked for at least one
    recognised roof keyword. ``process`` then extracts the individual
    attributes.
    """

    ROOF_TYPES = ['pitched', 'roof room', 'loft', 'flat', 'thatched', 'at rafters', 'assumed']
    DWELLING_ABOVE = ["another dwelling above", "other premises above"]
    WELSH_TEXT = {
        "ar oleddf, dim inswleiddio": "pitched, no insulation",
        "(annedd arall uwchben)": "(another dwelling above)"
    }

    # Keyword -> qualitative insulation label. Iteration order matters: the
    # first keyword found in the description wins.
    INSULATION_THICKNESS_LABELS = {
        "ceiling insulated": "average",
        "insulated": "average",
        "limited": "below average",
        "no insulation": "none",
        "limited insulation": "below average",
        "additional insulation": "above average",
    }

    # Welsh "<n> mm o inswleiddio yn y llofft"; group 1 keeps "<n> mm".
    _WELSH_LOFT_INSULATION_RE = re.compile(r"(\d+ mm) o inswleiddio yn y llofft")
    # Numeric thickness such as "100 mm", "100mm" or "300+ mm"; group 1 is
    # the digits plus the optional "+".
    _THICKNESS_MM_RE = re.compile(r'(\d+\+?)\s*mm')

    def __init__(self, description: Union[str, None]):
        """
        :param description: Description of the roof. An empty or ``None``
            description is treated as "no data".
        :raises ValueError: if the description carries data but mentions none
            of the known roof types, dwelling-above phrases, or
            "average thermal transmittance".
        """
        # Normalise None to "" before lower-casing so that the "no data"
        # check below can handle it instead of crashing on None.lower().
        self.description: str = (description or "").lower()
        # NOTE(review): the anomaly lookup uses the description as passed in
        # (not lower-cased). DATA_ANOMALY_MATCHES is not defined in this
        # class - presumably inherited from Definitions; confirm its casing.
        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES
        self.welsh_translation_search()
        if self.nodata:
            return
        recognised_terms = self.ROOF_TYPES + self.DWELLING_ABOVE + ["average thermal transmittance"]
        if not any(term in self.description for term in recognised_terms):
            raise ValueError('Invalid description')

    def welsh_translation_search(self):
        """
        Translate known Welsh descriptions to English, in place.

        Loft-insulation descriptions share one structure in which only the
        thickness changes, so they are matched with a regular expression
        rather than listed one by one in ``WELSH_TEXT``. When the pattern is
        found, the whole description is replaced by
        ``"pitched, <n> mm loft insulation"``; any other text in the original
        description is discarded.

        Otherwise the description is looked up verbatim in ``WELSH_TEXT``;
        on a hit the translation replaces it and ``nodata`` is cleared.
        """
        loft_match = self._WELSH_LOFT_INSULATION_RE.search(self.description)
        if loft_match:
            self.description = f"pitched, {loft_match.group(1)} loft insulation"
            return

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            self.nodata = False
            self.description = translation

    def process(self) -> Dict[str, Union[float, str, bool, None]]:
        """
        Extract roof attributes from the normalised description.

        :return: An empty dict when there is no data. Otherwise a dict that
            contains at least:

            * ``has_dwelling_above`` - True if any ``DWELLING_ABOVE`` phrase
              is present;
            * ``is_valid`` - False if the text contains "invalid";
            * ``insulation_thickness`` - a qualitative label from
              ``INSULATION_THICKNESS_LABELS``, else the matched millimetre
              figure as a string (e.g. ``"100"`` or ``"300+"``), else None;

            plus whatever keys the ``extract_*`` helpers add.
        """
        result: Dict[str, Union[float, str, bool, None]] = {}
        if self.nodata:
            return result

        description = self.description

        # Both helpers take and return (result, description).
        # NOTE(review): presumably they strip what they consume from the
        # description - confirm in attribute_utils.
        result, description = extract_thermal_transmittance(result, description)
        result, description = extract_component_types(result, description, list_of_components=self.ROOF_TYPES)

        result["has_dwelling_above"] = any(phrase in description for phrase in self.DWELLING_ABOVE)
        for phrase in self.DWELLING_ABOVE:
            description = description.replace(phrase, "")

        result["is_valid"] = "invalid" not in description
        description = description.replace("invalid", "")

        # Qualitative keywords take precedence; a numeric "mm" figure is only
        # looked for when none of them is present.
        for keyword, label in self.INSULATION_THICKNESS_LABELS.items():
            if keyword in description:
                result['insulation_thickness'] = label
                break
        else:
            thickness_match = self._THICKNESS_MM_RE.search(description)
            if thickness_match:
                result['insulation_thickness'] = thickness_match.group(1)

        # Guarantee the key exists without overwriting a value set earlier.
        result.setdefault('insulation_thickness', None)
        return result