mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
169 lines
8 KiB
Python
169 lines
8 KiB
Python
import re
|
|
from typing import Dict, Union
|
|
from BaseUtility import Definitions
|
|
from etl.epc_clean.epc_attributes.attribute_utils import extract_component_types, extract_thermal_transmittance
|
|
|
|
|
|
class RoofAttributes(Definitions):
    """
    Parse a free-text roof description into a flat dictionary of roof attributes.

    The description is lower-cased and stripped, known Welsh phrasings are translated to
    their English equivalents, and :meth:`process` then extracts the roof type flags,
    thermal transmittance, insulation thickness and whether another dwelling sits above.

    NOTE(review): several Welsh keys and the unit string contain sequences such as
    ``gçö``, ``?`` and ``-¦``. These look like mis-encoded characters (apostrophe / superscript
    two) and presumably mirror the raw data being matched, so they are reproduced verbatim
    and must not be "corrected" without checking the source data.
    """

    ROOF_TYPES = ['pitched', 'roof room', 'loft', 'flat', 'thatched', 'at rafters', 'assumed']
    DWELLING_ABOVE = ["another dwelling above", "other premises above", "other dwelling above"]

    # Exact-match lookup: full (lower-cased, stripped) Welsh description -> English description.
    WELSH_TEXT = {
        "ar oleddf, dim inswleiddio": "pitched, no insulation",
        "ar oleddf, dim inswleiddio (rhagdybiaeth)": "pitched, no insulation (assumed)",
        "ar oleddf, wedigçöi inswleiddio (rhagdybiaeth)": "pitched, insulated (assumed)",
        "ar oleddf, wedi?i inswleiddio (rhagdybiaeth)": "pitched, insulated (assumed)",
        "ar oleddf, wedigçöi hinswleiddio (rhagdybiaeth)": "pitched, insulated (assumed)",
        "ar oleddf, wedigçöi inswleiddio": "pitched, insulated",
        "ar oleddf, wedi?i inswleiddio": "pitched, insulated",
        "ar oleddf, inswleiddio cyfyngedig (rhagdybiaeth)": "pitched, limited insulation (assumed)",
        "ar oleddf, inswleiddio cyfyngedig": "pitched, limited insulation",
        "ar oleddf, wedigçöi inswleiddio wrth y trawstiau": 'pitched, insulated at rafters',
        "ar oleddf, wedi?i inswleiddio wrth y trawstiau": 'pitched, insulated at rafters',
        "ar oleddf, wedi?i inswleiddio wrth y trawstia": 'pitched, insulated at rafters',
        "ar oleddf, wedigçöi inswleiddio wrth y trawstia": 'pitched, insulated at rafters',
        "yn wastad, inswleiddio cyfyngedig (rhagdybiaeth)": "flat, limited insulation (assumed)",
        "yn wastad, inswleiddio cyfyngedig": "flat, limited insulation",
        "yn wastad, dim inswleiddio (rhagdybiaeth)": "flat, no insulation (assumed)",
        "yn wastad, dim inswleiddio": "flat, no insulation",
        "yn wastad, wedigçöi inswleiddio (rhagdybiaeth)": "flat, insulated (assumed)",
        "yn wastad, wedi?i hinswleiddio (rhagdybiaeth)": "flat, insulated (assumed)",
        "yn wastad, wedigçöi inswleiddio": "flat, insulated",
        "(eiddo arall uwchben)": "(another dwelling above)",
        "(annedd arall uwchben)": "(another dwelling above)",
        "ystafell(oedd) to, wedigçöi hinswleiddio": "roof room(s), insulated",
        "ystafell(oedd) to, wedi?i hinswleiddio (rhagdybiaeth)": "roof room(s), insulated (assumed)",
        "ystafell(oedd) to, wedigçöi hinswleiddio (rhagdybiaeth)": "roof room(s), insulated (assumed)",
        "ystafell(oedd) to, inswleiddio cyfyngedig (rhagdybiaeth)": "roof room(s), limited insulation (assumed)",
        "ystafell(oedd) to, inswleiddio cyfyngedig": "roof room(s), limited insulation",
        "ystafell(oedd) to, nenfwd wedigçöi inswleiddio": "roof room(s), ceiling insulated",
        "ystafell(oedd) to, dim inswleiddio (rhagdybiaeth)": "roof room(s), no insulation (assumed)",
        "ystafell(oedd) to, dim inswleiddio": "roof room(s), no insulation",
        "to gwellt, gydag inswleiddio ychwanegol": "thatched, with additional insulation",
    }

    # Keys emitted (all set to False) by process() when the description carries no data.
    DEFAULT_KEYS = [
        'thermal_transmittance', 'thermal_transmittance_unit', 'is_pitched', 'is_roof_room',
        'is_loft', 'is_flat', 'is_thatched', 'is_at_rafters', 'is_assumed', 'has_dwelling_above',
        'is_valid', 'insulation_thickness'
    ]

    # Welsh "pitched, <N mm> loft insulation" phrasings; group 1 captures the thickness text.
    # Tried in order, first match wins.
    _WELSH_LOFT_INSULATION_PATTERNS = tuple(
        re.compile(pattern) for pattern in (
            r"ar oleddf, (\d+ mm) o inswleiddio yn y llofft",
            r"ar oleddf, (\d+ mm) lo inswleiddio yn y llof",
            r"ar oleddf, (\d+\+ mm) lo inswleiddio yn y llof",
            r"ar oleddf, (\d+mm) o inswleiddio yn y llofft",
            r"ar oleddf, (\d+\+ mm) o inswleiddio yn y llofft",
        )
    )

    # Welsh "average thermal transmittance <value> <unit>" phrasings; group 1 captures the value.
    # The second pattern tolerates any characters between "w/m" and "k".
    _WELSH_UVALUE_PATTERNS = (
        re.compile(r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k"),
        re.compile(r'trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k', re.IGNORECASE),
    )

    # Qualitative insulation levels, checked in order with the first substring hit winning.
    # "insulated" also covers "ceiling insulated" and "limited" also covers
    # "limited insulation", so those longer phrases need no entries of their own.
    _INSULATION_LEVELS = (
        ("insulated", "average"),
        ("limited", "below average"),
        ("no insulation", "none"),
        ("additional insulation", "above average"),
    )

    def __init__(self, description: str):
        """
        :param description: Description of the roof. ``None``, empty and whitespace-only
            values are treated as "no data" rather than raising.
        :raises ValueError: if the (translated) description carries data but mentions none of
            the known roof types, dwelling-above phrases or "average thermal transmittance".
        """

        # Normalise before any check so that None / blank input is flagged as nodata
        # instead of failing on ``.lower()`` or being rejected as an invalid description.
        self.description: str = (description or "").lower().strip()
        self.nodata = (
            not self.description
            or description in self.DATA_ANOMALY_MATCHES
            or self.description == "sap05:roof"
        )

        # May rewrite self.description to English and clear self.nodata.
        self.welsh_translation_search()

        if self.nodata:
            return

        recognised_terms = self.ROOF_TYPES + self.DWELLING_ABOVE + ["average thermal transmittance"]
        if not any(term in self.description for term in recognised_terms):
            raise ValueError('Invalid description')

    def welsh_translation_search(self):
        """
        Translate a Welsh description to English, updating ``self.description`` in place.

        Some Welsh descriptions share a consistent structure where only the insulation
        thickness or the U-value changes. Rather than listing every variant in
        ``WELSH_TEXT``, those are matched with regular expressions and rebuilt in English;
        everything else falls back to an exact lookup in ``WELSH_TEXT``. When the exact
        lookup succeeds ``self.nodata`` is cleared. Descriptions that match nothing are
        left unchanged.
        """

        for pattern in self._WELSH_LOFT_INSULATION_PATTERNS:
            thickness_match = pattern.search(self.description)
            if thickness_match:
                self.description = f"pitched, {thickness_match.group(1)} loft insulation"
                return

        for pattern in self._WELSH_UVALUE_PATTERNS:
            uvalue_match = pattern.search(self.description)
            if uvalue_match:
                # Lower-case unit: every English description is lower-cased in __init__ and
                # process() writes the unit as 'w/m-¦k', so the translation follows suit.
                self.description = f"average thermal transmittance {uvalue_match.group(1)} w/m-¦k"
                return

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            self.nodata = False
            self.description = translation

    def process(self) -> Dict[str, Union[float, str, bool, None]]:
        """
        Extract the structured roof attributes from the normalised description.

        :return: a dictionary of roof attributes. When the description carries no data,
            every key in ``DEFAULT_KEYS`` is present and set to ``False``. Otherwise the
            dictionary holds whatever ``extract_thermal_transmittance`` and
            ``extract_component_types`` populate, plus ``has_dwelling_above``, ``is_valid``
            and ``insulation_thickness`` (a thickness string such as ``"150"`` / ``"270+"``,
            a qualitative level, or ``None``).
        """

        if self.nodata:
            return dict.fromkeys(self.DEFAULT_KEYS, False)

        result: Dict[str, Union[float, str, bool, None]] = {}

        # thermal transmittance
        result, description = extract_thermal_transmittance(result, self.description)

        # roof type
        result, description = extract_component_types(result, description, list_of_components=self.ROOF_TYPES)

        result["has_dwelling_above"] = any(phrase in description for phrase in self.DWELLING_ABOVE)
        for phrase in self.DWELLING_ABOVE:
            description = description.replace(phrase, "")

        result["is_valid"] = "invalid" not in description
        description = description.replace("invalid", "")

        # We handle an edge case where the description is "pitched, 150 loft insulation" and is missing the mm
        if result["is_pitched"] or result["is_loft"]:
            unitless_match = re.search(r"(\d+\+?)\s*insulation", description)
            if unitless_match:
                result['insulation_thickness'] = unitless_match.group(1)

        # Qualitative insulation level; overrides the unit-less number above when both apply.
        for phrase, level in self._INSULATION_LEVELS:
            if phrase in description:
                result['insulation_thickness'] = level
                break

        # An explicit thickness in mm takes precedence over everything above.
        mm_match = re.search(r'(\d+\+?)\s*mm', description)
        if mm_match:
            result['insulation_thickness'] = mm_match.group(1)

        result.setdefault('insulation_thickness', None)

        # A roof with another dwelling above is given a thermal transmittance of zero.
        if result["has_dwelling_above"]:
            result["thermal_transmittance"] = 0
            result["thermal_transmittance_unit"] = 'w/m-¦k'

        return result
|