# Model/etl/epc_clean/epc_attributes/RoofAttributes.py
#
# 219 lines
# 8.9 KiB
# Python

import re
from typing import Dict, Union
from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import (
extract_component_types,
extract_thermal_transmittance,
handle_mixed_translation
)
class RoofAttributes(Definitions):
    """
    Parse a free-text roof description into a dictionary of roof attributes.

    The description is lower-cased, passed through ``handle_mixed_translation``,
    translated from Welsh where a known pattern or phrase matches, and then
    broken down by :meth:`process` into roof-type flags, an insulation
    thickness and thermal transmittance values.
    """

    # Substrings that identify a roof type in an (English) description.
    ROOF_TYPES = [
        "pitched",
        "roof room",
        "loft",
        "flat",
        "thatched",
        "at rafters",
        "assumed",
    ]

    # Substrings stating that something sits above the dwelling instead of a roof.
    DWELLING_ABOVE = [
        "another dwelling above",
        "other premises above",
        "other dwelling above",
        "(same dwelling above)",
    ]

    # Exact-match Welsh -> English translations. Several keys differ only in how
    # the source data encoded one character (e.g. "wedigçöi" vs "wedi?i"); they
    # look like mis-encoded variants of the same phrase and must be kept verbatim
    # so that they keep matching the incoming data.
    WELSH_TEXT = {
        "ar oleddf, dim inswleiddio": "pitched, no insulation",
        "ar oleddf, dim inswleiddio (rhagdybiaeth)": "pitched, no insulation (assumed)",
        "ar oleddf, wedigçöi inswleiddio (rhagdybiaeth)": "pitched, insulated (assumed)",
        "ar oleddf, wedi?i inswleiddio (rhagdybiaeth)": "pitched, insulated (assumed)",
        "ar oleddf, wedigçöi hinswleiddio (rhagdybiaeth)": "pitched, insulated (assumed)",
        "ar oleddf, wedigçöi inswleiddio": "pitched, insulated",
        "ar oleddf, wedi?i inswleiddio": "pitched, insulated",
        "ar oleddf, inswleiddio cyfyngedig (rhagdybiaeth)": "pitched, limited insulation (assumed)",
        "ar oleddf, inswleiddio cyfyngedig": "pitched, limited insulation",
        "ar oleddf, wedigçöi inswleiddio wrth y trawstiau": "pitched, insulated at rafters",
        "ar oleddf, wedi?i inswleiddio wrth y trawstiau": "pitched, insulated at rafters",
        "ar oleddf, wedi?i inswleiddio wrth y trawstia": "pitched, insulated at rafters",
        "ar oleddf, wedigçöi inswleiddio wrth y trawstia": "pitched, insulated at rafters",
        "yn wastad, inswleiddio cyfyngedig (rhagdybiaeth)": "flat, limited insulation (assumed)",
        "yn wastad, inswleiddio cyfyngedig": "flat, limited insulation",
        "yn wastad, dim inswleiddio (rhagdybiaeth)": "flat, no insulation (assumed)",
        "yn wastad, dim inswleiddio": "flat, no insulation",
        "yn wastad, wedigçöi inswleiddio (rhagdybiaeth)": "flat, insulated (assumed)",
        "yn wastad, wedi?i hinswleiddio (rhagdybiaeth)": "flat, insulated (assumed)",
        "yn wastad, wedigçöi inswleiddio": "flat, insulated",
        "(eiddo arall uwchben)": "(another dwelling above)",
        "(annedd arall uwchben)": "(another dwelling above)",
        "ystafell(oedd) to, wedigçöi hinswleiddio": "roof room(s), insulated",
        "ystafell(oedd) to, wedi?i hinswleiddio (rhagdybiaeth)": "roof room(s), insulated (assumed)",
        "ystafell(oedd) to, wedigçöi hinswleiddio (rhagdybiaeth)": "roof room(s), insulated (assumed)",
        "ystafell(oedd) to, inswleiddio cyfyngedig (rhagdybiaeth)": "roof room(s), limited insulation (assumed)",
        "ystafell(oedd) to, inswleiddio cyfyngedig": "roof room(s), limited insulation",
        "ystafell(oedd) to, nenfwd wedigçöi inswleiddio": "roof room(s), ceiling insulated",
        "ystafell(oedd) to, dim inswleiddio (rhagdybiaeth)": "roof room(s), no insulation (assumed)",
        "ystafell(oedd) to, dim inswleiddio": "roof room(s), no insulation",
        "to gwellt, gydag inswleiddio ychwanegol": "thatched, with additional insulation",
    }

    # Keys present in the result of ``process`` for a no-data description.
    DEFAULT_KEYS = [
        "thermal_transmittance",
        "thermal_transmittance_unit",
        "is_pitched",
        "is_roof_room",
        "is_loft",
        "is_flat",
        "is_thatched",
        "is_at_rafters",
        "is_assumed",
        "has_dwelling_above",
        "is_valid",
        "insulation_thickness",
    ]

    # Keys reported as None (rather than False) for a no-data description.
    NODATA_NULLS = ["insulation_thickness", "thermal_transmittance", "thermal_transmittance_unit"]

    # Welsh loft-insulation phrasings whose only varying part is the thickness
    # (captured in group 1). Tried in order; the first match wins.
    _WELSH_LOFT_INSULATION_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r"ar oleddf, (\d+ mm) o inswleiddio yn y llofft",
            r"ar oleddf, (\d+ mm) lo inswleiddio yn y llof",
            r"ar oleddf, (\d+\+ mm) lo inswleiddio yn y llof",
            r"ar oleddf, (\d+mm) o inswleiddio yn y llofft",
            r"ar oleddf, (\d+\+ mm) o inswleiddio yn y llofft",
        )
    )

    # Welsh "average thermal transmittance <value> <unit>". The unit is matched
    # loosely ("w/m", anything, "k") because its middle character arrives in
    # more than one encoding; this also covers the literal "w/m-¦k" spelling.
    _WELSH_UVALUE_PATTERN = re.compile(
        r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k",
        re.IGNORECASE,
    )

    # A number directly followed by "insulation", i.e. a thickness missing its "mm".
    _BARE_THICKNESS_PATTERN = re.compile(r"(\d+\+?)\s*insulation")

    # A thickness explicitly given in millimetres.
    _MM_THICKNESS_PATTERN = re.compile(r"(\d+\+?)\s*mm")

    # Qualitative insulation wording -> thickness category; first hit wins.
    # "ceiling insulated" and "limited insulation" need no entries of their own:
    # they contain "insulated" / "limited", which map to the same categories.
    _QUALITATIVE_THICKNESS = (
        ("insulated", "average"),
        ("limited", "below average"),
        ("no insulation", "none"),
        ("additional insulation", "above average"),
    )

    def __init__(self, description: Union[str, None]):
        """
        Normalise the description and decide whether it carries any data.

        :param description: Description of the roof. A falsy value (``None`` or
            an empty string) is treated as "no data".
        :raises ValueError: if the description carries data but, after
            translation, mentions none of the known roof types,
            dwelling-above phrases or "average thermal transmittance".
        """
        # Guard before calling str methods so that None means "no data"
        # instead of raising AttributeError.
        raw_description = description or ""

        # Handle seeming occurrences of mixed (Welsh/English) translations.
        self.description: str = handle_mixed_translation(raw_description.lower().strip())

        # The anomaly lookup deliberately uses the raw (not normalised) text.
        self.nodata: bool = (
            not raw_description
            or raw_description in self.DATA_ANOMALY_MATCHES
            or self.description == "sap05:roof"
        )

        self.welsh_translation_search()

        if self.nodata:
            return

        recognised_markers = (
            self.ROOF_TYPES + self.DWELLING_ABOVE + ["average thermal transmittance"]
        )
        if not any(marker in self.description for marker in recognised_markers):
            raise ValueError(f"Invalid description: {self.description!r}")

    def welsh_translation_search(self) -> None:
        """
        Translate a Welsh description to English in place.

        Some descriptions share a fixed structure in which only the insulation
        thickness or the thermal transmittance value changes. Rather than
        listing every variant, those are recognised with regular expressions
        and rebuilt in English around the captured value. Anything else is
        looked up verbatim in ``WELSH_TEXT``; a successful lookup also clears
        ``self.nodata``. Descriptions that match nothing are left untouched.
        """
        for pattern in self._WELSH_LOFT_INSULATION_PATTERNS:
            loft_match = pattern.search(self.description)
            if loft_match:
                self.description = f"pitched, {loft_match.group(1)} loft insulation"
                return

        uvalue_match = self._WELSH_UVALUE_PATTERN.search(self.description)
        if uvalue_match:
            self.description = (
                f"average thermal transmittance {uvalue_match.group(1)} W/m-¦K"
            )
            return

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            self.nodata = False
            self.description = translation

    def process(self) -> Dict[str, Union[float, str, bool, None]]:
        """
        Turn the normalised description into a dictionary of roof attributes.

        For a no-data description every key in ``DEFAULT_KEYS`` is ``False``,
        except those in ``NODATA_NULLS`` which are ``None``.

        Otherwise the result holds whatever ``extract_thermal_transmittance``
        and ``extract_component_types`` put into it, plus
        ``has_dwelling_above``, ``is_valid`` and ``insulation_thickness``
        (a number as text, a qualitative category, or ``None``).

        :return: mapping of attribute name to value.
        """
        if self.nodata:
            empty: Dict[str, Union[float, str, bool, None]] = dict.fromkeys(
                self.DEFAULT_KEYS, False
            )
            # Thickness and transmittance are unknown, not False, without data.
            empty.update(dict.fromkeys(self.NODATA_NULLS))
            return empty

        result: Dict[str, Union[float, str, bool, None]] = {}
        description = self.description

        # Thermal transmittance, then roof type. Both helpers return the
        # (possibly modified) description that the later steps work on.
        result, description = extract_thermal_transmittance(result, description)
        result, description = extract_component_types(
            result, description, list_of_components=self.ROOF_TYPES
        )

        result["has_dwelling_above"] = any(
            phrase in description for phrase in self.DWELLING_ABOVE
        )
        for phrase in self.DWELLING_ABOVE:
            description = description.replace(phrase, "")

        result["is_valid"] = "invalid" not in description
        description = description.replace("invalid", "")

        # Edge case: "pitched, 150 loft insulation" is missing its "mm".
        # .get() keeps this step safe should the helper omit either flag.
        if result.get("is_pitched") or result.get("is_loft"):
            bare_match = self._BARE_THICKNESS_PATTERN.search(description)
            if bare_match:
                result["insulation_thickness"] = bare_match.group(1)

        # Qualitative wording takes precedence over a bare number ...
        for wording, category in self._QUALITATIVE_THICKNESS:
            if wording in description:
                result["insulation_thickness"] = category
                break

        # ... and an explicit thickness in mm takes precedence over both.
        mm_match = self._MM_THICKNESS_PATTERN.search(description)
        if mm_match:
            result["insulation_thickness"] = mm_match.group(1)

        result.setdefault("insulation_thickness", None)

        # With something above the dwelling, transmittance is reported as 0.
        if result["has_dwelling_above"]:
            result["thermal_transmittance"] = 0
            result["thermal_transmittance_unit"] = "w/m-¦k"

        return result