Merge pull request #575 from Hestia-Homes/eco-eligiblity-bug

implemented some handling for mixed translation descriptions
This commit is contained in:
KhalimCK 2025-12-01 02:24:11 +08:00 committed by GitHub
commit 36c087397d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 126 additions and 36 deletions

View file

@ -368,6 +368,8 @@ class Funding:
starting_str = "1.7"
elif closest_starting == 1:
starting_str = "1.0"
elif closest_starting == 0.6:
starting_str = "0.6"
else:
starting_str = f"{closest_starting:.2f}"

View file

@ -803,12 +803,13 @@ class SearchEpc:
# Check if it's a new build EPC. A property that doesn't have an EPC is not going to be a new build
# so we avoid comparing it to new builds
# TODO - this is experimental
# TODO - this is experimental - if we have the year the property was built, we should utilise that
# here
newer_age_bands = [
"England and Wales: 1996-2002", "England and Wales: 2003-2006", "England and Wales: 2007-2011",
"England and Wales: 2012 onwards"
]
# We also remove EPCs that are for new dwellings
if (~epc_data["construction-age-band"].isin(newer_age_bands)).sum():
# We have some older age bands, so we need to filter them out
epc_data = epc_data[~epc_data["construction-age-band"].isin(newer_age_bands)].copy()
@ -975,9 +976,10 @@ class SearchEpc:
# Before we return, we check if we need to overwrite a SAP05 EPC
# If we don't have SAP05 in the heating description and overwrite_sap05 is False, we return
is_sap_o5 = "SAP05:" in self.newest_epc.get("mainheat-description", "")
if (
(not is_sap_o5) and (not overwrite_sap05) and (response["status"] == 200)
):
good_data = not is_sap_o5 and (response["status"] == 200)
if good_data or not overwrite_sap05:
# If the data is fine, or we're preventing SAP05 overwrites, we just exit here
return
# By default, we don't exclude old but we will do, when we are estimating to overwrite a SAP05 EPC

View file

@ -89,6 +89,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
"Boiler and radiators, wood chips": {"fuel": "Wood Logs", "cop": 0.85},
"Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85},
"Air source heat pump, Warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100},
"Boiler and underfloor heating, electric": {"fuel": "Electricity", "cop": 1},
}
# These are the measure types where if there is a ventilation recommendation, we force the inclusion of it

View file

@ -454,10 +454,7 @@ class TrainingDataset(BaseDataset):
lambda row: self._lambda_function_to_generate_floor_uvalue(row), axis=1
)
floor_ending_uvalue = self.df.apply(
lambda row: self._lambda_function_to_generate_floor_uvalue(
row, is_end=True
),
axis=1,
lambda row: self._lambda_function_to_generate_floor_uvalue(row, is_end=True), axis=1
)
floor_starting_uvalue = pd.to_numeric(

View file

@ -48,7 +48,6 @@ def app():
data.columns = [c.replace("_", "-").lower() for c in data.columns]
# Take just date before the date threshold
data = data[data["lodgement-date"] >= "2011-01-01"]
# Convert to list of dictionaries as returned by the api
data = data.to_dict("records")

View file

@ -4,6 +4,7 @@ from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import (
extract_thermal_transmittance,
extract_component_types,
handle_mixed_translation
)
@ -61,6 +62,9 @@ class FloorAttributes(Definitions):
# Try and perform a translation, in case it's in Welsh
self.translate_welsh_text()
# We handle seeming occurrences of mixed translations
self.description = handle_mixed_translation(self.description)
if not self.nodata and not any(
rt in self.description
for rt in self.FLOOR_TYPES

View file

@ -1,6 +1,6 @@
from typing import Dict, Union
from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword
from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword, handle_mixed_translation
class HotWaterAttributes(Definitions):
@ -153,6 +153,9 @@ class HotWaterAttributes(Definitions):
self.nodata = False
self.description = translation
# We handle seeming occurrences of mixed translations
self.description = handle_mixed_translation(self.description)
if not self.nodata and not any(
self._keyword_in_description(keywords)
for keywords in [

View file

@ -1,6 +1,6 @@
import re
from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import clean_description
from etl.epc_clean.epc_attributes.attribute_utils import clean_description, handle_mixed_translation
from etl.epc_clean.utils import correct_spelling
@ -25,6 +25,9 @@ class LightingAttributes(Definitions):
self.description = correct_spelling(self.description)
self.averages = averages
# We handle seeming occurrences of mixed translations
self.description = handle_mixed_translation(self.description)
self.nodata = (not description) or (description in self.DATA_ANOMALY_MATCHES) or (
description in self.OBSERVED_ERRORS) or (description == "SAP05:Lighting")

View file

@ -1,6 +1,8 @@
from typing import Dict, Union
from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import clean_description, remove_punctuation, find_keyword
from etl.epc_clean.epc_attributes.attribute_utils import (
clean_description, remove_punctuation, find_keyword, handle_mixed_translation
)
class MainFuelAttributes(Definitions):
@ -56,6 +58,8 @@ class MainFuelAttributes(Definitions):
def __init__(self, description: str):
self.description: str = remove_punctuation(clean_description(description.lower()))
# We handle seeming occurrences of mixed translations
self.description = handle_mixed_translation(self.description)
self.is_community = 'community' in self.description and 'not community' not in self.description
self.is_unknown = False

View file

@ -1,5 +1,7 @@
from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import clean_description, process_part, switch_chars
from etl.epc_clean.epc_attributes.attribute_utils import (
clean_description, process_part, switch_chars, handle_mixed_translation
)
from typing import Dict, Union
@ -77,7 +79,17 @@ class MainHeatAttributes(Definitions):
'awyr gynnes, nwy prif gyflenwad': 'warm air, mains gas',
"bwyler a rheiddiaduron, nwy prif gyflenwad, gwresogyddion ystafell, trydan": "Boiler and radiators, "
"mains gas, Room heaters, "
"electric"
"electric",
# an unusual example, containing both english and welsh that was found in the data
"boiler and radiators, |bwyler a rheiddiaduron, |mains gas|nwy prif gyflenwad": "boiler and radiators, "
"mains gas",
"room heaters, |gwresogyddion ystafell, |electric|trydan": "room heaters, electric",
"air source heat pump, |pwmp gwres sy'n tarddu yn yr awyr, |, radiators, |, rheiddiaduron, |electric|trydan":
"air source heat pump, radiators, electric",
"boiler and underfloor heating, |bwyler a gwres dan y llawr, |wood pellets|pelenni coed": "boiler and "
"underfloor "
"heating, "
"wood pellets",
}
REMAP = {
@ -95,6 +107,7 @@ class MainHeatAttributes(Definitions):
"air sourceheat pump, radiators, electric": "air source heat pump, radiators, electric",
"bwyler gyda rheiddiaduron a gwres dan y llawr, nwy prif gyflenwad": "Boiler and radiators, mains gas, "
"Boiler and underfloor heating, mains gas",
}
edge_case_result = {}
@ -115,6 +128,9 @@ class MainHeatAttributes(Definitions):
self.nodata = False
self.description = translation
# We handle seeming occurrences of mixed translations
self.description = handle_mixed_translation(self.description)
remapped = []
for term in self.description.split(", "):
remap = self.REMAP.get(term)

View file

@ -1,6 +1,6 @@
from typing import Dict, Union
from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword
from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword, handle_mixed_translation
class MainheatControlAttributes(Definitions):
@ -136,6 +136,8 @@ class MainheatControlAttributes(Definitions):
def __init__(self, description: str):
self.description: str = clean_description(description.lower()).strip()
# We handle seeming occurrences of mixed translations
self.description = handle_mixed_translation(self.description)
self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES or (
description in self.NO_DATA_DESCRIPTIONS
)

View file

@ -4,6 +4,7 @@ from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import (
extract_component_types,
extract_thermal_transmittance,
handle_mixed_translation
)
@ -79,6 +80,8 @@ class RoofAttributes(Definitions):
"""
self.description: str = description.lower().strip()
# We handle seeming occurrences of mixed translations
self.description = handle_mixed_translation(self.description)
self.nodata = (
not description
or description in self.DATA_ANOMALY_MATCHES
@ -90,8 +93,8 @@ class RoofAttributes(Definitions):
if not self.nodata and not any(
rt in self.description
for rt in self.ROOF_TYPES
+ self.DWELLING_ABOVE
+ ["average thermal transmittance"]
+ self.DWELLING_ABOVE
+ ["average thermal transmittance"]
):
raise ValueError("Invalid description")

View file

@ -4,6 +4,7 @@ from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import (
extract_component_types,
extract_thermal_transmittance,
handle_mixed_translation
)
@ -23,33 +24,56 @@ class WallAttributes(Definitions):
]
WELSH_TEXT = {
"Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Solid brick, as built, no insulation (assumed)",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Cavity wall, as built, partial insulation (assumed)",
"Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Solid brick, as built, no insulation "
"(assumed)",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Cavity wall, as built, "
"partial insulation (assumed)",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "Cavity wall, as built, partial insulation",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Cavity wall, as built, no insulation (assumed)",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Cavity wall, as built, no insulation"
" (assumed)",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Cavity wall, as built, no insulation",
"Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Sandstone or limestone, as built, no insulation (assumed)",
"Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Sandstone or limestone, as built, "
"no insulation (assumed)",
"Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Sandstone or limestone, as built, no insulation",
"Waliau ceudod, ceudod wediGÇÖi lenwi": "Cavity wall, filled cavity",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Cavity wall, as built, insulated (assumed)",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Cavity wall, as built, "
"insulated (assumed)",
"Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Cavity wall, as built, insulated",
"Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Granite or whinstone, as built, no insulation (assumed)",
"Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Granite or whinstone, "
"as built, no insulation ("
"assumed)",
"Waliau ceudod,": "Cavity wall, as built, no insulation",
"Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Timber frame, as built, insulated (assumed)",
"Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Timber frame, as built, "
"insulated (assumed)",
"Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Timber frame, as built, insulated",
"Gwenithfaen neu risgraig, gydag inswleiddio allanol": "Granite or whinstone, with external insulation",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "System built, as built, no insulation (assumed)",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "System built, "
"as built, "
"no insulation ("
"assumed)",
"Tywodfaen, gydag inswleiddio mewnol": "Sandstone or limestone, with internal insulation",
"Waliau ceudod, ynysydd allanol a llenwi ceudod": "Cavity wall, filled cavity and external insulation",
"Gwenithfaen neu risgraig, gydag inswleiddio mewnol": "Granite or whinstone, with internal insulation",
"Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Timber frame, as built, partial insulation (assumed)",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "System built, as built, insulated (assumed)",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "System built, as built, insulated",
"Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Timber frame, as built, "
"partial insulation (assumed)",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "System "
"built, "
"as built, "
"insulated ("
"assumed)",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "System built, as built, "
"insulated",
"WediGÇÖu hadeiladu yn +¦l system, gydag inswleiddio allanol": "System built, with external insulation",
"Briciau solet, gydag inswleiddio mewnol": "Solid brick, with internal insulation",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "System built, as built, partial insulation (assumed)",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "System built, as built, partial insulation",
"Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Timber frame, as built, no insulation (assumed)",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "System built, "
"as built, "
"partial "
"insulation ("
"assumed)",
"WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "System built, as built, "
"partial insulation",
"Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Timber frame, as built, no insulation "
"(assumed)",
"Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Timber frame, as built, no insulation",
"Tywodfaen, gydag inswleiddio allanol": "Sandstone or limestone, with external insulation",
"Waliau ceudod, gydag inswleiddio allanol": "Cavity wall, with external insulation",
@ -92,6 +116,9 @@ class WallAttributes(Definitions):
self.welsh_translation_search()
# We handle seeming occurrences of mixed translations
self.description = handle_mixed_translation(self.description)
self.nodata = not description or description in self.DATA_ANOMALY_MATCHES
def welsh_translation_search(self):

View file

@ -1,6 +1,6 @@
from typing import Dict, Union
from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import clean_description
from etl.epc_clean.epc_attributes.attribute_utils import clean_description, handle_mixed_translation
class WindowAttributes(Definitions):
@ -53,6 +53,9 @@ class WindowAttributes(Definitions):
self.nodata = False
self.description = translation
# We handle seeming occurrences of mixed translations
self.description = handle_mixed_translation(self.description)
if not self.nodata:
if not any(
rt in self.description for rt in

View file

@ -155,3 +155,19 @@ def find_keyword(description, keywords, synonyms=None):
return synonyms.get(keyword, keyword)
return None
def handle_mixed_translation(description):
    """
    Strip the Welsh half out of a description that mixes English and Welsh in the same string.

    The mixed descriptions seen in the data use "|" as a separator and alternate languages, starting with
    English, e.g. "room heaters, |gwresogyddion ystafell, |electric|trydan". Keeping every other segment
    (indexes 0, 2, 4, ...) and joining them back together leaves "room heaters, electric".

    :param description: str description to process
    :return: str, the description unchanged when it contains no "|", otherwise the even-indexed segments
        concatenated with no separator re-inserted
    """
    if "|" not in description:
        return description

    # The pattern that we see is English at index 0, then Welsh, then English again, so the even indexes
    # are English. Segments are joined exactly as they appear; any punctuation inside them is kept as-is
    return "".join(description.split("|")[::2])

View file

@ -205,7 +205,7 @@ def get_wall_u_value(
mapped_value = wall_uvalues_df[
wall_uvalues_df["Wall_type"] == mapped_description
][age_band].values[0]
][age_band].values[0]
if pd.isnull(mapped_value) and "Park home" in mapped_description:
# We don't know enough in this case so we default to 0
@ -553,7 +553,15 @@ def get_floor_u_value(
lambda_ins = 0.035 # thermal conductivity of floor insulation in W/m·K
wall_thickness = [
x[age_band] for x in default_wall_thickness if x["type"] == wall_type
][0]
]
if not wall_thickness:
# In some cases, we may estimate an EPC and end up with a slightly mixed EPC, with some fields associated
# to a new build and others to an existing. So we might end up with a None wall type here, because of this.
# If this happens, nothing will be in the wall_thickness list so this is the fallback, the default thickness
# for many EPC assessment systems like Elmhurst
wall_thickness = 300
else:
wall_thickness = wall_thickness[0]
if wall_thickness is None and wall_type == "park home":
# We don't know enough and likely won't make recommendations
return 0
@ -563,7 +571,7 @@ def get_floor_u_value(
insulation_lookup = s11[
s11["Age_band"].str.contains(age_band) & s11["Floor_construction"]
== floor_type
]
]
if insulation_lookup.empty:
insulation_thickness = 0
else: