diff --git a/backend/Funding.py b/backend/Funding.py index fe5165f6..4ec57f8a 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -368,6 +368,8 @@ class Funding: starting_str = "1.7" elif closest_starting == 1: starting_str = "1.0" + elif closest_starting == 0.6: + starting_str = "0.6" else: starting_str = f"{closest_starting:.2f}" diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 61c9cc30..6cb767e7 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -803,12 +803,13 @@ class SearchEpc: # Check if it's a new build EPC. A property that doesn't have an EPC is not going to be a new build # so we avoid comparing it to new builds - # TODO - this is experimental + # TODO - this is experimental - if we have the year the property was built, we should utilise that + # here newer_age_bands = [ "England and Wales: 1996-2002", "England and Wales: 2003-2006", "England and Wales: 2007-2011", "England and Wales: 2012 onwards" ] - + # We also remove EPCs that are for new dwellings if (~epc_data["construction-age-band"].isin(newer_age_bands)).sum(): # We have some older age bands, so we need to filter them out epc_data = epc_data[~epc_data["construction-age-band"].isin(newer_age_bands)].copy() @@ -975,9 +976,10 @@ class SearchEpc: # Before we return, we check if we need to overwrite a SAP05 EPC # If we have don't have SAP05 in the heating description and overwrite_sap05 is False, we return is_sap_o5 = "SAP05:" in self.newest_epc.get("mainheat-description", "") - if ( - (not is_sap_o5) and (not overwrite_sap05) and (response["status"] == 200) - ): + good_data = not is_sap_o5 and (response["status"] == 200) + + if good_data or not overwrite_sap05: + # If the data is fine, or we're preventing SAP05 overwrites, we just exit here return # By default, we don't exclude old but we will do, when we are estimating to overwrite a SAP05 EPC diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 1c46a5c8..8edc03db 100644 --- 
a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -89,6 +89,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Boiler and radiators, wood chips": {"fuel": "Wood Logs", "cop": 0.85}, "Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85}, "Air source heat pump, Warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Boiler and underfloor heating, electric": {"fuel": "Electricity", "cop": 1}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 3fa84136..2e5646ac 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -454,10 +454,7 @@ class TrainingDataset(BaseDataset): lambda row: self._lambda_function_to_generate_floor_uvalue(row), axis=1 ) floor_ending_uvalue = self.df.apply( - lambda row: self._lambda_function_to_generate_floor_uvalue( - row, is_end=True - ), - axis=1, + lambda row: self._lambda_function_to_generate_floor_uvalue(row, is_end=True), axis=1 ) floor_starting_uvalue = pd.to_numeric( diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py index 1f320a9b..99de1d03 100644 --- a/etl/epc_clean/app.py +++ b/etl/epc_clean/app.py @@ -48,7 +48,6 @@ def app(): data.columns = [c.replace("_", "-").lower() for c in data.columns] # Take just date before the date threshold data = data[data["lodgement-date"] >= "2011-01-01"] - # Convert to list of dictioaries as returned by the api data = data.to_dict("records") diff --git a/etl/epc_clean/epc_attributes/FloorAttributes.py b/etl/epc_clean/epc_attributes/FloorAttributes.py index 62767638..27f133cc 100644 --- a/etl/epc_clean/epc_attributes/FloorAttributes.py +++ b/etl/epc_clean/epc_attributes/FloorAttributes.py @@ -4,6 +4,7 @@ from BaseUtility import Definitions from etl.epc_clean.epc_attributes.attribute_utils import ( extract_thermal_transmittance, extract_component_types, + handle_mixed_translation ) @@ -61,6 +62,9 @@ class 
FloorAttributes(Definitions): # Try and perform a translation, incase it's in welsh self.translate_welsh_text() + # We handle seeming occurrences of mixed translations + self.description = handle_mixed_translation(self.description) + if not self.nodata and not any( rt in self.description for rt in self.FLOOR_TYPES diff --git a/etl/epc_clean/epc_attributes/HotWaterAttributes.py b/etl/epc_clean/epc_attributes/HotWaterAttributes.py index d1124e08..9966edea 100644 --- a/etl/epc_clean/epc_attributes/HotWaterAttributes.py +++ b/etl/epc_clean/epc_attributes/HotWaterAttributes.py @@ -1,6 +1,6 @@ from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword +from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword, handle_mixed_translation class HotWaterAttributes(Definitions): @@ -153,6 +153,9 @@ class HotWaterAttributes(Definitions): self.nodata = False self.description = translation + # We handle seeming occurrences of mixed translations + self.description = handle_mixed_translation(self.description) + if not self.nodata and not any( self._keyword_in_description(keywords) for keywords in [ diff --git a/etl/epc_clean/epc_attributes/LightingAttributes.py b/etl/epc_clean/epc_attributes/LightingAttributes.py index 78b31142..52cae764 100644 --- a/etl/epc_clean/epc_attributes/LightingAttributes.py +++ b/etl/epc_clean/epc_attributes/LightingAttributes.py @@ -1,6 +1,6 @@ import re from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description +from etl.epc_clean.epc_attributes.attribute_utils import clean_description, handle_mixed_translation from etl.epc_clean.utils import correct_spelling @@ -25,6 +25,9 @@ class LightingAttributes(Definitions): self.description = correct_spelling(self.description) self.averages = averages + # We handle seeming occurrences of mixed translations + self.description = 
handle_mixed_translation(self.description) + self.nodata = (not description) or (description in self.DATA_ANOMALY_MATCHES) or ( description in self.OBSERVED_ERRORS) or (description == "SAP05:Lighting") diff --git a/etl/epc_clean/epc_attributes/MainFuelAttributes.py b/etl/epc_clean/epc_attributes/MainFuelAttributes.py index 9bb53ff1..a818a043 100644 --- a/etl/epc_clean/epc_attributes/MainFuelAttributes.py +++ b/etl/epc_clean/epc_attributes/MainFuelAttributes.py @@ -1,6 +1,8 @@ from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description, remove_punctuation, find_keyword +from etl.epc_clean.epc_attributes.attribute_utils import ( + clean_description, remove_punctuation, find_keyword, handle_mixed_translation +) class MainFuelAttributes(Definitions): @@ -56,6 +58,8 @@ class MainFuelAttributes(Definitions): def __init__(self, description: str): self.description: str = remove_punctuation(clean_description(description.lower())) + # We handle seeming occurrences of mixed translations + self.description = handle_mixed_translation(self.description) self.is_community = 'community' in self.description and 'not community' not in self.description self.is_unknown = False diff --git a/etl/epc_clean/epc_attributes/MainheatAttributes.py b/etl/epc_clean/epc_attributes/MainheatAttributes.py index d20d9290..283c4724 100644 --- a/etl/epc_clean/epc_attributes/MainheatAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py @@ -1,5 +1,7 @@ from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description, process_part, switch_chars +from etl.epc_clean.epc_attributes.attribute_utils import ( + clean_description, process_part, switch_chars, handle_mixed_translation +) from typing import Dict, Union @@ -77,7 +79,17 @@ class MainHeatAttributes(Definitions): 'awyr gynnes, nwy prif gyflenwad': 'warm air, mains gas', "bwyler a rheiddiaduron, nwy prif 
gyflenwad, gwresogyddion ystafell, trydan": "Boiler and radiators, " "mains gas, Room heaters, " - "electric" + "electric", + # an unusual example, containing both english and welsh that was found in the data + "boiler and radiators, |bwyler a rheiddiaduron, |mains gas|nwy prif gyflenwad": "boiler and radiators, " + "mains gas", + "room heaters, |gwresogyddion ystafell, |electric|trydan": "room heaters, electric", + "air source heat pump, |pwmp gwres sy'n tarddu yn yr awyr, |, radiators, |, rheiddiaduron, |electric|trydan": + "air source heat pump, radiators, electric", + "boiler and underfloor heating, |bwyler a gwres dan y llawr, |wood pellets|pelenni coed": "boiler and " + "underfloor " + "heating, " + "wood pellets", } REMAP = { @@ -95,6 +107,7 @@ class MainHeatAttributes(Definitions): "air sourceheat pump, radiators, electric": "air source heat pump, radiators, electric", "bwyler gyda rheiddiaduron a gwres dan y llawr, nwy prif gyflenwad": "Boiler and radiators, mains gas, " "Boiler and underfloor heating, mains gas", + } edge_case_result = {} @@ -115,6 +128,9 @@ class MainHeatAttributes(Definitions): self.nodata = False self.description = translation + # We handle seeming occurrences of mixed translations + self.description = handle_mixed_translation(self.description) + remapped = [] for term in self.description.split(", "): remap = self.REMAP.get(term) diff --git a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py index 3e2df543..b9ef4eca 100644 --- a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py @@ -1,6 +1,6 @@ from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword +from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword, handle_mixed_translation class 
MainheatControlAttributes(Definitions): @@ -136,6 +136,8 @@ class MainheatControlAttributes(Definitions): def __init__(self, description: str): self.description: str = clean_description(description.lower()).strip() + # We handle seeming occurrences of mixed translations + self.description = handle_mixed_translation(self.description) self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES or ( description in self.NO_DATA_DESCRIPTIONS ) diff --git a/etl/epc_clean/epc_attributes/RoofAttributes.py b/etl/epc_clean/epc_attributes/RoofAttributes.py index 153fb548..98998e5a 100644 --- a/etl/epc_clean/epc_attributes/RoofAttributes.py +++ b/etl/epc_clean/epc_attributes/RoofAttributes.py @@ -4,6 +4,7 @@ from BaseUtility import Definitions from etl.epc_clean.epc_attributes.attribute_utils import ( extract_component_types, extract_thermal_transmittance, + handle_mixed_translation ) @@ -79,6 +80,8 @@ class RoofAttributes(Definitions): """ self.description: str = description.lower().strip() + # We handle seeming occurrences of mixed translations + self.description = handle_mixed_translation(self.description) self.nodata = ( not description or description in self.DATA_ANOMALY_MATCHES @@ -90,8 +93,8 @@ class RoofAttributes(Definitions): if not self.nodata and not any( rt in self.description for rt in self.ROOF_TYPES - + self.DWELLING_ABOVE - + ["average thermal transmittance"] + + self.DWELLING_ABOVE + + ["average thermal transmittance"] ): raise ValueError("Invalid description") diff --git a/etl/epc_clean/epc_attributes/WallAttributes.py b/etl/epc_clean/epc_attributes/WallAttributes.py index 8cf32a0b..a390e0a5 100644 --- a/etl/epc_clean/epc_attributes/WallAttributes.py +++ b/etl/epc_clean/epc_attributes/WallAttributes.py @@ -4,6 +4,7 @@ from BaseUtility import Definitions from etl.epc_clean.epc_attributes.attribute_utils import ( extract_component_types, extract_thermal_transmittance, + handle_mixed_translation ) @@ -23,33 +24,56 @@ class 
WallAttributes(Definitions): ] WELSH_TEXT = { - "Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Solid brick, as built, no insulation (assumed)", - "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Cavity wall, as built, partial insulation (assumed)", + "Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Solid brick, as built, no insulation " + "(assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Cavity wall, as built, " + "partial insulation (assumed)", "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "Cavity wall, as built, partial insulation", - "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Cavity wall, as built, no insulation (assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Cavity wall, as built, no insulation" + " (assumed)", "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Cavity wall, as built, no insulation", - "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Sandstone or limestone, as built, no insulation (assumed)", + "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Sandstone or limestone, as built, " + "no insulation (assumed)", "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Sandstone or limestone, as built, no insulation", "Waliau ceudod, ceudod wediGÇÖi lenwi": "Cavity wall, filled cavity", - "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Cavity wall, as built, insulated (assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Cavity wall, as built, " + "insulated (assumed)", "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Cavity wall, as built, insulated", - "Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Granite or whinstone, as built, no insulation (assumed)", + "Gwenithfaen neu risgraig, fel 
yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Granite or whinstone, " + "as built, no insulation (" + "assumed)", "Waliau ceudod,": "Cavity wall, as built, no insulation", - "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Timber frame, as built, insulated (assumed)", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Timber frame, as built, " + "insulated (assumed)", "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Timber frame, as built, insulated", "Gwenithfaen neu risgraig, gydag inswleiddio allanol": "Granite or whinstone, with external insulation", - "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "System built, as built, no insulation (assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "System built, " + "as built, " + "no insulation (" + "assumed)", "Tywodfaen, gydag inswleiddio mewnol": "Sandstone or limestone, with internal insulation", "Waliau ceudod, ynysydd allanol a llenwi ceudod": "Cavity wall, filled cavity and external insulation", "Gwenithfaen neu risgraig, gydag inswleiddio mewnol": "Granite or whinstone, with internal insulation", - "Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Timber frame, as built, partial insulation (assumed)", - "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "System built, as built, insulated (assumed)", - "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "System built, as built, insulated", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Timber frame, as built, " + "partial insulation (assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "System " + "built, " + "as built, " + "insulated (" + "assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu 
hadeiladwyd, wediGÇÖu hinswleiddio": "System built, as built, " + "insulated", "WediGÇÖu hadeiladu yn +¦l system, gydag inswleiddio allanol": "System built, with external insulation", "Briciau solet, gydag inswleiddio mewnol": "Solid brick, with internal insulation", - "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "System built, as built, partial insulation (assumed)", - "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "System built, as built, partial insulation", - "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Timber frame, as built, no insulation (assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "System built, " + "as built, " + "partial " + "insulation (" + "assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "System built, as built, " + "partial insulation", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Timber frame, as built, no insulation " + "(assumed)", "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Timber frame, as built, no insulation", "Tywodfaen, gydag inswleiddio allanol": "Sandstone or limestone, with external insulation", "Waliau ceudod, gydag inswleiddio allanol": "Cavity wall, with external insulation", @@ -92,6 +116,9 @@ class WallAttributes(Definitions): self.welsh_translation_search() + # We handle seeming occurrences of mixed translations + self.description = handle_mixed_translation(self.description) + self.nodata = not description or description in self.DATA_ANOMALY_MATCHES def welsh_translation_search(self): diff --git a/etl/epc_clean/epc_attributes/WindowAttributes.py b/etl/epc_clean/epc_attributes/WindowAttributes.py index 2b1dc172..f5edac2d 100644 --- a/etl/epc_clean/epc_attributes/WindowAttributes.py +++ b/etl/epc_clean/epc_attributes/WindowAttributes.py @@ -1,6 +1,6 @@ from typing import Dict, Union 
from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description +from etl.epc_clean.epc_attributes.attribute_utils import clean_description, handle_mixed_translation class WindowAttributes(Definitions): @@ -53,6 +53,9 @@ class WindowAttributes(Definitions): self.nodata = False self.description = translation + # We handle seeming occurrences of mixed translations + self.description = handle_mixed_translation(self.description) + if not self.nodata: if not any( rt in self.description for rt in diff --git a/etl/epc_clean/epc_attributes/attribute_utils.py b/etl/epc_clean/epc_attributes/attribute_utils.py index 28f958a8..2e98d869 100644 --- a/etl/epc_clean/epc_attributes/attribute_utils.py +++ b/etl/epc_clean/epc_attributes/attribute_utils.py @@ -155,3 +155,19 @@ def find_keyword(description, keywords, synonyms=None): return synonyms.get(keyword, keyword) return None + + +def handle_mixed_translation(description): + """ + We handle some edge cases where welsh and english are mixed in the same description + :param description: str description to process + :return: str description unchanged if it contains no "|", otherwise only its even-indexed (english) parts joined together + """ + if "|" not in description: + return description + + parts = description.split("|") + # The pattern that we see is that in index 0, we have english. 
Then welsh and then english again + # So, the even indexes are english + remapped_parts = [parts[i] for i in range(len(parts)) if i % 2 == 0] + return "".join(remapped_parts) diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index 7c39668a..6acc04f9 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -205,7 +205,7 @@ def get_wall_u_value( mapped_value = wall_uvalues_df[ wall_uvalues_df["Wall_type"] == mapped_description - ][age_band].values[0] + ][age_band].values[0] if pd.isnull(mapped_value) and "Park home" in mapped_description: # We don't know enough in this case so we default to 0 @@ -553,7 +553,15 @@ def get_floor_u_value( lambda_ins = 0.035 # thermal conductivity of floor insulation in W/m·K wall_thickness = [ x[age_band] for x in default_wall_thickness if x["type"] == wall_type - ][0] + ] + if not wall_thickness: + # In some cases, we may estimate an EPC and end up with a slightly mixed EPC, with some fields associated + # to a new build and others to an existing. So we might end up with a None wall type here, because of this. + # If this happens, nothing will be in the wall_thickness list so this is the fallback, the default thickness + # for many EPC assessment systems like Elmhurst + wall_thickness = 300 + else: + wall_thickness = wall_thickness[0] if wall_thickness is None and wall_type == "park home": # We don't know enough and likely won't make recommendations return 0 @@ -563,7 +571,7 @@ def get_floor_u_value( insulation_lookup = s11[ s11["Age_band"].str.contains(age_band) & s11["Floor_construction"] == floor_type - ] + ] if insulation_lookup.empty: insulation_thickness = 0 else: