diff --git a/backend/Property.py b/backend/Property.py index 4bd77ec8..2358cbc2 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -3,6 +3,7 @@ import re from epc_api.client import EpcClient from model_data.config import EPC_AUTH_TOKEN from model_data.BaseUtility import Definitions +from recommendations.rdsap_tables import england_wales_age_band_lookup class Property(Definitions): @@ -29,6 +30,7 @@ class Property(Definitions): lighting = None coordinates = None + age_band = None def __init__(self, id, postcode, address1, epc_client=None, data=None): self.id = id @@ -245,6 +247,7 @@ class Property(Definitions): self.set_floor_height() self.set_wall_area() self.set_floor_area() + self.set_age_band() for description, attribute in cleaned.items(): @@ -263,6 +266,17 @@ class Property(Definitions): raise ValueError("Either No attributes or multiple found for %s" % description) setattr(self, self.ATTRIBUTE_MAP[description], attributes[0]) + def set_age_band(self): + """ + Sets a cleaned version of the age band of the property given the EPC data + :return: + """ + + if not self.data: + raise ValueError("Property does not contain data") + + self.age_band = england_wales_age_band_lookup[self.data["construction-age-band"]] + def set_is_in_conservation_area(self, in_conservation_area): """ Sets whether the property is in a conservation area given the output of the ConservationAreaClient diff --git a/backend/tests/test_property.py b/backend/tests/test_property.py index 679b25b0..8d392bab 100644 --- a/backend/tests/test_property.py +++ b/backend/tests/test_property.py @@ -30,7 +30,8 @@ mock_epc_response = { "unheated-corridor-length": 0, "mains-gas-flag": "Y", "floor-height": 2.5, - "total-floor-area": 100 + "total-floor-area": 100, + "construction-age-band": "England and Wales: 1967-1975" }, { "inspection-date": "2023-05-01", @@ -53,7 +54,8 @@ mock_epc_response = { "unheated-corridor-length": 0, "mains-gas-flag": "Y", "floor-height": 2.5, - "total-floor-area": 100 + "total-floor-area": 100, + "construction-age-band": "England and Wales: 1967-1975" } ] } @@ -77,7 +79,8 @@ mock_epc_response_dupe = { "unheated-corridor-length": 0, "mains-gas-flag": "Y", "floor-height": 2.5, - "total-floor-area": 100 + "total-floor-area": 100, + "construction-age-band": "England and Wales: 1967-1975" }, { 'inspection-date': '2023-05-01', 'some-other-key': 'some-other-value', @@ -97,7 +100,8 @@ mock_epc_response_dupe = { "unheated-corridor-length": 0, "mains-gas-flag": "Y", "floor-height": 2.5, - "total-floor-area": 100 + "total-floor-area": 100, + "construction-age-band": "England and Wales: 1967-1975" }, { 'inspection-date': '2023-06-01', 'some-other-key': 'duplicate-date', @@ -117,7 +121,8 @@ mock_epc_response_dupe = { "unheated-corridor-length": 0, "mains-gas-flag": "Y", "floor-height": 2.5, - "total-floor-area": 100 + "total-floor-area": 100, + "construction-age-band": "England and Wales: 1967-1975" } ] } @@ -126,11 +131,13 @@ mock_epc_response_dupe = { class TestProperty: @pytest.fixture(autouse=True) def property_instance(self, mock_epc_client, mock_open_uprn_client, mock_cleaner): - return Property(1, "AB12CD", "Test Address", epc_client=mock_epc_client) + property_instance = Property(1, "AB12CD", "Test Address", epc_client=mock_epc_client) + return property_instance @pytest.fixture(autouse=True) def property_instance_dupe_data(self, mock_epc_client_dupe_data): - return Property(2, "AB12CD", "Test Address", epc_client=mock_epc_client_dupe_data) + property_instance_dupe_data = Property(2, "AB12CD", "Test Address", epc_client=mock_epc_client_dupe_data) + return property_instance_dupe_data @pytest.fixture def mock_epc_client(self): diff --git a/model_data/cleaner_app.py b/model_data/cleaner_app.py index 1ccb6238..40011404 100644 --- a/model_data/cleaner_app.py +++ b/model_data/cleaner_app.py @@ -4,7 +4,6 @@ import pandas as pd import msgpack from model_data.EpcClean import EpcClean -from model_data.analysis.UvalueEstimations import UvalueEstimations from model_data.simulation_system.core.Settings import EARLIEST_EPC_DATE from pathlib import Path from utils.s3 import save_data_to_s3 diff --git a/model_data/epc_attributes/FloorAttributes.py b/model_data/epc_attributes/FloorAttributes.py index 5a901679..847b183a 100644 --- a/model_data/epc_attributes/FloorAttributes.py +++ b/model_data/epc_attributes/FloorAttributes.py @@ -65,7 +65,7 @@ class FloorAttributes(Definitions): uvalue = uvalue_match.group(1) else: uvalue = uvalue_match2.group(1) - self.description = f"average thermal transmittance {uvalue} w/m-¦K" + self.description = f'average thermal transmittance {uvalue} w/m-¦k' else: translation = self.WELSH_TEXT.get(self.description) diff --git a/model_data/epc_attributes/LightingAttributes.py b/model_data/epc_attributes/LightingAttributes.py index 94285170..83e9ef5f 100644 --- a/model_data/epc_attributes/LightingAttributes.py +++ b/model_data/epc_attributes/LightingAttributes.py @@ -24,15 +24,16 @@ class LightingAttributes: expression and perform the translation """ lel_match = re.search(r"goleuadau ynni-isel mewn (\d+)%? ogçör mannau gosod", self.description) + lel_match2 = re.search(r"goleuadau ynni-isel mewn (\d+)%? o'r mannau gosod", self.description) - if lel_match: + if lel_match is not None or lel_match2 is not None: + # Perform the actual translation - percentage = lel_match.group(1) + percentage = lel_match.group(1) if lel_match is not None else lel_match2.group(1) self.description = f"low energy lighting in {percentage}% of fixed outlets" else: translation = self.WELSH_TEXT.get(self.description) if translation: - self.nodata = False self.description = translation def process(self): diff --git a/model_data/epc_attributes/MainheatAttributes.py b/model_data/epc_attributes/MainheatAttributes.py index 3fc1468b..9471ee1d 100644 --- a/model_data/epc_attributes/MainheatAttributes.py +++ b/model_data/epc_attributes/MainheatAttributes.py @@ -54,7 +54,8 @@ class MainHeatAttributes(Definitions): "gwresogyddion ystafell, pelenni coed": "room heaters, wood pellets", "gwresogyddion ystafell, glo": "room heaters, coal", "bwyler a gwres dan y llawr, lpg": "boiler and underfloor heating, lpg", - "bwyler a gwres dan y llawr, trydan": "boiler and underfloor heating, electric" + "bwyler a gwres dan y llawr, trydan": "boiler and underfloor heating, electric", + "boiler and radiators, nwy prif gyflenwad, mains gas": "boiler and radiators, mains gas", } REMAP = { diff --git a/model_data/epc_attributes/MainheatControlAttributes.py b/model_data/epc_attributes/MainheatControlAttributes.py index 8bb90f16..64f5afba 100644 --- a/model_data/epc_attributes/MainheatControlAttributes.py +++ b/model_data/epc_attributes/MainheatControlAttributes.py @@ -102,9 +102,16 @@ class MainheatControlAttributes(Definitions): "dim rheolaeth thermostatig ar dymheredd yr ystafell": "no thermostatic control of room temperature", "thermostatau ar y cyfarpar": "appliance thermostats", "rhaglennydd a thermostatau ystafell": "programmer and room thermostats", - "system dalu wedigçöi chysylltu +ó defnyddio gwres cymunedol, rhaglennydd a thermostat ystafell": ( - "charging system linked to use of community heating, programmer and room thermostat" - ), + "system dalu wedigçöi chysylltu +ó defnyddio gwres cymunedol, rhaglennydd a thermostat ystafell": + "charging system linked to use of community heating, programmer and room thermostat", + 'system dalu wedigçöi chysylltu +ó defnyddio gwres cymunedol, rhaglennydd a trvs': + "charging system linked to use of community heating, programmer and trvs", + 'system dalu wedigçöi chysylltu +ó defnyddio gwres cymunedol, trvs': + 'charging system linked to use of community heating, trvs', + 't+-ól un gyfradd, trvs': 'single rate heating, trvs', + 't+ól un gyfradd, rhaglennydd a trvs': 'single rate heating, programmer, trvs', + 't+ól un gyfradd, trvs': 'single rate heating, trvs', + 'trvs a falf osgoi': 'trvs and bypass' } def __init__(self, description: str): diff --git a/model_data/epc_attributes/WallAttributes.py b/model_data/epc_attributes/WallAttributes.py index a0601029..e2f7be07 100644 --- a/model_data/epc_attributes/WallAttributes.py +++ b/model_data/epc_attributes/WallAttributes.py @@ -1,15 +1,71 @@ +import re from typing import Dict, Union from model_data.BaseUtility import Definitions -from model_data.epc_attributes.attribute_utils import extract_component_types, extract_thermal_transmittance +from model_data.epc_attributes.attribute_utils import ( + extract_component_types, + extract_thermal_transmittance +) class WallAttributes(Definitions): WALL_TYPES = ['cavity wall', 'filled cavity', 'solid brick', 'system built', 'timber frame', 'granite or whinstone', - 'as built', 'cob', 'assumed', 'sandstone or limestone'] + 'as built', 'cob', 'assumed', 'sandstone or limestone', "park home"] WELSH_TEXT = { "Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Solid brick, as built, no insulation (assumed)", + 'Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)': + 'Cavity wall, as built, partial insulation (assumed)', + 'Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol': + 'Cavity wall, as built, partial insulation', + 'Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': + 'Cavity wall, as built, no insulation (assumed)', + 'Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio': + 'Cavity wall, as built, no insulation', + 'Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': + 'Sandstone or limestone, as built, no insulation (assumed)', + 'Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio': + 'Sandstone or limestone, as built, no insulation', + 'Waliau ceudod, ceudod wediGÇÖi lenwi': 'Cavity wall, filled cavity', + 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)': + 'Cavity wall, as built, insulated (assumed)', + 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio': + 'Cavity wall, as built, insulated', + 'Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': + 'Granite or whinstone, as built, no insulation (assumed)', + 'Waliau ceudod,': 'Cavity wall, as built, no insulation', + 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)': + 'Timber frame, as built, insulated (assumed)', + 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio': + 'Timber frame, as built, insulated', + 'Gwenithfaen neu risgraig, gydag inswleiddio allanol': 'Granite or whinstone, with external insulation', + 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': + 'System built, as built, no insulation (assumed)', + 'Tywodfaen, gydag inswleiddio mewnol': 'Sandstone or limestone, with internal insulation', + 'Waliau ceudod, ynysydd allanol a llenwi ceudod': 'Cavity wall, filled cavity and external insulation', + 'Gwenithfaen neu risgraig, gydag inswleiddio mewnol': 'Granite or whinstone, with internal insulation', + 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)': + 'Timber frame, as built, partial insulation (assumed)', + 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)': + 'System built, as built, insulated (assumed)', + 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio': + 'System built, as built, insulated', + 'WediGÇÖu hadeiladu yn +¦l system, gydag inswleiddio allanol': 'System built, with external insulation', + 'Briciau solet, gydag inswleiddio mewnol': 'Solid brick, with internal insulation', + 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)': + 'System built, as built, partial insulation (assumed)', + 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol': + 'System built, as built, partial insulation', + 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': + 'Timber frame, as built, no insulation (assumed)', + 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio': + 'Timber frame, as built, no insulation', + 'Tywodfaen, gydag inswleiddio allanol': 'Sandstone or limestone, with external insulation', + 'Waliau ceudod, gydag inswleiddio allanol': 'Cavity wall, with external insulation', + 'Briciau solet, gydag inswleiddio allanol': 'Solid brick, with external insulation', + # Add in some corrections: + 'Co with external insulation': 'Cob, with external insulation', + 'Cowith external insulation': 'Cob, with external insulation', } def __init__(self, description: str): @@ -18,13 +74,27 @@ class WallAttributes(Definitions): """ self.description: str = description - translation = self.WELSH_TEXT.get(self.description) - if translation: - self.nodata = False - self.description = translation + self.welsh_translation_search() self.nodata = not description or description in self.DATA_ANOMALY_MATCHES + def welsh_translation_search(self): + """ + For some descriptions, we need to translate from Welsh to English + :return: + """ + + uvalue_search = re.search(r"Trawsyriannedd thermol cyfartalog (\d+\.?\d*)", self.description) + + if uvalue_search: + uvalue = uvalue_search.group(1) + self.description = f"Average thermal transmittance {uvalue} W/m-¦K" + else: + translation = self.WELSH_TEXT.get(self.description) + if translation: + self.nodata = False + self.description = translation + def process(self) -> Dict[str, Union[float, str, bool, None]]: result: Dict[str, Union[float, str, bool, None]] = {} if self.nodata: @@ -38,6 +108,10 @@ class WallAttributes(Definitions): # wall type result, description = extract_component_types(result, description, list_of_components=self.WALL_TYPES) + # Handle some edge cases + if "sandstone" in description and not result["is_sandstone_or_limestone"]: + result["is_sandstone_or_limestone"] = True + # insulation thickness - this is far from a perfect approach and we'd likely need to use nlp to do this # generally however this is sufficient for mvp thickness_map = { diff --git a/model_data/epc_attributes/attribute_utils.py b/model_data/epc_attributes/attribute_utils.py index a1b65327..b5fc590d 100644 --- a/model_data/epc_attributes/attribute_utils.py +++ b/model_data/epc_attributes/attribute_utils.py @@ -2,8 +2,9 @@ import re import string from typing import Tuple, Union, Dict, List -THERMAL_TRANSMITTENCE_STR = r"average thermal transmittance (-?\d+\.\d+)\s(w/m-¦k)" -THERMAL_TRANSMITTANCE_REGEX = re.compile(THERMAL_TRANSMITTENCE_STR) +THERMAL_TRANSMITTANCE_STR = r"average thermal transmittance (-?\d+(\.\d+)?)\s(w/m\S+k)" +THERMAL_TRANSMITTANCE_REGEX = re.compile(THERMAL_TRANSMITTANCE_STR) + DOUBLE_SPACE_PATTERN = re.compile(r"\s+") @@ -20,11 +21,12 @@ def extract_thermal_transmittance(result: dict, description: str) -> Tuple[ """ match = THERMAL_TRANSMITTANCE_REGEX.search(description) + if match: result['thermal_transmittance'] = float(match.group(1)) - result['thermal_transmittance_unit'] = match.group(2) + result['thermal_transmittance_unit'] = match.group(3) # Remove the match from the description - description = re.sub(THERMAL_TRANSMITTENCE_STR, "", description) + description = re.sub(THERMAL_TRANSMITTANCE_STR, "", description) else: result['thermal_transmittance'] = None result['thermal_transmittance_unit'] = None diff --git a/model_data/simulation_system/core/DataProcessor.py b/model_data/simulation_system/core/DataProcessor.py index c02e6ed5..1252f6c6 100644 --- a/model_data/simulation_system/core/DataProcessor.py +++ b/model_data/simulation_system/core/DataProcessor.py @@ -7,15 +7,18 @@ from model_data.simulation_system.core.Settings import ( EARLIEST_EPC_DATE, FULLY_GLAZED_DESCRIPTIONS, AVERAGE_FIXED_FEATURES, - FLOOR_HEIGHT_NATIONAL_AVERAGE, - TOTAL_FLOOR_AREA_NATIONAL_AVERAGE, FLOOR_LEVEL_MAP, BUILT_FORM_REMAP, COLUMNS_TO_MERGE_ON, COMPONENT_FEATURES, FIXED_FEATURES, - COLUMNTYPES + COLUMNTYPES, + RDSAP_RESPONSE, + MAX_SAP_SCORE, + fill_na_map, + FIXED_DESCRIPTON_MAPPED_FEATURES ) + from typing import List @@ -101,10 +104,14 @@ class DataProcessor: raise NotImplementedError("Not handled the case for value %s" % x) - self.data["CONSTRUCTION_AGE_BAND_CLEANED"] = self.data["CONSTRUCTION_AGE_BAND"].apply( + self.data["CONSTRUCTION_AGE_BAND"] = self.data["CONSTRUCTION_AGE_BAND"].apply( lambda x: clean_construction_age_band(x) ) + self.data = self.data[ + ~pd.isnull(self.data["CONSTRUCTION_AGE_BAND"]) + ] + def clean_missing_rooms(self): """ For the number of heated rooms and number of habitable rooms, we clean these values up front, @@ -132,7 +139,7 @@ class DataProcessor: for col in ["NUMBER_HEATED_ROOMS", "NUMBER_HABITABLE_ROOMS"]: to_index = 3 - matching_columns = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND_CLEANED", "POSTAL_AREA"] + matching_columns = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "POSTAL_AREA"] has_missings = pd.isnull(self.data[col]).sum() while has_missings: self.data = apply_clean( @@ -175,6 +182,8 @@ class DataProcessor: if not self.newdata: self.confine_data() + self.remap_columns() + # We have some non-standard construction age bands which we'll clean for matching self.standardise_construction_age_band() self.clean_missing_rooms() @@ -189,7 +198,6 @@ class DataProcessor: self.retain_multiple_epc_properties( epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"] ) - self.remap_columns() if DATA_PROCESSOR_SETTINGS["epc_minimum_count"] >= 1: # If we have multiple EPC records, we can try and do filling @@ -199,8 +207,14 @@ class DataProcessor: # Final re-casting after data transformed and prepared self.data = self.data.astype(COLUMNTYPES) + self.na_remapping() + return self.data + def na_remapping(self): + for column, fill_value in fill_na_map.items(): + self.data[column] = self.data[column].fillna(fill_value) + def fill_na_fields(self, columns_to_fill: List = COLUMNS_TO_MERGE_ON): """ If we have a minimum of 2 epcs, we can do back fill and forward fill on certain data fields @@ -244,6 +258,10 @@ class DataProcessor: data["FLOOR_LEVEL"] = data["FLOOR_LEVEL"].replace(FLOOR_LEVEL_MAP) data["BUILT_FORM"] = data["BUILT_FORM"].replace(BUILT_FORM_REMAP) + convert_to_lower = ["TRANSACTION_TYPE"] + for col in convert_to_lower: + data[col] = data[col].str.lower() + self.data = data def make_cleaning_averages(self) -> pd.DataFrame: @@ -305,62 +323,43 @@ class DataProcessor: suffixes=["", "_BUILT_FORM_AVERAGE"], ) - # Replace any missing NAN values with averages for the same Property type and built form - cleaning_averages_filled["TOTAL_FLOOR_AREA"] = cleaning_averages_filled[ - "TOTAL_FLOOR_AREA" - ].fillna(cleaning_averages_filled["TOTAL_FLOOR_AREA_AVERAGE"]) - cleaning_averages_filled["FLOOR_HEIGHT"] = cleaning_averages_filled[ - "FLOOR_HEIGHT" - ].fillna(cleaning_averages_filled["FLOOR_HEIGHT_AVERAGE"]) - cleaning_averages_filled = cleaning_averages_filled.drop( - columns=["TOTAL_FLOOR_AREA_AVERAGE", "FLOOR_HEIGHT_AVERAGE"] - ) + for variable in AVERAGE_FIXED_FEATURES: + # Replace any missing NAN values with averages for the same Property type and built form + cleaning_averages_filled[variable] = cleaning_averages_filled[variable].fillna( + cleaning_averages_filled[f"{variable}_AVERAGE"] + ) - # If there are still NA values i.e. the averages do not have values for a speicifc group of property tyope - # and built form - # We can use just the property type average and replace - cleaning_averages_filled["TOTAL_FLOOR_AREA"] = cleaning_averages_filled[ - "TOTAL_FLOOR_AREA" - ].fillna(cleaning_averages_filled["TOTAL_FLOOR_AREA_PROPERTY_AVERAGE"]) - cleaning_averages_filled["FLOOR_HEIGHT"] = cleaning_averages_filled[ - "FLOOR_HEIGHT" - ].fillna(cleaning_averages_filled["FLOOR_HEIGHT_PROPERTY_AVERAGE"]) - cleaning_averages_filled = cleaning_averages_filled.drop( - columns=[ - "TOTAL_FLOOR_AREA_PROPERTY_AVERAGE", - "FLOOR_HEIGHT_PROPERTY_AVERAGE", - ] - ) + cleaning_averages_filled = cleaning_averages_filled.drop(columns=f"{variable}_AVERAGE") - # If there are still NA values, use BUILT FORM averages - cleaning_averages_filled["TOTAL_FLOOR_AREA"] = cleaning_averages_filled[ - "TOTAL_FLOOR_AREA" - ].fillna(cleaning_averages_filled["TOTAL_FLOOR_AREA_BUILT_FORM_AVERAGE"]) - cleaning_averages_filled["FLOOR_HEIGHT"] = cleaning_averages_filled[ - "FLOOR_HEIGHT" - ].fillna(cleaning_averages_filled["FLOOR_HEIGHT_BUILT_FORM_AVERAGE"]) - cleaning_averages_filled = cleaning_averages_filled.drop( - columns=[ - "TOTAL_FLOOR_AREA_BUILT_FORM_AVERAGE", - "FLOOR_HEIGHT_BUILT_FORM_AVERAGE", - ] - ) + # If there are still NA values i.e. the averages do not have values for a speicifc group of property tyope + # and built form + # We can use just the property type average and replace - # If there still is na values, use average across all properties in consituecy - cleaning_averages_filled["TOTAL_FLOOR_AREA"] = cleaning_averages_filled[ - "TOTAL_FLOOR_AREA" - ].fillna(cleaning_averages_filled["TOTAL_FLOOR_AREA"].mean()) - cleaning_averages_filled["FLOOR_HEIGHT"] = cleaning_averages_filled[ - "FLOOR_HEIGHT" - ].fillna(cleaning_averages_filled["FLOOR_HEIGHT"].mean()) + cleaning_averages_filled[variable] = cleaning_averages_filled[variable].fillna( + cleaning_averages_filled[f"{variable}_PROPERTY_AVERAGE"] + ) + + cleaning_averages_filled = cleaning_averages_filled.drop(columns=f"{variable}_PROPERTY_AVERAGE") + + # If there are still NA values, use BUILT FORM averages + cleaning_averages_filled["variable"] = cleaning_averages_filled[variable].fillna( + cleaning_averages_filled[f"{variable}_BUILT_FORM_AVERAGE"] + ) + + cleaning_averages_filled = cleaning_averages_filled.drop(columns=f"{variable}_BUILT_FORM_AVERAGE") + + # If there still is na values, use average across all properties in consituecy + cleaning_averages_filled[variable] = cleaning_averages_filled[ + variable + ].fillna(cleaning_averages_filled[variable].mean()) # If the consituency is all NA values, then take UK AVERAGE VALUES - cleaning_averages_filled["TOTAL_FLOOR_AREA"] = cleaning_averages_filled[ - "TOTAL_FLOOR_AREA" - ].fillna(TOTAL_FLOOR_AREA_NATIONAL_AVERAGE) - cleaning_averages_filled["FLOOR_HEIGHT"] = cleaning_averages_filled[ - "FLOOR_HEIGHT" - ].fillna(FLOOR_HEIGHT_NATIONAL_AVERAGE) + # cleaning_averages_filled["TOTAL_FLOOR_AREA"] = cleaning_averages_filled[ + # "TOTAL_FLOOR_AREA" + # ].fillna(TOTAL_FLOOR_AREA_NATIONAL_AVERAGE) + # cleaning_averages_filled["FLOOR_HEIGHT"] = cleaning_averages_filled[ + # "FLOOR_HEIGHT" + # ].fillna(FLOOR_HEIGHT_NATIONAL_AVERAGE) return cleaning_averages_filled @@ -402,12 +401,29 @@ class DataProcessor: # Filter 4: We remove floor level in top floor or mid floor since this is ambiguous + # Filter 5: Remove any EPCs with a SAP score above 100 + + # Filter 6: We found a small number of cases that have missing window description so we drop these + + # Filter 7: We found a small number of cases that have missing hotwater description so we drop these + self.data = self.data[~pd.isnull(self.data["UPRN"])] self.data = self.data[self.data["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE] self.data = self.data[self.data["TRANSACTION_TYPE"] != "new dwelling"] self.data = self.data[ ~self.data["FLOOR_LEVEL"].isin(["top floor", "mid floor"]) ] + self.data = self.data[self.data[RDSAP_RESPONSE] <= MAX_SAP_SCORE] + + # We observed 7 final records with missing windows and 2 records with missing hot water so we shall remove them + self.data = self.data[~pd.isnull(self.data["WINDOWS_DESCRIPTION"])] + self.data = self.data[~pd.isnull(self.data["HOTWATER_DESCRIPTION"])] + self.data = self.data[~pd.isnull(self.data["ROOF_DESCRIPTION"])] + + # Because park homes are surveyed unusually (for example, we don't have u-values to + # look up for their different components, they need to be collected in survey and aren't reflected in + # EPCs) we'll ignore them from the model + self.data = self.data[self.data["PROPERTY_TYPE"] != "Park home"] def clean_multi_glaze_proportion(self) -> None: """ @@ -437,6 +453,12 @@ class DataProcessor: differs depending on where the function is being used. :return: Cleaned DataFrame. """ + + cols_to_clean = [ + c for c in ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "FIXED_LIGHTING_OUTLETS_COUNT"] if + c in data_to_clean.columns + ] + # Enforce data types for col in ["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"]: data_to_clean[col] = data_to_clean[col].astype(float) @@ -445,10 +467,9 @@ class DataProcessor: columns_to_merge_on = data_to_clean[cols_to_merge_on].dropna().columns.tolist() # Calculate averages - cleaning_averages_to_merge = cleaning_data.groupby(columns_to_merge_on).agg({ - "TOTAL_FLOOR_AREA": "mean", - "FLOOR_HEIGHT": "mean" - }) + cleaning_averages_to_merge = cleaning_data.groupby(columns_to_merge_on).agg( + dict(zip(cols_to_clean, ["mean", ] * len(cols_to_clean))) + ) # Merge with the original data data_to_clean = pd.merge( @@ -460,7 +481,7 @@ class DataProcessor: ) # Fill NaN values with averages - for col in ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]: + for col in cols_to_clean: data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True) data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True) @@ -486,3 +507,144 @@ class DataProcessor: :return: Pandas dataframe containing the columns defined in FIXED_FEATURES """ return self.data[FIXED_FEATURES] + + @staticmethod + def coerce_boolean_columns(df: pd.DataFrame, cols_to_ignore: List | None = None): + """ + Coerce columns with string 'True'/'False' values to boolean columns. + + :param df: Input DataFrame. + :param cols_to_ignore: If specified, is a list of columns to ignore, e.g. uuids + :return: DataFrame with coerced columns. + """ + object_columns = df.select_dtypes(include=['object']).columns + if cols_to_ignore: + object_columns = [c for c in object_columns if c not in cols_to_ignore] + + for column in object_columns: + unique_values = df[column].dropna().unique() + # If the unique values in the column are 'True' and 'False', convert the column to boolean + if set(unique_values) == {'True', 'False'} or set(unique_values) == {True, False}: + df[column] = df[column].astype(bool) + + return df + + @classmethod + def difference_data(cls, df: pd.DataFrame): + + """ + Given a dataframe and starting and ending columns, this function will convert the features to + differenced the ending subtract the starting value, which is useful for modelling the difference responces + """ + + # We ensure that the u value columns are co-erced to a numerical format + uvalue_columns = [col for col in df.columns if "thermal_transmittance" in col] + for uvalue_col in uvalue_columns: + df[uvalue_col] = pd.to_numeric(df[uvalue_col]) + + key_columns = [ + "RDSAP_CHANGE", "HEAT_DEMAND_CHANGE", "CARBON_CHANGE", "SAP_STARTING", "HEAT_DEMAND_STARTING", + "CARBON_STARTING", "UPRN", "CONSTITUENCY", + ] + + ignore_cols = FIXED_FEATURES + FIXED_DESCRIPTON_MAPPED_FEATURES + key_columns + + columns = {x for x in df.columns if x not in ignore_cols} + + non_numerical_columns = df.select_dtypes(exclude=['number']).columns.tolist() + non_numerical_columns = [col for col in non_numerical_columns if col in columns] + levels = {col: df[col].unique().tolist() for col in non_numerical_columns} + + df = pd.get_dummies(df, columns=non_numerical_columns) + + # We make sure there is a starting and ending version of the column + diff_columns = [] + no_diff_columns = [] # Store for debugging + for col in columns: + if "_ENDING" in col: + # Don't keep the endings + continue + else: + # We have a starting column so check if we have an ending + if col.replace("_STARTING", "") + "_ENDING" in columns: + diff_columns.append(col) + else: + no_diff_columns.append(col) + + if any(c not in FIXED_DESCRIPTON_MAPPED_FEATURES for c in no_diff_columns): + raise Exception("Something went wrong, potentially missed a differencing column") + + datatypes = df.dtypes + + # Note: We also difference columns like floor area and floor height. We should experiement with this. + # Starting floor area will heavily impact the starting sap value so that feature may be encapsulated by + # the starting value, therefore to explain any differences in the new floor area, it may be enough to + # just consider the difference however we can play around with this. + + # Do the differencing + cols_to_append = {} + for starting_col in diff_columns: + + base_col = starting_col.replace("_STARTING", "") + + if "_STARTING" in starting_col: + ending_col = starting_col.replace("_STARTING", "_ENDING") + else: + ending_col = starting_col + "_ENDING" + + if starting_col not in non_numerical_columns: + cols_to_append[f"{base_col}_DIFF"] = df[ending_col] - df[starting_col] + df = df.drop(columns=[starting_col, ending_col]) + continue + + level_values = list(set(levels[starting_col] + levels[ending_col])) + + level_cols = [] + for level in level_values: + starting_level_col = "_".join([starting_col, str(level)]) + ending_level_col = "_".join([ending_col, str(level)]) + + if starting_level_col not in df.columns: + # We have no starting, just ending + col_type = datatypes[ending_level_col].name + + if col_type == "bool": + cols_to_append[f"{base_col}_{level}_DIFF"] = df[ending_level_col].astype(int) + else: + cols_to_append[f"{base_col}_{level}_DIFF"] = df[ending_level_col] + + level_cols.append(ending_level_col) + + elif ending_level_col not in df.columns: + # We have no ending, just starting + col_type = datatypes[starting_level_col].name + + if col_type == "bool": + cols_to_append[f"{base_col}_{level}_DIFF"] = -1 * df[starting_level_col].astype(int) + else: + cols_to_append[f"{base_col}_{level}_DIFF"] = -1 * df[ending_level_col] + + level_cols.append(starting_level_col) + + else: + col_type = datatypes[starting_level_col].name + + if col_type == "bool": + cols_to_append[f"{base_col}_{level}_DIFF"] = ( + df[ending_level_col].astype(int) - df[starting_level_col].astype(int) + ) + else: + cols_to_append[f"{base_col}_{level}_DIFF"] = df[ending_level_col] - df[starting_level_col] + + level_cols.extend([starting_level_col, ending_level_col]) + + # Drop the columns + df = df.drop(columns=level_cols) + + cols_to_append = pd.DataFrame(cols_to_append) + df = pd.concat([df, cols_to_append], axis=1) + + # Perform a final coercing of string True/False columns to boolean + df = cls.coerce_boolean_columns(df, cols_to_ignore=key_columns) + + return df diff --git a/model_data/simulation_system/core/Settings.py b/model_data/simulation_system/core/Settings.py index c094c085..8b5252c5 100644 --- a/model_data/simulation_system/core/Settings.py +++ b/model_data/simulation_system/core/Settings.py @@ -55,7 +55,8 @@ FLOOR_HEIGHT_NATIONAL_AVERAGE = 2.45 AVERAGE_FIXED_FEATURES = [ "TOTAL_FLOOR_AREA", - "FLOOR_HEIGHT" + "FLOOR_HEIGHT", + "FIXED_LIGHTING_OUTLETS_COUNT", ] COLUMNS_TO_MERGE_ON = [ @@ -82,9 +83,6 @@ FIXED_FEATURES = [ "CONSTITUENCY", "NUMBER_HEATED_ROOMS", "FIXED_LIGHTING_OUTLETS_COUNT", - "FLOOR_HEIGHT", - "FLOOR_LEVEL", - "TOTAL_FLOOR_AREA", ] COMPONENT_FEATURES = [ @@ -120,7 +118,6 @@ LATEST_FIELD = [ "NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS", "FIXED_LIGHTING_OUTLETS_COUNT", - "FLOOR_LEVEL", "CONSTRUCTION_AGE_BAND", # This is a field we're probably want to use verisk data for ] @@ -173,7 +170,7 @@ DATA_PROCESSOR_SETTINGS = { COLUMNTYPES = { 'UPRN': 'object', 'TOTAL_FLOOR_AREA': 'float64', 'FLOOR_HEIGHT': 'float64', 'PROPERTY_TYPE': 'object', 'BUILT_FORM': 'object', 'CONSTITUENCY': 'object', 'NUMBER_HABITABLE_ROOMS': 'float64', - 'NUMBER_HEATED_ROOMS': 'float64', 'FIXED_LIGHTING_OUTLETS_COUNT': 'float64', 'FLOOR_LEVEL': 'float64', + 'NUMBER_HEATED_ROOMS': 'float64', 'FIXED_LIGHTING_OUTLETS_COUNT': 'float64', 'CONSTRUCTION_AGE_BAND': 'object', 'TRANSACTION_TYPE': 'object', 'WALLS_DESCRIPTION': 'object', @@ -194,3 +191,31 @@ COLUMNTYPES = { 'EXTENSION_COUNT': 'float64', 'LODGEMENT_DATE': 'object', } + +# For modelling, we don't allow records with more than 100 SAP points +MAX_SAP_SCORE = 100 + +fill_na_map = { + # There are some descriptions, such as "To be used only when there is no heating/hot-water system or data is from + # a community network" that could be clustered with unknown fuel + "MAIN_FUEL": "UNKNOWN", + "MECHANICAL_VENTILATION": "Unknown", + "SECONDHEAT_DESCRIPTION": "None", + "ENERGY_TARIFF": "Unknown", + # We set solar water heating flag to N - we could investigate using a different category entirely + "SOLAR_WATER_HEATING_FLAG": "N", + "GLAZED_TYPE": "not defined", + "MULTI_GLAZE_PROPORTION": 0, + "LOW_ENERGY_LIGHTING": 0, + "MAINHEATCONT_DESCRIPTION": "Unknown", + "EXTENSION_COUNT": 0, + "NUMBER_OPEN_FIREPLACES": 0 +} + +# After the property descriptions have been re-remapped, we expect these features to be fixed +FIXED_DESCRIPTON_MAPPED_FEATURES = [ + 'another_property_below', 'is_roof_room', 'is_granite_or_whinstone', 'is_flat', 'is_suspended', + 'has_dwelling_above', 'is_as_built', 'is_to_external_air', 'is_cob', 'is_pitched', 'is_solid', 'is_at_rafters', + 'is_solid_brick', 'is_loft', 'is_system_built', 'is_timber_frame', 'is_sandstone_or_limestone', 'is_filled_cavity', + 'is_cavity_wall', 'is_thatched', 'is_to_unheated_space' +] diff --git a/model_data/simulation_system/generate_rdsap_change.py b/model_data/simulation_system/generate_rdsap_change.py index 42c2f878..003fa046 100644 --- a/model_data/simulation_system/generate_rdsap_change.py +++ b/model_data/simulation_system/generate_rdsap_change.py @@ -1,4 +1,5 @@ import pandas as pd +import numpy as np from tqdm import tqdm import msgpack @@ -14,7 +15,12 @@ from model_data.simulation_system.core.Settings import ( CARBON_RESPONSE, ) from model_data.simulation_system.core.DataProcessor import DataProcessor -from utils.s3 import save_dataframe_to_s3_parquet, read_from_s3 +from utils.s3 import save_dataframe_to_s3_parquet, read_from_s3, read_dataframe_from_s3_parquet +from recommendations.rdsap_tables import england_wales_age_band_lookup +from recommendations.recommendation_utils import ( + get_wall_u_value, get_roof_u_value, get_floor_u_value, estimate_perimeter, + get_wall_type +) DATA_DIRECTORY = Path(__file__).parent / "model_data" / "simulation_system" / "data" / "all-domestic-certificates" @@ -48,65 +54,81 @@ def process_and_prune_desriptions(df, cleaned_lookup): :return: """ - # TODO: In a future iteration, we can test using the binary features and the insulation thickness - # estimates, we well as estimated U-values - cols_to_drop = { "walls": [ - 'original_description', 'thermal_transmittance', - 'thermal_transmittance_unit', 'is_cavity_wall', 'is_filled_cavity', - 'is_solid_brick', 'is_system_built', 'is_timber_frame', - 'is_granite_or_whinstone', 'is_as_built', 'is_cob', 'is_assumed', - 'is_sandstone_or_limestone', 'insulation_thickness', - 'external_insulation', 'internal_insulation', + # We need to cleaned descriptions for pulling out u-values + 'original_description', 'thermal_transmittance_unit', 'original_description_ENDING', - 'thermal_transmittance_ENDING', 'thermal_transmittance_unit_ENDING', + 'thermal_transmittance_unit_ENDING', 'is_cavity_wall_ENDING', 'is_filled_cavity_ENDING', 'is_solid_brick_ENDING', 'is_system_built_ENDING', 'is_timber_frame_ENDING', 'is_granite_or_whinstone_ENDING', 'is_as_built_ENDING', 'is_cob_ENDING', 'is_assumed_ENDING', - 'is_sandstone_or_limestone_ENDING', 'insulation_thickness_ENDING', - 'external_insulation_ENDING', 'internal_insulation_ENDING', + 'is_sandstone_or_limestone_ENDING', + # Re remove the is_assumed columns + "is_assumed", "is_assumed_ENDING" ], "floor": [ - 'original_description', 'thermal_transmittance', - 'thermal_transmittance_unit', 'is_assumed', 'is_to_unheated_space', - 'is_to_external_air', 'is_suspended', 'is_solid', - 'another_property_below', 'insulation_thickness', 'no_data', - 'original_description_ENDING', - 'thermal_transmittance_ENDING', 'thermal_transmittance_unit_ENDING', - 'is_assumed_ENDING', 'is_to_unheated_space_ENDING', - 'is_to_external_air_ENDING', 'is_suspended_ENDING', 'is_solid_ENDING', - 'another_property_below_ENDING', 'insulation_thickness_ENDING', - 'no_data_ENDING', + "original_description", "clean_description", "thermal_transmittance_unit", + "no_data", "no_data_ENDING", "original_description_ENDING", + "clean_description_ENDING", "thermal_transmittance_unit_ENDING", + "is_suspended_ENDING", "is_solid_ENDING", "another_property_below_ENDING", + "is_to_unheated_space_ENDING", "is_to_external_air_ENDING", "is_assumed", + "is_assumed_ENDING" ], "roof": [ - 'original_description', 'clean_description', 'thermal_transmittance', - 'thermal_transmittance_unit', 'is_pitched', 'is_roof_room', 'is_loft', - 'is_flat', 'is_thatched', 'is_at_rafters', 'is_assumed', - 'has_dwelling_above', 'is_valid', 'insulation_thickness', - 'original_description_ENDING', 'clean_description_ENDING', - 'thermal_transmittance_ENDING', 'thermal_transmittance_unit_ENDING', - 'is_pitched_ENDING', 'is_roof_room_ENDING', 'is_loft_ENDING', - 'is_flat_ENDING', 'is_thatched_ENDING', 'is_at_rafters_ENDING', - 'is_assumed_ENDING', 'has_dwelling_above_ENDING', 'is_valid_ENDING', - 'insulation_thickness_ENDING', - ] - + "original_description", "clean_description", "thermal_transmittance_unit", + "is_assumed", "is_valid", "original_description_ENDING", "clean_description_ENDING", + "thermal_transmittance_unit_ENDING", "is_pitched_ENDING", "is_roof_room_ENDING", + "is_loft_ENDING", "is_flat_ENDING", "is_thatched_ENDING", "is_at_rafters_ENDING", + "has_dwelling_above_ENDING", "is_assumed_ENDING", "is_valid_ENDING" + ], + "hotwater": [ + "original_description", "clean_description", "assumed", "original_description_ENDING", + "clean_description_ENDING", "assumed_ENDING" + ], + "mainheat": [ + "original_description", "clean_description", "original_description_ENDING", + "has_assumed", "original_description_ENDING", "clean_description_ENDING", + "has_assumed_ENDING", + ], + "mainheatcont": [ + "original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING" + ], + "windows": [ + "original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING", + # We don't need many of the glazing coverage features because we have the multi_glaze_proportion feature + "has_glazing", "glazing_coverage", "no_data", "has_glazing_ENDING", "glazing_coverage_ENDING", + "no_data_ENDING" + ], + "main-fuel": [ + "original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING" + ], } - for component in ["walls", "floor", "roof"]: + for component in ["walls", "floor", "roof", "hotwater", "mainheat", "mainheatcont", "windows", "main-fuel"]: component_upper = component.upper() + if component == "main-fuel": + component_upper = component_upper.replace("-", "_") + + cleaned_key = "main-fuel" if component == "main-fuel" else f"{component}-description" + left_on_starting = ( + f"{component_upper}_STARTING" if component == "main-fuel" else f"{component_upper}_DESCRIPTION_STARTING" + ) + + left_on_ending = ( + f"{component_upper}_ENDING" if component == "main-fuel" else f"{component_upper}_DESCRIPTION_ENDING" + ) df = df.merge( - pd.DataFrame(cleaned_lookup[f"{component}-description"]), + pd.DataFrame(cleaned_lookup[cleaned_key]), how="left", - left_on=f"{component_upper}_DESCRIPTION_STARTING", + left_on=left_on_starting, right_on="original_description", ).merge( - pd.DataFrame(cleaned_lookup[f"{component}-description"]), + pd.DataFrame(cleaned_lookup[cleaned_key]), how="left", - left_on=f"{component_upper}_DESCRIPTION_ENDING", + left_on=left_on_ending, right_on="original_description", suffixes=("", "_ENDING") ) @@ -126,9 +148,10 @@ def process_and_prune_desriptions(df, cleaned_lookup): (df["is_suspended"] == df["is_suspended_ENDING"]) & (df["is_solid"] == df["is_solid_ENDING"]) & (df["another_property_below"] == df["another_property_below_ENDING"]) & - (df["is_to_unheated_space"] == df["is_to_unheated_space_ENDING"]) + (df["is_to_unheated_space"] == df["is_to_unheated_space_ENDING"]) & + (df["is_to_external_air"] == df["is_to_external_air_ENDING"]) ] - else: + elif component == "roof": df = df[ (df["is_pitched"] == df["is_pitched_ENDING"]) & (df["is_roof_room"] == df["is_roof_room_ENDING"]) & @@ -144,17 +167,215 @@ def process_and_prune_desriptions(df, cleaned_lookup): # Drop original cols original_cols = [ f"{component_upper}_DESCRIPTION_STARTING", f"{component_upper}_DESCRIPTION_ENDING" + ] if component != "main-fuel" else [ + f"{component_upper}_STARTING", f"{component_upper}_ENDING" ] - df = df.drop( - columns=cols_to_drop[component] + original_cols - ).rename( - columns={ - "clean_description": f"{component_upper}_DESCRIPTION_STARTING", - "clean_description_ENDING": f"{component_upper}_DESCRIPTION_ENDING", + df = df.drop(columns=cols_to_drop[component] + original_cols) + + # If we have an insulation_thickness column, rename it + if "insulation_thickness" in cleaned_lookup[cleaned_key][0]: + df = df.rename( + columns={ + "insulation_thickness": f"{component}_insulation_thickness", + "insulation_thickness_ENDING": f"{component}_insulation_thickness_ENDING", + } + ) + # If we have thermal transmittance, rename it + if "thermal_transmittance" in cleaned_lookup[cleaned_key][0]: + df = df.rename( + columns={ + "thermal_transmittance": f"{component}_thermal_transmittance", + "thermal_transmittance_ENDING": f"{component}_thermal_transmittance_ENDING", + } + ) + + # If we have tarrif, rename it + if "tariff_type" in cleaned_lookup[cleaned_key][0]: + df = df.rename( + columns={ + "tariff_type": f"{component}_tariff_type", + "tariff_type_ENDING": f"{component}_tariff_type_ENDING", + } + ) + + # We need the walls descriptions so we rename them to distinguish them + if component == "walls": + df = df.rename( + columns={ + "clean_description": f"{component}_clean_description", + "clean_description_ENDING": f"{component}_clean_description_ENDING", + } + ) + + # We don't need any lighting specific cleaning, we just drop the original description as we use + # LOW_ENERGY_LIGHTING_STARTING, LOW_ENERGY_LIGHTING_ENDING + + df = df.drop(columns=["LIGHTING_DESCRIPTION_STARTING", "LIGHTING_DESCRIPTION_ENDING"]) + + return df + + +def make_uvalues(df): + df["row_index"] = df.index + + uvalues = [] + for _, x in df.iterrows(): + + uprn = x["UPRN"] + row_index = x["row_index"] + age_band = england_wales_age_band_lookup[x["CONSTRUCTION_AGE_BAND"]] + + # ~~~~~~~~~~~~~~~~~~ + # Walls + # ~~~~~~~~~~~~~~~~~~ + + starting_wall_uvalue = x["walls_thermal_transmittance"] + if pd.isnull(starting_wall_uvalue): + starting_wall_uvalue = get_wall_u_value( + clean_description=x["walls_clean_description"], + age_band=age_band, + is_granite_or_whinstone=x["is_granite_or_whinstone"], + is_sandstone_or_limestone=x["is_sandstone_or_limestone"], + ) + + ending_wall_uvalue = x["walls_thermal_transmittance_ENDING"] + if pd.isnull(ending_wall_uvalue): + if x["walls_clean_description"] != x["walls_clean_description_ENDING"]: + ending_wall_uvalue = get_wall_u_value( + clean_description=x["walls_clean_description_ENDING"], + age_band=age_band, + is_granite_or_whinstone=x["is_granite_or_whinstone"], + is_sandstone_or_limestone=x["is_sandstone_or_limestone"], + ) + else: + ending_wall_uvalue = starting_wall_uvalue + + # ~~~~~~~~~~~~~~~~~~ + # Roof + # ~~~~~~~~~~~~~~~~~~ + + starting_roof_uvalue = x["roof_thermal_transmittance"] + if pd.isnull(starting_roof_uvalue): + starting_roof_uvalue = get_roof_u_value( + insulation_thickness=x["roof_insulation_thickness"], + has_dwelling_above=x["has_dwelling_above"], + is_loft=x["is_loft"], + is_roof_room=x["is_roof_room"], + is_thatched=x["is_thatched"], + is_flat=x["is_flat"], + is_pitched=x["is_pitched"], + is_at_rafters=x["is_at_rafters"], + age_band=age_band + ) + + ending_roof_uvalue = x["roof_thermal_transmittance_ENDING"] + + if pd.isnull(ending_roof_uvalue): + ending_roof_uvalue = get_roof_u_value( + insulation_thickness=x["roof_insulation_thickness_ENDING"], + has_dwelling_above=x["has_dwelling_above"], + is_loft=x["is_loft"], + is_roof_room=x["is_roof_room"], + is_thatched=x["is_thatched"], + is_flat=x["is_flat"], + is_pitched=x["is_pitched"], + is_at_rafters=x["is_at_rafters"], + age_band=age_band + ) + + # ~~~~~~~~~~~~~~~~~~ + # Floor + # ~~~~~~~~~~~~~~~~~~ + perimeters = {} + for suffix in ["_STARTING", "_ENDING"]: + floor_area = x[f"TOTAL_FLOOR_AREA{suffix}"] + n_rooms = x["NUMBER_HABITABLE_ROOMS"] + + perimeters[f"estimated_perimeter{suffix}"] = estimate_perimeter(floor_area, n_rooms) + + floor_type = "suspended" if x["is_suspended"] else "solid" + wall_type = get_wall_type(**x) + + if x["another_property_below"]: + starting_floor_uvalue, ending_floor_uvalue = 0, 0 + else: + starting_floor_uvalue = x["floor_thermal_transmittance"] + ending_floor_uvalue = x["floor_thermal_transmittance_ENDING"] + + if pd.isnull(starting_floor_uvalue): + starting_floor_uvalue = get_floor_u_value( + floor_type=floor_type, + perimeter=perimeters["estimated_perimeter_STARTING"], + area=x[f"TOTAL_FLOOR_AREA_STARTING"], + insulation_thickness=x["floor_insulation_thickness"], + wall_type=wall_type, + age_band=age_band + ) + + if pd.isnull(ending_floor_uvalue): + ending_floor_uvalue = get_floor_u_value( + floor_type=floor_type, + perimeter=perimeters["estimated_perimeter_ENDING"], + area=x[f"TOTAL_FLOOR_AREA_ENDING"], + insulation_thickness=x["floor_insulation_thickness_ENDING"], + wall_type=wall_type, + age_band=age_band + ) + + uvalues.append( + { + "UPRN": uprn, + "row_index": row_index, + "starting_walls_uvalue": starting_wall_uvalue, + "ending_walls_uvalue": ending_wall_uvalue, + "starting_roof_uvalue": starting_roof_uvalue, + "ending_roof_uvalue": ending_roof_uvalue, + "starting_floor_uvalue": starting_floor_uvalue, + "ending_floor_uvalue": ending_floor_uvalue, + **perimeters } ) + uvalues = pd.DataFrame(uvalues) + + df = df.merge( + uvalues, how="left", on=["UPRN", "row_index"] + ).drop(columns="row_index") + + # Fill missings + for component in ["walls", "floor", "roof"]: + for suffix in ["", "_ENDING"]: + fill_col = f"starting_{component}_uvalue" if suffix == "" else f"ending_{component}_uvalue" + + df[f"{component}_thermal_transmittance{suffix}"] = np.where( + pd.isnull(df[f"{component}_thermal_transmittance{suffix}"]), + df[fill_col], + df[f"{component}_thermal_transmittance{suffix}"] + ) + + df = df.drop( + columns=[ + "starting_walls_uvalue", "ending_walls_uvalue", "starting_roof_uvalue", + "ending_roof_uvalue", "starting_floor_uvalue", "ending_floor_uvalue" + ] + ) + + return df + + +def clean_missings_after_description_process(df): + missings = pd.isnull(df).sum() + missings = missings[missings > 0] + for col in missings.index: + unique_values = df[col].unique() + if True in unique_values or False in unique_values: + df[col] = df[col].fillna(False) + if "none" in unique_values: + df[col] = df[col].fillna("none") + else: + df[col] = df[col].fillna("Unknown") + return df @@ -172,22 +393,6 @@ def app(): dataset = [] cleaning_dataset = [] - # TODO [x] : Does energy tariff make a difference - # - leave for now but it may not - # TODO: [x] : Add starting SAP and head demand as a feature - # TODO [x] : If SAP hasn't changed, we don't include the record - # TODO [x]: If SAP gets worse, it genuinely looks like in the vast majority of cases that the building looks - # worse in the newer epc, so we can switch the orders - # TODO [x] : Have a look at temporal features - # TODO [x] : Floor area will impact the EPC so instead of averaging, we should have a starting and ending value. - # TODO [x]: Same as floor area for floor height - # TODO [x]: If fundamental building fabric changes, we should proabably discard the record - # TODO [x]: Should we prune records that have an exceptionally large amount of time between them? - # - leave for now and check performance after temporal features - # TODO [x]: If we have multiple EPCs lodged on the same day, should we remove them? Could be corrections? - # - Leave for now - # - for directory in tqdm(directories): filepath = directory / "certificates.csv" @@ -197,6 +402,15 @@ def app(): df = data_processor.pre_process() cleaning_averages = data_processor.make_cleaning_averages() + # We have some odd cases with missing constituency so we fill + df = df.fillna({"CONSTITUENCY": df["CONSTITUENCY"].mode().values[0]}) + + df = DataProcessor.apply_averages_cleaning( + data_to_clean=df, + cleaning_data=cleaning_averages, + cols_to_merge_on=COLUMNS_TO_MERGE_ON + ) + data_by_urpn = [] for uprn, property_data in df.groupby("UPRN", observed=True): @@ -204,7 +418,9 @@ def app(): fixed_data = {} # If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row - if any(property_data[MANDATORY_FIXED_FEATURES].nunique() > 1): + if any(property_data[MANDATORY_FIXED_FEATURES].nunique() > 1) or ( + pd.isnull(property_data[MANDATORY_FIXED_FEATURES]).sum().sum() > 0 + ): continue # Take the latest row for both the LATEST_FEILDS and MANDATORY FIELDS @@ -213,29 +429,22 @@ def app(): property_data[MANDATORY_FIXED_FEATURES].iloc[-1].to_dict() ) - # Extract the columns that are not all None - modified_property_data = DataProcessor.apply_averages_cleaning( - data_to_clean=property_data, - cleaning_data=cleaning_averages, - cols_to_merge_on=COLUMNS_TO_MERGE_ON - ) - # Combine all fields together fixed_data.update(mandatory_field_data) fixed_data.update(latest_field_data) # We include the lodgement date here as we probably need to factor time into the # model, since EPC standards and rigour have changed over time - variable_data = modified_property_data[ + variable_data = property_data[ COMPONENT_FEATURES + ["LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE] ] # Note: we look at changes between subsequent EPCS, however we could look at other permutations # e.g. first vs second, second vs third and also first vs third property_model_data = [] - for idx in range(0, modified_property_data.shape[0] - 1): + for idx in range(0, property_data.shape[0] - 1): - if idx >= modified_property_data.shape[0] - 1: + if idx >= property_data.shape[0] - 1: break earliest_record = variable_data.iloc[idx] @@ -289,6 +498,7 @@ def app(): data_by_urpn.extend(property_model_data) data_by_urpn_df = pd.DataFrame(data_by_urpn) + # Add some temporal features - we look at the days from the standard starting point in time # for the starting and ending date so all records are from a fixed point data_by_urpn_df["DAYS_TO_STARTING"] = ( @@ -298,9 +508,7 @@ def app(): pd.to_datetime(data_by_urpn_df["LODGEMENT_DATE_ENDING"]) - pd.to_datetime(EARLIEST_EPC_DATE) ).dt.days - # TODO: We need to pre-process the data. For instance, rather than using static for roofs, walls and - # floors, we may want to use the U-value. We may also want to handle the (assumed) tags - # within descriptions + data_by_urpn_df = data_by_urpn_df.drop(columns=["LODGEMENT_DATE_STARTING", "LODGEMENT_DATE_ENDING"]) # We look for key building fabric features that have changed from one EPC to the next. # if, for example, we see that a home has gone from being a cavity wall to a solid wall, we @@ -308,8 +516,28 @@ def app(): # is low # We also replace descriptions with their cleaned variants + if pd.isnull(data_by_urpn_df).sum().sum(): + raise ValueError("Null values found in dataset") + data_by_urpn_df = process_and_prune_desriptions(data_by_urpn_df, cleaned_lookup) + # Apply u-values + for col in ["walls_clean_description", "walls_clean_description_ENDING"]: + data_by_urpn_df[col] = data_by_urpn_df[col].str.replace("(assumed)", "").str.rstrip() + + data_by_urpn_df = make_uvalues(data_by_urpn_df).drop( + columns=["walls_clean_description", "walls_clean_description_ENDING"] + ) + + # TODO: For some of the features that we clean, we have either a true, false or possibly null value + # Those nulls should be False. clean_missings_after_description_process handles this but shouldn't + # need to + + data_by_urpn_df = clean_missings_after_description_process(data_by_urpn_df) + + if pd.isnull(data_by_urpn_df).sum().sum(): + raise ValueError("Null values found in dataset after process_and_prune_desriptions") + dataset.append(data_by_urpn_df) cleaning_averages["LOCAL_AUTHORITY"] = df["LOCAL_AUTHORITY"].values[0] @@ -324,10 +552,13 @@ def app(): ) output = pd.concat(dataset) + + output = DataProcessor.difference_data(output) + save_dataframe_to_s3_parquet( df=output, bucket_name="retrofit-data-dev", - file_key="sap_change_model/dataset.parquet", + file_key="sap_change_model/dataset_new_not_diff.parquet", ) diff --git a/model_data/simulation_system/test_data_generation.py b/model_data/simulation_system/test_data_generation.py index 8d1dbf2b..8989d491 100644 --- a/model_data/simulation_system/test_data_generation.py +++ b/model_data/simulation_system/test_data_generation.py @@ -1,8 +1,8 @@ -from core.Logger import logger +from model_data.simulation_system.core.Logger import logger import argparse import pandas as pd from pathlib import Path -from core.Settings import RANDOM_SEED, TRAIN_AND_VALIDATION_DATA_NAME, TEST_DATA_NAME +from model_data.simulation_system.core.Settings import RANDOM_SEED, TRAIN_AND_VALIDATION_DATA_NAME, TEST_DATA_NAME def ingest_arguments() -> argparse.Namespace: @@ -96,7 +96,6 @@ def main( if __name__ == "__main__": - logger.info("--- Generate test data pipeline ---") args = ingest_arguments() diff --git a/model_data/tests/test_data/test_floor_attributes_cases.py b/model_data/tests/test_data/test_floor_attributes_cases.py index ba062c4f..5738f77f 100644 --- a/model_data/tests/test_data/test_floor_attributes_cases.py +++ b/model_data/tests/test_data/test_floor_attributes_cases.py @@ -366,4 +366,15 @@ clean_floor_cases = [ {'original_description': 'I ofod heb ei wresogi, dim inswleiddio (rhagdybiaeth)', 'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': True, 'is_to_unheated_space': True, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False, 'insulation_thickness': 'none', "another_property_below": False}, + {'original_description': "Average thermal transmittance 1.10 W/m+é-¦K", 'thermal_transmittance': 1.1, + 'thermal_transmittance_unit': 'w/m+é-¦k', 'is_assumed': False, + 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False, + 'another_property_below': False, 'insulation_thickness': None}, + { + "original_description": "Trawsyriannedd thermol cyfartalog 0.27 W/m-¦K", 'thermal_transmittance': 0.27, + 'thermal_transmittance_unit': 'w/m-¦k', 'is_assumed': False, + 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False, + 'another_property_below': False, 'insulation_thickness': None + } + ] diff --git a/model_data/tests/test_data/test_lighting_attributes_cases.py b/model_data/tests/test_data/test_lighting_attributes_cases.py index 0a8fa6cf..f76b2d80 100644 --- a/model_data/tests/test_data/test_lighting_attributes_cases.py +++ b/model_data/tests/test_data/test_lighting_attributes_cases.py @@ -35,4 +35,5 @@ test_cases = [ {'original_description': 'Dim goleuadau ynni-isel', 'low_energy_proportion': 0}, {'original_description': 'Excellent lighting efficiency', 'low_energy_proportion': 1}, {'original_description': "Goleuadau ynni-isel ym mhob un o'r mannau gosod", 'low_energy_proportion': 1}, + {'original_description': "Goleuadau ynni-isel mewn 17% o'r mannau gosod", 'low_energy_proportion': 0.17}, ] diff --git a/model_data/tests/test_data/test_mainheat_attributes_cases.py b/model_data/tests/test_data/test_mainheat_attributes_cases.py index 44daddcb..d264ebff 100644 --- a/model_data/tests/test_data/test_mainheat_attributes_cases.py +++ b/model_data/tests/test_data/test_mainheat_attributes_cases.py @@ -1639,4 +1639,17 @@ mainheat_cases = [ 'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, "has_electric_heat_pumps": False, "has_micro-cogeneration": False}, + {'original_description': 'Boiler and radiators, nwy prif gyflenwad, mains gas', 'has_radiators': True, + 'has_fan_coil_units': False, + 'has_pipes_in_screed_above_insulation': False, 'has_pipes_in_insulated_timber_floor': False, + 'has_pipes_in_concrete_slab': False, 'has_boiler': True, 'has_air_source_heat_pump': False, + 'has_room_heaters': False, 'has_electric_storage_heaters': False, 'has_warm_air': False, + 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False, 'has_community_scheme': False, + 'has_ground_source_heat_pump': False, 'has_no_system_present': False, 'has_portable_electric_heaters': False, + 'has_water_source_heat_pump': False, 'has_electric': False, 'has_mains_gas': True, 'has_wood_logs': False, + 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, 'has_anthracite': False, + 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False, 'has_assumed': False, + 'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, + "has_electric_heat_pumps": False, + "has_micro-cogeneration": False}, ] diff --git a/model_data/tests/test_data/test_mainheat_control_attributes_cases.py b/model_data/tests/test_data/test_mainheat_control_attributes_cases.py index 2f6d3768..9aa6c80c 100644 --- a/model_data/tests/test_data/test_mainheat_control_attributes_cases.py +++ b/model_data/tests/test_data/test_mainheat_control_attributes_cases.py @@ -16,7 +16,8 @@ mainheat_control_cases = [ {'original_description': 'Charging system linked to use of community heating, programmer and TRVs', 'thermostatic_control': None, 'charging_system': 'charging system', 'switch_system': 'programmer', 'no_control': None, 'dhw_control': None, 'community_heating': 'use of community heating', - 'multiple_room_thermostats': False, 'auxiliary_systems': None, 'trvs': 'trvs'}, { + 'multiple_room_thermostats': False, 'auxiliary_systems': None, 'trvs': 'trvs'}, + { 'original_description': 'Charging system linked to use of community heating, programmer and at least two room ' 'stats', 'thermostatic_control': None, 'charging_system': 'charging system', 'switch_system': 'programmer', @@ -38,10 +39,11 @@ mainheat_control_cases = [ {'original_description': 'Controls for high heat retention storage heaters', 'thermostatic_control': None, 'charging_system': 'high heat retention storage heaters', 'switch_system': None, 'no_control': None, 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False, 'auxiliary_systems': None, - 'trvs': None}, {'original_description': 'Flat rate charging, TRVs', 'thermostatic_control': None, - 'charging_system': 'flat rate charging', 'switch_system': None, 'no_control': None, - 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False, - 'auxiliary_systems': None, 'trvs': 'trvs'}, + 'trvs': None}, + {'original_description': 'Flat rate charging, TRVs', 'thermostatic_control': None, + 'charging_system': 'flat rate charging', 'switch_system': None, 'no_control': None, + 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False, + 'auxiliary_systems': None, 'trvs': 'trvs'}, {'original_description': 'Flat rate charging, no thermostatic control of room temperature', 'thermostatic_control': None, 'charging_system': 'flat rate charging', 'switch_system': None, 'no_control': 'no thermostatic control', 'dhw_control': None, 'community_heating': None, @@ -213,4 +215,42 @@ mainheat_control_cases = [ 'thermostatic_control': 'room thermostat', 'charging_system': 'charging system', 'switch_system': 'programmer', 'no_control': None, 'dhw_control': None, 'community_heating': 'use of community heating', 'multiple_room_thermostats': False, 'auxiliary_systems': None, 'trvs': None}, + {'original_description': 'System dalu wediGÇÖi chysylltu +ó defnyddio gwres cymunedol, rhaglennydd a TRVs', + 'thermostatic_control': None, 'charging_system': 'charging system', 'switch_system': 'programmer', + 'no_control': None, 'dhw_control': None, 'community_heating': 'use of community heating', + 'multiple_room_thermostats': False, 'auxiliary_systems': None, 'trvs': 'trvs'}, + {'original_description': 'System dalu wediGÇÖi chysylltu +ó defnyddio gwres cymunedol, TRVs', + 'thermostatic_control': None, + 'charging_system': 'charging system', 'switch_system': None, 'no_control': None, 'dhw_control': None, + 'community_heating': 'use of community heating', 'multiple_room_thermostats': False, 'auxiliary_systems': None, + 'trvs': 'trvs'}, + {'original_description': 'Single rate heating, TRVs', + 'thermostatic_control': None, 'charging_system': None, + 'switch_system': None, + 'no_control': None, 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False, + 'auxiliary_systems': None, 'trvs': 'trvs', 'rate_control': 'single rate heating'}, + {'original_description': 'T+-ól un gyfradd, TRVs', + 'thermostatic_control': None, 'charging_system': None, + 'switch_system': None, + 'no_control': None, 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False, + 'auxiliary_systems': None, 'trvs': 'trvs', 'rate_control': 'single rate heating'}, + {'original_description': 'Single rate heating, programmer, TRVs', + 'thermostatic_control': None, 'charging_system': None, + 'switch_system': 'programmer', + 'no_control': None, 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False, + 'auxiliary_systems': None, 'trvs': 'trvs', 'rate_control': 'single rate heating'}, + {'original_description': 'T+ól un gyfradd, rhaglennydd a TRVs', + 'thermostatic_control': None, 'charging_system': None, + 'switch_system': 'programmer', + 'no_control': None, 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False, + 'auxiliary_systems': None, 'trvs': 'trvs', 'rate_control': 'single rate heating'}, + {'original_description': 'T+ól un gyfradd, TRVs', + 'thermostatic_control': None, 'charging_system': None, + 'switch_system': None, + 'no_control': None, 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False, + 'auxiliary_systems': None, 'trvs': 'trvs', 'rate_control': 'single rate heating'}, + {'original_description': 'TRVs a falf osgoi', 'thermostatic_control': None, + 'charging_system': None, + 'switch_system': None, 'no_control': None, 'dhw_control': None, 'community_heating': None, + 'multiple_room_thermostats': False, 'auxiliary_systems': 'bypass', 'trvs': 'trvs'}, ] diff --git a/model_data/tests/test_data/test_roof_attributes_cases.py b/model_data/tests/test_data/test_roof_attributes_cases.py index 8c4415fe..ee7f865b 100644 --- a/model_data/tests/test_data/test_roof_attributes_cases.py +++ b/model_data/tests/test_data/test_roof_attributes_cases.py @@ -394,4 +394,8 @@ clean_roof_test_cases = [ 'thermal_transmittance_unit': None, 'is_pitched': False, 'is_roof_room': True, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True, 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'none'}, + {'original_description': 'Average thermal transmittance 0.80 W/m+é-¦K', 'thermal_transmittance': 0.8, + 'thermal_transmittance_unit': 'w/m+é-¦k', 'is_pitched': False, 'is_roof_room': False, + 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False, + 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None} ] diff --git a/model_data/tests/test_data/test_wall_attributes_cases.py b/model_data/tests/test_data/test_wall_attributes_cases.py index 162ebc1f..40d6fb9c 100644 --- a/model_data/tests/test_data/test_wall_attributes_cases.py +++ b/model_data/tests/test_data/test_wall_attributes_cases.py @@ -690,5 +690,194 @@ wall_cases = [ 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': True, 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'none', - 'external_insulation': False, 'internal_insulation': False} + 'external_insulation': False, 'internal_insulation': False}, + {'original_description': 'Average thermal transmittance 1.60 W/m+é-¦K', + 'thermal_transmittance': 1.6, 'thermal_transmittance_unit': 'w/m+é-¦k', 'is_cavity_wall': False, + 'is_filled_cavity': False, 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False, + 'is_granite_or_whinstone': False, 'is_as_built': False, 'is_cob': False, 'is_assumed': False, + 'is_sandstone_or_limestone': False, 'insulation_thickness': None, 'external_insulation': False, + 'internal_insulation': False}, + {'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'below average', + 'external_insulation': False, 'internal_insulation': False}, + {'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'none', + 'external_insulation': False, 'internal_insulation': False}, + {'original_description': 'Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': True, 'insulation_thickness': 'none', + 'external_insulation': False, 'internal_insulation': False}, + {'original_description': 'Average thermal transmittance 1 W/m-¦K', 'thermal_transmittance': 1, + 'thermal_transmittance_unit': 'w/m-¦k', 'is_cavity_wall': False, 'is_filled_cavity': False, + 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, + 'is_as_built': False, 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, + 'insulation_thickness': None, 'external_insulation': False, 'internal_insulation': False}, + {'original_description': 'Waliau ceudod, ceudod wediGÇÖi lenwi', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False, + 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': None, + 'external_insulation': False, 'internal_insulation': False}, + {'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', + 'external_insulation': False, 'internal_insulation': False}, + {'original_description': 'Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': True, 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'none', + 'external_insulation': False, 'internal_insulation': False}, + {'original_description': 'Waliau ceudod,', 'thermal_transmittance': None, 'thermal_transmittance_unit': None, + 'is_cavity_wall': True, + 'is_filled_cavity': False, 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False, + 'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_assumed': False, + 'is_sandstone_or_limestone': False, 'insulation_thickness': 'none', 'external_insulation': False, + 'internal_insulation': False}, + {'original_description': 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': True, 'is_granite_or_whinstone': False, 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', + 'external_insulation': False, 'internal_insulation': False}, + {'original_description': 'Granite or whinstone, with external insulation', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': True, 'is_as_built': False, + 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', + 'external_insulation': True, 'internal_insulation': False}, + {'original_description': 'Gwenithfaen neu risgraig, gydag inswleiddio allanol', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': True, 'is_as_built': False, + 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', + 'external_insulation': True, 'internal_insulation': False}, + {'original_description': 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': True, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'none', + 'external_insulation': False, 'internal_insulation': False}, + {'original_description': 'Sandstone or limestone, with internal insulation', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False, + 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': True, 'insulation_thickness': 'average', + 'external_insulation': False, 'internal_insulation': True}, + {'original_description': 'Sandstone or limestone, with external insulation', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False, + 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': True, 'insulation_thickness': 'average', + 'external_insulation': True, 'internal_insulation': False}, + {'original_description': 'Waliau ceudod, ynysydd allanol a llenwi ceudod', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False, + 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', + 'external_insulation': True, 'internal_insulation': False}, + {'original_description': 'Gwenithfaen neu risgraig, gydag inswleiddio mewnol', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': True, 'is_as_built': False, + 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', + 'external_insulation': False, 'internal_insulation': True}, + {'original_description': 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': True, 'is_granite_or_whinstone': False, 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'below average', + 'external_insulation': False, 'internal_insulation': False}, + { + 'original_description': 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (' + 'rhagdybiaeth)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': True, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', + 'external_insulation': False, 'internal_insulation': False}, + {'original_description': 'WediGÇÖu hadeiladu yn +¦l system, gydag inswleiddio allanol', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': True, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False, + 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', + 'external_insulation': True, 'internal_insulation': False}, + {'original_description': 'Briciau solet, gydag inswleiddio mewnol', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': True, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False, + 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', + 'external_insulation': False, 'internal_insulation': True}, + { + 'original_description': 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (' + 'rhagdybiaeth)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': True, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, + 'insulation_thickness': 'below average', + 'external_insulation': False, 'internal_insulation': False}, + {'original_description': 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': True, 'is_granite_or_whinstone': False, 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'none', + 'external_insulation': False, 'internal_insulation': False}, + {'original_description': 'Tywodfaen, gydag inswleiddio allanol', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False, + 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': True, 'insulation_thickness': 'average', + 'external_insulation': True, 'internal_insulation': False}, + {'original_description': 'Waliau ceudod, gydag inswleiddio allanol', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False, + 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', + 'external_insulation': True, 'internal_insulation': False}, + {'original_description': 'Briciau solet, gydag inswleiddio allanol', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': True, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False, + 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', + 'external_insulation': True, 'internal_insulation': False}, + {'original_description': 'Cob, with external insulation', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, + 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, 'is_system_built': False, + 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False, 'is_cob': True, + 'is_assumed': False, + 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', 'external_insulation': True, + 'internal_insulation': False}, + {'original_description': 'Co with external insulation', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, + 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, 'is_system_built': False, + 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False, 'is_cob': True, + 'is_assumed': False, + 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', 'external_insulation': True, + 'internal_insulation': False}, + {'original_description': 'Cowith external insulation', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, + 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, 'is_system_built': False, + 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False, 'is_cob': True, + 'is_assumed': False, + 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', 'external_insulation': True, + 'internal_insulation': False}, + {'original_description': 'Sandstone, as built, no insulation (assumed)', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': True, 'insulation_thickness': 'none', + 'external_insulation': False, 'internal_insulation': False}, + + {'original_description': 'Sandstone or limestone, as built, insulated (assumed)', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': True, 'insulation_thickness': 'average', + 'external_insulation': False, 'internal_insulation': False}, + { + 'original_description': 'Park home wall, as built', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, + 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, + 'is_as_built': True, 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, + 'insulation_thickness': None, 'external_insulation': False, 'internal_insulation': False, + 'is_park_home': True + } ] diff --git a/model_data/tests/test_lighting_attributes.py b/model_data/tests/test_lighting_attributes.py index 392e10d7..38219e86 100644 --- a/model_data/tests/test_lighting_attributes.py +++ b/model_data/tests/test_lighting_attributes.py @@ -56,3 +56,17 @@ class TestLightingAttributes: del expected_result["original_description"] result = LightingAttributes(test_case['original_description'], averages).process() assert sorted(result.items()) == sorted(expected_result.items()) + + def test_regex_translations(self): + """ + Some of the regex translations were falling through the net, though we were pulling out the percentages + so we just make sure we're translating correctly + """ + + init1 = LightingAttributes("Goleuadau ynni-isel mewn 17% o'r mannau gosod", []) + + assert init1.description == 'low energy lighting in 17% of fixed outlets' + + init2 = LightingAttributes("Goleuadau ynni-isel mewn 60% oGÇÖr mannau gosod", []) + + assert init2.description == 'low energy lighting in 60% of fixed outlets' diff --git a/model_data/tests/test_roof_attributes.py b/model_data/tests/test_roof_attributes.py index 2ee4e12a..5b010d90 100644 --- a/model_data/tests/test_roof_attributes.py +++ b/model_data/tests/test_roof_attributes.py @@ -1,6 +1,4 @@ import pytest -import pickle -from model_data.EpcClean import EpcClean from pathlib import Path from model_data.tests.test_data.test_roof_attributes_cases import clean_roof_test_cases from model_data.epc_attributes.RoofAttributes import RoofAttributes diff --git a/model_data/tests/test_wall_attributes.py b/model_data/tests/test_wall_attributes.py index eb13cb98..c8d5eb24 100644 --- a/model_data/tests/test_wall_attributes.py +++ b/model_data/tests/test_wall_attributes.py @@ -48,5 +48,11 @@ class TestWallAttributes: expected_result = test_case.copy() del expected_result["original_description"] result = WallAttributes(test_case['original_description']).process() + # Some of the expected_result test data was produced before some attributes were added to the code + # base so we need to filter out some of the keys. The test is still valid + result = {k: v for k, v in result.items() if v} + expected_result = {k: v for k, v in expected_result.items() if v} + if not result: + raise Exception("Something went wong") # Ensure the output ordering is correct assert sorted(result.items()) == sorted(expected_result.items()) diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py index 114a7fe1..a19cf1a8 100644 --- a/recommendations/FloorRecommendations.py +++ b/recommendations/FloorRecommendations.py @@ -3,10 +3,10 @@ from typing import List from model_data.BaseUtility import Definitions from datatypes.enums import QuantityUnits from backend.Property import Property -from recommendations.rdsap_tables import default_wall_thickness, age_band_data from recommendations.recommendation_utils import ( r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value, - get_recommended_part, get_uvalue_estimate + get_recommended_part, estimate_perimeter, get_wall_type, + get_floor_u_value ) @@ -39,12 +39,10 @@ class FloorRecommendations(Definitions): def __init__( self, property_instance: Property, - uvalue_estimates: List, total_floor_area_group_decile: str, materials: List, ): self.property = property_instance - self.uvalue_estimates = uvalue_estimates self.total_floor_area_group_decile = total_floor_area_group_decile # For audit purposes, when estimating u values we'll store it self.estimated_u_value = None @@ -61,80 +59,6 @@ class FloorRecommendations(Definitions): part for part in self.materials if part["type"] == "solid_floor_insulation" ] - @staticmethod - def _estimate_perimeter(floor_area, num_rooms): - # Compute average room size based on total floor area and number of rooms - avg_room_size = floor_area / num_rooms - - # Estimate total side length for square layout - total_side_length = math.sqrt(avg_room_size * num_rooms) - - # Compute the perimeter - perimeter = total_side_length * 4 - - return perimeter - - def _estimate_suspended_floor_u_value( - self, floor_area, number_of_rooms, insulation_thickness, wall_type, region, age_band - ): - """ - Estimate the u-value of a suspended floor, based on RdSap methodology - Default U-value for UNINSULATED suspended floor, based on RdSAP methodology - https://files.bregroup.com/bre-co-uk-file-library-copy/filelibrary/SAP/2012/RdSAP-9.93/RdSAP_2012_9.93.pdf - - w = wall thickness, where these estimates are based on the RD SAP methodology, as in table S3 - A = floor area - Exposed perimeter = P - soil type clas thermal conductivity lambda_g = 1.5 W/mK - Rsi = 0.17m^2K/W - Rse = 0.04m^2K/W - Rf = 0.001 * d_ins / 0.035 where d_ins is the insulation thickness in mm - height above external ground h = 0.3m - average wind speed at 10m height v=5m/s - wind sheilding factor fw = 0.05 - vantilation factor E = 0.003 m^2/m - U-value of walls to underfloor space Uw = 1.5 W/m^2K - - # Calulations for suspended ground floors, example for 5 bedroom house with permiter estimated at - 44.36214602563767 - 1) dg = w + lambda_g x (Rsi + Rse) = 0.5 + 1.5 * (0.17 + 0.04) = 0.615 - 2) B = 2 * A/P = 2 * 123.0 / 44.36214602563767 = 5.545268253204708 - 3) Ug = 2 * lambda_g * log(pi * B/dg + 1)/(pi * B + dg) = - 2 * 1.5 * log(3.141592653589793 * 5.545268253204708/0.615 + 1) / (3.141592653589793 * 5.545268253204708 - + 0.615) = 0.5619604457160708 - 4) Ux = (2 * h * Uw /B) + (1450 * E * v * fw/B) = (2 * 0.3 * 1.5 / 5.545268253204708) + (1450 * 0.003 * 5 * - 0.05/5.545268253204708) = 0.35841367978030436 - 5) U = 1/ (2 * Rsi + Rf + 1/(Ug + Ux)) = 1 / (2 * 0.17 + 0 + 1/(0.5619604457160708 + 0.35841367978030436)) = - 0.701 - """ - age_band_letter = [x for x in age_band_data if x[region] == age_band][0]["age_band"] - - defaults = { - # We need width in meters - "w": [x[age_band_letter] for x in default_wall_thickness if x["type"] == wall_type][0] / 1000, - "lambda_g": 1.5, - "Rsi": 0.17, - "Rse": 0.04, - "Rf": 0.001 * insulation_thickness / 0.035, - "h": 0.3, - "v": 5, - "fw": 0.05, - "E": 0.003, - "Uw": 1.5, - } - - dg = defaults["w"] + defaults["lambda_g"] * (defaults["Rsi"] + defaults["Rse"]) - - # P is the exposed perimeter, which we estimate as we not have this data - p = self._estimate_perimeter(floor_area=floor_area, num_rooms=number_of_rooms) - b = 2 * floor_area / p - u_g = 2 * defaults["lambda_g"] * math.log(math.pi * b / dg + 1) / (math.pi * b + dg) - u_x = (2 * defaults["h"] * defaults["Uw"] / b) + (1450 * defaults["E"] * defaults["v"] * defaults["fw"] / b) - # This is the final estimated U-value - u = 1 / (2 * defaults["Rsi"] + defaults["Rf"] + 1 / (u_g + u_x)) - - return u - def recommend(self): u_value = self.property.floor["thermal_transmittance"] is_suspended = self.property.floor["is_suspended"] @@ -169,12 +93,6 @@ class FloorRecommendations(Definitions): # The floor is already compliant return - # For these methods, we need to know the additional details about the property - if self.property.walls["is_solid_brick"]: - wall_type = "solid brick" - else: - raise NotImplementedError("Implement me") - total_floor_area = float(self.property.data["total-floor-area"]) number_of_rooms = float(self.property.data["number-habitable-rooms"]) @@ -185,28 +103,18 @@ class FloorRecommendations(Definitions): else: raise NotImplementedError("Implement me") - if insulation_thickness == "none": + estimated_perimeter = estimate_perimeter(total_floor_area / num_floors, number_of_rooms / num_floors) - region_str, age_band = self.property.data["construction-age-band"].split(":") - region_str = region_str.strip() - age_band = age_band.strip() - region = self.REGION_LOOKUP[region_str] - - u_value = self._estimate_suspended_floor_u_value( - floor_area=total_floor_area / num_floors, - number_of_rooms=number_of_rooms / num_floors, - insulation_thickness=0, - wall_type=wall_type, - region=region, - age_band=age_band, - ) - else: - u_value = get_uvalue_estimate( - uvalue_estimates=self.uvalue_estimates, - property=self.property, - total_floor_area_group_decile=self.total_floor_area_group_decile - ) + wall_type = get_wall_type(**self.property.walls) + u_value = get_floor_u_value( + floor_type="suspended" if is_suspended else "solid", + area=total_floor_area, + perimeter=estimated_perimeter, + age_band=self.property.age_band, + insulation_thickness=insulation_thickness, + wall_type=wall_type + ) self.estimated_u_value = u_value if is_suspended: diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index fceee205..c46a495f 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -7,7 +7,7 @@ from backend.Property import Property from model_data.BaseUtility import Definitions from recommendations.recommendation_utils import ( r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value, - get_recommended_part, get_uvalue_estimate + get_recommended_part, get_wall_u_value ) @@ -44,13 +44,12 @@ class WallRecommendations(Definitions): } def __init__( - self, property_instance: Property, - uvalue_estimates: List, + self, + property_instance: Property, total_floor_area_group_decile: str, materials: List ): self.property = property_instance - self.uvalue_estimates = uvalue_estimates self.total_floor_area_group_decile = total_floor_area_group_decile # For audit purposes, when estimating u values we'll store it self.estimated_u_value = None @@ -116,18 +115,15 @@ class WallRecommendations(Definitions): raise NotImplementedError("Not implemented yet") - if is_solid_brick: + u_value = get_wall_u_value( + clean_description=self.property.walls["clean_description"], + age_band=self.property.age_band, + is_granite_or_whinstone=self.property.walls["is_granite_or_whinstone"], + is_sandstone_or_limestone=self.property.walls["is_sandstone_or_limestone"], + ) + self.estimated_u_value = u_value - if insulation_thickness == "none": - # This is an estimated figure based on industry standards - u_value = self.DEFAULT_U_VALUES["solid_brick"] - else: - u_value = get_uvalue_estimate( - uvalue_estimates=self.uvalue_estimates, - property=self.property, - total_floor_area_group_decile=self.total_floor_area_group_decile - ) - self.estimated_u_value = u_value + if is_solid_brick: if u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE: self.find_insulation(u_value) diff --git a/recommendations/rdsap_tables.py b/recommendations/rdsap_tables.py index eeacb935..71c52354 100644 --- a/recommendations/rdsap_tables.py +++ b/recommendations/rdsap_tables.py @@ -3,6 +3,7 @@ This script contains standard tables which are defined in rdsap. The most recent based on the 2012 version, however the government is currently working on releasing a new version, and there we will need to re-visit this """ +import pandas as pd age_band_data = [ { @@ -91,33 +92,373 @@ age_band_data = [ }, ] +england_wales_age_band_lookup = { + f"England and Wales: %s" % x["England_Wales"]: x["age_band"] for x in age_band_data +} + +######################################################################################################################## +# As defined in the rdsap documentation on page 9 +# https://bregroup.com/wp-content/uploads/2019/09/RdSAP_2012_9.94-20-09-2019.pdf +######################################################################################################################## default_wall_thickness = [ { "type": "stone", "A": 500, "B": 500, "C": 500, "D": 500, "E": 450, "F": 420, "G": 420, "H": 420, - "I": 450, "J_K_L": 450 + "I": 450, "J": 450, "K": 450, "L": 450 }, { "type": "solid brick", "A": 220, "B": 220, "C": 220, "D": 220, "E": 240, "F": 250, "G": 270, "H": 270, - "I": 300, "J_K_L": 300 + "I": 300, "J": 300, "K": 300, "L": 300 }, { "type": "cavity", "A": 250, "B": 250, "C": 250, "D": 250, "E": 250, "F": 260, "G": 270, "H": 270, - "I": 300, "J_K_L": 300 + "I": 300, "J": 300, "K": 300, "L": 300 }, { "type": "timber frame", "A": 150, "B": 150, "C": 150, "D": 250, "E": 270, "F": 270, "G": 270, "H": 270, - "I": 300, "J_K_L": 300 + "I": 300, "J": 300, "K": 300, "L": 300 }, { "type": "cob", "A": 540, "B": 540, "C": 540, "D": 540, "E": 540, "F": 540, "G": 560, "H": 560, "I": 590, - "J_K_L": 590 + "J": 590, "K": 590, "L": 590 }, { "type": "system build", "A": 250, "B": 250, "C": 250, "D": 250, "E": 250, "F": 300, "G": 300, "H": 300, - "I": 300, "J_K_L": 300 + "I": 300, "J": 300, "K": 300, "L": 300 }, { - "type": "park home", "A": None, "B": None, "C": None, "D": None, "E": None, "F": 50, "G": None, - "H": None, "I": 50, "J_K_L": 100 + "type": "park home", "A": None, "B": None, "C": None, "D": None, "E": None, "F": 50, "G": 50, + "H": None, "I": 75, "J": 100, "K": 100, "L": 100 }, ] + +######################################################################################################################## +# This wall u-value table is defined in the rdsap documentation on page 19 +# https://bregroup.com/wp-content/uploads/2019/09/RdSAP_2012_9.94-20-09-2019.pdf +######################################################################################################################## +wall_types = [ + "Stone: granite or whinstone as built", + "Stone: sandstone or limestone as built", + "Solid brick as built", + "Stone/solid brick with 50 mm external or internal insulation", + "Stone/solid brick with 100 mm external or internal insulation", + "Stone/solid brick with 150 mm external or internal insulation", + "Stone/solid brick with 200 mm external or internal insulation", + "Cob as built", + "Cob with 50 mm external or internal insulation", + "Cob with 100 mm external or internal insulation", + "Cob with 150 mm external or internal insulation", + "Cob with 200 mm external or internal insulation", + "Cavity as built", + "Unfilled cavity with 50 mm external or internal insulation", + "Unfilled cavity with 100 mm external or internal insulation", + "Unfilled cavity with 150 mm external or internal insulation", + "Unfilled cavity with 200 mm external or internal insulation", + "Filled cavity", + "Filled cavity with 50 mm external or internal insulation", + "Filled cavity with 100 mm external or internal insulation", + "Filled cavity with 150 mm external or internal insulation", + "Filled cavity with 200 mm external or internal insulation", + "Timber frame as built", + "Timber frame with internal insulation", + "System build as built", + "System build with 50 mm external or internal insulation", + "System build with 100 mm external or internal insulation", + "System build with 150 mm external or internal insulation", + "System build with 200 mm external or internal insulation", +] + +u_values = [ + ["a", "a", "a", "a", "1.7b", "1.0", "0.6", "0.60", "0.45", "0.35", "0.30", "0.28"], + ["a", "a", "a", "a", "1.7b", "1.0", "0.6", "0.60", "0.45", "0.35", "0.30", "0.28"], + ["1.7", "1.7", "1.7", "1.7", "1.7", "1.0", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"], + ["0.55", "0.55", "0.55", "0.55", "0.55", "0.45", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"], + ["0.32", "0.32", "0.32", "0.32", "0.32", "0.28", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"], + ["0.23", "0.23", "0.23", "0.23", "0.23", "0.21", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"], + ["0.18", "0.18", "0.18", "0.18", "0.18", "0.17", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"], + ["0.80", "0.80", "0.80", "0.80", "0.80", "0.80", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"], + ["0.40", "0.40", "0.40", "0.40", "0.40", "0.40", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"], + ["0.26", "0.26", "0.26", "0.26", "0.26", "0.26", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"], + ["0.20", "0.20", "0.20", "0.20", "0.20", "0.20", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"], + ["0.16", "0.16", "0.16", "0.16", "0.16", "0.16", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"], + ["1.5", "1.5", "1.5", "1.5", "1.5", "1.0", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"], + ["0.53", "0.53", "0.53", "0.53", "0.53", "0.45", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"], + ["0.32", "0.32", "0.32", "0.32", "0.32", "0.30", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"], + ["0.23", "0.23", "0.23", "0.23", "0.23", "0.21", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"], + ["0.18", "0.18", "0.18", "0.18", "0.18", "0.17", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"], + ["0.7", "0.7", "0.7", "0.7", "0.7", "0.40", "0.35", "0.35", "0.45", "0.35", "0.30", "0.28"], + ["0.37", "0.37", "0.37", "0.37", "0.37", "0.27", "0.25", "0.25", "0.25", "0.25", "0.21", "0.21"], + ["0.25", "0.25", "0.25", "0.25", "0.25", "0.20", "0.19", "0.19", "0.19", "0.19", "0.17", "0.16"], + ["0.19", "0.19", "0.19", "0.19", "0.19", "0.16", "0.15", "0.15", "0.15", "0.15", "0.14", "0.14"], + ["0.16", "0.16", "0.16", "0.16", "0.16", "0.13", "0.13", "0.13", "0.13", "0.13", "0.12", "0.12"], + ["2.5", "1.9", "1.9", "1.0", "0.80", "0.45", "0.40", "0.40", "0.40", "0.35", "0.30", "0.28"], + ["0.60", "0.55", "0.55", "0.40", "0.40", "0.40", "0.40", "0.40", "0.40", "0.35", "0.30", "0.28"], + ["2.0", "2.0", "2.0", "2.0", "1.7", "1.0", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"], + ["0.60", "0.60", "0.60", "0.60", "0.55", "0.45", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"], + ["0.35", "0.35", "0.35", "0.35", "0.35", "0.32", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"], + ["0.25", "0.25", "0.25", "0.25", "0.25", "0.21", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"], + ["0.18", "0.18", "0.18", "0.18", "0.18", "0.17", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"], +] + +age_bands = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L"] + +wall_uvalues = [] +for i, wall_type in enumerate(wall_types): + row = {"Wall_type": wall_type} + for j, age_band in enumerate(age_bands): + row[age_band] = u_values[i][j] + wall_uvalues.append(row) + +parkhome_wall_uvalues = [ + {"Wall_type": "Park home as built", "F": "1.7", "G": "1.2", "I": "0.7", "K": "0.6"}, + {"Wall_type": "Park home with additional insulation", "F": "s1.1.2", "G": "s1.1.2", "I": "s1.1.2", + "K": "s1.1.2"} +] + +wall_uvalues.extend(parkhome_wall_uvalues) + +wall_uvalues_df = pd.DataFrame(wall_uvalues) + +# This maps the descriptions in the EPC data to the descriptions in the RdSAP table +epc_wall_description_map = { + ############################ + # Cavity wall mappings + ############################ + "Cavity wall, as built, partial insulation": "Filled cavity", + "Cavity wall, filled cavity": "Filled cavity", + "Cavity wall, as built, no insulation": "Cavity as built", + "Cavity wall, as built, insulated": "Unfilled cavity with 100 mm external or internal insulation", + "Cavity wall, with external insulation": "Unfilled cavity with 100 mm external or internal insulation", + "Cavity wall, insulated": "Unfilled cavity with 100 mm external or internal insulation", + 'Cavity wall, partial insulation': "Unfilled cavity with 50 mm external or internal insulation", + + "Cavity wall,": "Cavity as built", # General case of cavity wall without further details + "Cavity wall, filled cavity and external insulation": + "Filled cavity with 100 mm external or internal insulation", + "Cavity wall, filled cavity and internal insulation": + "Filled cavity with 100 mm external or internal insulation", + "Cavity wall, with internal insulation": "Unfilled cavity with 100 mm external or internal insulation", + "Cavity wall, no insulation": "Cavity as built", + + ############################ + # Solid brick wall mappings + ############################ + "Solid brick, as built, no insulation": "Solid brick as built", + "Solid brick, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", + "Solid brick, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", + "Solid brick, with external insulation": "Stone/solid brick with 100 mm external or internal insulation", + "Solid brick, as built, partial insulation": "Stone/solid brick with 50 mm external or internal insulation", + + ############################ + # Timber frame wall mappings + ############################ + # These mappings are perhaps the most dubious due to the lack of timber options in the RdSAP table + "Timber frame, as built, insulated": "Timber frame with internal insulation", + "Timber frame, with additional insulation": "Timber frame with internal insulation", + "Timber frame, as built, partial insulation": "Timber frame as built", + "Timber frame, as built, no insulation": "Timber frame as built", + "Timber frame, with external insulation": "Timber frame with internal insulation", + + ############################ + # Sandstone/limestones wall mappings + ############################ + "Sandstone or limestone, as built, no insulation": "Stone: sandstone or limestone as built", + "Sandstone or limestone, with internal insulation": + "Stone/solid brick with 100 mm external or internal insulation", + "Sandstone or limestone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal " + "insulation", + "Sandstone, as built, no insulation": "Stone: sandstone or limestone as built", + "Sandstone or limestone, as built, insulated": + "Stone/solid brick with 100 mm external or internal insulation", + "Sandstone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", + "Sandstone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", + "Sandstone or limestone, with external insulation": "Stone/solid brick with 100 mm external or internal " + "insulation", + "Sandstone, with external insulation": "Stone/solid brick with 100 mm external or internal insulation", + "Sandstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal insulation", + + ############################ + # Granite/whinstone wall mappings + ############################ + "Granite or whinstone, as built, no insulation": "Stone: granite or whinstone as built", + "Granite or whinstone, with internal insulation": "Stone/solid brick with 100 mm external or internal " + "insulation", + "Granite or whinstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal " + "insulation", + "Granite or whinstone, as built, insulated": "Stone/solid brick with 100 mm external or internal " + "insulation", + "Granite or whinstone, with external insulation": "Stone/solid brick with 100 mm external or internal " + "insulation", + + ############################ + # System built wall mappings + ############################ + "System built, as built, no insulation": "System build as built", + "System built, as built, partial insulation": "System build with 50 mm external or internal insulation", + "System built, with internal insulation": "System build with 100 mm external or internal insulation", + "System built, with external insulation": "System build with 100 mm external or internal insulation", + "System built, as built, insulated": "System build with 100 mm external or internal insulation", + + ############################ + # Cob wall mappings + ############################ + "Cob, as built": "Cob as built", + "Cob, with external insulation": "Cob with 100 mm external or internal insulation", + "Cob, with internal insulation": "Cob with 100 mm external or internal insulation", + 'Cob,': "Cob as built", + + ############################ + # Park home mappings + ############################ + "Park home wall, as built": "Park home as built", + "Park home wall, with external insulation": "Park home with additional insulation", + "Park home wall, with internal insulation": "Park home with additional insulation", +} + +######################################################################################################################## +# These following tables define table s9 and s10 which are used to assign roofs with their assumed u-values. +# The tables can be found on pages 23 and 24 of the BRE document +# https://bregroup.com/wp-content/uploads/2019/09/RdSAP_2012_9.94-20-09-2019.pdf +######################################################################################################################## + +s9_list = [ + {"Insulation_thickness_mm": None, "Slates_or_tiles_U_value_W_m2K": 2.3, "Thatched_roof_U_value_W_m2K": 0.35}, + {"Insulation_thickness_mm": 12, "Slates_or_tiles_U_value_W_m2K": 1.5, "Thatched_roof_U_value_W_m2K": 0.32}, + {"Insulation_thickness_mm": 25, "Slates_or_tiles_U_value_W_m2K": 1.0, "Thatched_roof_U_value_W_m2K": 0.30}, + {"Insulation_thickness_mm": 50, "Slates_or_tiles_U_value_W_m2K": 0.68, "Thatched_roof_U_value_W_m2K": 0.25}, + {"Insulation_thickness_mm": 75, "Slates_or_tiles_U_value_W_m2K": 0.50, "Thatched_roof_U_value_W_m2K": 0.22}, + {"Insulation_thickness_mm": 100, "Slates_or_tiles_U_value_W_m2K": 0.40, "Thatched_roof_U_value_W_m2K": 0.20}, + {"Insulation_thickness_mm": 150, "Slates_or_tiles_U_value_W_m2K": 0.30, "Thatched_roof_U_value_W_m2K": 0.17}, + {"Insulation_thickness_mm": 200, "Slates_or_tiles_U_value_W_m2K": 0.21, "Thatched_roof_U_value_W_m2K": 0.14}, + {"Insulation_thickness_mm": 250, "Slates_or_tiles_U_value_W_m2K": 0.17, "Thatched_roof_U_value_W_m2K": 0.12}, + {"Insulation_thickness_mm": 270, "Slates_or_tiles_U_value_W_m2K": 0.16, "Thatched_roof_U_value_W_m2K": 0.12}, + {"Insulation_thickness_mm": 300, "Slates_or_tiles_U_value_W_m2K": 0.14, "Thatched_roof_U_value_W_m2K": 0.11}, + {"Insulation_thickness_mm": 350, "Slates_or_tiles_U_value_W_m2K": 0.12, "Thatched_roof_U_value_W_m2K": 0.10}, + {"Insulation_thickness_mm": 400, "Slates_or_tiles_U_value_W_m2K": 0.11, + "Thatched_roof_U_value_W_m2K": 0.09}, +] + +s10_list = [ + { + "Age_band": "A, B, C, D", + "Pitched_slates_or_tiles_insulation_between_joists_or_unknown": 2.3, + "Pitched_slates_or_tiles_insulation_at_rafters": 2.3, + "Flat_roof": 2.3, + "Room_in_roof_slates_or_tiles": 2.3, + "Thatched_roof": 0.35, + "Thatched_roof_room_in_roof": 0.25, + "Park_home": None + }, + { + "Age_band": "E", + "Pitched_slates_or_tiles_insulation_between_joists_or_unknown": 1.5, + "Pitched_slates_or_tiles_insulation_at_rafters": 1.5, + "Flat_roof": 1.5, + "Room_in_roof_slates_or_tiles": 1.5, + "Thatched_roof": 0.35, + "Thatched_roof_room_in_roof": 0.25, + "Park_home": None + }, + { + "Age_band": "F", + "Pitched_slates_or_tiles_insulation_between_joists_or_unknown": 0.68, + "Pitched_slates_or_tiles_insulation_at_rafters": 0.68, + "Flat_roof": 0.68, + "Room_in_roof_slates_or_tiles": 0.80, + "Thatched_roof": 0.35, + "Thatched_roof_room_in_roof": 0.25, + "Park_home": 1.7 + }, + { + "Age_band": "G", + "Pitched_slates_or_tiles_insulation_between_joists_or_unknown": 0.40, + "Pitched_slates_or_tiles_insulation_at_rafters": 0.40, + "Flat_roof": 0.40, + "Room_in_roof_slates_or_tiles": "0.50", + "Thatched_roof": 0.35, + "Thatched_roof_room_in_roof": 0.25, + "Park_home": 0.6 + }, + { + "Age_band": "H", + "Pitched_slates_or_tiles_insulation_between_joists_or_unknown": 0.30, + "Pitched_slates_or_tiles_insulation_at_rafters": 0.35, + "Flat_roof": 0.35, + "Room_in_roof_slates_or_tiles": 0.35, + "Thatched_roof": 0.35, + "Thatched_roof_room_in_roof": 0.25, + "Park_home": None + }, + { + "Age_band": "I", + "Pitched_slates_or_tiles_insulation_between_joists_or_unknown": 0.26, + "Pitched_slates_or_tiles_insulation_at_rafters": 0.35, + "Flat_roof": 0.35, + "Room_in_roof_slates_or_tiles": 0.35, + "Thatched_roof": 0.35, + "Thatched_roof_room_in_roof": 0.25, + "Park_home": 0.35 + }, + { + "Age_band": "J", + "Pitched_slates_or_tiles_insulation_between_joists_or_unknown": 0.16, + "Pitched_slates_or_tiles_insulation_at_rafters": 0.20, + "Flat_roof": 0.25, + "Room_in_roof_slates_or_tiles": 0.30, + "Thatched_roof": 0.30, + "Thatched_roof_room_in_roof": 0.25, + "Park_home": None + }, + { + "Age_band": "K", + "Pitched_slates_or_tiles_insulation_between_joists_or_unknown": 0.16, + "Pitched_slates_or_tiles_insulation_at_rafters": 0.20, + "Flat_roof": 0.25, + "Room_in_roof_slates_or_tiles": 0.25, + "Thatched_roof": 0.25, + "Thatched_roof_room_in_roof": 0.25, + "Park_home": 0.30 + }, + { + "Age_band": "L", + "Pitched_slates_or_tiles_insulation_between_joists_or_unknown": 0.16, + "Pitched_slates_or_tiles_insulation_at_rafters": 0.18, + "Flat_roof": 0.18, + "Room_in_roof_slates_or_tiles": 0.18, + "Thatched_roof": 0.18, + "Thatched_roof_room_in_roof": 0.18, + "Park_home": None + } +] + +table_s9 = pd.DataFrame(s9_list) + +table_s10 = pd.DataFrame(s10_list) + +######################################################################################################################## +# Table s11 is used for assigning the u-values of floors when the insulation thickness is unknown +# which can be found on page 25 of the BRE document +# https://bregroup.com/wp-content/uploads/2019/09/RdSAP_2012_9.94-20-09-2019.pdf +# +# The thickness values are in mm +######################################################################################################################## + +s11_list = [ + {"Age_band": "A, B", "Floor_construction": "suspended timber", "England_Wales": 0, "Scotland": 0, + "Northern_Ireland": 0, "Park_home": 0}, + {"Age_band": "C to F", "Floor_construction": "solid", "England_Wales": 0, "Scotland": 0, + "Northern_Ireland": 0, "Park_home": 0}, + {"Age_band": "G", "Floor_construction": "solid", "England_Wales": 0, "Scotland": 0, + "Northern_Ireland": 0, "Park_home": 25}, + {"Age_band": "H", "Floor_construction": "solid", "England_Wales": 0, "Scotland": 25, + "Northern_Ireland": 25, "Park_home": 0}, + {"Age_band": "I", "Floor_construction": "solid", "England_Wales": 25, "Scotland": 50, + "Northern_Ireland": 50, "Park_home": 50}, + {"Age_band": "J", "Floor_construction": "solid", "England_Wales": 75, "Scotland": 75, + "Northern_Ireland": 0, "Park_home": 0}, + {"Age_band": "K", "Floor_construction": "solid", "England_Wales": 100, "Scotland": 100, + "Northern_Ireland": 100, "Park_home": 70}, + {"Age_band": "L", "Floor_construction": "solid", "England_Wales": 100, "Scotland": 120, + "Northern_Ireland": 100, "Park_home": 0}, +] + +table_s11 = pd.DataFrame(s11_list) diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index e53aeb17..8a113050 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -1,7 +1,14 @@ +import math from copy import deepcopy + +import pandas as pd + from backend.Property import Property from statistics import mean -import random +from recommendations.rdsap_tables import ( + epc_wall_description_map, wall_uvalues_df, default_wall_thickness, table_s9 as s9, table_s10 as s10, + table_s11 as s11 +) def r_value_per_mm_to_u_value(depth_mm: int, r_value_per_mm: float): @@ -121,54 +128,351 @@ def get_recommended_part(part, selected_depth, selected_total_cost, quantity, qu return recommended_part -def get_uvalue_estimate(uvalue_estimates, property: Property, total_floor_area_group_decile): +def apply_formula_s_5_1_1(is_granite_or_whinstone, is_sandstone_or_limestone, age_band): """ - Wrapper function which contains the methodology to extract a property's walls u-value estimate - when we don't have a true value and if we can't base our assumption off of the material + As the u-value table in https://bregroup.com/wp-content/uploads/2019/09/RdSAP_2012_9.94-20-09-2019.pdf + on page 19, certain u-values as indicated by an "a", should be populated using a formula as defined in section + S.5.1.1 + """ + + stone_wall_thickness = [x for x in default_wall_thickness if x["type"] == "stone"][0] + + thickness = stone_wall_thickness["J_K_L"] if age_band in ["J", "L", "L"] else stone_wall_thickness[age_band] + + if is_granite_or_whinstone: + return 3.3 - 0.002 * thickness + + if is_sandstone_or_limestone: + return 3 - 0.002 * thickness + + raise ValueError("This should only be called when is_granite_or_whinstone or is_sandstone_or_limestone is True") + + +def get_wall_u_value(clean_description, age_band, is_granite_or_whinstone, is_sandstone_or_limestone): + """ + Given some features about a wall, this function will query the wall u-value table and return the u-value + :param clean_description: Cleaned up description of the wall from the EPC data + :param age_band: age band of the property from the EPC data + :param is_granite_or_whinstone: Boolean indicating if the wall is made of granite or whinstone + :param is_sandstone_or_limestone: Boolean indicating if the wall is made of sandstone or limestone :return: """ - if not uvalue_estimates: - raise ValueError("No U-value estimate found for the given property - investigate") + mapped_description = epc_wall_description_map[clean_description] - # We try and filter on total_floor_area_group_decile - floor_area_filter = [ - x for x in uvalue_estimates if - x["total-floor-area_group"] == total_floor_area_group_decile - ] + mapped_value = wall_uvalues_df[wall_uvalues_df["Wall_type"] == mapped_description][age_band].values[0] - if not floor_area_filter: - # Take a mean of all the u-value estimates - return mean( - [x["median_thermal_transmittance"] for x in uvalue_estimates if x["median_thermal_transmittance"]] + if pd.isnull(mapped_value) and "Park home" in mapped_description: + # We don't know enough in this case so we default to 0 + return 0 + + if mapped_value == "a": + # The rdSap documentation indicateswe should use a formula to calculate the u-value + return float( + apply_formula_s_5_1_1( + is_granite_or_whinstone=is_granite_or_whinstone, + is_sandstone_or_limestone=is_sandstone_or_limestone, + age_band=age_band + ) ) - # Because of how spuriously populated the data is for number-habitable-rooms and number-heated-rooms, - # we will try and filter on these to see if we get a result + if "b" in mapped_value: + potential_uvalue = float(mapped_value.replace("b", "")) + formula_uvalue = float(apply_formula_s_5_1_1( + is_granite_or_whinstone=is_granite_or_whinstone, + is_sandstone_or_limestone=is_sandstone_or_limestone, + age_band=age_band + )) + return min(potential_uvalue, formula_uvalue) - habitable_rooms_filer = [ - x for x in floor_area_filter if - x["number-habitable-rooms"] == property.data["number-habitable-rooms"] - ] + if mapped_value == "s1.1.2": + # We don't know enough in this case so we default to 0 + return 0 - if not habitable_rooms_filer: - # Take a mean of all the u-value estimates - return mean( - [x["median_thermal_transmittance"] for x in floor_area_filter if x["median_thermal_transmittance"]] - ) + return float(mapped_value) - # Try perform a filter on heated rooms - heated_rooms_filter = [ - x for x in habitable_rooms_filer if - x["number-heated-rooms"] == property.data["number-heated-rooms"] - ] - if not heated_rooms_filter: - # Take a mean of all the u-value estimates - return mean( - [x["median_thermal_transmittance"] for x in habitable_rooms_filer if x["median_thermal_transmittance"]] - ) +def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched): + """Get the U-value from table S9 based on the insulation thickness.""" + if thickness in ["below average", "average", "above average", "none", None] or ( + not is_loft and not is_roof_room + ): + return None + elif thickness.endswith("+"): + thickness = int(thickness[:-1]) + else: + try: + thickness = int(thickness) + except ValueError: + # If thickness is not a valid number (could be a string or None), return None + return None - return mean( - [x["median_thermal_transmittance"] for x in heated_rooms_filter if x["median_thermal_transmittance"]] + # Determine the column to refer based on the roof type + column = 'Thatched_roof_U_value_W_m2K' if is_thatched else 'Slates_or_tiles_U_value_W_m2K' + + # Get the correct U-value based on the insulation thickness + return s9[s9['Insulation_thickness_mm'] >= thickness][column].iloc[0] + + +def get_roof_u_value( + insulation_thickness, + has_dwelling_above, + is_loft, + is_roof_room, + is_thatched, + age_band, + is_flat, + is_pitched, + is_at_rafters, + **kwargs +): + """ + Determine the U-value for a roof based on the description dictionary and age band. + + We use table s9 is the insulation thickness was measured, otherwise we use table s10. + + The methodology for this process can be found in page 23 of the BRE rdsap 2012 document found here: + https://bregroup.com/wp-content/uploads/2019/09/RdSAP_2012_9.94-20-09-2019.pdf + + Parameters: + insulation_thickness (str): contains description of the insulation thickness - may be missing + has_dwelling_above (bool): Indicates if there is a property above + is_loft (bool): Indicates if ther oof has a loft + is_roof_room (bool): Indicates if there is a room in roof + is_thatched (bool): Indicates if the roof is thatched + is_flat (bool): Indicates if the roof is flat + is_pitched (bool): Indicates if the roof is pitched + is_at_rafters (bool): Indicates if there is insulation at the rafters of the roof + age_band (str): The age band of the property. + s9 (pd.DataFrame): The DataFrame representing table S9. + s10 (pd.DataFrame): The DataFrame representing table S10. + + Returns: + float: The determined U-value. + """ + + # If there is a dwelling above, the U-value is 0 + if has_dwelling_above: + return 0.0 + + # Step 1: Try to get the U-value from table S9 based on the insulation thickness + # The conditions for using table S9 are: + # - The insulation thickness is known + # - The roof is either a loft or a roof room + # The criteria for using this table is predominately defined by insulation around joists which is predominately + # a feature of lofts and roof rooms + u_value = get_u_value_from_s9( + thickness=insulation_thickness, + s9=s9, + is_loft=is_loft, + is_roof_room=is_roof_room, + is_thatched=is_thatched, ) + + if u_value is not None: + return u_value + + # Step 2: If the U-value could not be determined from table S9, use table S10 + + # Define the columns to be used based on the description details + if is_flat: + column = 'Flat_roof' + elif is_thatched: + if is_roof_room: + column = 'Thatched_roof_room_in_roof' + else: + column = 'Thatched_roof' + elif is_roof_room: + column = 'Room_in_roof_slates_or_tiles' + elif is_pitched: + if is_at_rafters: + column = 'Pitched_slates_or_tiles_insulation_at_rafters' + else: + column = 'Pitched_slates_or_tiles_insulation_between_joists_or_unknown' + else: + # Default to pitched roof with insulation between joists or unknown + column = 'Pitched_slates_or_tiles_insulation_between_joists_or_unknown' + + # Get the U-value from table S10 based on the age band and the determined column + u_value = s10.loc[s10['Age_band'].str.contains(age_band), column].values[0] + + return u_value + + +def estimate_perimeter(floor_area, num_rooms): + """ + Uses a basic methodology to attempt to estimate perimeter. Works better for + :param floor_area: floor area of the home + :param num_rooms: number of rooms in the home + :return: estimated perimeter + """ + if floor_area < 0: + raise ValueError("Floor area cannot be negative.") + if num_rooms <= 0: + raise ValueError("Number of rooms must be greater than zero.") + # Compute average room size based on total floor area and number of rooms + avg_room_size = floor_area / num_rooms + + # Estimate the side length of a square room with the average room size + avg_room_side_length = math.sqrt(avg_room_size) + + # Estimate total side length assuming rooms are lined up in a row + total_side_length = avg_room_side_length * num_rooms + + # Estimate the length and width of the property assuming it is rectangular + length = total_side_length / 2 + width = floor_area / length + + # Compute the perimeter of the property + perimeter = 2 * (length + width) + + return perimeter + + +def get_floor_u_value(floor_type, area, perimeter, age_band, wall_type, insulation_thickness=None): + """ + Estimate the u-value of a suspended floor, based on RdSap methodology + Default U-value for UNINSULATED suspended floor, based on RdSAP methodology + https://files.bregroup.com/bre-co-uk-file-library-copy/filelibrary/SAP/2012/RdSAP-9.93/RdSAP_2012_9.93.pdf + + w = wall thickness, where these estimates are based on the RD SAP methodology, as in table S3 + A = floor area + Exposed perimeter = P + soil type clas thermal conductivity lambda_g = 1.5 W/mK + Rsi = 0.17m^2K/W + Rse = 0.04m^2K/W + Rf = 0.001 * d_ins / 0.035 where d_ins is the insulation thickness in mm + height above external ground h = 0.3m + average wind speed at 10m height v=5m/s + wind sheilding factor fw = 0.05 + vantilation factor E = 0.003 m^2/m + U-value of walls to underfloor space Uw = 1.5 W/m^2K + + # Calulations for suspended ground floors, example for 5 bedroom house with permiter estimated at + 44.36214602563767 + 1) dg = w + lambda_g x (Rsi + Rse) = 0.5 + 1.5 * (0.17 + 0.04) = 0.615 + 2) B = 2 * A/P = 2 * 123.0 / 44.36214602563767 = 5.545268253204708 + 3) Ug = 2 * lambda_g * log(pi * B/dg + 1)/(pi * B + dg) = + 2 * 1.5 * log(3.141592653589793 * 5.545268253204708/0.615 + 1) / (3.141592653589793 * 5.545268253204708 + + 0.615) = 0.5619604457160708 + 4) Ux = (2 * h * Uw /B) + (1450 * E * v * fw/B) = (2 * 0.3 * 1.5 / 5.545268253204708) + (1450 * 0.003 * 5 * + 0.05/5.545268253204708) = 0.35841367978030436 + 5) U = 1/ (2 * Rsi + Rf + 1/(Ug + Ux)) = 1 / (2 * 0.17 + 0 + 1/(0.5619604457160708 + 0.35841367978030436)) = + 0.701 + """ + + # Cleans our regularly inputted insulation thickness for usage in this function + insulation_thickness = extract_insulation_thickness(insulation_thickness) + + # Define constants + lambda_g = 1.5 # thermal conductivity of soil in W/m·K + Rsi = 0.17 # in m²K/W + Rse = 0.04 # in m²K/W + lambda_ins = 0.035 # thermal conductivity of floor insulation in W/m·K + + wall_thickness = [x[age_band] for x in default_wall_thickness if x["type"] == wall_type][0] + if wall_thickness is None and wall_type == "park home": + # We don't know enough and likely won't make recommendations + return 0 + wall_thickness = wall_thickness / 1000 + + if insulation_thickness is None: + insulation_lookup = s11[s11["Age_band"].str.contains(age_band) & s11["Floor_construction"] == floor_type] + if insulation_lookup.empty: + insulation_thickness = 0 + else: + insulation_thickness = insulation_lookup["England_Wales"].values[0] + + # Calculate Rf for insulated floors + Rf = 0.001 * insulation_thickness / lambda_ins + + # Calculate B + B = 2 * area / perimeter + + if floor_type == 'solid': + # Calculate dt + dt = wall_thickness + lambda_g * (Rsi + Rf + Rse) + + # Calculate U value based on dt and B + if dt < B: + U = 2 * lambda_g * math.log(math.pi * B / dt + 1) / (math.pi * B + dt) + else: + U = lambda_g / (0.457 * B + dt) + + elif floor_type == 'suspended': + # Define additional constants for suspended floors + h = 0.3 # height above external ground level in meters + v = 5 # average wind speed at 10 m height in m/s + fw = 0.05 # wind shielding factor + epsilon = 0.003 # ventilation openings per m exposed perimeter in m²/m + Uw = 1.5 # U-value of walls to underfloor space in W/m²K + + # Calculate dg + dg = wall_thickness + lambda_g * (Rsi + Rse) + + # Calculate Ug and Ux + Ug = 2 * lambda_g * math.log(math.pi * B / dg + 1) / (math.pi * B + dg) + Ux = (2 * h * Uw / B) + (1450 * epsilon * v * fw / B) + + # Calculate final U value for suspended floors + if insulation_thickness > 0: + Rf += 0.2 # adding thermal resistance of floor deck + else: + Rf = 0.2 # thermal resistance of uninsulated floor deck + + U = 1 / (2 * Rsi + Rf + 1 / (Ug + Ux)) + else: + raise ValueError("Invalid floor type. Acceptable values are 'solid' or 'suspended'.") + + return round(U, 2) # rounding U value to two decimal places + + +def extract_insulation_thickness(insulation_thickness_str): + """ + Converts insulation thickness to a float + :param insulation_thickness_str: + :return: + """ + if insulation_thickness_str in ["none", "average", "below average", "above average", None]: + return None + + return int(insulation_thickness_str.replace("mm", "")) + + +def get_wall_type( + is_cavity_wall, + is_solid_brick, + is_granite_or_whinstone, + is_sandstone_or_limestone, + is_timber_frame, + is_cob, + is_system_built, + is_park_home, + **kwargs +): + """ + Converts booleans to a string wall type, for querying the wall thickness table + :return: + """ + if is_cavity_wall: + return "cavity" + + if is_solid_brick: + return "solid brick" + + if is_granite_or_whinstone or is_sandstone_or_limestone: + return "stone" + + if is_timber_frame: + return "timber frame" + + if is_cob: + return "cob" + + if is_system_built: + return "system build" + + if is_park_home: + return "park home" + + return None diff --git a/recommendations/tests/test_data/floor_uvalue_test_cases.py b/recommendations/tests/test_data/floor_uvalue_test_cases.py new file mode 100644 index 00000000..91d3814f --- /dev/null +++ b/recommendations/tests/test_data/floor_uvalue_test_cases.py @@ -0,0 +1,32 @@ +floor_uvalue_test_cases = [ + # Test with solid floor, no insulation + { + "floor_type": "solid", + "area": 100, + "perimeter": 40, + "age_band": "A", + "wall_type": "cavity", + "insulation_thickness": None, + "expected": 0.62, + }, + # Test with suspended floor, with insulation + { + "floor_type": "suspended", + "area": 120, + "perimeter": 44, + "age_band": "B", + "wall_type": "solid brick", + "insulation_thickness": "50mm", + "expected": 0.33, + }, + # Test with invalid floor type + { + "floor_type": "invalid", + "area": 100, + "perimeter": 40, + "age_band": "A", + "wall_type": "cavity", + "insulation_thickness": None, + "expected": ValueError, + }, +] diff --git a/recommendations/tests/test_data/uvalue_estimates.pkl b/recommendations/tests/test_data/uvalue_estimates.pkl deleted file mode 100644 index 767d1300..00000000 Binary files a/recommendations/tests/test_data/uvalue_estimates.pkl and /dev/null differ diff --git a/recommendations/tests/test_data/wall_uvalue_test_cases.py b/recommendations/tests/test_data/wall_uvalue_test_cases.py new file mode 100644 index 00000000..1cc6823c --- /dev/null +++ b/recommendations/tests/test_data/wall_uvalue_test_cases.py @@ -0,0 +1,80 @@ +wall_uvalue_test_cases = [ + { + "clean_description": "Cavity wall, as built, partial insulation", + "age_band": "A", + "is_granite_or_whinstone": False, + "is_sandstone_or_limestone": False, + "uvalue": 0.7 + }, + { + "clean_description": "Cavity wall, as built, partial insulation", + "age_band": "F", + "is_granite_or_whinstone": False, + "is_sandstone_or_limestone": False, + "uvalue": 0.4 + }, + { + "clean_description": "Cavity wall, as built, partial insulation", + "age_band": "F", + "is_granite_or_whinstone": False, + "is_sandstone_or_limestone": False, + "uvalue": 0.4 + }, + { + + "clean_description": "Solid brick, with internal insulation", + "age_band": "C", + "is_granite_or_whinstone": False, + "is_sandstone_or_limestone": False, + "uvalue": 0.32 + }, + { + "clean_description": "Solid brick, as built, no insulation", + "age_band": "C", + "is_granite_or_whinstone": False, + "is_sandstone_or_limestone": False, + "uvalue": 1.7 + }, + { + "clean_description": "Timber frame, as built, no insulation", + "age_band": "E", + "is_granite_or_whinstone": False, + "is_sandstone_or_limestone": False, + "uvalue": 0.8 + }, + { + "clean_description": "Sandstone or limestone, with external insulation", + "age_band": "E", + "is_granite_or_whinstone": False, + "is_sandstone_or_limestone": False, + "uvalue": 0.32 + }, + { + "clean_description": "Granite or whinstone, as built, partial insulation", + "age_band": "E", + "is_granite_or_whinstone": False, + "is_sandstone_or_limestone": False, + "uvalue": 0.55 + }, + { + "clean_description": "System built, as built, no insulation", + "age_band": "E", + "is_granite_or_whinstone": False, + "is_sandstone_or_limestone": False, + "uvalue": 1.7 + }, + { + "clean_description": "Cob, with internal insulation", + "age_band": "E", + "is_granite_or_whinstone": False, + "is_sandstone_or_limestone": False, + "uvalue": 0.26 + }, + { + "clean_description": "Park home wall, with internal insulation", + "age_band": "E", + "is_granite_or_whinstone": False, + "is_sandstone_or_limestone": False, + "uvalue": 0 + } +] diff --git a/recommendations/tests/test_floor_recommendations.py b/recommendations/tests/test_floor_recommendations.py index ee52abe2..1632f468 100644 --- a/recommendations/tests/test_floor_recommendations.py +++ b/recommendations/tests/test_floor_recommendations.py @@ -8,12 +8,6 @@ from recommendations.FloorRecommendations import FloorRecommendations # os.path.abspath(os.path.dirname(__file__)) + "/recommendations/tests/test_data/input_properties.pkl", "rb" # ) as f: # input_properties = pickle.load(f) -# -# with open( -# os.path.abspath(os.path.dirname(__file__)) + "/recommendations/tests/test_data/uvalue_estimates.pkl", "rb" -# ) as f: -# uvalue_estimates = pickle.load(f) - suspended_floor_insulation_parts = [ { @@ -85,13 +79,6 @@ class TestWallRecommendations: ) as f: return pickle.load(f) - @pytest.fixture - def uvalue_estimates(self): - with open( - os.path.abspath(os.path.dirname(__file__)) + "/test_data/uvalue_estimates.pkl", "rb" - ) as f: - return pickle.load(f) - @pytest.fixture def mock_floor_rec_instance(self): # Creating a mock instance of WallRecommendations with the necessary attributes @@ -99,27 +86,22 @@ class TestWallRecommendations: property_mock.full_sap_epc = {"lodgement-date": "2000-01-01"} # or any date you want property_mock.data = {"construction-age-band": "1950"} # or any other data that fits your tests - uvalue_estimates_mock = Mock() - - mock_wall_rec_instance = FloorRecommendations(property_mock, uvalue_estimates_mock, "Decile 1") + mock_wall_rec_instance = FloorRecommendations(property_mock, "Decile 1", parts) return mock_wall_rec_instance - def test_init(self, input_properties, uvalue_estimates): + def test_init(self, input_properties): obj = FloorRecommendations( property_instance=input_properties[0], - uvalue_estimates=uvalue_estimates, total_floor_area_group_decile="Decile 1", materials=parts ) assert obj assert obj.property - assert obj.uvalue_estimates assert obj.total_floor_area_group_decile == "Decile 1" - def test_other_premises_below(self, input_properties, uvalue_estimates): + def test_other_premises_below(self, input_properties): recommender = FloorRecommendations( property_instance=input_properties[0], - uvalue_estimates=uvalue_estimates, total_floor_area_group_decile="Decile 1", materials=parts ) @@ -128,31 +110,32 @@ class TestWallRecommendations: assert not recommender.recommendations - def test_suspended_no_insulation(self, input_properties, uvalue_estimates): + def test_suspended_no_insulation(self, input_properties): """ For a suspended floor without insulation, we use the rdsap methogology to estimate a U-value for the floor :return: """ input_properties[2].floor_area = 50 + input_properties[2].walls["is_park_home"] = False + input_properties[2].age_band = "A" recommender = FloorRecommendations( property_instance=input_properties[2], - uvalue_estimates=uvalue_estimates, total_floor_area_group_decile="Decile 1", materials=parts ) assert recommender.estimated_u_value is None recommender.recommend() assert recommender.property.floor["is_suspended"] - assert recommender.estimated_u_value == 0.8766389420265843 + assert recommender.estimated_u_value == 0.52 assert recommender.recommendations types = {part["type"] for x in recommender.recommendations for part in x["parts"]} assert types == {"suspended_floor_insulation"} - def test_uvalue_0_12(self, input_properties, uvalue_estimates): + def test_uvalue_0_12(self, input_properties): """ This is a home that doesn't have a property below but it's highly performant already and therefore does not need floor insulation @@ -160,7 +143,6 @@ class TestWallRecommendations: """ recommender = FloorRecommendations( property_instance=input_properties[3], - uvalue_estimates=uvalue_estimates, total_floor_area_group_decile="Decile 1", materials=parts ) @@ -171,16 +153,17 @@ class TestWallRecommendations: assert recommender.estimated_u_value is None assert not recommender.recommendations - def test_solid_no_insulation(self, input_properties, uvalue_estimates): + def test_solid_no_insulation(self, input_properties): """ :return: """ input_properties[4].floor_area = 100 + input_properties[4].walls["is_park_home"] = False + input_properties[4].age_band = "B" recommender = FloorRecommendations( property_instance=input_properties[4], - uvalue_estimates=uvalue_estimates, total_floor_area_group_decile="Decile 1", materials=parts ) @@ -188,21 +171,20 @@ class TestWallRecommendations: recommender.recommend() assert not recommender.property.floor["is_suspended"] assert recommender.property.floor["is_solid"] - assert recommender.estimated_u_value == 0.7528014214215474 + assert recommender.estimated_u_value == 0.63 assert recommender.recommendations types = {part["type"] for x in recommender.recommendations for part in x["parts"]} assert types == {"solid_floor_insulation"} - def test_another_dwelling_below(self, input_properties, uvalue_estimates): + def test_another_dwelling_below(self, input_properties): """ This is another description we see when there is a property below """ recommender = FloorRecommendations( property_instance=input_properties[6], - uvalue_estimates=uvalue_estimates, total_floor_area_group_decile="Decile 1", materials=parts ) diff --git a/recommendations/tests/test_recommendation_utils.py b/recommendations/tests/test_recommendation_utils.py index 83a35587..b3ea0141 100644 --- a/recommendations/tests/test_recommendation_utils.py +++ b/recommendations/tests/test_recommendation_utils.py @@ -1,7 +1,10 @@ import pytest +import math from unittest.mock import MagicMock from recommendations import recommendation_utils from datatypes.enums import QuantityUnits +from recommendations.tests.test_data.wall_uvalue_test_cases import wall_uvalue_test_cases +from recommendations.tests.test_data.floor_uvalue_test_cases import floor_uvalue_test_cases class TestRecommendationUtils: @@ -43,35 +46,261 @@ class TestRecommendationUtils: part=part, selected_depth=1, selected_total_cost=50, quantity=99, quantity_unit="m2" ) == {'depths': [1], 'estimated_cost': 50, 'quantity': 99, 'quantity_unit': QuantityUnits.m2.value} - def test_get_uvalue_estimate(self, property_mock): - uvalue_estimates = [ - { - 'total-floor-area_group': 'Decile 1', - 'number-habitable-rooms': 3, - 'number-heated-rooms': 2, - 'median_thermal_transmittance': 1 - }, - { - 'total-floor-area_group': 'Decile 1', - 'number-habitable-rooms': 3, - 'number-heated-rooms': 2, - 'median_thermal_transmittance': 2 - } - ] + def test_get_roof_u_value(self): + # Test case 1: Insulation thickness is known and is_loft is True + inputs = { + 'insulation_thickness': '50', + 'is_loft': True, + 'is_roof_room': False, + 'is_thatched': False, + 'has_dwelling_above': False, + 'is_flat': False, + 'is_pitched': True, + 'is_at_rafters': False, + } + for age_band in ["A", "B", "C", "D"]: + assert recommendation_utils.get_roof_u_value(**{**inputs, "age_band": age_band}) == 0.68 - assert recommendation_utils.get_uvalue_estimate(uvalue_estimates, property_mock, "Decile 1") == 1.5 + def test_get_roof_u_value_case_2(self): + inputs = { + 'original_description': 'Pitched, 400+ mm insulation at joists', + 'clean_description': 'Pitched, 400+ mm insulation at joists', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, + 'is_pitched': True, + 'is_roof_room': False, + 'is_loft': False, + 'is_flat': False, + 'is_thatched': False, + 'is_at_rafters': False, + 'is_assumed': False, + 'has_dwelling_above': False, + 'is_valid': True, + 'insulation_thickness': '400+', + 'age_band': "J" + } - with pytest.raises(ValueError): - recommendation_utils.get_uvalue_estimate([], property_mock, "Decile 1") + u_value = recommendation_utils.get_roof_u_value(**inputs) + assert u_value == 0.16, f"Expected 0.16, but got {u_value}" - # Test with missing 'median_thermal_transmittance' key - uvalue_estimates_missing_key = [ - { - 'total-floor-area_group': 'Decile 1', - 'number-habitable-rooms': 3, - 'number-heated-rooms': 2 - } - ] + def test_get_roof_u_value_case_3(self): + inputs = { + 'original_description': 'Room-in-roof, 200 mm insulation at rafters', + 'clean_description': 'Room-in-roof, 200 mm insulation at rafters', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, + 'is_pitched': False, + 'is_roof_room': True, + 'is_loft': False, + 'is_flat': False, + 'is_thatched': False, + 'is_at_rafters': True, + 'is_assumed': False, + 'has_dwelling_above': False, + 'is_valid': True, + 'insulation_thickness': '200', + 'age_band': "J" + } - with pytest.raises(KeyError): - recommendation_utils.get_uvalue_estimate(uvalue_estimates_missing_key, property_mock, "Decile 1") + u_value = recommendation_utils.get_roof_u_value(**inputs) + assert u_value == 0.21, f"Expected 0.21, but got {u_value}" + + def test_get_roof_u_value_case_4(self): + inputs = { + 'original_description': 'Pitched, below average insulation', + 'clean_description': 'Pitched, below average insulation', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, + 'is_pitched': True, + 'is_roof_room': False, + 'is_loft': False, + 'is_flat': False, + 'is_thatched': False, + 'is_at_rafters': False, + 'is_assumed': False, + 'has_dwelling_above': False, + 'is_valid': True, + 'insulation_thickness': 'below average', + 'age_band': "E" + } + + u_value = recommendation_utils.get_roof_u_value(**inputs) + assert u_value == 1.5, f"Expected 1.5, but got {u_value}" + + def test_get_roof_u_value_case_5(self): + # Test case where insulation thickness is exactly specified + inputs = { + 'original_description': 'Pitched, 100mm insulation', + 'clean_description': 'Pitched, 100mm insulation', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, + 'is_pitched': True, + 'is_roof_room': False, + 'is_loft': False, + 'is_flat': False, + 'is_thatched': False, + 'is_at_rafters': False, + 'is_assumed': False, + 'has_dwelling_above': False, + 'is_valid': True, + 'insulation_thickness': '100', + 'age_band': "G" + } + + u_value = recommendation_utils.get_roof_u_value(**inputs) + assert u_value == 0.40, f"Expected 0.40, but got {u_value}" + + def test_get_roof_u_value_case_6(self): + # Test case for a thatched roof + inputs = { + 'original_description': 'Thatched, 75mm insulation', + 'clean_description': 'Thatched, 75mm insulation', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, + 'is_pitched': False, + 'is_roof_room': False, + 'is_loft': False, + 'is_flat': False, + 'is_thatched': True, + 'is_at_rafters': False, + 'is_assumed': False, + 'has_dwelling_above': False, + 'is_valid': True, + 'insulation_thickness': '75', + 'age_band': "H" + } + + u_value = recommendation_utils.get_roof_u_value(**inputs) + assert u_value == 0.35, f"Expected 0.35, but got {u_value}" + + def test_get_roof_u_value_case_7(self): + # Test case where the roof has a room in it + inputs = { + 'original_description': 'Pitched, room-in-roof, 100mm insulation', + 'clean_description': 'Pitched, room-in-roof, 100mm insulation', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, + 'is_pitched': True, + 'is_roof_room': True, + 'is_loft': False, + 'is_flat': False, + 'is_thatched': False, + 'is_at_rafters': False, + 'is_assumed': False, + 'has_dwelling_above': False, + 'is_valid': True, + 'insulation_thickness': '100', + 'age_band': "J" + } + + u_value = recommendation_utils.get_roof_u_value(**inputs) + assert u_value == 0.40, f"Expected 0.40, but got {u_value}" + + def test_get_roof_u_value_case_8(self): + # Test case where there is a dwelling above the roof, U-value should be 0 + inputs = { + 'original_description': 'Pitched, 100mm insulation', + 'clean_description': 'Pitched, 100mm insulation', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, + 'is_pitched': True, + 'is_roof_room': False, + 'is_loft': False, + 'is_flat': False, + 'is_thatched': False, + 'is_at_rafters': False, + 'is_assumed': False, + 'has_dwelling_above': True, + 'is_valid': True, + 'insulation_thickness': '100', + 'age_band': "J" + } + + u_value = recommendation_utils.get_roof_u_value(**inputs) + assert u_value == 0.0, f"Expected 0.0, but got {u_value}" + + @pytest.mark.parametrize( + "test_case", + wall_uvalue_test_cases + ) + def test_get_wall_uvalue(self, test_case): + expected_uvalue = test_case["uvalue"] + inputs = test_case.copy() + del inputs["uvalue"] + uvalue = recommendation_utils.get_wall_u_value(**inputs) + assert expected_uvalue == uvalue, f"Expected u value {expected_uvalue}, recieved {uvalue}" + + @pytest.mark.parametrize("test_input", floor_uvalue_test_cases) + def test_get_floor_u_value(self, test_input): + if not isinstance(test_input["expected"], float): + with pytest.raises(test_input["expected"]): + recommendation_utils.get_floor_u_value( + test_input["floor_type"], + test_input["area"], + test_input["perimeter"], + test_input["age_band"], + test_input["wall_type"], + test_input["insulation_thickness"], + ) + else: + result = recommendation_utils.get_floor_u_value( + floor_type=test_input["floor_type"], + area=test_input["area"], + perimeter=test_input["perimeter"], + age_band=test_input["age_band"], + wall_type=test_input["wall_type"], + insulation_thickness=test_input["insulation_thickness"], + ) + assert result == pytest.approx(test_input["expected"], abs=1e-2) + + # Test with wall_type not in default_wall_thickness + def test_wall_type_not_in_default_wall_thickness(self): + with pytest.raises(IndexError): + recommendation_utils.get_floor_u_value( + floor_type="solid", + area=100, + perimeter=40, + age_band="A", + wall_type="InvalidWallType", + insulation_thickness=None, + ) + + # Test with age_band not in s11 + def test_age_band_not_in_s11(self): + with pytest.raises(IndexError): + recommendation_utils.get_floor_u_value( + floor_type="solid", + area=100, + perimeter=40, + age_band="Z", + wall_type="Cavity", + insulation_thickness=None, + ) + + +def test_estimate_perimeter_regular_inputs(): + assert math.isclose( + recommendation_utils.estimate_perimeter(100, 5), 40.24922359499622, + rel_tol=1e-2 + ) + assert math.isclose( + recommendation_utils.estimate_perimeter(123, 5), 44.63854836349408, + rel_tol=1e-2 + ) + + +def test_estimate_perimeter_zero_floor_area(): + with pytest.raises(ZeroDivisionError): + recommendation_utils.estimate_perimeter(0, 5) + + with pytest.raises(ValueError): + assert recommendation_utils.estimate_perimeter(0, 0) == 0 + + +def test_estimate_perimeter_invalid_inputs(): + with pytest.raises(ValueError): + recommendation_utils.estimate_perimeter(100, 0) + with pytest.raises(ValueError): + recommendation_utils.estimate_perimeter(-100, 5) + with pytest.raises(ValueError): + recommendation_utils.estimate_perimeter(100, -5) diff --git a/recommendations/tests/test_wall_recommendations.py b/recommendations/tests/test_wall_recommendations.py index afd396e2..cf036a1c 100644 --- a/recommendations/tests/test_wall_recommendations.py +++ b/recommendations/tests/test_wall_recommendations.py @@ -1,12 +1,8 @@ import os - -import pandas as pd import pytest import pickle -import numpy as np from unittest.mock import Mock, MagicMock from recommendations.WallRecommendations import WallRecommendations -from model_data.analysis.UvalueEstimations import UvalueEstimations from backend.Property import Property from recommendations.recommendation_utils import is_diminishing_returns @@ -206,13 +202,6 @@ class TestWallRecommendations: ) as f: return pickle.load(f) - @pytest.fixture - def uvalue_estimates(self): - with open( - os.path.abspath(os.path.dirname(__file__)) + "/test_data/uvalue_estimates.pkl", "rb" - ) as f: - return pickle.load(f) - @pytest.fixture def mock_wall_rec_instance(self): # Creating a mock instance of WallRecommendations with the necessary attributes @@ -220,26 +209,22 @@ class TestWallRecommendations: property_mock.full_sap_epc = {"lodgement-date": "2000-01-01"} # or any date you want property_mock.data = {"construction-age-band": "1950"} # or any other data that fits your tests - uvalue_estimates_mock = Mock() - mock_wall_rec_instance = WallRecommendations( - property_mock, uvalue_estimates_mock, "Decile 1", materials=wall_parts + property_mock, "Decile 1", materials=wall_parts ) return mock_wall_rec_instance - def test_init(self, input_properties, uvalue_estimates): + def test_init(self, input_properties): obj = WallRecommendations( property_instance=input_properties[0], - uvalue_estimates=uvalue_estimates, total_floor_area_group_decile="Decile 1", materials=wall_parts ) assert obj assert obj.property - assert obj.uvalue_estimates assert obj.total_floor_area_group_decile == "Decile 1" - def test_uvalue_0_16(self, input_properties, uvalue_estimates): + def test_uvalue_0_16(self, input_properties): """ This tests the wall description Average thermal transmittance 0.16 W/m-¦K The important data for this recommendation is: @@ -251,7 +236,6 @@ class TestWallRecommendations: input_properties[0].year_built = 2014 recommender = WallRecommendations( property_instance=input_properties[0], - uvalue_estimates=uvalue_estimates, total_floor_area_group_decile="Decile 1", materials=wall_parts ) @@ -260,7 +244,7 @@ class TestWallRecommendations: # This should be empty assert recommender.recommendations == [] - def test_solid_brick_no_insulation(self, input_properties, uvalue_estimates): + def test_solid_brick_no_insulation(self, input_properties): """ This tests a property with a wall description of Solid brick, as built, no insulation (assumed) The property was built in 1930, right on the threshold for when cavity walls were introduced @@ -271,10 +255,12 @@ class TestWallRecommendations: """ input_properties[1].year_built = 1930 input_properties[1].insulation_wall_area = 100 + input_properties[1].walls["clean_description"] = "Solid brick, as built, no insulation" + input_properties[1].walls["is_sandstone_or_limestone"] = False + input_properties[1].age_band = "A" recommender = WallRecommendations( property_instance=input_properties[1], - uvalue_estimates=uvalue_estimates, total_floor_area_group_decile="Decile 1", materials=wall_parts ) @@ -296,7 +282,7 @@ class TestWallRecommendations: recommender.recommendations ) - def test_solid_brick_insulation(self, input_properties, uvalue_estimates): + def test_solid_brick_insulation(self, input_properties): """ This tests a property with a wall description of Solid brick, as built, insulation (assumed) The property was built in 1991, after cavity walls were introduced @@ -311,7 +297,6 @@ class TestWallRecommendations: input_properties[6].year_built = 1991 recommender = WallRecommendations( property_instance=input_properties[6], - uvalue_estimates=uvalue_estimates.walls.to_dict("records"), total_floor_area_group_decile="Decile 1", materials=wall_parts ) @@ -390,39 +375,10 @@ class TestWallRecommendationsBase: return property_mock @pytest.fixture - def uvalue_estimations_mock(self): - uvalue_estimations_mock = MagicMock(spec=UvalueEstimations) - - uvalue_estimations_mock.walls = pd.DataFrame([ - { - 'local-authority': 'E09000012', - 'property-type': 'Bungalow', - 'walls-energy-eff': 'Very Good', - 'walls-env-eff': 'Very Good', - 'built-form': 'End-Terrace', - 'number-habitable-rooms': '', 'number-heated-rooms': '', 'total-floor-area_group': 'Decile 1', - 'median_thermal_transmittance': 0.15, 'n_samples': 1 - } - ]) - - uvalue_estimations_mock.walls_decile_data = { - 'decile_labels': ['Decile 1', 'Decile 2', 'Decile 3', 'Decile 4', 'Decile 5', 'Decile 6', 'Decile 7', - 'Decile 8', 'Decile 9', 'Decile 10'], - 'decile_boundaries': np.array([11., 49., 52., 56., 63., 70., 74., 79., - 90., 103.8, 1936.])} - - uvalue_estimations_mock.classify_decile_newvalues.return_value = ["Decile 1"] - return uvalue_estimations_mock - - @pytest.fixture - def wall_recommendations_instance(self, property_mock, uvalue_estimations_mock): + def wall_recommendations_instance(self, property_mock): wall_recommendations_instance = WallRecommendations( - property_mock, uvalue_estimations_mock, "Decile 1", materials=wall_parts + property_mock, "Decile 1", materials=wall_parts ) - wall_recommendations_instance.uvalue_estimates.walls_decile_data = { - "decile_labels": MagicMock(), - "decile_boundaries": MagicMock() - } return wall_recommendations_instance def test_ewi_valid_in_conservation_area(self, wall_recommendations_instance): @@ -443,7 +399,11 @@ class TestWallRecommendationsBase: "thermal_transmittance": None, "is_solid_brick": False, "is_cavity_wall": False, - "insulation_thickness": "none" + "insulation_thickness": "none", + "clean_description": "Solid brick, as built, no insulation", + "is_granite_or_whinstone": False, + "is_sandstone_or_limestone": False, } + wall_recommendations_instance.property.age_band = "A" with pytest.raises(NotImplementedError): wall_recommendations_instance.recommend() diff --git a/utils/s3.py b/utils/s3.py index c31f1520..8d24d6c0 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -1,6 +1,7 @@ import boto3 from io import BytesIO from botocore.exceptions import NoCredentialsError, PartialCredentialsError +import pandas as pd def read_from_s3(bucket_name, s3_file_name): @@ -63,3 +64,25 @@ def save_dataframe_to_s3_parquet(df, bucket_name, file_key): # Upload the Parquet file to S3 client.put_object(Bucket=bucket_name, Key=file_key, Body=parquet_buffer.getvalue()) + + +def read_dataframe_from_s3_parquet(bucket_name, file_key): + """ + Read a pandas DataFrame from a Parquet file stored in S3. + + :param bucket_name: Name of the S3 bucket. + :param file_key: Key of the file (including directory path within the bucket). + :return: A pandas DataFrame. + """ + + # Create the boto3 client + client = boto3.client('s3') + + # Get the Parquet file from S3 + response = client.get_object(Bucket=bucket_name, Key=file_key) + + # Read the file into a pandas DataFrame + parquet_buffer = BytesIO(response['Body'].read()) + df = pd.read_parquet(parquet_buffer) + + return df