From c400a67bf6b7cd72af13785a28361ebb732535aa Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 28 Nov 2025 06:47:57 +0000 Subject: [PATCH] debugging wood chips fuel types --- backend/app/assumptions.py | 2 ++ etl/epc/Dataset.py | 53 +++++++++++++++--------------- etl/epc/Record.py | 4 ++- recommendations/Recommendations.py | 2 +- 4 files changed, 33 insertions(+), 28 deletions(-) diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index bc8f9cc6..492b9042 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -86,6 +86,8 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "cop": AVERAGE_ASHP_EFFICIENCY / 100}, "Ground source heat pump, underfloor, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, "Electric ceiling heating": {"fuel": "Electricity", "cop": 1}, + "Boiler and radiators, wood chips": {"fuel": "Wood Logs", "cop": 0.85}, + "Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 62825cc5..3fa84136 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -212,11 +212,11 @@ class TrainingDataset(BaseDataset): common_cols = [[col + "_starting", col + "_ending"] for col in common_cols] self.df = self.df.loc[ - :, - no_suffix_cols - + only_ending_cols - + [col for cols in common_cols for col in cols], - ] + :, + no_suffix_cols + + only_ending_cols + + [col for cols in common_cols for col in cols], + ] def _remove_abnormal_change_in_floor_area(self): """ @@ -394,12 +394,13 @@ class TrainingDataset(BaseDataset): axis=1, ) - roof_starting_uvalue = self.df["roof_thermal_transmittance"].fillna( - roof_starting_uvalue - ) - roof_ending_uvalue = self.df["roof_thermal_transmittance_ending"].fillna( - roof_ending_uvalue - ) + roof_starting_uvalue = pd.to_numeric( + self.df["roof_thermal_transmittance"], errors="coerce" + ).fillna(roof_starting_uvalue) + + roof_ending_uvalue = pd.to_numeric( + self.df["roof_thermal_transmittance_ending"], errors="coerce" + ).fillna(roof_ending_uvalue) # ~~~~~~~~~~~~~~~~~~ # Floor @@ -459,20 +460,20 @@ class TrainingDataset(BaseDataset): axis=1, ) - floor_starting_uvalue = self.df["floor_thermal_transmittance"].fillna( - floor_starting_uvalue - ) - floor_ending_uvalue = self.df["floor_thermal_transmittance_ending"].fillna( - floor_ending_uvalue - ) + floor_starting_uvalue = pd.to_numeric( + self.df["floor_thermal_transmittance"], errors="coerce" + ).fillna(floor_starting_uvalue) + floor_ending_uvalue = pd.to_numeric( + self.df["floor_thermal_transmittance_ending"], errors="coerce" + ).fillna(floor_ending_uvalue) for component in ["walls", "roof", "floor"]: - self.df[f"{component}_thermal_transmittance"] = self.df[ - f"{component}_thermal_transmittance" - ].fillna(eval(f"{component}_starting_uvalue")) - self.df[f"{component}_thermal_transmittance_ending"] = self.df[ - f"{component}_thermal_transmittance_ending" - ].fillna(eval(f"{component}_ending_uvalue")) + self.df[f"{component}_thermal_transmittance"] = pd.to_numeric( + self.df[f"{component}_thermal_transmittance"], errors="coerce" + ).fillna(eval(f"{component}_starting_uvalue")) + self.df[f"{component}_thermal_transmittance_ending"] = pd.to_numeric( + self.df[f"{component}_thermal_transmittance_ending"], errors="coerce" + ).fillna(eval(f"{component}_ending_uvalue")) self.df = self.df.drop( columns=[ @@ -521,7 +522,7 @@ class TrainingDataset(BaseDataset): expanded_df["is_sandstone_or_limestone"] == expanded_df["is_sandstone_or_limestone_ending"] ) - ] + ] elif component == "floor": expanded_df = expanded_df[ (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"]) @@ -538,7 +539,7 @@ class TrainingDataset(BaseDataset): expanded_df["is_to_external_air"] == expanded_df["is_to_external_air_ending"] ) - ] + ] elif component == "roof": expanded_df = expanded_df[ (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"]) @@ -551,7 +552,7 @@ class TrainingDataset(BaseDataset): expanded_df["has_dwelling_above"] == expanded_df["has_dwelling_above_ending"] ) - ] + ] return expanded_df diff --git a/etl/epc/Record.py b/etl/epc/Record.py index ec4dad96..c1c3ff67 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -38,6 +38,8 @@ DATA_BUCKET = os.environ.get( "DATA_BUCKET", "retrofit-data-dev" if ENVIRONMENT == "dev" else None ) +pd.set_option("future.no_silent_downcasting", True) + @dataclass class EPCRecord: @@ -392,7 +394,7 @@ class EPCRecord: floor_height_data = self.cleaning_data[ (self.cleaning_data["property_type"] == self.prepared_epc["property-type"]) & (self.cleaning_data["built_form"] == self.prepared_epc["built-form"]) - ] + ] average = floor_height_data["floor_height"].mean() sd = floor_height_data["floor_height"].std() # If we're in the top 0.5 percentile of floor heights, we'll set it to the average diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 20f0c760..5d97a554 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -744,7 +744,7 @@ class Recommendations: # fairly regularly. A task has been added to planner to refactor this # We have observed an edge case where the fuel is described as not being community # but the hot water is. We handle as such - logger.warning("Hot water description not mapped: %s", heating_description) + logger.warning("Hot water description not mapped: %s", hotwater_description) mapped_hotwater = {"fuel": 'Unmapped', "cop": 0.9} return {