diff --git a/.idea/Model.iml b/.idea/Model.iml index b0f9c00d..4413bb06 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 1122b380..6f308057 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/Property.py b/backend/Property.py index 109f70a9..fddea1b1 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1,20 +1,15 @@ -from datetime import datetime -import re import os - -import numpy as np +from itertools import groupby import pandas as pd -from etl.epc.DataProcessor import EPCDataProcessor from etl.epc.Dataset import TrainingDataset -from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, \ - BUILT_FORM_REMAP +from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map from etl.solar.SolarPhotoSupply import SolarPhotoSupply from utils.logger import setup_logger from utils.s3 import read_dataframe_from_s3_parquet from etl.epc.settings import DATA_ANOMALY_MATCHES -from recommendations.rdsap_tables import england_wales_age_band_lookup, FLOOR_LEVEL_MAP +from recommendations.rdsap_tables import FLOOR_LEVEL_MAP from recommendations.recommendation_utils import ( estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows ) @@ -172,11 +167,28 @@ class Property: previous_phase_representatives = [ r for r in property_representative_recommendations if r["phase"] in previous_phases ] + # For solid wall insulation, we will actually have 2 representative recommendations, since we consider + # both internal and external wall insulation as possible measures. We will use the representative that + # has the lowest efficiency. 
+ # Take the representative with the lowest efficiency, by phase + + # To be safe, we sort by phase + previous_phase_representatives = sorted(previous_phase_representatives, key=lambda x: x['phase']) + + previous_phase_representatives = [ + min(group, key=lambda x: x['efficiency']) for _, group in groupby( + previous_phase_representatives, key=lambda x: x['phase'] + ) + ] + recommendation_record = self.base_difference_record.df.to_dict("records")[0].copy() for rec in property_recommendations_by_phase: # We simulate the impact of the recommendation at this current phase, and all of the prior phases + if rec["type"] == "mechanical_ventilation": + continue + scoring_dict = self.create_recommendation_scoring_data( property_id=self.id, recommendation_record=recommendation_record, @@ -230,9 +242,10 @@ class Property: output["external_insulation"] = False output["internal_insulation"] = True + # TODO: perhaps detrimental # When making a recommendation for the wall, we will also update the ventilation - if output["mechanical_ventilation_ending"] == 'natural': - output["mechanical_ventilation_ending"] = 'mechanical, extract only' + # if output["mechanical_ventilation_ending"] == 'natural': + # output["mechanical_ventilation_ending"] = 'mechanical, extract only' else: if output["walls_thermal_transmittance_ending"] is None: @@ -328,7 +341,7 @@ class Property: output["photo_supply_ending"] = recommendation["photo_supply"] if recommendation["type"] not in [ - "mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting", + "sealing_open_fireplace", "low_energy_lighting", "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation", "loft_insulation", "room_roof_insulation", "flat_roof_insulation", "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation", diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index bc09f26c..bdc7c178 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -130,8 
+130,14 @@ class ModelApi: ) ) - predictions_df["predictions"] = predictions_df["predictions"].astype(float).round(1) + predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1) predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True) + # To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a + # string split on phase= and then grab the second element of the resulting list. We could also use a + # regular expression to do this but we use the string split method here, for safety. + predictions_df['phase'] = predictions_df['recommendation_id'].str.split('phase=').str[1].str[0] + # Convert back to int + predictions_df['phase'] = predictions_df['phase'].astype(int) predictions[model_prefix] = predictions_df diff --git a/recommendations/LightingRecommendations.py b/recommendations/LightingRecommendations.py index 6d50f0a2..352c4d8a 100644 --- a/recommendations/LightingRecommendations.py +++ b/recommendations/LightingRecommendations.py @@ -4,6 +4,9 @@ from recommendations.Costs import Costs class LightingRecommendations: + # We introduce a SAP limit to lighting, which is based on empirical findings. 
We do see cases where lighting is + # worth more than 2 points, but this is unlikely in the context of other upgrades that can be made to the property + SAP_LIMIT = 2 def __init__(self, property_instance: Property, materials: List): """ diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index f0cf7806..93472068 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -1,5 +1,6 @@ from backend.Property import Property from typing import List +from itertools import groupby from recommendations.FloorRecommendations import FloorRecommendations from recommendations.WallRecommendations import WallRecommendations from recommendations.RoofRecommendations import RoofRecommendations @@ -134,23 +135,32 @@ class Recommendations: has_u_value = recommendations_by_type[0].get("new_u_value") is not None has_sap_points = recommendations_by_type[0].get("sap_points") is not None - if has_u_value: - # We sort by the cost per U-value improvement - the lower the better - recommendations_by_type.sort( - key=lambda x: x["total"] / x["starting_u_value"] - x["new_u_value"] - ) - elif not has_u_value and has_sap_points: - # Sort the options by the cost per SAP point improvement - the lower the better - recommendations_by_type.sort( - key=lambda x: x["total"] / x["sap_points"] - ) - else: - # Sort the options by cost - the lower the better - recommendations_by_type.sort( - key=lambda x: x["total"] - ) + # We check if these recommendations have two different types, such as solid wall insulation + # If we have multiple types, we group by type and then select the best recommendation for each type - property_representative_recommendations.append(recommendations_by_type[0]) + recommendations_by_type = sorted(recommendations_by_type, key=lambda x: x["type"]) + representative_recommendations = [] + for type, recommendations in groupby(recommendations_by_type, key=lambda x: x["type"]): + recommendations = list(recommendations) + #
We also create an efficiency key, which is used to sort the recommendations + if has_u_value: + # We sort by the cost per U-value improvement - the lower the better + for rec in recommendations: + rec["efficiency"] = rec["total"] / rec["starting_u_value"] - rec["new_u_value"] + elif not has_u_value and has_sap_points: + # Sort the options by the cost per SAP point improvement - the lower the better + for rec in recommendations: + rec["efficiency"] = rec["total"] / rec["sap_points"] + else: + # Sort the options by cost - the lower the better + for rec in recommendations: + rec["efficiency"] = rec["total"] + + recommendations.sort( + key=lambda x: x["efficiency"] + ) + representative_recommendations.append(recommendations[0]) + property_representative_recommendations.extend(representative_recommendations) return property_representative_recommendations @@ -168,7 +178,7 @@ class Recommendations: for recs in property_recommendations: for rec in recs: - rec["recommendation_id"] = idx + rec["recommendation_id"] = f"{str(idx)}_phase={str(rec['phase'])}" idx += 1 return property_recommendations @@ -198,11 +208,16 @@ class Recommendations: property_recommendations = recommendations[property_instance.id].copy() + # We calculate the impact by phase + sap_phase_impact = property_sap_predictions.groupby("phase")["predictions"].median().reset_index() + heat_phase_impact = property_heat_predictions.groupby("phase")["predictions"].median().reset_index() + carbon_phase_impact = property_carbon_predictions.groupby("phase")["predictions"].median().reset_index() + for recommendations_by_type in property_recommendations: for rec in recommendations_by_type: - # We don't use the model for low energy lighting at the moment - if rec["type"] == "low_energy_lighting": + if rec["type"] == "mechanical_ventilation": + # We don't have a perceived sap impact of mechanical ventilation continue new_heat_demand = property_heat_predictions[property_heat_predictions["recommendation_id"] == str( @@
-216,22 +231,37 @@ class Recommendations: new_sap = property_sap_predictions[property_sap_predictions["recommendation_id"] == str( rec["recommendation_id"] )]["predictions"].values[0] - rec["sap_points"] = new_sap - float(property_instance.data["current-energy-efficiency"]) - if rec["type"] == "mechanical_ventilation": + if rec["phase"] == 0: + rec["sap_points"] = new_sap - float(property_instance.data["current-energy-efficiency"]) + rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon + rec["heat_demand"] = property_instance.floor_area * ( + float(property_instance.data["energy-consumption-current"]) - new_heat_demand + ) + else: + + previous_phase = rec["phase"] - 1 + rec["sap_points"] = ( + new_sap - sap_phase_impact[sap_phase_impact["phase"] == previous_phase]["predictions"].values[0] + ) + rec["co2_equivalent_savings"] = ( + carbon_phase_impact[carbon_phase_impact["phase"] == previous_phase]["predictions"].values[0] - + new_carbon + ) + rec["heat_demand"] = property_instance.floor_area * ( + heat_phase_impact[heat_phase_impact["phase"] == previous_phase]["predictions"].values[0] - + new_heat_demand + ) + + if rec["type"] == "low_energy_lighting": # For the moment, we cap the number of SAP points that can be achieved by ventilation at 2 - rec["sap_points"] = min(rec["sap_points"], VentilationRecommendations.SAP_LIMIT) + rec["sap_points"] = min(rec["sap_points"], LightingRecommendations.SAP_LIMIT) # Round to 2 decimal places rec["sap_points"] = round(rec["sap_points"], 2) - rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon # Energy consumption current is per meter squared, so we need to multiply by the floor area to get # an absolute figure for the home - rec["heat_demand"] = ( - (float(property_instance.data["energy-consumption-current"]) - new_heat_demand - ) * property_instance.floor_area) - rec["energy_cost_savings"] = 
AnnualBillSavings.estimate(rec["heat_demand"]) if (rec["sap_points"] is None) and (rec["co2_equivalent_savings"] is None) or ( diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py index 7241cdec..19aad0e1 100644 --- a/recommendations/VentilationRecommendations.py +++ b/recommendations/VentilationRecommendations.py @@ -15,9 +15,6 @@ class VentilationRecommendations(Definitions): 'mechanical, supply and extract' ] - # We introduce a SAP limit, to prevent over-predicting the SAP impact of mechanical ventilation - SAP_LIMIT = 2 - def __init__( self, property_instance: Property, @@ -68,7 +65,7 @@ class VentilationRecommendations(Definitions): "description": f"Install {n_units} {part[0]['description']} units", "starting_u_value": None, "new_u_value": None, - "sap_points": None, + "sap_points": 0, "total": estimated_cost, # We use a very simple and rough estimate of 4 hours per unit "labour_hours": 4 * n_units,