From c95c4aeb927ec4289531a9a20ab1647a5b8f22de Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 22 May 2024 19:18:48 +0100 Subject: [PATCH] placeholder - setting as_built to false when scoring wall insulation --- backend/Property.py | 7 +- backend/app/plan/router.py | 148 +++++++++++++++++++++++-- backend/ml_models/AnnualBillSavings.py | 9 +- backend/ml_models/api.py | 13 ++- etl/customers/eon/pilot_asset_list.py | 15 ++- recommendations/RoofRecommendations.py | 3 + 6 files changed, 177 insertions(+), 18 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 94861a3f..a1972b5b 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -379,6 +379,9 @@ class Property: if output["walls_energy_eff_ending"] not in ["Good", "Very Good"]: output["walls_energy_eff_ending"] = "Good" + # TODO TEMP - should be ending? + output["is_as_built"] = False + # Note: often when the wall is insulatied, the internal/external insulation is not noted so we should # test the impact of using these booleans if recommendation["type"] == "external_wall_insulation": @@ -450,14 +453,14 @@ class Property: 400, ] - proposed_depth = int(parts[0]["depth"]) + proposed_depth = recommendation.get("new_thickness") or int(parts[0]["depth"])  # new_thickness is None for flat roofs (and absent on older recommendations), so fall back to the part depth if proposed_depth not in valid_numeric_values: # Take the nearest value for scoring proposed_depth = min( valid_numeric_values, key=lambda x: abs(x - proposed_depth) ) - output["roof_insulation_thickness_ending"] = str(proposed_depth) + output["roof_insulation_thickness_ending"] = str(int(proposed_depth)) if recommendation["type"] == "loft_insulation": if proposed_depth >= 270: output["roof_energy_eff_ending"] = "Very Good" diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 1bbd6a7d..c6a26fcf 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -641,14 +641,10 @@ async def build_mds(body: PlanTriggerRequest): input_properties = [] for property_id, config in tqdm(enumerate(plan_input), total=len(plan_input)): # We 
validate each record in the file. If the record is NOT valid, we need to handle this accordingly - - if config["address"] in ["Flat Over 20 Holborough Road", "Flat above 7 Malling Road"]: - print("TEMP - not using uprn") - uprn = None - else: - uprn = config.get("uprn", None) - if uprn: - uprn = int(float(uprn)) + uprn = config.get("uprn", None) + uprn = None if uprn == "" else uprn + if uprn: + uprn = int(float(uprn)) epc_searcher = SearchEpc( address1=config["address"], @@ -661,6 +657,12 @@ async def build_mds(body: PlanTriggerRequest): epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) # For the moment, our OS API access is unavailable, so we skip and interpolate epc_searcher.find_property(skip_os=True) + + if config["address"] == "35b High Street": + print("Performing temporary patch") + epc_searcher.newest_epc["uprn"] = 10002911892 + epc_searcher.full_sap_epc["uprn"] = 10002911892 + # Create a record in db # TODO: If we productionise the creation of this mds report, we will need to store this in the db # property_id, is_new = create_property( @@ -729,7 +731,7 @@ async def build_mds(body: PlanTriggerRequest): photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET) logger.info("Getting spatial data") - for p in input_properties: + for p in tqdm(input_properties): p.get_spatial_data(uprn_filenames) logger.info("Getting components and epc recommendations") @@ -749,10 +751,134 @@ async def build_mds(body: PlanTriggerRequest): # Build the scoring data p.create_base_difference_epc_record(cleaned_lookup=cleaned) - p.simulate_all_representative_recommendations(property_representative_recommendations) + recommendations_scoring_data.append( + p.simulate_all_representative_recommendations(property_representative_recommendations) + ) - recommendations_scoring_data.extend(p.recommendations_scoring_data) + logger.info("Preparing data for scoring in sap change api") + 
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) + recommendations_scoring_data = recommendations_scoring_data.drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at) + + all_predictions = { + "sap_change_predictions": pd.DataFrame(), + "heat_demand_predictions": pd.DataFrame(), + "carbon_change_predictions": pd.DataFrame() + } + to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE) + for chunk in tqdm(to_loop_over, total=len(to_loop_over)): + predictions_dict = model_api.predict_all( + df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE], + bucket=get_settings().DATA_BUCKET, + prediction_buckets={ + "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET, + "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET, + "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET + } + ) + + # Append the predictions to the predictions dictionary + for key, scored in predictions_dict.items(): + all_predictions[key] = pd.concat([all_predictions[key], scored]) + + # We now produce a table of results for the mds report + + # TODO: TEMP + for p in plan_input: + if p["uprn"]: + p["uprn"] = str(int(float(p["uprn"]))) + + results = [] + for p in input_properties: + measures = p.measures + property_recommendations = [r['type'] for r in representative_recommendations[p.id]] + + # TODO: Check high heat retention storage heaters - looks like it's excluded controls! 
+ + sap_prediction = all_predictions["sap_change_predictions"][ + all_predictions["sap_change_predictions"]["property_id"] == str(p.id) + ] + + heat_demand_prediction = all_predictions["heat_demand_predictions"][ + all_predictions["heat_demand_predictions"]["property_id"] == str(p.id) + ] + + carbon_prediction = all_predictions["carbon_change_predictions"][ + all_predictions["carbon_change_predictions"]["property_id"] == str(p.id) + ] + + # Get a before and after for SAP, heat demand, CO2 and also calculate energy bill and energy savings + sap_before = int(p.data["current-energy-efficiency"]) + sap_after = sap_prediction["predictions"].values[0] if measures else sap_before + + epc_before = p.data["current-energy-rating"] + epc_after = sap_to_epc(sap_after) if measures else epc_before + + heat_demand_before = p.data["energy-consumption-current"] + heat_demand_after = heat_demand_prediction["predictions"].values[0] if measures else heat_demand_before + + carbon_before = p.data["co2-emissions-current"] + carbon_after = carbon_prediction["predictions"].values[0] if measures else carbon_before + + # Estimate bill savings + + from backend.ml_models.AnnualBillSavings import AnnualBillSavings + current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered( + epc_energy_consumption=heat_demand_before * p.floor_area, + current_epc_rating=epc_before, + ) + + # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are + # actually implemented + expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered( + epc_energy_consumption=heat_demand_after * p.floor_area, + current_epc_rating=epc_before, + ) + + # TODO: We should determine if the home is gas & electricity or just electricity + current_energy_bill = AnnualBillSavings.calculate_annual_bill( + current_adjusted_energy, + ) + expected_energy_bill = AnnualBillSavings.calculate_annual_bill( + expected_adjusted_energy, + ) + + bill_savings = current_energy_bill - 
expected_energy_bill + energy_savings = current_adjusted_energy - expected_adjusted_energy + + config = [c for c in plan_input if c["uprn"] == str(p.uprn)] + if not config: + config = {"address": None, "postcode": None} + else: + config = config[0] + + to_append = { + "config_address": config["address"], + "config_postcode": config["postcode"], + "address": p.address, + "postcode": p.postcode, + "measures": measures, + "property_recommendations": property_recommendations, + "year_of_epc": p.data['lodgement-date'], + "sap_before": sap_before, + "sap_after": sap_after, + "epc_before": epc_before, + "epc_after": epc_after, + "heat_demand_before": heat_demand_before, + "heat_demand_after": heat_demand_after, + "carbon_before": carbon_before, + "carbon_after": carbon_after, + "bill_savings": bill_savings, + "energy_savings": energy_savings, + } + results.append(to_append) + + results = pd.DataFrame(results) except IntegrityError: diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index 99d67126..b92077e4 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -43,15 +43,20 @@ class AnnualBillSavings: return cls.ELECTRICITY_PRICE_CAP * kwh @classmethod - def calculate_annual_bill(cls, kwh): + def calculate_annual_bill(cls, kwh, mains_gas=True): """ This method will estimate the total annual bill for a property It assumed gas & electricity are used :param kwh: The total kwh consumption + :param mains_gas: Whether the property uses mains gas :return: An estimate for annual bill """ - return cls.PRICE_FACTOR * kwh + (cls.DAILY_STANDARD_CHARGE_GAS + cls.DAILY_STANDARD_CHARGE_ELECTRICITY * 365) + if mains_gas: + return cls.PRICE_FACTOR * kwh + ( + cls.DAILY_STANDARD_CHARGE_GAS + cls.DAILY_STANDARD_CHARGE_ELECTRICITY) * 365  # both charges are named DAILY_*, so annualise both rather than only electricity + + return cls.ELECTRICITY_PRICE_CAP * kwh + (cls.DAILY_STANDARD_CHARGE_ELECTRICITY * 365) @classmethod def adjust_energy_to_metered(cls, epc_energy_consumption, 
current_epc_rating): diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index bdc7c178..a2024dd7 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -99,6 +99,13 @@ class ModelApi: # depending on how you want to handle errors in your application return None + @staticmethod + def extract_phase(recommendation_id): + # Read every leading digit after "phase=" (not just the first) so phases >= 10 are not truncated; None when no phase + tail = recommendation_id.partition('phase=')[2] + digits = tail[:len(tail) - len(tail.lstrip('0123456789'))] + return int(digits) if digits else None + def predict_all(self, df, bucket, prediction_buckets) -> dict: """ @@ -135,9 +142,11 @@ class ModelApi: # To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a # string split on phase= and then grab the second element of the resulting list. We could also use a # regular expression to do this but we use the string split method here, for safety. - predictions_df['phase'] = predictions_df['recommendation_id'].str.split('phase=').str[1].str[0] + # Not every recommendation_id contains "phase=", so instead of a vectorised str.split we apply the + # extract_phase helper, which returns the integer after "phase=" when present and None otherwise. + # The helper already yields an int (or None), so no separate astype(int) step is required. 
# Convert back to int - predictions_df['phase'] = predictions_df['phase'].astype(int) + predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase) predictions[model_prefix] = predictions_df diff --git a/etl/customers/eon/pilot_asset_list.py b/etl/customers/eon/pilot_asset_list.py index 5e6efbba..4f79e05e 100644 --- a/etl/customers/eon/pilot_asset_list.py +++ b/etl/customers/eon/pilot_asset_list.py @@ -211,6 +211,7 @@ def app(): # We now determine which measures we need for each property finalised_asset_list = [] for i, config in raw_asset_list.iterrows(): + asset_config = asset_list_with_uprn_df[ (asset_list_with_uprn_df["address"] == config["Address"]) & (asset_list_with_uprn_df["postcode"] == config["Postcode"]) @@ -223,11 +224,23 @@ def app(): # Get the property type pt = parse_property_type(config) + if config["Address"] in [ + "28 Hermitage Lane", + "35a High Street", + "35b High Street", + "Flat Over 20 Holborough Road", + "Flat above 7 Malling Road" + ]: + print(config["Address"]) + uprn = None + else: + uprn = asset_config["uprn"].values[0] + finalised_asset_list.append( { "address": config["Address"], "postcode": config["Postcode"], - "uprn": asset_config["uprn"].values[0], + "uprn": uprn, "n_bedrooms": config["No Bedrooms"], "measures": measures, **pt diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index 5424ab57..538d90e4 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -230,6 +230,7 @@ class RoofRecommendations: already_installed = "loft_insulation" in self.property.already_installed if already_installed: cost_result = override_costs(cost_result) + new_thickness = insulation_thickness + material["depth"] elif material["type"] == "flat_roof_insulation": cost_result = self.costs.flat_roof_insulation( floor_area=self.property.insulation_floor_area, @@ -239,6 +240,7 @@ class RoofRecommendations: already_installed = 
"flat_roof_insulation" in self.property.already_installed if already_installed: cost_result = override_costs(cost_result) + new_thickness = None else: raise ValueError("Invalid material type") @@ -259,6 +261,7 @@ class RoofRecommendations: "new_u_value": new_u_value, "sap_points": None, "already_installed": already_installed, + "new_thickness": new_thickness, **cost_result } )