From ad3ba924754b33c2a509c4aa54550cf156a823a0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 17 Jan 2025 18:53:04 +0000 Subject: [PATCH] fixing route march data pull --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/SearchEpc.py | 13 +- backend/app/plan/router.py | 58 +++--- backend/ml_models/AnnualBillSavings.py | 8 +- etl/customers/cambridge/remote_assessment.py | 8 +- etl/customers/l_and_g/ic_asset_list.py | 166 ++++++++++++++++++ etl/find_my_epc/RetrieveFindMyEpc.py | 8 +- etl/route_march_data_pull/app.py | 18 +- recommendations/Recommendations.py | 13 ++ recommendations/RoofRecommendations.py | 4 + .../optimiser/optimiser_functions.py | 2 +- 12 files changed, 254 insertions(+), 48 deletions(-) create mode 100644 etl/customers/l_and_g/ic_asset_list.py diff --git a/.idea/Model.iml b/.idea/Model.iml index df6c4faa..762580d9 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..c916a158 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 8ec4fdbe..d916f82f 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -693,9 +693,20 @@ class SearchEpc: estimated_epc[variable] = str(int(estimated_epc[variable])) # This is a string - estimated_epc["low-energy-fixed-light-count"] = str(estimated_epc["low-energy-fixed-light-count"]) + estimated_epc["low-energy-fixed-light-count"] = ( + str(estimated_epc["low-energy-fixed-light-count"]) if estimated_epc["low-energy-fixed-light-count"] else "" + ) + # This is an int + estimated_epc["photo-supply"] = ( + int(np.round(estimated_epc["photo-supply"])) if estimated_epc["photo-supply"] else estimated_epc[ + "photo-supply"] + ) estimated_epc["postcode"] = self.postcode + if not self.uprn: + # Update self.uprn too + self.uprn = hash(self.address1 + self.postcode) + estimated_epc["uprn"] = self.uprn estimated_epc["address"] = self.full_address # Indicate that this epc was estimated diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index fb896659..1989a363 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -407,6 +407,7 @@ async def trigger_plan(body: PlanTriggerRequest): plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path) # Check for duplicate UPRNS input_uprns = [x.get("uprn") for x in plan_input if "uprn" in x and x.get("uprn")] + if input_uprns: # Check for dupes if len(input_uprns) != len(set(input_uprns)): @@ -680,37 +681,42 @@ async def trigger_plan(body: PlanTriggerRequest): input_measures = prepare_input_measures(recommendations[p.id], body.goal) - current_sap_points = int(p.data["current-energy-efficiency"]) - target_sap_points = epc_to_sap_lower_bound(body.goal_value) - sap_gain = CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points) - - if not body.optimise: - if body.goal != "Increasing EPC": - raise NotImplementedError("Only EPC optimisation is currently supported") - solution = [] - for sub_list in input_measures: - # Select the entry with the highest gain, and if tied, choose the one with the lowest cost - best_measure = max(sub_list, key=lambda x: (x['gain'], -x['cost'])) - solution.append(best_measure) + if not input_measures[0]: + # This means that we have no defaults + selected_recommendations = {} else: - if body.budget: - optimiser = GainOptimiser( - input_measures, max_cost=body.budget, max_gain=sap_gain if sap_gain > 0 else 0 - ) + current_sap_points = int(p.data["current-energy-efficiency"]) + target_sap_points = epc_to_sap_lower_bound(body.goal_value) + sap_gain = CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points) + + if not body.optimise: + if body.goal != "Increasing EPC": + raise NotImplementedError("Only EPC optimisation is currently supported") + solution = [] + for sub_list in input_measures: + # Select the entry with the highest gain, and if tied, choose the one with the lowest cost + best_measure = max(sub_list, key=lambda x: (x['gain'], -x['cost'])) + solution.append(best_measure) else: - # The minimum gain is the minimum number of SAP points required to get to the target SAP band - # If the gain is negative, the optimiser will return an empty solution - optimiser = CostOptimiser( - input_measures, - min_gain=sap_gain - ) - optimiser.setup() - optimiser.solve() - solution = optimiser.solution + if body.budget: + optimiser = GainOptimiser( + input_measures, max_cost=body.budget, max_gain=sap_gain if sap_gain > 0 else 0 + ) + else: + # The minimum gain is the minimum number of SAP points required to get to the target SAP band + # If the gain is negative, the optimiser will return an empty solution + optimiser = CostOptimiser( + input_measures, + min_gain=sap_gain + ) - selected_recommendations = {r["id"] for r in solution} + optimiser.setup() + optimiser.solve() + solution = optimiser.solution + + selected_recommendations = {r["id"] for r in solution} # If wall insulation is selected, we also include mechanical ventilation as a best practice measure if any(x in [r["type"] for r in solution] for x in [ diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index 211e5ea6..b22837d8 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -28,8 +28,8 @@ class AnnualBillSavings: # Latest price cap figures from Ofgem are for April 2024 # https://www.ofgem.gov.uk/energy-price-cap - ELECTRICITY_PRICE_CAP = 0.2236 - GAS_PRICE_CAP = 0.0548 + ELECTRICITY_PRICE_CAP = 0.2486 + GAS_PRICE_CAP = 0.0634 # This is the most recent export payment figure, at 9.28p/kWh # Smart export guarantee rates can be found here: # https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates @@ -39,8 +39,8 @@ class AnnualBillSavings: PRICE_FACTOR = 0.09549999999999999 # Daily standard charge, based on average across England, Scotland and Wales, and includes VAT - DAILY_STANDARD_CHARGE_GAS = 0.3143 - DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601 + DAILY_STANDARD_CHARGE_GAS = 0.3165 + DAILY_STANDARD_CHARGE_ELECTRICITY = 0.6097 # Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison # For July 2024. These quotes are based on the east midlands region, so we diff --git a/etl/customers/cambridge/remote_assessment.py b/etl/customers/cambridge/remote_assessment.py index 3f152e79..dc5beff5 100644 --- a/etl/customers/cambridge/remote_assessment.py +++ b/etl/customers/cambridge/remote_assessment.py @@ -21,10 +21,10 @@ def app(): "property_type": "House", "built-form": "Semi-Detached" }, { - "address": "21 High Street", "postcode": "CB23 8AB", "uprn": 100090136026 + "address": "21 High Street", "postcode": "CB23 8AB", "uprn": 100090144815 }, { - "address": "22 High Street", "postcode": "CB23 8AB", "uprn": 100090136027 + "address": "22 High Street", "postcode": "CB23 8AB", "uprn": 100090144816 }, { "address": "5 Bunkers Hill", "postcode": "CB3 0LY", "uprn": 10008078615 @@ -52,8 +52,8 @@ def app(): valuations_data = [ {'uprn': 100090136018, "valuation": 586_000}, - {'uprn': 100090136026, "valuation": 551_000}, - {'uprn': 100090136027, "valuation": 844_000}, + {'uprn': 100090144815, "valuation": 446_000}, + {'uprn': 100090144816, "valuation": 448_000}, {'uprn': 10008078615, "valuation": 763_000}, {'uprn': 10008078616, "valuation": 616_000}, {'uprn': 10008078617, "valuation": 593_000}, diff --git a/etl/customers/l_and_g/ic_asset_list.py b/etl/customers/l_and_g/ic_asset_list.py new file mode 100644 index 00000000..d0966bdf --- /dev/null +++ b/etl/customers/l_and_g/ic_asset_list.py @@ -0,0 +1,166 @@ +""" +This script prepares the asset list for modelling the properties from the L&Q dataset, for their January IC +""" + +import pandas as pd +import numpy as np + +from etl.route_march_data_pull.app import get_data +from utils.s3 import save_csv_to_s3 + +PORTFOLIO_ID = 124 +USER_ID = 8 + + +def app(): + asset_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon information for Domna/Basildon MDS v1.4 " + "(1).xlsx", + sheet_name="Basildon", + header=5 + ) + + asset_data = asset_data.head(-3) + + asset_data["address1"] = np.where( + pd.isnull(asset_data["Address 1"]), + asset_data["Address 2"], + asset_data["Address 1"] + ) + + asset_data["full_address"] = np.where( + pd.isnull(asset_data["Address 1"]), + asset_data["Address 2"] + ", " + asset_data["Address 3"], + asset_data["Address 1"] + ", " + asset_data["Address 2"] + ", " + asset_data["Address 3"], + ) + + asset_list = asset_data[["address1", "PostCode", "full_address", "Bedrooms"]] + + asset_list = asset_list.reset_index(drop=True) + + asset_list["row_id"] = asset_list.index + + # L&G's focus: + # Measures: loft and cavity insulation, replacement thermally efficient windows, PV cells, AS heat pumps. + + epc_data, errors, no_epc = get_data( + asset_list=asset_list, + fulladdress_column="full_address", + address1_column="address1", + postcode_column="PostCode", + manual_uprn_map={} + ) + + missed = asset_list[ + asset_list["row_id"].isin(no_epc) + ] + + # We merge on the property types, where we have them + missed = missed.merge( + asset_data[["address1", "PostCode", "Property Type"]], + how="left", + on=["address1", "PostCode"] + ) + # Remap Block: Residential to Flat + missed["Property Type"] = np.where( + missed["Property Type"] == "Block: Residential", + "Flat", + missed["Property Type"] + ) + + # We create the asset list - we have some properties that genuninely never had an EPC + + epc_df = pd.DataFrame(epc_data) + fetched_asset_list = epc_df[["address1", "postcode", "uprn", "row_id"]] + fetched_asset_list = fetched_asset_list.merge( + asset_list[["row_id", "Bedrooms"]], + how="left", + on=["row_id"] + ) + + missed = missed.rename(columns={"PostCode": "postcode"}).drop(columns=["row_id"]) + + # missed.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/missed_epcs.csv") + missed_uprns = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/missed_epcs_uprn.csv", + ) + + missed = missed.merge( + missed_uprns[["address1", "postcode", "UPRN"]].rename( + columns={"UPRN": "uprn"}, + ), + how="left", + on=["address1", "postcode"] + ) + + fetched_asset_list = fetched_asset_list.drop(columns=["row_id"]) + # We concatename them + final_asset_list = pd.concat( + [fetched_asset_list, missed[["address1", "postcode", "Property Type", "Bedrooms", "uprn"]]] + ) + + final_asset_list = final_asset_list.rename( + columns={ + "address1": "address", + "Property Type": "property_type", + "Bedrooms": "n_bedrooms" + } + ) + + # Finally, we merge on the numeber of bedrooms + + # Extract the non-invasive recommendations: + non_invasive_recommendations = [] + for x in epc_data: + non_invasive_recommendations.append( + { + "uprn": x["uprn"], + "recommendations": x["find_my_epc_data"]["recommendations"] + } + ) + + filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv" + save_csv_to_s3( + dataframe=pd.DataFrame(final_asset_list), + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + # Store the non-invasive recommendations in s3 + non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv" + save_csv_to_s3( + dataframe=pd.DataFrame(non_invasive_recommendations), + bucket_name="retrofit-plan-inputs-dev", + file_name=non_invasive_recommendations_filename + ) + + # Store the valuations data in s3 + # valuations_filename = f"{USER_ID}/{PORTFOLIO_ID}/valuations.csv" + # save_csv_to_s3( + # dataframe=pd.DataFrame(valuations_data), + # bucket_name="retrofit-plan-inputs-dev", + # file_name=valuations_filename + # ) + + body = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Private", + "goal": "Increasing EPC", + "goal_value": "A", + "trigger_file_path": filename, + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, + "valuation_file_path": "", + "scenario_name": "Retrofit Packages", + "multi_plan": True, + "budget": None, + "inclusions": [ + "cavity_wall_insulation", + "loft_insulation", + "windows", + "solar_pv", + "air_source_heat_pump" + ] + } + print(body) diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index 5ea35a64..cd960151 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -300,6 +300,8 @@ class RetrieveFindMyEpc: "Fan assisted storage heaters": [], "Fan-assisted storage heaters": [], "Step 1:": [], + "Step 2:": [], + 'Step 3:': [], "Biomass stove with boiler": [], "Replace boiler with biomass boiler": [], "Heating controls (room thermostat and thermostatic radiator valves)": [ @@ -308,7 +310,11 @@ class RetrieveFindMyEpc: "Heating controls (programmer, and thermostatic radiator valves)": [ "roomstat_programmer_trvs", "time_temperature_zone_control" ], - "Replacement warm air unit": [] + "Heating controls (programmer and TRVs)": [ + "roomstat_programmer_trvs", "time_temperature_zone_control" + ], + "Replacement warm air unit": [], + "Secondary glazing": ["secondary_glazing"] } survey = True diff --git a/etl/route_march_data_pull/app.py b/etl/route_march_data_pull/app.py index 9ed55185..f2889975 100644 --- a/etl/route_march_data_pull/app.py +++ b/etl/route_march_data_pull/app.py @@ -1,6 +1,5 @@ import os import time -from idlelib.iomenu import errors import pandas as pd import numpy as np @@ -25,7 +24,6 @@ def get_data(asset_list, fulladdress_column, address1_column, postcode_column, m epc_data = [] errors = [] no_epc = [] - # home = asset_list[asset_list["row_id"] == errors[5]].squeeze() for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)): try: postcode = home[postcode_column] @@ -154,13 +152,13 @@ def app(): Property UPRN """ - DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Watford" - DATA_FILENAME = "JS Mailing List 10122024.xlsx" - SHEET_NAME = "Export" + DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches" + DATA_FILENAME = "Full Below SAP C Stock - RN Copy.xlsx" + SHEET_NAME = "Electric Properties" POSTCODE_COLUMN = "Postcode" - FULLADDRESS_COLUMN = "Property Address" - ADDRESS1_COLUMN = "Address Line 1" - ADDRESS1_METHOD = None + FULLADDRESS_COLUMN = "Address" + ADDRESS1_COLUMN = None + ADDRESS1_METHOD = "first_two_words" ADDRESS_COLS_TO_CONCAT = [] # Maps addresses to uprn in problematic cases @@ -372,7 +370,9 @@ def app(): how="left", on="row_id" ) - asset_list = asset_list.drop(columns=["row_id"]) + asset_list = asset_list.drop(columns=["row_id", "index"]) + + asset_list[asset_list["Assessor’s name"] == "Robin Bailey"]["Assessor's Email"].value_counts() # Store as an excel filename = os.path.join(DATA_FOLDER, ".".join(DATA_FILENAME.split(".")[:-1])) + " EPC Data Pull - Main.xlsx" diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 189581d8..c34ff92b 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -28,6 +28,9 @@ class Recommendations: High level recommendations class, which sits above the measure specific recommendation classes """ + # Constant for carbon intensity calculation, as of 16th Jan 2025 + CARBON_INTENSITY = 0.232 + def __init__( self, property_instance: Property, @@ -691,6 +694,10 @@ class Recommendations: """ This method inserts the kwh savings and the bill savings that the customer will make from the recommendations based on the predictions from the ML model + + It also ensures we base our solar savings and solar carbon savings from the calculations based on + the solar API and size of the array, instead of ML model + :param property_instance: Instance of the Property class, for the home associated to property_id :param kwh_simulation_predictions: dictionary of predictions from the model apis :param property_recommendations: dictionary of recommendations for the property @@ -824,6 +831,12 @@ class Recommendations: if rec["type"] == "solar_pv": rec["kwh_savings"] = rec_impact["solar_kwh_savings"].values[0] + + # Calculate carbon savings from this + emissions_kg = rec["kwh_savings"] * cls.CARBON_INTENSITY # Calculate emissions in kg + emissions_tonnes = emissions_kg / 1000 + + rec["co2_equivalent_savings"] = emissions_tonnes rec["energy_cost_savings"] = ( rec_impact["solar_kwh_savings"].values[0] * AnnualBillSavings.ELECTRICITY_PRICE_CAP ) diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index 6778e886..b7e34406 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -138,6 +138,10 @@ class RoofRecommendations: u_value = self.property.roof["thermal_transmittance"] + # If we have a flat roof but we don't have flat roof as a measure, we exit + if self.property.roof["is_flat"] and "flat_roof_insulation" not in measures: + return + # We check if the roof is already insulated and if so, we exit # Building regulations part L recommend installing at least 270mm of insulation, however generally we diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index c1123e3d..223b1f82 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -31,7 +31,7 @@ def prepare_input_measures(property_recommendations, goal): "gain": rec[goal_key], "type": rec["type"] } - for rec in recs + for rec in recs if rec["energy_cost_savings"] >= 0 ] )