diff --git a/backend/Property.py b/backend/Property.py index d66db529..45c7b3e5 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -721,13 +721,6 @@ class Property: ]["predictions"].values[0] ) - # heating_prediction = ( - # float(condition_data["space_heating_kwh"]) if condition_data.get("space_heating_kwh") is not None - # else energy_consumption_client.score_new_data( - # new_data=scoring_df, target="heating_kwh" - # )[0] - # ) - hot_water_prediction = ( condition_data.get("water_heating_kwh") if condition_data.get("water_heating_kwh") is not None else hotwater_kwh_predictions[ @@ -735,23 +728,16 @@ class Property: ]["predictions"].values[0] ) - # hot_water_prediction = ( - # float(condition_data["water_heating_kwh"]) if condition_data.get("water_heating_kwh") is not None - # else energy_consumption_client.score_new_data( - # new_data=scoring_df, target="hot_water_kwh" - # )[0] - # ) - # We convert the lighting cost into kwh, just using the price cap lighting_kwh = todays_lighting_cost / AnnualBillSavings.ELECTRICITY_PRICE_CAP appliances_kwh = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area) unadjusted_kwh_estimates = { - "heating": heating_prediction, - "hot_water": hot_water_prediction, - "lighting": lighting_kwh, - "appliances": appliances_kwh + "heating": float(heating_prediction), + "hot_water": float(hot_water_prediction), + "lighting": float(lighting_kwh), + "appliances": float(appliances_kwh) } adjusted_kwh_estimates = { @@ -762,10 +748,10 @@ class Property: } unadjusted_heating_costs = { - "heating": todays_heating_cost, - "hot_water": todays_hot_water_cost, - "lighting": todays_lighting_cost, - "appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP + "heating": float(todays_heating_cost), + "hot_water": float(todays_hot_water_cost), + "lighting": float(todays_lighting_cost), + "appliances": float(appliances_kwh) * AnnualBillSavings.ELECTRICITY_PRICE_CAP } adjusted_heating_costs = { diff --git 
a/backend/app/plan/router.py b/backend/app/plan/router.py index 47478b3c..05f8f88f 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -326,7 +326,6 @@ async def trigger_plan(body: PlanTriggerRequest): input_properties = [] for config in tqdm(plan_input): - # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly uprn = config.get("uprn", None) if uprn: @@ -782,7 +781,7 @@ async def trigger_plan(body: PlanTriggerRequest): predictions_dict = model_api.predict_all( df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE], bucket=get_settings().DATA_BUCKET, - prediction_buckets=get_prediction_buckets() + prediction_buckets=get_prediction_buckets(), ) # Append the predictions to the predictions dictionary @@ -791,10 +790,6 @@ async def trigger_plan(body: PlanTriggerRequest): # We now produce predictions for the kwh models - # TODO!!!!! In order to score the kwh models, we need to insert the new SAP, heat demand, carbon, cost - # etc values, into the simulated EPC, otherwise it won't work. 
We might also want to drop all potential - # columns and env-efficiency columns (POTENTIAL COLUMNS ALREADY GONE, JUST NEED TO DROP ENV EFFICIENCY) - # Insert the predictions into the recommendations and run the optimiser # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a # possibility with heating system diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py index 15a52663..df95f8e2 100644 --- a/etl/bill_savings/data_collection.py +++ b/etl/bill_savings/data_collection.py @@ -131,7 +131,6 @@ def app(): sample_size = 500 energy_consumption_data = [] - cavity_walls_data = [] for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)): # Skip the first 50 diff --git a/etl/testing_data/bills_model_testing.py b/etl/testing_data/bills_model_testing.py index 0c9bb06d..c10bbd8a 100644 --- a/etl/testing_data/bills_model_testing.py +++ b/etl/testing_data/bills_model_testing.py @@ -58,3 +58,208 @@ def app(): "budget": None, } print(body) + + +# This is some temp code, which is for diagnosing the issues with the bills models +heating_training_data_filepath = "sap_change_model/2024-08-06-11-19-49/dataset_rooms.parquet" + +# For the heating model: +heating_drop_columns = [ + "sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", + "lighting_cost_ending", "hot_water_cost_ending", + # "days_to_ending", "days_to_starting", # TODO This is in the live version + 'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', + 'number_heated_rooms_ending', + 'number_habitable_rooms', 'number_heated_rooms' +] + +heating_response = "heating_cost_ending" + +# for the hot water model (older dataset) +hot_water_training_data_filepath = "sap_change_model/2024-07-10-20-28-54/dataset_rooms.parquet" + +hot_water_drop_columns = [ + "sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", 
"heat_demand_ending", "carbon_ending", + "lighting_cost_ending", "heating_cost_ending", + "days_to_starting", "days_to_ending", + 'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', + 'number_heated_rooms_ending', + 'number_habitable_rooms', 'number_heated_rooms' +] + +# Diagnose heating +from utils.s3 import read_dataframe_from_s3_parquet + +train = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", + file_key=heating_training_data_filepath +) + +# Drop the columns that aren't used +train = train.drop(columns=heating_drop_columns) + +# if the value is positive, it means the ending cost is bigger than the starting (which means it got more expensive) +train["cost_diference"] = (train["heating_cost_ending"] - train["heating_cost_starting"]) +change_direction = train["cost_diference"] > 0 +change_direction.value_counts(normalize=True) + +average_costs_by_time_starting = train.groupby( + ["lodgement_year_starting", "lodgement_month_starting"] +)["heating_cost_starting"].mean().reset_index().sort_values(["lodgement_year_starting", "lodgement_month_starting"]) + +average_costs_by_time_ending = train.groupby( + ["lodgement_year_ending", "lodgement_month_ending"] +)["heating_cost_ending"].mean().reset_index().sort_values(["lodgement_year_ending", "lodgement_month_ending"]) + +# Check by photo supply values - if the property is gas, solar panels won't have an effect on the heating or hot +# water so let's look for electric homes +# Across the entire dataset, there is no correlation +# Even for electric properties, there is no correlation +photo_supply_averages = train[ + train["fuel_type_ending"] == "electricity" + ].groupby(["photo_supply_ending"])["heating_cost_ending"].mean().reset_index() + +photo_supply_to_size = train.groupby("photo_supply_ending")["total_floor_area_ending"].mean().reset_index() +photo_supply_to_size[["photo_supply_ending", "total_floor_area_ending"]].corr() 
+train[["total_floor_area_ending", "heating_cost_ending"]].corr() +# Bigger properties end up with smaller photo_supply values. This will be because the array size likely remains fairly +# consistent but takes up a smaller proportion of the roof. Typically, the bigger the floor area, the higher the heating +# costs, but bigger units also have smaller photo_supply +adding_solar = train[ + (train["photo_supply_ending"] > 0) & (train["photo_supply_starting"] == 0) + ] +is_positive = (adding_solar["cost_diference"] > 0) +is_positive.value_counts(normalize=True) + +photo_supply_by_time = ( + train[ + train["fuel_type_ending"] == "electricity" + ].groupby( + ["lodgement_year_ending", "photo_supply_ending"] + )["heating_cost_ending"].mean().reset_index().sort_values( + ["lodgement_year_ending", "photo_supply_ending"], ascending=True) +) +# Plot +photo_supply_by_time[["photo_supply_ending", "heating_cost_ending"]].corr() +photo_supply_by_time.plot() + +# Observations +# 1) We retain all of the potential columns, however they are just based on the starting EPC +# 2) 21% of the time, the ending heating cost is more than the starting but this is clearly a minority +# 3) Let's get rid of estimated perimeter starting and ending + +# Things I should check +# 1) Do we update the lodgement_year_ending and lodgement_month_ending +# 2) Should we adjust costs to now, as well as lodgement_dates to today? 
Since 2023, costs have increased a lot so +# any savings should be benchmarked against what a customer is paying now +# 3) It might make sense to create a feature between floor area and photo supply, to give a more consistent estimate +# of a panel size for the property + +# Get an example and score with the models +example = train[ + (train["photo_supply_starting"] == 0) & + (train["photo_supply_ending"] > 0) & + (train["heating_cost_starting"] > train["heating_cost_ending"]) + ].sample(1) + +# example["lodgement_month_starting"] +# example["lodgement_year_starting"] +# example["lodgement_month_ending"] +# example["lodgement_year_ending"].values[0] +# +# example["lodgement_year_ending"] = 2023 +# example["days_to_ending"] = 3500 +# example["days_to_starting"] + +# {'heating_cost_predictions': predictions +# 0 378.5} +resp = model_api.predict_all( + df=example, + bucket="retrofit-data-dev", + prediction_buckets=get_prediction_buckets(), + model_prefixes=["heating_cost_predictions"], + extract_ids=False +) + +# Step 1: get a cost for today +p.create_base_difference_epc_record(cleaned) +cwi_impact = p.base_difference_record.df.copy() +for k in property_recommendations[0][0]["simulation_config"]: + cwi_impact[k] = property_recommendations[0][0]["simulation_config"][k] + +# 2212.4 - Baseline +today = model_api.predict_all( + df=p.base_difference_record.df.copy(), + bucket="retrofit-data-dev", + prediction_buckets=get_prediction_buckets(), + model_prefixes=["heating_cost_predictions"], + extract_ids=False +) + +# impact of CWI - 1908 +cwi_response = model_api.predict_all( + df=cwi_impact, + bucket="retrofit-data-dev", + prediction_buckets=get_prediction_buckets(), + model_prefixes=["heating_cost_predictions"], + extract_ids=False +) + +pv_impact = cwi_impact.copy() +pv_impact["photo_supply_ending"] = 50 +pv_impact["heating_cost_starting"] = 2212.4 + +pv_response = model_api.predict_all( + df=pv_impact, + bucket="retrofit-data-dev", + 
prediction_buckets=get_prediction_buckets(), + model_prefixes=["heating_cost_predictions"], + extract_ids=False +) + +# Testing kwh for vde +base_prediction = model_api.predict_all( + df=epcs_for_scoring, + bucket=get_settings().DATA_BUCKET, + prediction_buckets=get_prediction_buckets(), + model_prefixes=["heating_kwh_predictions"], + extract_ids=False +) + +cwi_epc = epcs_for_scoring.copy() +cwi_epc["walls-description"] = "Cavity wall, filled cavity" +cwi_epc["walls-energy-eff"] = "Good" +cwi_epc["heating-cost-current"] = 1650 +cwi_epc["current-energy-efficiency"] = 72 +cwi_epc["current-energy-rating"] = "C" +cwi_epc["co2-emissions-current"] = 3.7 +cwi_epc["energy-consumption-current"] = 121 +cwi_epc["co2-emiss-curr-per-floor-area"] = 19 +cwi_epc["photo-supply"] = 0 +# cwi_epc["energy-consumption-current"] = +# cwi_epc["roof-description"] = "Pitched, 300 mm loft insulation" +# cwi_epc["roof-energy-eff"] = "Very Good" +# cwi_epc["heating-cost-current"] = 1264 + +# "heating-cost-current": rec_impact["epc_heating_cost"], +# "hot-water-cost-current": rec_impact["epc_hot_water_cost"], +# # CO₂ emissions per square metre floor area per year in kg/m². 
Since CO₂ emissions are in tonnes +# # per year, we multiply by 1000 to get kg/m² +# "co2-emiss-curr-per-floor-area": round( +# 1000 * (rec_impact["carbon"] / self.data["total-floor-area"]) +# ), +# "co2-emissions-current": rec_impact["carbon"], +# "current-energy-rating": sap_to_epc(rec_impact["sap"]), +# "current-energy-efficiency": int(np.floor(rec_impact["sap"])), +# "energy-consumption-current": rec_impact["heat_demand"], +# "lighting-cost-current": rec_impact["epc_lighting_cost"], +# "id": "+".join([str(self.id), rec_id]) + +cwi_prediction = model_api.predict_all( + df=cwi_epc, + bucket=get_settings().DATA_BUCKET, + prediction_buckets=get_prediction_buckets(), + model_prefixes=["heating_kwh_predictions"], + extract_ids=False +) +2344 - 2060 diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index 92451d76..f5394abf 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -166,6 +166,7 @@ def main(): # For each property, we download the xmls and extract the data database_data = [] for uprn, xmls in assessments_map.items(): + extracted_data = {} for xml in xmls: xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml) diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 3e7ede28..9456519a 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -160,7 +160,7 @@ class SolarPvRecommendations: if not non_invasive_recommendation["suitable"]: return - if non_invasive_recommendation: + if non_invasive_recommendation.get("array_wattage") is not None: roof_area = esimtate_pitched_roof_area( floor_area=self.property.insulation_floor_area, floor_height=self.property.data["floor-height"] @@ -186,7 +186,7 @@ class SolarPvRecommendations: cost_result = self.costs.solar_pv( wattage=recommendation_config["array_wattage"], has_battery=has_battery, - array_cost=non_invasive_recommendation["cost"] if 
non_invasive_recommendation else None + array_cost=non_invasive_recommendation.get("cost", None) ) kw = np.floor(recommendation_config["array_wattage"] / 100) / 10 if has_battery: