From 891545804e855c9262045711647ee20be0a53853 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 6 Aug 2024 17:04:58 +0100 Subject: [PATCH] simplified extraction of costs and kwh predictions --- backend/Property.py | 166 +++++++++------------ backend/app/config.py | 6 +- backend/app/plan/router.py | 38 +++-- backend/ml_models/api.py | 34 +++-- etl/bill_savings/EnergyConsumptionModel.py | 11 +- etl/customers/newhaven/newhaven_study.py | 49 +++++- recommendations/WallRecommendations.py | 4 + 7 files changed, 174 insertions(+), 134 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 1e241b04..600e9b03 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -235,12 +235,9 @@ class Property: def parse_kwargs(self, kwargs): # We extract the elements from kwargs that we recognise. Anything additional is ignored - self.n_bathrooms = kwargs.get("n_bathrooms", None) - self.n_bedrooms = kwargs.get("n_bedrooms", None) - self.building_id = kwargs.get("building_id", None) - self.number_of_floors = kwargs.get("number_of_floors", None) - self.insulation_floor_area = kwargs.get("insulation_floor_area", None) - self.insulation_wall_area = kwargs.get("insulation_wall_area", None) + for arg, val in kwargs.items(): + if val is not None: + setattr(self, arg, val) def create_base_difference_epc_record(self, cleaned_lookup: dict): """ @@ -574,7 +571,8 @@ class Property: def get_components( self, cleaned, - energy_consumption_client + energy_consumption_client, + kwh_predictions ): """ Given the cleaning that has been performed, we'll use this to identify the property @@ -582,6 +580,7 @@ class Property: :param cleaned: This is the dictionary of components found in cleaner.cleaned :param energy_consumption_client: Contains the heating and hot water kwh models - used to predict current energy annual consumption in kWh + :param kwh_predictions: Contains the kwh predictions for heating and hot water :return: """ @@ -646,7 +645,7 @@ class Property: self.set_windows_count() self.set_energy_source() self.find_energy_sources() - self.set_current_energy_bill(energy_consumption_client) + self.set_current_energy_bill(energy_consumption_client, kwh_predictions) def set_solar_panel_configuration( self, solar_panel_configuration, roof_area @@ -659,7 +658,7 @@ class Property: # We also set the roof area self.roof_area = roof_area - def set_current_energy_bill(self, energy_consumption_client): + def set_current_energy_bill(self, energy_consumption_client, kwh_predictions): """ Given what we know about the property now, estimates the current energy consumption using the UCL paper https://www.sciencedirect.com/science/article/pii/S0378778823002542 @@ -687,97 +686,86 @@ class Property: # If we have the kwh figures, we don't need to predict them condition_data = self.energy_assessment_condition_data.copy() - scoring_df = pd.DataFrame([self.epc_record.prepared_epc]) - # Change columns from underscores to hyphens - scoring_df.columns = [ - x.lower().replace("_", "-") for x in scoring_df.columns - ] - for col in ["heating_kwh", "hot_water_kwh"]: - scoring_df[col] = None - - energy_consumption_client.data = None + # scoring_df = pd.DataFrame([self.epc_record.prepared_epc]) + # # Change columns from underscores to hyphens + # scoring_df.columns = [ + # x.lower().replace("_", "-") for x in scoring_df.columns + # ] + # for col in ["heating_kwh", "hot_water_kwh"]: + # scoring_df[col] = None + # + # energy_consumption_client.data = None + heating_kwh_predictions = kwh_predictions["heating_kwh_predictions"] + hotwater_kwh_predictions = kwh_predictions["hotwater_kwh_predictions"] heating_prediction = ( - float(condition_data["space_heating_kwh"]) if condition_data.get("space_heating_kwh") is not None - else energy_consumption_client.score_new_data( - new_data=scoring_df, target="heating_kwh" - )[0] + condition_data.get("space_heating_kwh") if condition_data.get("space_heating_kwh") is not None else + heating_kwh_predictions[ + heating_kwh_predictions["id"].astype(int) == self.uprn + ]["predictions"].values[0] ) + # heating_prediction = ( + # float(condition_data["space_heating_kwh"]) if condition_data.get("space_heating_kwh") is not None + # else energy_consumption_client.score_new_data( + # new_data=scoring_df, target="heating_kwh" + # )[0] + # ) + hot_water_prediction = ( - float(condition_data["water_heating_kwh"]) if condition_data.get("water_heating_kwh") is not None - else energy_consumption_client.score_new_data( - new_data=scoring_df, target="hot_water_kwh" - )[0] + condition_data.get("water_heating_kwh") if condition_data.get("water_heating_kwh") is not None else + hotwater_kwh_predictions[ + hotwater_kwh_predictions["id"].astype(int) == self.uprn + ]["predictions"].values[0] ) + # hot_water_prediction = ( + # float(condition_data["water_heating_kwh"]) if condition_data.get("water_heating_kwh") is not None + # else energy_consumption_client.score_new_data( + # new_data=scoring_df, target="hot_water_kwh" + # )[0] + # ) + # We convert the lighting cost into kwh, just using the price cap - lighting_kwh = float(self.data["lighting-cost-current"]) / AnnualBillSavings.ELECTRICITY_PRICE_CAP + lighting_kwh = todays_lighting_cost / AnnualBillSavings.ELECTRICITY_PRICE_CAP appliances_kwh = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area) - adjusted_heating_kwh = AnnualBillSavings.adjust_energy_to_metered( - epc_energy=heating_prediction, - current_epc_rating=self.data["current-energy-rating"], - ) + unadjusted_kwh_estimates = { + "heating": heating_prediction, + "hot_water": hot_water_prediction, + "lighting": lighting_kwh, + "appliances": appliances_kwh + } - adjusted_hot_water_kwh = AnnualBillSavings.adjust_energy_to_metered( - epc_energy=hot_water_prediction, - current_epc_rating=self.data["current-energy-rating"], - ) + adjusted_kwh_estimates = { + k: AnnualBillSavings.adjust_energy_to_metered( + epc_energy=v, + current_epc_rating=self.data["current-energy-rating"], + ) for k, v in unadjusted_kwh_estimates.items() + } - adjusted_lighting_kwh = AnnualBillSavings.adjust_energy_to_metered( - epc_energy=lighting_kwh, - current_epc_rating=self.data["current-energy-rating"], - ) + unadjusted_heating_costs = { + "heating": todays_heating_cost, + "hot_water": todays_hot_water_cost, + "lighting": todays_lighting_cost, + "appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP + } - adjusted_applicances_kwh = AnnualBillSavings.adjust_energy_to_metered( - epc_energy=appliances_kwh, - current_epc_rating=self.data["current-energy-rating"], - ) - - # Adjust today's cost figures with the UCL model - adjusted_heating_cost = AnnualBillSavings.adjust_energy_to_metered( - epc_energy=todays_heating_cost, - current_epc_rating=self.data["current-energy-rating"], - ) - - adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_to_metered( - epc_energy=todays_hot_water_cost, - current_epc_rating=self.data["current-energy-rating"], - ) - - adjusted_lighting_cost = AnnualBillSavings.adjust_energy_to_metered( - epc_energy=todays_lighting_cost, - current_epc_rating=self.data["current-energy-rating"], - ) - - adjusted_appliances_cost = AnnualBillSavings.adjust_energy_to_metered( - epc_energy=appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP, - current_epc_rating=self.data["current-energy-rating"], - ) + adjusted_heating_costs = { + k: AnnualBillSavings.adjust_energy_to_metered( + epc_energy=v, + current_epc_rating=self.data["current-energy-rating"], + ) for k, v in unadjusted_heating_costs.items() + } # Sum up the adjusted kwh figures - self.current_adjusted_energy = ( - adjusted_heating_kwh + adjusted_hot_water_kwh + adjusted_lighting_kwh + adjusted_applicances_kwh - ) - self.current_energy_bill = ( - adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost - ) + self.current_adjusted_energy = sum(list(adjusted_kwh_estimates.values())) + self.current_energy_bill = sum(list(adjusted_heating_costs.values())) self.energy_cost_estimates = { - "adjusted": { - "heating": adjusted_heating_cost, - "hot_water": adjusted_hot_water_cost, - "lighting": adjusted_lighting_cost, - "appliances": adjusted_appliances_cost - }, - "unadjusted": { - "heating": todays_heating_cost, - "hot_water": todays_hot_water_cost, - "lighting": todays_lighting_cost, - "appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP - }, + "adjusted": adjusted_heating_costs, + "unadjusted": unadjusted_heating_costs, "epc": { "heating": float(self.data["heating-cost-current"]), "hot_water": float(self.data["hot-water-cost-current"]), @@ -786,18 +774,8 @@ class Property: } self.energy_consumption_estimates = { - "adjusted": { - "heating": adjusted_heating_kwh, - "hot_water": adjusted_hot_water_kwh, - "lighting": adjusted_lighting_kwh, - "appliances": adjusted_applicances_kwh - }, - "unadjusted": { - "heating": heating_prediction, - "hot_water": hot_water_prediction, - "lighting": lighting_kwh, - "appliances": appliances_kwh - } + "adjusted": adjusted_kwh_estimates, + "unadjusted": unadjusted_kwh_estimates } def set_spatial(self, spatial: pd.DataFrame): diff --git a/backend/app/config.py b/backend/app/config.py index f80da387..b5ea72fe 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -30,6 +30,8 @@ class Settings(BaseSettings): LIGHTING_COST_PREDICTIONS_BUCKET: str HEATING_COST_PREDICTIONS_BUCKET: str HOT_WATER_COST_PREDICTIONS_BUCKET: str + HEATING_KWH_PREDICTIONS_BUCKET: str + HOTWATER_KWH_PREDICTIONS_BUCKET: str class Config: env_file = "backend/.env" @@ -48,5 +50,7 @@ def get_prediction_buckets(): "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET, "lighting_cost_predictions": get_settings().LIGHTING_COST_PREDICTIONS_BUCKET, "heating_cost_predictions": get_settings().HEATING_COST_PREDICTIONS_BUCKET, - "hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET + "hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET, + "heating_kwh_predictions": get_settings().HEATING_KWH_PREDICTIONS_BUCKET, + "hotwater_kwh_predictions": get_settings().HOTWATER_KWH_PREDICTIONS_BUCKET, } diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index d4b2a9a5..b4d5c774 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -432,24 +432,20 @@ async def trigger_plan(body: PlanTriggerRequest): environment=get_settings().ENVIRONMENT ) - epcs_for_scoring = pd.DataFrame([energy_consumption_client.prepare_new_data(p) for p in input_properties]) - # What do we need? - # We need an estimate of each properties energy consumption now, as well as the cost of heating and hot water - # The newest EPC may have been done quite some time ago, and so we should take this into consideration when - # producing the estimate for cost. With that said, we already have a methodology which will re-map the cost - # when the EPC was produced to a cost for today, however could we use the ML models. - # In theory, we could just score the kwh models via the API, pass the results into the get_components function - # and insert the kwh figures into the property and we're done - # TODO: Need to check if we need to re-map when scoring new data or not + model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at) - # We need to prepare the EPC so it's in the same format as the training data - # TODO: DELETE ME - # from utils.s3 import read_dataframe_from_s3_parquet - # train = read_dataframe_from_s3_parquet( - # bucket_name="retrofit-data-dev", - # file_key="energy_consumption/2024-07-08/energy_consumption_dataset.parquet" - # ) - # We need to prepare the EPC so it's in the same format as the training data + epcs_for_scoring = energy_consumption_client.prepare_new_data(input_properties) + + # prepare the data + + # TODO: Some junk is being returned by the heating kwh model! + kwh_predictions = model_api.predict_all( + df=epcs_for_scoring, + bucket=get_settings().DATA_BUCKET, + prediction_buckets=get_prediction_buckets(), + model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"], + extract_ids=False + ) # TODO: Move this/tidy it up uprn_map = {} @@ -478,7 +474,11 @@ async def trigger_plan(body: PlanTriggerRequest): for p in tqdm(input_properties): if p.spatial is None: raise Exception("Missed setting of spatial data for a property") - p.get_components(cleaned=cleaned, energy_consumption_client=energy_consumption_client) + p.get_components( + cleaned=cleaned, + # energy_consumption_client=energy_consumption_client # TODO: Full remove me + kwh_predictions=kwh_predictions + ) logger.info("Performing solar analysis") # TODO: Tidy this up @@ -663,8 +663,6 @@ async def trigger_plan(body: PlanTriggerRequest): "carbon_ending"] ) - model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at) - all_predictions = model_api.predictions_template() to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE) for chunk in tqdm(to_loop_over, total=len(to_loop_over)): diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index 4844d7fd..e4a0715f 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -15,6 +15,8 @@ class ModelApi: "lighting_cost_predictions", "heating_cost_predictions", "hot_water_cost_predictions", + "hotwater_kwh_predictions", + "heating_kwh_predictions", ] MODEL_URLS = { @@ -24,6 +26,8 @@ class ModelApi: "lighting_cost_predictions": "lightingmodel", "heating_cost_predictions": "heatingmodel", "hot_water_cost_predictions": "hotwatermodel", + "hotwater_kwh_predictions": "hotwaterkwhmodel", + "heating_kwh_predictions": "heatingkwhmodel", } def __init__( @@ -123,7 +127,7 @@ class ModelApi: else: return None - def predict_all(self, df, bucket, prediction_buckets) -> dict: + def predict_all(self, df, bucket, prediction_buckets, model_prefixes=None, extract_ids=True) -> dict: """ For each model prefix, this method will upload the scoring data to s3 and then make a request to the @@ -133,11 +137,17 @@ class ModelApi: :param df: Pandas dataframe with scoring data to be uploaded to s3 :param bucket: Name of the bucket in s3 to upload to :param prediction_buckets: Dictionary containing the prediction buckets for each model prefix + :param model_prefixes: List of model prefixes to generate predictions for. If None, all model prefixes will be + used + :param extract_ids: Boolean to determine if the property_id and recommendation_id should be extracted from the + id column :return: """ + model_prefixes = self.MODEL_PREFIXES if model_prefixes is None else model_prefixes + predictions = {} - for model_prefix in self.MODEL_PREFIXES: + for model_prefix in model_prefixes: logger.info(f"Scoring for model prefix: {model_prefix}") file_location = self.upload_scoring_data(df, bucket, model_prefix) response = self.predict( @@ -155,15 +165,17 @@ class ModelApi: ) predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1) - predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True) - # To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a - # string split on phase= and then grab the second element of the resulting list. We could also use a - # regular expression to do this but we use the string split method here, for safety. - # We may not always have a phase to split on, so we need to handle this case. We can do this by using the - # str[1] method to grab the second element of the resulting list. We then grab the first character of this - # string to get the phase. We then convert this to an integer. - # Convert back to int - predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase) + if extract_ids: + predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True) + # To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a + # string split on phase= and then grab the second element of the resulting list. We could also use a + # regular expression to do this but we use the string split method here, for safety. + # We may not always have a phase to split on, so we need to handle this case. We can do this by using + # the str[1] method to grab the second element of the resulting list. We then grab the first + # character of this + # string to get the phase. We then convert this to an integer. + # Convert back to int + predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase) predictions[model_prefix] = predictions_df diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py index 5922177e..01dcce7a 100644 --- a/etl/bill_savings/EnergyConsumptionModel.py +++ b/etl/bill_savings/EnergyConsumptionModel.py @@ -508,7 +508,7 @@ class EnergyConsumptionModel: return prediction @staticmethod - def prepare_new_data(p: Property): + def _prepare_new_data(p: Property): """ Given an instance of the property class, this method will ensure that the EPC is ready for scoring with the kwh models. In the backend, we perform some cleaning and transformation on an EPC so we just ensure that the @@ -558,6 +558,15 @@ class EnergyConsumptionModel: return epc + def prepare_new_data(self, input_properties: list[Property]): + scoring_data = pd.DataFrame([self._prepare_new_data(p) for p in input_properties]) + scoring_data["lodgement-year"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.year + scoring_data["lodgement-month"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.month + + scoring_data["id"] = scoring_data["uprn"].copy() + + return scoring_data + @staticmethod def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages): diff --git a/etl/customers/newhaven/newhaven_study.py b/etl/customers/newhaven/newhaven_study.py index 7c53405f..4092dd87 100644 --- a/etl/customers/newhaven/newhaven_study.py +++ b/etl/customers/newhaven/newhaven_study.py @@ -30,6 +30,8 @@ def make_asset_list(): epc_data = epc_data[~pd.isnull(epc_data["uprn"])] epc_data["uprn"] = epc_data["uprn"].astype(int).astype(str) + # Take the newest EPC per uprn + epc_data = epc_data.sort_values("lodgement-date").groupby("uprn").last().reset_index() # /Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/Data/ # We read in the multiple data sources address_base = pd.read_csv( @@ -72,7 +74,7 @@ def make_asset_list(): ].merge( epc_data[ ["uprn", "current-energy-efficiency", "current-energy-rating", "address1", "postcode", "floor-height", - "property-type", "built-form"]], + "property-type", "built-form", "co2-emissions-current"]], how="left", left_on="UPRN", right_on="uprn" @@ -86,6 +88,21 @@ def make_asset_list(): columns={"Wall Area [m^2]": "insulation_wall_area", "Building Area [m^2]": "floor_area"} ) + had_an_epc = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])] + below_b = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80].shape + below_c = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 69].shape + had_an_epc["energy-efficiency-rating"].value_counts() + asset_list["current-energy-rating"].value_counts() + asset_list["co2-emissions-current"].mean() + # Get the underlying data of a histograme + import matplotlib.pyplot as plt + n, bins, patches = plt.hist(asset_list["co2-emissions-current"], bins=100, color="blue", alpha=0.7) + + bins = np.arange(0, asset_list["co2-emissions-current"].max(), 1) # Bins from 50 to 150 with a step of 10 + + # Step 3: Calculate the frequency of data in each bin + hist, bin_edges = np.histogram(asset_list["co2-emissions-current"], bins=bins) + # Take properties below a B - there are 2844 units asset_list = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80] # Drop caravans @@ -235,8 +252,7 @@ def make_asset_list(): file_name=non_invasive_recommendations_filename ) - # Create two scenarios - # Scenario A + # Create three scenarios body1 = { "portfolio_id": str(PORTFOLIO_ID), "housing_type": "Private", @@ -246,14 +262,16 @@ def make_asset_list(): "already_installed_file_path": "", "patches_file_path": "", "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, - "scenario_name": "Fabric - no solid wall", + "scenario_name": "Demand Reduction - no solid wall", "multi_plan": True, - "exclusions": ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"], + "exclusions": [ + "internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv" + ], "budget": None, } print(body1) - # Scenario B - deep fabric, no exclusions + # Scenario B body2 = { "portfolio_id": str(PORTFOLIO_ID), "housing_type": "Private", @@ -263,8 +281,25 @@ def make_asset_list(): "already_installed_file_path": "", "patches_file_path": "", "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, - "scenario_name": "Deep Fabric", + "scenario_name": "Demand Reduction, Heating Systems, Solar PV - no solid wall", "multi_plan": True, + "exclusions": ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"], "budget": None, } print(body2) + + # Scenario C - deep fabric, no exclusions + body3 = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Private", + "goal": "Increasing EPC", + "goal_value": "A", + "trigger_file_path": filename, + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, + "scenario_name": "Whole House", + "multi_plan": True, + "budget": None, + } + print(body3) diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index edfc4d66..569d7bcb 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -61,10 +61,12 @@ class WallRecommendations(Definitions): "Cavity wall, as built, insulated": "Cavity wall, filled cavity and external insulation", "Solid brick, as built, no insulation": "Solid brick, with external insulation", "Solid brick, as built, insulated": "Solid brick, with external insulation", + "Solid brick, as built, partial insulation": "Solid brick, with external insulation", "Cob, as built": "Cob, with external insulation", "System built, as built, no insulation": "System built, with external insulation", "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation', "Timber frame, as built, no insulation": "Timber frame, with external insulation", + 'Timber frame, as built, partial insulation': 'Timber frame, with external insulation', } # These are the ending descriptions we consider for walls with internal insulation @@ -72,10 +74,12 @@ class WallRecommendations(Definitions): "Cavity wall, as built, insulated": "Cavity wall, filled cavity and internal insulation", "Solid brick, as built, no insulation": "Solid brick, with internal insulation", "Solid brick, as built, insulated": "Solid brick, with internal insulation", + "Solid brick, as built, partial insulation": "Solid brick, with internal insulation", "Cob, as built": "Cob, with internal insulation", "System built, as built, no insulation": "System built, with internal insulation", "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation', "Timber frame, as built, no insulation": "Timber frame, with internal insulation", + 'Timber frame, as built, partial insulation': 'Timber frame, with internal insulation', } def __init__(