diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index d29e3da5..f7b34d19 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -98,13 +98,15 @@ class GoogleSolarApi: raise @lru_cache(maxsize=128) - def get(self, longitude, latitude, required_quality="MEDIUM"): + def get(self, longitude, latitude, energy_consumption, required_quality="MEDIUM", is_building=False): """ Wrapper function that calls get_building_insights and extracts roof segments, with caching. :param longitude: The longitude of the location. :param latitude: The latitude of the location. + :param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude. :param required_quality: The required quality of the data (default is "MEDIUM"). + :param is_building: Whether the energy consumption is for a building or a unit. :return: The JSON response containing the building insights data. """ @@ -112,8 +114,8 @@ class GoogleSolarApi: # Extract key data from the insights response self.roof_segments = self.insights_data["solarPotential"].get('roofSegmentStats', []) - self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2'] self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2'] + self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2'] self.panel_area = ( self.insights_data["solarPotential"]["panelHeightMeters"] * self.insights_data["solarPotential"]["panelWidthMeters"] @@ -133,7 +135,7 @@ class GoogleSolarApi: self.roof_segment_indexes = [segment['segmentIndex'] for segment in self.roof_segments] # We now start finding the solar panel configurations - self.optimise_solar_configuration() + self.optimise_solar_configuration(energy_consumption=energy_consumption, is_building=is_building) @staticmethod def lifetime_production_ac_kwh( @@ -143,7 +145,7 @@ class GoogleSolarApi: ): """ Mimics the function 
described in the Google Solar API documentation, presenting the lifetime production - AC KWH as a geometri sum + AC KWH as a geometric sum """ return ( @@ -153,86 +155,7 @@ class GoogleSolarApi: installation_life_span)) / (1 - efficiency_depreciation_factor)) - @staticmethod - def annualUtilityBillEstimate( - yearlyKWhEnergyConsumption, - initialAcKwhPerYear, - efficiencyDepreciationFactor, - year, - costIncreaseFactor, - discountRate): - """ - Implements the bill costing model for esimating annual bill - :param yearlyKWhEnergyConsumption: - :param initialAcKwhPerYear: - :param efficiencyDepreciationFactor: - :param year: - :param costIncreaseFactor: - :param discountRate: - :return: - """ - - return ( - billCostModel( - yearlyKWhEnergyConsumption - - annualProduction( - initialAcKwhPerYear, - efficiencyDepreciationFactor, - year)) * - pow(costIncreaseFactor, year) / - pow(discountRate, year)) - - def lifetimeUtilityBill( - yearlyKWhEnergyConsumption, - initialAcKwhPerYear, - efficiencyDepreciationFactor, - installationLifeSpan, - costIncreaseFactor, - discountRate): - bill = [0] * installationLifeSpan - for year in range(installationLifeSpan): - bill[year] = annualUtilityBillEstimate( - yearlyKWhEnergyConsumption, - initialAcKwhPerYear, - efficiencyDepreciationFactor, - year, - costIncreaseFactor, - discountRate) - return bill - - def estimate_solar_costs(self, panel_performance): - """ - This method implements the recommended costing approach, to estimate the ROI of a solar panel - configuration, as described in the Google Solar API documentation - :param panel_performance: dataframe containing the solar panel array configuration and energy generation data - :return: - """ - - # we now estiamte the financial benefits of solar panels for the household, using the framework described - # by the Google Solar API - # 1) Convert Solar Energy AD production from the DC production - panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * 
self.dc_to_ac_rate - - # This is just a benchmark figure, based on the national figure. This doesn't not respect the fact that a - # property could be 100% electric - average_electricity_consumption - - # Remove anything where the total ac energy is less than half of the array wattage - panel_performance = panel_performance[ - (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5 - ] - - # 2) Calculate the liftime solar energy production - panel_performance['lifetime_ac_kwh'] = panel_performance.apply( - self.lifetime_production_ac_kwh, - axis=1, - efficiency_depreciation_factor=self.efficiency_depreciation_factor, - installation_life_span=self.installation_life_span - ) - - # TODO: Complete the rest of the solar model - - def optimise_solar_configuration(self): + def optimise_solar_configuration(self, energy_consumption, is_building=False): """ Optimise the solar panel configuration for the building. :return: @@ -287,30 +210,67 @@ class GoogleSolarApi: panel_performance = pd.DataFrame(panel_performance) # We can have duplicate configurations panel_performance = panel_performance.drop_duplicates() - # Ensure more than 4 panels - panel_performance = panel_performance[panel_performance["n_panels"] >= 4] + # If we look at the building level, we don't include any projects fewer than 10 panels, otherwise the + # minimum is 4 + min_panels = 10 if is_building else 4 + panel_performance = panel_performance[panel_performance["n_panels"] >= min_panels] - self.estimate_solar_costs() + panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate - # This first bracket is the value of the energy bill savings - panel_performance["bill_savings"] = ( - self.SOLAR_CONSUMPTION_PROPORTION * - panel_performance["total_energy"] * - AnnualBillSavings.ELECTRICITY_PRICE_CAP + # Remove anything where the total ac energy is less than half of the array wattage + panel_performance = panel_performance[ + 
(panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5 + ] + + # 2) Calculate the lifetime solar energy production + panel_performance['lifetime_ac_kwh'] = panel_performance.apply( + self.lifetime_production_ac_kwh, + axis=1, + efficiency_depreciation_factor=self.efficiency_depreciation_factor, + installation_life_span=self.installation_life_span ) - # This is the amount of energy exported - panel_performance["export_value"] = ( - (1 - self.SOLAR_CONSUMPTION_PROPORTION) * - panel_performance["total_energy"] * - AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT + + # Now that we know the lifetime production of ac kwh, we can estimate the roi + roi_results = [] + for _, panel_config in panel_performance.iterrows(): + lifetime_ac_kwh = panel_config["lifetime_ac_kwh"] + lifetime_energy_consumption = energy_consumption * self.installation_life_span + + if lifetime_ac_kwh < lifetime_energy_consumption: + # We estimate the amount of electricity generated, based on the price cap + generation_value = lifetime_ac_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP + roi = generation_value / panel_config["total_cost"] + generation_deficit = lifetime_energy_consumption - lifetime_ac_kwh + else: + # We now have a surplus of energy, which we can sell back to the grid + surplus = lifetime_ac_kwh - lifetime_energy_consumption + surplus_value = surplus * AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT + generation_value = lifetime_energy_consumption * AnnualBillSavings.ELECTRICITY_PRICE_CAP + roi = (generation_value + surplus_value) / panel_config["total_cost"] + generation_deficit = 0 # demand is fully met, so there is no deficit + + # Generation deficit tells us how much more energy we need to meet the generation demand.
+ roi_results.append( + { + "n_panels": panel_config["n_panels"], + "roi": roi, + "generation_value": generation_value, + "generation_deficit": generation_deficit + } + ) + + roi_results = pd.DataFrame(roi_results) + + panel_performance = panel_performance.merge( + roi_results, how="left", on="n_panels" ) - panel_performance["energy_value"] = panel_performance["bill_savings"] + panel_performance["export_value"] - panel_performance["payback_years"] = panel_performance["total_cost"] / panel_performance["energy_value"] - panel_performance = panel_performance.sort_values("weighted_ratio", ascending=False) - # TODO: Finish this!! - - panel_performance["roof_area_percentage"] = panel_performance["panneled_roof_area"] / self.roof_area + # We prioritise maximal roi, then minimal generation deficit, then maximal generation value (if there is still + # a tie). Ideally, we want the best roi over the lifetime of the solar panels, but we also want to ensure that + # we can meet the energy demands of the building.
+ panel_performance = panel_performance.sort_values( + ["roi", "generation_deficit", "generation_value"], ascending=[False, True, False] + ) self.panel_performance = panel_performance diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 2832989e..c3823180 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -351,13 +351,14 @@ async def trigger_plan(body: PlanTriggerRequest): photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET) solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY) - dataset_version = "2024-07-05" + dataset_version = "2024-07-08" energy_consumption_client = EnergyConsumptionModel( model_paths={ "heating_kwh": f"model_directory/energy_consumption_model/heating_kwh_{dataset_version}.pkl", "hot_water_kwh": f"model_directory/energy_consumption_model/hot_water_kwh_{dataset_version}.pkl" }, - dummy_schema_path=f"model_directory/energy_consumption_model/dummy_schema_{dataset_version}.pkl", + dummy_schema_path=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl", + consumption_average_path=f"energy_consumption/{dataset_version}/consumption_averages.parquet", cleaned=cleaned ) @@ -366,14 +367,48 @@ async def trigger_plan(body: PlanTriggerRequest): p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds, energy_consumption_client) p.get_spatial_data(uprn_filenames) + # TODO: Handle the case of modelling some units as buildings and some as properties individually + # TODO: We should adjust the energy consumption to account for post-retrofit energy consumption building_ids = [ { - "building_id": p.building_id, "longitude": p.spatial["longitude"], "latitude": p.spatial["latitude"] + "building_id": p.building_id, + "longitude": p.spatial["longitude"], + "latitude": p.spatial["latitude"], + "energy_consumption": energy_consumption_client.estimate_new_consumption( +
current_rating=p.data["current-energy-rating"], + target_rating=body.goal_value, + current_consumption=p.current_adjusted_energy + ), } for p in input_properties if p.building_id is not None ] if building_ids: - # Model the solar potential at the building level - print("complete me") + # Find the unique longitude and latitude pairs for each building id + unique_coordinates = {} + for entry in building_ids: + building_id = entry['building_id'] + coordinate_pair = {'longitude': entry['longitude'], 'latitude': entry['latitude']} + + if building_id not in unique_coordinates: + unique_coordinates[building_id] = [] + + if coordinate_pair not in unique_coordinates[building_id]: + unique_coordinates[building_id].append(coordinate_pair) + + for building_id, coordinates in unique_coordinates.items(): + if len(coordinates) > 1: + raise NotImplementedError("more than one coordinate for a building - handle me") + + coordinates = coordinates[0] + energy_consumption = sum( + [entry['energy_consumption'] for entry in building_ids if entry['building_id'] == building_id] + ) + solar_performance = solar_api_client.get( + longitude=coordinates["longitude"], + latitude=coordinates["latitude"], + energy_consumption=energy_consumption, + is_building=True, + ) + else: # Model the solar potential at the property level for p in input_properties: diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py index 59a68a56..e7658de5 100644 --- a/etl/bill_savings/EnergyConsumptionModel.py +++ b/etl/bill_savings/EnergyConsumptionModel.py @@ -46,7 +46,7 @@ class EnergyConsumptionModel: "low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating" ] - def __init__(self, cleaned, model_paths=None, dummy_schema_path=None, n_jobs=1): + def __init__(self, cleaned, model_paths=None, dummy_schema_path=None, consumption_average_path=None, n_jobs=1): self.cleaned = cleaned self.models = {} self.model_paths = model_paths or {} @@ -85,6 
+85,13 @@ class EnergyConsumptionModel: s3_file_name=dummy_schema_path ) + self.consumption_averages = None + if consumption_average_path: + self.consumption_averages = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", + file_key=consumption_average_path + ) + def read_dataset(self, file_path): """Reads the dataset from the specified file path.""" logger.info(f"Reading dataset from {file_path}") @@ -434,3 +441,33 @@ class EnergyConsumptionModel: self.data = None return prediction + + @staticmethod + def calculate_percentage_decrease(start_rating, end_rating, consumption_averages): + + start_consumption = consumption_averages.loc[ + consumption_averages["current-energy-rating"] == start_rating, "total_consumption" + ].values[0] + end_consumption = consumption_averages.loc[ + consumption_averages["current-energy-rating"] == end_rating, "total_consumption" + ].values[0] + + percentage_decrease = ((start_consumption - end_consumption) / start_consumption) * 100 + return percentage_decrease + + def estimate_new_consumption(self, current_rating, target_rating, current_consumption): + """ + Given the consumption_averages dataset, which is produced as a result of the data_combining.py script, + for the energy kwh models, this function will estimate the new consumption from the current consumption, + based on the expected reduction in consumption from the current rating to the target rating.
+ :param current_rating: The energy rating band the property currently has. + :param target_rating: The energy rating band the property is aiming for. + :param current_consumption: The current energy consumption, which is scaled by the expected reduction. + :return: The current consumption scaled by the average change in consumption between the current rating + and the target rating. + """ + percentage_decrease = self.calculate_percentage_decrease( + current_rating, target_rating, self.consumption_averages + ) + new_consumption = current_consumption * (1 - percentage_decrease / 100) + return new_consumption diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py index 4fc03f99..c51bf59e 100644 --- a/etl/bill_savings/data_collection.py +++ b/etl/bill_savings/data_collection.py @@ -133,7 +133,7 @@ def app(): energy_consumption_data = [] for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)): # Skip the first 50 - if i < 260: + if i < 26: continue data = pd.read_csv(directory / "certificates.csv", low_memory=False) diff --git a/etl/bill_savings/data_combining.py b/etl/bill_savings/data_combining.py index a111ecf2..11366360 100644 --- a/etl/bill_savings/data_combining.py +++ b/etl/bill_savings/data_combining.py @@ -91,3 +91,14 @@ def app(): file_key=f"energy_consumption/{run_date}/energy_consumption_dataset.parquet", df=df ) + + # We also estimate the energy consumption reduction from this data, by band + df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"] + consumption_averages = df.groupby("current-energy-rating")["total_consumption"].mean().reset_index() + + # Save the consumption averages back to s3 + save_dataframe_to_s3_parquet( + bucket_name="retrofit-data-dev", + file_key=f"energy_consumption/{run_date}/consumption_averages.parquet", + df=consumption_averages + )