diff --git a/backend/Property.py b/backend/Property.py index a80c3057..4ae65d7d 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -2,8 +2,10 @@ import os import ast from itertools import groupby import pandas as pd +from datetime import datetime, timedelta from etl.epc.Dataset import TrainingDataset +from etl.epc.Record import EPCRecord from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map from etl.solar.SolarPhotoSupply import SolarPhotoSupply @@ -60,6 +62,10 @@ class Property: # Surplus information, that can be provided as optional inputs, by a customer n_bathrooms = None n_bedrooms = None + building_id = None # Used to group properties together into a single building + + # Contains the solar panel optimisation results from the Google Solar API + solar_panel_configuration = None def __init__( self, @@ -112,6 +118,9 @@ class Property: self.wall_type = None self.floor_type = None + self.energy_cost_estimates = {} + self.energy_consumption_estimates = {} + self.energy = { "primary_energy_consumption": epc_record.get("energy_consumption_current"), "co2_emissions": epc_record.get("co2_emissions_current"), @@ -167,6 +176,7 @@ class Property: self.hot_water_energy_source = None self.recommendations_scoring_data = [] + self.simulation_epcs = {} self.parse_kwargs(kwargs) @@ -190,12 +200,14 @@ class Property: return { "n_bathrooms": n_bathrooms, "n_bedrooms": n_bedrooms, + "building_id": kwargs.get("building_id", None), } def parse_kwargs(self, kwargs): # We extract the elements from kwargs that we recognise. 
Anything additional is ignored self.n_bathrooms = kwargs.get("n_bathrooms", None) self.n_bedrooms = kwargs.get("n_bedrooms", None) + self.building_id = kwargs.get("building_id", None) def create_base_difference_epc_record(self, cleaned_lookup: dict): """ @@ -273,6 +285,7 @@ class Property: """ self.recommendations_scoring_data = [] + self.simulation_epcs = {} phases = sorted( [ r[0]["phase"] @@ -280,6 +293,7 @@ class Property: if r[0]["phase"] is not None ] ) + simulation_lodgment_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d") for phase in phases: property_recommendations_by_phase = [ @@ -312,6 +326,10 @@ class Property: 0 ].copy() + recommendation_record["days_to_ending"] = EPCRecord._calculate_days_to( + lodgement_date=simulation_lodgment_date, + ) + for rec in property_recommendations_by_phase: # We simulate the impact of the recommendation at this current phase, and all of the prior phases @@ -327,6 +345,53 @@ class Property: ) self.recommendations_scoring_data.append(scoring_dict) + # We also use the representative recommendations to produce transformed EPCs + represenative_recs_to_this_phase = [ + r for r in property_representative_recommendations + if r["phase"] <= phase + ] + epc_transformations = [x["description_simulation"] for x in represenative_recs_to_this_phase] + + # It is possible that we could have two simulations applied to the same descriptions + # We extract these out + phase_epc_transformation = {} + for config in epc_transformations: + for k, v in config.items(): + if k in phase_epc_transformation: + if "-energy-eff" in k: + # We take the highest value + if phase_epc_transformation[k] == "Very Good": + continue + elif phase_epc_transformation[k] == "Good": + if v == "Very Good": + phase_epc_transformation[k] = v + elif phase_epc_transformation[k] == "Average": + if v in ["Good", "Very Good"]: + phase_epc_transformation[k] = v + elif phase_epc_transformation[k] == "Poor": + if v in ["Average", "Good", "Very Good"]: + 
phase_epc_transformation[k] = v + else: + phase_epc_transformation[k] = v + + continue + + if phase_epc_transformation[k] == v: + continue + + raise NotImplementedError( + "Already have this key in the phase_epc_transformation - implement me") + phase_epc_transformation[k] = v + + simulation_epc = self.epc_record.prepared_epc.copy() + # Insert static values + simulation_epc["lodgement_date"] = simulation_lodgment_date + + # Replace the understores with hyphens + simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()} + simulation_epc.update(phase_epc_transformation) + self.simulation_epcs[phase] = simulation_epc + @staticmethod def create_recommendation_scoring_data( property_id, @@ -478,6 +543,7 @@ class Property: if recommendation["type"] in [ "heating", "hot_water_tank_insulation", "heating_control", "secondary_heating", "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation", + "cylinder_thermostat" ]: # We update the data, as defined in the recommendaton if output["walls_insulation_thickness_ending"] is None: @@ -501,7 +567,7 @@ class Property: "loft_insulation", "room_roof_insulation", "flat_roof_insulation", "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation", "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation", - "heating_control", "secondary_heating" + "heating_control", "secondary_heating", "cylinder_thermostat" ]: raise NotImplementedError( "Implement me, given type %s" % recommendation["type"] @@ -512,7 +578,11 @@ class Property: return output def get_components( - self, cleaned, photo_supply_lookup, floor_area_decile_thresholds + self, + cleaned, + photo_supply_lookup, + floor_area_decile_thresholds, + energy_consumption_client ): """ Given the cleaning that has been performed, we'll use this to identify the property @@ -522,6 +592,8 @@ class Property: of the roof that is suitable for solar panels :param floor_area_decile_thresholds: This is the decile 
thresholds for the floor area, used in estimating the solar pv roof area + :param energy_consumption_client: Contains the heating and hot water kwh models - used to predict current + energy annual consumption in kWh :return: """ @@ -590,25 +662,144 @@ class Property: ) self.set_energy_source() self.find_energy_sources() - self.set_current_energy_bill() + self.set_current_energy_bill(energy_consumption_client) - def set_current_energy_bill(self): + def set_solar_panel_configuration(self, solar_panel_configuration): + """ + This funtion inserts the solar panel configuration into the property object + """ + self.solar_panel_configuration = solar_panel_configuration + + def set_current_energy_bill(self, energy_consumption_client): """ Given what we know about the property now, estimates the current energy consumption using the UCL paper https://www.sciencedirect.com/science/article/pii/S0378778823002542 :return: """ - starting_heat_demand = ( - float(self.data["energy-consumption-current"]) * self.floor_area + + # We get the following things: + # 1) Today's cost. 
This give us a basline figure for what the cost is today + # 2) Predicted KwH + + # Today's costs + todays_heating_cost = energy_consumption_client.convert_cost_to_today( + original_cost=float(self.data["heating-cost-current"]), + lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]) + ) + todays_hot_water_cost = energy_consumption_client.convert_cost_to_today( + original_cost=float(self.data["hot-water-cost-current"]), + lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]) + ) + todays_lighting_cost = energy_consumption_client.convert_cost_to_today( + original_cost=float(self.data["lighting-cost-current"]), + lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]) ) - self.current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered( - epc_energy_consumption=starting_heat_demand, + scoring_df = pd.DataFrame([self.epc_record.prepared_epc]) + # Change columns from underscores to hyphens + scoring_df.columns = [ + x.lower().replace("_", "-") for x in scoring_df.columns + ] + for col in ["heating_kwh", "hot_water_kwh"]: + scoring_df[col] = None + + energy_consumption_client.data = None + heating_prediction = energy_consumption_client.score_new_data( + new_data=scoring_df, target="heating_kwh" + )[0] + + hot_water_prediction = energy_consumption_client.score_new_data( + new_data=scoring_df, target="hot_water_kwh" + )[0] + + # We convert the lighting cost into kwh, just using the price cap + lighting_kwh = float(self.data["lighting-cost-current"]) / AnnualBillSavings.ELECTRICITY_PRICE_CAP + + appliances_kwh = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area) + + adjusted_heating_kwh = AnnualBillSavings.adjust_energy_to_metered( + epc_energy=heating_prediction, current_epc_rating=self.data["current-energy-rating"], - total_floor_area=self.floor_area ) - self.current_energy_bill = AnnualBillSavings.calculate_annual_bill(self.current_adjusted_energy) + 
adjusted_hot_water_kwh = AnnualBillSavings.adjust_energy_to_metered( + epc_energy=hot_water_prediction, + current_epc_rating=self.data["current-energy-rating"], + ) + + adjusted_lighting_kwh = AnnualBillSavings.adjust_energy_to_metered( + epc_energy=lighting_kwh, + current_epc_rating=self.data["current-energy-rating"], + ) + + adjusted_applicances_kwh = AnnualBillSavings.adjust_energy_to_metered( + epc_energy=appliances_kwh, + current_epc_rating=self.data["current-energy-rating"], + ) + + # Adjust today's cost figures with the UCL model + adjusted_heating_cost = AnnualBillSavings.adjust_energy_to_metered( + epc_energy=todays_heating_cost, + current_epc_rating=self.data["current-energy-rating"], + ) + + adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_to_metered( + epc_energy=todays_hot_water_cost, + current_epc_rating=self.data["current-energy-rating"], + ) + + adjusted_lighting_cost = AnnualBillSavings.adjust_energy_to_metered( + epc_energy=todays_lighting_cost, + current_epc_rating=self.data["current-energy-rating"], + ) + + adjusted_appliances_cost = AnnualBillSavings.adjust_energy_to_metered( + epc_energy=appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP, + current_epc_rating=self.data["current-energy-rating"], + ) + + # Sum up the adjusted kwh figures + self.current_adjusted_energy = ( + adjusted_heating_kwh + adjusted_hot_water_kwh + adjusted_lighting_kwh + adjusted_applicances_kwh + ) + self.current_energy_bill = ( + adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost + ) + + self.energy_cost_estimates = { + "adjusted": { + "heating": adjusted_heating_cost, + "hot_water": adjusted_hot_water_cost, + "lighting": adjusted_lighting_cost, + "appliances": adjusted_appliances_cost + }, + "unadjusted": { + "heating": todays_heating_cost, + "hot_water": todays_hot_water_cost, + "lighting": todays_lighting_cost, + "appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP + }, + "epc": { + 
"heating": float(self.data["heating-cost-current"]), + "hot_water": float(self.data["hot-water-cost-current"]), + "lighting": float(self.data["lighting-cost-current"]), + } + } + + self.energy_consumption_estimates = { + "adjusted": { + "heating": adjusted_heating_kwh, + "hot_water": adjusted_hot_water_kwh, + "lighting": adjusted_lighting_kwh, + "appliances": adjusted_applicances_kwh + }, + "unadjusted": { + "heating": heating_prediction, + "hot_water": hot_water_prediction, + "lighting": lighting_kwh, + "appliances": appliances_kwh + } + } def set_spatial(self, spatial: pd.DataFrame): """ diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index d29e3da5..8d08b083 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -5,6 +5,11 @@ from backend.ml_models.AnnualBillSavings import AnnualBillSavings import requests from functools import lru_cache import time +from backend.app.db.functions.solar_functions import get_solar_data, store_batch_data +from utils.logger import setup_logger +from sklearn.preprocessing import MinMaxScaler + +logger = setup_logger() class GoogleSolarApi: @@ -61,6 +66,9 @@ class GoogleSolarApi: self.panel_wattage = None self.panel_performance = None + # Indicates if we need to store the data to the db + self.need_to_store = False + def get_building_insights(self, longitude, latitude, required_quality="MEDIUM", max_retries=None): """ Make an API request to retrieve building insights based on the given longitude and latitude, with retry @@ -98,22 +106,39 @@ class GoogleSolarApi: raise @lru_cache(maxsize=128) - def get(self, longitude, latitude, required_quality="MEDIUM"): + def get( + self, longitude, latitude, energy_consumption, required_quality="MEDIUM", is_building=False, session=None, + uprn=None + ): """ Wrapper function that calls get_building_insights and extracts roof segments, with caching. :param longitude: The longitude of the location. :param latitude: The latitude of the location. 
+ :param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude. :param required_quality: The required quality of the data (default is "MEDIUM"). + :param is_building: Whether the energy consumption is for a building or a unit. + :param session: The database session to use for the query (default is None). + :param uprn: The unique property reference number (default is None). :return: The JSON response containing the building insights data. """ - self.insights_data = self.get_building_insights(longitude, latitude, required_quality) + is_outdated = False + if session is not None: + # Check if the data is already in the database + self.insights_data, _, is_outdated = get_solar_data( + session, longitude=longitude, latitude=latitude, uprn=uprn + ) + + # If we have no data in the db, or updated_at is more than 6 months + if self.insights_data is None or is_outdated: + self.insights_data = self.get_building_insights(longitude, latitude, required_quality) + self.need_to_store = True # Extract key data from the insights response self.roof_segments = self.insights_data["solarPotential"].get('roofSegmentStats', []) - self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2'] self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2'] + self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2'] self.panel_area = ( self.insights_data["solarPotential"]["panelHeightMeters"] * self.insights_data["solarPotential"]["panelWidthMeters"] @@ -133,106 +158,75 @@ class GoogleSolarApi: self.roof_segment_indexes = [segment['segmentIndex'] for segment in self.roof_segments] # We now start finding the solar panel configurations - self.optimise_solar_configuration() + self.optimise_solar_configuration(energy_consumption=energy_consumption, is_building=is_building) + + def save_to_db(self, session, uprns_to_location, scenario_type): + if 
self.insights_data is None: + raise ValueError("No api data to store") + + if scenario_type not in ["unit", "building"]: + raise Exception("Invalid scenario type. Must be either 'unit' or 'building'") + + if not self.need_to_store: + return + + logger.info("Storing to database") + + scenarios_data = self.panel_performance.head(1)[ + [ + "n_panels", + "yearly_dc_energy", + "total_cost", + "panneled_roof_area", + "array_warrage", + "initial_ac_kwh_per_year", + "lifetime_ac_kwh", + "roi", + "expected_payback_years", + "lifetime_dc_kwh" + ] + ].rename( + columns={ + "n_panels": "number_panels", + "yearly_dc_energy": "yearly_dc_kwh", + "total_cost": "cost", + "panneled_roof_area": "panelled_roof_area", + "array_warrage": "array_kwhp", + "initial_ac_kwh_per_year": "yearly_ac_kwh", + } + ) + scenarios_data["is_default"] = True + scenarios_data["scenario_type"] = scenario_type + scenarios_data = scenarios_data.to_dict(orient="records") + + # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it exists + store_batch_data( + session=session, + api_data=self.insights_data, + uprns_to_location=uprns_to_location, + scenarios_data=scenarios_data + ) @staticmethod - def lifetime_production_ac_kwh( + def lifetime_production_kwh( row, efficiency_depreciation_factor, - installation_life_span + installation_life_span, + column_name="initial_ac_kwh_per_year" ): """ Mimics the function described in the Google Solar API documentation, presenting the lifetime production - AC KWH as a geometri sum + AC KWH as a geometric sum """ return ( - row["initial_ac_kwh_per_year"] * + row[column_name] * (1 - pow( efficiency_depreciation_factor, installation_life_span)) / (1 - efficiency_depreciation_factor)) - @staticmethod - def annualUtilityBillEstimate( - yearlyKWhEnergyConsumption, - initialAcKwhPerYear, - efficiencyDepreciationFactor, - year, - costIncreaseFactor, - discountRate): - """ - Implements the bill costing model for esimating annual bill - 
:param yearlyKWhEnergyConsumption: - :param initialAcKwhPerYear: - :param efficiencyDepreciationFactor: - :param year: - :param costIncreaseFactor: - :param discountRate: - :return: - """ - - return ( - billCostModel( - yearlyKWhEnergyConsumption - - annualProduction( - initialAcKwhPerYear, - efficiencyDepreciationFactor, - year)) * - pow(costIncreaseFactor, year) / - pow(discountRate, year)) - - def lifetimeUtilityBill( - yearlyKWhEnergyConsumption, - initialAcKwhPerYear, - efficiencyDepreciationFactor, - installationLifeSpan, - costIncreaseFactor, - discountRate): - bill = [0] * installationLifeSpan - for year in range(installationLifeSpan): - bill[year] = annualUtilityBillEstimate( - yearlyKWhEnergyConsumption, - initialAcKwhPerYear, - efficiencyDepreciationFactor, - year, - costIncreaseFactor, - discountRate) - return bill - - def estimate_solar_costs(self, panel_performance): - """ - This method implements the recommended costing approach, to estimate the ROI of a solar panel - configuration, as described in the Google Solar API documentation - :param panel_performance: dataframe containing the solar panel array configuration and energy generation data - :return: - """ - - # we now estiamte the financial benefits of solar panels for the household, using the framework described - # by the Google Solar API - # 1) Convert Solar Energy AD production from the DC production - panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate - - # This is just a benchmark figure, based on the national figure. 
This doesn't not respect the fact that a - # property could be 100% electric - average_electricity_consumption - - # Remove anything where the total ac energy is less than half of the array wattage - panel_performance = panel_performance[ - (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5 - ] - - # 2) Calculate the liftime solar energy production - panel_performance['lifetime_ac_kwh'] = panel_performance.apply( - self.lifetime_production_ac_kwh, - axis=1, - efficiency_depreciation_factor=self.efficiency_depreciation_factor, - installation_life_span=self.installation_life_span - ) - - # TODO: Complete the rest of the solar model - - def optimise_solar_configuration(self): + def optimise_solar_configuration(self, energy_consumption, is_building=False): """ Optimise the solar panel configuration for the building. :return: @@ -252,7 +246,7 @@ class GoogleSolarApi: wattage = segment["panelsCount"] * self.insights_data["solarPotential"]["panelCapacityWatts"] generated_dc_energy = segment["yearlyEnergyDcKwh"] ratio = generated_dc_energy / wattage - cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (generated_dc_energy / 1000) + cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (wattage / 1000) roi_summary.append( { "segmentIndex": segment["segmentIndex"], @@ -287,30 +281,105 @@ class GoogleSolarApi: panel_performance = pd.DataFrame(panel_performance) # We can have duplicate configurations panel_performance = panel_performance.drop_duplicates() - # Ensure more than 4 panels - panel_performance = panel_performance[panel_performance["n_panels"] >= 4] + # If we look at the building level, we don't include any projects fewer than 10 panels, otherwise the + # minimum is 4 + min_panels = 10 if is_building else 4 + panel_performance = panel_performance[panel_performance["n_panels"] >= min_panels] - self.estimate_solar_costs() + panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * 
self.dc_to_ac_rate - # This first bracket is the value of the energy bill savings - panel_performance["bill_savings"] = ( - self.SOLAR_CONSUMPTION_PROPORTION * - panel_performance["total_energy"] * - AnnualBillSavings.ELECTRICITY_PRICE_CAP + # Remove anything where the total ac energy is less than half of the array wattage + panel_performance = panel_performance[ + (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5 + ] + + # 2) Calculate the liftime solar energy production + panel_performance['lifetime_ac_kwh'] = panel_performance.apply( + self.lifetime_production_kwh, + axis=1, + efficiency_depreciation_factor=self.efficiency_depreciation_factor, + installation_life_span=self.installation_life_span, + column_name="initial_ac_kwh_per_year" ) - # This is the amount of energy exported - panel_performance["export_value"] = ( - (1 - self.SOLAR_CONSUMPTION_PROPORTION) * - panel_performance["total_energy"] * - AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT + + panel_performance['lifetime_dc_kwh'] = panel_performance.apply( + self.lifetime_production_kwh, + axis=1, + efficiency_depreciation_factor=self.efficiency_depreciation_factor, + installation_life_span=self.installation_life_span, + column_name="yearly_dc_energy", ) - panel_performance["energy_value"] = panel_performance["bill_savings"] + panel_performance["export_value"] - panel_performance["payback_years"] = panel_performance["total_cost"] / panel_performance["energy_value"] - panel_performance = panel_performance.sort_values("weighted_ratio", ascending=False) - # TODO: Finish this!! 
+ # Now that we know the lifetime cnsumption of ac kwh, we can estimate the roi + lifetime_energy_consumption = energy_consumption * self.installation_life_span + roi_results = [] + for _, panel_config in panel_performance.iterrows(): + lifetime_ac_kwh = panel_config["lifetime_ac_kwh"] - panel_performance["roof_area_percentage"] = panel_performance["panneled_roof_area"] / self.roof_area + surplus = 0 + if lifetime_ac_kwh < lifetime_energy_consumption: + # We estimate the amount of electricity generated, based on the price cap + generation_value = lifetime_ac_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP + roi = generation_value / panel_config["total_cost"] + generation_deficit = lifetime_energy_consumption - lifetime_ac_kwh + else: + + # We now have a surplus of energy, which we can sell back to the grid + surplus = lifetime_ac_kwh - lifetime_energy_consumption + surplus_value = surplus * AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT + generation_value = lifetime_energy_consumption * AnnualBillSavings.ELECTRICITY_PRICE_CAP + roi = (generation_value + surplus_value) / panel_config["total_cost"] + generation_deficit = surplus_value + + # Calculate expected payback years + if generation_value > 0: + expected_payback_years = panel_config["total_cost"] / ( + generation_value / self.installation_life_span) + else: + expected_payback_years = None # or some high value indicating no payback + + # Generation deficit tells us how much more energy we need to meet the generation demand. 
+ roi_results.append( + { + "n_panels": panel_config["n_panels"], + "roi": roi, + "generation_value": generation_value, + "generation_deficit": generation_deficit, + "expected_payback_years": expected_payback_years, + "surplus": surplus + } + ) + + roi_results = pd.DataFrame(roi_results) + + panel_performance = panel_performance.merge( + roi_results, how="left", on="n_panels" + ) + + # We want max roi, minimal generation deficit, and max generation value - we create a ranking score + # Assign equal weights to each metric + weights = {'roi': 0.6, 'generation_value': 0.2, 'generation_deficit': 0.2} + metrics = panel_performance[['roi', 'generation_value', 'generation_deficit']] + + # Normalize the columns (0 to 1 scale) + scaler = MinMaxScaler() + normalized_metrics = scaler.fit_transform(metrics) + + # Convert normalized metrics back to a dataframe + normalized_metrics_df = pd.DataFrame( + normalized_metrics, columns=['roi', 'generation_value', 'generation_deficit'] + ) + normalized_metrics_df['combined_score'] = ( + normalized_metrics_df['roi'] * weights['roi'] + + normalized_metrics_df['generation_value'] * weights['generation_value'] + + (1 - normalized_metrics_df['generation_deficit']) * weights['generation_deficit'] + ) + + panel_performance['combined_score'] = normalized_metrics_df['combined_score'].values + panel_performance['rank'] = panel_performance['combined_score'].rank(ascending=False) + panel_performance = panel_performance.sort_values(by='rank') + + panel_performance["expected_payback_years"] = np.ceil(panel_performance["expected_payback_years"]).astype(int) self.panel_performance = panel_performance diff --git a/backend/app/config.py b/backend/app/config.py index 6f2e405b..ef48c317 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -8,9 +8,6 @@ class Settings(BaseSettings): SECRET_KEY: str ENVIRONMENT: str DATA_BUCKET: str - SAP_PREDICTIONS_BUCKET: str - CARBON_PREDICTIONS_BUCKET: str - HEAT_PREDICTIONS_BUCKET: str PLAN_TRIGGER_BUCKET: 
str EPC_AUTH_TOKEN: str ORDNANCE_SURVEY_API_KEY: str @@ -21,6 +18,14 @@ class Settings(BaseSettings): DB_PORT: str DB_NAME: str + # Prediction buckets + SAP_PREDICTIONS_BUCKET: str + CARBON_PREDICTIONS_BUCKET: str + HEAT_PREDICTIONS_BUCKET: str + LIGHTING_COST_PREDICTIONS_BUCKET: str + HEATING_COST_PREDICTIONS_BUCKET: str + HOT_WATER_COST_PREDICTIONS_BUCKET: str + class Config: env_file = "backend/.env" @@ -28,3 +33,15 @@ class Settings(BaseSettings): @lru_cache() def get_settings(): return Settings() + + +@lru_cache() +def get_prediction_buckets(): + return { + "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET, + "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET, + "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET, + "lighting_cost_predictions": get_settings().LIGHTING_COST_PREDICTIONS_BUCKET, + "heating_cost_predictions": get_settings().HEATING_COST_PREDICTIONS_BUCKET, + "hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET + } diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py index 69203368..402675e8 100644 --- a/backend/app/db/functions/portfolio_functions.py +++ b/backend/app/db/functions/portfolio_functions.py @@ -11,7 +11,7 @@ def aggregate_portfolio_recommendations( session.query( func.sum(Recommendation.estimated_cost).label("cost"), func.sum(Recommendation.total_work_hours).label("total_work_hours"), - func.sum(Recommendation.adjusted_heat_demand).label("energy_savings"), + func.sum(Recommendation.kwh_savings).label("energy_savings"), func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"), func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"), ) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index b22ce92f..365829e4 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ 
b/backend/app/db/functions/recommendations_functions.py @@ -80,8 +80,8 @@ def upload_recommendations(session: Session, recommendations_to_upload, property "starting_u_value": rec.get("starting_u_value"), "new_u_value": rec.get("new_u_value"), "sap_points": rec["sap_points"], - "heat_demand": rec["heat_demand"], - "adjusted_heat_demand": rec["adjusted_heat_demand"], + "energy_savings": rec["heat_demand"], + "kwh_savings": rec["kwh_savings"], "co2_equivalent_savings": rec["co2_equivalent_savings"], "total_work_hours": rec["labour_hours"], "energy_cost_savings": rec["energy_cost_savings"], diff --git a/backend/app/db/functions/solar_functions.py b/backend/app/db/functions/solar_functions.py new file mode 100644 index 00000000..59243f01 --- /dev/null +++ b/backend/app/db/functions/solar_functions.py @@ -0,0 +1,96 @@ +import datetime +import pytz +from sqlalchemy.orm import Session +from sqlalchemy.orm.exc import NoResultFound +from backend.app.db.models.solar import Solar, SolarScenario + + +def get_solar_data(session: Session, longitude: float = None, latitude: float = None, uprn: str = None): + """ + This function will fetch data from the solar table based on longitude and latitude or UPRN. 
+ :param session: The database session + :param longitude: The longitude to search for + :param latitude: The latitude to search for + :param uprn: The UPRN to search for (overrides longitude and latitude if provided) + :return: The google_api_response and updated_at fields + """ + try: + if uprn: + # Search by UPRN + solar_data = session.query(Solar.google_api_response, Solar.updated_at).filter_by(uprn=uprn).one() + else: + # Search by longitude and latitude + solar_data = session.query(Solar.google_api_response, Solar.updated_at).filter( + Solar.longitude == longitude, + Solar.latitude == latitude + ).one() + + # Check if updated_at is more than 6 months old + six_months_ago = datetime.datetime.now(pytz.utc) - datetime.timedelta(days=6 * 30) # Approximate 6 months + is_outdated = solar_data.updated_at < six_months_ago + + return solar_data.google_api_response, solar_data.updated_at, is_outdated + + except NoResultFound: + return None, None, False + + +def store_batch_data(session: Session, api_data: dict, uprns_to_location: list, scenarios_data: list): + """ + This function will store the API data to the solar table against all of the UPRNs with longitude and latitude. 
+ :param session: The database session + :param api_data: The API data to store + :param uprns_to_location: A list of dictionaries containing uprn, longitude, and latitude + :param scenarios_data: A list of dictionaries containing scenario data for each UPRN + """ + try: + + # Insert data into the Solar table and get the IDs + solar_records = [] + for data in uprns_to_location: + solar_record = Solar( + uprn=data['uprn'], + longitude=data['longitude'], + latitude=data['latitude'], + google_api_response=api_data, + updated_at=datetime.datetime.now(pytz.utc) + ) + solar_records.append(solar_record) + session.add(solar_record) + + session.flush() # Flush to get the IDs generated + + for record in solar_records: + session.refresh(record) # Refresh to populate the ID fields + + # Retrieve the IDs of the inserted records + inserted_ids = {record.uprn: record.id for record in solar_records} + + # Prepare the data for SolarScenario + scenario_records = [] + for data in uprns_to_location: + solar_id = inserted_ids.get(data['uprn']) + for scenario in scenarios_data: + scenario_record = SolarScenario( + solar_id=solar_id, + scenario_type=scenario['scenario_type'], + number_panels=scenario['number_panels'], + array_kwhp=scenario['array_kwhp'], + lifetime_dc_kwh=scenario['lifetime_dc_kwh'], + yearly_dc_kwh=scenario['yearly_dc_kwh'], + lifetime_ac_kwh=scenario.get('lifetime_ac_kwh'), # Optional field + yearly_ac_kwh=scenario.get('yearly_ac_kwh'), # Optional field + cost=scenario['cost'], + expected_payback_years=scenario.get('expected_payback_years'), # Optional field + panelled_roof_area=scenario['panelled_roof_area'], + is_default=scenario['is_default'] + ) + scenario_records.append(scenario_record) + + # Insert data into the SolarScenario table + session.bulk_save_objects(scenario_records) + session.commit() + + except Exception as e: + session.rollback() + raise e diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 
186f87a8..8ab7908f 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -22,7 +22,7 @@ class Recommendation(Base): new_u_value = Column(Float) sap_points = Column(Float) heat_demand = Column(Float) - adjusted_heat_demand = Column(Float) + kwh_savings = Column(Float) co2_equivalent_savings = Column(Float) energy_savings = Column(Float) energy_cost_savings = Column(Float) diff --git a/backend/app/db/models/solar.py b/backend/app/db/models/solar.py new file mode 100644 index 00000000..88372bd3 --- /dev/null +++ b/backend/app/db/models/solar.py @@ -0,0 +1,45 @@ +import datetime +import pytz +from enum import Enum as PyEnum +from sqlalchemy import Column, Integer, Float, DateTime, JSON, BigInteger, ForeignKey, Enum, Boolean +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base() + + +class Solar(Base): + __tablename__ = 'solar' + id = Column(Integer, primary_key=True, autoincrement=True) + longitude = Column(Float, nullable=False) + latitude = Column(Float, nullable=False) + uprn = Column(Integer, nullable=False) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + updated_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc), onupdate=datetime.datetime.now(pytz.utc) + ) + google_api_response = Column(JSON, nullable=False) + + +class ScenarioType(PyEnum): + unit = "unit" + building = "building" + + +class SolarScenario(Base): + __tablename__ = 'solar_scenario' + + id = Column(BigInteger, primary_key=True, autoincrement=True) + solar_id = Column(BigInteger, ForeignKey('solar.id'), nullable=False) + scenario_type = Column(Enum(ScenarioType), nullable=False) + number_panels = Column(Integer, nullable=False) + array_kwhp = Column(Integer, nullable=False) + lifetime_dc_kwh = Column(Float, nullable=False) + yearly_dc_kwh = Column(Float, nullable=False) + lifetime_ac_kwh = Column(Float) + yearly_ac_kwh = Column(Float) + cost = 
Column(Float, nullable=False) + expected_payback_years = Column(Float) + panelled_roof_area = Column(Float, nullable=False) + is_default = Column(Boolean, nullable=False) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 258449c2..00e73b56 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -10,7 +10,7 @@ from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.orm import sessionmaker from starlette.responses import Response -from backend.app.config import get_settings +from backend.app.config import get_settings, get_prediction_buckets from backend.app.db.connection import db_engine from backend.app.db.functions.materials_functions import get_materials from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations @@ -40,6 +40,7 @@ from recommendations.Mds import Mds from utils.logger import setup_logger from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3 from backend.ml_models.Valuation import PropertyValuation +from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel logger = setup_logger() @@ -128,7 +129,7 @@ def extract_portfolio_aggregation_data( pre_retrofit_energy_consumption = p.current_adjusted_energy post_retrofit_energy_consumption = p.current_adjusted_energy - sum( - [r["adjusted_heat_demand"] for r in default_recommendations] + [r["kwh_savings"] for r in default_recommendations] ) # Add up energy savings @@ -350,13 +351,110 @@ async def trigger_plan(body: PlanTriggerRequest): photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET) solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY) + dataset_version = "2024-07-08" + energy_consumption_client = EnergyConsumptionModel( + model_paths={ + "heating_kwh": f"model_directory/energy_consumption_model/heating_kwh_{dataset_version}.pkl", + "hot_water_kwh": 
f"model_directory/energy_consumption_model/hot_water_kwh_{dataset_version}.pkl" + }, + dummy_schema_path=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl", + consumption_average_path=f"energy_consumption/{dataset_version}/consumption_averages.parquet", + cleaned=cleaned, + environment=get_settings().ENVIRONMENT + ) + logger.info("Getting spatial data") for p in input_properties: - p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds) + p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds, energy_consumption_client) p.get_spatial_data(uprn_filenames) - # Call Google Solar API - # TODO: Complete me - solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"]) + + # TODO: Handle the case of modelling some units as buildings and some as properties individually + building_ids = [ + { + "building_id": p.building_id, + "longitude": p.spatial["longitude"], + "latitude": p.spatial["latitude"], + # Energy consumption is adjusted for the property's expected post retrofit state + "energy_consumption": energy_consumption_client.estimate_new_consumption( + current_rating=p.data["current-energy-rating"], + target_rating=body.goal_value, + current_consumption=p.current_adjusted_energy + ), + "property_id": p.id, + "uprn": p.uprn + } for p in input_properties if p.building_id is not None + ] + if building_ids: + # Find the unique longitude and latitude pairs for each building id + unique_coordinates = {} + building_uprns = {} + for entry in building_ids: + building_id = entry['building_id'] + coordinate_pair = {'longitude': entry['longitude'], 'latitude': entry['latitude']} + + if building_id not in unique_coordinates: + unique_coordinates[building_id] = [] + + if coordinate_pair not in unique_coordinates[building_id]: + unique_coordinates[building_id].append(coordinate_pair) + + if building_id not in building_uprns: + building_uprns[building_id] = [] + + if 
entry['uprn'] not in building_uprns[building_id]: + building_uprns[building_id].append( + { + "uprn": entry['uprn'], "longitude": entry['longitude'], "latitude": entry['latitude'] + } + ) + + solar_panel_configuration = {} + for building_id, coordinates in unique_coordinates.items(): + if len(coordinates) > 1: + raise NotImplementedError("more than one coordinate for a building - handle me") + + coordinates = coordinates[0] + energy_consumption = sum( + [entry['energy_consumption'] for entry in building_ids if entry['building_id'] == building_id] + ) + solar_api_client.get( + longitude=coordinates["longitude"], + latitude=coordinates["latitude"], + energy_consumption=energy_consumption, + is_building=True, + session=session + ) + solar_panel_configuration[building_id] = { + "insights_data": solar_api_client.insights_data, + "panel_performance": solar_api_client.panel_performance, + "n_units": len([entry for entry in building_ids if entry['building_id'] == building_id]) + } + + # Store the data in the database + # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it exists + solar_api_client.save_to_db( + session=session, uprns_to_location=building_uprns[building_id], scenario_type="building" + ) + + # Insert this into the properties that have this building id + for p in input_properties: + if p.building_id == building_id: + unit_solar_panel_configuration = solar_panel_configuration[building_id].copy() + + unit_solar_panel_configuration["unit_share_of_energy"] = ( + [x for x in building_ids if x["property_id"] == p.id][0]["energy_consumption"] / + energy_consumption + ) + p.set_solar_panel_configuration(unit_solar_panel_configuration) + + else: + # # Model the solar potential at the property level + # for p in input_properties: + # # TODO: Complete me! 
- we probably won't do this for individual flats + # solar_performance = solar_api_client.get( + # longitude=p.spatial["longitude"], latitude=p.spatial["latitude"] + # ) + print("Implement me") logger.info("Getting components and epc recommendations") recommendations = {} @@ -392,21 +490,13 @@ async def trigger_plan(body: PlanTriggerRequest): model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at) - all_predictions = { - "sap_change_predictions": pd.DataFrame(), - "heat_demand_predictions": pd.DataFrame(), - "carbon_change_predictions": pd.DataFrame() - } + all_predictions = model_api.predictions_template() to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE) for chunk in tqdm(to_loop_over, total=len(to_loop_over)): predictions_dict = model_api.predict_all( df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE], bucket=get_settings().DATA_BUCKET, - prediction_buckets={ - "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET, - "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET, - "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET - } + prediction_buckets=get_prediction_buckets() ) # Append the predictions to the predictions dictionary @@ -431,7 +521,9 @@ async def trigger_plan(body: PlanTriggerRequest): Recommendations.calculate_recommendation_impact( property_instance=property_instance, all_predictions=all_predictions, - recommendations=recommendations + recommendations=recommendations, + representative_recommendations=representative_recommendations, + energy_consumption_client=energy_consumption_client ) ) diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index fbc4d4f2..77ac4217 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -16,6 +16,7 @@ class PlanTriggerRequest(BaseModel): # Pre-defined list of possibilities for exclusions _allowed_exclusions = { + # Measure classes "wall_insulation", "ventilation", 
"roof_insulation", @@ -25,7 +26,9 @@ class PlanTriggerRequest(BaseModel): "heating", "hot_water", "lighting", - "solar_pv" + "solar_pv", + # Specific measures + "air_source_heat_pump", } _allowed_goals = {"Increase EPC"} diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index e6494bcd..e4d9d143 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -25,11 +25,13 @@ class AnnualBillSavings: AVERAGE_GAS_CONSUMPTION = 11500 # Latest price cap figures from Ofgem are for April 2024 - # https://www.ofgem.gov.uk/publications/new-energy-price-cap-level-april-june-2024-starts-today - ELECTRICITY_PRICE_CAP = 0.245 - GAS_PRICE_CAP = 0.0604 - # This is the most recent export payment figure, at 12p per kwh - ELECTRICITY_EXPORT_PAYMENT = 0.12 + # https://www.ofgem.gov.uk/energy-price-cap + ELECTRICITY_PRICE_CAP = 0.2236 + GAS_PRICE_CAP = 0.0548 + # This is the most recent export payment figure, at 9.28p/kWh + # Smart export guarantee rates can be found here: + # https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates + ELECTRICITY_EXPORT_PAYMENT = 0.0928 # This is a weighted mean of the price caps, using the consumption figures above as weights PRICE_FACTOR = 0.09549999999999999 @@ -125,7 +127,15 @@ class AnnualBillSavings: return eam @classmethod - def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating, total_floor_area): + def estimate_appliances_energy_use(cls, total_floor_area): + # The EPC energy consumption does not factor in cooking and applicance use, so this is estimated using the + # methodology outlined in SAP, and is discussed in the UCL paper in section 3.1.1 + estimated_occupants = cls.calculate_occupants(total_floor_area=total_floor_area) + appliances_energy_use = cls.estimate_electrical_appliances(estimated_occupants, total_floor_area) + return appliances_energy_use + + @classmethod + def adjust_energy_to_metered(cls, 
epc_energy, current_epc_rating): """ The over-prediction of energy use by EPCs in Great Britain: A comparison of EPC-modelled and metered primary energy use intensity @@ -133,16 +143,11 @@ class AnnualBillSavings: Which can be found here: https://www.sciencedirect.com/science/article/pii/S0378778823002542 We implement the results on page 10 + This is used to just re-map the cost from the EPC to the metered cost + epc_energy could be cost or kwh :return: """ - # The EPC energy consumption does not factor in cooking and applicance use, so this is estimated using the - # methodology outlined in SAP, and is discussed in the UCL paper in section 3.1.1 - estimated_occupants = cls.calculate_occupants(total_floor_area=total_floor_area) - appliances_energy_use = cls.estimate_electrical_appliances(estimated_occupants, total_floor_area) - - epc_energy_consumption += appliances_energy_use - gradients = { "A": -0.1, "B": -0.1, @@ -167,9 +172,10 @@ class AnnualBillSavings: intercept = intercepts[current_epc_rating] # This should be negative - consumption_difference = gradient * epc_energy_consumption + intercept + consumption_difference = gradient * epc_energy + intercept + consumption_difference = 0 if consumption_difference > 0 else consumption_difference - adjusted_consumption = (epc_energy_consumption + consumption_difference) + adjusted_consumption = (epc_energy + consumption_difference) if adjusted_consumption < 0: raise ValueError("consumption_difference should be negative") diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index 1af38194..b87f156b 100644 --- a/backend/ml_models/Valuation.py +++ b/backend/ml_models/Valuation.py @@ -93,6 +93,13 @@ class PropertyValuation: # Northern Group Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/ 10070868263: 194_000, # Based on Zoopla 10070868244: 195_000, # Based on Zoopla + # Places For People Pilot + 200140644: 385_000, + 200140645: 481_000, + 200140646: 372_000, + 200140647: 
481_000, + 200140648: 373_000, + 200140649: 373_000, } # We base our valuation uplifts on a number of sources diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index a2024dd7..4844d7fd 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -11,13 +11,19 @@ class ModelApi: MODEL_PREFIXES = [ "sap_change_predictions", "heat_demand_predictions", - "carbon_change_predictions" + "carbon_change_predictions", + "lighting_cost_predictions", + "heating_cost_predictions", + "hot_water_cost_predictions", ] MODEL_URLS = { "sap_change_predictions": "sapmodel", "heat_demand_predictions": "heatmodel", - "carbon_change_predictions": "carbonmodel" + "carbon_change_predictions": "carbonmodel", + "lighting_cost_predictions": "lightingmodel", + "heating_cost_predictions": "heatingmodel", + "hot_water_cost_predictions": "hotwatermodel", } def __init__( @@ -39,6 +45,17 @@ class ModelApi: self.portfolio_id = portfolio_id self.timestamp = timestamp + @staticmethod + def predictions_template(): + return { + "sap_change_predictions": pd.DataFrame(), + "heat_demand_predictions": pd.DataFrame(), + "carbon_change_predictions": pd.DataFrame(), + "lighting_cost_predictions": pd.DataFrame(), + "heating_cost_predictions": pd.DataFrame(), + "hot_water_cost_predictions": pd.DataFrame(), + } + def upload_scoring_data(self, df: pd.DataFrame, bucket: str, model_prefix: str) -> str: """ The sap model api needs a scoring data that is sitting in s3 to use as a dataset to score on diff --git a/backend/requirements/base.txt b/backend/requirements/base.txt index 3173f7f8..c4e7367c 100644 --- a/backend/requirements/base.txt +++ b/backend/requirements/base.txt @@ -36,4 +36,7 @@ boto3==1.28.3 pandas==1.5.3 pyarrow==12.0.1 textblob -usaddress==0.5.10 \ No newline at end of file +usaddress==0.5.10 + +# Requirements we may not need +xgboost==1.7.6 \ No newline at end of file diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py index 
b616be08..9a7d6523 100644 --- a/etl/bill_savings/EnergyConsumptionModel.py +++ b/etl/bill_savings/EnergyConsumptionModel.py @@ -1,17 +1,13 @@ import pandas as pd import numpy as np -import msgpack from xgboost import XGBRegressor -from datetime import datetime from sklearn.model_selection import train_test_split from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error from sklearn.feature_selection import RFECV -from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_from_s3 -import logging -from pprint import pprint +from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_csv_from_s3 +from utils.logger import setup_logger -# Configure logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = setup_logger() class EnergyConsumptionModel: @@ -26,8 +22,6 @@ class EnergyConsumptionModel: "flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation", "low-energy-lighting", "environment-impact-current", "energy-tariff", "county", "construction-age-band", "co2-emissions-current", - # TODO: Testing - "lighting-cost-current", "hot-water-cost-current", "current-energy-rating" ], "hot_water_kwh": [ "lodgement-year", "lodgement-month", @@ -51,11 +45,17 @@ class EnergyConsumptionModel: "low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating" ] - def __init__(self, cleaned, model_paths=None, n_jobs=1): + retail_price_comparison = None + + def __init__( + self, cleaned, model_paths=None, dummy_schema_path=None, consumption_average_path=None, n_jobs=1, + environment="dev" + ): self.cleaned = cleaned self.models = {} self.model_paths = model_paths or {} self.n_jobs = n_jobs + self.environment = environment self.data = None self.input_data = None @@ -63,6 +63,7 @@ class EnergyConsumptionModel: self.training_predictions = {} 
self.testing_predictions = {} self.best_iteration = {} + self.dummy_schema = None self.x_train = {} self.x_test = {} @@ -79,17 +80,90 @@ class EnergyConsumptionModel: if model_paths: for target, path in model_paths.items(): - self.models[target] = read_pickle_from_s3(bucket_name="retrofit-model-directory-dev", s3_file_name=path) + # Read model + self.models[target] = read_pickle_from_s3( + bucket_name=f"retrofit-model-directory-{environment}", s3_file_name=path + ) + # Read dummy schema + + if dummy_schema_path: + self.dummy_schema = read_pickle_from_s3( + bucket_name=f"retrofit-model-directory-{environment}", + s3_file_name=dummy_schema_path + ) + + self.consumption_averages = None + if consumption_average_path: + self.consumption_averages = read_dataframe_from_s3_parquet( + bucket_name=f"retrofit-data-{environment}", + file_key=consumption_average_path + ) + + # We also retrieve the newest retail price comparison data which comes from Ofgem: + # https://www.ofgem.gov.uk/energy-data-and-research/data-portal/retail-market-indicators + # We use the detail price comparison by company and tariff type data + self.read_retail_price_comparison() + + def read_retail_price_comparison(self): + data = read_csv_from_s3( + bucket_name=f"retrofit-data-{self.environment}", + filepath="energy_consumption/retail-price-comparison.csv" + ) + header = ['Date', 'Average standard variable tariff (Large legacy suppliers)', + 'Average standard variable tariff (Other suppliers)', 'Average fixed tariff', + 'Cheapest tariff (Large legacy suppliers)', 'Cheapest tariff (All suppliers)', + 'Cheapest tariff (Basket)', 'Default tariff cap level'] + + # Extract data rows + data_rows = [] + for row in data[1:]: + date = row['\ufeff"'] + values = row[None] + data_rows.append([date] + values) + + self.retail_price_comparison = pd.DataFrame(data_rows, columns=header) + self.retail_price_comparison['Date'] = pd.to_datetime(self.retail_price_comparison['Date'], errors='coerce') + + def 
convert_cost_to_today(self, original_cost, lodgement_date): + """ + Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy costs + (or as close to today as possible) + :param original_cost: The original energy cost + :param lodgement_date: The date the EPC was lodged + :return: + """ + closest_date = self.retail_price_comparison.iloc[ + (self.retail_price_comparison['Date'] - lodgement_date).abs().argsort()[:1] + ]['Date'].values[0] + closest_date = pd.Timestamp(closest_date) + + # Extract the tariff price on the closest date + tariff_2024 = self.retail_price_comparison[ + self.retail_price_comparison['Date'] == closest_date + ]['Average standard variable tariff (Large legacy suppliers)'].values[0] + + # Extract the latest available tariff price + latest_tariff = self.retail_price_comparison[ + 'Average standard variable tariff (Large legacy suppliers)' + ].iloc[-1] + + # Calculate the ratio + ratio = float(latest_tariff) / float(tariff_2024) + + # Calculate the updated heating cost + updated_cost = original_cost * ratio + + return updated_cost def read_dataset(self, file_path): """Reads the dataset from the specified file path.""" - logging.info(f"Reading dataset from {file_path}") - self.data = read_dataframe_from_s3_parquet(bucket_name="retrofit-data-dev", file_key=file_path) + logger.info(f"Reading dataset from {file_path}") + self.data = read_dataframe_from_s3_parquet(bucket_name=f"retrofit-data-{self.environment}", file_key=file_path) self.input_data = self.data.copy() - def feature_engineering(self): + def feature_engineering(self, drop_first=False): """Performs feature engineering on the dataset.""" - logging.info("Starting feature engineering") + logger.info("Starting feature engineering") self.data["lodgement-date"] = pd.to_datetime(self.data["lodgement-date"]) self.data["lodgement-year"] = self.data["lodgement-date"].dt.year self.data["lodgement-month"] = self.data["lodgement-date"].dt.month @@ -143,20 
+217,13 @@ class EnergyConsumptionModel: ) self.data = self.data.drop(columns=["original_description", "thermal_transmittance", "from", "to"]) - # Modify number of heated rooms and number of habitable rooms - self.data["number-heated-rooms"] = self.data["number-heated-rooms"].apply( - lambda x: "16_or_more" if x > 15 else str(x) - ) - # self.data["number-habitable-rooms"] = self.data["number-habitable-rooms"].apply( - # lambda x: "10+" if x > 10 else str(x) - # ) - # Convert data types self.data[self.NUMERICAL_COLUMNS] = self.data[self.NUMERICAL_COLUMNS].apply(pd.to_numeric) self.data[self.CATEGORICAL_COLUMNS] = self.data[self.CATEGORICAL_COLUMNS].astype(str) # Convert categorical columns to dummies - self.data = pd.get_dummies(self.data, columns=self.CATEGORICAL_COLUMNS, drop_first=True) + self.data = pd.get_dummies(self.data, columns=self.CATEGORICAL_COLUMNS, drop_first=drop_first) + self.dummy_schema = self.data.columns.tolist() # Store the dummy columns self.dummy_columns = {} @@ -170,14 +237,14 @@ class EnergyConsumptionModel: dummy_feature_columns.append(feature) self.dummy_columns[target] = dummy_feature_columns - logging.info("Feature engineering completed") + logger.info("Feature engineering completed") def split_dataset(self, target, test_size=0.2, validation_size=0.2, random_state=42): """Splits the dataset into training, validation, and testing sets.""" if target not in self.TARGETS: raise ValueError(f"Target {target} not in {self.TARGETS}") - logging.info(f"Splitting dataset for target {target}") + logger.info(f"Splitting dataset for target {target}") # Split into train + validation and test sets x_train_val, x_test, y_train_val, y_test = train_test_split( @@ -211,7 +278,7 @@ class EnergyConsumptionModel: if target not in self.TARGETS: raise ValueError(f"Target {target} not in {self.TARGETS}") - logging.info(f"Starting feature selection for target {target}") + logger.info(f"Starting feature selection for target {target}") # Sample the data if specified 
if sample_fraction < 1.0: @@ -238,7 +305,7 @@ class EnergyConsumptionModel: self.x_test[target] = self.x_test[target][self.selected_features[target]] self.x_val[target] = self.x_val[target][self.selected_features[target]] - logging.info(f"Feature selection completed for target {target}") + logger.info(f"Feature selection completed for target {target}") def init_model(self, feature_selection=False): @@ -271,7 +338,7 @@ class EnergyConsumptionModel: def fit_model(self, target): """Fits the model to the training data and removes zero-importance features.""" - logging.info(f"Fitting model for target {target}") + logger.info(f"Fitting model for target {target}") # Initialize and fit the model model = self.init_model() @@ -293,7 +360,7 @@ class EnergyConsumptionModel: zero_importance_features = feature_importance[feature_importance['Importance'] == 0]['Feature'].tolist() if zero_importance_features: - logging.info(f"Removing zero-importance features for target {target}: {zero_importance_features}") + logger.info(f"Removing zero-importance features for target {target}: {zero_importance_features}") self.x_train[target] = self.x_train[target].drop(columns=zero_importance_features) self.x_val[target] = self.x_val[target].drop(columns=zero_importance_features) @@ -314,22 +381,22 @@ class EnergyConsumptionModel: # Store the best iteration self.best_iteration[target] = self.models[target].best_iteration - logging.info(f"Model fitting completed for target {target}") + logger.info(f"Model fitting completed for target {target}") def re_train_final_model(self, target): """Re-trains the final model on the combined training and validation set.""" - logging.info(f"Re-training final model for target {target}") + logger.info(f"Re-training final model for target {target}") x_train_val = pd.concat([self.x_train[target], self.x_val[target]]) y_train_val = pd.concat([self.y_train[target], self.y_val[target]]) self.models[target] = self.init_model() self.models[target].fit(x_train_val, 
y_train_val, verbose=False) - logging.info(f"Re-training final model completed for target {target}") + logger.info(f"Re-training final model completed for target {target}") def evaluate_model(self, target): """Evaluates the model on training and testing data.""" - logging.info(f"Evaluating model for target {target}") + logger.info(f"Evaluating model for target {target}") y_train_pred = self.models[target].predict(self.x_train[target]) train_mse = mean_squared_error(self.y_train[target], y_train_pred) train_r2 = r2_score(self.y_train[target], y_train_pred) @@ -367,7 +434,7 @@ class EnergyConsumptionModel: 'Importance': self.models[target].feature_importances_ }).sort_values(by='Importance', ascending=False) - logging.info(f"Evaluation completed for target {target}") + logger.info(f"Evaluation completed for target {target}") return { 'train': { @@ -383,14 +450,21 @@ class EnergyConsumptionModel: } } - def save_model(self, target): + def save_model(self, target, dataset_version): """Saves the model to S3.""" - logging.info(f"Saving model for target {target}") - run_date = datetime.now().strftime("%Y-%m-%d") + logger.info(f"Saving model for target {target}") save_pickle_to_s3( self.models[target], - bucket_name="retrofit-model-directory-dev", - s3_file_name=f"model_directory/energy_consumption_model/{target}_{run_date}.pkl" + bucket_name=f"retrofit-model-directory-{self.environment}", + s3_file_name=f"model_directory/energy_consumption_model/{target}_{dataset_version}.pkl" + ) + + def save_dummy_schema(self, dataset_version): + logger.info("Saving dummy schema for target {target}") + save_pickle_to_s3( + self.dummy_schema, + bucket_name=f"retrofit-model-directory-{self.environment}", + s3_file_name=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl" ) def score_new_data(self, new_data, target): @@ -398,129 +472,65 @@ class EnergyConsumptionModel: if target not in self.models: raise ValueError(f"Model for target {target} not loaded or 
trained") - new_data_transformed = self.transform_new_data(new_data, target) - return self.models[target].predict(new_data_transformed) + # Verify that self.data is None + if self.data is not None: + raise ValueError("self.data is not None. Ensure that self.data is reset before scoring new data.") - def transform_new_data(self, new_data, target): - """Applies the same transformations to new data as were applied to the training data.""" + # Temporarily set self.data to new data + self.data = new_data.copy() - # TODO THis should jsut use our other transformation function - new_data["lodgement-date"] = pd.to_datetime(new_data["lodgement-date"]) - new_data["lodgement-year"] = new_data["lodgement-date"].dt.year - new_data["lodgement-month"] = new_data["lodgement-date"].dt.month + # Run feature engineering + self.feature_engineering(drop_first=False) - # Convert categorical columns to dummies - new_data = pd.get_dummies(new_data, columns=self.CATEGORICAL_COLUMNS, drop_first=True) + new_data_transformed = self.data.copy() - # Align new data with the dummy columns from training data - new_data = new_data.reindex(columns=self.dummy_columns[target], fill_value=0) + for col in self.dummy_schema: + if col not in new_data_transformed.columns: + new_data_transformed[col] = 0 - # Select the features used by the model - new_data = new_data[self.selected_features[target]] + new_data_transformed = new_data_transformed[self.dummy_schema] + missed_dummies = [c for c in self.models[target].feature_names_in_ if c not in new_data_transformed.columns] + zero_df = pd.DataFrame([dict(zip(missed_dummies, [0, ] * len(missed_dummies)))]) + new_data_transformed = pd.concat([new_data_transformed, zero_df], axis=1) - return new_data + # When we dummy in this case, we run with drop_first = False so we may end up with some of those + # first columns, so we'll need to drop them + new_data_transformed = new_data_transformed[self.models[target].feature_names_in_] - def error_analysis(self, target,
top_n=10, unique_threshold=0.8): + # Generate predictions + prediction = self.models[target].predict(new_data_transformed) + + # Reset self.data to None + self.data = None + + return prediction + + @staticmethod + def calculate_percentage_decrease(start_rating, end_rating, consumption_averages): + + start_consumption = consumption_averages.loc[ + consumption_averages["current-energy-rating"] == start_rating, "total_consumption" + ].values[0] + end_consumption = consumption_averages.loc[ + consumption_averages["current-energy-rating"] == end_rating, "total_consumption" + ].values[0] + + percentage_decrease = ((start_consumption - end_consumption) / start_consumption) * 100 + return percentage_decrease + + def estimate_new_consumption(self, current_rating, target_rating, current_consumption): """ - Perform error analysis on the provided model and dataset. - - Parameters: - - target: The target variable to analyze. - - top_n: Number of top residuals to consider for analysis. - - unique_threshold: Threshold to exclude columns with high unique values. - - Returns: - - summary: Dictionary summarizing common features among poorly performing rows. + Given the consumption_averages dataset, which is produced as a result of the data_combining.py script, + for the energy kwh models, this function will estimate the new consumption based on the current consumption, + based on the expected reduction in consumption from the current rating to the target rating.
+ :param current_rating: + :param target_rating: + :param current_consumption: + :param df: + :return: """ - - # Calculate predictions and residuals - y_train_pred = self.models[target].predict(self.x_train[target]) - y_test_pred = self.models[target].predict(self.x_test[target]) - - train_residuals = self.y_train[target] - y_train_pred - test_residuals = self.y_test[target] - y_test_pred - - # Identify top N poorly performing rows by absolute residuals - top_train_indices = train_residuals.abs().nlargest(top_n).index - top_test_indices = test_residuals.abs().nlargest(top_n).index - - top_train_data = self.input_data.loc[top_train_indices] - top_test_data = self.input_data.loc[top_test_indices] - - # Automatically detect and exclude columns - def exclude_columns(data, threshold): - exclude_cols = [] - num_rows = data.shape[0] - for col in data.columns: - if data[col].dtype == 'object' and data[col].nunique() / num_rows >= threshold: - exclude_cols.append(col) - return exclude_cols - - exclude_cols = exclude_columns(top_train_data, unique_threshold) - - top_train_data = top_train_data.drop(columns=exclude_cols) - top_test_data = top_test_data.drop(columns=exclude_cols) - - # One-hot encode categorical variables - categorical_columns = top_train_data.select_dtypes(include=['object']).columns.tolist() - top_train_data_encoded = pd.get_dummies(top_train_data, columns=categorical_columns, drop_first=True) - top_test_data_encoded = pd.get_dummies(top_test_data, columns=categorical_columns, drop_first=True) - - # Ensure all original columns are included in the encoded data - top_train_data_encoded = top_train_data_encoded.reindex(columns=self.input_data.columns, fill_value=0) - top_test_data_encoded = top_test_data_encoded.reindex(columns=self.input_data.columns, fill_value=0) - - # Correlation analysis with residuals - train_corr = top_train_data_encoded.corrwith(train_residuals.loc[top_train_indices]) - test_corr = 
top_test_data_encoded.corrwith(test_residuals.loc[top_test_indices]) - - # Return summaries - summary = { - "train_summary": top_train_data.describe(include='all').T, - "test_summary": top_test_data.describe(include='all').T, - "train_corr": train_corr, - "test_corr": test_corr, - "top_train_data": top_train_data, - "top_test_data": top_test_data - } - - return summary - - -# Usage: -cleaned = read_from_s3( - s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name="retrofit-data-dev" -) - -cleaned = msgpack.unpackb(cleaned, raw=False) - -model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2) -model.read_dataset('energy_consumption/2024-07-05/energy_consumption_dataset.parquet') -model.feature_engineering() - -# For heating_kwh -model.split_dataset(target='heating_kwh') -model.fit_model(target='heating_kwh') -model.re_train_final_model(target='heating_kwh') -evaluation_results = model.evaluate_model(target='heating_kwh') - -pprint(evaluation_results["train"]) -pprint(evaluation_results["test"]) - -importance_df = evaluation_results["train"]["Feature Importance"] -testing_predictions = model.testing_predictions["heating_kwh"] -testing_predictions = testing_predictions.sort_values("residual", ascending=False) -training_predictions = model.training_predictions["heating_kwh"] -training_predictions = training_predictions.sort_values("residual", ascending=False) -# Merge on model.input_data, by the index -merged_data = testing_predictions.merge(model.input_data, left_index=True, right_index=True) -merged_data_train = training_predictions.merge(model.input_data, left_index=True, right_index=True) - -# For hot_water_kwh -model.split_dataset(target='hot_water_kwh') -model.fit_model(target='hot_water_kwh') -model.re_train_final_model(target='hot_water_kwh') -evaluation_results = model.evaluate_model(target='hot_water_kwh') -pprint(evaluation_results["train"]) -pprint(evaluation_results["test"]) + percentage_decrease = self.calculate_percentage_decrease( + 
current_rating, target_rating, self.consumption_averages + ) + new_consumption = current_consumption * (1 - percentage_decrease / 100) + return new_consumption diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py index 4d913e8f..be141c01 100644 --- a/etl/bill_savings/data_collection.py +++ b/etl/bill_savings/data_collection.py @@ -133,7 +133,7 @@ def app(): energy_consumption_data = [] for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)): # Skip the first 50 - if i < 36: + if i < 110: continue data = pd.read_csv(directory / "certificates.csv", low_memory=False) diff --git a/etl/bill_savings/data_combining.py b/etl/bill_savings/data_combining.py index a111ecf2..11366360 100644 --- a/etl/bill_savings/data_combining.py +++ b/etl/bill_savings/data_combining.py @@ -91,3 +91,14 @@ def app(): file_key=f"energy_consumption/{run_date}/energy_consumption_dataset.parquet", df=df ) + + # We also estimate the energy consumption reduction from this data, by band + df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"] + consumption_averages = df.groupby("current-energy-rating")["total_consumption"].mean().reset_index() + + # Save the consumption averages back to s3 + save_dataframe_to_s3_parquet( + bucket_name="retrofit-data-dev", + file_key=f"energy_consumption/{run_date}/consumption_averages.parquet", + df=consumption_averages + ) diff --git a/etl/bill_savings/training.py b/etl/bill_savings/training.py new file mode 100644 index 00000000..df60298b --- /dev/null +++ b/etl/bill_savings/training.py @@ -0,0 +1,57 @@ +from pprint import pprint +import msgpack +from utils.s3 import read_from_s3 +from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel + + +def handler(): + """ + This function is used to train the model and store the final models in s3 as pickles + :return: + """ + + dataset_version = "2024-07-08" + + # Usage: + cleaned = read_from_s3( +
s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + + cleaned = msgpack.unpackb(cleaned, raw=False) + + model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2) + model.read_dataset(f'energy_consumption/{dataset_version}/energy_consumption_dataset.parquet') + model.feature_engineering() + model.save_dummy_schema(dataset_version=dataset_version) + + # For heating_kwh + model.split_dataset(target='heating_kwh') + model.fit_model(target='heating_kwh') + model.re_train_final_model(target='heating_kwh') + evaluation_results = model.evaluate_model(target='heating_kwh') + + pprint(evaluation_results["train"]) + pprint(evaluation_results["test"]) + + model.save_model(target='heating_kwh', dataset_version=dataset_version) + + # importance_df = evaluation_results["train"]["Feature Importance"] + # testing_predictions = model.testing_predictions["heating_kwh"] + # testing_predictions = testing_predictions.sort_values("residual", ascending=False) + # training_predictions = model.training_predictions["heating_kwh"] + # training_predictions = training_predictions.sort_values("residual", ascending=False) + # # Merge on model.input_data, by the index + # merged_data = testing_predictions.merge(model.input_data, left_index=True, right_index=True) + # merged_data_train = training_predictions.merge(model.input_data, left_index=True, right_index=True) + + # For hot_water_kwh + model.split_dataset(target='hot_water_kwh') + model.fit_model(target='hot_water_kwh') + model.re_train_final_model(target='hot_water_kwh') + evaluation_results = model.evaluate_model(target='hot_water_kwh') + + pprint(evaluation_results["train"]) + pprint(evaluation_results["test"]) + + model.save_model(target='hot_water_kwh', dataset_version=dataset_version) diff --git a/etl/customers/places_for_people/demo_portfolio.py b/etl/customers/places_for_people/demo_portfolio.py new file mode 100644 index 00000000..2d48eff3 --- /dev/null +++ 
b/etl/customers/places_for_people/demo_portfolio.py @@ -0,0 +1,294 @@ +import pandas as pd + +from utils.s3 import save_csv_to_s3 + +PORTFOLIO_ID = 83 +SECOND_PORTFOLIO_ID = 84 +USER_ID = 8 + + +def app(): + # TODO: We can insert a variable, indicating that they own all of the units in the building + asset_list = [ + { + "address": "Flat 1, Fenton Court", + "postcode": "N2 8DS", + "uprn": 200140644, + "building_id": 1, + }, + { + "address": "Flat 2, Fenton Court", + "postcode": "N2 8DS", + "uprn": 200140645, + "building_id": 1, + }, + { + "address": "Flat 3, Fenton Court", + "postcode": "N2 8DS", + "uprn": 200140646, + "building_id": 1, + }, + { + "address": "Flat 4, Fenton Court", + "postcode": "N2 8DS", + "uprn": 200140647, + "building_id": 1, + }, + { + "address": "Flat 5, Fenton Court", + "postcode": "N2 8DS", + "uprn": 200140648, + "building_id": 1, + }, + { + "address": "Flat 6, Fenton Court", + "postcode": "N2 8DS", + "uprn": 200140649, + "building_id": 1, + } + ] + + asset_list = pd.DataFrame(asset_list) + + # Store the asset list in s3 + filename = f"{USER_ID}/{PORTFOLIO_ID}/non_intrusives.csv" + save_csv_to_s3( + dataframe=asset_list, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + body = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "B", + "trigger_file_path": filename, + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "budget": None, + "exclusions": ["floor_insulation"] + } + print(body) + + # Get an example of flats with solar panels from epc data + + # import inspect + # import pandas as pd + # from tqdm import tqdm + # from pathlib import Path + # + # src_file_path = inspect.getfile(lambda: None) + # + # EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates" + # + # epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()] + # + # directory =
epc_directories[1] + # data = pd.read_csv(directory / "certificates.csv", low_memory=False) + # # Get flats + # data = data[data["PROPERTY_TYPE"].str.lower().str.contains("flat")] + # data = data[~pd.isnull(data["UPRN"])] + # data["UPRN"] = data["UPRN"].astype(int).astype(str) + # data = data[pd.to_datetime(data["LODGEMENT_DATE"]) > "2020-01-01"] + # flats_with_solar = data[data['PHOTO_SUPPLY'] > 0] + # + # print(flats_with_solar["UPRN"]) + # + # flats_with_solar[["ADDRESS", "UPRN"]] + # + # # Good example: + # # UPRN: 10013160824, Flat 39, The Meadow, 30 Busk Meadow S5 7JH (care home with 39 flats, have solar panels) + # # + # # Mostly, For a mid-floor flat, the property doesn't show as having solar panels through the photo_supply variable + # # But actually for UPRN: 10013245713, Apartment 4, Orchard House, Gill Lane PR4 5QN, this has a dwelling above + # # but the photo_supply variable is 20 + # + # # Small flat consisting of 2 units + # # UPRN: 42172953, FLAT 2, 276 CLAUGHTON ROAD, BIRKENHEAD CH41 4DX + # + # # Flat containing 5 units + # # UPRN: 10013247127 Flat 1, Old Church House PR4 5GE + # # UPRN: 10013247130 Flat 4, Old Church House PR4 5GE + # + # # Flat containing multiple units: + # # UPRNS: 10013245710, 10013245716, 10013245711, 10013245717, 10013245714, 10013245715, 10013245712, 10013245713 + # + # # Look for flats with air source heat pumps! 
+ # flats_with_asps = data[data["MAINHEAT_DESCRIPTION"].str.lower().str.contains("air source heat pump")] + # print(flats_with_asps[["UPRN", "ADDRESS"]]) + + +def app_epc_b(): + # TODO: We can insert a variable, indicating that they own all of the units in the building + asset_list = [ + { + "address": "Flat 1, Fenton Court", + "postcode": "N2 8DS", + "uprn": 200140644, + "building_id": 1, + }, + { + "address": "Flat 2, Fenton Court", + "postcode": "N2 8DS", + "uprn": 200140645, + "building_id": 1, + }, + { + "address": "Flat 3, Fenton Court", + "postcode": "N2 8DS", + "uprn": 200140646, + "building_id": 1, + }, + { + "address": "Flat 4, Fenton Court", + "postcode": "N2 8DS", + "uprn": 200140647, + "building_id": 1, + }, + { + "address": "Flat 5, Fenton Court", + "postcode": "N2 8DS", + "uprn": 200140648, + "building_id": 1, + }, + { + "address": "Flat 6, Fenton Court", + "postcode": "N2 8DS", + "uprn": 200140649, + "building_id": 1, + } + ] + + non_invasive_recommendations = [ + { + "address": "Flat 1, Fenton Court", + "postcode": "N2 8DS", + 'recommendations': [ + 'cavity_extract_and_refill', + # 'air_source_heat_pump' + ] + }, + { + "address": "Flat 2, Fenton Court", + "postcode": "N2 8DS", + 'recommendations': [ + 'cavity_extract_and_refill', + # 'air_source_heat_pump' + ] + }, + { + "address": "Flat 3, Fenton Court", + "postcode": "N2 8DS", + 'recommendations': [ + 'cavity_extract_and_refill', + # 'air_source_heat_pump' + ] + }, + { + "address": "Flat 4, Fenton Court", + "postcode": "N2 8DS", + 'recommendations': [ + 'cavity_extract_and_refill', + # 'air_source_heat_pump' + ] + }, + { + "address": "Flat 5, Fenton Court", + "postcode": "N2 8DS", + 'recommendations': [ + 'cavity_extract_and_refill', + 'loft_insulation', + # 'air_source_heat_pump' + ] + }, + { + "address": "Flat 6, Fenton Court", + "postcode": "N2 8DS", + 'recommendations': [ + 'cavity_extract_and_refill', + 'loft_insulation', + # 'air_source_heat_pump' + ] + }, + ] + + asset_list =
pd.DataFrame(asset_list) + + # Store the asset list in s3 + filename = f"{USER_ID}/{SECOND_PORTFOLIO_ID}/non_intrusives.csv" + save_csv_to_s3( + dataframe=asset_list, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + # Store non-invasive recommendations in S3 + non_invasive_recommendations_filename = f"{USER_ID}/{SECOND_PORTFOLIO_ID}/non_invasive_recommendations.json" + save_csv_to_s3( + dataframe=pd.DataFrame(non_invasive_recommendations), + bucket_name="retrofit-plan-inputs-dev", + file_name=non_invasive_recommendations_filename + ) + + body = { + "portfolio_id": str(SECOND_PORTFOLIO_ID), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "B", + "trigger_file_path": filename, + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, + "budget": None, + "exclusions": ["floor_insulation"] + } + print(body) + + # Get an example of flats with solar panels from epc data + + # import inspect + # import pandas as pd + # from tqdm import tqdm + # from pathlib import Path + # + # src_file_path = inspect.getfile(lambda: None) + # + # EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates" + # + # epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()] + # + # directory = epc_directories[1] + # data = pd.read_csv(directory / "certificates.csv", low_memory=False) + # # Get flats + # data = data[data["PROPERTY_TYPE"].str.lower().str.contains("flat")] + # data = data[~pd.isnull(data["UPRN"])] + # data["UPRN"] = data["UPRN"].astype(int).astype(str) + # data = data[pd.to_datetime(data["LODGEMENT_DATE"]) > "2020-01-01"] + # flats_with_solar = data[data['PHOTO_SUPPLY'] > 0] + # + # print(flats_with_solar["UPRN"]) + # + # flats_with_solar[["ADDRESS", "UPRN"]] + # + # # Good example: + # # UPRN: 10013160824, Flat 39, The Meadow, 30 Busk Meadow S5 7JH (care home with 39 flats, have solar panels) + # 
# + # # Mostly, For a mid-floor flat, the property doesn't show as having solar panels through the photo_supply variable + # # But actually for UPRN: 10013245713, Apartment 4, Orchard House, Gill Lane PR4 5QN, this has a dwelling above + # # but the photo_supply variable is 20 + # + # # Small flat consisting of 2 units + # # UPRN: 42172953, FLAT 2, 276 CLAUGHTON ROAD, BIRKENHEAD CH41 4DX + # + # # Flat containing 5 units + # # UPRN: 10013247127 Flat 1, Old Church House PR4 5GE + # # UPRN: 10013247130 Flat 4, Old Church House PR4 5GE + # + # # Flat containing multiple units: + # # UPRNS: 10013245710, 10013245716, 10013245711, 10013245717, 10013245714, 10013245715, 10013245712, 10013245713 + # + # # Look for flats with air source heat pumps! + # flats_with_asps = data[data["MAINHEAT_DESCRIPTION"].str.lower().str.contains("air source heat pump")] + # print(flats_with_asps[["UPRN", "ADDRESS"]]) diff --git a/etl/customers/vander_elliot/non_intrusives.py b/etl/customers/vander_elliot/non_intrusives.py index bbc46754..280ba968 100644 --- a/etl/customers/vander_elliot/non_intrusives.py +++ b/etl/customers/vander_elliot/non_intrusives.py @@ -119,11 +119,12 @@ def app(): "portfolio_id": str(PORTFOLIO_ID), "housing_type": "Private", "goal": "Increase EPC", - "goal_value": "A", + "goal_value": "C", "trigger_file_path": filename, "already_installed_file_path": already_installed_filename, "patches_file_path": "", "non_invasive_recommendations_file_path": "", + "exclusions": ["wall_insulation", "air_source_heat_pump"], "budget": None, } print(body) diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py index bc3bfd91..0601d6ec 100644 --- a/etl/epc/Pipeline.py +++ b/etl/epc/Pipeline.py @@ -40,7 +40,7 @@ VARIABLE_DATA_FEATURES = ( COMPONENT_FEATURES + ROOM_FEATURES + EFFICIENCY_FEATURES - # + POTENTIAL_COLUMNS + + POTENTIAL_COLUMNS + ["lodgement_date", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE] ) COST_FEATURES = [x.lower() for x in COST_FEATURES] @@ -288,9 +288,11 @@ 
class EPCPipeline: for x in variable_data.to_dict(orient="records") ] - # TODO: We want to be able to provide value for the u values in the main pipeline so this will need to be part of the EPCRecord + # TODO: We want to be able to provide value for the u values in the main pipeline so this will need to be + # part of the EPCRecord - # We can use multiple types of comparison datasets - i.e. Compare consecutive records, or compare all permutations of records + # We can use multiple types of comparison datasets - i.e. Compare consecutive records, or compare all + # permutations of records property_difference_records = self._generate_property_difference_records( epc_records, uprn, directory, fixed_data ) @@ -311,7 +313,8 @@ class EPCPipeline: property_difference_records: list = [] - # property_difference_records = self._compare_consecutive_epcs(epc_records, uprn, directory, fixed_data, property_difference_records) + # property_difference_records = self._compare_consecutive_epcs(epc_records, uprn, directory, fixed_data, + # property_difference_records) property_difference_records = self._compare_all_permutation_epcs( epc_records, uprn, directory, fixed_data, property_difference_records @@ -353,7 +356,9 @@ class EPCPipeline: if not difference_record.ensure_adequate_data(): # Rdsap hasn't changed but we have enough data to use this record # i.e. 
all fields aside from mechnical ventilation are the same] - # self.check_records.append({"uprn": uprn, "directory_name": directory.name, "difference_record": difference_record, "earliest_record": earliest_record, "latest_record": latest_record}) + # self.check_records.append({"uprn": uprn, "directory_name": directory.name, + # "difference_record": difference_record, "earliest_record": earliest_record, + # "latest_record": latest_record}) continue all_equal = difference_record.compare_fields_in_records( @@ -402,7 +407,8 @@ class EPCPipeline: if not difference_record.ensure_adequate_data(): # Rdsap hasn't changed but we have enough data to use this record # i.e. all fields aside from mechnical ventilation are the same] - # self.check_records.append({"uprn": uprn, "directory_name": directory.name, "difference_record": difference_record, "earliest_record": earliest_record, "latest_record": latest_record}) + # self.check_records.append({"uprn": uprn, "directory_name": directory.name, "difference_record": + # difference_record, "earliest_record": earliest_record, "latest_record": latest_record}) continue all_equal = difference_record.compare_fields_in_records( diff --git a/etl/epc/Record.py b/etl/epc/Record.py index b8471ccf..cc70d42b 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -79,10 +79,10 @@ class EPCRecord: lighting_cost_current: float = None heating_cost_current: float = None hot_water_cost_current: float = None - # potential_energy_efficiency: float = None - # environment_impact_potential: float = None - # energy_consumption_potential: float = None - # co2_emissions_potential: float = None + potential_energy_efficiency: float = None + environment_impact_potential: float = None + energy_consumption_potential: float = None + co2_emissions_potential: float = None lodgement_date: str = None current_energy_efficiency: int = None energy_consumption_current: int = None @@ -255,18 +255,18 @@ class EPCRecord: self.lighting_cost_current: float = 
self.prepared_epc["lighting_cost_current"] self.heating_cost_current: float = self.prepared_epc["heating_cost_current"] self.hot_water_cost_current: float = self.prepared_epc["hot_water_cost_current"] - # self.potential_energy_efficiency: float = float( - # self.prepared_epc["potential_energy_efficiency"] - # ) - # self.environment_impact_potential: float = float( - # self.prepared_epc["environment_impact_potential"] - # ) - # self.energy_consumption_potential: float = float( - # self.prepared_epc["energy_consumption_potential"] - # ) - # self.co2_emissions_potential: float = float( - # self.prepared_epc["co2_emissions_potential"] - # ) + self.potential_energy_efficiency: float = float( + self.prepared_epc["potential_energy_efficiency"] + ) + self.environment_impact_potential: float = float( + self.prepared_epc["environment_impact_potential"] + ) + self.energy_consumption_potential: float = float( + self.prepared_epc["energy_consumption_potential"] + ) + self.co2_emissions_potential: float = float( + self.prepared_epc["co2_emissions_potential"] + ) self.lodgement_date: str = self.prepared_epc["lodgement_date"] self.current_energy_efficiency: int = int( self.prepared_epc["current_energy_efficiency"] @@ -1056,18 +1056,18 @@ class EPCDifferenceRecord: "heating_cost_ending": self.record2.get("heating_cost_current"), "hot_water_cost_starting": self.record1.get("hot_water_cost_current"), "hot_water_cost_ending": self.record2.get("hot_water_cost_current"), - # "potential_energy_efficiency": self.earliest_record.get( - # "potential_energy_efficiency" - # ), - # "environment_impact_potential": self.earliest_record.get( - # "environment_impact_potential" - # ), - # "energy_consumption_potential": self.earliest_record.get( - # "energy_consumption_potential" - # ), - # "co2_emissions_potential": self.earliest_record.get( - # "co2_emissions_potential" - # ), + "potential_energy_efficiency": self.earliest_record.get( + "potential_energy_efficiency" + ), + 
"environment_impact_potential": self.earliest_record.get( + "environment_impact_potential" + ), + "energy_consumption_potential": self.earliest_record.get( + "energy_consumption_potential" + ), + "co2_emissions_potential": self.earliest_record.get( + "co2_emissions_potential" + ), **ending_record, **starting_record, } diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 68870841..ce459528 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -18,23 +18,23 @@ regional_labour_variations = [ {"Region": "Northern Ireland", "Adjustment_Factor": 0.76} ] -# This data is based on the MCS database +# This data is based on the MCS database - taken the figures for June 2024 MCS_SOLAR_PV_COST_DATA = { - "last_updated": "2024-06-10", - "average_cost_per_kwh": 1750, - "average_cost_per_kwh-Outer London": 1776, - "average_cost_per_kwh-Inner London": 1776, - "average_cost_per_kwh-South East England": 1672, - "average_cost_per_kwh-South West England": 1732, - "average_cost_per_kwh-East of England": 1721, + "last_updated": "2024-07-10", + "average_cost_per_kwh": 1825, + "average_cost_per_kwh-Outer London": 1950, + "average_cost_per_kwh-Inner London": 1950, + "average_cost_per_kwh-South East England": 1966, + "average_cost_per_kwh-South West England": 1864, + "average_cost_per_kwh-East of England": 1719, "average_cost_per_kwh-East Midlands": 1730, - "average_cost_per_kwh-West Midlands": 1761, - "average_cost_per_kwh-North East England": 1669, - "average_cost_per_kwh-North West England": 1764, - "average_cost_per_kwh-Yorkshire and the Humber": 1705, - "average_cost_per_kwh-Wales": 1896, - "average_cost_per_kwh-Scotland": 1767, - "average_cost_per_kwh-Northern Ireland": 1767, + "average_cost_per_kwh-West Midlands": 1789, + "average_cost_per_kwh-North East England": 1872, + "average_cost_per_kwh-North West England": 1860, + "average_cost_per_kwh-Yorkshire and the Humber": 1789, + "average_cost_per_kwh-Wales": 1676, + "average_cost_per_kwh-Scotland": 
1781, + "average_cost_per_kwh-Northern Ireland": 1347, } # This data is based on the MCS database, We use the larger figure between the 2023 and 2024 average, @@ -92,6 +92,12 @@ CONDENSING_BOILER_COSTS = { "40kw": 1625 } +# Electric boiler prices are based on +# https://www.greenmatch.co.uk/boilers/combi-boilers/electric-combi-boilers +# https://www.tlc-direct.co.uk/Products/ERMAC15.html +# The unit is a 15kw boiler, capable of outputting between 3kw and 15kw. Costs seem to be around £1800 +ELECTRIC_BOILER_COSTS = 1800 + # Assumes 3 hours to remove each heater (including re-decorating) ROOM_HEATER_REMOVAL_COST = 120 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3 @@ -1037,13 +1043,13 @@ class Costs: vat = total_cost - subtotal_before_vat # Labour hours are based on estimates from online research but an average team seems to consist of 3 people - # and most jobs take around 2 days. Assuming an 8 hour day for 3 people across 2 days, gives us 72 hours of + # and most jobs take around 2 days. Assuming an 8 hour day for 3 people across 2 days, gives us 48 hours of # labour return { "total": total_cost, "subtotal": subtotal_before_vat, "vat": vat, - "labour_hours": 72, + "labour_hours": 48, "labour_days": 2, } @@ -1145,6 +1151,25 @@ class Costs: "labour_days": 1, } + def cylinder_thermostat(self): + """ + Calculate the cost of installing a cylinder thermostat + """ + + # The £200 cost is a rough estimate based on internet research + total_cost = 200 + subtotal_before_vat = total_cost / (1 + self.VAT_RATE) + vat = total_cost - subtotal_before_vat + + # We estimate the labour hours to be 2 + return { + "total": total_cost, + "subtotal": subtotal_before_vat, + "vat": vat, + "labour_hours": 2, + "labour_days": 1, + } + def hot_water_tank_insulation(self): """ Calculate the cost of installing hot water tank insulation @@ -1285,7 +1310,7 @@ class Costs: estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators) return round(estimated_radiators) - def
boiler(self, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms): + def boiler(self, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms, is_electric=False): """ Based on a basic estimate of median value £2600 to install a low carbon combi boiler First time central heating vosts can als be found here: @@ -1293,7 +1318,10 @@ class Costs: :return: """ - unit_cost = CONDENSING_BOILER_COSTS[size] + if not is_electric: + unit_cost = CONDENSING_BOILER_COSTS[size] + else: + unit_cost = ELECTRIC_BOILER_COSTS # The unit cost is the cost without VAT # We now need to estimate the cost of the works labour_days = 2 @@ -1307,8 +1335,6 @@ class Costs: # Add contingency and preliminaries labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES) - # labour_days = labour_days + (removal_labour_hours / 8) - vat = labour_cost * self.VAT_RATE subtotal_before_vat = unit_cost + labour_cost diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py index 3f764d83..9faedb89 100644 --- a/recommendations/FloorRecommendations.py +++ b/recommendations/FloorRecommendations.py @@ -227,6 +227,11 @@ class FloorRecommendations(Definitions): "new_u_value": new_u_value, "sap_points": None, "already_installed": already_installed, + "description_simulation": { + "floor-description": "Solid, insulated" if + material["type"] == "solid_floor_insulation" + else "Suspended, insulated" + }, **cost_result } ) diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py index 76da6c37..1aae3973 100644 --- a/recommendations/HeatingControlRecommender.py +++ b/recommendations/HeatingControlRecommender.py @@ -35,6 +35,10 @@ class HeatingControlRecommender: return + if heating_description in ["Boiler and radiators, electric"]: + self.recommend_roomstat_programmer_trvs() + return + if heating_description in ["Air source heat pump, radiators, electric"]: 
self.recommend_time_temperature_zone_controls() @@ -186,7 +190,11 @@ class HeatingControlRecommender: "new_u_value": None, "sap_points": None, "already_installed": already_installed, - "simulation_config": simulation_config + "simulation_config": simulation_config, + "description_simulation": { + "mainheatcont-description": "Programmer, room thermostat and TRVS", + "mainheatc-energy-eff": "Good" + } } ) @@ -246,6 +254,10 @@ class HeatingControlRecommender: "new_u_value": None, "sap_points": None, "already_installed": already_installed, - "simulation_config": simulation_config + "simulation_config": simulation_config, + "description_simulation": { + "mainheatcont-description": "Time and temperature zone control", + "mainheatc-energy-eff": "Very Good" + } } ) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index ac8c4973..0afdc18f 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -42,18 +42,21 @@ class HeatingRecommender: return self.has_electric_heating_description or electric_heating_assumed - def recommend(self, has_cavity_or_loft_recommendations, phase=0): + def recommend(self, has_cavity_or_loft_recommendations, phase=0, exclusions=None): """ Produces heating recommendations :param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation recommendation. If there are cavity or loft recommendations, the property would need to complete those measures before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to :param phase: indicates the phase of the retrofit programme + :param exclusions: A list of exclusions for the recommendations """ # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace # the boiler, but instead flushing the system will make it run more efficiently. 
There is a cost for this # in the Costs class, stored as SYSTEM_FLUSH_COST + + exclusions = [] if exclusions is None else exclusions self.heating_recommendations = [] self.heating_control_recommendations = [] @@ -112,14 +115,84 @@ class HeatingRecommender: # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions # and either allow or prevent the recommendation of an air source heat pump - if self.is_ashp_valid(): + if self.is_ashp_valid(exclusions=exclusions): self.recommend_air_source_heat_pump( phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations ) return - def is_ashp_valid(self): + def recommend_electric_boiler_upgrade(self, phase): + + # Small initial scope, just handles the case of properties that have electric boilers where the efficiency + # is poor or very poor + # We recommend upgrading to a new electric boiler + + recommendation_phase = phase + + if self.property.data["mainheat-energy-eff"] not in ["Poor", "Very Poor"]: + return + + hotwater_from_mains = self.property.hotwater["clean_description"] in ["From main system"] + hotwater_from_cylinder = self.property.hotwater["clean_description"] in [ + "From main system, no cylinder thermostat" + ] + # if the hotwater is from the mains, we probably have a combi boiler so we recommend a new electric boiler + + if hotwater_from_mains: + description = f"Upgrade to a higher efficiency electric boiler" + + simulation_config = { + "mainheat_energy_eff_ending": "Average", + "hot_water_energy_eff_ending": "Average" + } + + boiler_costs = self.costs.boiler( + size=None, + exising_room_heaters=False, + system_change=False, + n_heated_rooms=self.property.data["number-heated-rooms"], + n_rooms=self.property.number_of_rooms, + is_electric=True + ) + + already_installed = "heating" in self.property.already_installed + if already_installed: + boiler_costs = override_costs(boiler_costs) + description = "Heating system has already been upgraded, no 
further action needed." + + boiler_recommendation = { + "phase": recommendation_phase, + "parts": [], + "type": "heating", + "description": description, + "starting_u_value": None, + "new_u_value": None, + "sap_points": None, + "already_installed": already_installed, + "simulation_config": simulation_config, + **boiler_costs + } + + controls_recommender = HeatingControlRecommender(self.property) + controls_recommender.recommend(heating_description="Boiler and radiators, electric") + + self.heating_recommendations.extend([boiler_recommendation] + controls_recommender.recommendation) + return + + if hotwater_from_cylinder: + # We recommend a change from a system boiler, with a cylinder to a combi boiler + description = ("Replace the existing boiler and cylinder without a thermostat with a new electric combi " + "boiler") + + def is_ashp_valid(self, exclusions): + + if "air_source_heat_pump" in self.property.non_invasive_recommendations: + return True + + if "air_source_heat_pump" in exclusions: + return False + suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"] has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"] @@ -169,6 +242,12 @@ class HeatingRecommender: "mainheat_energy_eff_ending": "Good", "hot_water_energy_eff_ending": "Good" } + description_simulation = { + "mainheat-description": "Air source heat pump, radiators, electric", + "mainheat-energy-eff": "Good", + "hot-water-energy-eff": "Good", + "hotwater-description": "From main system", + } # Installation of a boiler improves the hot water system so we need to reflect this in # the outcome of the recommendation heating_ending_config = MainHeatAttributes("Air source heat pump, radiators, electric").process() @@ -178,6 +257,10 @@ class HeatingRecommender: fuel_ending_config = {} if self.property.main_fuel["fuel_type"] != "electricity": fuel_ending_config = MainFuelAttributes("electricity (not community)").process() + description_simulation = { + 
**description_simulation, + "main-fuel": "electricity (not community)" + } # Check the simulation differences heating_simulation_config = check_simulation_difference( @@ -207,6 +290,12 @@ class HeatingRecommender: **controls_recommender.recommendation[0]["simulation_config"] } + description_simulation = { + **description_simulation, + "mainheatcont-description": "time and temperature zone control", + "mainheatc-energy-eff": "Very Good" + } + ashp_recommendation = { "phase": phase, "parts": [ @@ -219,6 +308,7 @@ class HeatingRecommender: "sap_points": None, "already_installed": already_installed, "simulation_config": simulation_config, + "description_simulation": description_simulation, **ashp_costs } @@ -458,6 +548,19 @@ class HeatingRecommender: return closest_size + @staticmethod + def estimate_electric_boiler_size(num_heated_rooms): + """ + We use the approach similar to as defined in + https://www.greenmatch.co.uk/boilers/combi-boilers/electric-combi-boilers + Instead of radiators as a proxy, we do the number of heated rooms + + :param num_heated_rooms: The number of heated rooms in the property + :return: + """ + + return max(num_heated_rooms * 1.5, 6) + def recommend_boiler_upgrades(self, phase, system_change, exising_room_heaters): """ This boiler recommendation will only recommend a like-for-like upgrade, since changing the system diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py index 9c5c7045..0d34c894 100644 --- a/recommendations/HotwaterRecommendations.py +++ b/recommendations/HotwaterRecommendations.py @@ -1,6 +1,7 @@ from backend.Property import Property from recommendations.Costs import Costs -from recommendations.recommendation_utils import override_costs +from recommendations.recommendation_utils import override_costs, check_simulation_difference +from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes class HotwaterRecommendations: @@ -34,10 +35,15 @@ class 
HotwaterRecommendations: self.recommend_tank_insulation(phase=phase) return + if self.property.hotwater["clean_description"] == "From main system, no cylinder thermostat": + self.recommend_cylinder_thermostat(phase=phase) + return + def recommend_tank_insulation(self, phase): """ If the home has a very poor hot water system, this is often indicative of a lack of insulation on the hot water - tank. This is a very simple and cost effective improvement that can be made to the home. + tank. This is a very simple and cost effective improvement that can be made to the home. It will likely + take the efficiency from very poor to poor. """ recommendation_cost = self.costs.hot_water_tank_insulation() @@ -62,7 +68,56 @@ class HotwaterRecommendations: "sap_points": None, "already_installed": already_installed, **recommendation_cost, - "simulation_config": {"hot_water_energy_eff_ending": "Average"} + "simulation_config": {"hot_water_energy_eff_ending": "Poor"}, + "description_simulation": { + "hot-water-energy-eff": "Poor" + } + } + ) + return + + def recommend_cylinder_thermostat(self, phase): + """ + If the home has a very poor hot water system, this is often indicative of a lack of insulation on the hot water + tank. This is a very simple and cost effective improvement that can be made to the home. 
+ """ + + recommendation_cost = self.costs.cylinder_thermostat() + + already_installed = "cylinder_thermostat" in self.property.already_installed + if already_installed: + recommendation_cost = override_costs(recommendation_cost) + description = "Cylinder thermostat has already been installed, no further action required" + else: + description = "Install a smart cylinder thermostat on the hot water tank" + + new_epc_description = "From main system" + hotwater_ending_config = HotWaterAttributes(new_epc_description).process() + hotwater_simulation_config = check_simulation_difference( + new_config=hotwater_ending_config, old_config=self.property.hotwater + ) + + simulation_config = { + "hot_water_energy_eff_ending": self.property.data["hot-water-energy-eff"], + **hotwater_simulation_config + } + + self.recommendations.append( + { + "phase": phase, + "parts": [], + "type": "cylinder_thermostat", + "description": description, + "starting_u_value": None, + "new_u_value": None, + "sap_points": None, + "already_installed": already_installed, + **recommendation_cost, + "simulation_config": simulation_config, + "description_simulation": { + "hot-water-energy-eff": self.property.data["hot-water-energy-eff"], + "hotwater-description": new_epc_description, + } } ) return diff --git a/recommendations/LightingRecommendations.py b/recommendations/LightingRecommendations.py index 31720579..1186b0a9 100644 --- a/recommendations/LightingRecommendations.py +++ b/recommendations/LightingRecommendations.py @@ -109,8 +109,12 @@ class LightingRecommendations: # For SAP points, we use the fact that lighting is usually worth 2 points and we scale this to # the proportion of lights that will be set to low energy "sap_points": round(2 * (number_non_lel_outlets / number_lighting_outlets), 2), - "heat_demand": heat_demand_change, + "kwh_savings": heat_demand_change, "co2_equivalent_savings": carbon_change, + "description_simulation": { + "lighting-energy-eff": "Very Good", + 
"lighting-description": "Low energy lighting in all fixed outlets", + }, **cost_result } ] diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 827ca928..fcdd513f 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -1,3 +1,4 @@ +import pandas as pd from backend.Property import Property from typing import List from itertools import groupby @@ -13,6 +14,7 @@ from recommendations.HeatingRecommender import HeatingRecommender from recommendations.HotwaterRecommendations import HotwaterRecommendations from recommendations.SecondaryHeating import SecondaryHeating from backend.ml_models.AnnualBillSavings import AnnualBillSavings +from backend.apis.GoogleSolarApi import GoogleSolarApi class Recommendations: @@ -117,7 +119,9 @@ class Recommendations: has_cavity_or_loft_recommendations = len(cavity_or_loft_recommendations) > 0 self.heating_recommender.recommend( - phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations + phase=phase, + has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations, + exclusions=self.exclusions ) if ( self.heating_recommender.heating_recommendations or @@ -221,6 +225,7 @@ class Recommendations: has_u_value = recommendations_by_type[0].get("new_u_value") is not None has_sap_points = recommendations_by_type[0].get("sap_points") is not None + has_rank = recommendations_by_type[0].get("rank") is not None # When check if these recommendations have two different types, such as solid wall insulation # If we have multiple types, we group by type and then select the best recommendation for each type @@ -238,6 +243,10 @@ class Recommendations: # Sort the options by the cost per SAP point improvement - the lower the better for rec in recommendations: rec["efficiency"] = rec["total"] / rec["sap_points"] + elif has_rank: + # Sort the options by rank - the lower the better + for rec in recommendations: + rec["efficiency"] = rec["rank"] else: # 
Sort the options by cost - the lower the better for rec in recommendations: @@ -270,8 +279,79 @@ class Recommendations: return property_recommendations + @staticmethod + def _calculate_appliance_solar_savings( + rec, property_instance, heating_kwh_reduction, hot_water_kwh_reduction, lighting_kwh_reduction + ): + """ + Calculates the impact on kwh and cost of installing solar panels on appliances + :param rec: The recommendation + :param property_instance: Instance of the Property class + :param heating_kwh_reduction: The kwh reduction from heating + :param hot_water_kwh_reduction: The kwh reduction from hot water + :param lighting_kwh_reduction: The kwh reduction from lighting + :return: + """ + + if rec["type"] != "solar_pv": + return 0, 0 + + if property_instance.solar_panel_configuration is None: + print("PLACEHOLDER ESTIMATES") + # 50% reduction average + kwh_reduction = property_instance.energy_consumption_estimates["adjusted"]["appliances"] * 0.5 + predicted_appliances_cost_reduction = kwh_reduction * AnnualBillSavings.ELECTRICITY_PRICE_CAP + return predicted_appliances_cost_reduction, kwh_reduction + + # Calulate the amount of energy the solar panel array will generate for this unit + unit_energy_consumption = ( + rec["initial_ac_kwh_per_year"] * + property_instance.solar_panel_configuration["unit_share_of_energy"] + ) + + unit_energy_utilised = unit_energy_consumption * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION + unit_energy_exported = unit_energy_consumption - unit_energy_utilised + unit_energy_exported_value = unit_energy_exported * AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT + + # We assume that 50% of the energy generated will be used by the property without a battery + # to be conservative + + # of the energy utilised, some of it is used by heating, hot water and lighting so we + # remove that from the total + unit_energy_utilised -= ( + heating_kwh_reduction + hot_water_kwh_reduction + lighting_kwh_reduction + ) + unit_energy_utilised = 0 if 
unit_energy_utilised < 0 else unit_energy_utilised + + # This is how much energy the appliances will use after install + post_install_appliance_kwh = ( + property_instance.energy_consumption_estimates["adjusted"]["appliances"] - + unit_energy_utilised + ) + post_install_appliance_kwh = ( + 0 if post_install_appliance_kwh < 0 else post_install_appliance_kwh + ) + + predicted_appliances_kwh_reduction = ( + property_instance.energy_consumption_estimates["adjusted"]["appliances"] - + post_install_appliance_kwh + ) + + predicted_appliances_cost_reduction = unit_energy_exported_value + ( + predicted_appliances_kwh_reduction * AnnualBillSavings.ELECTRICITY_PRICE_CAP + ) + + return predicted_appliances_cost_reduction, predicted_appliances_kwh_reduction + @classmethod - def calculate_recommendation_impact(cls, property_instance, all_predictions, recommendations): + def calculate_recommendation_impact( + cls, + property_instance, + all_predictions, + recommendations, + representative_recommendations, + energy_consumption_client + ): """ Given predictions from the model apis, with method will update the recommendations with the predicted @@ -280,6 +360,8 @@ class Recommendations: :param property_instance: Instance of the Property class, for the home associated to property_id :param all_predictions: dictionary of predictions from the model apis :param recommendations: dictionary of recommendations for the property + :param representative_recommendations: dictionary of representative recommendations for the property + :param energy_consumption_client: Instance of the EnergyConsumptionClient class :return: """ @@ -292,6 +374,34 @@ class Recommendations: property_carbon_predictions = all_predictions["carbon_change_predictions"][ all_predictions["carbon_change_predictions"]["property_id"] == str(property_instance.id) ].copy() + property_lighting_cost_predictions = all_predictions["lighting_cost_predictions"][ + all_predictions["lighting_cost_predictions"]["property_id"] == 
str(property_instance.id) + ].copy() + property_heating_cost_predictions = all_predictions["heating_cost_predictions"][ + all_predictions["heating_cost_predictions"]["property_id"] == str(property_instance.id) + ].copy() + property_hot_water_cost_predictions = all_predictions["hot_water_cost_predictions"][ + all_predictions["hot_water_cost_predictions"]["property_id"] == str(property_instance.id) + ].copy() + + # We apply adjustments to each of the heating costs + property_lighting_cost_predictions["adjusted_cost"] = property_lighting_cost_predictions["predictions"].apply( + lambda x: AnnualBillSavings.adjust_energy_to_metered( + x, current_epc_rating=property_instance.data["current-energy-rating"] + ) + ) + + property_heating_cost_predictions["adjusted_cost"] = property_heating_cost_predictions["predictions"].apply( + lambda x: AnnualBillSavings.adjust_energy_to_metered( + x, current_epc_rating=property_instance.data["current-energy-rating"] + ) + ) + + property_hot_water_cost_predictions["adjusted_cost"] = property_hot_water_cost_predictions["predictions"].apply( + lambda x: AnnualBillSavings.adjust_energy_to_metered( + x, current_epc_rating=property_instance.data["current-energy-rating"] + ) + ) property_recommendations = recommendations[property_instance.id].copy() @@ -299,33 +409,27 @@ class Recommendations: sap_phase_impact = property_sap_predictions.groupby("phase")["predictions"].median().reset_index() heat_phase_impact = property_heat_predictions.groupby("phase")["predictions"].median().reset_index() carbon_phase_impact = property_carbon_predictions.groupby("phase")["predictions"].median().reset_index() - - # The heat demand change is the difference between the starting heat demand and the value at the final phase - expected_heat_demand = property_instance.floor_area * ( - heat_phase_impact[heat_phase_impact["phase"] == max(heat_phase_impact["phase"])]["predictions"].values[0] + # lighting_cost_phase_impact = ( + # 
property_lighting_cost_predictions.groupby("phase")[["adjusted_cost", "predictions"]].median( + # ).reset_index() + # ) + heating_cost_phase_impact = ( + property_heating_cost_predictions.groupby("phase")[["adjusted_cost", "predictions"]].median().reset_index() ) - starting_heat_demand = ( - float(property_instance.data["energy-consumption-current"]) * property_instance.floor_area + hot_water_cost_phase_impact = ( + property_hot_water_cost_predictions.groupby("phase")[ + ["adjusted_cost", "predictions"] + ].median().reset_index() ) - # This is the unadjusted resulting heat demand - predicted_heat_demand_change = starting_heat_demand - expected_heat_demand - - # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are - # actually implemented - expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered( - epc_energy_consumption=expected_heat_demand, - current_epc_rating=property_instance.data["current-energy-rating"], - total_floor_area=property_instance.floor_area - ) - - adjusted_heat_demand_change = ( - property_instance.current_adjusted_energy - expected_adjusted_energy - ) - - # TODO: We should determine if the home is gas & electricity or just electricity - expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy) + representative_rec_ids = [ + rec["recommendation_id"] for rec in representative_recommendations[property_instance.id] + ] + phase_lighting_costs = {} + phase_kwh_figures = {} + bill_savings_list = [] + kwh_savings_list = [] for recommendations_by_type in property_recommendations: for rec in recommendations_by_type: @@ -345,12 +449,163 @@ class Recommendations: rec["recommendation_id"] )]["predictions"].values[0] + # Lighting costs won't change unless we have a lighting recommendation + new_lighting_cost_data = property_lighting_cost_predictions[ + property_lighting_cost_predictions["recommendation_id"] == str(rec["recommendation_id"]) + ] + + new_lighting_cost = 
new_lighting_cost_data["adjusted_cost"].values[0] + new_lighting_cost_unadjusted = new_lighting_cost_data["predictions"].values[0] + + new_heating_cost_data = property_heating_cost_predictions[ + property_heating_cost_predictions["recommendation_id"] == str(rec["recommendation_id"]) + ] + + new_heating_cost = new_heating_cost_data["adjusted_cost"].values[0] + new_heating_cost_unadjusted = new_heating_cost_data["predictions"].values[0] + + new_hot_water_cost_data = property_hot_water_cost_predictions[ + property_hot_water_cost_predictions["recommendation_id"] == str(rec["recommendation_id"]) + ] + + new_hot_water_cost = new_hot_water_cost_data["adjusted_cost"].values[0] + new_hot_water_cost_unadjusted = new_hot_water_cost_data["predictions"].values[0] + if rec["phase"] == 0: predicted_sap_points = new_sap - float(property_instance.data["current-energy-efficiency"]) predicted_co2_savings = float(property_instance.data["co2-emissions-current"]) - new_carbon predicted_heat_demand = property_instance.floor_area * ( float(property_instance.data["energy-consumption-current"]) - new_heat_demand ) + + if rec["type"] == "lighting": + new_heating_cost = property_instance.energy_cost_estimates["adjusted"]["heating"] + new_hot_water_cost = property_instance.energy_cost_estimates["adjusted"]["hot_water"] + new_lighting_cost = min( + new_lighting_cost, property_instance.energy_cost_estimates["adjusted"]["lighting"] + ) + scoring_heating_cost = property_instance.energy_cost_estimates["unadjusted"]["heating"] + scoring_hot_water_cost = property_instance.energy_cost_estimates["unadjusted"]["hot_water"] + scoring_lighting_cost = min( + property_instance.energy_cost_estimates["unadjusted"]["lighting"], + new_lighting_cost_unadjusted + ) + else: + new_heating_cost = min( + new_heating_cost, property_instance.energy_cost_estimates["adjusted"]["heating"] + ) + new_hot_water_cost = min( + new_hot_water_cost, property_instance.energy_cost_estimates["adjusted"]["hot_water"] + ) + 
new_lighting_cost = property_instance.energy_cost_estimates["adjusted"]["lighting"] + + scoring_heating_cost = min( + property_instance.energy_cost_estimates["unadjusted"]["heating"], + new_heating_cost_unadjusted + ) + scoring_hot_water_cost = min( + property_instance.energy_cost_estimates["unadjusted"]["hot_water"], + new_hot_water_cost_unadjusted + ) + scoring_lighting_cost = property_instance.energy_cost_estimates["unadjusted"]["lighting"] + + predicted_heating_cost_reduction = ( + property_instance.energy_cost_estimates["adjusted"]["heating"] - new_heating_cost + ) + predicted_hot_water_cost_reduction = ( + property_instance.energy_cost_estimates["adjusted"]["hot_water"] - new_hot_water_cost + ) + + predicted_lighting_cost_reduction = 0 if rec["type"] != "lighting" else ( + property_instance.energy_cost_estimates["adjusted"]["lighting"] - new_lighting_cost + ) + # We store this value for later + phase_lighting_costs[rec["phase"]] = { + "adjusted": new_lighting_cost, + "unadjusted": scoring_lighting_cost + } + + # We now predict the kwh savings using the xgb model + + simulation_epc = property_instance.simulation_epcs[rec["phase"]].copy() + # The current heating, hot water and energy kwh should be based on the new, unadjusted + # costs for lighting, heating, hot water + simulation_epc["heating-cost-current"] = int(scoring_heating_cost) + simulation_epc["hot-water-cost-current"] = int(scoring_hot_water_cost) + simulation_epc["lighting-cost-current"] = int(scoring_lighting_cost) + # We predict with the energy consumption model + scoring_df = pd.DataFrame([simulation_epc]) + # Change columns from underscores to hyphens + scoring_df.columns = [ + x.lower().replace("_", "-") for x in scoring_df.columns + ] + for col in ["heating_kwh", "hot_water_kwh"]: + scoring_df[col] = None + + energy_consumption_client.data = None + new_heating_kwh = energy_consumption_client.score_new_data( + new_data=scoring_df, target="heating_kwh" + )[0] + + new_hot_water_kwh = 
energy_consumption_client.score_new_data( + new_data=scoring_df, target="hot_water_kwh" + )[0] + + # Adjust these figures + new_heating_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered( + new_heating_kwh, current_epc_rating=property_instance.data["current-energy-rating"] + ) + new_hot_water_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered( + new_hot_water_kwh, current_epc_rating=property_instance.data["current-energy-rating"] + ) + + heating_kwh_reduction = 0 if predicted_heating_cost_reduction == 0 else ( + property_instance.energy_consumption_estimates["adjusted"]["heating"] - new_heating_kwh_adjusted + ) + + hot_water_kwh_reduction = 0 if predicted_hot_water_cost_reduction == 0 else ( + property_instance.energy_consumption_estimates["adjusted"]["hot_water"] - + new_hot_water_kwh_adjusted + ) + + lighting_kwh_reduction = predicted_lighting_cost_reduction / AnnualBillSavings.ELECTRICITY_PRICE_CAP + + ( + predicted_appliances_cost_reduction, + predicted_appliances_kwh_reduction + ) = cls._calculate_appliance_solar_savings( + rec=rec, + property_instance=property_instance, + heating_kwh_reduction=heating_kwh_reduction, + hot_water_kwh_reduction=hot_water_kwh_reduction, + lighting_kwh_reduction=lighting_kwh_reduction + ) + + kwh_reduction = ( + heating_kwh_reduction + + hot_water_kwh_reduction + + lighting_kwh_reduction + + predicted_appliances_kwh_reduction + ) + + predicted_bill_savings = ( + predicted_heating_cost_reduction + + predicted_hot_water_cost_reduction + + predicted_lighting_cost_reduction + + predicted_appliances_cost_reduction + ) + + phase_kwh_figures[rec["phase"]] = { + "adjusted": { + "heating": new_heating_kwh_adjusted, + "hot_water": new_hot_water_kwh_adjusted + }, + "unadjusted": { + "heating": new_heating_kwh, + "hot_water": new_hot_water_kwh + } + } + else: previous_phase = rec["phase"] - 1 predicted_sap_points = ( @@ -365,11 +620,173 @@ class Recommendations: new_heat_demand ) + if rec["type"] == "lighting": + # If we have a 
lighting recommendation, the heating, hot water and lighting costs will + # be from the previous phase - nothing will change + new_heating_cost = heating_cost_phase_impact[ + heating_cost_phase_impact["phase"] == previous_phase + ]["adjusted_cost"].values[0] + new_hot_water_cost = hot_water_cost_phase_impact[ + hot_water_cost_phase_impact["phase"] == previous_phase + ]["adjusted_cost"].values[0] + + new_lighting_cost = min( + new_lighting_cost, phase_lighting_costs[previous_phase]["adjusted"] + ) + # We also use the unadjusted costs for the scoring from the previous phase + scoring_heating_cost = heating_cost_phase_impact[ + heating_cost_phase_impact["phase"] == previous_phase + ]["predictions"].values[0] + scoring_hot_water_cost = hot_water_cost_phase_impact[ + hot_water_cost_phase_impact["phase"] == previous_phase + ]["predictions"].values[0] + scoring_lighting_cost = min( + new_lighting_cost_unadjusted, + phase_lighting_costs[previous_phase]["unadjusted"] + ) + else: + # Whereas for other recommendations, we use the new costs + new_heating_cost = min( + new_heating_cost, + heating_cost_phase_impact[ + heating_cost_phase_impact["phase"] == previous_phase + ]["adjusted_cost"].values[0] + ) + new_hot_water_cost = min( + new_hot_water_cost, + hot_water_cost_phase_impact[ + hot_water_cost_phase_impact["phase"] == previous_phase + ]["adjusted_cost"].values[0] + ) + new_lighting_cost = phase_lighting_costs[previous_phase]["adjusted"] + + scoring_heating_cost = min( + new_heating_cost_unadjusted, + heating_cost_phase_impact[ + heating_cost_phase_impact["phase"] == previous_phase + ]["predictions"].values[0] + ) + scoring_hot_water_cost = min( + new_hot_water_cost_unadjusted, + hot_water_cost_phase_impact[ + hot_water_cost_phase_impact["phase"] == previous_phase + ]["predictions"].values[0] + ) + scoring_lighting_cost = phase_lighting_costs[previous_phase]["unadjusted"] + + # We now estimate the adjusted cost savings for the recommendation + 
predicted_heating_cost_reduction = ( + heating_cost_phase_impact[heating_cost_phase_impact["phase"] == previous_phase][ + "adjusted_cost" + ].values[0] - new_heating_cost + ) + + predicted_hot_water_cost_reduction = ( + hot_water_cost_phase_impact[hot_water_cost_phase_impact["phase"] == previous_phase][ + "adjusted_cost" + ].values[0] - new_hot_water_cost + ) + + # Only lighting recommendations can have an impact here + predicted_lighting_cost_reduction = ( + phase_lighting_costs[previous_phase]["adjusted"] - new_lighting_cost + ) + + # We now predict the kwh savings using the xgb model - this is based on + # the new costs at this phase + + simulation_epc = property_instance.simulation_epcs[rec["phase"]].copy() + # The current heating, hot water and energy kwh should be based on the new, unadjusted + # costs for lighting, heating, hot water + simulation_epc["heating-cost-current"] = int(scoring_heating_cost) + simulation_epc["hot-water-cost-current"] = int(scoring_hot_water_cost) + simulation_epc["lighting-cost-current"] = int(scoring_lighting_cost) + # We predict with the energy consumption model + scoring_df = pd.DataFrame([simulation_epc]) + # Change columns from underscores to hyphens + scoring_df.columns = [ + x.lower().replace("_", "-") for x in scoring_df.columns + ] + for col in ["heating_kwh", "hot_water_kwh"]: + scoring_df[col] = None + + energy_consumption_client.data = None + new_heating_kwh = energy_consumption_client.score_new_data( + new_data=scoring_df, target="heating_kwh" + )[0] + + new_hot_water_kwh = energy_consumption_client.score_new_data( + new_data=scoring_df, target="hot_water_kwh" + )[0] + + # Adjust these figures + new_heating_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered( + new_heating_kwh, current_epc_rating=property_instance.data["current-energy-rating"] + ) + new_hot_water_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered( + new_hot_water_kwh, current_epc_rating=property_instance.data["current-energy-rating"] + ) + 
+ heating_kwh_reduction = 0 if predicted_heating_cost_reduction == 0 else ( + phase_kwh_figures[previous_phase]["adjusted"]["heating"] - new_heating_kwh_adjusted + ) + if heating_kwh_reduction < 0: + heating_kwh_reduction = 0 + + hot_water_kwh_reduction = 0 if predicted_hot_water_cost_reduction == 0 else ( + phase_kwh_figures[previous_phase]["adjusted"]["hot_water"] - new_hot_water_kwh_adjusted + ) + if hot_water_kwh_reduction < 0: + hot_water_kwh_reduction = 0 + + lighting_kwh_reduction = predicted_lighting_cost_reduction / AnnualBillSavings.ELECTRICITY_PRICE_CAP + + ( + predicted_appliances_cost_reduction, + predicted_appliances_kwh_reduction + ) = cls._calculate_appliance_solar_savings( + rec=rec, + property_instance=property_instance, + heating_kwh_reduction=heating_kwh_reduction, + hot_water_kwh_reduction=hot_water_kwh_reduction, + lighting_kwh_reduction=lighting_kwh_reduction + ) + + # We now calculate the predicted_bill_savings + predicted_bill_savings = ( + predicted_heating_cost_reduction + predicted_hot_water_cost_reduction + + predicted_lighting_cost_reduction + predicted_appliances_cost_reduction + ) + + kwh_reduction = ( + heating_kwh_reduction + + hot_water_kwh_reduction + + lighting_kwh_reduction + + predicted_appliances_kwh_reduction + ) + + # We store this value for later + phase_lighting_costs[rec["phase"]] = { + "adjusted": new_lighting_cost, + "unadjusted": scoring_lighting_cost + } + + phase_kwh_figures[rec["phase"]] = { + "adjusted": { + "heating": new_heating_kwh_adjusted, + "hot_water": new_hot_water_kwh_adjusted + }, + "unadjusted": { + "heating": new_heating_kwh, + "hot_water": new_hot_water_kwh + } + } + if rec["type"] == "low_energy_lighting": # For the moment, we cap the number of SAP points that can be achieved by ventilation at 2 rec["sap_points"] = min(predicted_sap_points, LightingRecommendations.SAP_LIMIT) rec["co2_equivalent_savings"] = min(predicted_co2_savings, rec["co2_equivalent_savings"]) - rec["heat_demand"] = 
min(predicted_heat_demand, rec["heat_demand"]) + rec["heat_demand"] = predicted_heat_demand else: rec["sap_points"] = predicted_sap_points rec["co2_equivalent_savings"] = predicted_co2_savings @@ -378,28 +795,22 @@ class Recommendations: # Round to 2 decimal places rec["sap_points"] = round(rec["sap_points"], 2) - # We now calculate the adjusted heat demand for this recommendation, which is simply the percentage - # of the total adjusted heat demand change. The percentage we use is this recommendation's percentage - # of the total heat demand per square meter change + rec["kwh_savings"] = kwh_reduction + rec["energy_cost_savings"] = predicted_bill_savings - rec["adjusted_heat_demand"] = adjusted_heat_demand_change * ( - rec["heat_demand"] / predicted_heat_demand_change - ) - # We make sure this is NOT below 0 - rec["adjusted_heat_demand"] = max(0, rec["adjusted_heat_demand"]) - - # Depending on the property's tarriff, we calculate the amount of energy savings this measure will bring - if property_instance.energy_source == "electricity": - rec["energy_cost_savings"] = AnnualBillSavings.estimate_electric(rec["adjusted_heat_demand"]) - elif property_instance.energy_source == "electricity_and_gas": - rec["energy_cost_savings"] = AnnualBillSavings.estimate(rec["adjusted_heat_demand"]) - else: - raise ValueError("Invalid value for energy source") + if rec["recommendation_id"] in representative_rec_ids: + bill_savings_list.append(predicted_bill_savings) + kwh_savings_list.append(kwh_reduction) if (rec["sap_points"] is None) and (rec["co2_equivalent_savings"] is None) or ( rec["heat_demand"] is None) or (rec["energy_cost_savings"] is None): raise ValueError("sap points, co2 or heat demand is missing") + # We sum up the total savings for the property and that is our expected energy bill + + expected_energy_bill = property_instance.current_energy_bill - sum(bill_savings_list) + expected_adjusted_energy = property_instance.current_adjusted_energy - sum(kwh_savings_list) + 
return ( property_recommendations, expected_adjusted_energy, diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index 81f514b1..a1f8c67c 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -23,6 +23,7 @@ class RoofRecommendations: # It is recommended that lofts should have at least 270mm of insulation. If the property has more than 200mm of # loft insulation in place already, we do not recommend anything for the moment MINIMUM_LOFT_ISULATION_MM = 200 + MINIMUM_RECOMMENDED_LOFT_INSULATION = 280 # Flat roof should have at least 100mm of insulation MINIMUM_FLAT_ROOF_ISULATION_MM = 100 @@ -79,6 +80,11 @@ class RoofRecommendations: """ Check if the loft is already insulated """ + + # If we have a non-invasive recommendation for the loft insulation, we can assume that the loft is not insulated + if "loft_insulation" in self.property.non_invasive_recommendations: + return False + return (self.insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"] def recommend(self, phase): @@ -115,12 +121,17 @@ class RoofRecommendations: u_value = get_roof_u_value(**{**self.property.roof, "age_band": self.property.age_band}) self.estimated_u_value = u_value - if u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE: + if (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and ( + "loft_insulation" not in self.property.non_invasive_recommendations + ): # The Roof is already compliant return if self.property.roof["is_pitched"] or self.property.roof["is_flat"]: - self.recommend_roof_insulation(u_value, self.insulation_thickness, self.property.roof, phase) + insulation_thickness = ( + 0 if "loft_insulation" not in self.property.non_invasive_recommendations else self.insulation_thickness + ) + self.recommend_roof_insulation(u_value, insulation_thickness, self.property.roof, phase) return if self.property.roof["is_roof_room"]: @@ -200,7 +211,9 @@ class 
RoofRecommendations: # We make sure we hit a depth of 270mm. We should factor in any existing insulation if the # loft is already partially insulated. # Note: This requirement is only for loft insulation - if ((material["depth"] + insulation_thickness) < self.MINIMUM_LOFT_ISULATION_MM) and roof["is_pitched"]: + if ( + (material["depth"] + insulation_thickness) < self.MINIMUM_RECOMMENDED_LOFT_INSULATION + ) and roof["is_pitched"]: continue part_u_value = r_value_per_mm_to_u_value(material["depth"], material["r_value_per_mm"]) @@ -245,6 +258,35 @@ class RoofRecommendations: else: raise ValueError("Invalid material type") + # This is based on the values we have in the training data + valid_numeric_values = [ + 12, + 25, + 50, + 75, + 100, + 150, + 200, + 250, + 270, + 300, + 350, + 400, + ] + + proposed_depth = new_thickness + if new_thickness not in valid_numeric_values: + # Take the nearest value for scoring + proposed_depth = min( + valid_numeric_values, key=lambda x: abs(x - proposed_depth) + ) + + if proposed_depth >= 270: + new_efficiency = "Very Good" + else: + if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]: + new_efficiency = "Good" + recommendations.append( { "phase": phase, @@ -263,6 +305,10 @@ class RoofRecommendations: "sap_points": None, "already_installed": already_installed, "new_thickness": new_thickness, + "description_simulation": { + "roof-description": f"Pitched, {int(proposed_depth)}mm loft insulation", + "roof-energy-eff": new_efficiency + }, **cost_result } ) diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 458eae12..276573ec 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -79,6 +79,11 @@ class SolarPvRecommendations: ] def is_solar_pv_valid(self): + + # If the property is a flat but we are looking at building solar potential, we can include this + if (self.property.building_id is not None) and 
(self.property.solar_panel_configuration is not None): + return True + is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"] is_valid_roof_type = ( self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"] @@ -90,6 +95,61 @@ class SolarPvRecommendations: return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv + def recommend_building_analysis(self, phase): + """ + This recommendation approach handles the case of producing solar PV recommendations at the building level, + across multiple flats. For these recommendations, we don't include the battery option since it's impractical + from a space perspective. + :return: + """ + + panel_performance = self.property.solar_panel_configuration["panel_performance"] + total_roof_area = ( + self.property.solar_panel_configuration["insights_data"]["solarPotential"]["wholeRoofStats"]["areaMeters2"] + ) + n_units = self.property.solar_panel_configuration["n_units"] + + # At a building level, we take a single configuration so that all properties a guaranteed to use + # the same configuration + best_configurations = panel_performance.head(1).reset_index(drop=True) + + for rank, recommendation_config in best_configurations.iterrows(): + roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100) + # Spread the cost to the individual units - adding a 20% contingency + total_cost = recommendation_config["total_cost"] / n_units + kw = np.floor(recommendation_config["array_warrage"] / 100) / 10 + # Default to a weeks work for a team of 3 people doing 8 hour days + labour_days = 5 + labour_hours = 3 * 8 * labour_days + + description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof " + "of the building") + + initial_ac_kwh_per_year = recommendation_config["initial_ac_kwh_per_year"] + + self.recommendation.append( + { + "phase": phase, + "parts": 
[], + "type": "solar_pv", + "description": description, + "starting_u_value": None, + "new_u_value": None, + "sap_points": None, + "already_installed": False, + "total": total_cost, + "labour_days": labour_days, + "labour_hours": labour_hours, + # This is required for simulating the SAP impact. roof_coverage_percent is already a percentage + # (computed with * 100), so no rescaling is needed here + "photo_supply": roof_coverage_percent, + "has_battery": False, + "initial_ac_kwh_per_year": initial_ac_kwh_per_year, + "description_simulation": {"photo-supply": roof_coverage_percent}, + "rank": rank # Rank is used to get the representative recommendation - rank 0 will be chosen + } + ) + def recommend(self, phase): """ We check if a property is potentially suitable for solar PV based on the following criteria: @@ -102,6 +162,11 @@ class SolarPvRecommendations: if not self.is_solar_pv_valid(): return + # If we have a building level analysis, we implement separate logic + if self.property.building_id is not None: + self.recommend_building_analysis(phase) + return + solar_pv_percentage = self.property.solar_pv_percentage # We round up to the neaest 10% solar_pv_percentage = np.ceil(solar_pv_percentage * 10) / 10 @@ -179,6 +244,7 @@ class SolarPvRecommendations: # This is required for simulating the SAP impact. 
solar_pv_percentage is between 0 & 1 so we scale # back up here "photo_supply": 100 * roof_coverage, - "has_battery": has_battery + "has_battery": has_battery, + "description_simulation": {"photo-supply": 100 * roof_coverage}, } ) diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py index 5b36bd9c..1120654a 100644 --- a/recommendations/VentilationRecommendations.py +++ b/recommendations/VentilationRecommendations.py @@ -72,7 +72,7 @@ class VentilationRecommendations(Definitions): "already_installed": already_installed, "sap_points": 0, "heat_demand": 0, - "adjusted_heat_demand": 0, + "kwh_savings": 0, "co2_equivalent_savings": 0, "energy_cost_savings": 0, "total": estimated_cost, diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index fb228b49..a1a1491b 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -252,7 +252,7 @@ class WallRecommendations(Definitions): self.estimated_u_value = u_value - if is_cavity_wall: + if is_cavity_wall or "cavity_extract_and_refill" in self.property.non_invasive_recommendations: if u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE: # Test filling cavity self.find_cavity_insulation(u_value, insulation_thickness, phase) @@ -357,7 +357,7 @@ class WallRecommendations(Definitions): simulation_config = { **simulation_config, **walls_simulation_config, - "walls_thermal_transmittance_ending": new_u_value + "walls_thermal_transmittance_ending": new_u_value, } recommendations.append( @@ -378,6 +378,10 @@ class WallRecommendations(Definitions): "sap_points": None, "already_installed": already_installed, "simulation_config": simulation_config, + "description_simulation": { + "walls-description": "Cavity wall, filled cavity", + "walls-energy-eff": "Good" + }, **cost_result } ) diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py index 8c0cc493..29c75989 
100644 --- a/recommendations/WindowsRecommendations.py +++ b/recommendations/WindowsRecommendations.py @@ -115,5 +115,12 @@ class WindowsRecommendations: "already_installed": already_installed, **cost_result, "is_secondary_glazing": is_secondary_glazing, + # TODO: Make this condition on is_secondary_glazing + "description_simulation": { + "multi-glaze-proportion": 100, + "windows-energy-eff": "Average", + "windows-description": "Fully double glazed", + "glazed-type": "double glazing installed during or after 2002", + } } ]