From 743422e8fec13381c552f177a1caad15cedd7471 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 11 Jun 2024 18:23:19 +0100 Subject: [PATCH] Parity comparison investiagtion, stonewater wip --- backend/Property.py | 67 ++++++ backend/SearchEpc.py | 17 +- backend/apis/GoogleSolarApi.py | 211 +++++++++++++++++- backend/app/plan/router.py | 38 ---- backend/ml_models/Valuation.py | 3 + etl/customers/goldman/property_ownership.py | 76 +++++++ .../northern_gorup/test_asset_list.py | 43 ++++ .../places_for_people/parity_comparison.py | 164 ++++++++++++++ etl/customers/stonewater/shdf_3_clustering.py | 71 ++++++ recommendations/Costs.py | 30 +-- recommendations/SolarPvRecommendations.py | 7 +- recommendations/WallRecommendations.py | 2 +- 12 files changed, 666 insertions(+), 63 deletions(-) create mode 100644 etl/customers/northern_gorup/test_asset_list.py create mode 100644 etl/customers/places_for_people/parity_comparison.py diff --git a/backend/Property.py b/backend/Property.py index 6336e42d..3599f21b 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -162,6 +162,9 @@ class Property: self.current_energy_bill = None self.expected_energy_bill = None + self.heating_energy_source = None + self.hot_water_energy_source = None + self.recommendations_scoring_data = [] self.parse_kwargs(kwargs) @@ -585,6 +588,7 @@ class Property: floor_area_decile_thresholds=floor_area_decile_thresholds, ) self.set_energy_source() + self.find_energy_sources() def set_spatial(self, spatial: pd.DataFrame): """ @@ -993,3 +997,66 @@ class Property: # Set the energy source based on the conditions above self.energy_source = energy_source + + def find_energy_sources(self): + # Based on the heating and the hot water + heating_fuel_mapping = { + 'has_mains_gas': 'Natural Gas', + 'has_electric': 'Electricity', + 'has_oil': 'Oil', + 'has_wood_logs': 'Wood Logs', + 'has_coal': 'Coal', + 'has_anthracite': 'Anthracite', + 'has_smokeless_fuel': 'Smokeless Fuel', + 'has_lpg': 'LPG', + 
'has_b30k': 'B30K Biofuel', + 'has_air_source_heat_pump': 'Electricity', + 'has_ground_source_heat_pump': 'Electricity', + 'has_water_source_heat_pump': 'Electricity', + 'has_electric_heat_pump': 'Electricity', + 'has_solar_assisted_heat_pump': 'Electricity', + 'has_exhaust_source_heat_pump': 'Electricity', + 'has_community_heat_pump': 'Electricity', + 'has_wood_pellets': 'Wood Pellets', + 'has_community_scheme': 'Varied (Community Scheme)' + } + + # Hot water + heater_type_to_fuel = { + 'gas instantaneous': 'Natural Gas', + 'electric heat pump': 'Electricity', + 'electric immersion': 'Electricity', + 'gas boiler': 'Natural Gas', + 'oil boiler': 'Oil', + 'electric instantaneous': 'Electricity', + 'gas multipoint': 'Natural Gas', + 'heat pump': 'Electricity', + 'solid fuel boiler': 'Solid Fuel', + 'solid fuel range cooker': 'Solid Fuel', + 'room heaters': 'Varied' # Could be any fuel, further specifics needed based on context + } + + # Define a mapping from system types to general categories or modifications of fuel types + system_type_modification = { + 'from main system': 'Main System', + 'from secondary system': 'Secondary System', + 'from second main heating system': 'Secondary System', + 'community scheme': 'Community Scheme' + } + + self.heating_energy_source = [ + fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False) + ] + if len(self.heating_energy_source) == 0 or len(self.heating_energy_source) > 1: + raise Exception("Investigate em") + + self.heating_energy_source = self.heating_energy_source[0] + + if self.hotwater["heater_type"] is not None: + self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]] + else: + fuel = system_type_modification[self.hotwater["system_type"]] + if fuel == 'Main System': + self.hot_water_energy_source = self.heating_energy_source + else: + raise Exception("Investiage me") diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 9724ffd1..275669cc 100644 --- 
a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -434,7 +434,8 @@ class SearchEpc: self, initial_postcode: str, lmks_to_drop: list[str] | None = None, built_form: str = "", - property_type: str = "" + property_type: str = "", + exclude_old: bool = False ): """ Fetches and processes EPC data for a given initial postcode, applying successive trimming @@ -453,6 +454,7 @@ class SearchEpc: :param lmks_to_drop: List of 'lmk-key' values to be excluded from the EPC data. :param built_form: The 'built-form' value to be used for filtering the EPC data. :param property_type: The 'property-type' value to be used for filtering the EPC data. + :param exclude_old: Flag to exclude EPC data older than 10 years. :return: """ @@ -483,6 +485,13 @@ class SearchEpc: if not epc_data.empty: # Further processing of the EPC data epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], errors='coerce') + + if exclude_old: + # Exclude EPC data older than 10 years + epc_data = epc_data[ + epc_data["lodgement-datetime"] > (pd.Timestamp.now() - pd.DateOffset(years=10)) + ] + epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1) epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1)) epc_data["numeric_house_number"] = epc_data["house_number"].apply( @@ -583,7 +592,8 @@ class SearchEpc: initial_postcode=self.postcode, lmks_to_drop=lmks_to_drop, built_form=built_form, - property_type=property_type + property_type=property_type, + exclude_old=exclude_old ) # If we have missing lodgment date, we fill it with inspection-date @@ -591,9 +601,6 @@ class SearchEpc: # If we still have missing dates, we set it to the mean of the non NA dates epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean()) - if exclude_old: - epc_data = epc_data[epc_data["lodgement-datetime"] > pd.Timestamp.now() - pd.DateOffset(years=10)] - # For each attribute, we need to 
determine the datatype and use an appropriate method # to estimate. estimated_epc = {} diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index 205a3560..8ee7017e 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -1,10 +1,15 @@ +import pandas as pd + from backend.Property import Property from backend.SearchEpc import SearchEpc from etl.epc.Record import EPCRecord from dotenv import load_dotenv -from utils.s3 import read_dataframe_from_s3_parquet +from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3 import os import requests +import msgpack +from functools import lru_cache +import time load_dotenv(dotenv_path="backend/.env") EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") @@ -13,6 +18,8 @@ EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") uprn = 100040099104 # This is for 353A, Hermitage Lane, ME16 9NT (one of the e.on properties) uprn = 200000964454 +# This is for 14 Victoria Road, Cross Hills, KEIGHLEY, North Yorkshire, ENGLAND, BD20 8SY +uprn = 100050346517 cleaning_data = read_dataframe_from_s3_parquet( bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", @@ -49,6 +56,25 @@ p = Property( p.get_spatial_data(uprn_filenames) +cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" +) + +cleaned = msgpack.unpackb(cleaned, raw=False) + +from etl.solar.SolarPhotoSupply import SolarPhotoSupply + +photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + +p.get_components( + cleaned=cleaned, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds +) +p.hot_water_energy_source +p.heating_energy_source + longitude = p.spatial["longitude"] latitude = p.spatial["latitude"] @@ -73,14 +99,29 @@ from pprint import pprint pprint(solar_potential) +# This is the maximum number of panels that can be installed 
+solar_potential["maxArrayPanelsCount"] + # This is the size of the panels used in the calculation - 400 watt solar_potential["panelCapacityWatts"] + # Height of the panels used solar_potential["panelHeightMeters"] + # Width of the panels used solar_potential["panelWidthMeters"] -solar_potential["wholeRoofStats"] +# This is the maximum area that can be covered by the panels +solar_potential["maxArrayAreaMeters2"] + +# This is the area of the roof +solar_potential["wholeRoofStats"]["areaMeters2"] + +# This is the area of the floor +solar_potential["wholeRoofStats"]["groundAreaMeters2"] + +solar_potential["solarPanelConfigs"][0] +solar_potential["solarPanelConfigs"][1] # Copy of response for testing - 6 Laura Close, Tintagel, PL34 0EB # {'name': 'buildings/ChIJ2yC6t4KEa0gRh2TIssogI7k', 'center': {'latitude': 50.667375, 'longitude': -4.7416833}, @@ -334,3 +375,169 @@ solar_potential["wholeRoofStats"] # 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 278.3281, 'segmentIndex': 1}]}, 'boundingBox': {'sw': {'latitude': # 50.6672904, 'longitude': -4.741778}, 'ne': {'latitude': 50.667431199999996, 'longitude': -4.7415536}}, # 'imageryQuality': 'MEDIUM', 'imageryProcessedDate': {'year': 2024, 'month': 4, 'day': 18}} + + +self = GoogleSolarApi(api_key=api_key) +import numpy as np +from recommendations.Costs import MCS_SOLAR_PV_COST_DATA + + +class GoogleSolarApi: + NORTH_FACING_AZIMUTH_RANGE = (-30, 30) + + def __init__(self, api_key, max_retries=5): + """ + Initialize the GoogleSolarApi class with the provided API key and maximum retries. + + :param api_key: The API key to authenticate requests to the Google Solar API. + :param max_retries: The maximum number of retries for the API request (default is 5). 
+ """ + self.api_key = api_key + self.max_retries = max_retries + self.base_url = "https://solar.googleapis.com/v1" + + self.insights_data = None + self.roof_segments = [] + + # property attributes: + self.floor_area = None + self.roof_area = None + self.roof_segment_indexes = None + self.panel_area = None + + def get_building_insights(self, longitude, latitude, required_quality="MEDIUM", max_retries=None): + """ + Make an API request to retrieve building insights based on the given longitude and latitude, with retry + mechanism. + + :param longitude: The longitude of the location. + :param latitude: The latitude of the location. + :param required_quality: The required quality of the data (default is "MEDIUM"). + :param max_retries: The maximum number of retries for the API request (default is None, which uses the + instance's max_retries). + :return: The JSON response containing the building insights data. + """ + if max_retries is None: + max_retries = self.max_retries + + insights_url = f"{self.base_url}/buildingInsights:findClosest" + params = { + 'location.latitude': f'{latitude:.5f}', + 'location.longitude': f'{longitude:.5f}', + 'requiredQuality': required_quality, + 'key': self.api_key + } + + attempt = 0 + while attempt < max_retries: + try: + response = requests.get(insights_url, params=params) + response.raise_for_status() # Raise an error for bad status codes + return response.json() + except requests.exceptions.RequestException as e: + attempt += 1 + print(f"Attempt {attempt} failed: {e}") + time.sleep(2 ** attempt) # Exponential backoff + if attempt >= max_retries: + raise + + @lru_cache(maxsize=128) + def get(self, longitude, latitude, required_quality="MEDIUM"): + """ + Wrapper function that calls get_building_insights and extracts roof segments, with caching. + + :param longitude: The longitude of the location. + :param latitude: The latitude of the location. + :param required_quality: The required quality of the data (default is "MEDIUM"). 
+ :return: The JSON response containing the building insights data. + """ + + # TODO - can we make a request which includes the 30cm buffer from the edge of the roof? + self.insights_data = self.get_building_insights(longitude, latitude, required_quality) + + # Extract key data from the insights response + self.roof_segments = self.insights_data["solarPotential"].get('roofSegmentStats', []) + self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2'] + self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2'] + self.panel_area = ( + self.insights_data["solarPotential"]["panelHeightMeters"] * + self.insights_data["solarPotential"]["panelWidthMeters"] + ) + + # Automatically exclude north-facing segments + self.exclude_north_facing_segments() + + self.roof_segment_indexes = [segment['segmentIndex'] for segment in self.roof_segments] + + # We now start finding the solar panel configurations + self.optimise_solar_configuration() + + def optimise_solar_configuration(self): + """ + Optimise the solar panel configuration for the building. 
+ :return: + """ + + # Remove any north facing roof segments + panel_performance = [] + for config in self.insights_data["solarPotential"]["solarPanelConfigs"]: + roof_segment_summaries = config["roofSegmentSummaries"] + # Filter on just the segments in self.roof_segment_indexes + roof_segment_summaries = [ + segment for segment in roof_segment_summaries if segment["segmentIndex"] in self.roof_segment_indexes + ] + + roi_summary = [] + for segment in roof_segment_summaries: + wattage = segment["panelsCount"] * self.insights_data["solarPotential"]["panelCapacityWatts"] + generated_energy = segment["yearlyEnergyDcKwh"] + ratio = generated_energy / wattage + cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (generated_energy / 1000) + roi_summary.append( + { + "segmentIndex": segment["segmentIndex"], + "wattage": wattage, + "generatedEnergy": generated_energy, + "ratio": ratio, + "n_panels": segment["panelsCount"], + "cost": cost + } + ) + + roi_summary = pd.DataFrame(roi_summary) + + weighted_ratio = np.average( + roi_summary["ratio"].values, weights=roi_summary["generatedEnergy"].values + ) + total_cost = roi_summary["cost"].sum() + total_energy = roi_summary["generatedEnergy"].sum() + + panel_performance.append( + { + "n_panels": roi_summary["n_panels"].sum(), + "total_energy": total_energy, + "total_cost": total_cost, + "weighted_ratio": weighted_ratio + } + ) + + panel_performance = pd.DataFrame(panel_performance) + panel_performance = panel_performance.sort_values("weighted_ratio", ascending=False) + + def exclude_north_facing_segments(self): + """ + Filter out any north-facing roof segments from the roof_segments attribute. + + North-facing segments are defined as those with an azimuth between -30 and 30 degrees. 
+ """ + + filtered_segments = [] + for segment_index, segment in enumerate(self.roof_segments): + segment["segmentIndex"] = segment_index + # Check if the segment is north-facing + if self.NORTH_FACING_AZIMUTH_RANGE[0] <= segment['azimuthDegrees'] <= self.NORTH_FACING_AZIMUTH_RANGE[1]: + continue + + filtered_segments.append(segment) + + self.roof_segments = filtered_segments diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 91a5ce0d..9caab324 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -1206,41 +1206,3 @@ def check_mds(results, input_properties, recommendations, optimise_measures): hhr_check = pd.DataFrame(hhr_check) return walls_check, hhr_check - - -from utils.s3 import read_dataframe_from_s3_parquet - -z = read_dataframe_from_s3_parquet( - bucket_name="retrofit-data-dev", - file_key="sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet" -) - -k = z[z["heat_demand_ending"] != z["heat_demand_starting"]] -k = k[k["walls_thermal_transmittance"] == k["walls_thermal_transmittance_ending"]] -k = k[k["roof_thermal_transmittance"] == k["roof_thermal_transmittance_ending"]] -k = k[k["floor_thermal_transmittance"] == k["floor_thermal_transmittance_ending"]] -ending_cols = [c for c in k.columns if "_ending" in c] -eg = k.head(2).tail(1).squeeze() - -diff = [] -for c in ending_cols: - split = c.split("_ending")[0] - if split + "_starting" in k.columns: - starting_col = split + "_starting" - else: - starting_col = split - - b4 = eg[starting_col] - after = eg[c] - if b4 != after: - diff.append( - { - "measure": split, - "starting": b4, - "ending": after - } - ) -diff = pd.DataFrame(diff) -eg["heat_demand_starting"] -eg["heat_demand_ending"] -eg["uprn"] diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index dd77fb4b..1af38194 100644 --- a/backend/ml_models/Valuation.py +++ b/backend/ml_models/Valuation.py @@ -90,6 +90,9 @@ class PropertyValuation: 41222760: 46_000, # Based on Zoopla 
41222761: 270_000, # Based on Zoopla 41212534: 38_000, # Based on Zoopla + # Northern Group Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/ + 10070868263: 194_000, # Based on Zoopla + 10070868244: 195_000, # Based on Zoopla } # We base our valuation uplifts on a number of sources diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index 44fa7142..500963a1 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -527,3 +527,79 @@ def company_aggregation(): aggregation = aggregation.sort_values("Number of Properties", ascending=False) aggregation.to_excel("Company ownership aggregation.xlsx") + + +def prepare_anonymised_data(): + investment_50m_properties = pd.read_excel("investment_50m_properties 28th May.xlsx", header=0) + investment_epc_data = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx", header=0) + valuations = pd.read_excel("property value.xlsx", header=0) + + # Merge these datasets + df = investment_50m_properties.merge( + investment_epc_data[ + ["UPRN", "PROPERTY_TYPE", "BUILT_FORM", "TOTAL_FLOOR_AREA", "LODGEMENT_DATE", "POSTCODE"] + ].rename( + columns={ + "PROPERTY_TYPE": "Property Type", + "BUILT_FORM": "Property Archetype", + "TOTAL_FLOOR_AREA": "Total Floor Area", + "LODGEMENT_DATE": "Date EPC Lodged", + "POSTCODE": "Postcode on EPC" + } + ), + how="inner", + on="UPRN" + ).merge( + valuations.drop(columns=["ADDRESS", "POSTCODE"]).rename( + columns={ + "Zoopla Valuation": "Expected Valuation", + "Zoopla Lower Bound": "Valuation - Lower Bound", + "Zoopla Upper Bound": "Valuation - Upper Bound", + } + ), + how="inner", + on="UPRN" + ).rename( + columns={ + "CURRENT_ENERGY_RATING": "Current EPC", + "CURRENT_ENERGY_EFFICIENCY": "Current SAP Score", + "epc_address": "Address on EPC" + } + ).drop( + columns=["Title Number", "match_type", "UPRN"] + ) + + redacted_owner_names = df[["Company Registration No. 
(1)"]].drop_duplicates() + redacted_owner_names["Owner"] = ["Owner" + str(i) for i in range(1, len(redacted_owner_names) + 1)] + + df = df.merge( + redacted_owner_names, how="left", on="Company Registration No. (1)" + ) + + df = df.drop(columns=["Company Registration No. (1)", "Proprietor Name (1)", "Property Address"]) + df = df.sort_values(["Owner", "Date EPC Lodged"], ascending=False) + + redacted_index = [] + for _, owner_properties in df.groupby("Owner"): + top_50_percent = round(owner_properties.shape[0] / 2 + 0.00001) + indexes = owner_properties.tail( + owner_properties.shape[0] - top_50_percent + ).index + + redacted_index.extend(indexes.tolist()) + + import numpy as np + # Redact addresses and postcodes + df["Address on EPC"] = np.where( + df.index.isin(redacted_index), + "Redacted", + df["Address on EPC"] + ) + + df["Postcode on EPC"] = np.where( + df.index.isin(redacted_index), + "Redacted", + df["Postcode on EPC"] + ) + + df.to_excel("Property List - 50% redacted.xlsx", index=False) diff --git a/etl/customers/northern_gorup/test_asset_list.py b/etl/customers/northern_gorup/test_asset_list.py new file mode 100644 index 00000000..46a4bb75 --- /dev/null +++ b/etl/customers/northern_gorup/test_asset_list.py @@ -0,0 +1,43 @@ +import pandas as pd +from utils.s3 import save_csv_to_s3 + +USER_ID = 8 +PORTFOLIO_ID = 81 + + +def app(): + asset_list = [ + { + 'uprn': 10070868263, + "address": "Apartment 307, Flint Glass Wharf", + "postcode": "M4 6AD", + }, + { + 'uprn': 10070868244, + "address": "Apartment 106, Flint Glass Wharf", + "postcode": "M4 6AD", + } + ] + + asset_list = pd.DataFrame(asset_list) + + # Store the asset list in s3 + filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv" + save_csv_to_s3( + dataframe=asset_list, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + body = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "B", + "trigger_file_path": filename, + 
"already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "budget": None, + } + print(body) diff --git a/etl/customers/places_for_people/parity_comparison.py b/etl/customers/places_for_people/parity_comparison.py new file mode 100644 index 00000000..64ab8591 --- /dev/null +++ b/etl/customers/places_for_people/parity_comparison.py @@ -0,0 +1,164 @@ +""" +This script is used to pull together some case studies for the Parity Projects comparison +""" + +import pandas as pd +from backend.SearchEpc import SearchEpc +from dotenv import load_dotenv +import os + +load_dotenv("backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +parity_measures = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Places For People/Parity Sample All Addresses and Measures.xlsx", + sheet_name="Total Measures" +) + +solar_measures = parity_measures[parity_measures["Category"] == "SolarPV"] + +example_1 = parity_measures[ + parity_measures["Address Id (used by website)"] == 6125299 + ].copy() + +config = { + "address": "14 Victoria Road", + "postcode": "BD20 8SY", + "uprn": 100050346517 +} + +# Point 1: +# Parity tends to re-score the EPCs, even if they're extrememly recent. +# For example for '14, Victoria Road, Cross Hills, KEIGHLEY, North Yorkshire, ENGLAND, BD20 8SY' +# The most recent EPC was done 15 May 2023, and landed at a 66D, however for some reason, parity re-score this +# home to be a 63.91. It's unclear why this is done + +example_1_measures = example_1[["MeasureGroupName", "Individual SAP increase"]].copy() +# - LEDS: 0.25 SAP points +# - 300mm of loft insulation from 200mm: 0.43 SAP points - where is this deduced from? 
Since the latest survey +# indicates 250mm insulation in place +# - Check construction of unknown party wall and fill cavity if appropriate: 0.12 SAP points (highly speculative, +# not based on any data) +# - Block open chimneys: 1.61 SAP points - latest survey showed 0 open fireplaces +# - ASHP (45 degree emitters) with enhanced existing radiator central heating and hot water, from E rated gas boiler +# 6.38 SAP points +# - 4kWp PV array south and 30 degree pitch with no shading: 30.24 SAP points + +# Notes on solar - 30.24 seems like a lot +# 400 watt is the solar panel output +# Let's do a test for this property +# This would be 10 solar panels +# Using typical solar panel dimensions, this would be 19.63555m2 of roof space +# The area of the roof is between 60 - 64.5 m2 (we use an API to get the roof data), implying only +# around 30% of the roof is covered by solar panels +# Using our machine learning model to simulate the impact of this on SAP, this would more likely result in +# a + +from utils.s3 import read_dataframe_from_s3_parquet + +training_data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", + file_key="sap_change_model/2024-06-09-10-36-53/dataset_rooms.parquet" +) +# Look for properties where the only difference is solar +ending_cols = [ + c for c in training_data.columns if "_ending" in c and "photo_supply" not in c +] +ending_cols = [ + c for c in ending_cols if + c not in ["sap_ending", "heat_demand_ending", "carbon_ending", "transaction_type_ending", "days_to_ending"] +] + +column_pairs = {} +for col in ending_cols: + starting = col.split("_ending")[0] + if starting + "_starting" in training_data.columns: + starting_col = starting + "_starting" + else: + starting_col = starting + + column_pairs[col] = starting_col + +filtered = training_data.copy() +# Take rows that had solar installs +filtered = filtered[filtered["photo_supply_ending"] != filtered["photo_supply_starting"]] +for ending_col, starting_col in column_pairs.items(): 
+ filtered = filtered[filtered[ending_col] == filtered[starting_col]] + print(f"ending_col: {ending_col}, filtered shape: {filtered.shape}") + +avg_change = filtered.groupby("photo_supply_ending")["rdsap_change"].mean().reset_index() + +# I've taken every single case of there being two EPCs for a property, where the only difference between the first +# and second is the solar installation. This is 2692 properties, across the UK. In only 4 instances has this resulted in +# 30 or more SAP points + + +# Some functions based on the SAP methodology: +import numpy as np + +total_floor_area = 50 +occupants = calculate_occupants(total_floor_area) +appliances_energy_use = estimate_electrical_appliances(occupants, total_floor_area) +cooking_energy_use = estimate_cooking(occupants) + + +def calculate_occupants(total_floor_area): + """ + From Table 1b + :param total_floor_area: + :return: + """ + return 1 + (1.76 * (1 - np.exp(-0.000349 * (total_floor_area - 13.9) * (total_floor_area - 13.9))) + 0.0013 * ( + total_floor_area - 13.9)) + + +def estimate_electrical_appliances(occupants, total_floor_area): + """ + From section L2 Electrical appliances + :param occupants: + :param total_floor_area: + :return: + """ + e_a = 207.8 * np.power(total_floor_area * occupants, 0.4717) + + days_in_month = { + 1: 31, + 2: 28, + 3: 31, + 4: 30, + 5: 31, + 6: 30, + 7: 31, + 8: 31, + 9: 30, + 10: 31, + 11: 30, + 12: 31 + } + + eam = 0 + for m in range(1, 13): + nm = days_in_month[m] + eam += e_a * (1 + 0.157 * np.cos(2 * np.pi * (m - 1.78) / 12)) * nm / 365 + + return eam + + +def estimate_cooking(occupants): + """ + From section L3 Cooking + :param occupants: + :return: + """ + + return 35 + 7 * occupants + + +primary_energy_per_m2 = 288 # kWh/m2 per year +primary_energy_regulated = primary_energy_per_m2 * total_floor_area + +primary_energy_factor_electricity = 1.1 # Example factor +primary_energy_appliances = appliances_energy_use * primary_energy_factor_electricity +primary_energy_cooking = 
cooking_energy_use * primary_energy_factor_electricity * 365 # Annualize cooking energy + +total_primary_energy_use = primary_energy_regulated + primary_energy_appliances diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py index f2ef9a8b..75917a55 100644 --- a/etl/customers/stonewater/shdf_3_clustering.py +++ b/etl/customers/stonewater/shdf_3_clustering.py @@ -593,3 +593,74 @@ def app(): # "City/Town": "city_town", # "County": "county", # "Address ID": "external_address_id", + + +def compile_data(): + """ + Various data sources have been produced to create the final data source for Stonewater. + This function combines them + :return: + """ + ######################################################################## + # Read in data + ######################################################################## + asset_list = read_excel_from_s3( + file_key="customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx", + bucket_name="retrofit-data-dev", + header_row=4 + ) + + # TODO: Read in UPRNs + + ######################################################################## + # Prepare asset list + ######################################################################## + # TODO: Merge on UPRNs + # Drop the bottom 4 rows, which are completely missing + asset_list = asset_list.head(-4) + + # Keep just the columns we're interested in + asset_list = asset_list[ + [ + "Osm. ID", + "Org. ref.", + "Postcode", + "House no", + "Name", + "Address line 2", + "City/Town", + "County", + "Address ID", # This is not uprn + ] + ].rename( + columns={ + "Osm. ID": "internal_id", + "Org. 
ref.": "customer_asset_id", + "Postcode": "postcode", + "House no": "house_number", + "Name": "address1", + "Address line 2": "address2", + "City/Town": "city_town", + "County": "county", + "Address ID": "external_address_id", + } + ) + + # Create full address + asset_list["full_address"] = np.where( + ~pd.isnull(asset_list["address2"]), + ( + asset_list["address1"] + ", " + + asset_list["address2"] + ", " + + asset_list["city_town"].str.title() + ", " + + # asset_list["county"] + ", " + + asset_list["postcode"] + ), + asset_list["address1"] + ", " + + asset_list["city_town"].str.title() + ", " + + # asset_list["county"] + ", " + + asset_list["postcode"] + ) + + if pd.isnull(asset_list["full_address"]).sum(): + raise ValueError("Missing full addresses") diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 03190727..5f752730 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -20,21 +20,21 @@ regional_labour_variations = [ # This data is based on the MCS database MCS_SOLAR_PV_COST_DATA = { - "last_updated": "2024-01-04", - "average_cost_per_kwh": 2013.94, - "average_cost_per_kwh-Outer London": 2618.75, - "average_cost_per_kwh-Inner London": 2618.75, - "average_cost_per_kwh-South East England": 2083.33, - "average_cost_per_kwh-South West England": 2113, - "average_cost_per_kwh-East of England": 1973.86, - "average_cost_per_kwh-East Midlands": 1981.86, - "average_cost_per_kwh-West Midlands": 1926.55, - "average_cost_per_kwh-North East England": 2028.49, - "average_cost_per_kwh-North West England": 1620.42, - "average_cost_per_kwh-Yorkshire and the Humber": 2060.9, - "average_cost_per_kwh-Wales": 1898.83, - "average_cost_per_kwh-Scotland": 1967.97, - "average_cost_per_kwh-Northern Ireland": 2126.09, + "last_updated": "2024-06-10", + "average_cost_per_kwh": 1750, + "average_cost_per_kwh-Outer London": 1776, + "average_cost_per_kwh-Inner London": 1776, + "average_cost_per_kwh-South East England": 1672, + "average_cost_per_kwh-South 
West England": 1732, + "average_cost_per_kwh-East of England": 1721, + "average_cost_per_kwh-East Midlands": 1730, + "average_cost_per_kwh-West Midlands": 1761, + "average_cost_per_kwh-North East England": 1669, + "average_cost_per_kwh-North West England": 1764, + "average_cost_per_kwh-Yorkshire and the Humber": 1705, + "average_cost_per_kwh-Wales": 1896, + "average_cost_per_kwh-Scotland": 1767, + "average_cost_per_kwh-Northern Ireland": 1767, } # This data is based on the MCS database, We use the larger figure between the 2023 and 2024 average, diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index a9255370..458eae12 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -4,10 +4,13 @@ from recommendations.recommendation_utils import override_costs class SolarPvRecommendations: + # Solar panel specs based on Eurener 400s solar panels + # https://midsummerwholesale.co.uk/buy/eurener/eurener-400w-mepv-zebra-ab-half-cut-mono # Approximate area of the solar panels - SOLAR_PANEL_AREA = 1.6 + SOLAR_PANEL_AREA = 1.79 # Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w - SOLAR_PANEL_WATTAGE = 250 + # This was previously set to 250w, but has been upped to 400 based on the systems used by Cotswold Energy Group + SOLAR_PANEL_WATTAGE = 400 MAX_SYSTEM_WATTAGE = 6000 MIN_SYSTEM_WATTAGE = 1000 diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index 868c08c0..fb228b49 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -189,7 +189,7 @@ class WallRecommendations(Definitions): # recommend internal wall insulation as a possible measure u_value = self.property.walls["thermal_transmittance"] - u_value = None if math.isnan(u_value) else u_value + u_value = None if pd.isnull(u_value) else u_value is_cavity_wall = 
self.property.walls["is_cavity_wall"] insulation_thickness = self.property.walls["insulation_thickness"]