import inspect
from pathlib import Path

import numpy as np
import pandas as pd

from etl.epc.settings import EARLIEST_EPC_DATE
from utils.s3 import save_csv_to_s3

src_file_path = inspect.getfile(lambda: None)
EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
CUSTOMER_DATA_DIRECTORY = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/Data"
USER_ID = 8
PORTFOLIO_ID = 90

# Bucket that receives every file uploaded by make_asset_list.
_INPUT_BUCKET = "retrofit-plan-inputs-dev"

# AddressBase class description for which the storey count is taken from the
# EPC property type instead of being derived from the building height.
_FLAT_CLASS_DESCRIPTION = "Self Contained Flat (Includes Maisonette / Apartment)"


def _read_customer_csv(dataset_name):
    """Read ``<CUSTOMER_DATA_DIRECTORY>/<dataset_name>/<dataset_name>.csv``."""
    return pd.read_csv(
        f"{CUSTOMER_DATA_DIRECTORY}/{dataset_name}/{dataset_name}.csv",
        low_memory=False,
    )


def _uprn_as_str(uprn_column):
    """Return a UPRN column as strings of whole numbers (e.g. 1.0 -> "1").

    Raises if the column contains nulls, because of the intermediate ``int`` cast.
    """
    return uprn_column.astype(int).astype(str)


def _load_latest_epcs():
    """Load the Lewes EPC certificates and reduce them to one row per UPRN."""
    certificates_path = EPC_DIRECTORY / "domestic-E07000063-Lewes/certificates.csv"
    epc_data = pd.read_csv(certificates_path, low_memory=False)

    # Rename the columns to the same format as the api returns
    epc_data.columns = [c.replace("_", "-").lower() for c in epc_data.columns]

    # Keep only certificates lodged on or after the earliest accepted date
    epc_data = epc_data[epc_data["lodgement-date"] >= EARLIEST_EPC_DATE]

    # Rows without a UPRN cannot be joined to the other sources. Copy so that
    # the column assignment below writes to an independent frame.
    epc_data = epc_data[~pd.isnull(epc_data["uprn"])].copy()
    epc_data["uprn"] = _uprn_as_str(epc_data["uprn"])

    # Take the newest EPC per uprn.
    # NOTE(review): GroupBy.last() returns the last *non-null* value of each
    # column, so a field missing on the newest certificate is filled from an
    # older one - confirm that this is intended.
    return epc_data.sort_values("lodgement-date").groupby("uprn").last().reset_index()


def _merge_asset_sources(address_base, epc_data, insulation_potential):
    """Left-join EPC fields and insulation areas onto the AddressBase rows."""
    address_columns = ["UPRN", "Class Description", "Relative Height - Eaves"]
    epc_columns = [
        "uprn",
        "current-energy-efficiency",
        "current-energy-rating",
        "address1",
        "postcode",
        "floor-height",
        "property-type",
        "built-form",
        "co2-emissions-current",
    ]
    insulation_columns = ["UPRN", "EPC Rating", "Wall Area [m^2]", "Building Area [m^2]"]

    with_epc = address_base[address_columns].merge(
        epc_data[epc_columns], how="left", left_on="UPRN", right_on="uprn"
    ).drop(columns=["uprn"])

    return with_epc.merge(
        insulation_potential[insulation_columns], how="left", on="UPRN"
    ).rename(
        columns={"Wall Area [m^2]": "insulation_wall_area", "Building Area [m^2]": "floor_area"}
    )


def _summarise_asset_list(asset_list):
    """Return exploratory statistics about the merged asset list.

    Not called by ``make_asset_list``: the pipeline previously computed these
    values inline and discarded them. Kept for ad-hoc analysis.

    Returns a dict with the shapes of the subsets at or below an efficiency
    score of 80 and of 69, the EPC rating counts (for rows that have an EPC and
    for all rows), the mean current CO2 emissions, and a 100-bin histogram
    (counts, bin edges) of the non-null current CO2 emissions.
    """
    efficiency = asset_list["current-energy-efficiency"].astype(float)
    has_epc = ~pd.isnull(asset_list["current-energy-efficiency"])
    emissions = asset_list["co2-emissions-current"]

    # Nulls are dropped first: np.histogram cannot derive a bin range from NaN.
    counts, bin_edges = np.histogram(emissions.dropna(), bins=100)

    return {
        "efficiency_le_80_shape": asset_list[efficiency <= 80].shape,
        "efficiency_le_69_shape": asset_list[efficiency <= 69].shape,
        "rating_counts_with_epc": asset_list.loc[has_epc, "current-energy-rating"].value_counts(),
        "rating_counts_all": asset_list["current-energy-rating"].value_counts(),
        "mean_co2_emissions": emissions.mean(),
        "co2_histogram": (counts, bin_edges),
    }


def _third_party_unit_costs(renewables_cost, insulation_potential):
    """Return third-party insulation totals joined with areas, plus cost per m^2.

    Not called by ``make_asset_list``: the pipeline previously built this frame
    inline and never used it. Kept for ad-hoc analysis.
    """
    cost_testing = renewables_cost.merge(insulation_potential, how="inner", on="UPRN")
    wall_area = cost_testing["Wall Area [m^2]"]
    building_area = cost_testing["Building Area [m^2]"]

    # Their cavity wall insulation is £8 per m^2
    cost_testing["cwi_cost_per_m2"] = cost_testing["Insulation - Cavity Wall - Total"] / wall_area
    cost_testing["ewi_cost_per_m2"] = cost_testing["Insulation - External Wall - Total"] / wall_area
    cost_testing["li_cost_per_m2"] = cost_testing["Insulation - Loft - Total"] / building_area
    cost_testing["underfloor_cost_per_m2"] = (
        cost_testing["Insulation - Under Floor- Total"] / building_area
    )
    return cost_testing


def _estimate_n_floors(
    building_height,
    floor_height,
    address_base_property_description,
    epc_property_type,
    avg_floor_height,
):
    """Estimate the number of storeys of one property.

    Self-contained flats get 1 storey when the EPC property type is "Flat",
    2 when it is "Maisonette", and None for any other EPC property type.
    Every other property gets ``building_height`` divided by ``floor_height``
    (or by ``avg_floor_height`` when ``floor_height`` is null), rounded.
    """
    if address_base_property_description == _FLAT_CLASS_DESCRIPTION:
        if epc_property_type in ["Flat"]:
            return 1
        if epc_property_type == "Maisonette":
            return 2
        return None

    storey_height = avg_floor_height if pd.isnull(floor_height) else floor_height
    return np.round(building_height / storey_height)


def _mean_cost_of_similar_properties(
    potential, suitability_column, size_column, size, renewables_cost, cost_column
):
    """Return a one-row frame holding the mean ``cost_column`` of similar properties.

    "Similar" means rows of ``potential`` that are flagged suitable, have the
    same recommended ``size`` and have a row in ``renewables_cost``. The mean
    is NaN when there is no such property.
    """
    similar_properties = potential[
        potential[suitability_column] & (potential[size_column] == size)
    ].merge(renewables_cost, how="inner", on="UPRN")
    return similar_properties[[cost_column]].mean().to_frame().T


def _build_non_invasive_recommendations(uprns, ashp_potential, pv_potential, renewables_cost):
    """Build the heat pump and solar PV recommendations for every UPRN.

    Returns a list of ``{"uprn": ..., "recommendations": [...]}`` dicts, each
    holding one "air_source_heat_pump" entry followed by one "solar_pv" entry.
    When a suitable property has no row in ``renewables_cost``, the cost of a
    technology is the mean cost of suitable properties with the same
    recommended size for that technology.
    """
    ashp_size_column = "Recommended Heat Pump Size [kW]"
    ashp_cost_column = "Air Source Heat Pump - Total"
    pv_size_column = "Recommended Array Size [kW]"
    pv_cost_column = "Rooftop PV - Total"

    non_invasive_recommendations = []
    for uprn in uprns:
        # NOTE(review): the suitability columns are used directly as boolean
        # masks, so they are assumed to hold booleans - confirm.
        property_ashp_potential = ashp_potential[
            (ashp_potential["UPRN"] == uprn) & ashp_potential["Overall Suitability Rating"]
        ]
        property_pv_potential = pv_potential[
            (pv_potential["UPRN"] == uprn) & pv_potential["Overall Suitability"]
        ]
        own_costs = renewables_cost[renewables_cost["UPRN"] == uprn]

        property_non_invasive_recs = []

        if not property_ashp_potential.empty:
            heat_pump_size = property_ashp_potential[ashp_size_column].values[0]
            ashp_costs = own_costs
            if ashp_costs.empty:
                ashp_costs = _mean_cost_of_similar_properties(
                    ashp_potential,
                    "Overall Suitability Rating",
                    ashp_size_column,
                    heat_pump_size,
                    renewables_cost,
                    ashp_cost_column,
                )
            property_non_invasive_recs.append(
                {
                    "type": "air_source_heat_pump",
                    "suitable": True,
                    "size": heat_pump_size,
                    "cost": ashp_costs[ashp_cost_column].values[0],
                    "ashp_only_heating_recommendation": True,
                }
            )
        else:
            property_non_invasive_recs.append({"type": "air_source_heat_pump", "suitable": False})

        if not property_pv_potential.empty:
            array_size = property_pv_potential[pv_size_column].values[0]
            # The PV cost gets its own fallback. Reusing the heat pump costs
            # frame failed for properties without a cost row: that frame was
            # either empty or only held the heat pump column.
            pv_costs = own_costs
            if pv_costs.empty:
                pv_costs = _mean_cost_of_similar_properties(
                    pv_potential,
                    "Overall Suitability",
                    pv_size_column,
                    array_size,
                    renewables_cost,
                    pv_cost_column,
                )
            property_non_invasive_recs.append(
                {
                    "type": "solar_pv",
                    "suitable": True,
                    "array_wattage": array_size * 1000,
                    "initial_ac_kwh_per_year": property_pv_potential["Annual Generation [kWh]"].values[0],
                    "panneled_roof_area": property_pv_potential["Roof area suitable for PV [m^2]"].values[0],
                    "cost": pv_costs[pv_cost_column].values[0],
                }
            )
        else:
            property_non_invasive_recs.append({"type": "solar_pv", "suitable": False})

        non_invasive_recommendations.append(
            {
                "uprn": uprn,
                "recommendations": property_non_invasive_recs,
            }
        )

    return non_invasive_recommendations


def _scenario_body(
    scenario_name,
    exclusions,
    trigger_file_path,
    patches_file_path,
    non_invasive_recommendations_file_path,
):
    """Build one scenario request body.

    The "exclusions" key is only present when ``exclusions`` is not None, and
    is placed just before "budget".
    """
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "A",
        "trigger_file_path": trigger_file_path,
        "already_installed_file_path": "",
        "patches_file_path": patches_file_path,
        "non_invasive_recommendations_file_path": non_invasive_recommendations_file_path,
        "scenario_name": scenario_name,
        "multi_plan": True,
    }
    if exclusions is not None:
        body["exclusions"] = list(exclusions)
    body["budget"] = None
    return body


def make_asset_list():
    """
    Set up a small asset list for the study.

    Reads the Lewes EPC certificates and the customer data sets, keeps the
    residential, non-caravan properties with an EPC efficiency score of at
    most 80, estimates their number of floors, and uploads three files to S3
    under ``<USER_ID>/<PORTFOLIO_ID>/``: the asset list, the non-invasive
    (heat pump and solar PV) recommendations, and the built-form patches.
    Finally prints the five scenario request bodies. Returns nothing.
    """
    epc_data = _load_latest_epcs()

    # We read in the multiple data sources
    address_base = _read_customer_csv("OS AddressBase Premium")
    # Filter on resi before casting the UPRN; copy so that the assignment
    # writes to an independent frame.
    address_base = address_base[address_base["Primary Code Description"] == "Residential"].copy()
    address_base["UPRN"] = _uprn_as_str(address_base["UPRN"])

    pv_potential = _read_customer_csv("Domestic Rooftop PV Potential")
    pv_potential["UPRN"] = _uprn_as_str(pv_potential["UPRN"])

    ashp_potential = _read_customer_csv("Air Source Heat Pump Potential")
    ashp_potential["UPRN"] = _uprn_as_str(ashp_potential["UPRN"])

    insulation_potential = _read_customer_csv("Insulation Potential")
    insulation_potential["UPRN"] = _uprn_as_str(insulation_potential["UPRN"])

    renewables_cost = _read_customer_csv("Low Carbon Technology Costs")
    renewables_cost["UPRN"] = _uprn_as_str(renewables_cost["UPRN"])

    # Merge the EPC data onto address base
    asset_list = _merge_asset_sources(address_base, epc_data, insulation_potential)

    # Take properties below a B - there are 2844 units
    asset_list = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80]
    # Drop caravans
    asset_list = asset_list[asset_list["Class Description"] != "Caravan"]
    # Keep properties that have an EPC; copy because a column is added below.
    asset_list = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])].copy()
    # Sampling (frac=0.5, random_state=42) is currently disabled.

    avg_floor_height = asset_list["floor-height"].median()

    # Estimate the number of floors
    asset_list["number_of_floors"] = asset_list.apply(
        lambda row: _estimate_n_floors(
            building_height=row["Relative Height - Eaves"],
            floor_height=row["floor-height"],
            address_base_property_description=row["Class Description"],
            epc_property_type=row["property-type"],
            avg_floor_height=avg_floor_height,
        ),
        axis=1,
    )

    # Drop any entries with null floors because that means the ordnance survey
    # data doesn't align with the epc data
    asset_list = asset_list[~pd.isnull(asset_list["number_of_floors"])]
    # Drop any entries with null insulation wall area
    asset_list = asset_list[~pd.isnull(asset_list["insulation_wall_area"])]

    # EPC rating shares recorded by the original author:
    # D 0.419929
    # C 0.391459
    # E 0.160142
    # F 0.017794
    # G 0.010676
    # Total asset list:
    # D 0.450409
    # C 0.412016
    # E 0.110203
    # F 0.020263
    # G 0.007110

    # We do the following:
    # 1) Create final asset list
    # 2) Create non-intrusive recommendations
    # 3) Create a third party costing object (see _third_party_unit_costs)
    final_asset_list = asset_list.rename(
        columns={"UPRN": "uprn", "address1": "address", "floor_area": "insulation_floor_area"}
    )[["uprn", "address", "postcode", "insulation_wall_area", "insulation_floor_area", "number_of_floors"]]

    # Create non-invasive recommendations, which come from the solar potential
    # and ASHP potential data sources
    non_invasive_recommendations = _build_non_invasive_recommendations(
        final_asset_list["uprn"], ashp_potential, pv_potential, renewables_cost
    )

    # Store the asset list in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
    save_csv_to_s3(
        dataframe=final_asset_list,
        bucket_name=_INPUT_BUCKET,
        file_name=filename,
    )

    # Store non-invasive recommendations in S3
    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(non_invasive_recommendations),
        bucket_name=_INPUT_BUCKET,
        file_name=non_invasive_recommendations_filename,
    )

    # We add a patch to two of the units because there's no data for the built form
    # We would be able to handle this automatically in the future, when using OS API
    patches = [
        {"uprn": "10033266220", "built-form": "Semi-Detached"},
        {"uprn": "10033266219", "built-form": "Semi-Detached"},
    ]

    # Store patches in s3
    # NOTE(review): the key ends in ".json" but the file is written through
    # save_csv_to_s3 - confirm the format the consumer expects.
    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
    save_csv_to_s3(
        dataframe=pd.DataFrame(patches),
        bucket_name=_INPUT_BUCKET,
        file_name=patches_filename,
    )

    # Create the scenarios
    file_paths = {
        "trigger_file_path": filename,
        "patches_file_path": patches_filename,
        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
    }

    body1 = _scenario_body(
        "Demand Reduction - no solid wall, windows, LEDs",
        [
            "internal_wall_insulation",
            "external_wall_insulation",
            "floor_insulation",
            "heating",
            "solar_pv",
            "lighting",
            "windows",
            "secondary_heating",
        ],
        **file_paths,
    )
    print(body1)

    body2 = _scenario_body(
        "Demand Reduction - no solid wall, floors or heating",
        [
            "internal_wall_insulation",
            "external_wall_insulation",
            "floor_insulation",
            "heating",
            "solar_pv",
        ],
        **file_paths,
    )
    print(body2)

    # 2.5 - full fabric, no decant
    # NOTE(review): this reuses body2's scenario name although external wall
    # insulation is not excluded here - confirm the intended name.
    body2_5 = _scenario_body(
        "Demand Reduction - no solid wall, floors or heating",
        [
            "internal_wall_insulation",
            "floor_insulation",
            "heating",
            "solar_pv",
        ],
        **file_paths,
    )
    print(body2_5)

    # Scenario B
    body3 = _scenario_body(
        "Demand Reduction, Heating Systems, Solar PV - no solid wall or floors",
        ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"],
        **file_paths,
    )
    print(body3)

    # Scenario 4 - deep fabric, no IWI, floor
    # NOTE(review): the comment above mentions exclusions, but this body is
    # sent without an "exclusions" key - confirm which is intended.
    body4 = _scenario_body("Whole House", None, **file_paths)
    print(body4)