mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
378 lines
14 KiB
Python
378 lines
14 KiB
Python
import inspect
|
|
import pandas as pd
|
|
from etl.epc.settings import EARLIEST_EPC_DATE
|
|
from pathlib import Path
|
|
import numpy as np
|
|
from utils.s3 import save_csv_to_s3
|
|
|
|
# Path of this source file, obtained from the code object of a throwaway lambda.
src_file_path = inspect.getfile(lambda: None)

# Folder next to this file that holds the per-authority EPC certificate exports.
EPC_DIRECTORY = Path(src_file_path).parent.joinpath("local_data", "all-domestic-certificates")
# Folder holding the customer-supplied datasets.
# NOTE(review): absolute path on one developer's machine - must be adjusted to run elsewhere.
CUSTOMER_DATA_DIRECTORY = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/Data"

# Identifiers used to build the S3 object keys and the scenario request bodies.
USER_ID = 8
PORTFOLIO_ID = 90
|
|
|
|
|
|
def _read_customer_csv(dataset_name):
    """
    Load one customer-supplied dataset.

    Every dataset is stored as ``<CUSTOMER_DATA_DIRECTORY>/<dataset_name>/<dataset_name>.csv``.
    """
    return pd.read_csv(
        f"{CUSTOMER_DATA_DIRECTORY}/{dataset_name}/{dataset_name}.csv",
        low_memory=False,
    )


def _uprn_as_str(uprn_column):
    """
    Cast a UPRN column to integer-formatted strings so every source joins on the same key.

    Raises if the column contains nulls, so filter those out before calling.
    """
    return uprn_column.astype(int).astype(str)


def _estimate_n_floors(
        building_height, floor_height, address_base_property_description, epc_property_type,
        default_floor_height,
):
    """
    Estimate the number of floors of a property.

    :param building_height: "Relative Height - Eaves" from AddressBase
    :param floor_height: "floor-height" from the EPC, may be null
    :param address_base_property_description: "Class Description" from AddressBase
    :param epc_property_type: "property-type" from the EPC
    :param default_floor_height: floor height to use when the EPC one is missing or not positive
    :return: 1 for flats, 2 for maisonettes, None when AddressBase says "flat" but the EPC
        disagrees, otherwise the building height divided by the floor height, rounded
    """
    if address_base_property_description == "Self Contained Flat (Includes Maisonette / Apartment)":
        if epc_property_type == "Flat":
            return 1
        if epc_property_type == "Maisonette":
            return 2
        # AddressBase and the EPC disagree about the property; the caller drops these rows
        return None

    # A missing or non-positive floor height cannot be divided by, so use the default instead
    if pd.isnull(floor_height) or floor_height <= 0:
        floor_height = default_floor_height

    return np.round(building_height / floor_height)


def _mean_cost_of_similar(potential, suitability_column, size_column, size, renewables_cost, cost_column):
    """
    Average ``cost_column`` over the suitable properties that share the same recommended size.

    Used as a cost estimate for properties that have no row of their own in ``renewables_cost``.
    The result is NaN when no similar property has a cost.
    """
    similar_properties = potential[
        potential[suitability_column] & (potential[size_column] == size)
    ].merge(
        renewables_cost, how="inner", on="UPRN"
    )
    return similar_properties[cost_column].mean()


def _first_cost(property_costs, cost_column):
    """Return the first value of ``cost_column``, or None when the property has no cost rows."""
    if property_costs.empty:
        return None
    return property_costs[cost_column].values[0]


def make_asset_list():
    """
    Set up a small asset list for the study

    Steps:
    1) Build the asset list from the Lewes EPC export, OS AddressBase and the insulation
       potential data, keeping residential, non-caravan properties with an EPC score <= 80
    2) Build the non-invasive (air source heat pump / solar PV) recommendations per property
    3) Store the asset list, the recommendations and the built-form patches in S3
    4) Print the request bodies of the scenarios to run

    Reads CSV files from EPC_DIRECTORY and CUSTOMER_DATA_DIRECTORY, writes three objects to the
    "retrofit-plan-inputs-dev" bucket and returns nothing.
    """

    # Read in EPC data for Lewes
    lewes_certificates = EPC_DIRECTORY / "domestic-E07000063-Lewes/certificates.csv"
    epc_data = pd.read_csv(lewes_certificates, low_memory=False)
    # Rename the columns to the same format as the api returns
    epc_data.columns = [c.replace("_", "-").lower() for c in epc_data.columns]

    # Keep only certificates lodged on or after the date threshold
    # NOTE(review): "lodgement-date" is not parsed as a date on read, so this presumably
    # relies on both sides being ISO formatted strings - confirm against EARLIEST_EPC_DATE
    epc_data = epc_data[epc_data["lodgement-date"] >= EARLIEST_EPC_DATE]

    # Copy so that the column assignment below does not write into a slice of the original frame
    epc_data = epc_data[~pd.isnull(epc_data["uprn"])].copy()
    epc_data["uprn"] = _uprn_as_str(epc_data["uprn"])
    # Take the newest EPC per uprn. We keep the whole newest row: groupby(...).last() takes the
    # last non-null value per column, which can mix fields from different certificates
    epc_data = epc_data.sort_values("lodgement-date").drop_duplicates("uprn", keep="last")

    # We read in the multiple data sources
    address_base = _read_customer_csv("OS AddressBase Premium")
    # Filter on resi
    address_base = address_base[address_base["Primary Code Description"] == "Residential"].copy()
    address_base["UPRN"] = _uprn_as_str(address_base["UPRN"])

    pv_potential = _read_customer_csv("Domestic Rooftop PV Potential")
    pv_potential["UPRN"] = _uprn_as_str(pv_potential["UPRN"])

    ashp_potential = _read_customer_csv("Air Source Heat Pump Potential")
    ashp_potential["UPRN"] = _uprn_as_str(ashp_potential["UPRN"])

    insulation_potential = _read_customer_csv("Insulation Potential")
    insulation_potential["UPRN"] = _uprn_as_str(insulation_potential["UPRN"])

    # Third party costs. Note from earlier checks of this data: their cavity wall insulation
    # is £8 per m^2 (total cost divided by wall area)
    renewables_cost = _read_customer_csv("Low Carbon Technology Costs")
    renewables_cost["UPRN"] = _uprn_as_str(renewables_cost["UPRN"])

    # Merge the EPC data onto address base, then add the insulation areas
    asset_list = address_base[
        [
            "UPRN", "Class Description", "Relative Height - Eaves",
        ]
    ].merge(
        epc_data[
            ["uprn", "current-energy-efficiency", "current-energy-rating", "address1", "postcode", "floor-height",
             "property-type", "built-form", "co2-emissions-current"]],
        how="left",
        left_on="UPRN",
        right_on="uprn"
    ).drop(
        columns=["uprn"]
    ).merge(
        insulation_potential[["UPRN", "EPC Rating", "Wall Area [m^2]", "Building Area [m^2]"]],
        how="left",
        on="UPRN"
    ).rename(
        columns={"Wall Area [m^2]": "insulation_wall_area", "Building Area [m^2]": "floor_area"}
    )

    # Take properties below a B - there are 2844 units
    asset_list = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80]
    # Drop caravans
    asset_list = asset_list[asset_list["Class Description"] != "Caravan"]
    # Copy so that adding the number_of_floors column below does not write into a slice
    asset_list = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])].copy()

    # Take a sample, for properties that have an EPC, with a seed
    # asset_list = asset_list.sample(frac=0.5, random_state=42)

    # Fallback floor height for properties whose EPC has none
    avg_floor_height = asset_list["floor-height"].median()

    # Estimate the number of floors
    asset_list["number_of_floors"] = asset_list.apply(
        lambda x: _estimate_n_floors(
            building_height=x["Relative Height - Eaves"],
            floor_height=x["floor-height"],
            address_base_property_description=x["Class Description"],
            epc_property_type=x["property-type"],
            default_floor_height=avg_floor_height,
        ),
        axis=1
    )
    # Drop any entries with null floors because that means the ordnance survey data doesn't align with the epc data
    asset_list = asset_list[~pd.isnull(asset_list["number_of_floors"])]
    # Drop any entries with null insulation wall area
    asset_list = asset_list[~pd.isnull(asset_list["insulation_wall_area"])]

    # EPC rating shares noted during development
    # (first table is unlabelled in the original notes - presumably the sampled subset)
    # D    0.419929
    # C    0.391459
    # E    0.160142
    # F    0.017794
    # G    0.010676

    # Total asset list:
    # D    0.450409
    # C    0.412016
    # E    0.110203
    # F    0.020263
    # G    0.007110

    # We do the following:
    # 1) Create final asset list
    # 2) Create Non-intrusive recommendations
    # 3) Create a third party costing object

    final_asset_list = asset_list.rename(
        columns={"UPRN": "uprn", "address1": "address", "floor_area": "insulation_floor_area"}
    )[["uprn", "address", "postcode", "insulation_wall_area", "insulation_floor_area", "number_of_floors"]]

    # Create non-invasive recommendations, which come from the solar potential and ASHP potential data sources
    non_invasive_recommendations = []
    for _, row in final_asset_list.iterrows():
        uprn = row["uprn"]
        # NOTE(review): the suitability columns are used directly as boolean masks, so they
        # are assumed to hold booleans - confirm against the source data
        property_ashp_potential = ashp_potential[
            (ashp_potential["UPRN"] == uprn) & ashp_potential["Overall Suitability Rating"]
        ]
        property_pv_potential = pv_potential[
            (pv_potential["UPRN"] == uprn) & pv_potential["Overall Suitability"]
        ]
        property_costs = renewables_cost[renewables_cost["UPRN"] == uprn]

        property_non_invasive_recs = []
        if not property_ashp_potential.empty:
            heat_pump_size = property_ashp_potential["Recommended Heat Pump Size [kW]"].values[0]
            ashp_cost = _first_cost(property_costs, "Air Source Heat Pump - Total")
            if ashp_cost is None:
                # No cost for this property, so use the average for the same heat pump size
                ashp_cost = _mean_cost_of_similar(
                    potential=ashp_potential,
                    suitability_column="Overall Suitability Rating",
                    size_column="Recommended Heat Pump Size [kW]",
                    size=heat_pump_size,
                    renewables_cost=renewables_cost,
                    cost_column="Air Source Heat Pump - Total",
                )

            property_non_invasive_recs.append(
                {
                    "type": "air_source_heat_pump",
                    "suitable": True,
                    "size": heat_pump_size,
                    "cost": ashp_cost,
                    "ashp_only_heating_recommendation": True
                }
            )
        else:
            property_non_invasive_recs.append(
                {
                    "type": "air_source_heat_pump",
                    "suitable": False
                }
            )

        if not property_pv_potential.empty:
            array_size_kw = property_pv_potential["Recommended Array Size [kW]"].values[0]
            pv_cost = _first_cost(property_costs, "Rooftop PV - Total")
            if pv_cost is None:
                # Same fallback as for heat pumps: without it a property with no cost row
                # would fail on the lookup of its own PV cost
                pv_cost = _mean_cost_of_similar(
                    potential=pv_potential,
                    suitability_column="Overall Suitability",
                    size_column="Recommended Array Size [kW]",
                    size=array_size_kw,
                    renewables_cost=renewables_cost,
                    cost_column="Rooftop PV - Total",
                )

            property_non_invasive_recs.append(
                {
                    "type": "solar_pv",
                    "suitable": True,
                    "array_wattage": array_size_kw * 1000,
                    "initial_ac_kwh_per_year": property_pv_potential["Annual Generation [kWh]"].values[0],
                    "panneled_roof_area": property_pv_potential["Roof area suitable for PV [m^2]"].values[0],
                    "cost": pv_cost,
                }
            )
        else:
            property_non_invasive_recs.append(
                {
                    "type": "solar_pv",
                    "suitable": False
                }
            )

        non_invasive_recommendations.append(
            {
                "uprn": uprn,
                "recommendations": property_non_invasive_recs,
            }
        )

    # Store the asset list in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
    save_csv_to_s3(
        dataframe=final_asset_list,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    # Store non-invasive recommendations in S3
    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(non_invasive_recommendations),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=non_invasive_recommendations_filename
    )

    # We add a patch to two of the units because there's no data for the built form
    # We would be able to handle this automatically in the future, when using OS API
    patches = [
        {"uprn": "10033266220", "built-form": "Semi-Detached"},
        {"uprn": "10033266219", "built-form": "Semi-Detached"},
    ]

    # Store patches in s3
    # NOTE(review): the key ends in .json but the object is written by the csv helper -
    # confirm which format the consumer of patches_file_path expects
    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
    save_csv_to_s3(
        dataframe=pd.DataFrame(patches),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=patches_filename
    )

    def scenario_body(scenario_name, exclusions=None):
        """Build one scenario request body; "exclusions" is left out entirely when None."""
        body = {
            "portfolio_id": str(PORTFOLIO_ID),
            "housing_type": "Private",
            "goal": "Increasing EPC",
            "goal_value": "A",
            "trigger_file_path": filename,
            "already_installed_file_path": "",
            "patches_file_path": patches_filename,
            "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
            "scenario_name": scenario_name,
            "multi_plan": True,
        }
        if exclusions is not None:
            body["exclusions"] = exclusions
        body["budget"] = None
        return body

    # Create the scenarios
    scenarios = [
        scenario_body(
            "Demand Reduction - no solid wall, windows, LEDs",
            [
                "internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv",
                "lighting", "windows", "secondary_heating"
            ],
        ),
        scenario_body(
            "Demand Reduction - no solid wall, floors or heating",
            ["internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv"],
        ),
        # 2.5 - full fabric, no decant
        # NOTE(review): this reuses the previous scenario's name although external wall
        # insulation is not excluded here - confirm the intended name
        scenario_body(
            "Demand Reduction - no solid wall, floors or heating",
            ["internal_wall_insulation", "floor_insulation", "heating", "solar_pv"],
        ),
        # Scenario B
        scenario_body(
            "Demand Reduction, Heating Systems, Solar PV - no solid wall or floors",
            ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"],
        ),
        # Scenario 4 - deep fabric, no IWI, floor
        scenario_body("Whole House"),
    ]
    for body in scenarios:
        print(body)