Model/etl/customers/mod/pilot/1. Create Sample.py
2025-03-19 18:50:21 +00:00

205 lines
6.4 KiB
Python

import os
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
from etl.spatial.OpenUprnClient import OpenUprnClient
from asset_list.utils import get_data
from utils.s3 import save_csv_to_s3
PORTFOLIO_ID = 139
USER_ID = 8
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def app():
"""
Given the sample data and additonal properties, this function prepares the data
:return:
"""
folder_path = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme"
sample_list = pd.read_excel(f"{folder_path}/20250227_DIO_Accommodation_Sample_Properties.xlsx")
asset_data = pd.read_excel(f"{folder_path}/20250303_DIO_Accommodation_Property_Attribution.xlsx")
sample_list = sample_list[sample_list["BLDNG_COUNTRY_NAME"].isin(["ENGLAND", "WALES"])]
# Merge on the UPRN
sample_list = sample_list.merge(
asset_data[["BLDNG_ID", "BLNDG_GOVERMENT_UPRN"]].drop_duplicates(),
how="left", on="BLDNG_ID"
)
sample_list["BLNDG_GOVERMENT_UPRN"] = sample_list["BLNDG_GOVERMENT_UPRN"].astype("Int64")
# Use the EPC API to get corrected postcodes
model_asset_list = []
missed = []
for _, x in tqdm(sample_list.iterrows(), total=len(sample_list)):
if pd.isnull(x["BLNDG_GOVERMENT_UPRN"]):
continue
searcher = SearchEpc(
address1="",
postcode="",
uprn=x["BLNDG_GOVERMENT_UPRN"],
auth_token=EPC_AUTH_TOKEN,
os_api_key=""
)
searcher.find_property(skip_os=True)
newest_epc = searcher.newest_epc
if newest_epc is None:
missed.append(x["BLNDG_GOVERMENT_UPRN"])
continue
model_asset_list.append(newest_epc)
model_asset_list = pd.DataFrame(model_asset_list)
model_asset_list["uprn"] = model_asset_list["uprn"].astype(int)
spatial_data = OpenUprnClient.get_spatial_data(
uprns=model_asset_list["uprn"].tolist(), bucket_name="retrofit-data-dev"
)
# We determine if the building is listed, heritage or in a conservation area
# Merge on the property features
features = asset_data.drop(
columns=["BUILDING_SYSTEM_ITEM_NAME", "OBSERVED_CONDITION_DESCRIPTION"]
).drop_duplicates()
df = features.merge(
model_asset_list, how="inner", right_on="uprn", left_on="BLNDG_GOVERMENT_UPRN"
).merge(
pd.DataFrame(spatial_data).rename(columns={"UPRN": "uprn"}), how="left", on="uprn"
)
# Store data locally
# df.to_csv(folder_path + "/MOD property data.csv", index=False)
# Produce as asset list for analysis
df["row_id"] = df.index
epc_data, errors, no_epc = get_data(
df=df,
manual_uprn_map={},
epc_auth_token=EPC_AUTH_TOKEN,
uprn_column="uprn",
fulladdress_column="address",
address1_column="address1",
postcode_column="postcode",
property_type_column=None,
built_form_column=None,
epc_api_only=False,
row_id_name="row_id",
)
non_invasive_recommendations = []
for x in epc_data:
non_invasive_recommendations.append(
{
"uprn": x["uprn"],
"recommendations": x["find_my_epc_data"]["recommendations"]
}
)
# also include the floor area
asset_list = df[
["uprn", "address1", "postcode", "NUMBER_OF_BEDROOMS", "BLDNG_STOREYS_QTY", "BLDNG_MSRMNT_VAL"]
].rename(
columns={
"address1": "address",
"NUMBER_OF_BEDROOMS": "n_bedrooms",
"BLDNG_STOREYS_QTY": "number_of_floors",
"BLDNG_MSRMNT_VAL": "floor_area"
}
)
filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
save_csv_to_s3(
dataframe=asset_list,
bucket_name="retrofit-plan-inputs-dev",
file_name=filename
)
# Store the non-invasive recommendations in s3
non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
save_csv_to_s3(
dataframe=pd.DataFrame(non_invasive_recommendations),
bucket_name="retrofit-plan-inputs-dev",
file_name=non_invasive_recommendations_filename
)
# Scenario 1 - EPC C
body = {
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
"goal": "Increasing EPC",
"goal_value": "C",
"trigger_file_path": filename,
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
"valuation_file_path": "",
"scenario_name": "Hit EPC C",
"multi_plan": True,
"budget": None,
# "inclusions": [
# "cavity_wall_insulation",
# "loft_insulation",
# "windows",
# "solar_pv",
# "air_source_heat_pump"
# ]
}
print(body)
# Scenario 2 - EPC B
body = {
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
"goal": "Increasing EPC",
"goal_value": "B",
"trigger_file_path": filename,
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
"valuation_file_path": "",
"scenario_name": "Hit EPC B",
"multi_plan": True,
"budget": None,
# "inclusions": [
# "cavity_wall_insulation",
# "loft_insulation",
# "windows",
# "solar_pv",
# "air_source_heat_pump"
# ]
}
print(body)
# Scenario 3 - EPC B, 3.5 COP ASHP
body = {
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
"goal": "Increasing EPC",
"goal_value": "B",
"trigger_file_path": filename,
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
"valuation_file_path": "",
"scenario_name": "Hit EPC B - 3.5 COP ASHP",
"multi_plan": True,
"budget": None,
"ashp_cop": 3.5
# "inclusions": [
# "cavity_wall_insulation",
# "loft_insulation",
# "windows",
# "solar_pv",
# "air_source_heat_pump"
# ]
}
print(body)