mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
425 lines
16 KiB
Python
425 lines
16 KiB
Python
import os
|
|
import time
|
|
import re
|
|
|
|
from etl.epc.settings import EARLIEST_EPC_DATE
|
|
from dotenv import load_dotenv
|
|
from tqdm import tqdm
|
|
import pandas as pd
|
|
import numpy as np
|
|
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
|
|
from etl.spatial.OpenUprnClient import OpenUprnClient
|
|
from backend.SearchEpc import SearchEpc
|
|
from utils.s3 import save_csv_to_s3
|
|
|
|
load_dotenv(dotenv_path="backend/.env")
|
|
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
|
USER_ID = 8
|
|
PORTFOLIO_ID = 117
|
|
CAHA_PORTFOLIO_ID = 118
|
|
|
|
|
|
def hornsey():
|
|
"""
|
|
This script prepares the asset lists for the additional housing associations, CAHA and Hornsey Housing Trust,
|
|
that are forming a consortium led by AIHA
|
|
:return:
|
|
"""
|
|
|
|
hornsey_asset_list = pd.read_excel(
|
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/SHDF - Template - EOI - Hornsey Housing "
|
|
"Trust.xlsx",
|
|
sheet_name="Ksquared-All units information",
|
|
header=3
|
|
)
|
|
|
|
# We don't need the first row
|
|
hornsey_asset_list = hornsey_asset_list.iloc[1:]
|
|
# Fill NA values with empty strings
|
|
hornsey_asset_list = hornsey_asset_list.fillna("")
|
|
hornsey_asset_list["Address letter or number"] = hornsey_asset_list["Address letter or number"].astype(
|
|
str
|
|
).str.strip()
|
|
hornsey_asset_list["Postcode"] = hornsey_asset_list["Postcode"].astype(str).str.strip()
|
|
hornsey_asset_list["Street address"] = hornsey_asset_list["Street address"].astype(str).str.strip()
|
|
# Replace double spaces
|
|
for col in ["Address letter or number", "Street address", "Postcode"]:
|
|
hornsey_asset_list[col] = hornsey_asset_list[col].str.replace(" ", " ")
|
|
|
|
hornsey_asset_list = hornsey_asset_list[hornsey_asset_list["Address letter or number"] != ""]
|
|
|
|
hornsey_asset_list["Wall Type Cleaned"] = np.where(
|
|
"Cavity" in hornsey_asset_list["Wall type"],
|
|
"Cavity",
|
|
"Solid"
|
|
)
|
|
|
|
missed_uprns = {
|
|
"Flat 13A Stowell House": 100021213098,
|
|
"Flat 24 Stowell House": 100021213110,
|
|
"Flat 1 36 Haringey Park": None
|
|
}
|
|
extracted_data = []
|
|
asset_list = []
|
|
hornsey_asset_list["row_id"] = hornsey_asset_list.index
|
|
for _, home in tqdm(hornsey_asset_list.iterrows(), total=len(hornsey_asset_list)):
|
|
|
|
if home["Address letter or number"] == "Flat 1 36 Haringey Park":
|
|
continue
|
|
|
|
# Some properties do not have an epc
|
|
if not home["Energy starting band (EPC)"]:
|
|
asset_list.append(
|
|
{
|
|
"uprn": missed_uprns[home["Address letter or number"]],
|
|
"address": home["Address letter or number"],
|
|
"postcode": home["Postcode"],
|
|
"property_type": "Flat", # They're all flats
|
|
}
|
|
)
|
|
continue
|
|
|
|
unit_number = home["Address letter or number"]
|
|
street = home["Street address"]
|
|
postcode = home["Postcode"]
|
|
address = ", ".join([x for x in [unit_number, street] if x])
|
|
find_epc_searcher = RetrieveFindMyEpc(address=address, postcode=postcode)
|
|
find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
|
|
time.sleep(0.5)
|
|
# We need uprn
|
|
searcher = SearchEpc(
|
|
address1=address,
|
|
postcode=postcode,
|
|
auth_token=EPC_AUTH_TOKEN,
|
|
os_api_key="",
|
|
full_address=address,
|
|
)
|
|
searcher.find_property(skip_os=True)
|
|
newest_epc = searcher.newest_epc
|
|
if newest_epc["current-energy-efficiency"] != home["Energy starting band (EPC)"].split("-")[1]:
|
|
raise Exception("Something went wrong with the EPC data")
|
|
|
|
extracted_data.append(
|
|
{
|
|
"uprn": newest_epc["uprn"],
|
|
**find_epc_data,
|
|
"hotwater-description": newest_epc["hotwater-description"],
|
|
}
|
|
)
|
|
|
|
asset_list.append(
|
|
{
|
|
"uprn": newest_epc["uprn"],
|
|
"row_id": home["row_id"],
|
|
"address": home["Address letter or number"],
|
|
"postcode": home["Postcode"],
|
|
"property_type": "Flat", # They're all flats
|
|
}
|
|
)
|
|
|
|
# Get conservation area data
|
|
# uprns = [x["uprn"] for x in extracted_data]
|
|
# conservation_area_data = OpenUprnClient.get_spatial_data(uprns, "retrofit-data-dev")
|
|
#
|
|
# addresses = pd.DataFrame(asset_list)
|
|
# addresses["uprn"] = addresses["uprn"].astype(int)
|
|
# conservation_area_df = conservation_area_data.merge(addresses, how="left", right_on="uprn", left_on="UPRN")
|
|
# conservation_area_df.to_csv(
|
|
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/hornsey_conservation_area_data.csv"
|
|
# )
|
|
|
|
# We format the extracted data so that is has the same structure as non-intrusive recommendations
|
|
# We then get the UPRNs and create the asset list
|
|
|
|
non_invasive_recommendations = [
|
|
{
|
|
"uprn": r["uprn"],
|
|
"recommendations": r["recommendations"]
|
|
} for r in extracted_data
|
|
]
|
|
for r in non_invasive_recommendations:
|
|
new_recommendations = []
|
|
extracted = [r for r in extracted_data if r["uprn"] == r["uprn"]][0]
|
|
for rec in r["recommendations"]:
|
|
if extracted["hotwater-description"] == "Gas boiler/circulator, no cylinder thermostat":
|
|
if rec["type"] in ["hot_water_tank_insulation", "cylinder_thermostat"]:
|
|
continue
|
|
rec["survey"] = False
|
|
new_recommendations.append(rec)
|
|
r["recommendations"] = new_recommendations
|
|
|
|
# Store the asset list in s3
|
|
filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
|
|
save_csv_to_s3(
|
|
dataframe=pd.DataFrame(asset_list),
|
|
bucket_name="retrofit-plan-inputs-dev",
|
|
file_name=filename
|
|
)
|
|
|
|
# Store the non-invasive recommendations in s3
|
|
non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
|
|
save_csv_to_s3(
|
|
dataframe=pd.DataFrame(non_invasive_recommendations),
|
|
bucket_name="retrofit-plan-inputs-dev",
|
|
file_name=non_invasive_recommendations_filename
|
|
)
|
|
|
|
body = {
|
|
"portfolio_id": str(PORTFOLIO_ID),
|
|
"housing_type": "Social",
|
|
"goal": "Increasing EPC",
|
|
"goal_value": "C",
|
|
"trigger_file_path": filename,
|
|
"already_installed_file_path": "",
|
|
"patches_file_path": "",
|
|
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
|
|
"valuation_file_path": "",
|
|
"scenario_name": "Wave 3 Packages",
|
|
"multi_plan": True,
|
|
"budget": None,
|
|
"exclusions": ["boiler_upgrade"]
|
|
}
|
|
print(body)
|
|
|
|
|
|
def caha():
|
|
caha_asset_list = pd.read_excel(
|
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/Copy of AIHA - WHSHF Wave 3 bid - Consortium "
|
|
"member properties - CAHA.xlsx",
|
|
sheet_name="Ksquared-All units information",
|
|
header=3
|
|
)
|
|
|
|
caha_asset_list = caha_asset_list.iloc[1:]
|
|
# Fill NA values with empty strings
|
|
caha_asset_list = caha_asset_list.fillna("")
|
|
caha_asset_list["Address letter or number"] = caha_asset_list["Address letter or number"].astype(
|
|
str
|
|
).str.strip()
|
|
|
|
# We Add POstcode as it wasn't populated - split on space and take the last two entries and re-concatenate on space
|
|
caha_asset_list["Street address"] = caha_asset_list["Street address"].str.strip()
|
|
caha_asset_list["Postcode"] = caha_asset_list["Street address"].str.split(" ").str[-2:].str.join(" ")
|
|
# Take just the columns we need
|
|
caha_asset_list = caha_asset_list[["Address letter or number", "Street address", "Postcode"]]
|
|
|
|
for col in ["Address letter or number", "Street address", "Postcode"]:
|
|
caha_asset_list[col] = caha_asset_list[col].str.replace(" ", " ")
|
|
|
|
# Pull the data from find my epc
|
|
remap = {
|
|
"Flat A, 50 Talbot Road N6 4QP": "50a Talbot Road",
|
|
"Flat A, 51 First Avenue EN1 1BN": "51a, First Avenue",
|
|
"Flat B, 51 First Avenue EN1 1BN": "51b, First Avenue"
|
|
}
|
|
|
|
def remap_address(address):
|
|
# Match patterns like 'Flat A, 30 Grove Park Road'
|
|
match = re.match(r'Flat (\w), (\d+) (.+)', address)
|
|
if match:
|
|
flat_letter = match.group(1) # e.g., 'A'
|
|
number = match.group(2) # e.g., '30'
|
|
rest_of_address = match.group(3) # e.g., 'Grove Park Road'
|
|
|
|
# Format the new address as '30A Grove Park Road'
|
|
return f"{number}{flat_letter} {rest_of_address}"
|
|
|
|
# If pattern doesn't match, return original address
|
|
return address
|
|
|
|
caha_asset_list["row_id"] = caha_asset_list.index
|
|
|
|
extracted_data = []
|
|
asset_list = []
|
|
for _, home in tqdm(caha_asset_list.iterrows(), total=len(caha_asset_list)):
|
|
if home["Street address"] == "35 Stanford road N11 3HY" and home["Address letter or number"] == "":
|
|
continue
|
|
|
|
if home["Street address"] == "29 Victoria Avenue N3 1BD" and home["Address letter or number"] == "":
|
|
continue
|
|
|
|
if home["Street address"] == "11 Victoria Avenue N3 1BD" and home["Address letter or number"] == "Flat A":
|
|
continue
|
|
|
|
if home["Street address"] == "11 Victoria Avenue N3 1BD" and home["Address letter or number"] == "Flat C":
|
|
continue
|
|
|
|
if home["Street address"] == "10 Forest Gardens N17 6XA" and home["Address letter or number"] == "Flat C":
|
|
continue
|
|
|
|
if home["Street address"] == "219 Cann Hall Road E11 3NJ" and home["Address letter or number"] == "Flat B":
|
|
continue
|
|
|
|
unit_number = home["Address letter or number"]
|
|
street = home["Street address"]
|
|
postcode = home["Postcode"]
|
|
address = ", ".join([x for x in [unit_number, street] if x])
|
|
address = remap.get(address, address)
|
|
address = address.replace(postcode, "").strip()
|
|
if "Victoria Avenue" not in address:
|
|
address = remap_address(address)
|
|
|
|
find_epc_searcher = RetrieveFindMyEpc(address=address, postcode=postcode)
|
|
find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data(sap_2012_date=EARLIEST_EPC_DATE)
|
|
time.sleep(0.5)
|
|
# We need uprn
|
|
searcher = SearchEpc(
|
|
address1=address,
|
|
postcode=postcode,
|
|
auth_token=EPC_AUTH_TOKEN,
|
|
os_api_key="",
|
|
full_address=address,
|
|
)
|
|
searcher.find_property(skip_os=True)
|
|
newest_epc = searcher.newest_epc
|
|
|
|
uprn = newest_epc["uprn"]
|
|
if address in ["Flat D, 11 Victoria Avenue", "Flat B, 11 Victoria Avenue"]:
|
|
uprn = None
|
|
|
|
extracted_data.append(
|
|
{
|
|
"uprn": uprn,
|
|
**find_epc_data,
|
|
}
|
|
)
|
|
|
|
asset_list.append(
|
|
{
|
|
"row_id": home["row_id"],
|
|
"uprn": uprn,
|
|
"address": address,
|
|
"postcode": home["Postcode"],
|
|
"property_type": newest_epc["property-type"],
|
|
"wall_type": newest_epc["walls-description"],
|
|
"built_form": newest_epc["built-form"],
|
|
"flat_storey_count": newest_epc['flat-storey-count'],
|
|
}
|
|
)
|
|
|
|
# Missing row ids
|
|
missed = [r for r in caha_asset_list["row_id"].tolist() if r not in [x["row_id"] for x in asset_list]]
|
|
|
|
no_data = [x for x in asset_list if x["uprn"] in [None, ""]]
|
|
no_data = pd.DataFrame(no_data)
|
|
|
|
# Get conservation area data
|
|
uprns = [x["uprn"] for x in extracted_data if x["uprn"] not in ["", None]]
|
|
conservation_area_data = OpenUprnClient.get_spatial_data([36284], "retrofit-data-dev")
|
|
|
|
addresses = pd.DataFrame(asset_list)
|
|
addresses["uprn"] = addresses["uprn"].astype(str)
|
|
conservation_area_data["UPRN"] = conservation_area_data["UPRN"].astype(str)
|
|
conservation_area_df = conservation_area_data.merge(addresses, how="left", right_on="uprn", left_on="UPRN")
|
|
conservation_area_df.to_csv(
|
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_conservation_area_data.csv"
|
|
)
|
|
|
|
non_invasive_recommendations = [
|
|
{
|
|
"uprn": r["uprn"],
|
|
"recommendations": r["recommendations"]
|
|
} for r in extracted_data
|
|
]
|
|
# for r in non_invasive_recommendations:
|
|
# new_recommendations = []
|
|
# extracted = [r for r in extracted_data if r["uprn"] == r["uprn"]][0]
|
|
# for rec in r["recommendations"]:
|
|
# if extracted["hotwater-description"] == "Gas boiler/circulator, no cylinder thermostat":
|
|
# if rec["type"] in ["hot_water_tank_insulation", "cylinder_thermostat"]:
|
|
# continue
|
|
# rec["survey"] = False
|
|
# new_recommendations.append(rec)
|
|
# r["recommendations"] = new_recommendations
|
|
|
|
# We model the two properties separately
|
|
asset_list = pd.DataFrame(asset_list)
|
|
# Drop Flat D, 11 Victoria Avenue
|
|
asset_list1 = asset_list[asset_list["address"] != "Flat D, 11 Victoria Avenue"]
|
|
asset_list2 = asset_list[asset_list["address"] == "Flat D, 11 Victoria Avenue"]
|
|
|
|
# Store the asset list in s3
|
|
filename = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/asset_list1.csv"
|
|
save_csv_to_s3(
|
|
dataframe=asset_list1,
|
|
bucket_name="retrofit-plan-inputs-dev",
|
|
file_name=filename
|
|
)
|
|
|
|
filename2 = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/asset_list2.csv"
|
|
save_csv_to_s3(
|
|
dataframe=asset_list2,
|
|
bucket_name="retrofit-plan-inputs-dev",
|
|
file_name=filename2
|
|
)
|
|
|
|
# Store the non-invasive recommendations in s3
|
|
non_invasive_recommendations_filename = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/non_invasive_recommendations.csv"
|
|
save_csv_to_s3(
|
|
dataframe=pd.DataFrame(non_invasive_recommendations),
|
|
bucket_name="retrofit-plan-inputs-dev",
|
|
file_name=non_invasive_recommendations_filename
|
|
)
|
|
|
|
body = {
|
|
"portfolio_id": str(CAHA_PORTFOLIO_ID),
|
|
"housing_type": "Social",
|
|
"goal": "Increasing EPC",
|
|
"goal_value": "C",
|
|
"trigger_file_path": filename,
|
|
"already_installed_file_path": "",
|
|
"patches_file_path": "",
|
|
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
|
|
"valuation_file_path": "",
|
|
"scenario_name": "Wave 3 Packages",
|
|
"multi_plan": True,
|
|
"budget": None,
|
|
"exclusions": ["boiler_upgrade"]
|
|
}
|
|
print(body)
|
|
|
|
body2 = {
|
|
"portfolio_id": str(CAHA_PORTFOLIO_ID),
|
|
"housing_type": "Social",
|
|
"goal": "Increasing EPC",
|
|
"goal_value": "C",
|
|
"trigger_file_path": filename2,
|
|
"already_installed_file_path": "",
|
|
"patches_file_path": "",
|
|
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
|
|
"valuation_file_path": "",
|
|
"scenario_name": "Wave 3 Packages",
|
|
"multi_plan": True,
|
|
"budget": None,
|
|
"exclusions": ["boiler_upgrade"]
|
|
}
|
|
print(body2)
|
|
|
|
#
|
|
asset_list3 = [
|
|
{
|
|
"address": "10b Forest Gardens", "postcode": "N17 6XA", "uprn": 100021180197
|
|
}
|
|
]
|
|
filename3 = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/asset_list3.csv"
|
|
save_csv_to_s3(
|
|
dataframe=pd.DataFrame(asset_list3),
|
|
bucket_name="retrofit-plan-inputs-dev",
|
|
file_name=filename3
|
|
)
|
|
body3 = {
|
|
"portfolio_id": str(119),
|
|
"housing_type": "Social",
|
|
"goal": "Increasing EPC",
|
|
"goal_value": "C",
|
|
"trigger_file_path": filename3,
|
|
"already_installed_file_path": "",
|
|
"patches_file_path": "",
|
|
"non_invasive_recommendations_file_path": "",
|
|
"valuation_file_path": "",
|
|
"scenario_name": "Wave 3 Packages",
|
|
"multi_plan": True,
|
|
"budget": None,
|
|
"exclusions": ["boiler_upgrade"]
|
|
}
|
|
print(body3)
|