stonewater model completed

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-21 11:41:16 +00:00
parent 0fafb03dee
commit 631a76cb99
2 changed files with 56 additions and 11 deletions

View file

@ -8,6 +8,7 @@ from tqdm import tqdm
import pandas as pd
import numpy as np
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
from etl.spatial.OpenUprnClient import OpenUprnClient
from backend.SearchEpc import SearchEpc
from utils.s3 import save_csv_to_s3
@ -60,6 +61,7 @@ def hornsey():
}
extracted_data = []
asset_list = []
hornsey_asset_list["row_id"] = hornsey_asset_list.index
for _, home in tqdm(hornsey_asset_list.iterrows(), total=len(hornsey_asset_list)):
if home["Address letter or number"] == "Flat 1 36 Haringey Park":
@ -108,12 +110,24 @@ def hornsey():
asset_list.append(
{
"uprn": newest_epc["uprn"],
"row_id": home["row_id"],
"address": home["Address letter or number"],
"postcode": home["Postcode"],
"property_type": "Flat", # They're all flats
}
)
# Get conservation area data
# uprns = [x["uprn"] for x in extracted_data]
# conservation_area_data = OpenUprnClient.get_spatial_data(uprns, "retrofit-data-dev")
#
# addresses = pd.DataFrame(asset_list)
# addresses["uprn"] = addresses["uprn"].astype(int)
# conservation_area_df = conservation_area_data.merge(addresses, how="left", right_on="uprn", left_on="UPRN")
# conservation_area_df.to_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/hornsey_conservation_area_data.csv"
# )
# We format the extracted data so that it has the same structure as non-intrusive recommendations
# We then get the UPRNs and create the asset list
@ -213,6 +227,8 @@ def caha():
# If pattern doesn't match, return original address
return address
caha_asset_list["row_id"] = caha_asset_list.index
extracted_data = []
asset_list = []
for _, home in tqdm(caha_asset_list.iterrows(), total=len(caha_asset_list)):
@ -270,6 +286,7 @@ def caha():
asset_list.append(
{
"row_id": home["row_id"],
"uprn": uprn,
"address": address,
"postcode": home["Postcode"],
@ -280,6 +297,24 @@ def caha():
}
)
# Missing row ids
missed = [r for r in caha_asset_list["row_id"].tolist() if r not in [x["row_id"] for x in asset_list]]
no_data = [x for x in asset_list if x["uprn"] in [None, ""]]
no_data = pd.DataFrame(no_data)
# Get conservation area data
uprns = [x["uprn"] for x in extracted_data if x["uprn"] not in ["", None]]
conservation_area_data = OpenUprnClient.get_spatial_data([100022526362], "retrofit-data-dev")
addresses = pd.DataFrame(asset_list)
addresses["uprn"] = addresses["uprn"].astype(str)
conservation_area_data["UPRN"] = conservation_area_data["UPRN"].astype(str)
conservation_area_df = conservation_area_data.merge(addresses, how="left", right_on="uprn", left_on="UPRN")
conservation_area_df.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_conservation_area_data.csv"
)
non_invasive_recommendations = [
{
"uprn": r["uprn"],

View file

@ -729,6 +729,7 @@ def extract_epr(pdf_path):
"Main Building Alternative Wall Insulation": None,
"Main Building Alternative Wall Dry-lining": None,
"Main Building Alternative Wall Thickness": None,
"Main Fuel": None
}
with open(pdf_path, "rb") as file:
@ -1086,7 +1087,6 @@ def main():
retrofit_packages_board = retrofit_packages_board[
retrofit_packages_board["RA"].isin(["Invoiced", "Completed"])
]
# populated_primary_energy = retrofit_packages_board[
# ~pd.isnull(retrofit_packages_board['BASE Primary energy (13a-272)'])
# ]
@ -2442,8 +2442,11 @@ def propsed_wave_3_sample():
# Label final outputs
# We create a summary of packages by street
results["Package Ref"] = results["Package Ref"].fillna("Incomplete")
results["Package Ref"] = results["Package Ref"].fillna("EPC C - No Package")
results["Package Ref"] = results["Package Ref"].astype(str)
results["Package Ref"] = np.where(
results["Package Ref"] == "4.0", "4", results["Package Ref"]
)
package_summary = results.pivot_table(
index='Street and Region',
columns='Package Ref',
@ -2451,6 +2454,8 @@ def propsed_wave_3_sample():
fill_value=0
).reset_index()
assert sum([v for k, v in package_summary.sum().items() if k != "Street and Region"]) == results.shape[0]
street_bid_structure = street_summary.merge(
package_summary, how="left", on="Street and Region"
)
@ -2471,11 +2476,6 @@ def propsed_wave_3_sample():
asset_list_ids = asset_list_ids[~pd.isnull(asset_list_ids["Address ID"])]
asset_list_ids = asset_list_ids[asset_list_ids["Address ID"] != "Address ID"]
asset_list_ids["Address ID"] = asset_list_ids["Address ID"].astype(int)
individual_units_programme = individual_units_programme.merge(
asset_list_ids,
how="left",
on="Address ID",
)
individual_units_programme = individual_units_programme.merge(
asset_list_ids.rename(
@ -2571,14 +2571,24 @@ def propsed_wave_3_sample():
for c in ['Low Carbon Heating Infill?', 'Possible Flat Infill?']:
street_bid_structure[c] = street_bid_structure[c].fillna(0)
street_bid_structure.to_csv(
os.path.join(CUSTOMER_FOLDER_PATH, "Street Bid Structure.csv"), index=False
master_sheet = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
"master "
"sheet.csv",
encoding='latin1'
)
master_sheet = master_sheet[["Address ID", "Main Fuel"]]
individual_units_programme = individual_units_programme.merge(
master_sheet, how="left", on="Address ID"
)
# TODO: Add the full Address!!!
street_bid_structure.to_csv(
os.path.join(CUSTOMER_FOLDER_PATH, "Street Bid Structure V2.csv"), index=False
)
individual_units_programme.to_csv(
os.path.join(CUSTOMER_FOLDER_PATH, "Individual units - programme.csv"), index=False
os.path.join(CUSTOMER_FOLDER_PATH, "Individual units - programme V2.csv"), index=False
)
# if __name__ == "__main__":