From 631a76cb99d213d857c732ea1a58dd9d4291a716 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 21 Nov 2024 11:41:16 +0000 Subject: [PATCH] stonewater model completed --- etl/customers/ksquared/Wave3 Modelling.py | 35 +++++++++++++++++++ .../stonewater/Wave 3 Preparation.py | 32 +++++++++++------ 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/etl/customers/ksquared/Wave3 Modelling.py b/etl/customers/ksquared/Wave3 Modelling.py index 96ea2b03..7bfa33b3 100644 --- a/etl/customers/ksquared/Wave3 Modelling.py +++ b/etl/customers/ksquared/Wave3 Modelling.py @@ -8,6 +8,7 @@ from tqdm import tqdm import pandas as pd import numpy as np from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc +from etl.spatial.OpenUprnClient import OpenUprnClient from backend.SearchEpc import SearchEpc from utils.s3 import save_csv_to_s3 @@ -60,6 +61,7 @@ def hornsey(): } extracted_data = [] asset_list = [] + hornsey_asset_list["row_id"] = hornsey_asset_list.index for _, home in tqdm(hornsey_asset_list.iterrows(), total=len(hornsey_asset_list)): if home["Address letter or number"] == "Flat 1 36 Haringey Park": @@ -108,12 +110,24 @@ def hornsey(): asset_list.append( { "uprn": newest_epc["uprn"], + "row_id": home["row_id"], "address": home["Address letter or number"], "postcode": home["Postcode"], "property_type": "Flat", # They're all flats } ) + # Get conservation area data + # uprns = [x["uprn"] for x in extracted_data] + # conservation_area_data = OpenUprnClient.get_spatial_data(uprns, "retrofit-data-dev") + # + # addresses = pd.DataFrame(asset_list) + # addresses["uprn"] = addresses["uprn"].astype(int) + # conservation_area_df = conservation_area_data.merge(addresses, how="left", right_on="uprn", left_on="UPRN") + # conservation_area_df.to_csv( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/hornsey_conservation_area_data.csv" + # ) + # We format the extracted data so that is has the same structure as non-intrusive recommendations # We then get the UPRNs and create the asset list @@ -213,6 +227,8 @@ def caha(): # If pattern doesn't match, return original address return address + caha_asset_list["row_id"] = caha_asset_list.index + extracted_data = [] asset_list = [] for _, home in tqdm(caha_asset_list.iterrows(), total=len(caha_asset_list)): @@ -270,6 +286,7 @@ def caha(): asset_list.append( { + "row_id": home["row_id"], "uprn": uprn, "address": address, "postcode": home["Postcode"], @@ -280,6 +297,24 @@ def caha(): } ) + # Missing row ids + missed = [r for r in caha_asset_list["row_id"].tolist() if r not in [x["row_id"] for x in asset_list]] + + no_data = [x for x in asset_list if x["uprn"] in [None, ""]] + no_data = pd.DataFrame(no_data) + + # Get conservation area data + uprns = [x["uprn"] for x in extracted_data if x["uprn"] not in ["", None]] + conservation_area_data = OpenUprnClient.get_spatial_data([100022526362], "retrofit-data-dev") + + addresses = pd.DataFrame(asset_list) + addresses["uprn"] = addresses["uprn"].astype(str) + conservation_area_data["UPRN"] = conservation_area_data["UPRN"].astype(str) + conservation_area_df = conservation_area_data.merge(addresses, how="left", right_on="uprn", left_on="UPRN") + conservation_area_df.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_conservation_area_data.csv" + ) + non_invasive_recommendations = [ { "uprn": r["uprn"], diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 34ab778a..b6c29863 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -729,6 +729,7 @@ def extract_epr(pdf_path): "Main Building Alternative Wall Insulation": None, "Main Building Alternative Wall Dry-lining": None, "Main Building Alternative Wall Thickness": None, + "Main Fuel": None } with open(pdf_path, "rb") as file: @@ -1086,7 +1087,6 @@ def main(): retrofit_packages_board = retrofit_packages_board[ retrofit_packages_board["RA"].isin(["Invoiced", "Completed"]) ] - # populated_primary_energy = retrofit_packages_board[ # ~pd.isnull(retrofit_packages_board['BASE Primary energy (13a-272)']) # ] @@ -2442,8 +2442,11 @@ def propsed_wave_3_sample(): # Label final outputs # We create a summary of packages by street - results["Package Ref"] = results["Package Ref"].fillna("Incomplete") + results["Package Ref"] = results["Package Ref"].fillna("EPC C - No Package") results["Package Ref"] = results["Package Ref"].astype(str) + results["Package Ref"] = np.where( + results["Package Ref"] == "4.0", "4", results["Package Ref"] + ) package_summary = results.pivot_table( index='Street and Region', columns='Package Ref', @@ -2451,6 +2454,8 @@ def propsed_wave_3_sample(): fill_value=0 ).reset_index() + assert sum([v for k, v in package_summary.sum().items() if k != "Street and Region"]) == results.shape[0] + street_bid_structure = street_summary.merge( package_summary, how="left", on="Street and Region" ) @@ -2471,11 +2476,6 @@ def propsed_wave_3_sample(): asset_list_ids = asset_list_ids[~pd.isnull(asset_list_ids["Address ID"])] asset_list_ids = asset_list_ids[asset_list_ids["Address ID"] != "Address ID"] asset_list_ids["Address ID"] = asset_list_ids["Address ID"].astype(int) - individual_units_programme = individual_units_programme.merge( - asset_list_ids, - how="left", - on="Address ID", - ) individual_units_programme = individual_units_programme.merge( asset_list_ids.rename( @@ -2571,14 +2571,24 @@ def propsed_wave_3_sample(): for c in ['Low Carbon Heating Infill?', 'Possible Flat Infill?']: street_bid_structure[c] = street_bid_structure[c].fillna(0) - street_bid_structure.to_csv( - os.path.join(CUSTOMER_FOLDER_PATH, "Street Bid Structure.csv"), index=False + master_sheet = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - " + "master " + "sheet.csv", + encoding='latin1' + ) + master_sheet = master_sheet[["Address ID", "Main Fuel"]] + + individual_units_programme = individual_units_programme.merge( + master_sheet, how="left", on="Address ID" ) - # TODO: Add the full Address!!! + street_bid_structure.to_csv( + os.path.join(CUSTOMER_FOLDER_PATH, "Street Bid Structure V2.csv"), index=False + ) individual_units_programme.to_csv( - os.path.join(CUSTOMER_FOLDER_PATH, "Individual units - programme.csv"), index=False + os.path.join(CUSTOMER_FOLDER_PATH, "Individual units - programme V2.csv"), index=False ) # if __name__ == "__main__":