From 294506853dd32fb9aa21ce6500d6eebed7e41be6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 18 Nov 2024 18:24:26 +0000 Subject: [PATCH] adding in new features --- etl/customers/aiha/bid_numbers.py | 18 +++++- etl/customers/remote_assessments/app.py | 1 + .../stonewater/Wave 3 Preparation.py | 59 +++++++++++++++++-- 3 files changed, 71 insertions(+), 7 deletions(-) diff --git a/etl/customers/aiha/bid_numbers.py b/etl/customers/aiha/bid_numbers.py index 96859f99..b371e2e5 100644 --- a/etl/customers/aiha/bid_numbers.py +++ b/etl/customers/aiha/bid_numbers.py @@ -52,6 +52,20 @@ aiha_wave_3_features = aiha_original_asset_data[ wall_type_breakdown = aiha_wave_3_features["Wall type"].value_counts() property_type_breakdown = aiha_wave_3_features.groupby(["Property type", "floor"]).size().reset_index() +aiha_wave_3_features[aiha_wave_3_features["Property type"] == "Flat"][["Street address", "Postcode"]] + +# 4 Yetev Lev Court  ... Semi-Detached mid - Medium +# B 86 Bethune Road ... Mid-Terrace top. - Low +# A 80 Bethune Road ... Mid-Terrace ground. - Low +# B 80 Bethune Road ... \n \n - Low +# A 9 Clapton Common ... Semi-Detached ground. - Low +# C 9 Clapton Common ... End-Terrace \n. - Low +# B 89 Manor Road ... \n \n. - Low +# A 6 Northfield Road ... Detached top. - Low +# 13 Northfield Rd ... Semi-Detached \n - Low +# A 73 Manor Road ... End-Terrace \n - Low +# B 73 Manor Road ... 
Detached top - Low + # Hornsey data - contained in original asset list hornsey_asset_list = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/SHDF - Template - EOI - Hornsey Housing " @@ -88,5 +102,5 @@ caha_epc_data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_extracted_property_data.xlsx" ) -caha_epc_data["property_type"].value_counts() -caha_epc_data["wall_type"].value_counts() +caha_epc_data[caha_epc_data["address"] != "33 Woodhouse Road"]["property_type"].value_counts() +caha_epc_data[caha_epc_data["address"] != "33 Woodhouse Road"]["wall_type"].value_counts() diff --git a/etl/customers/remote_assessments/app.py b/etl/customers/remote_assessments/app.py index 33015d87..59e0e868 100644 --- a/etl/customers/remote_assessments/app.py +++ b/etl/customers/remote_assessments/app.py @@ -17,6 +17,7 @@ def app(): "address": "5, Lynton Street", "postcode": "DE22 3RW" } + ] asset_list = pd.DataFrame(asset_list) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 974cd908..81b5915f 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -6,6 +6,7 @@ import numpy as np from tqdm import tqdm from collections import Counter from scipy.optimize import linprog +from utils.s3 import read_pickle_from_s3 CUSTOMER_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater" SURVEY_FOLDERS = os.path.join(CUSTOMER_FOLDER_PATH, "StonewaterSurveys_{i}") @@ -1264,7 +1265,7 @@ def main(): stonewater_data[c] = stonewater_data[c].astype(str) # Save this data to excel - stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V2.xlsx", index=False) + stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V3.xlsx", index=False) cost_sheet = [ { @@ -1654,17 +1655,66 @@ def propsed_wave_3_sample(): "Property Type", "Wall 
Type", "Roof Type", "Heating"] ] + # Updated packages: to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V3.xlsx", index=False) survey_results = pd.read_excel( os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - Bid Packages WIP 14.11.24.xlsx"), header=13, sheet_name="Modelled Packages" ) + additional_survey_data = pd.read_excel( + os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - costed retrofit packages V3.xlsx"), + header=0 + ) + survey_results = survey_results.merge( + additional_survey_data[ + [ + "Address ID", + "Main Wall Type", "Main Wall Insulation_x", "Main Wall Thickness", + "Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation", + "Main Building Alternative Wall Thickness" + ] + ].rename(columns={"Main Wall Insulation_x": "Main Wall Insulation Type"}), + how="left", + on="Address ID" + ) + # TODO: We probably want the actual surveyed wall, roof, heating type survey_results = survey_results[ - ["Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Postcode"] - ] - survey_results["Postal Region"] = survey_results["Postcode"].str.split(" ").str[0] + [ + "Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Postcode", + "Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness", + "Existing Primary Heating System", + "Main Wall Type", "Main Wall Insulation Type", "Main Wall Thickness", + "Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation", + "Main Building Alternative Wall Thickness" + ] + ].rename( + columns={ + "Existing Primary Heating System": "Surveyed Primary Heating System" + } + ) + + # Concatenate from the wall information + survey_results["Surveyed: Wall Type"] = survey_results["Main Wall Type"] + ": " + survey_results[ + "Main Wall Insulation Type"] + # Alternative wall + survey_results["Survey: Main Alternative Wall"] = ( + survey_results["Main Building Alternative Wall Type"] + ": " + survey_results[ + "Main 
Building Alternative Wall Insulation"] + ) + # Roof information + survey_results["Survey: Type"] = survey_results["Main Roof Type"] + ": " + survey_results[ + "Main Roof Insulation"] + ": " + survey_results["Main Roof Insulation Thickness"].astype(str) + + # Drop the individual columns: + survey_results = survey_results.drop( + columns=[ + "Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness", + "Main Wall Type", "Main Wall Insulation Type", + "Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation" + ] + ) survey_results_with_original_features = survey_results.merge( asset_list[["UPRN", "Address ID", "Property Type", "Wall Type", "Roof Type", "Heating"]], @@ -1676,7 +1726,6 @@ def propsed_wave_3_sample(): raise ValueError("Something went wrong") # We get longitude & Latitude - from utils.s3 import read_pickle_from_s3 archetyping_spatial_features = read_pickle_from_s3( bucket_name="retrofit-data-dev", s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.pkl", )