adding in new features

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-18 18:24:26 +00:00
parent efba61c6ac
commit 294506853d
3 changed files with 71 additions and 7 deletions

View file

@ -52,6 +52,20 @@ aiha_wave_3_features = aiha_original_asset_data[
wall_type_breakdown = aiha_wave_3_features["Wall type"].value_counts()
property_type_breakdown = aiha_wave_3_features.groupby(["Property type", "floor"]).size().reset_index()
aiha_wave_3_features[aiha_wave_3_features["Property type"] == "Flat"][["Street address", "Postcode"]]
# 4 Yetev Lev Court  ... Semi-Detached mid - Medium
# B 86 Bethune Road ... Mid-Terrace top. - Low
# A 80 Bethune Road ... Mid-Terrace ground. - Low
# B 80 Bethune Road ... \n \n - Low
# A 9 Clapton Common ... Semi-Detached ground. - Low
# C 9 Clapton Common ... End-Terrace \n. - Low
# B 89 Manor Road ... \n \n. - Low
# A 6 Northfield Road ... Detached top. - Low
# 13 Northfield Rd ... Semi-Detached \n - Low
# A 73 Manor Road ... End-Terrace \n - Low
# B 73 Manor Road ... Detached top - Low
# Hornsey data - contained in original asset list
hornsey_asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/SHDF - Template - EOI - Hornsey Housing "
@ -88,5 +102,5 @@ caha_epc_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_extracted_property_data.xlsx"
)
caha_epc_data["property_type"].value_counts()
caha_epc_data["wall_type"].value_counts()
caha_epc_data[caha_epc_data["address"] != "33 Woodhouse Road"]["property_type"].value_counts()
caha_epc_data[caha_epc_data["address"] != "33 Woodhouse Road"]["wall_type"].value_counts()

View file

@ -17,6 +17,7 @@ def app():
"address": "5, Lynton Street",
"postcode": "DE22 3RW"
}
]
asset_list = pd.DataFrame(asset_list)

View file

@ -6,6 +6,7 @@ import numpy as np
from tqdm import tqdm
from collections import Counter
from scipy.optimize import linprog
from utils.s3 import read_pickle_from_s3
CUSTOMER_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater"
SURVEY_FOLDERS = os.path.join(CUSTOMER_FOLDER_PATH, "StonewaterSurveys_{i}")
@ -1264,7 +1265,7 @@ def main():
stonewater_data[c] = stonewater_data[c].astype(str)
# Save this data to excel
stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V2.xlsx", index=False)
stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V3.xlsx", index=False)
cost_sheet = [
{
@ -1654,17 +1655,66 @@ def propsed_wave_3_sample():
"Property Type", "Wall Type", "Roof Type", "Heating"]
]
# Updated packages: to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V3.xlsx", index=False)
survey_results = pd.read_excel(
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - Bid Packages WIP 14.11.24.xlsx"),
header=13,
sheet_name="Modelled Packages"
)
additional_survey_data = pd.read_excel(
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - costed retrofit packages V3.xlsx"),
header=0
)
survey_results = survey_results.merge(
additional_survey_data[
[
"Address ID",
"Main Wall Type", "Main Wall Insulation_x", "Main Wall Thickness",
"Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation",
"Main Building Alternative Wall Thickness"
]
].rename(columns={"Main Wall Insulation_x": "Main Wall Insulation Type"}),
how="left",
on="Address ID"
)
# TODO: We probably want the actual surveyed wall, roof, heating type
survey_results = survey_results[
["Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Postcode"]
[
"Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Postcode",
"Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness",
"Existing Primary Heating System",
"Main Wall Type", "Main Wall Insulation Type", "Main Wall Thickness",
"Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation",
"Main Building Alternative Wall Thickness"
]
survey_results["Postal Region"] = survey_results["Postcode"].str.split(" ").str[0]
].rename(
columns={
"Existing Primary Heating System": "Surveyed Primary Heating System"
}
)
# Concatenate the main wall type and its insulation type into a single column
survey_results["Surveyed: Wall Type"] = survey_results["Main Wall Type"] + ": " + survey_results[
"Main Wall Insulation Type"]
# Alternative wall
survey_results["Survey: Main Alternative Wall"] = (
survey_results["Main Building Alternative Wall Type"] + ": " + survey_results[
"Main Building Alternative Wall Insulation"]
)
# Roof information
survey_results["Survey: Type"] = survey_results["Main Roof Type"] + ": " + survey_results[
"Main Roof Insulation"] + ": " + survey_results["Main Roof Insulation Thickness"].astype(str)
# Drop the individual columns:
survey_results = survey_results.drop(
columns=[
"Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness",
"Main Wall Type", "Main Wall Insulation Type",
"Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation"
]
)
survey_results_with_original_features = survey_results.merge(
asset_list[["UPRN", "Address ID", "Property Type", "Wall Type", "Roof Type", "Heating"]],
@ -1676,7 +1726,6 @@ def propsed_wave_3_sample():
raise ValueError("Something went wrong")
# Get longitude & latitude
from utils.s3 import read_pickle_from_s3
archetyping_spatial_features = read_pickle_from_s3(
bucket_name="retrofit-data-dev", s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.pkl",
)