Model/etl/customers/aiha/bid_numbers.py
Khalim Conn-Kowlessar 294506853d adding in new features
2024-11-18 18:24:26 +00:00

106 lines
4.3 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
This is an ad hoc script used to pull together some of the figures that are included in the
Warm Homes: Social Housing Wave 3 funding application.
"""
import pandas as pd
import numpy as np
# Measures-package workbook. Both the full AIHA property list and the modelled measures are
# sheets of this one file, so the path is defined once.
MEASURES_WORKBOOK_PATH = (
    "/Users/khalimconn-kowlessar/Downloads/AIHA Measures Packages 2024_11_13.xlsx"
)

# Open the workbook a single time and parse both sheets from the same handle, rather than
# re-opening and re-parsing the file for every sheet.
with pd.ExcelFile(MEASURES_WORKBOOK_PATH) as measures_workbook:
    # header is zero-indexed: column labels come from the third row of this sheet.
    aiha_all_units = measures_workbook.parse(
        sheet_name="All Properties - AIHA",
        header=2,
    )
    # header is zero-indexed: column labels come from the sixth row of this sheet.
    modelled_units = measures_workbook.parse(
        sheet_name="Modelled Properties - Measures",
        header=5,
    )

# Drop the first two columns, which have no header label (pandas names them "Unnamed: N").
aiha_all_units = aiha_all_units.drop(columns=['Unnamed: 0', 'Unnamed: 1'])

# Previously extracted property data for AIHA, loaded as-is.
aiha_extracted_property_data = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/extracted_property_data.csv"
)
# Wave 3 units are taken to be the properties with a positive expected package cost.
aiha_wave_3_units = aiha_all_units[aiha_all_units["Expected Package Cost"].astype(float) > 0]
# TODO: The EPC C property isn't a C!
# The combined "D or E" label is counted as "E".
aiha_epc_breakdown = aiha_wave_3_units["Expected EPC Rating"].replace({"D or E": "E"}).value_counts()

# The survey key is searched for the organisation name as a literal substring (regex=False).
# na=False makes rows with a missing survey key count as "no match"; without it the mask
# contains NaN and boolean indexing raises.
# For CAHA
caha_epc_breakdown = modelled_units.loc[
    modelled_units["Survey Key"].str.contains("CAHA", regex=False, na=False),
    "Current EPC Rating",
].value_counts()
# For Hornsey
hornsey_epc_breakdown = modelled_units.loc[
    modelled_units["Survey Key"].str.contains("HORNSEY", regex=False, na=False),
    "Current EPC Rating",
].value_counts()
# Columns that together identify a property in both the asset data and the Wave 3 unit list.
_address_keys = ["Address letter or number", "Street address", "Postcode"]
# Archetype attributes carried over from the original asset data.
_feature_columns = ["Wall type", "Property type", "built-form", "floor"]

# Original asset data; header is zero-indexed, so labels come from the sheet's third row.
aiha_original_asset_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/240924- KSQ & Domna Info Merge - AIHA - SHDF Wave 3 "
    "bid - Supplementary information.xlsx",
    sheet_name="Archetyping Data",
    header=2,
)

# Get the units in the bid: an inner join on the address keys keeps only asset rows whose
# address also appears among the Wave 3 units.
aiha_wave_3_features = pd.merge(
    aiha_original_asset_data[_address_keys + _feature_columns],
    aiha_wave_3_units[_address_keys],
    how="inner",
    on=_address_keys,
)
# Number of Wave 3 units per wall type.
wall_type_breakdown = aiha_wave_3_features["Wall type"].value_counts()

# Number of Wave 3 units per (property type, floor) combination, as a flat DataFrame.
property_type_breakdown = (
    aiha_wave_3_features
    .groupby(["Property type", "floor"])
    .size()
    .reset_index()
)

# Street address and postcode of every flat. The result is not stored, so this only shows
# something when run interactively (presumably how this script is used - confirm).
aiha_wave_3_features.loc[
    aiha_wave_3_features["Property type"] == "Flat",
    ["Street address", "Postcode"],
]
# NOTE(review): the lines below look like hand-recorded per-property notes; the meaning of
# each field is not documented in this file.
# 4 Yetev Lev Court  ... Semi-Detached mid - Medium
# B 86 Bethune Road ... Mid-Terrace top. - Low
# A 80 Bethune Road ... Mid-Terrace ground. - Low
# B 80 Bethune Road ... \n \n - Low
# A 9 Clapton Common ... Semi-Detached ground. - Low
# C 9 Clapton Common ... End-Terrace \n. - Low
# B 89 Manor Road ... \n \n. - Low
# A 6 Northfield Road ... Detached top. - Low
# 13 Northfield Rd ... Semi-Detached \n - Low
# A 73 Manor Road ... End-Terrace \n - Low
# B 73 Manor Road ... Detached top - Low
# Hornsey data - contained in original asset list
hornsey_asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/SHDF - Template - EOI - Hornsey Housing "
    "Trust.xlsx",
    sheet_name="Ksquared-All units information",
    header=3
)
# We don't need the first row
hornsey_asset_list = hornsey_asset_list.iloc[1:]
# Fill NA values with empty strings
hornsey_asset_list = hornsey_asset_list.fillna("")

# Normalise the address columns: cast to text, trim the ends, then collapse every run of
# whitespace into a single ordinary space. The pattern is written as an explicit regex
# (\s also matches non-breaking spaces) so the clean-up does not depend on an invisible or
# easily mangled character inside a string literal.
for col in ["Address letter or number", "Street address", "Postcode"]:
    hornsey_asset_list[col] = (
        hornsey_asset_list[col]
        .astype(str)
        .str.strip()
        .str.replace(r"\s+", " ", regex=True)
    )

# Keep only rows that actually have an address number/letter. Take an explicit copy so the
# column assignment below writes to an independent frame, not a slice of the old one.
hornsey_asset_list = hornsey_asset_list[
    hornsey_asset_list["Address letter or number"] != ""
].copy()

# Two-way wall classification: anything whose wall type mentions "Cavity" is "Cavity",
# everything else is "Solid". na=False stops non-text cells (for which .str yields NaN,
# which np.where would treat as true) from being labelled "Cavity".
hornsey_asset_list["Wall Type Cleaned"] = np.where(
    hornsey_asset_list["Wall type"].str.contains("Cavity", regex=False, na=False),
    "Cavity",
    "Solid"
)

# Property type tally; the result is not stored, so it is only visible when run interactively.
hornsey_asset_list["Property type"].value_counts()
# CAHA
caha_epc_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_extracted_property_data.xlsx"
)

# Both tallies leave out the row(s) for 33 Woodhouse Road, so the row filter is built once.
_excluding_woodhouse_road = caha_epc_data["address"] != "33 Woodhouse Road"

# Counts per property type and per wall type. Neither result is stored, so they are only
# visible when run interactively.
caha_epc_data.loc[_excluding_woodhouse_road, "property_type"].value_counts()
caha_epc_data.loc[_excluding_woodhouse_road, "wall_type"].value_counts()