This commit is contained in:
Jun-te Kim 2026-01-27 18:52:51 +00:00
parent 0254c945e8
commit 1acf4f4d6a
6 changed files with 340 additions and 328 deletions

View file

@ -18,4 +18,5 @@ pytest-cov==7.0.0
ipykernel>=6.25,<7
pydantic-settings<2
pyyaml>=6.0.1
pydantic>=1.10.7,<2
pydantic>=1.10.7,<2
sqlmodel

View file

@ -1,7 +1,7 @@
from sqlalchemy import create_engine
from contextlib import contextmanager
from backend.app.config import get_settings
from sqlmodel import Session
# from sqlmodel import Session
connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
db_string = connection_string.format(

View file

@ -10,4 +10,5 @@ boto3==1.35.44
# Data
openpyxl==3.1.2
# Basic
pytz
pytz
sqlmodel

View file

@ -98,8 +98,17 @@ def main():
results.append(tmp)
final_df = pd.concat(results, ignore_index=True)
a = final_df[["best_match_lexiscore","Address 1", "best_match_address", "Postcode", "UPRN", "best_match_uprn"]] # add levi score to viewing
a = final_df[[
"best_match_lexiscore","Address 1",
"best_match_address", "Postcode",
"UPRN", "best_match_uprn"
]] # add levi score to viewing
b = final_df[final_df["best_match_lexiscore"]>0] # add levi score to viewing
b = b[[
"best_match_lexiscore","Address 1",
"best_match_address", "Postcode",
"UPRN", "best_match_uprn"
]]
if __name__ == "__main__":
main()

View file

@ -1,111 +1,111 @@
import pandas as pd
epc_c_recommendations = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
"solid floor, ashp 3.0 - corrected.xlsx"
)
epc_b_recommendations = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
"solid floor, ashp 3.0 - corrected.xlsx"
)
# epc_c_recommendations = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
# "solid floor, ashp 3.0 - corrected.xlsx"
# )
# epc_b_recommendations = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
# "solid floor, ashp 3.0 - corrected.xlsx"
# )
epc_c_movers = epc_b_recommendations[
epc_b_recommendations["current_epc_rating"] == "Epc.C"
]
epc_c_movers["property_type"].value_counts()
# epc_c_movers = epc_b_recommendations[
# epc_b_recommendations["current_epc_rating"] == "Epc.C"
# ]
# epc_c_movers["property_type"].value_counts()
house_epc_c_movers = epc_c_movers[
epc_c_movers["property_type"] == "House"
]
house_epc_c_movers_with_solar = house_epc_c_movers[
~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"])
]
# house_epc_c_movers = epc_c_movers[
# epc_c_movers["property_type"] == "House"
# ]
# house_epc_c_movers_with_solar = house_epc_c_movers[
# ~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"])
# ]
house_epc_c_movers_with_a_heatpump = house_epc_c_movers[
~pd.isnull(house_epc_c_movers["air_source_heat_pump"])
]
# house_epc_c_movers_with_a_heatpump = house_epc_c_movers[
# ~pd.isnull(house_epc_c_movers["air_source_heat_pump"])
# ]
flat_epc_c_movers = epc_c_movers[
epc_c_movers["property_type"] == "Flat"
]
# flat_epc_c_movers = epc_c_movers[
# epc_c_movers["property_type"] == "Flat"
# ]
epc_c_recommendations["sap_points"].mean()
epc_c_recommendations["sap_points"].mean()
# epc_c_recommendations["sap_points"].mean()
# epc_c_recommendations["sap_points"].mean()
measure_cols = [
"air_source_heat_pump",
"boiler_upgrade",
"cavity_wall_insulation",
"double_glazing",
"external_wall_insulation",
"flat_roof_insulation",
"high_heat_retention_storage_heaters",
"internal_wall_insulation",
"loft_insulation",
"low_energy_lighting",
"mechanical_ventilation",
"room_roof_insulation",
"roomstat_programmer_trvs",
"sealing_open_fireplace",
"secondary_glazing",
"secondary_heating",
"solar_pv",
"solar_pv_with_battery",
"suspended_floor_insulation",
"time_temperature_zone_control",
]
# measure_cols = [
# "air_source_heat_pump",
# "boiler_upgrade",
# "cavity_wall_insulation",
# "double_glazing",
# "external_wall_insulation",
# "flat_roof_insulation",
# "high_heat_retention_storage_heaters",
# "internal_wall_insulation",
# "loft_insulation",
# "low_energy_lighting",
# "mechanical_ventilation",
# "room_roof_insulation",
# "roomstat_programmer_trvs",
# "sealing_open_fireplace",
# "secondary_glazing",
# "secondary_heating",
# "solar_pv",
# "solar_pv_with_battery",
# "suspended_floor_insulation",
# "time_temperature_zone_control",
# ]
epc_c_melted = (
epc_c_recommendations
.melt(
id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols],
value_vars=measure_cols,
var_name="measure_type",
value_name="value",
)
.dropna(subset=["value"])
)
epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0]
epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
# epc_c_melted = (
# epc_c_recommendations
# .melt(
# id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols],
# value_vars=measure_cols,
# var_name="measure_type",
# value_name="value",
# )
# .dropna(subset=["value"])
# )
# epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0]
# epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
epc_b_melted = (
epc_b_recommendations
.melt(
id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols],
value_vars=measure_cols,
var_name="measure_type",
value_name="value",
)
.dropna(subset=["value"])
)
# epc_b_melted = (
# epc_b_recommendations
# .melt(
# id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols],
# value_vars=measure_cols,
# var_name="measure_type",
# value_name="value",
# )
# .dropna(subset=["value"])
# )
epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0]
epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
# epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0]
# epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
measures_compared = epc_c_measures.merge(
epc_b_measures,
left_on="measure_type",
right_on="measure_type",
suffixes=("_epc_c", "_epc_b"),
)
# measures_compared = epc_c_measures.merge(
# epc_b_measures,
# left_on="measure_type",
# right_on="measure_type",
# suffixes=("_epc_c", "_epc_b"),
# )
epc_c_retrofits = epc_c_recommendations[
epc_c_recommendations["total_retrofit_cost"] > 0
]
# epc_c_retrofits = epc_c_recommendations[
# epc_c_recommendations["total_retrofit_cost"] > 0
# ]
epc_b_retrofits = epc_b_recommendations[
epc_b_recommendations["total_retrofit_cost"] > 0
]
# epc_b_retrofits = epc_b_recommendations[
# epc_b_recommendations["total_retrofit_cost"] > 0
# ]
epc_c_retrofits["sap_points"].mean()
epc_b_retrofits["sap_points"].mean()
# epc_c_retrofits["sap_points"].mean()
# epc_b_retrofits["sap_points"].mean()
properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b"))
# properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b"))
properties_in_both["total_retrofit_cost_epc_c"].mean()
properties_in_both["sap_points_epc_c"].mean()
properties_in_both["total_retrofit_cost_epc_b"].mean()
properties_in_both["sap_points_epc_b"].mean()
# properties_in_both["total_retrofit_cost_epc_c"].mean()
# properties_in_both["sap_points_epc_c"].mean()
# properties_in_both["total_retrofit_cost_epc_b"].mean()
# properties_in_both["sap_points_epc_b"].mean()
# Solar PV savings - we need the amount of solar PV bill savings
from sqlalchemy.orm import sessionmaker
@ -114,14 +114,12 @@ from backend.app.db.models.recommendations import Recommendation, Plan, PlanReco
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from collections import defaultdict
PORTFOLIO_ID = 434 # Peabody
PORTFOLIO_ID = 485 # Peabody
SCENARIOS = [
904,
905
970
]
scenario_names = {
904: "EPC C - no solid floor, ashp 3.0",
905: "EPC B - no solid floor, ashp 3.0",
970: "EPC C - no solid floor, ashp 3.0",
}
@ -233,259 +231,266 @@ properties_data, plans_data, recommendations_data = get_data(
recommendations_df = pd.DataFrame(recommendations_data)
properties_df = pd.DataFrame(properties_data)
solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer:
recommendations_df.to_excel(writer, sheet_name="recommendations", index=False)
properties_df.to_excel(writer, sheet_name="properties", index=False)
# Check tenures
initial_asset_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Properties"
)
sustainability_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Sustainability"
)
# solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
# average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
sustainability_sample = sustainability_data[
sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
]
sustainability_sample = sustainability_sample.merge(
initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset")
)
block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False)
# # Check tenures
# initial_asset_data = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
# "- Data Extracts for Domna.xlsx",
# sheet_name="Properties"
# )
# sustainability_data = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
# "- Data Extracts for Domna.xlsx",
# sheet_name="Sustainability"
# )
initial_asset_data.columns
initial_asset_data["LeaseType"].value_counts()
# sustainability_sample = sustainability_data[
# sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
# ]
# sustainability_sample["Tenure Group"].value_counts()
# Tenure Group
# General Needs 57787
# Home Ownership 25471
# Care & Supported Housing 4239
# Rental 2677
# Other 188
# sustainability_sample = sustainability_sample.merge(
# initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset")
# )
df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index()
df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False)
# block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
# block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False)
tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index()
tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)
# initial_asset_data.columns
# initial_asset_data["LeaseType"].value_counts()
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()
# # sustainability_sample["Tenure Group"].value_counts()
# # Tenure Group
# # General Needs 57787
# # Home Ownership 25471
# # Care & Supported Housing 4239
# # Rental 2677
# # Other 188
sample_data = initial_asset_data[
~initial_asset_data["Ownership Type"].isin(
[
# Commercial # Everything is resi - based on the Residential Indicator variable - all are true
# Freeholder
"FREEHOLDER", # 19517 properties
# HOMEBUY / EQUITY LOAN
"Rent to Homebuy", # 1 property
# Leaseholder
"LEASEHOLD 100%", # 8455 properties
"Owned and Managed - 999 year lease", # 2076 properties
"Managed but not Owned-Private Lease", # 159 properties
"Owned and managed LEASEHOLD", # 26 properties
# Outright Sale - can't find anything matching
# SHARED EQUITY
"Shared Ownership", # 4065 properties
"Shared Ownership Owned Not Managed", # 23 properties
# Extra categories which seem sensible to exclude
"NOT MANAGED AND NOT OWNED"
]
)
]
# df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index()
# df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False)
sample_data["Ownership Type"].value_counts()
# tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index()
# tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)
sample_data = initial_asset_data[
initial_asset_data["Ownership Type"].isin(
[
"Owned and Managed",
"Owned and Managed - 999 year lease",
"Owned and managed LEASEHOLD",
"LEASEHOLD 100%",
"DATALOAD DEFAULT"
]
)
]
dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
dropped["Ownership Type"].value_counts()
# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()
for value in [
# Commercial # Everything is resi, so should be fine. No matches
# Freeholder
"FREEHOLDER", # 19517 properties
# HOMEBUY / EQUITY LOAN
"Rent to Homebuy", # 1 property
# Leaseholder
"LEASEHOLD 100%", # 8455 properties
"Owned and Managed - 999 year lease", # 2076 properties
"Managed but not Owned-Private Lease", # 159 properties
"Owned and managed LEASEHOLD", # 26 properties
# Outright Sale - can't find anything matching
# SHARED EQUITY
"Shared Ownership", # 4065 properties
"Shared Ownership Owned Not Managed", # 23 properties
]:
print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])
# sample_data = initial_asset_data[
# ~initial_asset_data["Ownership Type"].isin(
# [
# # Commercial # Everything is resi - based on the Residential Indicator variable - all are true
# # Freeholder
# "FREEHOLDER", # 19517 properties
# # HOMEBUY / EQUITY LOAN
# "Rent to Homebuy", # 1 property
# # Leaseholder
# "LEASEHOLD 100%", # 8455 properties
# "Owned and Managed - 999 year lease", # 2076 properties
# "Managed but not Owned-Private Lease", # 159 properties
# "Owned and managed LEASEHOLD", # 26 properties
# # Outright Sale - can't find anything matching
# # SHARED EQUITY
# "Shared Ownership", # 4065 properties
# "Shared Ownership Owned Not Managed", # 23 properties
# # Extra categories which seem sensible to exclude
# "NOT MANAGED AND NOT OWNED"
# ]
# )
# ]
house_types = [
"HOUSE",
"BUNGALOW",
"MAISONETTE",
"DUPLEX",
]
# sample_data["Ownership Type"].value_counts()
guaranteed_control = [
"Owned and Managed",
"Owned and Managed - 999 year lease",
"Owned and managed LEASEHOLD",
"LEASEHOLD 100%",
"DATALOAD DEFAULT",
]
# sample_data = initial_asset_data[
# initial_asset_data["Ownership Type"].isin(
# [
# "Owned and Managed",
# "Owned and Managed - 999 year lease",
# "Owned and managed LEASEHOLD",
# "LEASEHOLD 100%",
# "DATALOAD DEFAULT"
# ]
# )
# ]
# dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
# dropped["Ownership Type"].value_counts()
sample_data = initial_asset_data[
(
initial_asset_data["Ownership Type"].isin(guaranteed_control)
)
|
(
(initial_asset_data["Ownership Type"] == "FREEHOLDER")
&
(initial_asset_data["Property Type"].isin(house_types))
)
]
# for value in [
# # Commercial # Everything is resi, so should be fine. No matches
# # Freeholder
# "FREEHOLDER", # 19517 properties
# # HOMEBUY / EQUITY LOAN
# "Rent to Homebuy", # 1 property
# # Leaseholder
# "LEASEHOLD 100%", # 8455 properties
# "Owned and Managed - 999 year lease", # 2076 properties
# "Managed but not Owned-Private Lease", # 159 properties
# "Owned and managed LEASEHOLD", # 26 properties
# # Outright Sale - can't find anything matching
# # SHARED EQUITY
# "Shared Ownership", # 4065 properties
# "Shared Ownership Owned Not Managed", # 23 properties
# ]:
# print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])
fabric_retrofit_sample = initial_asset_data[
initial_asset_data["Ownership Type"].isin(
[
"Owned and Managed",
"FREEHOLDER",
"DATALOAD DEFAULT",
]
)
]
# house_types = [
# "HOUSE",
# "BUNGALOW",
# "MAISONETTE",
# "DUPLEX",
# ]
initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
# guaranteed_control = [
# "Owned and Managed",
# "Owned and Managed - 999 year lease",
# "Owned and managed LEASEHOLD",
# "LEASEHOLD 100%",
# "DATALOAD DEFAULT",
# ]
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
z = initial_asset_data[
~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
]
# sample_data = initial_asset_data[
# (
# initial_asset_data["Ownership Type"].isin(guaranteed_control)
# )
# |
# (
# (initial_asset_data["Ownership Type"] == "FREEHOLDER")
# &
# (initial_asset_data["Property Type"].isin(house_types))
# )
# ]
block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]
# fabric_retrofit_sample = initial_asset_data[
# initial_asset_data["Ownership Type"].isin(
# [
# "Owned and Managed",
# "FREEHOLDER",
# "DATALOAD DEFAULT",
# ]
# )
# ]
potential_sample = initial_asset_data[
~pd.isnull(initial_asset_data["BlockCode"])
]
# initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
left_on="Property Type",
right_on="Property Type",
suffixes=("_on_block_codes", "_overall")
)
# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
# z = initial_asset_data[
# ~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
# ]
# Comparison of smaller sample vs overall
new_asset_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
"- Peabody "
"- Data Extracts for Domna v2.xlsx",
sheet_name="Properties"
)
# block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
# zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]
new_sustainability_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
"- Peabody "
"- Data Extracts for Domna v2.xlsx",
sheet_name="Sustainability"
)
# potential_sample = initial_asset_data[
# ~pd.isnull(initial_asset_data["BlockCode"])
# ]
sap_bands = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
"08012026.xlsx",
)
# compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
# initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
# left_on="Property Type",
# right_on="Property Type",
# suffixes=("_on_block_codes", "_overall")
# )
combined = new_asset_data.merge(
new_sustainability_data,
left_on="UPRN",
right_on="Org Ref",
suffixes=("_asset", "_sustainability")
).merge(
sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
)
reduced_sample = combined[
~combined["AH Tenure"].isin(
["Commercial",
"Freeholder",
"HOMEBUY / EQUITY LOAN",
"Leaseholder",
"Outright Sale",
"SHARED EQUITY",
"Shared Ownership"]
)
].copy()
# # Comparison of smaller sample vs overall
# new_asset_data = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
# "- Peabody "
# "- Data Extracts for Domna v2.xlsx",
# sheet_name="Properties"
# )
# property types
property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
left_on="Property Type",
right_on="Property Type",
suffixes=("_reduced_sample", "_overall")
)
# new_sustainability_data = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
# "- Peabody "
# "- Data Extracts for Domna v2.xlsx",
# sheet_name="Sustainability"
# )
# lodged ratings
lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
normalize=True).to_frame().reset_index().merge(
combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
left_on="Lodged EPC Band",
right_on="Lodged EPC Band",
suffixes=("_reduced_sample", "_overall")
)
# sap_bands = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
# "08012026.xlsx",
# )
# modelled ratings
modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
normalize=True).to_frame().reset_index().merge(
combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
left_on="SAP Band",
right_on="SAP Band",
suffixes=("_reduced_sample", "_overall")
)
# combined = new_asset_data.merge(
# new_sustainability_data,
# left_on="UPRN",
# right_on="Org Ref",
# suffixes=("_asset", "_sustainability")
# ).merge(
# sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
# )
# reduced_sample = combined[
# ~combined["AH Tenure"].isin(
# ["Commercial",
# "Freeholder",
# "HOMEBUY / EQUITY LOAN",
# "Leaseholder",
# "Outright Sale",
# "SHARED EQUITY",
# "Shared Ownership"]
# )
# ].copy()
# Testing measures
m1 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
"solid floor, ashp 3.0 - 20250113 final.xlsx"
)
m2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
"solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
)
# # property types
# property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
# combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
# left_on="Property Type",
# right_on="Property Type",
# suffixes=("_reduced_sample", "_overall")
# )
compare = m1.merge(
m2,
left_on="uprn",
right_on="uprn",
suffixes=("_ewi_iwi", "_no_ewi_iwi")
)
# # lodged ratings
# lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
# normalize=True).to_frame().reset_index().merge(
# combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
# left_on="Lodged EPC Band",
# right_on="Lodged EPC Band",
# suffixes=("_reduced_sample", "_overall")
# )
# Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario
only_no_ewi_iwi = compare[
(compare["total_retrofit_cost_ewi_iwi"] == 0) &
(compare["total_retrofit_cost_no_ewi_iwi"] != 0)
]
# # modelled ratings
# modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
# normalize=True).to_frame().reset_index().merge(
# combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
# left_on="SAP Band",
# right_on="SAP Band",
# suffixes=("_reduced_sample", "_overall")
# )
(m1["total_retrofit_cost"] > 0).sum()
(m2["total_retrofit_cost"] > 0).sum()
# # Testing measures
# m1 = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
# "solid floor, ashp 3.0 - 20250113 final.xlsx"
# )
# m2 = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
# "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
# )
with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0]
# compare = m1.merge(
# m2,
# left_on="uprn",
# right_on="uprn",
# suffixes=("_ewi_iwi", "_no_ewi_iwi")
# )
z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])]
# # Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario
# only_no_ewi_iwi = compare[
# (compare["total_retrofit_cost_ewi_iwi"] == 0) &
# (compare["total_retrofit_cost_no_ewi_iwi"] != 0)
# ]
# (m1["total_retrofit_cost"] > 0).sum()
# (m2["total_retrofit_cost"] > 0).sum()
# with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0]
# z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])]

View file

@ -14,16 +14,12 @@ from collections import defaultdict
# PORTFOLIO_ID = 206
# SCENARIOS = [389]
PORTFOLIO_ID = 435 # Peabody
PORTFOLIO_ID = 485 # Peabody
SCENARIOS = [
908,
909,
910,
970,
]
scenario_names = {
908: "EPC C - no solid floor, ashp 3.0",
909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0",
910: "EPC B - no solid floor, no EWI, ashp 3.0"
970: "EPC C - Nosolid floor, EQI, IWI",
}