diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt index a6aebdaf..b95cdc2d 100644 --- a/.devcontainer/requirements.txt +++ b/.devcontainer/requirements.txt @@ -18,4 +18,5 @@ pytest-cov==7.0.0 ipykernel>=6.25,<7 pydantic-settings<2 pyyaml>=6.0.1 -pydantic>=1.10.7,<2 \ No newline at end of file +pydantic>=1.10.7,<2 +sqlmodel \ No newline at end of file diff --git a/backend/app/db/connection.py b/backend/app/db/connection.py index 74f3bd2e..0c9a7899 100644 --- a/backend/app/db/connection.py +++ b/backend/app/db/connection.py @@ -1,7 +1,7 @@ from sqlalchemy import create_engine from contextlib import contextmanager from backend.app.config import get_settings -from sqlmodel import Session +# from sqlmodel import Session connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}" db_string = connection_string.format( diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index a45738c6..41f21f6a 100644 --- a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -10,4 +10,5 @@ boto3==1.35.44 # Data openpyxl==3.1.2 # Basic -pytz \ No newline at end of file +pytz +sqlmodel \ No newline at end of file diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 8ae91684..d417c8f1 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -98,8 +98,17 @@ def main(): results.append(tmp) final_df = pd.concat(results, ignore_index=True) - a = final_df[["best_match_lexiscore","Address 1", "best_match_address", "Postcode", "UPRN", "best_match_uprn"]] # add levi score to viewing + a = final_df[[ + "best_match_lexiscore","Address 1", + "best_match_address", "Postcode", + "UPRN", "best_match_uprn" + ]] # add levi score to viewing b = final_df[final_df["best_match_lexiscore"]>0] # add levi score to viewing + b = b[[ + "best_match_lexiscore","Address 1", + "best_match_address", "Postcode", + "UPRN", "best_match_uprn" + ]] if __name__ == "__main__": main() diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py index cd7fba63..39e8d956 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py @@ -1,111 +1,111 @@ import pandas as pd -epc_c_recommendations = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " - "solid floor, ashp 3.0 - corrected.xlsx" -) -epc_b_recommendations = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no " - "solid floor, ashp 3.0 - corrected.xlsx" -) +# epc_c_recommendations = pd.read_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " +# "solid floor, ashp 3.0 - corrected.xlsx" +# ) +# epc_b_recommendations = pd.read_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no " +# "solid floor, ashp 3.0 - corrected.xlsx" +# ) -epc_c_movers = epc_b_recommendations[ - epc_b_recommendations["current_epc_rating"] == "Epc.C" - ] -epc_c_movers["property_type"].value_counts() +# epc_c_movers = epc_b_recommendations[ +# epc_b_recommendations["current_epc_rating"] == "Epc.C" +# ] +# epc_c_movers["property_type"].value_counts() -house_epc_c_movers = epc_c_movers[ - epc_c_movers["property_type"] == "House" - ] -house_epc_c_movers_with_solar = house_epc_c_movers[ - ~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"]) - ] +# house_epc_c_movers = epc_c_movers[ +# epc_c_movers["property_type"] == "House" +# ] +# house_epc_c_movers_with_solar = house_epc_c_movers[ +# ~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"]) +# ] -house_epc_c_movers_with_a_heatpump = house_epc_c_movers[ - ~pd.isnull(house_epc_c_movers["air_source_heat_pump"]) -] +# house_epc_c_movers_with_a_heatpump = house_epc_c_movers[ +# ~pd.isnull(house_epc_c_movers["air_source_heat_pump"]) +# ] -flat_epc_c_movers = epc_c_movers[ - epc_c_movers["property_type"] == "Flat" - ] +# flat_epc_c_movers = epc_c_movers[ +# epc_c_movers["property_type"] == "Flat" +# ] -epc_c_recommendations["sap_points"].mean() -epc_c_recommendations["sap_points"].mean() +# epc_c_recommendations["sap_points"].mean() +# epc_c_recommendations["sap_points"].mean() -measure_cols = [ - "air_source_heat_pump", - "boiler_upgrade", - "cavity_wall_insulation", - "double_glazing", - "external_wall_insulation", - "flat_roof_insulation", - "high_heat_retention_storage_heaters", - "internal_wall_insulation", - "loft_insulation", - "low_energy_lighting", - "mechanical_ventilation", - "room_roof_insulation", - "roomstat_programmer_trvs", - "sealing_open_fireplace", - "secondary_glazing", - "secondary_heating", - "solar_pv", - "solar_pv_with_battery", - "suspended_floor_insulation", - "time_temperature_zone_control", -] +# measure_cols = [ +# "air_source_heat_pump", +# "boiler_upgrade", +# "cavity_wall_insulation", +# "double_glazing", +# "external_wall_insulation", +# "flat_roof_insulation", +# "high_heat_retention_storage_heaters", +# "internal_wall_insulation", +# "loft_insulation", +# "low_energy_lighting", +# "mechanical_ventilation", +# "room_roof_insulation", +# "roomstat_programmer_trvs", +# "sealing_open_fireplace", +# "secondary_glazing", +# "secondary_heating", +# "solar_pv", +# "solar_pv_with_battery", +# "suspended_floor_insulation", +# "time_temperature_zone_control", +# ] -epc_c_melted = ( - epc_c_recommendations - .melt( - id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols], - value_vars=measure_cols, - var_name="measure_type", - value_name="value", - ) - .dropna(subset=["value"]) -) -epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0] -epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index() +# epc_c_melted = ( +# epc_c_recommendations +# .melt( +# id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols], +# value_vars=measure_cols, +# var_name="measure_type", +# value_name="value", +# ) +# .dropna(subset=["value"]) +# ) +# epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0] +# epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index() -epc_b_melted = ( - epc_b_recommendations - .melt( - id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols], - value_vars=measure_cols, - var_name="measure_type", - value_name="value", - ) - .dropna(subset=["value"]) -) +# epc_b_melted = ( +# epc_b_recommendations +# .melt( +# id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols], +# value_vars=measure_cols, +# var_name="measure_type", +# value_name="value", +# ) +# .dropna(subset=["value"]) +# ) -epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0] -epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index() +# epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0] +# epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index() -measures_compared = epc_c_measures.merge( - epc_b_measures, - left_on="measure_type", - right_on="measure_type", - suffixes=("_epc_c", "_epc_b"), -) +# measures_compared = epc_c_measures.merge( +# epc_b_measures, +# left_on="measure_type", +# right_on="measure_type", +# suffixes=("_epc_c", "_epc_b"), +# ) -epc_c_retrofits = epc_c_recommendations[ - epc_c_recommendations["total_retrofit_cost"] > 0 - ] +# epc_c_retrofits = epc_c_recommendations[ +# epc_c_recommendations["total_retrofit_cost"] > 0 +# ] -epc_b_retrofits = epc_b_recommendations[ - epc_b_recommendations["total_retrofit_cost"] > 0 - ] +# epc_b_retrofits = epc_b_recommendations[ +# epc_b_recommendations["total_retrofit_cost"] > 0 +# ] -epc_c_retrofits["sap_points"].mean() -epc_b_retrofits["sap_points"].mean() +# epc_c_retrofits["sap_points"].mean() +# epc_b_retrofits["sap_points"].mean() -properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b")) +# properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b")) -properties_in_both["total_retrofit_cost_epc_c"].mean() -properties_in_both["sap_points_epc_c"].mean() -properties_in_both["total_retrofit_cost_epc_b"].mean() -properties_in_both["sap_points_epc_b"].mean() +# properties_in_both["total_retrofit_cost_epc_c"].mean() +# properties_in_both["sap_points_epc_c"].mean() +# properties_in_both["total_retrofit_cost_epc_b"].mean() +# properties_in_both["sap_points_epc_b"].mean() # Solar PV savings - we need the amount of solar PV bill savings from sqlalchemy.orm import sessionmaker @@ -114,14 +114,12 @@ from backend.app.db.models.recommendations import Recommendation, Plan, PlanReco from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from collections import defaultdict -PORTFOLIO_ID = 434 # Peabody +PORTFOLIO_ID = 485 # Peabody SCENARIOS = [ - 904, - 905 + 970 ] scenario_names = { - 904: "EPC C - no solid floor, ashp 3.0", - 905: "EPC B - no solid floor, ashp 3.0", + 970: "EPC C - no solid floor, ashp 3.0", } @@ -233,259 +231,266 @@ properties_data, plans_data, recommendations_data = get_data( recommendations_df = pd.DataFrame(recommendations_data) properties_df = pd.DataFrame(properties_data) -solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"] -average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index() +with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer: + recommendations_df.to_excel(writer, sheet_name="recommendations", index=False) + properties_df.to_excel(writer, sheet_name="properties", index=False) -# Check tenures -initial_asset_data = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " - "- Data Extracts for Domna.xlsx", - sheet_name="Properties" -) -sustainability_data = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " - "- Data Extracts for Domna.xlsx", - sheet_name="Sustainability" -) + +# solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"] +# average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index() -sustainability_sample = sustainability_data[ - sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values) -] -sustainability_sample = sustainability_sample.merge( - initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset") -) -block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False) -block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False) +# # Check tenures +# initial_asset_data = pd.read_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " +# "- Data Extracts for Domna.xlsx", +# sheet_name="Properties" +# ) +# sustainability_data = pd.read_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " +# "- Data Extracts for Domna.xlsx", +# sheet_name="Sustainability" +# ) -initial_asset_data.columns -initial_asset_data["LeaseType"].value_counts() +# sustainability_sample = sustainability_data[ +# sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values) +# ] -# sustainability_sample["Tenure Group"].value_counts() -# Tenure Group -# General Needs 57787 -# Home Ownership 25471 -# Care & Supported Housing 4239 -# Rental 2677 -# Other 188 +# sustainability_sample = sustainability_sample.merge( +# initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset") +# ) -df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index() -df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False) +# block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False) +# block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False) -tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index() -tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False) +# initial_asset_data.columns +# initial_asset_data["LeaseType"].value_counts() -initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts() +# # sustainability_sample["Tenure Group"].value_counts() +# # Tenure Group +# # General Needs 57787 +# # Home Ownership 25471 +# # Care & Supported Housing 4239 +# # Rental 2677 +# # Other 188 -sample_data = initial_asset_data[ - ~initial_asset_data["Ownership Type"].isin( - [ - # Commercial # Everything is resi - based on the Residential Indicator variable - all are true - # Freeholder - "FREEHOLDER", # 19517 properties - # HOMEBUY / EQUITY LOAN - "Rent to Homebuy", # 1 property - # Leaseholder - "LEASEHOLD 100%", # 8455 properties - "Owned and Managed - 999 year lease", # 2076 properties - "Managed but not Owned-Private Lease", # 159 properties - "Owned and managed LEASEHOLD", # 26 properties - # Outright Sale - can't find anything matching - # SHARED EQUITY - "Shared Ownership", # 4065 properties - "Shared Ownership Owned Not Managed", # 23 properties - # Extra categories which seem sensible to exclude - "NOT MANAGED AND NOT OWNED" - ] - ) -] +# df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index() +# df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False) -sample_data["Ownership Type"].value_counts() +# tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index() +# tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False) -sample_data = initial_asset_data[ - initial_asset_data["Ownership Type"].isin( - [ - "Owned and Managed", - "Owned and Managed - 999 year lease", - "Owned and managed LEASEHOLD", - "LEASEHOLD 100%", - "DATALOAD DEFAULT" - ] - ) -] -dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)] -dropped["Ownership Type"].value_counts() +# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts() -for value in [ - # Commercial # Everything is resi, so should be fine. No matches - # Freeholder - "FREEHOLDER", # 19517 properties - # HOMEBUY / EQUITY LOAN - "Rent to Homebuy", # 1 property - # Leaseholder - "LEASEHOLD 100%", # 8455 properties - "Owned and Managed - 999 year lease", # 2076 properties - "Managed but not Owned-Private Lease", # 159 properties - "Owned and managed LEASEHOLD", # 26 properties - # Outright Sale - can't find anything matching - # SHARED EQUITY - "Shared Ownership", # 4065 properties - "Shared Ownership Owned Not Managed", # 23 properties -]: - print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0]) +# sample_data = initial_asset_data[ +# ~initial_asset_data["Ownership Type"].isin( +# [ +# # Commercial # Everything is resi - based on the Residential Indicator variable - all are true +# # Freeholder +# "FREEHOLDER", # 19517 properties +# # HOMEBUY / EQUITY LOAN +# "Rent to Homebuy", # 1 property +# # Leaseholder +# "LEASEHOLD 100%", # 8455 properties +# "Owned and Managed - 999 year lease", # 2076 properties +# "Managed but not Owned-Private Lease", # 159 properties +# "Owned and managed LEASEHOLD", # 26 properties +# # Outright Sale - can't find anything matching +# # SHARED EQUITY +# "Shared Ownership", # 4065 properties +# "Shared Ownership Owned Not Managed", # 23 properties +# # Extra categories which seem sensible to exclude +# "NOT MANAGED AND NOT OWNED" +# ] +# ) +# ] -house_types = [ - "HOUSE", - "BUNGALOW", - "MAISONETTE", - "DUPLEX", -] +# sample_data["Ownership Type"].value_counts() -guaranteed_control = [ - "Owned and Managed", - "Owned and Managed - 999 year lease", - "Owned and managed LEASEHOLD", - "LEASEHOLD 100%", - "DATALOAD DEFAULT", -] +# sample_data = initial_asset_data[ +# initial_asset_data["Ownership Type"].isin( +# [ +# "Owned and Managed", +# "Owned and Managed - 999 year lease", +# "Owned and managed LEASEHOLD", +# "LEASEHOLD 100%", +# "DATALOAD DEFAULT" +# ] +# ) +# ] +# dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)] +# dropped["Ownership Type"].value_counts() -sample_data = initial_asset_data[ - ( - initial_asset_data["Ownership Type"].isin(guaranteed_control) - ) - | - ( - (initial_asset_data["Ownership Type"] == "FREEHOLDER") - & - (initial_asset_data["Property Type"].isin(house_types)) - ) - ] +# for value in [ +# # Commercial # Everything is resi, so should be fine. No matches +# # Freeholder +# "FREEHOLDER", # 19517 properties +# # HOMEBUY / EQUITY LOAN +# "Rent to Homebuy", # 1 property +# # Leaseholder +# "LEASEHOLD 100%", # 8455 properties +# "Owned and Managed - 999 year lease", # 2076 properties +# "Managed but not Owned-Private Lease", # 159 properties +# "Owned and managed LEASEHOLD", # 26 properties +# # Outright Sale - can't find anything matching +# # SHARED EQUITY +# "Shared Ownership", # 4065 properties +# "Shared Ownership Owned Not Managed", # 23 properties +# ]: +# print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0]) -fabric_retrofit_sample = initial_asset_data[ - initial_asset_data["Ownership Type"].isin( - [ - "Owned and Managed", - "FREEHOLDER", - "DATALOAD DEFAULT", - ] - ) -] +# house_types = [ +# "HOUSE", +# "BUNGALOW", +# "MAISONETTE", +# "DUPLEX", +# ] -initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts() -initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts() +# guaranteed_control = [ +# "Owned and Managed", +# "Owned and Managed - 999 year lease", +# "Owned and managed LEASEHOLD", +# "LEASEHOLD 100%", +# "DATALOAD DEFAULT", +# ] -initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts() -z = initial_asset_data[ - ~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types) - ] +# sample_data = initial_asset_data[ +# ( +# initial_asset_data["Ownership Type"].isin(guaranteed_control) +# ) +# | +# ( +# (initial_asset_data["Ownership Type"] == "FREEHOLDER") +# & +# (initial_asset_data["Property Type"].isin(house_types)) +# ) +# ] -block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False) -zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"] +# fabric_retrofit_sample = initial_asset_data[ +# initial_asset_data["Ownership Type"].isin( +# [ +# "Owned and Managed", +# "FREEHOLDER", +# "DATALOAD DEFAULT", +# ] +# ) +# ] -potential_sample = initial_asset_data[ - ~pd.isnull(initial_asset_data["BlockCode"]) -] +# initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts() +# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts() -compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge( - initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(), - left_on="Property Type", - right_on="Property Type", - suffixes=("_on_block_codes", "_overall") -) +# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts() +# z = initial_asset_data[ +# ~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types) +# ] -# Comparison of smaller sample vs overall -new_asset_data = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " - "- Peabody " - "- Data Extracts for Domna v2.xlsx", - sheet_name="Properties" -) +# block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False) +# zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"] -new_sustainability_data = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " - "- Peabody " - "- Data Extracts for Domna v2.xlsx", - sheet_name="Sustainability" -) +# potential_sample = initial_asset_data[ +# ~pd.isnull(initial_asset_data["BlockCode"]) +# ] -sap_bands = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data " - "08012026.xlsx", -) +# compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge( +# initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(), +# left_on="Property Type", +# right_on="Property Type", +# suffixes=("_on_block_codes", "_overall") +# ) -combined = new_asset_data.merge( - new_sustainability_data, - left_on="UPRN", - right_on="Org Ref", - suffixes=("_asset", "_sustainability") -).merge( - sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef" -) -reduced_sample = combined[ - ~combined["AH Tenure"].isin( - ["Commercial", - "Freeholder", - "HOMEBUY / EQUITY LOAN", - "Leaseholder", - "Outright Sale", - "SHARED EQUITY", - "Shared Ownership"] - ) -].copy() +# # Comparison of smaller sample vs overall +# new_asset_data = pd.read_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " +# "- Peabody " +# "- Data Extracts for Domna v2.xlsx", +# sheet_name="Properties" +# ) -# property types -property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge( - combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(), - left_on="Property Type", - right_on="Property Type", - suffixes=("_reduced_sample", "_overall") -) +# new_sustainability_data = pd.read_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " +# "- Peabody " +# "- Data Extracts for Domna v2.xlsx", +# sheet_name="Sustainability" +# ) -# lodged ratings -lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts( - normalize=True).to_frame().reset_index().merge( - combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(), - left_on="Lodged EPC Band", - right_on="Lodged EPC Band", - suffixes=("_reduced_sample", "_overall") -) +# sap_bands = pd.read_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data " +# "08012026.xlsx", +# ) -# modelled ratings -modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts( - normalize=True).to_frame().reset_index().merge( - combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(), - left_on="SAP Band", - right_on="SAP Band", - suffixes=("_reduced_sample", "_overall") -) +# combined = new_asset_data.merge( +# new_sustainability_data, +# left_on="UPRN", +# right_on="Org Ref", +# suffixes=("_asset", "_sustainability") +# ).merge( +# sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef" +# ) +# reduced_sample = combined[ +# ~combined["AH Tenure"].isin( +# ["Commercial", +# "Freeholder", +# "HOMEBUY / EQUITY LOAN", +# "Leaseholder", +# "Outright Sale", +# "SHARED EQUITY", +# "Shared Ownership"] +# ) +# ].copy() -# Testing measures -m1 = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " - "solid floor, ashp 3.0 - 20250113 final.xlsx" -) -m2 = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " - "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx" -) +# # property types +# property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge( +# combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(), +# left_on="Property Type", +# right_on="Property Type", +# suffixes=("_reduced_sample", "_overall") +# ) -compare = m1.merge( - m2, - left_on="uprn", - right_on="uprn", - suffixes=("_ewi_iwi", "_no_ewi_iwi") -) +# # lodged ratings +# lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts( +# normalize=True).to_frame().reset_index().merge( +# combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(), +# left_on="Lodged EPC Band", +# right_on="Lodged EPC Band", +# suffixes=("_reduced_sample", "_overall") +# ) -# Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario -only_no_ewi_iwi = compare[ - (compare["total_retrofit_cost_ewi_iwi"] == 0) & - (compare["total_retrofit_cost_no_ewi_iwi"] != 0) - ] +# # modelled ratings +# modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts( +# normalize=True).to_frame().reset_index().merge( +# combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(), +# left_on="SAP Band", +# right_on="SAP Band", +# suffixes=("_reduced_sample", "_overall") +# ) -(m1["total_retrofit_cost"] > 0).sum() -(m2["total_retrofit_cost"] > 0).sum() +# # Testing measures +# m1 = pd.read_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " +# "solid floor, ashp 3.0 - 20250113 final.xlsx" +# ) +# m2 = pd.read_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " +# "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx" +# ) -with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0] +# compare = m1.merge( +# m2, +# left_on="uprn", +# right_on="uprn", +# suffixes=("_ewi_iwi", "_no_ewi_iwi") +# ) -z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])] +# # Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario +# only_no_ewi_iwi = compare[ +# (compare["total_retrofit_cost_ewi_iwi"] == 0) & +# (compare["total_retrofit_cost_no_ewi_iwi"] != 0) +# ] + +# (m1["total_retrofit_cost"] > 0).sum() +# (m2["total_retrofit_cost"] > 0).sum() + +# with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0] + +# z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])] diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index f12eb85d..e03a2b5a 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -14,16 +14,12 @@ from collections import defaultdict # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 435 # Peabody +PORTFOLIO_ID = 485 # Peabody SCENARIOS = [ - 908, - 909, - 910, + 970, ] scenario_names = { - 908: "EPC C - no solid floor, ashp 3.0", - 909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0", - 910: "EPC B - no solid floor, no EWI, ashp 3.0" + 970: "EPC C - Nosolid floor, EQI, IWI", }