peabody

2026-07-27 23:35:01 +00:00 · 2026-01-27 18:52:51 +00:00 · 2026-01-27 18:52:51 +00:00 · 1acf4f4d6a
commit 1acf4f4d6a
parent 0254c945e8
6 changed files with 340 additions and 328 deletions
--- a/.devcontainer/requirements.txt
+++ b/.devcontainer/requirements.txt
@@ -18,4 +18,5 @@ pytest-cov==7.0.0
 ipykernel>=6.25,<7
 pydantic-settings<2
 pyyaml>=6.0.1
-pydantic>=1.10.7,<2
+pydantic>=1.10.7,<2
+sqlmodel
--- a/backend/app/db/connection.py
+++ b/backend/app/db/connection.py
@@ -1,7 +1,7 @@
 from sqlalchemy import create_engine
 from contextlib import contextmanager
 from backend.app.config import get_settings
-from sqlmodel import Session
+# from sqlmodel import Session

 connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
 db_string = connection_string.format(
--- a/backend/app/requirements/requirements.txt
+++ b/backend/app/requirements/requirements.txt
@@ -10,4 +10,5 @@ boto3==1.35.44
 # Data
 openpyxl==3.1.2
 # Basic
-pytz
+pytz
+sqlmodel
--- a/backend/postcode_splitter/main.py
+++ b/backend/postcode_splitter/main.py
@@ -98,8 +98,17 @@ def main():
            results.append(tmp)

    final_df = pd.concat(results, ignore_index=True)
-    a = final_df[["best_match_lexiscore","Address 1", "best_match_address", "Postcode", "UPRN", "best_match_uprn"]] # add levi score to viewing
+    a = final_df[[
+            "best_match_lexiscore","Address 1",
+            "best_match_address", "Postcode",
+            "UPRN", "best_match_uprn"
+        ]] # add levi score to viewing
    b = final_df[final_df["best_match_lexiscore"]>0] # add levi score to viewing
+    b = b[[
+            "best_match_lexiscore","Address 1",
+            "best_match_address", "Postcode",
+            "UPRN", "best_match_uprn"
+        ]] 

 if __name__ == "__main__":
    main()
--- a/etl/customers/peabody/Nov
+++ b/etl/customers/peabody/Nov
@@ -1,111 +1,111 @@
 import pandas as pd

-epc_c_recommendations = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
-    "solid floor, ashp 3.0 - corrected.xlsx"
-)
-epc_b_recommendations = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
-    "solid floor, ashp 3.0 - corrected.xlsx"
-)
+# epc_c_recommendations = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+#     "solid floor, ashp 3.0 - corrected.xlsx"
+# )
+# epc_b_recommendations = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
+#     "solid floor, ashp 3.0 - corrected.xlsx"
+# )

-epc_c_movers = epc_b_recommendations[
-    epc_b_recommendations["current_epc_rating"] == "Epc.C"
-    ]
-epc_c_movers["property_type"].value_counts()
+# epc_c_movers = epc_b_recommendations[
+#     epc_b_recommendations["current_epc_rating"] == "Epc.C"
+#     ]
+# epc_c_movers["property_type"].value_counts()

-house_epc_c_movers = epc_c_movers[
-    epc_c_movers["property_type"] == "House"
-    ]
-house_epc_c_movers_with_solar = house_epc_c_movers[
-    ~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"])
-    ]
+# house_epc_c_movers = epc_c_movers[
+#     epc_c_movers["property_type"] == "House"
+#     ]
+# house_epc_c_movers_with_solar = house_epc_c_movers[
+#     ~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"])
+#     ]

-house_epc_c_movers_with_a_heatpump = house_epc_c_movers[
-    ~pd.isnull(house_epc_c_movers["air_source_heat_pump"])
-]
+# house_epc_c_movers_with_a_heatpump = house_epc_c_movers[
+#     ~pd.isnull(house_epc_c_movers["air_source_heat_pump"])
+# ]

-flat_epc_c_movers = epc_c_movers[
-    epc_c_movers["property_type"] == "Flat"
-    ]
+# flat_epc_c_movers = epc_c_movers[
+#     epc_c_movers["property_type"] == "Flat"
+#     ]

-epc_c_recommendations["sap_points"].mean()
-epc_c_recommendations["sap_points"].mean()
+# epc_c_recommendations["sap_points"].mean()
+# epc_c_recommendations["sap_points"].mean()

-measure_cols = [
-    "air_source_heat_pump",
-    "boiler_upgrade",
-    "cavity_wall_insulation",
-    "double_glazing",
-    "external_wall_insulation",
-    "flat_roof_insulation",
-    "high_heat_retention_storage_heaters",
-    "internal_wall_insulation",
-    "loft_insulation",
-    "low_energy_lighting",
-    "mechanical_ventilation",
-    "room_roof_insulation",
-    "roomstat_programmer_trvs",
-    "sealing_open_fireplace",
-    "secondary_glazing",
-    "secondary_heating",
-    "solar_pv",
-    "solar_pv_with_battery",
-    "suspended_floor_insulation",
-    "time_temperature_zone_control",
-]
+# measure_cols = [
+#     "air_source_heat_pump",
+#     "boiler_upgrade",
+#     "cavity_wall_insulation",
+#     "double_glazing",
+#     "external_wall_insulation",
+#     "flat_roof_insulation",
+#     "high_heat_retention_storage_heaters",
+#     "internal_wall_insulation",
+#     "loft_insulation",
+#     "low_energy_lighting",
+#     "mechanical_ventilation",
+#     "room_roof_insulation",
+#     "roomstat_programmer_trvs",
+#     "sealing_open_fireplace",
+#     "secondary_glazing",
+#     "secondary_heating",
+#     "solar_pv",
+#     "solar_pv_with_battery",
+#     "suspended_floor_insulation",
+#     "time_temperature_zone_control",
+# ]

-epc_c_melted = (
-    epc_c_recommendations
-    .melt(
-        id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols],
-        value_vars=measure_cols,
-        var_name="measure_type",
-        value_name="value",
-    )
-    .dropna(subset=["value"])
-)
-epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0]
-epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
+# epc_c_melted = (
+#     epc_c_recommendations
+#     .melt(
+#         id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols],
+#         value_vars=measure_cols,
+#         var_name="measure_type",
+#         value_name="value",
+#     )
+#     .dropna(subset=["value"])
+# )
+# epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0]
+# epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()

-epc_b_melted = (
-    epc_b_recommendations
-    .melt(
-        id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols],
-        value_vars=measure_cols,
-        var_name="measure_type",
-        value_name="value",
-    )
-    .dropna(subset=["value"])
-)
+# epc_b_melted = (
+#     epc_b_recommendations
+#     .melt(
+#         id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols],
+#         value_vars=measure_cols,
+#         var_name="measure_type",
+#         value_name="value",
+#     )
+#     .dropna(subset=["value"])
+# )

-epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0]
-epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
+# epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0]
+# epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()

-measures_compared = epc_c_measures.merge(
-    epc_b_measures,
-    left_on="measure_type",
-    right_on="measure_type",
-    suffixes=("_epc_c", "_epc_b"),
-)
+# measures_compared = epc_c_measures.merge(
+#     epc_b_measures,
+#     left_on="measure_type",
+#     right_on="measure_type",
+#     suffixes=("_epc_c", "_epc_b"),
+# )

-epc_c_retrofits = epc_c_recommendations[
-    epc_c_recommendations["total_retrofit_cost"] > 0
-    ]
+# epc_c_retrofits = epc_c_recommendations[
+#     epc_c_recommendations["total_retrofit_cost"] > 0
+#     ]

-epc_b_retrofits = epc_b_recommendations[
-    epc_b_recommendations["total_retrofit_cost"] > 0
-    ]
+# epc_b_retrofits = epc_b_recommendations[
+#     epc_b_recommendations["total_retrofit_cost"] > 0
+#     ]

-epc_c_retrofits["sap_points"].mean()
-epc_b_retrofits["sap_points"].mean()
+# epc_c_retrofits["sap_points"].mean()
+# epc_b_retrofits["sap_points"].mean()

-properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b"))
+# properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b"))

-properties_in_both["total_retrofit_cost_epc_c"].mean()
-properties_in_both["sap_points_epc_c"].mean()
-properties_in_both["total_retrofit_cost_epc_b"].mean()
-properties_in_both["sap_points_epc_b"].mean()
+# properties_in_both["total_retrofit_cost_epc_c"].mean()
+# properties_in_both["sap_points_epc_c"].mean()
+# properties_in_both["total_retrofit_cost_epc_b"].mean()
+# properties_in_both["sap_points_epc_b"].mean()

 # Solar PV savings - we need the amount of solar PV bill savings
 from sqlalchemy.orm import sessionmaker
@@ -114,14 +114,12 @@ from backend.app.db.models.recommendations import Recommendation, Plan, PlanReco
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
 from collections import defaultdict

-PORTFOLIO_ID = 434  # Peabody
+PORTFOLIO_ID = 485  # Peabody
 SCENARIOS = [
-    904,
-    905
+    970
 ]
 scenario_names = {
-    904: "EPC C - no solid floor, ashp 3.0",
-    905: "EPC B - no solid floor, ashp 3.0",
+    970: "EPC C - no solid floor, ashp 3.0",
 }


@@ -233,259 +231,266 @@ properties_data, plans_data, recommendations_data = get_data(
 recommendations_df = pd.DataFrame(recommendations_data)
 properties_df = pd.DataFrame(properties_data)

-solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
-average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
+with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer:
+    recommendations_df.to_excel(writer, sheet_name="recommendations", index=False)
+    properties_df.to_excel(writer, sheet_name="properties", index=False)

-# Check tenures
-initial_asset_data = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
-    "- Data Extracts for Domna.xlsx",
-    sheet_name="Properties"
-)
-sustainability_data = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
-    "- Data Extracts for Domna.xlsx",
-    sheet_name="Sustainability"
-)
+    
+# solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
+# average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()

-sustainability_sample = sustainability_data[
-    sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
-]

-sustainability_sample = sustainability_sample.merge(
-    initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset")
-)

-block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
-block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False)
+# # Check tenures
+# initial_asset_data = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+#     "- Data Extracts for Domna.xlsx",
+#     sheet_name="Properties"
+# )
+# sustainability_data = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+#     "- Data Extracts for Domna.xlsx",
+#     sheet_name="Sustainability"
+# )

-initial_asset_data.columns
-initial_asset_data["LeaseType"].value_counts()
+# sustainability_sample = sustainability_data[
+#     sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
+# ]

-# sustainability_sample["Tenure Group"].value_counts()
-# Tenure Group
-# General Needs               57787
-# Home Ownership              25471
-# Care & Supported Housing     4239
-# Rental                       2677
-# Other                         188
+# sustainability_sample = sustainability_sample.merge(
+#     initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset")
+# )

-df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index()
-df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False)
+# block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
+# block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False)

-tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index()
-tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)
+# initial_asset_data.columns
+# initial_asset_data["LeaseType"].value_counts()

-initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()
+# # sustainability_sample["Tenure Group"].value_counts()
+# # Tenure Group
+# # General Needs               57787
+# # Home Ownership              25471
+# # Care & Supported Housing     4239
+# # Rental                       2677
+# # Other                         188

-sample_data = initial_asset_data[
-    ~initial_asset_data["Ownership Type"].isin(
-        [
-            # Commercial # Everything is resi - based on the Residential Indicator variable - all are true
-            # Freeholder
-            "FREEHOLDER",  # 19517 properties
-            # HOMEBUY / EQUITY LOAN
-            "Rent to Homebuy",  # 1 property
-            # Leaseholder
-            "LEASEHOLD 100%",  # 8455 properties
-            "Owned and Managed - 999 year lease",  # 2076 properties
-            "Managed but not Owned-Private Lease",  # 159 properties
-            "Owned and managed LEASEHOLD",  # 26 properties
-            # Outright Sale - can't find anything matching
-            # SHARED EQUITY
-            "Shared Ownership",  # 4065 properties
-            "Shared Ownership Owned Not Managed",  # 23 properties
-            # Extra categories which seem sensible to exclude
-            "NOT MANAGED AND NOT OWNED"
-        ]
-    )
-]
+# df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index()
+# df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False)

-sample_data["Ownership Type"].value_counts()
+# tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index()
+# tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)

-sample_data = initial_asset_data[
-    initial_asset_data["Ownership Type"].isin(
-        [
-            "Owned and Managed",
-            "Owned and Managed - 999 year lease",
-            "Owned and managed LEASEHOLD",
-            "LEASEHOLD 100%",
-            "DATALOAD DEFAULT"
-        ]
-    )
-]
-dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
-dropped["Ownership Type"].value_counts()
+# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()

-for value in [
-    # Commercial # Everything is resi, so should be fine. No matches
-    # Freeholder
-    "FREEHOLDER",  # 19517 properties
-    # HOMEBUY / EQUITY LOAN
-    "Rent to Homebuy",  # 1 property
-    # Leaseholder
-    "LEASEHOLD 100%",  # 8455 properties
-    "Owned and Managed - 999 year lease",  # 2076 properties
-    "Managed but not Owned-Private Lease",  # 159 properties
-    "Owned and managed LEASEHOLD",  # 26 properties
-    # Outright Sale - can't find anything matching
-    # SHARED EQUITY
-    "Shared Ownership",  # 4065 properties
-    "Shared Ownership Owned Not Managed",  # 23 properties
-]:
-    print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])
+# sample_data = initial_asset_data[
+#     ~initial_asset_data["Ownership Type"].isin(
+#         [
+#             # Commercial # Everything is resi - based on the Residential Indicator variable - all are true
+#             # Freeholder
+#             "FREEHOLDER",  # 19517 properties
+#             # HOMEBUY / EQUITY LOAN
+#             "Rent to Homebuy",  # 1 property
+#             # Leaseholder
+#             "LEASEHOLD 100%",  # 8455 properties
+#             "Owned and Managed - 999 year lease",  # 2076 properties
+#             "Managed but not Owned-Private Lease",  # 159 properties
+#             "Owned and managed LEASEHOLD",  # 26 properties
+#             # Outright Sale - can't find anything matching
+#             # SHARED EQUITY
+#             "Shared Ownership",  # 4065 properties
+#             "Shared Ownership Owned Not Managed",  # 23 properties
+#             # Extra categories which seem sensible to exclude
+#             "NOT MANAGED AND NOT OWNED"
+#         ]
+#     )
+# ]

-house_types = [
-    "HOUSE",
-    "BUNGALOW",
-    "MAISONETTE",
-    "DUPLEX",
-]
+# sample_data["Ownership Type"].value_counts()

-guaranteed_control = [
-    "Owned and Managed",
-    "Owned and Managed - 999 year lease",
-    "Owned and managed LEASEHOLD",
-    "LEASEHOLD 100%",
-    "DATALOAD DEFAULT",
-]
+# sample_data = initial_asset_data[
+#     initial_asset_data["Ownership Type"].isin(
+#         [
+#             "Owned and Managed",
+#             "Owned and Managed - 999 year lease",
+#             "Owned and managed LEASEHOLD",
+#             "LEASEHOLD 100%",
+#             "DATALOAD DEFAULT"
+#         ]
+#     )
+# ]
+# dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
+# dropped["Ownership Type"].value_counts()

-sample_data = initial_asset_data[
-    (
-        initial_asset_data["Ownership Type"].isin(guaranteed_control)
-    )
-    |
-    (
-        (initial_asset_data["Ownership Type"] == "FREEHOLDER")
-        &
-        (initial_asset_data["Property Type"].isin(house_types))
-    )
-    ]
+# for value in [
+#     # Commercial # Everything is resi, so should be fine. No matches
+#     # Freeholder
+#     "FREEHOLDER",  # 19517 properties
+#     # HOMEBUY / EQUITY LOAN
+#     "Rent to Homebuy",  # 1 property
+#     # Leaseholder
+#     "LEASEHOLD 100%",  # 8455 properties
+#     "Owned and Managed - 999 year lease",  # 2076 properties
+#     "Managed but not Owned-Private Lease",  # 159 properties
+#     "Owned and managed LEASEHOLD",  # 26 properties
+#     # Outright Sale - can't find anything matching
+#     # SHARED EQUITY
+#     "Shared Ownership",  # 4065 properties
+#     "Shared Ownership Owned Not Managed",  # 23 properties
+# ]:
+#     print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])

-fabric_retrofit_sample = initial_asset_data[
-    initial_asset_data["Ownership Type"].isin(
-        [
-            "Owned and Managed",
-            "FREEHOLDER",
-            "DATALOAD DEFAULT",
-        ]
-    )
-]
+# house_types = [
+#     "HOUSE",
+#     "BUNGALOW",
+#     "MAISONETTE",
+#     "DUPLEX",
+# ]

-initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
-initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
+# guaranteed_control = [
+#     "Owned and Managed",
+#     "Owned and Managed - 999 year lease",
+#     "Owned and managed LEASEHOLD",
+#     "LEASEHOLD 100%",
+#     "DATALOAD DEFAULT",
+# ]

-initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
-z = initial_asset_data[
-    ~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
-    ]
+# sample_data = initial_asset_data[
+#     (
+#         initial_asset_data["Ownership Type"].isin(guaranteed_control)
+#     )
+#     |
+#     (
+#         (initial_asset_data["Ownership Type"] == "FREEHOLDER")
+#         &
+#         (initial_asset_data["Property Type"].isin(house_types))
+#     )
+#     ]

-block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
-zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]
+# fabric_retrofit_sample = initial_asset_data[
+#     initial_asset_data["Ownership Type"].isin(
+#         [
+#             "Owned and Managed",
+#             "FREEHOLDER",
+#             "DATALOAD DEFAULT",
+#         ]
+#     )
+# ]

-potential_sample = initial_asset_data[
-    ~pd.isnull(initial_asset_data["BlockCode"])
-]
+# initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
+# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()

-compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
-    initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
-    left_on="Property Type",
-    right_on="Property Type",
-    suffixes=("_on_block_codes", "_overall")
-)
+# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
+# z = initial_asset_data[
+#     ~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
+#     ]

-# Comparison of smaller sample vs overall
-new_asset_data = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
-    "- Peabody "
-    "- Data Extracts for Domna v2.xlsx",
-    sheet_name="Properties"
-)
+# block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
+# zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]

-new_sustainability_data = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
-    "- Peabody "
-    "- Data Extracts for Domna v2.xlsx",
-    sheet_name="Sustainability"
-)
+# potential_sample = initial_asset_data[
+#     ~pd.isnull(initial_asset_data["BlockCode"])
+# ]

-sap_bands = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
-    "08012026.xlsx",
-)
+# compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
+#     initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
+#     left_on="Property Type",
+#     right_on="Property Type",
+#     suffixes=("_on_block_codes", "_overall")
+# )

-combined = new_asset_data.merge(
-    new_sustainability_data,
-    left_on="UPRN",
-    right_on="Org Ref",
-    suffixes=("_asset", "_sustainability")
-).merge(
-    sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
-)
-reduced_sample = combined[
-    ~combined["AH Tenure"].isin(
-        ["Commercial",
-         "Freeholder",
-         "HOMEBUY / EQUITY LOAN",
-         "Leaseholder",
-         "Outright Sale",
-         "SHARED EQUITY",
-         "Shared Ownership"]
-    )
-].copy()
+# # Comparison of smaller sample vs overall
+# new_asset_data = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
+#     "- Peabody "
+#     "- Data Extracts for Domna v2.xlsx",
+#     sheet_name="Properties"
+# )

-# property types
-property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
-    combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
-    left_on="Property Type",
-    right_on="Property Type",
-    suffixes=("_reduced_sample", "_overall")
-)
+# new_sustainability_data = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
+#     "- Peabody "
+#     "- Data Extracts for Domna v2.xlsx",
+#     sheet_name="Sustainability"
+# )

-# lodged ratings
-lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
-    normalize=True).to_frame().reset_index().merge(
-    combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
-    left_on="Lodged EPC Band",
-    right_on="Lodged EPC Band",
-    suffixes=("_reduced_sample", "_overall")
-)
+# sap_bands = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
+#     "08012026.xlsx",
+# )

-# modelled ratings
-modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
-    normalize=True).to_frame().reset_index().merge(
-    combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
-    left_on="SAP Band",
-    right_on="SAP Band",
-    suffixes=("_reduced_sample", "_overall")
-)
+# combined = new_asset_data.merge(
+#     new_sustainability_data,
+#     left_on="UPRN",
+#     right_on="Org Ref",
+#     suffixes=("_asset", "_sustainability")
+# ).merge(
+#     sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
+# )
+# reduced_sample = combined[
+#     ~combined["AH Tenure"].isin(
+#         ["Commercial",
+#          "Freeholder",
+#          "HOMEBUY / EQUITY LOAN",
+#          "Leaseholder",
+#          "Outright Sale",
+#          "SHARED EQUITY",
+#          "Shared Ownership"]
+#     )
+# ].copy()

-# Testing measures
-m1 = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
-    "solid floor, ashp 3.0 - 20250113 final.xlsx"
-)
-m2 = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
-    "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
-)
+# # property types
+# property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
+#     combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
+#     left_on="Property Type",
+#     right_on="Property Type",
+#     suffixes=("_reduced_sample", "_overall")
+# )

-compare = m1.merge(
-    m2,
-    left_on="uprn",
-    right_on="uprn",
-    suffixes=("_ewi_iwi", "_no_ewi_iwi")
-)
+# # lodged ratings
+# lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
+#     normalize=True).to_frame().reset_index().merge(
+#     combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
+#     left_on="Lodged EPC Band",
+#     right_on="Lodged EPC Band",
+#     suffixes=("_reduced_sample", "_overall")
+# )

-# Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario
-only_no_ewi_iwi = compare[
-    (compare["total_retrofit_cost_ewi_iwi"] == 0) &
-    (compare["total_retrofit_cost_no_ewi_iwi"] != 0)
-    ]
+# # modelled ratings
+# modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
+#     normalize=True).to_frame().reset_index().merge(
+#     combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
+#     left_on="SAP Band",
+#     right_on="SAP Band",
+#     suffixes=("_reduced_sample", "_overall")
+# )

-(m1["total_retrofit_cost"] > 0).sum()
-(m2["total_retrofit_cost"] > 0).sum()
+# # Testing measures
+# m1 = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+#     "solid floor, ashp 3.0 - 20250113 final.xlsx"
+# )
+# m2 = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+#     "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
+# )

-with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0]
+# compare = m1.merge(
+#     m2,
+#     left_on="uprn",
+#     right_on="uprn",
+#     suffixes=("_ewi_iwi", "_no_ewi_iwi")
+# )

-z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])]
+# # Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario
+# only_no_ewi_iwi = compare[
+#     (compare["total_retrofit_cost_ewi_iwi"] == 0) &
+#     (compare["total_retrofit_cost_no_ewi_iwi"] != 0)
+#     ]
+
+# (m1["total_retrofit_cost"] > 0).sum()
+# (m2["total_retrofit_cost"] > 0).sum()
+
+# with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0]
+
+# z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])]
--- a/sfr/principal_pitch/2_export_data.py
+++ b/sfr/principal_pitch/2_export_data.py
@@ -14,16 +14,12 @@ from collections import defaultdict

 # PORTFOLIO_ID = 206
 # SCENARIOS = [389]
-PORTFOLIO_ID = 435  # Peabody
+PORTFOLIO_ID = 485  # Peabody
 SCENARIOS = [
-    908,
-    909,
-    910,
+    970,
 ]
 scenario_names = {
-    908: "EPC C - no solid floor, ashp 3.0",
-    909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0",
-    910: "EPC B - no solid floor, no EWI, ashp 3.0"
+    970: "EPC C - Nosolid floor, EQI, IWI",
 }