mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
commit
9acf65c243
6 changed files with 598 additions and 37 deletions
|
|
@ -1,34 +1,5 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
# import pandas as pd
|
|
||||||
#
|
|
||||||
# sal = pd.read_excel(
|
|
||||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
|
|
||||||
# "Project/data_validation/to_standardise_uprns - Standardised.xlsx",
|
|
||||||
# sheet_name="Standardised Asset List"
|
|
||||||
# )
|
|
||||||
#
|
|
||||||
# # Quick breadown of missingness
|
|
||||||
# missing = sal[
|
|
||||||
# pd.isnull(sal["estimated"]) | (sal["estimated"] == True) | pd.isnull(sal["epc_os_uprn"])
|
|
||||||
# ]
|
|
||||||
#
|
|
||||||
# fetched = sal[(sal["estimated"] == False) | ~pd.isnull(sal["epc_os_uprn"])].copy()
|
|
||||||
# fetched = fetched[
|
|
||||||
# ["landlord_property_id", "domna_address_1", "domna_postcode", "domna_full_address", "epc_address1",
|
|
||||||
# "epc_postcode", "epc_address", "landlord_property_type", "epc_property_type"]
|
|
||||||
# ]
|
|
||||||
#
|
|
||||||
# known_issues = [
|
|
||||||
#
|
|
||||||
# ]
|
|
||||||
#
|
|
||||||
# # Missed postcodes
|
|
||||||
# missed_postcode_agg = missing.groupby("domna_postcode").size().reset_index(name="count")
|
|
||||||
# missed_postcode_agg = missed_postcode_agg.sort_values("count", ascending=False)
|
|
||||||
#
|
|
||||||
# multi_missed_postcode = missed_postcode_agg[missed_postcode_agg["count"] > 1]
|
|
||||||
|
|
||||||
### Prepare
|
### Prepare
|
||||||
sustainability_data = pd.read_excel(
|
sustainability_data = pd.read_excel(
|
||||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
||||||
|
|
|
||||||
|
|
@ -277,3 +277,185 @@ tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().
|
||||||
tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)
|
tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)
|
||||||
|
|
||||||
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()
|
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()
|
||||||
|
|
||||||
|
sample_data = initial_asset_data[
|
||||||
|
~initial_asset_data["Ownership Type"].isin(
|
||||||
|
[
|
||||||
|
# Commercial # Everything is resi - based on the Residential Indicator variable - all are true
|
||||||
|
# Freeholder
|
||||||
|
"FREEHOLDER", # 19517 properties
|
||||||
|
# HOMEBUY / EQUITY LOAN
|
||||||
|
"Rent to Homebuy", # 1 property
|
||||||
|
# Leaseholder
|
||||||
|
"LEASEHOLD 100%", # 8455 properties
|
||||||
|
"Owned and Managed - 999 year lease", # 2076 properties
|
||||||
|
"Managed but not Owned-Private Lease", # 159 properties
|
||||||
|
"Owned and managed LEASEHOLD", # 26 properties
|
||||||
|
# Outright Sale - can't find anything matching
|
||||||
|
# SHARED EQUITY
|
||||||
|
"Shared Ownership", # 4065 properties
|
||||||
|
"Shared Ownership Owned Not Managed", # 23 properties
|
||||||
|
# Extra categories which seem sensible to exclude
|
||||||
|
"NOT MANAGED AND NOT OWNED"
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
sample_data["Ownership Type"].value_counts()
|
||||||
|
|
||||||
|
sample_data = initial_asset_data[
|
||||||
|
initial_asset_data["Ownership Type"].isin(
|
||||||
|
[
|
||||||
|
"Owned and Managed",
|
||||||
|
"Owned and Managed - 999 year lease",
|
||||||
|
"Owned and managed LEASEHOLD",
|
||||||
|
"LEASEHOLD 100%",
|
||||||
|
"DATALOAD DEFAULT"
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
|
||||||
|
dropped["Ownership Type"].value_counts()
|
||||||
|
|
||||||
|
for value in [
|
||||||
|
# Commercial # Everything is resi, so should be fine. No matches
|
||||||
|
# Freeholder
|
||||||
|
"FREEHOLDER", # 19517 properties
|
||||||
|
# HOMEBUY / EQUITY LOAN
|
||||||
|
"Rent to Homebuy", # 1 property
|
||||||
|
# Leaseholder
|
||||||
|
"LEASEHOLD 100%", # 8455 properties
|
||||||
|
"Owned and Managed - 999 year lease", # 2076 properties
|
||||||
|
"Managed but not Owned-Private Lease", # 159 properties
|
||||||
|
"Owned and managed LEASEHOLD", # 26 properties
|
||||||
|
# Outright Sale - can't find anything matching
|
||||||
|
# SHARED EQUITY
|
||||||
|
"Shared Ownership", # 4065 properties
|
||||||
|
"Shared Ownership Owned Not Managed", # 23 properties
|
||||||
|
]:
|
||||||
|
print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])
|
||||||
|
|
||||||
|
house_types = [
|
||||||
|
"HOUSE",
|
||||||
|
"BUNGALOW",
|
||||||
|
"MAISONETTE",
|
||||||
|
"DUPLEX",
|
||||||
|
]
|
||||||
|
|
||||||
|
guaranteed_control = [
|
||||||
|
"Owned and Managed",
|
||||||
|
"Owned and Managed - 999 year lease",
|
||||||
|
"Owned and managed LEASEHOLD",
|
||||||
|
"LEASEHOLD 100%",
|
||||||
|
"DATALOAD DEFAULT",
|
||||||
|
]
|
||||||
|
|
||||||
|
sample_data = initial_asset_data[
|
||||||
|
(
|
||||||
|
initial_asset_data["Ownership Type"].isin(guaranteed_control)
|
||||||
|
)
|
||||||
|
|
|
||||||
|
(
|
||||||
|
(initial_asset_data["Ownership Type"] == "FREEHOLDER")
|
||||||
|
&
|
||||||
|
(initial_asset_data["Property Type"].isin(house_types))
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
fabric_retrofit_sample = initial_asset_data[
|
||||||
|
initial_asset_data["Ownership Type"].isin(
|
||||||
|
[
|
||||||
|
"Owned and Managed",
|
||||||
|
"FREEHOLDER",
|
||||||
|
"DATALOAD DEFAULT",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
|
||||||
|
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
|
||||||
|
|
||||||
|
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
|
||||||
|
z = initial_asset_data[
|
||||||
|
~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
|
||||||
|
]
|
||||||
|
|
||||||
|
block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
|
||||||
|
zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]
|
||||||
|
|
||||||
|
potential_sample = initial_asset_data[
|
||||||
|
~pd.isnull(initial_asset_data["BlockCode"])
|
||||||
|
]
|
||||||
|
|
||||||
|
compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
|
||||||
|
initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
|
||||||
|
left_on="Property Type",
|
||||||
|
right_on="Property Type",
|
||||||
|
suffixes=("_on_block_codes", "_overall")
|
||||||
|
)
|
||||||
|
|
||||||
|
# Comparison of smaller sample vs overall
|
||||||
|
new_asset_data = pd.read_excel(
|
||||||
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
|
||||||
|
"- Peabody "
|
||||||
|
"- Data Extracts for Domna v2.xlsx",
|
||||||
|
sheet_name="Properties"
|
||||||
|
)
|
||||||
|
|
||||||
|
new_sustainability_data = pd.read_excel(
|
||||||
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
|
||||||
|
"- Peabody "
|
||||||
|
"- Data Extracts for Domna v2.xlsx",
|
||||||
|
sheet_name="Sustainability"
|
||||||
|
)
|
||||||
|
|
||||||
|
sap_bands = pd.read_excel(
|
||||||
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
|
||||||
|
"08012026.xlsx",
|
||||||
|
)
|
||||||
|
|
||||||
|
combined = new_asset_data.merge(
|
||||||
|
new_sustainability_data,
|
||||||
|
left_on="UPRN",
|
||||||
|
right_on="Org Ref",
|
||||||
|
suffixes=("_asset", "_sustainability")
|
||||||
|
).merge(
|
||||||
|
sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
|
||||||
|
)
|
||||||
|
reduced_sample = combined[
|
||||||
|
~combined["AH Tenure"].isin(
|
||||||
|
["Commercial",
|
||||||
|
"Freeholder",
|
||||||
|
"HOMEBUY / EQUITY LOAN",
|
||||||
|
"Leaseholder",
|
||||||
|
"Outright Sale",
|
||||||
|
"SHARED EQUITY",
|
||||||
|
"Shared Ownership"]
|
||||||
|
)
|
||||||
|
].copy()
|
||||||
|
|
||||||
|
# property types
|
||||||
|
property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
|
||||||
|
combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
|
||||||
|
left_on="Property Type",
|
||||||
|
right_on="Property Type",
|
||||||
|
suffixes=("_reduced_sample", "_overall")
|
||||||
|
)
|
||||||
|
|
||||||
|
# lodged ratings
|
||||||
|
lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
|
||||||
|
normalize=True).to_frame().reset_index().merge(
|
||||||
|
combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
|
||||||
|
left_on="Lodged EPC Band",
|
||||||
|
right_on="Lodged EPC Band",
|
||||||
|
suffixes=("_reduced_sample", "_overall")
|
||||||
|
)
|
||||||
|
|
||||||
|
# modelled ratings
|
||||||
|
modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
|
||||||
|
normalize=True).to_frame().reset_index().merge(
|
||||||
|
combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
|
||||||
|
left_on="SAP Band",
|
||||||
|
right_on="SAP Band",
|
||||||
|
suffixes=("_reduced_sample", "_overall")
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,115 @@
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
initial_asset_data = pd.read_excel(
|
||||||
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
||||||
|
"- Data Extracts for Domna.xlsx",
|
||||||
|
sheet_name="Properties"
|
||||||
|
)
|
||||||
|
|
||||||
|
sustainability_data = pd.read_excel(
|
||||||
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
||||||
|
"- Data Extracts for Domna.xlsx",
|
||||||
|
sheet_name="Sustainability"
|
||||||
|
)
|
||||||
|
|
||||||
|
asset_data_v2 = pd.read_excel(
|
||||||
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
|
||||||
|
"- Peabody "
|
||||||
|
"- Data Extracts for Domna v2.xlsx",
|
||||||
|
sheet_name="Properties"
|
||||||
|
)
|
||||||
|
|
||||||
|
desired_ownerships = asset_data_v2[
|
||||||
|
~asset_data_v2["AH Tenure"].isin(
|
||||||
|
{"Commercial",
|
||||||
|
"Freeholder",
|
||||||
|
"HOMEBUY / EQUITY LOAN",
|
||||||
|
"Leaseholder",
|
||||||
|
"Outright Sale",
|
||||||
|
"SHARED EQUITY",
|
||||||
|
"Shared Ownership"}
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
desired_ownerships["Ownership Type"].value_counts()
|
||||||
|
|
||||||
|
removed_ownerships = initial_asset_data[
|
||||||
|
~initial_asset_data["UPRN"].isin(desired_ownerships["UPRN"].values)
|
||||||
|
]["Ownership Type"].value_counts()
|
||||||
|
|
||||||
|
sal = pd.read_excel(
|
||||||
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - "
|
||||||
|
"ownership filtered sal.xlsx",
|
||||||
|
sheet_name="Standardised Asset List"
|
||||||
|
)
|
||||||
|
|
||||||
|
# What did we include, that we shouldn't have?
|
||||||
|
should_have_been_dropped = sal[
|
||||||
|
~sal["landlord_property_id"].isin(desired_ownerships["UPRN"].values)
|
||||||
|
]
|
||||||
|
|
||||||
|
needs_to_be_added = desired_ownerships[
|
||||||
|
~desired_ownerships["UPRN"].isin(sal["landlord_property_id"].values)
|
||||||
|
]
|
||||||
|
|
||||||
|
# Merge on ownership types
|
||||||
|
sal = sal.merge(
|
||||||
|
initial_asset_data[["UPRN", "Ownership Type"]],
|
||||||
|
left_on="domna_property_id",
|
||||||
|
right_on="UPRN",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Remove the irrelevant ownership types
|
||||||
|
sal = sal[
|
||||||
|
~sal["Ownership Type"].isin(
|
||||||
|
[
|
||||||
|
# Commercial # Everything is resi - based on the Residential Indicator variable - all are true
|
||||||
|
# Freeholder
|
||||||
|
"FREEHOLDER", # 19517 properties
|
||||||
|
# HOMEBUY / EQUITY LOAN
|
||||||
|
"Rent to Homebuy", # 1 property
|
||||||
|
# Leaseholder
|
||||||
|
"LEASEHOLD 100%", # 8455 properties
|
||||||
|
"Owned and Managed - 999 year lease", # 2076 properties
|
||||||
|
"Managed but not Owned-Private Lease", # 159 properties
|
||||||
|
"Owned and managed LEASEHOLD", # 26 properties
|
||||||
|
# Outright Sale - can't find anything matching
|
||||||
|
# SHARED EQUITY
|
||||||
|
"Shared Ownership", # 4065 properties
|
||||||
|
"Shared Ownership Owned Not Managed", # 23 properties
|
||||||
|
# Extra categories which seem sensible to exclude
|
||||||
|
"NOT MANAGED AND NOT OWNED"
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
sal["landlord_property_id"] = sal["domna_property_id"].copy()
|
||||||
|
|
||||||
|
# Store this SAL in three batches
|
||||||
|
filename = (
|
||||||
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - "
|
||||||
|
"ownership filtered sal.xlsx"
|
||||||
|
)
|
||||||
|
with pd.ExcelWriter(filename) as writer:
|
||||||
|
sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
|
||||||
|
# Store the three sections
|
||||||
|
sal[0:20000].to_excel(writer, sheet_name="Batch 1", index=False)
|
||||||
|
sal[20000:40000].to_excel(writer, sheet_name="Batch 2", index=False)
|
||||||
|
sal[40000:].to_excel(writer, sheet_name="Batch 3", index=False)
|
||||||
|
|
||||||
|
# Test reading back in and assembling
|
||||||
|
# b1 = pd.read_excel(
|
||||||
|
# filename,
|
||||||
|
# sheet_name="Batch 1"
|
||||||
|
# )
|
||||||
|
# b2 = pd.read_excel(
|
||||||
|
# filename,
|
||||||
|
# sheet_name="Batch 2"
|
||||||
|
# )
|
||||||
|
# b3 = pd.read_excel(
|
||||||
|
# filename,
|
||||||
|
# sheet_name="Batch 3"
|
||||||
|
# )
|
||||||
|
# assembled_sal = pd.concat([b1, b2, b3])
|
||||||
|
# # Make sure we have the right # of UPRNs
|
||||||
|
# assert assembled_sal["epc_os_uprn"].nunique() == sal["epc_os_uprn"].nunique()
|
||||||
|
|
@ -0,0 +1,293 @@
|
||||||
|
# ------ Pull in the full SAL sample ------
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
full_sal = pd.read_excel(
|
||||||
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
|
||||||
|
"SAL/Depracated/20260107 corrected batch 6 sal.xlsx",
|
||||||
|
sheet_name="Standardised Asset List"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ------Pull in the reduced sample ------
|
||||||
|
# This has a slightly incorrect mix of ownership types. Some properties will need to be dropped and others, added
|
||||||
|
reduced_sal = pd.read_excel(
|
||||||
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - "
|
||||||
|
"ownership filtered sal.xlsx",
|
||||||
|
sheet_name="Standardised Asset List"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ------ Pull in the confirmed ownership column from Peabody ------
|
||||||
|
new_asset_data = pd.read_excel(
|
||||||
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
|
||||||
|
"- Peabody "
|
||||||
|
"- Data Extracts for Domna v2.xlsx",
|
||||||
|
sheet_name="Properties"
|
||||||
|
)
|
||||||
|
|
||||||
|
correct_sample = new_asset_data[
|
||||||
|
~new_asset_data["AH Tenure"].isin(
|
||||||
|
["Commercial",
|
||||||
|
"Freeholder",
|
||||||
|
"HOMEBUY / EQUITY LOAN",
|
||||||
|
"Leaseholder",
|
||||||
|
"Outright Sale",
|
||||||
|
"SHARED EQUITY",
|
||||||
|
"Shared Ownership"]
|
||||||
|
)
|
||||||
|
].copy()
|
||||||
|
|
||||||
|
# ------- Stuff to add -------
|
||||||
|
# These are properties that need to be added to the reduced sample, from the SAL
|
||||||
|
stuff_to_add = correct_sample[
|
||||||
|
~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values)
|
||||||
|
]["UPRN"].values
|
||||||
|
|
||||||
|
sal_to_add = full_sal[
|
||||||
|
full_sal["domna_property_id"].isin(stuff_to_add)
|
||||||
|
].copy()
|
||||||
|
|
||||||
|
# ------- Stuff to remove -------
|
||||||
|
stuff_to_remove = reduced_sal[
|
||||||
|
~reduced_sal["landlord_property_id"].isin(correct_sample["UPRN"].values)
|
||||||
|
]["landlord_property_id"].values
|
||||||
|
|
||||||
|
to_delete = reduced_sal[
|
||||||
|
reduced_sal["landlord_property_id"].isin(stuff_to_remove)
|
||||||
|
].copy()
|
||||||
|
|
||||||
|
# ------- Create the correctly formatted SAL, with an individual batch for properties we need to add -------
|
||||||
|
|
||||||
|
# This is what is correct, from the reduced sample, after removing the incorrect ownership types
|
||||||
|
reduced_sal_final = reduced_sal[
|
||||||
|
~reduced_sal["landlord_property_id"].isin(stuff_to_remove)
|
||||||
|
].copy()
|
||||||
|
|
||||||
|
sal_to_add["landlord_property_id"] = sal_to_add["domna_property_id"].copy()
|
||||||
|
|
||||||
|
full_sal = pd.concat(
|
||||||
|
[reduced_sal_final, sal_to_add],
|
||||||
|
)
|
||||||
|
|
||||||
|
# filename = (
|
||||||
|
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260113 - "
|
||||||
|
# "final asset list.xlsx"
|
||||||
|
# )
|
||||||
|
# with pd.ExcelWriter(filename) as writer:
|
||||||
|
# full_sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
|
||||||
|
# # Store the three sections
|
||||||
|
# reduced_sal_final[0:25000].to_excel(writer, sheet_name="Batch 1 - was correct", index=False)
|
||||||
|
# reduced_sal_final[25000:].to_excel(writer, sheet_name="Batch 2 - was correct", index=False)
|
||||||
|
# sal_to_add.to_excel(writer, sheet_name="Batch 3 - needs adding", index=False)
|
||||||
|
|
||||||
|
# We now prepare the process of getting the associated
|
||||||
|
# We have the properties we need to delete. We can get their associated plans for all scenario IDs
|
||||||
|
scenario_ids = [908, 909, 910]
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from backend.app.db.models.portfolio import PropertyModel
|
||||||
|
from backend.app.db.connection import db_session, db_read_session
|
||||||
|
from sqlalchemy import select, func
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from backend.app.db.models.recommendations import Plan
|
||||||
|
|
||||||
|
uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist()
|
||||||
|
|
||||||
|
# PORTFOLIO_ID = 435
|
||||||
|
|
||||||
|
# SCENARIO_ID_WITH_PLANS_TO_DELETE = 910
|
||||||
|
|
||||||
|
|
||||||
|
# Get the property IDs for these UPRNs
|
||||||
|
# def get_property_ids_for_uprns(session: Session, uprns: list[int], portfolio_id) -> list[int]:
|
||||||
|
# return [
|
||||||
|
# property_id
|
||||||
|
# for (property_id,) in
|
||||||
|
# session.query(PropertyModel.id)
|
||||||
|
# .filter(
|
||||||
|
# PropertyModel.uprn.in_(uprns),
|
||||||
|
# PropertyModel.portfolio_id == portfolio_id
|
||||||
|
# )
|
||||||
|
# .all()
|
||||||
|
# ]
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# with db_read_session() as session:
|
||||||
|
# property_ids_to_delete = get_property_ids_for_uprns(
|
||||||
|
# session, uprns_to_be_deleted, portfolio_id=PORTFOLIO_ID
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# def count_plans_for_scenario(session: Session, scenario_id: int, portfolio_id, property_ids) -> int:
|
||||||
|
# return session.execute(
|
||||||
|
# select(func.count())
|
||||||
|
# .select_from(Plan)
|
||||||
|
# .where(
|
||||||
|
# Plan.scenario_id == scenario_id,
|
||||||
|
# Plan.portfolio_id == portfolio_id,
|
||||||
|
# Plan.property_id.in_(property_ids)
|
||||||
|
# )
|
||||||
|
# ).scalar_one()
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# with db_session() as session:
|
||||||
|
# n_plans = count_plans_for_scenario(
|
||||||
|
# session,
|
||||||
|
# scenario_id=SCENARIO_ID_WITH_PLANS_TO_DELETE,
|
||||||
|
# portfolio_id=PORTFOLIO_ID,
|
||||||
|
# property_ids=property_ids_to_delete
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# def get_plan_ids_for_scenario(
|
||||||
|
# session: Session, scenario_id: int, portfolio_id, property_ids
|
||||||
|
# ) -> list[int]:
|
||||||
|
# result = session.execute(
|
||||||
|
# select(Plan.id, Plan.property_id)
|
||||||
|
# .where(
|
||||||
|
# Plan.scenario_id == scenario_id,
|
||||||
|
# Plan.portfolio_id == portfolio_id,
|
||||||
|
# Plan.property_id.in_(property_ids)
|
||||||
|
# )
|
||||||
|
# )
|
||||||
|
# return [{"plan_id": row.id, "property_id": row.property_id} for row in result]
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# with db_session() as session:
|
||||||
|
# plan_ids_to_property = get_plan_ids_for_scenario(
|
||||||
|
# session,
|
||||||
|
# scenario_id=SCENARIO_ID_WITH_PLANS_TO_DELETE,
|
||||||
|
# portfolio_id=PORTFOLIO_ID,
|
||||||
|
# property_ids=property_ids_to_delete
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# df = pd.DataFrame(plan_ids_to_property)
|
||||||
|
# df[df["property_id"].duplicated()].shape
|
||||||
|
#
|
||||||
|
# plan_ids = [row["plan_id"] for row in plan_ids_to_property]
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# def chunked(iterable, size):
|
||||||
|
# for i in range(0, len(iterable), size):
|
||||||
|
# yield iterable[i:i + size]
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# from sqlalchemy import text
|
||||||
|
# from sqlalchemy.orm import Session
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# def delete_plan_batch(session: Session, plan_ids: list[int]):
|
||||||
|
# if not plan_ids:
|
||||||
|
# return
|
||||||
|
#
|
||||||
|
# session.execute(text("SET LOCAL lock_timeout = '5s'"))
|
||||||
|
#
|
||||||
|
# params = {"plan_ids": plan_ids}
|
||||||
|
#
|
||||||
|
# # ----------------------------
|
||||||
|
# # recommendation_materials
|
||||||
|
# # ----------------------------
|
||||||
|
# session.execute(
|
||||||
|
# text("""
|
||||||
|
# DELETE FROM recommendation_materials rm
|
||||||
|
# USING plan_recommendations pr
|
||||||
|
# WHERE rm.recommendation_id = pr.recommendation_id
|
||||||
|
# AND pr.plan_id = ANY(:plan_ids)
|
||||||
|
# """),
|
||||||
|
# params,
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# # ----------------------------
|
||||||
|
# # plan_recommendations
|
||||||
|
# # ----------------------------
|
||||||
|
# session.execute(
|
||||||
|
# text("""
|
||||||
|
# DELETE FROM plan_recommendations
|
||||||
|
# WHERE plan_id = ANY(:plan_ids)
|
||||||
|
# """),
|
||||||
|
# params,
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# # ----------------------------
|
||||||
|
# # recommendations (only those used by these plans)
|
||||||
|
# # ----------------------------
|
||||||
|
# session.execute(
|
||||||
|
# text("""
|
||||||
|
# DELETE FROM recommendation r
|
||||||
|
# WHERE r.id IN (
|
||||||
|
# SELECT DISTINCT recommendation_id
|
||||||
|
# FROM plan_recommendations
|
||||||
|
# WHERE plan_id = ANY(:plan_ids)
|
||||||
|
# )
|
||||||
|
# """),
|
||||||
|
# params,
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# # ----------------------------
|
||||||
|
# # plans LAST
|
||||||
|
# # ----------------------------
|
||||||
|
# session.execute(
|
||||||
|
# text("""
|
||||||
|
# DELETE FROM plan
|
||||||
|
# WHERE id = ANY(:plan_ids)
|
||||||
|
# """),
|
||||||
|
# params,
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# batch_size = 25
|
||||||
|
# total = (len(plan_ids) + batch_size - 1) // batch_size
|
||||||
|
#
|
||||||
|
# for i, batch in enumerate(chunked(plan_ids, batch_size), start=1):
|
||||||
|
# print(f"Deleting plan batch {i}/{total} ({len(batch)} plans)")
|
||||||
|
#
|
||||||
|
# with db_session() as session:
|
||||||
|
# delete_plan_batch(session, batch)
|
||||||
|
#
|
||||||
|
# print(f"Batch {i} committed")
|
||||||
|
#
|
||||||
|
# # Now, we delete the associated properties in batch and associated objects. It should
|
||||||
|
# # largely be property, property details
|
||||||
|
# property_ids_to_delete
|
||||||
|
#
|
||||||
|
# from sqlalchemy import text
|
||||||
|
# from sqlalchemy.orm import Session
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# def move_properties_between_portfolios(
|
||||||
|
# session: Session,
|
||||||
|
# property_ids: list[int],
|
||||||
|
# from_portfolio_id: int,
|
||||||
|
# to_portfolio_id: int,
|
||||||
|
# ):
|
||||||
|
# if not property_ids:
|
||||||
|
# return 0
|
||||||
|
#
|
||||||
|
# result = session.execute(
|
||||||
|
# text("""
|
||||||
|
# UPDATE property
|
||||||
|
# SET portfolio_id = :to_portfolio_id
|
||||||
|
# WHERE portfolio_id = :from_portfolio_id
|
||||||
|
# AND id = ANY(:property_ids)
|
||||||
|
# """),
|
||||||
|
# {
|
||||||
|
# "property_ids": property_ids,
|
||||||
|
# "from_portfolio_id": from_portfolio_id,
|
||||||
|
# "to_portfolio_id": to_portfolio_id,
|
||||||
|
# },
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# return result.rowcount
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# # Moved?
|
||||||
|
# # 573476, 586011
|
||||||
|
#
|
||||||
|
# property_ids_to_delete2 = [x for x in property_ids_to_delete if x not in [573476, 586011]]
|
||||||
|
#
|
||||||
|
# with db_session() as session:
|
||||||
|
# n_moved = move_properties_between_portfolios(
|
||||||
|
# session,
|
||||||
|
# property_ids=property_ids_to_delete2,
|
||||||
|
# from_portfolio_id=PORTFOLIO_ID,
|
||||||
|
# to_portfolio_id=32, # Archive portfolio
|
||||||
|
# )
|
||||||
|
|
@ -87,8 +87,6 @@ resource "aws_db_instance" "default" {
|
||||||
apply_immediately = true
|
apply_immediately = true
|
||||||
# Set up storage type to gp3 for better performance
|
# Set up storage type to gp3 for better performance
|
||||||
storage_type = "gp3"
|
storage_type = "gp3"
|
||||||
# Instance size
|
|
||||||
instance_class = "db.t4g.medium"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Set up the bucket that recieve the csv uploads of epc to be retrofit
|
# Set up the bucket that recieve the csv uploads of epc to be retrofit
|
||||||
|
|
|
||||||
|
|
@ -14,14 +14,16 @@ from collections import defaultdict
|
||||||
|
|
||||||
# PORTFOLIO_ID = 206
|
# PORTFOLIO_ID = 206
|
||||||
# SCENARIOS = [389]
|
# SCENARIOS = [389]
|
||||||
PORTFOLIO_ID = 434 # Peabody
|
PORTFOLIO_ID = 435 # Peabody
|
||||||
SCENARIOS = [
|
SCENARIOS = [
|
||||||
904,
|
908,
|
||||||
905
|
909,
|
||||||
|
# 910,
|
||||||
]
|
]
|
||||||
scenario_names = {
|
scenario_names = {
|
||||||
904: "EPC C - no solid floor, ashp 3.0",
|
908: "EPC C - no solid floor, ashp 3.0",
|
||||||
905: "EPC B - no solid floor, ashp 3.0",
|
909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0",
|
||||||
|
# 910: "EPC B - no solid floor, no EWI, ashp 3.0"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -231,7 +233,7 @@ for scenario_id in SCENARIOS:
|
||||||
|
|
||||||
# Create excel to store to
|
# Create excel to store to
|
||||||
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
|
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
|
||||||
f"Project/Final SAL/{scenario_names[scenario_id]} - corrected.xlsx")
|
f"Project/Final SAL/{scenario_names[scenario_id]} - 20250113 final.xlsx")
|
||||||
with pd.ExcelWriter(filename) as writer:
|
with pd.ExcelWriter(filename) as writer:
|
||||||
df.to_excel(writer, sheet_name="properties", index=False)
|
df.to_excel(writer, sheet_name="properties", index=False)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue