Model/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py
2026-01-07 20:40:37 +00:00

246 lines
7.3 KiB
Python

"""
This script performs a deep dive into the various scenarios and checks some fundamental things.
This includes:
1) Do properties that should have a plan actually have one? E.g. if the property is EPC D and the scenario is
   meant to get it up to EPC C, there should be a plan.
2) If the plan is fabric first, make sure its measures are actually fabric first.
"""
import pandas as pd
# SAP targets used by the "EPC C" and "EPC B" scenarios below.
_EPC_C_SAP_TARGET = 69
_EPC_B_SAP_TARGET = 81

# Scenario ID -> scenario name. The name is also the stem of the scenario's exported workbook.
scenario_names = {
    871: "EPC C, fabric first, no solid floor, ashp 3.0",
    863: "EPC B, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
    862: "EPC B, No solid floor, ASHP COP 3.0",
    861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
    859: "EPC C, no solid floor, ashp 3.0",
}

# Scenario ID -> SAP score a property must reach under that scenario.
scenario_sap_targets = {
    871: _EPC_C_SAP_TARGET,
    863: _EPC_B_SAP_TARGET,
    862: _EPC_B_SAP_TARGET,
    861: _EPC_C_SAP_TARGET,
    859: _EPC_C_SAP_TARGET,
}
# Every scenario contributes two frames to this list:
#   * properties below the scenario's SAP target that received no SAP uplift, and
#   * properties that gain SAP points at zero retrofit cost.
problems = []
for scenario_id, scenario_name in scenario_names.items():
    sap_target = scenario_sap_targets[scenario_id]

    # Read in the recommended measures exported for this scenario
    print("Reading")
    df = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
        + f"{scenario_name}.xlsx"
    )

    # The flags are stored as columns so they travel with the rows collected below.
    df["below_scenario_target"] = df["current_sap_points"].lt(sap_target)
    df["no_recommended_measures"] = df["sap_points"].eq(0)
    df["zero_cost"] = df["total_retrofit_cost"].eq(0)
    df["sap_points_above_zero"] = df["sap_points"].gt(0)

    # Below the scenario target, yet nothing was recommended
    missing_plan_mask = df["below_scenario_target"] & df["no_recommended_measures"]
    problematic_properties = df.loc[missing_plan_mask].copy()
    if sap_target == 81:
        # NOTE(review): flats are not reported for the 81-point scenarios; the reason is not visible here.
        is_not_flat = problematic_properties["property_type"].ne("Flat")
        problematic_properties = problematic_properties.loc[is_not_flat]

    # SAP points gained although the retrofit cost is zero
    free_uplift_mask = df["sap_points_above_zero"] & df["zero_cost"]
    zero_cost_above_zero_sap = df.loc[free_uplift_mask].copy()

    # Tip: to eyeball every row/column interactively, raise pandas' display.max_rows,
    # display.max_columns and display.width options before printing the frames.
    print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})")
    print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})")

    problems.extend([problematic_properties, zero_cost_above_zero_sap])
# Single-property plan input. Alternative inputs, kept here commented out:
#   [{"uprn": 100022725126, "address": "FLAT 5 Daveys Court", "postcode": "WC2N 4BW"}]
#   [{"uprn": 100120966352, "address": "FLAT 11 Kingsgate", "postcode": "OX18 2BP"}]
plan_input = [
    dict(uprn=200003371857, postcode="SE1 5SJ", address="39 BUTTERMERE CLOSE"),
]
project_dir = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
asset_sheet = "Standardised Asset List"

# Collapse every flagged frame into one row per UPRN
all_problems = pd.concat(problems).drop_duplicates(subset=["uprn"])

# The asset list is split across two workbooks; stack them into one frame
sal = pd.read_excel(project_dir + "20251213 Model data.xlsx", sheet_name=asset_sheet)
sal2 = pd.read_excel(project_dir + "20260105 - additional UPRNS.xlsx", sheet_name=asset_sheet)
sal = pd.concat([sal, sal2])

# Keep only the asset-list rows whose UPRN was flagged above
is_flagged = sal["epc_os_uprn"].isin(all_problems["uprn"])
retry = sal[is_flagged]

# Store the rows to review, without the pandas index
retry.to_excel(
    project_dir + "d_problematic_properties_to_review_20260106.xlsx",
    sheet_name=asset_sheet,
    index=False,
)

# Delete associated plans
# 1) Get the property IDs for these UPRNS, for this portfolio
portfolio_id = 419
uprns = retry["epc_os_uprn"].to_list()
# TODO: Delete all plans for these properties and re-build
from sqlalchemy.orm import Session
from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.connection import db_session
from backend.app.db.models.recommendations import Plan
from sqlalchemy import select, delete
from sqlalchemy.exc import NoResultFound
from sqlalchemy.orm import sessionmaker
def get_property_ids_for_uprns(session: Session, portfolio_id: int, uprns: list[int]) -> list[int]:
    """Return the IDs of the properties in one portfolio that match the given UPRNs.

    Args:
        session: Open SQLAlchemy session used to run the query.
        portfolio_id: Only properties with this ``portfolio_id`` are considered.
        uprns: UPRNs to look up.

    Returns:
        The ``id`` of every ``PropertyModel`` row that matches both filters.
    """
    in_portfolio = PropertyModel.portfolio_id == portfolio_id
    has_requested_uprn = PropertyModel.uprn.in_(uprns)
    matching_rows = session.query(PropertyModel).filter(in_portfolio, has_requested_uprn).all()
    return [row.id for row in matching_rows]
# Resolve the flagged UPRNs to property IDs within their own session.
with db_session() as session:
    property_ids_to_delete = get_property_ids_for_uprns(
        session=session,
        portfolio_id=portfolio_id,
        uprns=uprns,
    )
# Fetch the plans attached to a set of property IDs
def get_all_plans_for_property_ids(session: Session, property_ids: list[int]) -> list[Plan]:
    """Return every ``Plan`` row whose ``property_id`` is in ``property_ids``.

    Args:
        session: Open SQLAlchemy session used to run the query.
        property_ids: Property IDs whose plans should be loaded.
    """
    belongs_to_requested_property = Plan.property_id.in_(property_ids)
    plan_query = session.query(Plan).filter(belongs_to_requested_property)
    return plan_query.all()
def get_ids_of_plans_for_deletion(session: Session, property_ids: list[int]) -> list[int]:
    """Return the IDs of all plans attached to the given properties.

    Args:
        session: Open SQLAlchemy session used to run the query.
        property_ids: Property IDs whose plans are being collected.

    Returns:
        The ``id`` of every ``Plan`` row whose ``property_id`` is in ``property_ids``.
    """
    # Same query as the fetch helper; only the primary keys are kept.
    matching_plans = get_all_plans_for_property_ids(session, property_ids)
    return [matching_plan.id for matching_plan in matching_plans]
# Collect the IDs of every plan that belongs to the flagged properties.
with db_session() as session:
    plan_ids_to_delete = get_ids_of_plans_for_deletion(
        session=session,
        property_ids=property_ids_to_delete,
    )
def chunked(iterable, size):
    """Yield consecutive chunks of at most ``size`` items from ``iterable``.

    Inputs that support ``len()`` are sliced, so each chunk has whatever type slicing the
    input produces (a list for a list, a tuple for a tuple, ...). Inputs without a length,
    such as generators and iterators, are consumed lazily and yielded as lists.

    Args:
        iterable: The items to split up.
        size: Maximum number of items per chunk; must be at least 1.

    Yields:
        The chunks in order; only the last one may hold fewer than ``size`` items.

    Raises:
        ValueError: If ``size`` is less than 1. Because this is a generator, the error
            surfaces when iteration starts, not when ``chunked`` is called.
    """
    from itertools import islice

    # A negative step would make range() yield nothing, silently dropping every item.
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size!r}")

    try:
        length = len(iterable)
    except TypeError:
        # No len(): pull items off an iterator instead of slicing.
        iterator = iter(iterable)
        while batch := list(islice(iterator, size)):
            yield batch
        return

    for start in range(0, length, size):
        yield iterable[start:start + size]
from sqlalchemy import text
from sqlalchemy.orm import Session
def delete_plan_batch(session: Session, plan_ids: list[int]):
    """Delete a batch of plans together with the rows that hang off them.

    Steps, all executed on ``session``:
      1. remember which recommendations the plans link to,
      2. delete the ``recommendation_materials`` rows of those recommendations,
      3. delete the ``plan_recommendations`` link rows,
      4. delete the recommendations remembered in step 1,
      5. delete the plans themselves.

    Step 1 has to run before step 3: once the link rows are gone there is no way to tell
    which recommendations belonged to these plans, and they would be left behind.

    Args:
        session: Open SQLAlchemy session. This function never commits; that is left to
            the caller.
        plan_ids: IDs of the plans to delete. Nothing is executed when it is empty.
    """
    if not plan_ids:
        return

    # Fail fast instead of queueing behind a lock held by another transaction.
    session.execute(text("SET LOCAL lock_timeout = '5s'"))
    params = {"plan_ids": plan_ids}

    # ----------------------------
    # recommendation ids (captured BEFORE the link rows are deleted)
    # ----------------------------
    recommendation_ids = [
        row[0]
        for row in session.execute(
            text("""
                SELECT DISTINCT recommendation_id
                FROM plan_recommendations
                WHERE plan_id = ANY(:plan_ids)
            """),
            params,
        )
    ]

    # ----------------------------
    # recommendation_materials
    # ----------------------------
    session.execute(
        text("""
            DELETE FROM recommendation_materials rm
            USING plan_recommendations pr
            WHERE rm.recommendation_id = pr.recommendation_id
            AND pr.plan_id = ANY(:plan_ids)
        """),
        params,
    )

    # ----------------------------
    # plan_recommendations
    # ----------------------------
    session.execute(
        text("""
            DELETE FROM plan_recommendations
            WHERE plan_id = ANY(:plan_ids)
        """),
        params,
    )

    # ----------------------------
    # recommendations (only those used by these plans)
    # ----------------------------
    # NOTE(review): assumes a recommendation is not shared with plans outside this batch;
    # if it can be, this delete needs an extra guard. Confirm against the schema.
    if recommendation_ids:
        session.execute(
            text("""
                DELETE FROM recommendation
                WHERE id = ANY(:recommendation_ids)
            """),
            {"recommendation_ids": recommendation_ids},
        )

    # ----------------------------
    # plans LAST
    # ----------------------------
    session.execute(
        text("""
            DELETE FROM plan
            WHERE id = ANY(:plan_ids)
        """),
        params,
    )
batch_size = 25

# Number of batches needed to cover every plan ID (ceiling division)
full_batches, leftover = divmod(len(plan_ids_to_delete), batch_size)
total = full_batches + (1 if leftover else 0)

plan_batches = chunked(plan_ids_to_delete, batch_size)
for i, batch in enumerate(plan_batches, start=1):
    print(f"Deleting plan batch {i}/{total} ({len(batch)} plans)")
    # One session per batch, so each batch is handled independently of the others.
    # NOTE(review): presumably db_session() commits when the block exits - confirm.
    with db_session() as session:
        delete_plan_batch(session, batch)
    print(f"Batch {i} committed")