Model/etl/customers/mhs/flag_pilot.py
2025-05-13 11:53:38 +01:00

134 lines
4.3 KiB
Python

"""
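On the standardised asset list, this script flags the pilot assets and the phase 1 assets with a
project code, attaches the CIGA guarantee check result to each matched asset, and saves the
flagged asset list (together with the unchanged flat data) to a new workbook.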
"""
import pandas as pd
import os
import numpy as np
from tqdm import tqdm
# Project codes written into the asset list's "project_code" column further down.
PILOT_PROJECT_CODE = "MHS-000-PILOT"
MHS_PHASE_1_PROJECT_CODE = "MHS-001"

# The asset list and the flat data are two tabs of the same standardised workbook.
standardised_workbook = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS HOMES (Full Asset List) - for programme build - "
    "Standardised.xlsx"
)
asset_list = pd.read_excel(standardised_workbook, sheet_name="Standardised Asset List")
flat_data = pd.read_excel(standardised_workbook, sheet_name="Flat Data")

# Sheet of pilot properties; its "Place Reference" column is compared against landlord_property_id.
pilot_workbook = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS 334 x Pilot reviewed - KB notes end column.xlsx"
)
pilot = pd.read_excel(pilot_workbook)

# CIGA check results, one row per checked address.
ciga_workbook = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS CIGA Check 03042025_201.xlsx"
ciga_checks = pd.read_excel(ciga_workbook)
# Give every CIGA check row an identifier taken from its index, so that matches
# can be joined back to the check results later.
ciga_checks["row_id"] = ciga_checks.index

# Start with no project assigned, then tag every asset that appears in the pilot sheet.
asset_list["project_code"] = None
in_pilot = asset_list["landlord_property_id"].isin(pilot["Place Reference"])
asset_list["project_code"] = np.where(
    in_pilot,
    PILOT_PROJECT_CODE,
    asset_list["project_code"],
)

# We now flag the next phase of the programme: assets that have a cavity or solar
# reason recorded and that were not already given a project code above.
has_measure_reason = asset_list["cavity_reason"].notna() | asset_list["solar_reason"].notna()
not_yet_assigned = asset_list["project_code"].isna()
asset_list["project_code"] = np.where(
    has_measure_reason & not_yet_assigned,
    MHS_PHASE_1_PROJECT_CODE,
    asset_list["project_code"],
)
# We now flag the CIGA checks
# Each CIGA check row is matched to exactly one asset so that its result can be
# joined onto the asset list by domna_property_id.

# Addresses that cannot be matched automatically are pinned by hand:
# CIGA "Matched Address" -> landlord_property_id (compared as a string).
manual_fixes = {
    "123 Columbine Close, Rochester": "2213861230"
}

# Loop-invariant views of the asset list, built once instead of once per CIGA row.
# They keep the asset list's index, so boolean masks built from them align with it.
_asset_address_1 = asset_list["domna_address_1"].astype(str)
_asset_full_address = asset_list["domna_full_address"].str.lower().str.replace(",", "", regex=False)


def _assets_at(postcode, address_1):
    """Return the asset rows whose postcode and first address line both equal the given values."""
    return asset_list[(asset_list["domna_postcode"] == postcode) & (_asset_address_1 == str(address_1))]


def _assets_containing(fragment):
    """Return the asset rows whose full address contains ``fragment``.

    Both sides are lower-cased and stripped of commas before comparing. The
    fragment is matched as literal text (not as a regular expression), so
    characters such as "(" or "." in an address cannot change the match. A
    missing fragment matches nothing.
    """
    if pd.isna(fragment):
        return asset_list.iloc[0:0]
    needle = str(fragment).lower().replace(",", "")
    return asset_list[_asset_full_address.str.contains(needle, na=False, regex=False)]


ciga_lookup = []
for _, row in tqdm(ciga_checks.iterrows(), total=len(ciga_checks)):
    manual_pid = manual_fixes.get(row["Matched Address"])
    if manual_pid:
        df = asset_list[asset_list["landlord_property_id"].astype(str) == manual_pid]
        if df.empty:
            raise ValueError(
                f"Manual fix for {row['Matched Address']!r} points at landlord_property_id "
                f"{manual_pid!r}, which is not in the asset list"
            )
        ciga_lookup.append(
            {
                "domna_property_id": df["domna_property_id"].values[0],
                "row_id": row["row_id"],
            }
        )
        continue

    # NOTE(review): the nesting of the fallback steps below was reconstructed from
    # the statement order; confirm it matches the intended matching cascade.
    # 1. Postcode + first address line, retrying with the matched postcode.
    df = _assets_at(row["Postcode"], row["Address1"])
    if df.empty:
        df = _assets_at(row["Matched Postcode"], row["Address1"])

    # 2. Ambiguous: disambiguate on the full address, first with the matched
    #    address and then with the second address line plus the first address line.
    if df.shape[0] > 1:
        df = _assets_containing(row["Matched Address"])
        if df.empty:
            df = _assets_containing(row["Address2"])
            df = df[df["domna_address_1"].astype(str) == str(row["Address1"])]

    # Anything other than a single match (none, or still several) is an error.
    if df.shape[0] != 1:
        raise ValueError(
            f"Expected exactly one match for {row['Address1']} in the asset list, found {df.shape[0]}"
        )
    ciga_lookup.append(
        {
            "domna_property_id": df["domna_property_id"].values[0],
            "row_id": row["row_id"],
        }
    )
# Turn the collected matches into a frame and attach each check's guarantee result.
ciga_lookup = pd.DataFrame(ciga_lookup)
ciga_guarantees = ciga_checks[["row_id", "Guarantee"]].rename(columns={"Guarantee": "ciga_guarantee"})
ciga_lookup = pd.merge(ciga_lookup, ciga_guarantees, how="left", on="row_id")

# NOTE(review): this flag is set on the lookup only; it is not among the columns
# merged into the asset list below. Confirm whether it should be carried across.
ciga_lookup["ciga_check_complete"] = True

# Bring the guarantee result onto the asset list; assets without a check get a null.
asset_list = pd.merge(
    asset_list,
    ciga_lookup[["domna_property_id", "ciga_guarantee"]],
    how="left",
    on="domna_property_id",
)
# Check we matched addresses correctly
# match_check = ciga_lookup.merge(
# ciga_checks, how="left", on="row_id"
# ).merge(
# asset_list[["domna_property_id", "domna_full_address"]], how="left", on="domna_property_id"
# )
# match_check = match_check[["Matched Address", "domna_full_address"]]
# Save
filename = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/12052025 MHS Standardised Asset List - "
    "programme.xlsx"
)

# Store the data in two tabs, written in this order: the flagged asset list,
# then the flat data exactly as it was read in.
sheets = {
    "Standardised Asset List": asset_list,
    "Flat Data": flat_data,
}
with pd.ExcelWriter(filename) as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)