Model/etl/customers/medway/flag_reviewed.py
2025-05-14 14:27:27 +01:00

104 lines
3.6 KiB
Python

"""
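This script flags which properties in the standardised asset list have been
reviewed by Medway, attaching each property's exclusion reason and CIGA
guarantee details from the programme review workbook.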
"""
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
# All Medway inputs live in one customer folder; keep that folder in a single
# place instead of repeating the absolute path for every workbook.
medway_dir = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
standardised_workbook = os.path.join(
    medway_dir, "MEDWAY Asset List - Standardised.xlsx"
)

# Read both sheets in a single call so the workbook is opened and parsed once.
# With a list of sheet names, read_excel returns a dict keyed by sheet name.
standardised_sheets = pd.read_excel(
    standardised_workbook,
    sheet_name=["Standardised Asset List", "Flat Data"],
)
asset_list = standardised_sheets["Standardised Asset List"]
flat_data = standardised_sheets["Flat Data"]

# No sheet_name is given here, so pandas reads the workbook's first sheet.
reviewed_assets = pd.read_excel(
    os.path.join(medway_dir, "Programme Final Check.xlsx"),
)
# Work on a copy of every reviewed row. Rows are kept whether or not a reason
# is found for them: the earlier filters (on the "include in programme" answer
# and on a non-null exclusion reason) are intentionally disabled.
exclude_from_programme = reviewed_assets.copy()
exclude_from_programme["exclusion_reason"] = None

# (column, value that triggers the rule, reason to record).
# Rules are applied top to bottom, so when several apply to the same row the
# last one listed is the reason that sticks.
reason_rules = (
    ("UPRN", "SOLD", "Sold"),
    ("Include in SHDF Bid?", "Definite", "Included in SHDF Bid"),
    ("Move Forward", "EPC C", "Excluded from Programme"),
)
for column, trigger, reason in reason_rules:
    rule_applies = exclude_from_programme[column] == trigger
    exclude_from_programme.loc[rule_applies, "exclusion_reason"] = reason

# Give each reviewed row a stable positional identifier.
exclude_from_programme = exclude_from_programme.reset_index(drop=True)
exclude_from_programme["row_id"] = exclude_from_programme.index
# Match every reviewed row to exactly one row of the asset list.
#
# Strategy per review row:
#   1. exact match on house number + postcode (after postcode overrides);
#   2. if that finds nothing, look for the review address as literal text
#      inside the comma-free, lower-cased full address of each asset.
# Anything other than exactly one candidate stops the script so the row can be
# fixed by hand.

# Postcode overrides applied before matching, keyed by
# (house number, postcode as written in the review sheet).
postcode_overrides = {
    (218, "ME8 6QB"): "ME8 6QP",
    (198, "ME8 6HL"): "ME8 6LU",
}

# The normalised asset addresses do not depend on the review row, so build
# them once instead of on every iteration.
asset_addresses = (
    asset_list["domna_full_address"].str.replace(",", "", regex=False).str.lower()
)

matched = []
for _, review_row in tqdm(
    exclude_from_programme.iterrows(), total=len(exclude_from_programme)
):
    house_no = review_row["No."]
    raw_postcode = review_row["Postcode"]
    address = review_row["Address"]
    postcode = postcode_overrides.get((house_no, raw_postcode), raw_postcode)

    candidates = asset_list[
        (asset_list["domna_address_1"] == str(house_no))
        & (asset_list["domna_postcode"] == str(postcode))
    ]

    # A missing Address comes through as NaN (a float); treat it as "nothing to
    # search for" rather than crashing on .lower().
    needle = address.lower().strip() if isinstance(address, str) else ""
    if candidates.empty and needle:
        # regex=False: the address is plain text, so characters such as "(" or
        # "+" must not be interpreted as a pattern.
        # na=False: assets with no full address simply do not match.
        candidates = asset_list[
            asset_addresses.str.contains(needle, regex=False, na=False)
        ]

    if len(candidates) != 1:
        raise NotImplementedError(
            "FIX ME: expected exactly one asset-list match for review row "
            f"No.={house_no!r}, Address={address!r}, Postcode={raw_postcode!r}; "
            f"found {len(candidates)}"
        )

    matched.append(
        {
            "full_address": candidates["domna_full_address"].iloc[0],
            "postcode": candidates["domna_postcode"].iloc[0],
            "review_no": house_no,
            "review_address": address,
            "review_postcode": raw_postcode,
            "exclusion_reason": review_row["exclusion_reason"],
            "landlord_property_id": candidates["landlord_property_id"].iloc[0],
            # The review sheet column has no header, hence the pandas
            # auto-generated name.
            "ciga_guarantee": review_row["Unnamed: 21"],
        }
    )
# Turn the per-row match records into a frame; the address from the review
# sheet is carried onto the asset list under the name "ciga_check_address".
matched = pd.DataFrame(matched).rename(
    columns={"review_address": "ciga_check_address"}
)

# Only these review-derived columns are attached to the asset list.
review_columns = [
    "landlord_property_id",
    "exclusion_reason",
    "ciga_guarantee",
    "ciga_check_address",
]
review_flags = matched.loc[:, review_columns]

# Left join: every asset-list row is kept, with the review columns filled in
# where a review row was matched to that landlord_property_id.
asset_list = pd.merge(
    asset_list,
    review_flags,
    how="left",
    on="landlord_property_id",
)
# Save the result as an Excel workbook with two tabs: the flagged asset list
# (with the EPC data) and the flat data.
filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway/Reviewed Standardised Programme.xlsx"
output_tabs = {
    "Standardised Asset List": asset_list,
    "Flat Data": flat_data,
}
with pd.ExcelWriter(filename) as writer:
    # Tabs are written in the order listed above, without the frame index.
    for tab_name, frame in output_tabs.items():
        frame.to_excel(writer, sheet_name=tab_name, index=False)