Model/etl/customers/places_for_people/finalise_programme.py

144 lines
5.1 KiB
Python

"""
Having produced the 4 standardised asset lists for PFP, this script performs a final review
of those assets, reconciling them against a list of properties that PFP sent us. That list indicates
the properties that they have retained or acquired, and it also includes some properties that we
have never seen before and so might require additional inspections.
"""
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
def match_to_list(pfp_reconciliation_list, asset_list):
    """
    Flag which assets in asset_list appear on the PFP reconciliation list.

    Each reconciliation reference ("PRO PROPREF", compared as a string) is matched against
    asset_list["landlord_property_id"], first on the raw ID and then, if that fails, on the ID
    with its leading zeros removed.

    :param pfp_reconciliation_list: DataFrame with a "PRO PROPREF" column
    :param asset_list: DataFrame with a "landlord_property_id" column. A "reconciliation" column
        is added to it in place.
    :return: tuple (asset_list, lookup). lookup has one row per matched reconciliation reference,
        with columns "reconciliation_id" (the reference as a string) and "landlord_property_id"
        (the asset ID it matched). lookup keeps both columns even when nothing matched.
    """
    asset_ids = asset_list["landlord_property_id"]

    # Convert the whole column at once: going row by row with iterrows() upcasts all-numeric rows
    # to float, which would turn the reference 123 into "123.0" and break the comparison
    reconciliation_ids = pfp_reconciliation_list["PRO PROPREF"].astype(str)
    stripped_ids = reconciliation_ids.str.lstrip("0")

    # Match to the asset list - we check the raw ID and then we test removing leading zeros
    exact_match = reconciliation_ids.isin(asset_ids)
    stripped_match = ~exact_match & stripped_ids.isin(asset_ids)
    is_matched = exact_match | stripped_match

    # The asset ID that was hit: the raw ID on an exact match, otherwise the stripped ID
    matched_asset_ids = reconciliation_ids.where(exact_match, stripped_ids)

    # Naming the columns explicitly keeps them present when there are no matches at all
    lookup = pd.DataFrame(
        {
            "reconciliation_id": reconciliation_ids[is_matched].to_numpy(),
            "landlord_property_id": matched_asset_ids[is_matched].to_numpy(),
        },
        columns=["reconciliation_id", "landlord_property_id"],
    )

    asset_list["reconciliation"] = np.where(
        asset_list["landlord_property_id"].isin(
            lookup["landlord_property_id"].values
        ),
        "Property still owned by PFP",
        "Property not owned by PFP"
    )
    return asset_list, lookup
# Root folder holding the inputs and outputs of the PFP "Finalise Programme" work
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/Finalise Programme"

# The reconciliation workbook that is matched against each regional asset list further down
reconciliation_path = os.path.join(data_folder, "PFP properties w repair responsibility.xlsx")
pfp_reconciliation_list = pd.read_excel(reconciliation_path)
def _read_standardised_asset_list(relative_path):
    """
    Read the "Standardised Asset List" sheet of a workbook under data_folder and cast its
    landlord_property_id column to str so that it can be compared with string IDs.
    """
    frame = pd.read_excel(
        os.path.join(data_folder, relative_path),
        sheet_name="Standardised Asset List",
    )
    frame["landlord_property_id"] = frame["landlord_property_id"].astype(str)
    return frame


# London
pfp_london = _read_standardised_asset_list(
    "Standardised Asset Lists/PFP - areas surrounding London - Standardised.xlsx"
)

# North-East
pfp_ne = _read_standardised_asset_list(
    "Standardised Asset Lists/PFP - North East - Standardised.xlsx"
)

# North-West
pfp_nw = _read_standardised_asset_list(
    "Standardised Asset Lists/Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE - "
    "Standardised.xlsx"
)

# East
pfp_east = _read_standardised_asset_list(
    "Standardised Asset Lists/PFP - East - Standardised.xlsx"
)
# Reconcile each regional asset list against the PFP reconciliation list. Each call adds a
# "reconciliation" column to the asset list and returns the lookup of matched IDs.
pfp_london, lookup_london = match_to_list(pfp_reconciliation_list, pfp_london)
pfp_ne, lookup_ne = match_to_list(pfp_reconciliation_list, pfp_ne)
pfp_nw, lookup_nw = match_to_list(pfp_reconciliation_list, pfp_nw)
pfp_east, lookup_east = match_to_list(pfp_reconciliation_list, pfp_east)

# Report the owned / not owned split per region. A bare value_counts() expression is only
# displayed in an interactive session, so the counts are printed explicitly.
for _region_name, _region_assets in (
    ("London", pfp_london),
    ("North-East", pfp_ne),
    ("North-West", pfp_nw),
    ("East", pfp_east),
):
    print(_region_name)
    print(_region_assets["reconciliation"].value_counts())
# We store the reconciled datasets
# to_csv does not create missing directories, so make sure the output folder exists first
os.makedirs(os.path.join(data_folder, "Reconciled Programme"), exist_ok=True)

for _relative_path, _reconciled in (
    ("Reconciled Programme/PFP - areas surrounding London - reconciled.csv", pfp_london),
    ("Reconciled Programme/PFP - North East - reconciled.csv", pfp_ne),
    ("Reconciled Programme/PFP - North West - reconciled.csv", pfp_nw),
    ("Reconciled Programme/PFP - East - reconciled.csv", pfp_east),
):
    # index=False: the row index is not written to the file
    _reconciled.to_csv(os.path.join(data_folder, _relative_path), index=False)
# Do not truncate columns and allow wide lines when DataFrames are displayed
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 1000)

# We look at what was on the reconciled list, that was NOT on the original list.
# First gather every reconciliation ID that matched one of the four regional asset lists.
all_ids = [
    reconciliation_id
    for regional_lookup in (lookup_london, lookup_ne, lookup_nw, lookup_east)
    for reconciliation_id in regional_lookup["reconciliation_id"].tolist()
]
# The "MISSING STILL" sheet of the North-West inspections master workbook.
# This workbook sits outside data_folder, so its absolute path is passed as-is: joining an
# absolute path onto data_folder would simply discard data_folder.
missed_inspections = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West/Places for People NORTH "
    "WEST - INSPECTIONS MASTER - UPDATE.xlsx",
    sheet_name="MISSING STILL"
)
# Give the columns fixed names; this assignment requires the sheet to have exactly two columns
missed_inspections.columns = ["landlord_id", "address"]
# Reconciliation rows whose reference did not match any of the regional asset lists
reference_ids = pfp_reconciliation_list["PRO PROPREF"].astype(str)
not_seen = pfp_reconciliation_list[~reference_ids.isin(all_ids)].copy()

# A row is "not inspected" when its reference appears on the missed inspections sheet, either
# as-is or with leading zeros removed; every other row was not in the original lists
unseen_ids = not_seen["PRO PROPREF"].astype(str)
missed_ids = missed_inspections["landlord_id"].astype(str).values
was_missed = unseen_ids.isin(missed_ids) | unseen_ids.str.lstrip("0").isin(missed_ids)
not_seen["Note"] = np.where(
    was_missed,
    "Property not inspected",
    "Property not in original lists",
)
# Store
not_inspected_path = os.path.join(data_folder, "Reconciled Programme/Properties not inspected by Domna.csv")
# to_csv does not create missing directories, so make sure the output folder exists first
os.makedirs(os.path.dirname(not_inspected_path), exist_ok=True)
not_seen.to_csv(not_inspected_path, index=False)