""" The data held on file for MHS is fairly incomplete, where not every single property has an observation """ from tqdm import tqdm import pandas as pd from docutils.utils.math.tex2mathml_extern import blahtexml asset_list = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS HOMES (Full Asset List) - for analysis.xlsx", sheet_name="Data" ) # When this list was checked, an observation was made per postcode, and so we need to extrapolate those findings inspections_observatons = asset_list[["UPRN", "Postcode", "ManagementGroup", "WFT Findings"]].copy() populated = [] for _, group in tqdm(inspections_observatons.groupby("Postcode"), total=len(inspections_observatons.groupby("Postcode"))): if all(pd.isnull(group["WFT Findings"])): group["WFT Findings"] = "Property not inspected" populated.append(group) continue fill_observation = group["WFT Findings"].values[0] if pd.isnull(fill_observation): group["WFT Findings"] = group["WFT Findings"].fillna("Property not inspected") populated.append(group) continue group = group.reset_index(drop=True) group_filled = [] for idx, x in group.iterrows(): if idx == 0: group_filled.append(x) continue new_value = x["WFT Findings"] if not pd.isnull(new_value): fill_observation = new_value x["WFT Findings"] = fill_observation group_filled.append(x) group_filled = pd.DataFrame(group_filled) populated.append(group_filled) populated = pd.concat(populated) missed = populated[~populated["UPRN"].isin(asset_list["UPRN"].values)] asset_list = asset_list.drop(columns=["WFT Findings"]).merge( populated.drop(columns=["Postcode", "ManagementGroup"]), how="left", on="UPRN" ) # Store the data asset_list.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS HOMES (Full Asset List) - for programme build.xlsx" )