trying to match master to asset list

This commit is contained in:
Khalim Conn-Kowlessar 2025-03-08 17:54:38 +00:00
parent c4eb72fb92
commit 9eba778eb1
3 changed files with 36 additions and 4 deletions

View file

@ -1932,8 +1932,10 @@ class AssetList:
# Some properties have been visited multiple times with the outcome "See notes", which implies that
# the surveyor had a detailed explanation as to why they couldn't gain access. If this has
# happened multiple times for a property, we judge that the work may not be viable.
date_col = "Week Commencing" if "Week Commencing" in self.outcomes else "Survey Date"
lookup = lookup.merge(
self.outcomes[["row_id", "Outcome", "Notes", "Week Commencing"]], how="left", on="row_id"
self.outcomes[["row_id", "Outcome", "Notes", date_col]], how="left", on="row_id"
)
visit_counts = (
@ -1949,6 +1951,9 @@ class AssetList:
visit_counts, how="left", on="domna_property_id"
)
if pivot_df[self.DOMNA_PROPERTY_ID].duplicated().sum():
raise Exception("We have duplicated property IDs in the outcomes data")
# We merge this data onto outcomes
self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values)
self.outcomes = self.outcomes.merge(
@ -1962,10 +1967,16 @@ class AssetList:
def flag_survey_master(
self,
master_filepaths
master_filepaths,
master_to_asset_list_filepath=None
):
# TODO: This probably needs further expansion
if master_to_asset_list_filepath is not None:
id_map = pd.read_csv(master_to_asset_list_filepath)
else:
id_map = pd.DataFrame()
logger.info("Getting masters and merging onto asset list")
master_surveyed = []
for filepath in master_filepaths:
@ -1973,6 +1984,11 @@ class AssetList:
# Strip columns
master_data.columns = [c.strip() for c in master_data.columns]
if not id_map.empty:
master_data = master_data.merge(
id_map, how="left", on=['NO.', 'Street / Block Name', 'Post Code']
)
install_col = (
"INSTALLED OR CANCELLED" if "INSTALLED OR CANCELLED" in master_data.columns
else "INSTALL / CANCELLATION DATE"

View file

@ -323,7 +323,11 @@ def app():
landlord_existing_pv = None
outcomes_filename = "plus dane outcomes.xlsx"
outcomes_sheetname = "EVERYTHING"
master_filepaths = ["JJC Rolling Master.csv", "SCIS Rolling Master.csv"]
master_filepaths = [
os.path.join(data_folder, "JJC Rolling Master.csv"),
os.path.join(data_folder, "SCIS Rolling Master.csv"),
]
master_to_asset_list_filepath = os.path.join(data_folder, "surveys_to_assets.csv")
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}
@ -386,7 +390,8 @@ def app():
)
asset_list.flag_survey_master(
master_filepaths=master_filepaths
master_filepaths=master_filepaths,
master_to_asset_list_filepath=master_to_asset_list_filepath
)
### We retrieve the EPC data

View file

@ -2897,6 +2897,17 @@ class DataLoader:
# Merge onto the survey list
survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id")
# TEMP FOR NEWER WORK
# matching_lookup = matching_lookup.merge(
# asset_list[["asset_list_row_id", "UPRN"]], how="left", on="asset_list_row_id"
# ).merge(
# survey_list[["survey_list_row_id", "NO.", "Street / Block Name", "Post Code"]],
# how="left", on="survey_list_row_id"
# )
# matching_lookup.to_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/surveys_to_assets.csv"
# )
return survey_list
@staticmethod