diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 21b2111f..05f6b10e 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -1932,8 +1932,10 @@ class AssetList: # there may be properties that have been visited multiple times where the outcome was "See notes" implying # that the surveyor had a detailed explanation as to why they couldn't gain access so if this has # happened multiple times, in this case we judge that the work may not be viable + + date_col = "Week Commencing" if "Week Commencing" in self.outcomes else "Survey Date" lookup = lookup.merge( - self.outcomes[["row_id", "Outcome", "Notes", "Week Commencing"]], how="left", on="row_id" + self.outcomes[["row_id", "Outcome", "Notes", date_col]], how="left", on="row_id" ) visit_counts = ( @@ -1949,6 +1951,9 @@ class AssetList: visit_counts, how="left", on="domna_property_id" ) + if pivot_df[self.DOMNA_PROPERTY_ID].duplicated().sum(): + raise Exception("We have duplicated property IDs in the outcomes data") + # We merge this data onto outcomes self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values) self.outcomes = self.outcomes.merge( @@ -1962,10 +1967,16 @@ class AssetList: def flag_survey_master( self, - master_filepaths + master_filepaths, + master_to_asset_list_filepath=None ): # TODO: This probably needs further expansion + if master_to_asset_list_filepath is not None: + id_map = pd.read_csv(master_to_asset_list_filepath) + else: + id_map = pd.DataFrame() + logger.info("Getting masters and merging onto asset list") master_surveyed = [] for filepath in master_filepaths: @@ -1973,6 +1984,11 @@ class AssetList: # Strip columns master_data.columns = [c.strip() for c in master_data.columns] + if not id_map.empty: + master_data = master_data.merge( + id_map, how="left", on=['NO.', 'Street / Block Name', 'Post Code'] + ) + install_col = ( "INSTALLED OR CANCELLED" if "INSTALLED OR CANCELLED" in master_data.columns else "INSTALL / CANCELLATION DATE" diff --git a/asset_list/app.py b/asset_list/app.py index fb71a70e..bea9cdde 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -323,7 +323,11 @@ def app(): landlord_existing_pv = None outcomes_filename = "plus dane outcomes.xlsx" outcomes_sheetname = "EVERYTHING" - master_filepaths = ["JJC Rolling Master.csv", "SCIS Rolling Master.csv"] + master_filepaths = [ + os.path.join(data_folder, "JJC Rolling Master.csv"), + os.path.join(data_folder, "SCIS Rolling Master.csv"), + ] + master_to_asset_list_filepath = os.path.join(data_folder, "surveys_to_assets.csv") # Maps addresses to uprn in problematic cases manual_uprn_map = {} @@ -386,7 +390,8 @@ def app(): ) asset_list.flag_survey_master( - master_filepaths=master_filepaths + master_filepaths=master_filepaths, + master_to_asset_list_filepath=master_to_asset_list_filepath ) ### We retrieve the EPC data diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index aca36584..e97f0202 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -2897,6 +2897,17 @@ class DataLoader: # Merge onto the survey list survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id") + # TEMP FOR NEWER WORK + # matching_lookup = matching_lookup.merge( + # asset_list[["asset_list_row_id", "UPRN"]], how="left", on="asset_list_row_id" + # ).merge( + # survey_list[["survey_list_row_id", "NO.", "Street / Block Name", "Post Code"]], + # how="left", on="survey_list_row_id" + # ) + # matching_lookup.to_csv( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/surveys_to_assets.csv" + # ) + return survey_list @staticmethod