From d573c4d8a0ae911edd0e2f181eceb4087e3e78e4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 15:15:05 +0000 Subject: [PATCH] added try except mechanism --- .../ha_15_32/ha_analysis_batch_3.py | 35 ++++++++++++------- etl/epc/Record.py | 32 ++++++++--------- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index e261710e..da484daa 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1089,6 +1089,9 @@ def get_epc_data( outputs = {} for ha_name, data_assets in loader.data.items(): + if ha_name == "HA39": + continue + if not pull_data: # Then we retrieve the data from S3 processed_ha_results = read_pickle_from_s3( @@ -1114,6 +1117,7 @@ def get_epc_data( results = [] scoring_data = [] nodata = [] + failed_model_rows = [] for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)): if property_meta["matching_postcode"] is None: @@ -1225,19 +1229,24 @@ def get_epc_data( if eligibility.eco4_warmfront["eligible"]: if eligibility.epc["uprn"] == "": eligibility.epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1]) - - scoring_dictionary = prepare_model_data_row( - property_id=property_meta["asset_list_row_id"], - modelling_epc=eligibility.epc, - cleaned=cleaned, - cleaning_data=cleaning_data, - created_at=created_at, - old_data=older_epcs, - full_sap_epc=full_sap_epc, - photo_supply_lookup=photo_supply_lookup, - floor_area_decile_thresholds=floor_area_decile_thresholds - ) - scoring_data.extend(scoring_dictionary) + try: + scoring_dictionary = prepare_model_data_row( + property_id=property_meta["asset_list_row_id"], + modelling_epc=eligibility.epc, + cleaned=cleaned, + cleaning_data=cleaning_data, + created_at=created_at, + old_data=older_epcs, + full_sap_epc=full_sap_epc, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds + ) + scoring_data.extend(scoring_dictionary) + except Exception as e: + # If we fail, we just keep a record of it + failed_model_rows.append( + property_meta["asset_list_row_id"] + ) results.append( { diff --git a/etl/epc/Record.py b/etl/epc/Record.py index c793716f..e74330a2 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -725,26 +725,26 @@ class EPCRecord: if self.prepared_epc["construction-age-band"] in DATA_ANOMALY_MATCHES: if self.old_data: # Take the most recent - max_datetime = max( - [ - old_record["lodgement-datetime"] - for old_record in self.old_data - if old_record["construction-age-band"] - not in DATA_ANOMALY_MATCHES - ] - ) - - most_recent = [ - old_record + old_age_bands = [ + old_record["lodgement-datetime"] for old_record in self.old_data - if old_record["lodgement-datetime"] == max_datetime + if old_record["construction-age-band"] not in DATA_ANOMALY_MATCHES ] - self.prepared_epc["construction-age-band"] = ( - EPCDataProcessor.clean_construction_age_band( - most_recent[0]["construction-age-band"] + if old_age_bands: + max_datetime = max(old_age_bands) + + most_recent = [ + old_record + for old_record in self.old_data + if old_record["lodgement-datetime"] == max_datetime + ] + + self.prepared_epc["construction-age-band"] = ( + EPCDataProcessor.clean_construction_age_band( + most_recent[0]["construction-age-band"] + ) ) - ) self.construction_age_band = self.prepared_epc["construction-age-band"] self.age_band = england_wales_age_band_lookup.get(self.construction_age_band)