diff --git a/etl/epc/property_change_app.py b/etl/epc/property_change_app.py index 6e724e33..a6034e3d 100644 --- a/etl/epc/property_change_app.py +++ b/etl/epc/property_change_app.py @@ -411,8 +411,8 @@ def app(): dataset = [] cleaning_dataset = [] - # Keep track of the number of all equals - all_equal_count = 0 + # Keep track of the all-equal rows + all_equal_rows = [] for directory in tqdm(directories): @@ -521,8 +521,8 @@ def app(): ) if all_equal: - # Keep track of this for the moment - all_equal_count += 1 + # Keep track of this for the moment so we can analyse them later + all_equal_rows.append({"uprn": uprn, "directory_name": directory.name}) continue features = pd.concat([starting_record, ending_record]) @@ -622,7 +622,7 @@ def app(): save_dataframe_to_s3_parquet( df=output, bucket_name="retrofit-data-dev", - file_key="sap_change_model/dataset_test.parquet", + file_key="sap_change_model/dataset.parquet", )