From f0db6b69df37159c52abd16611472d02b4b6e31d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 18 Oct 2023 10:14:52 +1100 Subject: [PATCH] created a small dataset of uprn and directory name to keep track of all equal rows --- etl/epc/property_change_app.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/etl/epc/property_change_app.py b/etl/epc/property_change_app.py index 6e724e33..a6034e3d 100644 --- a/etl/epc/property_change_app.py +++ b/etl/epc/property_change_app.py @@ -411,8 +411,8 @@ def app(): dataset = [] cleaning_dataset = [] - # Keep track of the number of all equals - all_equal_count = 0 + # Keep track of the all-equal rows + all_equal_rows = [] for directory in tqdm(directories): @@ -521,8 +521,8 @@ def app(): ) if all_equal: - # Keep track of this for the moment - all_equal_count += 1 + # Keep track of this for the moment so we can analyse + all_equal_rows.append({"uprn": uprn, "directory_name": directory.name}) continue features = pd.concat([starting_record, ending_record]) @@ -622,7 +622,7 @@ def app(): save_dataframe_to_s3_parquet( df=output, bucket_name="retrofit-data-dev", - file_key="sap_change_model/dataset_test.parquet", + file_key="sap_change_model/dataset.parquet", )