Added all_equal_rows storage

This commit is contained in:
Khalim Conn-Kowlessar 2023-10-18 10:28:18 +11:00
parent f0db6b69df
commit 0e7f56e356

View file

@@ -597,7 +597,7 @@ def app():
cleaning_averages["LOCAL_AUTHORITY"] = df["LOCAL_AUTHORITY"].values[0]
cleaning_dataset.append(cleaning_averages)
print("Final all equal count: %s" % str(all_equal_count))
print("Final all equal count: %s" % str(len(all_equal_rows)))
# Store cleaning dataset in s3 as a parquet file
cleaning_dataset = pd.concat(cleaning_dataset)
@@ -625,6 +625,14 @@ def app():
file_key="sap_change_model/dataset.parquet",
)
# Store all_equal_rows
all_equal_rows = pd.DataFrame(all_equal_rows)
save_dataframe_to_s3_parquet(
df=all_equal_rows,
bucket_name="retrofit-data-dev",
file_key="sap_change_model/all_equal_rows.parquet",
)
if __name__ == "__main__":
app()