mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Created a small dataset of UPRN and directory name to keep track of all-equal rows
This commit is contained in:
parent
d5b2ff9c36
commit
f0db6b69df
1 changed file with 5 additions and 5 deletions
|
|
@ -411,8 +411,8 @@ def app():
|
|||
|
||||
dataset = []
|
||||
cleaning_dataset = []
|
||||
# Keep track of the number of all equals
|
||||
all_equal_count = 0
|
||||
# Keep track of the all equals
|
||||
all_equal_rows = []
|
||||
|
||||
for directory in tqdm(directories):
|
||||
|
||||
|
|
@ -521,8 +521,8 @@ def app():
|
|||
)
|
||||
|
||||
if all_equal:
|
||||
# Keep track of this for the moment
|
||||
all_equal_count += 1
|
||||
# Keep track of this for the moment so we can analyse
|
||||
all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
|
||||
continue
|
||||
|
||||
features = pd.concat([starting_record, ending_record])
|
||||
|
|
@ -622,7 +622,7 @@ def app():
|
|||
save_dataframe_to_s3_parquet(
|
||||
df=output,
|
||||
bucket_name="retrofit-data-dev",
|
||||
file_key="sap_change_model/dataset_test.parquet",
|
||||
file_key="sap_change_model/dataset.parquet",
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue