handling dupes for ha50

This commit is contained in:
Khalim Conn-Kowlessar 2024-03-07 15:54:40 +00:00
parent d4e378f109
commit 33b3f51ca4

View file

@ -1445,6 +1445,21 @@ class DataLoader:
"Larch Drive", "Larch Grove"
)
# Drop the 31 Lauder Place North row with postcode ST20QS: it is a duplicate, and this version also has the wrong postcode
survey_list = survey_list[
~((survey_list["Street / Block Name"] == "LAUDER PLACE NORTH") &
(survey_list["Post Code"] == "ST20QS") &
(survey_list["NO."].isin([31])))
]
# Handle dropping of dupes
survey_list["street_pruner"] = survey_list["Street / Block Name"].str.lower().str.replace(" ", "")
survey_list["postcode_pruner"] = survey_list["Post Code"].str.lower().str.replace(" ", "")
# Should go to 18
survey_list = survey_list.drop_duplicates(["NO.", "street_pruner", "postcode_pruner"])
survey_list = survey_list.drop(columns=["street_pruner", "postcode_pruner"])
return survey_list
@staticmethod