mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
handling dupes for ha50
This commit is contained in:
parent
d4e378f109
commit
33b3f51ca4
1 changed files with 15 additions and 0 deletions
|
|
@ -1445,6 +1445,21 @@ class DataLoader:
|
|||
"Larch Drive", "Larch Grove"
|
||||
)
|
||||
|
||||
# Drop 31 Lauder Place North, as there is a duplicate. This version also has a wrong postcode.
|
||||
survey_list = survey_list[
|
||||
~((survey_list["Street / Block Name"] == "LAUDER PLACE NORTH") &
|
||||
(survey_list["Post Code"] == "ST20QS") &
|
||||
(survey_list["NO."].isin([31])))
|
||||
]
|
||||
|
||||
# Drop duplicate rows, comparing "NO." plus the street name and postcode lowercased with spaces removed
|
||||
survey_list["street_pruner"] = survey_list["Street / Block Name"].str.lower().str.replace(" ", "")
|
||||
survey_list["postcode_pruner"] = survey_list["Post Code"].str.lower().str.replace(" ", "")
|
||||
|
||||
# Should go to 18
|
||||
survey_list = survey_list.drop_duplicates(["NO.", "street_pruner", "postcode_pruner"])
|
||||
survey_list = survey_list.drop(columns=["street_pruner", "postcode_pruner"])
|
||||
|
||||
return survey_list
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue