sorted dupes

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-30 13:46:43 +00:00
parent 51c2d04a6d
commit 90c9466421

View file

@ -756,27 +756,34 @@ def main():
manual_filters = {
"Flat 21 Walmer Street": "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD",
"6 Cornewall Close": "StonewaterSurveys_14/aa 6, Cornewall Close, Moccas, HEREFORD, HR2 9LG",
"1 Cluny Way": "12-1-1 Cluny Way-SG15 6ZB",
"2 Bromyard Road": "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ",
'Flat 18, 1 Raglan Court': "StonewaterSurveys_13/60-3-18 Raglan Court, 1 Raglan Court-MK41 8QT",
'14 Raglan Court, 1 Devizes Avenue': 'StonewaterSurveys_12/55-3-14 Raglan Court, Devizes Avenue-MK41 8QT',
'19 South Road': 'StonewaterSurveys_4/19 The Oaks, South Road, SMETHWICK, B67 7BY',
'Flat 12 Pelican Lane': 'StonewaterSurveys_1/121-3-Flat 12 Lynton Court, Pelican Lane-RG14 1NN',
'Flat C, 44 St Leonards Avenue': 'StonewaterSurveys_11/427-2-44c St. Leonards Avenue-MK42 0RB',
'16 The Crescent, Kington': 'StonewaterSurveys_9/360-3-16 The Crescent-HR5 3AS',
'2 School Lane, Leominster': 'StonewaterSurveys_5/224-1-2 School Lane-HR6 8AA',
'14 South Road': 'StonewaterSurveys_2/14 The Oaks, South Road, SMETHWICK, B67 7BY',
'1 Groves Street': 'StonewaterSurveys_4/19-5-1 Groves Street-SN2 2BW',
# '2 Sorrell Place': '',
# '72 St Ives Road': '',
# '1 The Close, Burton Gardens': '',
# '102 Cheaton Close': '',
# 'Flat 16 Spring Gardens': '',
# '4 Apple Close': '',
'25 Folly Lane': '',
}
# We now match this retrofit packages board to the extracted data
matching_lookup = []
for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)):
# Handle the case that has the wrong postcode in the asset data
if home["Address ID"] == 6111566:
blah
# 6118117, 6118744, 6117091
if home["Name"] in manual_filters:
filtered = extracted_data[extracted_data["survey_folder"] == manual_filters[home["Name"]]].copy()
else:
filtered = extracted_data[extracted_data["Postcode"].str.lower() == home["Postcode"].lower()].copy()
filtered["survey_folder"].values
# We check that home["Name"] is contained in the survey_folder, after stripping punctuation from the folder name (the pattern keeps word characters and whitespace)
to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains(
@ -836,14 +843,11 @@ def main():
print(len(missing_ids))
if missing_ids:
# We check that the missing ids have no data yet
missing_data = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)][
["Name", "Address ID", "Archetype ID"]]
extracted_data[extracted_data["survey_folder"].str.contains("23 Monmouth")]["survey_folder"].values
if len(missing_ids) != 8:
raise Exception("Unacceptable number of missings")
matching_lookup[matching_lookup["survey_folder"].str.contains("23 Monmouth")]
if matching_lookup["Osm. ID"].duplicated().sum():
raise Exception("Duplicate Osm. IDs")
if matching_lookup["Address ID"].duplicated().sum():
raise Exception("Duplicate Address IDs")
if matching_lookup["survey_folder"].duplicated().sum():
raise Exception("Duplicate survey folders")
@ -865,20 +869,21 @@ def main():
]
# We should end up with a 1:1 mapping between the Osm. ID and the survey folder
stonewater_data = extracted_data.merge(matching_lookup, on="survey_folder", how="left").merge(
stonewater_data = extracted_data.merge(matching_lookup, on="survey_folder", how="inner").merge(
retrofit_packages_board[
[
"Name",
"RA",
"Address ID",
"Archetype ID",
"Arch. Group Rank", "Archetype Representative",
"Arch. Group Rank",
"Actual SAP Band",
"Actual SAP Rating",
"Modelled SAP Band",
"Modelled SAP Rating",
] + measure_columns
],
on=["Osm. ID", "Name"],
on=["Address ID", "Name"],
how="left"
)
@ -900,9 +905,13 @@ def main():
header=12
)
windows_data = windows_data[windows_data["Address ID"] != "Address ID"]
windows_data = windows_data[~pd.isnull(windows_data["Address ID"])]
# We get a lookup of Address ID and when the windows were fitted
windows_data = windows_data[
["Osm. ID", "Window attributes - Fitted/renewed date", "Parent Asset Window attributes - Fitted/renewed date"]
["Address ID", "Window attributes - Fitted/renewed date",
"Parent Asset Window attributes - Fitted/renewed date"]
]
# Convert to string for the moment
windows_data["Parent Asset Window attributes - Fitted/renewed date"] = windows_data[
@ -921,7 +930,8 @@ def main():
"Fitted/renewed date"]).dt.days / 365
stonewater_data["Package Includes Windows"] = ~pd.isnull(stonewater_data["Window Upgrade"])
stonewater_data = stonewater_data.merge(windows_data, on="Osm. ID", how="left")
windows_data["Address ID"] = windows_data["Address ID"].astype(float)
stonewater_data = stonewater_data.merge(windows_data, on="Address ID", how="left")
# if __name__ == "__main__":
# main()