investigating missings'

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-30 10:22:23 +00:00
parent 8983ebec2f
commit cb9399a704

View file

@ -720,15 +720,22 @@ def main():
extracted_data["Current SAP Rating"] = extracted_data["Current SAP Rating"].astype(int)
extracted_data["Current EPC Band"] = extracted_data["Current SAP Rating"].apply(sap_to_epc)
# TODO: Clean up SAP and extract EPC
# TODO: RIR floor area!!!
# Remove some definite duplicates
dupes = extracted_data[extracted_data["Address"].duplicated()]["Address"]
dupes = extracted_data[extracted_data["Address"].isin(dupes)]
dupes = dupes.sort_values("Address")
# Get all of the folders that end with ROSS
to_drop = dupes[dupes["survey_folder"].str.endswith("ROSS")]["survey_folder"].unique().tolist()
extracted_data = extracted_data[
~extracted_data["survey_folder"].isin(
[
"StonewaterSurveys_10/4 Beech Road, LUTON, LU1 1DP ROSS",
]
"StonewaterSurveys_2/135 Runley Road, LUTON, LU1 1TX ROSS",
"StonewaterSurveys_13/7 Saxon Road, LUTON, LU3 1JR ROSS"
] + to_drop
)
]
@ -740,8 +747,15 @@ def main():
retrofit_packages_board = retrofit_packages_board[~pd.isnull(retrofit_packages_board["Name"])]
# We now match this retrofit packages board to the extracted data
matching_lookup = []
for _, home in retrofit_packages_board.iterrows():
filtered = extracted_data[extracted_data["Postcode"].str.lower() == home["Postcode"].lower()].copy()
for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)):
# Handle the case that has the wrong postcode in the asset data
if home["Name"] == "Flat 21 Walmer Street":
filtered = extracted_data[
extracted_data["survey_folder"] == "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD"
].copy()
else:
filtered = extracted_data[extracted_data["Postcode"].str.lower() == home["Postcode"].lower()].copy()
# We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces
filtered = filtered[filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains(
@ -749,7 +763,6 @@ def main():
)]
if filtered.empty:
print("Check this once we have full data")
continue
if filtered.shape[0] == 1:
@ -766,18 +779,20 @@ def main():
filtered = filtered[filtered["survey_folder"].str.contains(home["Name"], case=False)]
# We have an edge case where some properties have two outputs in SharePoint
if home["Name"] == "197 Granby Court" and home["Postcode"] == "MK1 1NQ":
bl1h2
filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"]
raise Exception("Fix me1")
# filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"]
if home["Name"] == '1 Cluny Way' and home["Postcode"] == 'SG15 6ZB':
blah1
filtered = filtered[filtered["survey_folder"] == "12-1-1 Cluny Way-SG15 6ZB"]
raise Exception("Fix me2")
# filtered = filtered[filtered["survey_folder"] == "12-1-1 Cluny Way-SG15 6ZB"]
if home["Name"] == '2 Bromyard Road' and home["Postcode"] == 'WR15 8BZ':
filtered = filtered[filtered["survey_folder"] == "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ"]
if filtered.empty:
print("Check this once we have full data2!!!")
continue
if filtered.shape[0] != 1:
raise Exception("somethign went wrong2")
raise Exception("something went wrong")
matching_lookup.append(
{
@ -788,6 +803,9 @@ def main():
)
matching_lookup = pd.DataFrame(matching_lookup)
# Find Osmosis IDs that are in the packages board but not in the matching lookup
# missing_osm_ids = set(retrofit_packages_board["Osm. ID"]) - set(matching_lookup["Osm. ID"])
# missing_osm_ids = list(missing_osm_ids)
if matching_lookup["Osm. ID"].duplicated().sum():
raise Exception("Duplicate Osm. IDs")