mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fixing missed matches
This commit is contained in:
parent
cb9399a704
commit
51c2d04a6d
1 changed files with 59 additions and 21 deletions
|
|
@ -741,26 +741,53 @@ def main():
|
|||
|
||||
# We now merge on the coordinator data so that against each property, we can map the measures
|
||||
retrofit_packages_board = pd.read_excel(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater_SHDF_3_0_Board_work_in_progress_- 22.10.24.xlsx"),
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater 3.0 Updated SAP Pre & Modelled 29.10.24.xlsx"),
|
||||
header=4
|
||||
)
|
||||
retrofit_packages_board = retrofit_packages_board[~pd.isnull(retrofit_packages_board["Name"])]
|
||||
# Take just the rows that have been surveyed
|
||||
retrofit_packages_board = retrofit_packages_board[
|
||||
retrofit_packages_board["RA"].isin(["Invoiced", "Completed"])
|
||||
]
|
||||
|
||||
# Replace \n with ""
|
||||
extracted_data["Postcode"] = extracted_data["Postcode"].str.replace("\n", "")
|
||||
|
||||
manual_filters = {
|
||||
"Flat 21 Walmer Street": "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD",
|
||||
"6 Cornewall Close": "StonewaterSurveys_14/aa 6, Cornewall Close, Moccas, HEREFORD, HR2 9LG",
|
||||
"1 Cluny Way": "12-1-1 Cluny Way-SG15 6ZB",
|
||||
"2 Bromyard Road": "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ",
|
||||
'Flat 18, 1 Raglan Court': "StonewaterSurveys_13/60-3-18 Raglan Court, 1 Raglan Court-MK41 8QT",
|
||||
'14 Raglan Court, 1 Devizes Avenue': 'StonewaterSurveys_12/55-3-14 Raglan Court, Devizes Avenue-MK41 8QT',
|
||||
'19 South Road': 'StonewaterSurveys_4/19 The Oaks, South Road, SMETHWICK, B67 7BY',
|
||||
'Flat 12 Pelican Lane': 'StonewaterSurveys_1/121-3-Flat 12 Lynton Court, Pelican Lane-RG14 1NN',
|
||||
'Flat C, 44 St Leonards Avenue': 'StonewaterSurveys_11/427-2-44c St. Leonards Avenue-MK42 0RB',
|
||||
}
|
||||
|
||||
# We now match this retrofit packages board to the extracted data
|
||||
matching_lookup = []
|
||||
for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)):
|
||||
|
||||
# Handle the case that has the wrong postcode in the asset data
|
||||
if home["Name"] == "Flat 21 Walmer Street":
|
||||
filtered = extracted_data[
|
||||
extracted_data["survey_folder"] == "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD"
|
||||
].copy()
|
||||
if home["Address ID"] == 6111566:
|
||||
blah
|
||||
# 6118117, 6118744, 6117091
|
||||
if home["Name"] in manual_filters:
|
||||
filtered = extracted_data[extracted_data["survey_folder"] == manual_filters[home["Name"]]].copy()
|
||||
else:
|
||||
filtered = extracted_data[extracted_data["Postcode"].str.lower() == home["Postcode"].lower()].copy()
|
||||
filtered["survey_folder"].values
|
||||
|
||||
# We check that home["Name"] is contained in the survey_folder, after removing punctuation (whitespace is kept)
|
||||
filtered = filtered[filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains(
|
||||
home["Name"].replace(r"[^\w\s]", ""), case=False
|
||||
)]
|
||||
# We check that home["Name"] is contained in the survey_folder, after removing punctuation (whitespace is kept)
|
||||
to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains(
|
||||
home["Name"].replace(r"[^\w\s]", "").replace("Flat", "").lstrip(), case=False
|
||||
)
|
||||
if to_filter.sum() == 0:
|
||||
to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.replace(",", "").str.replace(".",
|
||||
"").str.contains(
|
||||
home["Name"].replace(r"[^\w\s]", "").replace(",", ""), case=False
|
||||
)
|
||||
filtered = filtered[to_filter]
|
||||
|
||||
if filtered.empty:
|
||||
continue
|
||||
|
|
@ -769,7 +796,7 @@ def main():
|
|||
matching_lookup.append(
|
||||
{
|
||||
"survey_folder": filtered["survey_folder"].values[0],
|
||||
"Osm. ID": home["Osm. ID"],
|
||||
"Address ID": home["Address ID"],
|
||||
"Name": home["Name"]
|
||||
}
|
||||
)
|
||||
|
|
@ -797,15 +824,23 @@ def main():
|
|||
matching_lookup.append(
|
||||
{
|
||||
"survey_folder": filtered["survey_folder"].values[0],
|
||||
"Osm. ID": home["Osm. ID"],
|
||||
"Address ID": home["Address ID"],
|
||||
"Name": home["Name"]
|
||||
}
|
||||
)
|
||||
|
||||
matching_lookup = pd.DataFrame(matching_lookup)
|
||||
# Find Osmosis IDs that are in the packages board but not in the matching lookup
|
||||
# missing_osm_ids = set(retrofit_packages_board["Osm. ID"]) - set(matching_lookup["Osm. ID"])
|
||||
# missing_osm_ids = list(missing_osm_ids)
|
||||
missing_ids = set(retrofit_packages_board["Address ID"]) - set(matching_lookup["Address ID"])
|
||||
missing_ids = list(missing_ids)
|
||||
print(len(missing_ids))
|
||||
if missing_ids:
|
||||
# We check that the missing ids have no data yet
|
||||
missing_data = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)][
|
||||
["Name", "Address ID", "Archetype ID"]]
|
||||
extracted_data[extracted_data["survey_folder"].str.contains("23 Monmouth")]["survey_folder"].values
|
||||
|
||||
matching_lookup[matching_lookup["survey_folder"].str.contains("23 Monmouth")]
|
||||
|
||||
if matching_lookup["Osm. ID"].duplicated().sum():
|
||||
raise Exception("Duplicate Osm. IDs")
|
||||
|
|
@ -834,7 +869,6 @@ def main():
|
|||
retrofit_packages_board[
|
||||
[
|
||||
"Name",
|
||||
"Osm. ID",
|
||||
"Address ID",
|
||||
"Archetype ID",
|
||||
"Arch. Group Rank", "Archetype Representative",
|
||||
|
|
@ -848,6 +882,14 @@ def main():
|
|||
how="left"
|
||||
)
|
||||
|
||||
# Create a section for costs
|
||||
for measure in measure_columns:
|
||||
stonewater_data[f"Cost of {measure}"] = None
|
||||
|
||||
stonewater_data["Total Cost of Measures"] = None
|
||||
stonewater_data["Contingency Cost"] = None
|
||||
stonewater_data["Total Cost of Measures inc Contingency"] = None
|
||||
|
||||
# We've appended the recommended packages and modelled SAP ratings to the data
|
||||
# We also want to append the windows data
|
||||
windows_data = pd.read_excel(
|
||||
|
|
@ -878,12 +920,8 @@ def main():
|
|||
windows_data["Years since fitted/renewed"] = (pd.Timestamp.now() - windows_data[
|
||||
"Fitted/renewed date"]).dt.days / 365
|
||||
|
||||
# TODO: Flag if a package includes windows
|
||||
|
||||
# Save this as a csv
|
||||
# extracted_data.to_csv("Wave 3 Summary Data - first 200 files.csv", index=False)
|
||||
|
||||
missed = [f for f in survey_folders if f not in extracted_data["survey_folder"].tolist()]
|
||||
stonewater_data["Package Includes Windows"] = ~pd.isnull(stonewater_data["Window Upgrade"])
|
||||
stonewater_data = stonewater_data.merge(windows_data, on="Osm. ID", how="left")
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue