working on matching lookup

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-29 18:37:47 +00:00
parent 4160ec4dcb
commit dbee05e555
2 changed files with 48 additions and 1 deletions

View file

@ -5,7 +5,8 @@ import pandas as pd
from tqdm import tqdm
from collections import Counter
# NOTE(review): FILE_PATH is assigned twice in this view; the os.path.join form
# below resolves to the same folder and is the assignment that takes effect.
FILE_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 3 Surveys"
# Root folder for this customer's files; other paths are built from it with os.path.join.
CUSTOMER_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater"
# The "Wave 3 Surveys" sub-folder of the customer folder.
FILE_PATH = os.path.join(CUSTOMER_FOLDER_PATH, "Wave 3 Surveys")
def extract_summary_report(pdf_path):
@ -653,6 +654,51 @@ def main():
extracted_data["Primary Energy Use Intensity (kWh/m2/yr)"] * extracted_data["Total Floor Area (m2)"]
)
# Load the coordinator's retrofit packages board so that, against each property,
# we can map the measures. The column headers sit on row index 4 of the sheet.
board_path = os.path.join(
    CUSTOMER_FOLDER_PATH,
    "Stonewater_SHDF_3_0_Board_work_in_progress_- 22.10.24.xlsx",
)
retrofit_packages_board = pd.read_excel(board_path, header=4)
# Keep only the rows that actually carry a property name.
has_name = retrofit_packages_board["Name"].notna()
retrofit_packages_board = retrofit_packages_board[has_name]
# We now match this retrofit packages board to the extracted data.
# Each home on the board is matched to exactly one survey folder: first by
# postcode (case-insensitive) and, when several surveys share that postcode,
# by requiring the home's name to appear in the survey folder name.
# The result is one record per matched home: survey_folder, "Osm. ID" and "Name".
matching_lookup = []
# Lower-case the survey postcodes once instead of on every loop iteration.
survey_postcodes = extracted_data["Postcode"].str.lower()
for _, home in retrofit_packages_board.iterrows():
    name = home["Name"]
    postcode = home["Postcode"]

    filtered = extracted_data[survey_postcodes == postcode.lower()]
    if filtered.empty:
        # No survey extracted for this postcode (yet); skip rather than fail.
        print("Check this once we have full data")
        continue

    if filtered.shape[0] > 1:
        # home["Name"] should be contained in the survey_folder. The name is
        # matched as literal text (regex=False) so that characters such as
        # "(", "+" or "." in a property name are not interpreted as a pattern.
        filtered = filtered[
            filtered["survey_folder"].str.contains(name, case=False, regex=False)
        ]
        # We have an edge case where some properties have two outputs in Sharepoint
        if name == "197 Granby Court" and postcode == "MK1 1NQ":
            filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"]
        if filtered.empty:
            raise Exception(
                f"No survey folder contains the name {name!r} for postcode {postcode!r}"
            )
        if filtered.shape[0] != 1:
            candidates = filtered["survey_folder"].tolist()
            raise Exception(
                f"Multiple survey folders match {name!r} at postcode {postcode!r}: {candidates}"
            )

    # Exactly one survey row remains for this home at this point.
    matching_lookup.append(
        {
            "survey_folder": filtered["survey_folder"].values[0],
            "Osm. ID": home["Osm. ID"],
            "Name": name,
        }
    )
matching_lookup = pd.DataFrame(matching_lookup)
# Save this as a csv
# extracted_data.to_csv("Wave 3 Summary Data - first 200 files.csv", index=False)

View file

@ -1,3 +1,4 @@
PyPDF2
pandas
tqdm
openpyxl