mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
working on matching lookup
This commit is contained in:
parent
4160ec4dcb
commit
dbee05e555
2 changed files with 48 additions and 1 deletions
|
|
@ -5,7 +5,8 @@ import pandas as pd
|
|||
from tqdm import tqdm
|
||||
from collections import Counter
|
||||
|
||||
FILE_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 3 Surveys"
|
||||
CUSTOMER_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater"
|
||||
FILE_PATH = os.path.join(CUSTOMER_FOLDER_PATH, "Wave 3 Surveys")
|
||||
|
||||
|
||||
def extract_summary_report(pdf_path):
|
||||
|
|
@ -653,6 +654,51 @@ def main():
|
|||
extracted_data["Primary Energy Use Intensity (kWh/m2/yr)"] * extracted_data["Total Floor Area (m2)"]
|
||||
)
|
||||
|
||||
# We now merge on the coordinator data so that against each property, we can map the measures
|
||||
retrofit_packages_board = pd.read_excel(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater_SHDF_3_0_Board_work_in_progress_- 22.10.24.xlsx"),
|
||||
header=4
|
||||
)
|
||||
retrofit_packages_board = retrofit_packages_board[~pd.isnull(retrofit_packages_board["Name"])]
|
||||
# We now match this retrofit packages board to the extracted data
|
||||
matching_lookup = []
|
||||
for _, home in retrofit_packages_board.iterrows():
|
||||
filtered = extracted_data[extracted_data["Postcode"].str.lower() == home["Postcode"].lower()]
|
||||
if filtered.empty:
|
||||
print("Check this once we have full data")
|
||||
continue
|
||||
|
||||
if filtered.shape[0] == 1:
|
||||
matching_lookup.append(
|
||||
{
|
||||
"survey_folder": filtered["survey_folder"].values[0],
|
||||
"Osm. ID": home["Osm. ID"],
|
||||
"Name": home["Name"]
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
# home["Name"] should be contained in the survey_folder
|
||||
filtered = filtered[filtered["survey_folder"].str.contains(home["Name"], case=False)]
|
||||
# We have an edge case where some properties have two outputs in Sharepoint
|
||||
if home["Name"] == "197 Granby Court" and home["Postcode"] == "MK1 1NQ":
|
||||
filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"]
|
||||
|
||||
if filtered.empty:
|
||||
raise Exception("somethign went wrong")
|
||||
if filtered.shape[0] != 1:
|
||||
raise Exception("somethign went wrong2")
|
||||
|
||||
matching_lookup.append(
|
||||
{
|
||||
"survey_folder": filtered["survey_folder"].values[0],
|
||||
"Osm. ID": home["Osm. ID"],
|
||||
"Name": home["Name"]
|
||||
}
|
||||
)
|
||||
|
||||
matching_lookup = pd.DataFrame(matching_lookup)
|
||||
|
||||
# Save this as a csv
|
||||
# extracted_data.to_csv("Wave 3 Summary Data - first 200 files.csv", index=False)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
PyPDF2
|
||||
pandas
|
||||
tqdm
|
||||
openpyxl
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue