mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
adding epc band
This commit is contained in:
parent
791262fa86
commit
8983ebec2f
1 changed files with 56 additions and 3 deletions
|
|
@ -11,6 +11,32 @@ SURVEY_FOLDERS = os.path.join(CUSTOMER_FOLDER_PATH, "StonewaterSurveys_{i}")
|
||||||
NUM_FOLDERS = 14
|
NUM_FOLDERS = 14
|
||||||
|
|
||||||
|
|
||||||
|
def sap_to_epc(sap_points: int | float):
|
||||||
|
"""
|
||||||
|
Simple utility function to convert SAP points to EPC rating.
|
||||||
|
:param sap_points: numerical value of SAP points, typically between 0 and 100
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
|
||||||
|
if sap_points <= 0:
|
||||||
|
raise ValueError("SAP points should be above 0.")
|
||||||
|
|
||||||
|
if sap_points >= 92:
|
||||||
|
return "A"
|
||||||
|
elif sap_points >= 81:
|
||||||
|
return "B"
|
||||||
|
elif sap_points >= 69:
|
||||||
|
return "C"
|
||||||
|
elif sap_points >= 55:
|
||||||
|
return "D"
|
||||||
|
elif sap_points >= 39:
|
||||||
|
return "E"
|
||||||
|
elif sap_points >= 21:
|
||||||
|
return "F"
|
||||||
|
else:
|
||||||
|
return "G"
|
||||||
|
|
||||||
|
|
||||||
def extract_summary_report(pdf_path):
|
def extract_summary_report(pdf_path):
|
||||||
"""
|
"""
|
||||||
Extracts specific data from the provided PDF file.
|
Extracts specific data from the provided PDF file.
|
||||||
|
|
@ -23,6 +49,7 @@ def extract_summary_report(pdf_path):
|
||||||
"Address": None,
|
"Address": None,
|
||||||
"Postcode": None,
|
"Postcode": None,
|
||||||
"Current SAP Rating": None,
|
"Current SAP Rating": None,
|
||||||
|
"Current EPC Band": None,
|
||||||
"Fuel Bill": None,
|
"Fuel Bill": None,
|
||||||
"Number of Storeys": None,
|
"Number of Storeys": None,
|
||||||
"Window Age Description": None,
|
"Window Age Description": None,
|
||||||
|
|
@ -57,7 +84,7 @@ def extract_summary_report(pdf_path):
|
||||||
|
|
||||||
# Extract Current SAP rating
|
# Extract Current SAP rating
|
||||||
sap_match = re.search(r"Current SAP rating:\s*([A-Z] \d+)", text)
|
sap_match = re.search(r"Current SAP rating:\s*([A-Z] \d+)", text)
|
||||||
data["Current SAP Rating"] = sap_match.group(1)
|
data["Current SAP Rating"] = sap_match.group(1).split(" ")[1]
|
||||||
|
|
||||||
# Number of storeys
|
# Number of storeys
|
||||||
storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text)
|
storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text)
|
||||||
|
|
@ -367,6 +394,7 @@ def extract_epr(pdf_path):
|
||||||
"Address": None,
|
"Address": None,
|
||||||
"Postcode": None,
|
"Postcode": None,
|
||||||
"Current SAP Rating": None,
|
"Current SAP Rating": None,
|
||||||
|
"Current EPC Band": None,
|
||||||
"Primary Energy Use (kWh/yr)": None,
|
"Primary Energy Use (kWh/yr)": None,
|
||||||
"Primary Energy Use Intensity (kWh/m2/yr)": None,
|
"Primary Energy Use Intensity (kWh/m2/yr)": None,
|
||||||
"Number of Storeys": None,
|
"Number of Storeys": None,
|
||||||
|
|
@ -621,6 +649,9 @@ def main():
|
||||||
folder_contents = [os.path.join(f"StonewaterSurveys_{i}", file) for file in os.listdir(folder_path)]
|
folder_contents = [os.path.join(f"StonewaterSurveys_{i}", file) for file in os.listdir(folder_path)]
|
||||||
survey_folders.extend(folder_contents) # Append contents to the master list
|
survey_folders.extend(folder_contents) # Append contents to the master list
|
||||||
|
|
||||||
|
# Get rid of .DS_Store files
|
||||||
|
survey_folders = [folder for folder in survey_folders if not folder.endswith(".DS_Store")]
|
||||||
|
|
||||||
extracted_data = []
|
extracted_data = []
|
||||||
for survey_folder in tqdm(survey_folders):
|
for survey_folder in tqdm(survey_folders):
|
||||||
survey_folder_path = os.path.join(CUSTOMER_FOLDER_PATH, survey_folder)
|
survey_folder_path = os.path.join(CUSTOMER_FOLDER_PATH, survey_folder)
|
||||||
|
|
@ -643,6 +674,16 @@ def main():
|
||||||
retrofit_folder_path = os.path.join(survey_folder_path, retrofit_folder)
|
retrofit_folder_path = os.path.join(survey_folder_path, retrofit_folder)
|
||||||
else:
|
else:
|
||||||
retrofit_folder_path = os.path.join(survey_folder_path, ra_folder)
|
retrofit_folder_path = os.path.join(survey_folder_path, ra_folder)
|
||||||
|
|
||||||
|
# Check if everything inside is a sub-folder and the number of folders is 2
|
||||||
|
items = [item for item in os.listdir(retrofit_folder_path) if item != '.DS_Store']
|
||||||
|
all_folders = [os.path.isdir(os.path.join(retrofit_folder_path, item)) for item in items]
|
||||||
|
if all(all_folders) and len(all_folders) == 2 and "Property Pics" in items:
|
||||||
|
# Get the folder that isn't Property Pics
|
||||||
|
retrofit_folder_path = os.path.join(
|
||||||
|
retrofit_folder_path, [item for item in items if item != "Property Pics"][0]
|
||||||
|
)
|
||||||
|
|
||||||
if os.listdir(retrofit_folder_path): # If not empty
|
if os.listdir(retrofit_folder_path): # If not empty
|
||||||
summary_data = extract_retrofit_pdfs(data_folder_path=retrofit_folder_path)
|
summary_data = extract_retrofit_pdfs(data_folder_path=retrofit_folder_path)
|
||||||
if summary_data:
|
if summary_data:
|
||||||
|
|
@ -673,14 +714,24 @@ def main():
|
||||||
|
|
||||||
extracted_data = pd.DataFrame(extracted_data)
|
extracted_data = pd.DataFrame(extracted_data)
|
||||||
|
|
||||||
# What was missed???
|
|
||||||
|
|
||||||
extracted_data["Primary Energy Use (kWh/yr)"] = (
|
extracted_data["Primary Energy Use (kWh/yr)"] = (
|
||||||
extracted_data["Primary Energy Use Intensity (kWh/m2/yr)"] * extracted_data["Total Floor Area (m2)"]
|
extracted_data["Primary Energy Use Intensity (kWh/m2/yr)"] * extracted_data["Total Floor Area (m2)"]
|
||||||
)
|
)
|
||||||
|
extracted_data["Current SAP Rating"] = extracted_data["Current SAP Rating"].astype(int)
|
||||||
|
extracted_data["Current EPC Band"] = extracted_data["Current SAP Rating"].apply(sap_to_epc)
|
||||||
|
|
||||||
# TODO: Clean up SAP and extract EPC
|
# TODO: Clean up SAP and extract EPC
|
||||||
# TODO: RIR floor area!!!
|
# TODO: RIR floor area!!!
|
||||||
|
|
||||||
|
# Remove some definite duplicates
|
||||||
|
extracted_data = extracted_data[
|
||||||
|
~extracted_data["survey_folder"].isin(
|
||||||
|
[
|
||||||
|
"StonewaterSurveys_10/4 Beech Road, LUTON, LU1 1DP ROSS",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
# We now merge on the coordinator data so that against each property, we can map the measures
|
# We now merge on the coordinator data so that against each property, we can map the measures
|
||||||
retrofit_packages_board = pd.read_excel(
|
retrofit_packages_board = pd.read_excel(
|
||||||
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater_SHDF_3_0_Board_work_in_progress_- 22.10.24.xlsx"),
|
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater_SHDF_3_0_Board_work_in_progress_- 22.10.24.xlsx"),
|
||||||
|
|
@ -715,9 +766,11 @@ def main():
|
||||||
filtered = filtered[filtered["survey_folder"].str.contains(home["Name"], case=False)]
|
filtered = filtered[filtered["survey_folder"].str.contains(home["Name"], case=False)]
|
||||||
# We have an edge case where some properties have two outputs in Sharepoint
|
# We have an edge case where some properties have two outputs in Sharepoint
|
||||||
if home["Name"] == "197 Granby Court" and home["Postcode"] == "MK1 1NQ":
|
if home["Name"] == "197 Granby Court" and home["Postcode"] == "MK1 1NQ":
|
||||||
|
bl1h2
|
||||||
filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"]
|
filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"]
|
||||||
|
|
||||||
if home["Name"] == '1 Cluny Way' and home["Postcode"] == 'SG15 6ZB':
|
if home["Name"] == '1 Cluny Way' and home["Postcode"] == 'SG15 6ZB':
|
||||||
|
blah1
|
||||||
filtered = filtered[filtered["survey_folder"] == "12-1-1 Cluny Way-SG15 6ZB"]
|
filtered = filtered[filtered["survey_folder"] == "12-1-1 Cluny Way-SG15 6ZB"]
|
||||||
|
|
||||||
if filtered.empty:
|
if filtered.empty:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue