mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
adding to extract eprs
This commit is contained in:
parent
753bda6cb0
commit
364b5b07e8
1 changed file with 73 additions and 28 deletions
|
|
@@ -18,6 +18,7 @@ def extract_summary_report(pdf_path):
|
|||
"""
|
||||
data = {
|
||||
"Address": None,
|
||||
"Postcode": None,
|
||||
"Current SAP Rating": None,
|
||||
"Space Heating": None,
|
||||
"Water Heating": None,
|
||||
|
|
@@ -200,7 +201,9 @@ def extract_epr(pdf_path):
|
|||
"""
|
||||
data = {
|
||||
"Address": None,
|
||||
"Postcode": None,
|
||||
"Current SAP Rating": None,
|
||||
"Potential SAP Rating": None,
|
||||
"Space Heating": None,
|
||||
"Water Heating": None,
|
||||
"Fuel Bill": None,
|
||||
|
|
@@ -211,6 +214,16 @@ def extract_epr(pdf_path):
|
|||
"Number of Windows": None,
|
||||
"Total Number of Doors": None,
|
||||
"Number of Insulated Doors": None,
|
||||
"Existing Primary Heating System": None,
|
||||
"Existing Primary Heating PCDF Reference": None,
|
||||
"Existing Primary Heating Controls": None,
|
||||
"Existing Primary Heating % of Heat": None,
|
||||
"Existing Secondary Heating System": None,
|
||||
"Existing Secondary Heating PCDF Reference": None,
|
||||
"Existing Secondary Heating Controls": None,
|
||||
"Existing Secondary Heating % of Heat": None,
|
||||
"Secondary Heating Code": None,
|
||||
"Water Heating Code": None,
|
||||
}
|
||||
|
||||
with open(pdf_path, "rb") as file:
|
||||
|
|
@@ -222,41 +235,73 @@ def extract_epr(pdf_path):
|
|||
# Extract Address
|
||||
address_match = re.search(r"ENERGY REPORT\nDwelling Address\s*(.*?)\s*\nReference", text, re.DOTALL)
|
||||
data["Address"] = address_match.group(1).strip()
|
||||
data["Postcode"] = data["Address"].split(",")[-1].strip()
|
||||
|
||||
# Extract Total Floor Area
|
||||
# area_match = re.search(r"Total Floor Area\s*(\d+ m2)", text)
|
||||
# data["Total Floor Area"] = area_match.group(1)
|
||||
|
||||
# Extract Current SAP rating
|
||||
# Updated Regular Expression to find "GG (1-20)" followed by two numbers
|
||||
# Extract Current and Potential SAP ratings
|
||||
sap_match = re.search(r"GG \(1-20\)(\d{1,2})(\d{1,2})", text)
|
||||
current_sap, _ = int(sap_match.group(1)), int(sap_match.group(2))
|
||||
data["Current SAP Rating"] = current_sap
|
||||
|
||||
# Extract and validate the Current and Potential SAP ratings
|
||||
current_sap, potential_sap = int(sap_match.group(1)), int(sap_match.group(2))
|
||||
# Ensure potential is greater than or equal to current
|
||||
if 1 <= current_sap <= 99 and 1 <= potential_sap <= 99 and potential_sap >= current_sap:
|
||||
data["Current SAP Rating"] = current_sap
|
||||
data["Potential SAP Rating"] = potential_sap
|
||||
else:
|
||||
raise ValueError("Failed to parse SAP ratings correctly due to unexpected format.")
|
||||
|
||||
# Extract Space Heating (kWh)
|
||||
space_heating_match = re.search(r"Space Heating\s+(\d+)\s+kWh", text)
|
||||
data["Space Heating"] = int(space_heating_match.group(1))
|
||||
|
||||
# Extract Water Heating (kWh)
|
||||
water_heating_match = re.search(r"Water Heating\s+(\d+)\s+kWh", text)
|
||||
data["Water Heating"] = int(water_heating_match.group(1))
|
||||
|
||||
# Extract Fuel Bill (total estimated costs)
|
||||
# Extract Fuel Bill
|
||||
fuel_bill_match = re.search(r"TOTAL\s*£(\d+)", text)
|
||||
data["Fuel Bill"] = f"£{fuel_bill_match.group(1)}"
|
||||
|
||||
# Extract the windows data
|
||||
# Extract Total Number of Doors
|
||||
total_doors_match = re.search(r"Total Doors:\s*(\d+)", text)
|
||||
data["Total Number of Doors"] = int(total_doors_match.group(1))
|
||||
|
||||
# Extract Number of Insulated Doors
|
||||
insulated_doors_match = re.search(r"Insulated Doors:\s*(\d+)", text)
|
||||
data["Number of Insulated Doors"] = int(insulated_doors_match.group(1))
|
||||
|
||||
# Extract Primary Heating Section (Main Heating 1)
|
||||
primary_heating_section = re.search(r"Main\s*Heating\s*1\s*(.*?)\s*Main\s*Heating\s*2", text, re.DOTALL)
|
||||
primary_text = primary_heating_section.group(1)
|
||||
|
||||
data["Existing Primary Heating System"] = re.search(r"Main Heating Code\s*(.*?)\n", primary_text).group(
|
||||
1).strip()
|
||||
data["Existing Primary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)",
|
||||
primary_text).group(1)
|
||||
data["Existing Primary Heating Controls"] = re.search(r"Main Heating Controls\s*(.*?)\n", primary_text).group(
|
||||
1).strip()
|
||||
data["Existing Primary Heating % of Heat"] = int(
|
||||
re.search(r"Percentage of Heat\s*(\d+)\s*%?", primary_text).group(1)
|
||||
)
|
||||
|
||||
# Extract Secondary Heating Section (Main Heating 2)
|
||||
secondary_heating_section = re.search(r"Main\s*Heating\s*2\s*(.*?)\s*Secondary Heating", text, re.DOTALL)
|
||||
secondary_text = secondary_heating_section.group(1)
|
||||
|
||||
data["Existing Secondary Heating System"] = re.search(r"Main Heating Code\s*(.*?)\n", secondary_text).group(
|
||||
1).strip()
|
||||
data["Existing Secondary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)",
|
||||
secondary_text).group(1)
|
||||
|
||||
if data["Existing Secondary Heating System"] == "":
|
||||
data["Existing Secondary Heating Controls"] = ""
|
||||
else:
|
||||
data["Existing Secondary Heating Controls"] = re.search(r"Main Heating Controls\s*(.*?)\n",
|
||||
secondary_text).group(1).strip()
|
||||
data["Existing Secondary Heating % of Heat"] = int(
|
||||
re.search(r"Percentage of Heat\s*(\d+)\s*%?", secondary_text).group(1)
|
||||
)
|
||||
|
||||
# Extract Secondary Heating and Water Heating Codes
|
||||
secondary_heating_code_match = re.search(r"Secondary Heating Code\s*(.*?)\n", text)
|
||||
water_heating_code_match = re.search(r"Water Heating Code\s*(.*?)\n", text)
|
||||
|
||||
if data["Existing Secondary Heating System"] == "":
|
||||
data["Secondary Heating Code"] = ""
|
||||
else:
|
||||
data["Secondary Heating Code"] = secondary_heating_code_match.group(1).strip()
|
||||
data["Water Heating Code"] = water_heating_code_match.group(1).strip()
|
||||
|
||||
# Extract Windows information
|
||||
windows_section = re.search(r"Windows\s*(.*?)\s*Draught Proofing", text, re.DOTALL)
|
||||
windows_text = windows_section.group(1)
|
||||
window_data = extract_window_age_description(windows_text)
|
||||
data.update(window_data)
|
||||
if windows_section:
|
||||
windows_text = windows_section.group(1)
|
||||
window_data = extract_window_age_description(windows_text)
|
||||
data.update(window_data)
|
||||
|
||||
return data
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue