mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
adding to extract eprs
This commit is contained in:
parent
753bda6cb0
commit
364b5b07e8
1 changed file with 73 additions and 28 deletions
|
|
@@ -18,6 +18,7 @@ def extract_summary_report(pdf_path):
|
||||||
"""
|
"""
|
||||||
data = {
|
data = {
|
||||||
"Address": None,
|
"Address": None,
|
||||||
|
"Postcode": None,
|
||||||
"Current SAP Rating": None,
|
"Current SAP Rating": None,
|
||||||
"Space Heating": None,
|
"Space Heating": None,
|
||||||
"Water Heating": None,
|
"Water Heating": None,
|
||||||
|
|
@@ -200,7 +201,9 @@ def extract_epr(pdf_path):
|
||||||
"""
|
"""
|
||||||
data = {
|
data = {
|
||||||
"Address": None,
|
"Address": None,
|
||||||
|
"Postcode": None,
|
||||||
"Current SAP Rating": None,
|
"Current SAP Rating": None,
|
||||||
|
"Potential SAP Rating": None,
|
||||||
"Space Heating": None,
|
"Space Heating": None,
|
||||||
"Water Heating": None,
|
"Water Heating": None,
|
||||||
"Fuel Bill": None,
|
"Fuel Bill": None,
|
||||||
|
|
@@ -211,6 +214,16 @@ def extract_epr(pdf_path):
|
||||||
"Number of Windows": None,
|
"Number of Windows": None,
|
||||||
"Total Number of Doors": None,
|
"Total Number of Doors": None,
|
||||||
"Number of Insulated Doors": None,
|
"Number of Insulated Doors": None,
|
||||||
|
"Existing Primary Heating System": None,
|
||||||
|
"Existing Primary Heating PCDF Reference": None,
|
||||||
|
"Existing Primary Heating Controls": None,
|
||||||
|
"Existing Primary Heating % of Heat": None,
|
||||||
|
"Existing Secondary Heating System": None,
|
||||||
|
"Existing Secondary Heating PCDF Reference": None,
|
||||||
|
"Existing Secondary Heating Controls": None,
|
||||||
|
"Existing Secondary Heating % of Heat": None,
|
||||||
|
"Secondary Heating Code": None,
|
||||||
|
"Water Heating Code": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
with open(pdf_path, "rb") as file:
|
with open(pdf_path, "rb") as file:
|
||||||
|
|
@@ -222,41 +235,73 @@ def extract_epr(pdf_path):
|
||||||
# Extract Address
|
# Extract Address
|
||||||
address_match = re.search(r"ENERGY REPORT\nDwelling Address\s*(.*?)\s*\nReference", text, re.DOTALL)
|
address_match = re.search(r"ENERGY REPORT\nDwelling Address\s*(.*?)\s*\nReference", text, re.DOTALL)
|
||||||
data["Address"] = address_match.group(1).strip()
|
data["Address"] = address_match.group(1).strip()
|
||||||
|
data["Postcode"] = data["Address"].split(",")[-1].strip()
|
||||||
|
|
||||||
# Extract Total Floor Area
|
# Extract Current and Potential SAP ratings
|
||||||
# area_match = re.search(r"Total Floor Area\s*(\d+ m2)", text)
|
|
||||||
# data["Total Floor Area"] = area_match.group(1)
|
|
||||||
|
|
||||||
# Extract Current SAP rating
|
|
||||||
# Updated Regular Expression to find "GG (1-20)" followed by two numbers
|
|
||||||
sap_match = re.search(r"GG \(1-20\)(\d{1,2})(\d{1,2})", text)
|
sap_match = re.search(r"GG \(1-20\)(\d{1,2})(\d{1,2})", text)
|
||||||
|
current_sap, _ = int(sap_match.group(1)), int(sap_match.group(2))
|
||||||
|
data["Current SAP Rating"] = current_sap
|
||||||
|
|
||||||
# Extract and validate the Current and Potential SAP ratings
|
# Extract Fuel Bill
|
||||||
current_sap, potential_sap = int(sap_match.group(1)), int(sap_match.group(2))
|
|
||||||
# Ensure potential is greater than or equal to current
|
|
||||||
if 1 <= current_sap <= 99 and 1 <= potential_sap <= 99 and potential_sap >= current_sap:
|
|
||||||
data["Current SAP Rating"] = current_sap
|
|
||||||
data["Potential SAP Rating"] = potential_sap
|
|
||||||
else:
|
|
||||||
raise ValueError("Failed to parse SAP ratings correctly due to unexpected format.")
|
|
||||||
|
|
||||||
# Extract Space Heating (kWh)
|
|
||||||
space_heating_match = re.search(r"Space Heating\s+(\d+)\s+kWh", text)
|
|
||||||
data["Space Heating"] = int(space_heating_match.group(1))
|
|
||||||
|
|
||||||
# Extract Water Heating (kWh)
|
|
||||||
water_heating_match = re.search(r"Water Heating\s+(\d+)\s+kWh", text)
|
|
||||||
data["Water Heating"] = int(water_heating_match.group(1))
|
|
||||||
|
|
||||||
# Extract Fuel Bill (total estimated costs)
|
|
||||||
fuel_bill_match = re.search(r"TOTAL\s*£(\d+)", text)
|
fuel_bill_match = re.search(r"TOTAL\s*£(\d+)", text)
|
||||||
data["Fuel Bill"] = f"£{fuel_bill_match.group(1)}"
|
data["Fuel Bill"] = f"£{fuel_bill_match.group(1)}"
|
||||||
|
|
||||||
# Extract the windows data
|
# Extract Total Number of Doors
|
||||||
|
total_doors_match = re.search(r"Total Doors:\s*(\d+)", text)
|
||||||
|
data["Total Number of Doors"] = int(total_doors_match.group(1))
|
||||||
|
|
||||||
|
# Extract Number of Insulated Doors
|
||||||
|
insulated_doors_match = re.search(r"Insulated Doors:\s*(\d+)", text)
|
||||||
|
data["Number of Insulated Doors"] = int(insulated_doors_match.group(1))
|
||||||
|
|
||||||
|
# Extract Primary Heating Section (Main Heating 1)
|
||||||
|
primary_heating_section = re.search(r"Main\s*Heating\s*1\s*(.*?)\s*Main\s*Heating\s*2", text, re.DOTALL)
|
||||||
|
primary_text = primary_heating_section.group(1)
|
||||||
|
|
||||||
|
data["Existing Primary Heating System"] = re.search(r"Main Heating Code\s*(.*?)\n", primary_text).group(
|
||||||
|
1).strip()
|
||||||
|
data["Existing Primary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)",
|
||||||
|
primary_text).group(1)
|
||||||
|
data["Existing Primary Heating Controls"] = re.search(r"Main Heating Controls\s*(.*?)\n", primary_text).group(
|
||||||
|
1).strip()
|
||||||
|
data["Existing Primary Heating % of Heat"] = int(
|
||||||
|
re.search(r"Percentage of Heat\s*(\d+)\s*%?", primary_text).group(1)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract Secondary Heating Section (Main Heating 2)
|
||||||
|
secondary_heating_section = re.search(r"Main\s*Heating\s*2\s*(.*?)\s*Secondary Heating", text, re.DOTALL)
|
||||||
|
secondary_text = secondary_heating_section.group(1)
|
||||||
|
|
||||||
|
data["Existing Secondary Heating System"] = re.search(r"Main Heating Code\s*(.*?)\n", secondary_text).group(
|
||||||
|
1).strip()
|
||||||
|
data["Existing Secondary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)",
|
||||||
|
secondary_text).group(1)
|
||||||
|
|
||||||
|
if data["Existing Secondary Heating System"] == "":
|
||||||
|
data["Existing Secondary Heating Controls"] = ""
|
||||||
|
else:
|
||||||
|
data["Existing Secondary Heating Controls"] = re.search(r"Main Heating Controls\s*(.*?)\n",
|
||||||
|
secondary_text).group(1).strip()
|
||||||
|
data["Existing Secondary Heating % of Heat"] = int(
|
||||||
|
re.search(r"Percentage of Heat\s*(\d+)\s*%?", secondary_text).group(1)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract Secondary Heating and Water Heating Codes
|
||||||
|
secondary_heating_code_match = re.search(r"Secondary Heating Code\s*(.*?)\n", text)
|
||||||
|
water_heating_code_match = re.search(r"Water Heating Code\s*(.*?)\n", text)
|
||||||
|
|
||||||
|
if data["Existing Secondary Heating System"] == "":
|
||||||
|
data["Secondary Heating Code"] = ""
|
||||||
|
else:
|
||||||
|
data["Secondary Heating Code"] = secondary_heating_code_match.group(1).strip()
|
||||||
|
data["Water Heating Code"] = water_heating_code_match.group(1).strip()
|
||||||
|
|
||||||
|
# Extract Windows information
|
||||||
windows_section = re.search(r"Windows\s*(.*?)\s*Draught Proofing", text, re.DOTALL)
|
windows_section = re.search(r"Windows\s*(.*?)\s*Draught Proofing", text, re.DOTALL)
|
||||||
windows_text = windows_section.group(1)
|
if windows_section:
|
||||||
window_data = extract_window_age_description(windows_text)
|
windows_text = windows_section.group(1)
|
||||||
data.update(window_data)
|
window_data = extract_window_age_description(windows_text)
|
||||||
|
data.update(window_data)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue