mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
extracting epr
This commit is contained in:
parent
32b053e7db
commit
daabf2a586
3 changed files with 71 additions and 7 deletions
|
|
@ -1,7 +1,7 @@
|
|||
import os
|
||||
import PyPDF2
|
||||
from survey_report.extraction.detect_report_type import detect_report_type
|
||||
from survey_report.extraction.quidos import SiteNotesExtractor
|
||||
from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor
|
||||
|
||||
|
||||
def handle():
|
||||
|
|
@ -33,12 +33,18 @@ def handle():
|
|||
if report_type is not None:
|
||||
file_mapping[report_type] = text
|
||||
|
||||
# Check the report type
|
||||
report_type = detect_report_type(os.path.join(data_folder, file))
|
||||
|
||||
# This is only set up to work with Quidos site notes, so they must be present
|
||||
if "quidos_site_notes" not in file_mapping:
|
||||
raise ValueError("No quidos site notes found")
|
||||
|
||||
site_notes_extractor = SiteNotesExtractor(file_mapping["quidos_site_notes"])
|
||||
site_notes = site_notes_extractor.extract_all()
|
||||
|
||||
# We also must have an EPR
|
||||
epr_extractor = EPRExtractor(file_mapping["quidos_epr"])
|
||||
epr = epr_extractor.extract_all()
|
||||
|
||||
# We now produce the combined data sheet which is the starting figure:
|
||||
data_sheet = {**epr, **site_notes}
|
||||
del data_sheet['Building Dimensions']
|
||||
# We unnest the Total Building Dimensions
|
||||
data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"]
|
||||
data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"]
|
||||
del data_sheet["Total Building Dimensions"]
|
||||
|
|
|
|||
|
|
@ -16,4 +16,7 @@ def detect_report_type(first_page):
|
|||
):
|
||||
return "quidos_site_notes"
|
||||
|
||||
if re.search(r"\nIQ-Energy\nEnergy Performance Report\nPage 1 of 1", first_page):
|
||||
return "quidos_epr"
|
||||
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -109,3 +109,58 @@ class SiteNotesExtractor:
|
|||
self.extract_bills_estimate()
|
||||
self.extract_building_dimensions()
|
||||
return self.data
|
||||
|
||||
|
||||
class EPRExtractor:
    """
    Extracts the address and the space/water heating figures from the text of
    an Energy Performance Report (EPR).

    Every ``extract_*`` method writes its findings into ``self.data``;
    ``extract_all`` runs them all and returns that dictionary.
    """

    # Each figure must START with a digit. The previous ``[\d,]+`` also accepted
    # a lone "," which then blew up in ``int("")`` with an unrelated message
    # instead of the "No heating data found" error below.
    # DOTALL lets ``.*?`` skip over whatever sits between the two figures,
    # including line breaks.
    _HEATING_PATTERN = re.compile(
        r"Space Heating\(KWH\)\s*(\d[\d,]*).*?\nWater Heating\(KWH\)\s*(\d[\d,]*)",
        re.DOTALL,
    )

    # The "Town" line is matched only to mark where the address value ends;
    # its content is not captured because it is never stored.
    _ADDRESS_PATTERN = re.compile(r"Address\s*(.*?)\nTown\s*.*?\n", re.DOTALL)

    def __init__(self, pdf_text: str) -> None:
        """
        Initializes the EPRExtractor with the extracted PDF text.

        Args:
            pdf_text: Text of the report, as already extracted from the PDF.
        """
        self.text = pdf_text
        self.data = {}

    def extract_heating_data(self) -> None:
        """
        Extracts space heating and water heating values from the report.

        Stores them in ``self.data`` as integers under the keys
        "Space Heating (KWH)" and "Water Heating (KWH)". Thousands
        separators (",") are removed before conversion.

        Raises:
            ValueError: If the two heating figures cannot be found.
        """
        match = self._HEATING_PATTERN.search(self.text)
        if match is None:
            raise ValueError("No heating data found in the report")

        space_heating, water_heating = (
            int(figure.replace(",", "")) for figure in match.groups()
        )
        self.data.update({
            "Space Heating (KWH)": space_heating,
            "Water Heating (KWH)": water_heating,
        })

    def extract_address(self) -> None:
        """
        Extracts the address from the report.

        Stores the text found between the "Address" label and the following
        "Town" line, stripped of surrounding whitespace, under the key
        "Address". The town itself is not stored.

        Raises:
            ValueError: If no "Address" ... "Town" section can be found.
        """
        match = self._ADDRESS_PATTERN.search(self.text)
        if match is None:
            raise ValueError("No address found in the report")

        self.data["Address"] = match.group(1).strip()

    def extract_all(self) -> dict:
        """
        Runs all extraction methods and returns a dictionary with extracted data.

        Raises:
            ValueError: If the address or the heating figures are missing.
        """
        self.extract_address()
        self.extract_heating_data()
        return self.data
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue