mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
refactored
This commit is contained in:
parent
371f17f87e
commit
4e9acdeb8e
1 changed file with 24 additions and 43 deletions
|
|
@ -118,30 +118,15 @@ def extract_epr(pdf_path):
|
|||
return data
|
||||
|
||||
|
||||
def extract_retrofit_assessment_folder(retrofit_folder_path):
|
||||
def extract_retrofit_pdfs(data_folder_path):
|
||||
"""
|
||||
Handles extraction from a retrofit assessment folder if it exists and has content.
|
||||
Handles extraction from a retrofit data folder if it exists and has content.
|
||||
"""
|
||||
retrofit_files = [f for f in os.listdir(retrofit_folder_path) if f.endswith(".pdf")]
|
||||
retrofit_files = [f for f in os.listdir(data_folder_path) if f.endswith(".pdf")]
|
||||
|
||||
for pdf_file in retrofit_files:
|
||||
pdf_path = os.path.join(retrofit_folder_path, pdf_file)
|
||||
|
||||
# Attempt to read the first page of the PDF to determine the report type
|
||||
with open(pdf_path, "rb") as file:
|
||||
reader = PyPDF2.PdfReader(file)
|
||||
first_page_text = reader.pages[0].extract_text() if reader.pages else ""
|
||||
|
||||
if is_energy_report(first_page_text):
|
||||
# Treat this as an Energy Report
|
||||
return extract_epr(pdf_path)
|
||||
elif "summary" in pdf_file.lower():
|
||||
# Treat this as a Summary Report
|
||||
return extract_summary_report(pdf_path)
|
||||
elif is_summary_report(first_page_text):
|
||||
# other ways to detect a summary report
|
||||
# Treat this as a Summary Report
|
||||
return extract_summary_report(pdf_path)
|
||||
pdf_path = os.path.join(data_folder_path, pdf_file)
|
||||
return detect_and_parse_report(pdf_path, pdf_file)
|
||||
|
||||
# If no relevant PDF is found, raise an exception
|
||||
raise FileNotFoundError("No valid report (EPR or Summary) found in the retrofit assessment folder.")
|
||||
|
|
@ -162,30 +147,26 @@ def is_summary_report(text):
|
|||
return text.startswith("Summary Information")
|
||||
|
||||
|
||||
def extract_from_survey_folder_files(survey_folder_path):
|
||||
def detect_and_parse_report(pdf_path, pdf_file):
|
||||
"""
|
||||
Handles extraction directly from files in the survey folder when no 'retrofit assessment' folder exists.
|
||||
Detects the type of report and extracts the relevant data.
|
||||
:param pdf_path: String path to the PDF file
|
||||
:param pdf_file: String name of the PDF file
|
||||
:return: The data extracted by extract_epr or extract_summary_report for the detected report type
|
||||
"""
|
||||
survey_files = [f for f in os.listdir(survey_folder_path) if f.endswith(".pdf")]
|
||||
# Attempt to read the first page of the PDF to determine type
|
||||
with open(pdf_path, "rb") as file:
|
||||
reader = PyPDF2.PdfReader(file)
|
||||
first_page_text = reader.pages[0].extract_text() if reader.pages else ""
|
||||
|
||||
for pdf_file in survey_files:
|
||||
pdf_path = os.path.join(survey_folder_path, pdf_file)
|
||||
|
||||
# Attempt to read the first page of the PDF to determine type
|
||||
with open(pdf_path, "rb") as file:
|
||||
reader = PyPDF2.PdfReader(file)
|
||||
first_page_text = reader.pages[0].extract_text() if reader.pages else ""
|
||||
|
||||
if is_energy_report(first_page_text):
|
||||
# Treat this as an Energy Report
|
||||
return extract_epr(pdf_path)
|
||||
elif "summary" in pdf_file.lower():
|
||||
# Treat this as a Summary Report
|
||||
return extract_summary_report(pdf_path)
|
||||
else:
|
||||
raise NotImplementedError("Implement me")
|
||||
|
||||
return None
|
||||
if is_energy_report(first_page_text):
|
||||
# Treat this as an Energy Report
|
||||
return extract_epr(pdf_path)
|
||||
elif "summary" in pdf_file.lower():
|
||||
# Treat this as a Summary Report
|
||||
return extract_summary_report(pdf_path)
|
||||
else:
|
||||
raise NotImplementedError("Implement me")
|
||||
|
||||
|
||||
def main():
|
||||
|
|
@ -210,7 +191,7 @@ def main():
|
|||
if retrofit_folder:
|
||||
retrofit_folder_path = os.path.join(survey_folder_path, retrofit_folder)
|
||||
if os.listdir(retrofit_folder_path): # If not empty
|
||||
summary_data = extract_retrofit_assessment_folder(retrofit_folder_path)
|
||||
summary_data = extract_retrofit_pdfs(retrofit_folder_path)
|
||||
if summary_data:
|
||||
summary_data = {
|
||||
"survey_folder": survey_folder,
|
||||
|
|
@ -219,7 +200,7 @@ def main():
|
|||
extracted_data.append(summary_data)
|
||||
continue
|
||||
# If no retrofit folder or it was empty, check files in survey_folder
|
||||
summary_data = extract_from_survey_folder_files(survey_folder_path)
|
||||
summary_data = extract_retrofit_pdfs(survey_folder_path)
|
||||
if summary_data:
|
||||
summary_data = {
|
||||
"survey_folder": survey_folder,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue