Allow extract_retrofit_assessment_folder to handle EPRs

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-28 14:16:33 +00:00
parent c68e4f017e
commit 70d02075cf

View file

@@ -121,20 +121,25 @@ def extract_retrofit_assessment_folder(retrofit_folder_path):
"""
Handles extraction from a retrofit assessment folder if it exists and has content.
"""
retrofit_files = os.listdir(retrofit_folder_path)
retrofit_files = [f for f in os.listdir(retrofit_folder_path) if f.endswith(".pdf")]
# Find the summary report in the retrofit folder
summary_report = next(
(name for name in retrofit_files if "summary" in name.lower() and name.endswith(".pdf")), None
)
for pdf_file in retrofit_files:
pdf_path = os.path.join(retrofit_folder_path, pdf_file)
if summary_report:
pdf_path = os.path.join(retrofit_folder_path, summary_report)
return extract_summary_report(pdf_path)
# Attempt to read the first page of the PDF to determine the report type
with open(pdf_path, "rb") as file:
reader = PyPDF2.PdfReader(file)
first_page_text = reader.pages[0].extract_text() if reader.pages else ""
raise Exception("Not Implemented")
if is_energy_report(first_page_text):
# Treat this as an Energy Report
return extract_epr(pdf_path)
elif "summary" in pdf_file.lower():
# Treat this as a Summary Report
return extract_summary_report(pdf_path)
return None # If no relevant PDF is found
# If no relevant PDF is found, raise an exception
raise FileNotFoundError("No valid report (EPR or Summary) found in the retrofit assessment folder.")
def is_energy_report(text):