def _first_page_text(pdf_path):
    """Return the extracted text of the first page of the PDF at *pdf_path*.

    Returns an empty string when the PDF has no pages or when no text could
    be extracted from the first page.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        if not reader.pages:
            return ""
        # Normalise a missing/empty extraction result to "" so callers can
        # always treat the value as a string.
        return reader.pages[0].extract_text() or ""


def extract_retrofit_assessment_folder(retrofit_folder_path):
    """
    Handles extraction from a retrofit assessment folder if it exists and has content.

    Every PDF in the folder is inspected in sorted filename order. The first
    page of each PDF is read and passed to ``is_energy_report``:

    * if it is recognised as an energy report, the result of
      ``extract_epr(pdf_path)`` is returned;
    * otherwise, if the filename contains "summary" (case-insensitive), the
      result of ``extract_summary_report(pdf_path)`` is returned.

    The first PDF matching either rule wins; remaining files are not read.

    Args:
        retrofit_folder_path: Path of the folder to scan for report PDFs.

    Returns:
        Whatever ``extract_epr`` or ``extract_summary_report`` returns for the
        first matching PDF.

    Raises:
        FileNotFoundError: If the folder contains no PDF that is either an
            energy report or a summary report.
    """
    # os.listdir() yields entries in arbitrary order; sort so the report that
    # gets picked is reproducible when a folder holds several candidates.
    # The extension check is case-insensitive, matching the case-insensitive
    # "summary" filename check below (e.g. "Summary.PDF" is accepted).
    retrofit_files = sorted(
        name for name in os.listdir(retrofit_folder_path) if name.lower().endswith(".pdf")
    )

    for pdf_file in retrofit_files:
        pdf_path = os.path.join(retrofit_folder_path, pdf_file)

        # Content takes precedence over the filename: a PDF whose first page
        # looks like an energy report is handled as one even if its name
        # happens to contain "summary".
        if is_energy_report(_first_page_text(pdf_path)):
            return extract_epr(pdf_path)
        if "summary" in pdf_file.lower():
            return extract_summary_report(pdf_path)

    # If no relevant PDF is found, raise an exception
    raise FileNotFoundError("No valid report (EPR or Summary) found in the retrofit assessment folder.")