# Model/survey_report/app.py

import os
import PyPDF2
from string import Template
from survey_report.extraction.detect_report_type import detect_report_type
from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor


def generate_html_report(template_path, output_path, data):
    """
    Reads an HTML template file, injects dynamic values, and writes the final HTML report.

    The template is parsed with ``string.Template``, so placeholders are written as
    ``$name`` or ``${name}``. Substitution uses ``safe_substitute``: a placeholder with
    no matching key in ``data`` is left in the output unchanged instead of raising.

    Args:
    - template_path (str): Path to the HTML template file (read as UTF-8).
    - output_path (str): Path to save the generated HTML file (written as UTF-8).
      Missing parent directories are created.
    - data (dict): Dictionary mapping placeholder names to the values to inject.

    Raises:
    - OSError: If the template cannot be read or the output cannot be written
      (e.g. FileNotFoundError when ``template_path`` does not exist).
    """
    # Read the template file
    with open(template_path, "r", encoding="utf-8") as f:
        html_template = Template(f.read())

    # Replace placeholders with actual data; unknown placeholders are left as-is
    final_html = html_template.safe_substitute(data)

    # Make sure the destination folder exists, otherwise open() below fails with
    # FileNotFoundError on a fresh checkout. dirname is "" for a bare filename,
    # in which case there is nothing to create.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the generated HTML file
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_html)

    print(f"HTML report generated successfully: {output_path}")


def handle(*, data_folder=None, template_path=None, output_path=None, logo_path=None):
    """
    Performs the data extraction process for the survey report and renders the HTML report.

    Every PDF in ``data_folder`` is read with PyPDF2; its first page is passed to
    ``detect_report_type`` and, when a type is recognised (non-None), the full text of
    the document is stored under that type. The Quidos site notes and the Quidos EPR are
    then extracted, merged into a single data sheet (site-notes values win on duplicate
    keys), and a subset of that sheet is injected into the HTML template.

    Args:
    - data_folder (str | None): Folder containing the survey PDFs.
    - template_path (str | None): Path to the HTML template file.
    - output_path (str | None): Path to save the generated HTML file.
    - logo_path (str | None): Path to the logo image referenced by the template.

    Any argument left as None falls back to the original development location.

    Raises:
    - KeyError: If no Quidos site notes or no Quidos EPR is found in ``data_folder``,
      or if an expected field is absent from the extracted data.

    :return: None
    """
    # Placeholder locations, used when the caller does not supply a path.
    # TODO: move these into configuration
    if data_folder is None:
        data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2"
    if template_path is None:
        template_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/template.html"
    if output_path is None:
        output_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/output/report.html"
    if logo_path is None:
        logo_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/assets/logo.png"

    # Map each detected report type to the full text of its PDF.
    # The listing is sorted so that, if two files share a report type, which one
    # wins (the later name) does not depend on the filesystem's ordering.
    file_mapping = {}
    for file in sorted(os.listdir(data_folder)):
        # Check if it's a pdf file (extension compared case-insensitively)
        if not file.lower().endswith(".pdf"):
            continue
        filepath = os.path.join(data_folder, file)
        with open(filepath, "rb") as f:
            pdf = PyPDF2.PdfReader(f)
            # Extract each page once; text must be pulled while the file is open
            page_texts = [page.extract_text() for page in pdf.pages]

        # A PDF without pages has nothing to classify
        if not page_texts:
            continue

        # Check the report type using the first page only
        report_type = detect_report_type(page_texts[0])
        if report_type is not None:
            file_mapping[report_type] = "".join(page_texts)

    # This is only set up to work with Quidos site notes plus an EPR, so both are
    # mandatory. Fail early with a message naming what is missing.
    required_reports = ("quidos_site_notes", "quidos_epr")
    missing_reports = [name for name in required_reports if name not in file_mapping]
    if missing_reports:
        raise KeyError(
            f"Required report(s) not found in {data_folder!r}: {', '.join(missing_reports)}"
        )

    site_notes_extractor = SiteNotesExtractor(file_mapping["quidos_site_notes"])
    site_notes = site_notes_extractor.extract_all()

    epr_extractor = EPRExtractor(file_mapping["quidos_epr"])
    epr = epr_extractor.extract_all()

    # We now produce the combined data sheet which is the starting figure.
    # Site notes are unpacked last, so they override EPR values on shared keys.
    data_sheet = {**epr, **site_notes}
    del data_sheet['Building Dimensions']
    # We unnest the Total Building Dimensions
    total_dimensions = data_sheet.pop("Total Building Dimensions")
    data_sheet["Total Building Floor Area (m2)"] = total_dimensions["floor_area"]
    data_sheet["Total Building Heat Loss Area (m2)"] = total_dimensions["heat_loss_area"]

    # Generate the HTML report
    generate_html_report(
        template_path, output_path,
        data={
            "address": data_sheet["Address"],
            "logo_path": logo_path,
            "current_epc": data_sheet["Current EPC Band"],
            "current_sap": data_sheet["Current SAP Rating"],
            "potential_epc": "A",  # TODO PLACEHOLDER
            "potential_sap": 91,  # TODO PLACEHOLDER
        }
    )