# Model/survey_report/app.py

import os
import requests
import PyPDF2
from string import Template

import pandas as pd

from survey_report.extraction.detect_report_type import detect_report_type
from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor


def generate_html_report(template_path, output_path, data):
    """
    Render an HTML report from a ``string.Template`` file and write it to disk.

    Args:
    - template_path (str): Location of the HTML template. ``$name`` / ``${name}`` placeholders
      are filled in from ``data``.
    - output_path (str): Where the rendered HTML is written (an existing file is overwritten).
    - data (dict): Mapping of placeholder name to the value substituted for it.
    """
    with open(template_path, "r", encoding="utf-8") as template_file:
        raw_template = template_file.read()

    # safe_substitute leaves placeholders without a matching key untouched instead of raising KeyError
    rendered = Template(raw_template).safe_substitute(data)

    with open(output_path, "w", encoding="utf-8") as report_file:
        report_file.write(rendered)

    print(f"HTML report generated successfully: {output_path}")


def stringify_number(num: int, rounding: bool = True) -> str:
    """
    Format a number for display in the report.

    Values below 100,000 are rendered with thousands separators (e.g. ``"1,300"``); values of
    100,000 or more are abbreviated to whole thousands with a ``k`` suffix (e.g. ``"250k"``).

    Args:
    - num (int): The number to format.
    - rounding (bool): When True, round *up* to the next multiple of 100 (inputs below 100,000)
      or 1,000 (inputs of 100,000 or more) before formatting. When False the value is used as
      is; the ``k`` form still drops the last three digits.

    Returns:
    - str: The formatted number.
    """
    threshold = 100000  # first six-figure value

    if rounding:
        step = 100 if num < threshold else 1000
        value = ((num + step - 1) // step) * step
    else:
        value = num

    # Choose the format from the *rounded* value: e.g. 99,950 rounds up to 100,000, which is a
    # six-figure number and must be shown as "100k" rather than "100,000".
    if value < threshold:
        return f"{value:,}"
    return f"{value // 1000}k"


class PlacidApi:
    """
    Small client for the Placid REST API, used to render the survey report PDF.

    The client creates a PDF from a template (``create_pdf``) and then waits for the render to
    finish and downloads the result (``get_pdf``).
    """

    # Errors as defined by docs: https://placid.app/docs/2.0/rest/errors
    ERROR_CODES = {
        400: "Bad request",
        401: "Unauthorized",
        404: "Template Not found",
        422: "Validation error",
        429: "Rate limit exceeded",
        500: "Internal server error",
    }

    # Seconds to wait for any single HTTP request before giving up
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key):
        """
        Args:
        - api_key (str): Placid API token, sent as a Bearer token on every request.
        """
        self.api_key = api_key

        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

    def _parse_response(self, response):
        """
        Return the decoded JSON body of ``response``.

        Raises:
        - requests.HTTPError: If the API answered with an error status. The message includes the
          description from ``ERROR_CODES`` when the status is a documented one.
        """
        if response.status_code >= 400:
            reason = self.ERROR_CODES.get(response.status_code, "Unexpected error")
            raise requests.HTTPError(
                f"Placid API request failed ({response.status_code} {reason}): {response.text}",
                response=response,
            )
        return response.json()

    def create_pdf(
        self,
        template_uuid: str,
        current_epc_rating: str,
        current_epc_rating_colour: str,
        post_retrofit_epc_rating: str,
        post_retrofit_epc_rating_colour: str,
    ):
        """
        Queue the generation of a single-page PDF from a Placid template.

        Args:
        - template_uuid (str): UUID of the Placid template to render.
        - current_epc_rating (str): Text for the ``current_epc_rating`` layer.
        - current_epc_rating_colour (str): Text colour for the ``current_epc_rating`` layer.
        - post_retrofit_epc_rating (str): Text for the ``post_retrofit_epc_rating`` layer.
        - post_retrofit_epc_rating_colour (str): Text colour for the ``post_retrofit_epc_rating`` layer.

        Returns:
        - dict: The decoded API response; its ``id`` is what ``get_pdf`` expects.

        Raises:
        - requests.HTTPError: If the API rejects the request.
        """
        url = "https://api.placid.app/api/rest/pdfs"

        body = {
            "webhook_success": None,
            "passthrough": None,
            "pages": [
                {
                    "template_uuid": template_uuid,
                    "layers": {
                        "current_epc_rating": {
                            "text": current_epc_rating,
                            "text_color": current_epc_rating_colour,
                        },
                        "post_retrofit_epc_rating": {
                            "text": post_retrofit_epc_rating,
                            "text_color": post_retrofit_epc_rating_colour,
                        }
                    },
                },
            ]
        }

        response = requests.post(
            url,
            headers=self.headers,
            json=body,
            timeout=self.REQUEST_TIMEOUT,
        )

        return self._parse_response(response)

    def get_pdf(
        self,
        pdf_id: str,
        output_path: str = "survey_report/example_data/output.pdf",
        poll_interval: float = 5,
        max_attempts: int = 60,
    ):
        """
        Poll the API every ``poll_interval`` seconds until the PDF is ready, then download it.

        A freshly created PDF is only queued and has no ``pdf_url`` yet, so the status endpoint
        is queried repeatedly until a URL is available.

        Args:
        - pdf_id (str): The ``id`` returned by ``create_pdf``.
        - output_path (str): Where the downloaded PDF is written.
        - poll_interval (float): Seconds to sleep between status checks.
        - max_attempts (int): Maximum number of status checks before giving up.

        Returns:
        - str: ``output_path``, once the file has been written.

        Raises:
        - requests.HTTPError: If a status check or the download returns an error status.
        - RuntimeError: If Placid reports that rendering failed.
        - TimeoutError: If the PDF is still not ready after ``max_attempts`` checks.
        """
        import time  # local import: `time` is not imported at module level

        url = f"https://api.placid.app/api/rest/pdfs/{pdf_id}"

        pdf_url = None
        for attempt in range(max_attempts):
            if attempt:
                time.sleep(poll_interval)

            response = requests.get(
                url,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            response_body = self._parse_response(response)

            # NOTE(review): "error" as the failure status is taken from the Placid docs — confirm
            if response_body.get("status") == "error":
                raise RuntimeError(f"Placid failed to render PDF {pdf_id}: {response_body}")

            pdf_url = response_body.get("pdf_url")
            if pdf_url:
                break
        else:
            raise TimeoutError(
                f"PDF {pdf_id} was not ready after {max_attempts} checks"
            )

        # Download the PDF from this URL
        pdf_download = requests.get(pdf_url, timeout=self.REQUEST_TIMEOUT)
        pdf_download.raise_for_status()
        with open(output_path, "wb") as f:
            f.write(pdf_download.content)

        return output_path


def _read_pdf_text(filepath):
    """Return ``(first_page_text, full_text)`` extracted from the PDF at ``filepath``."""
    with open(filepath, "rb") as f:
        page_texts = [page.extract_text() for page in PyPDF2.PdfReader(f).pages]
    return page_texts[0], "".join(page_texts)


def handler():
    """
    Performs the data extraction process for the survey report

    For each configured property the site notes, EPR and post-retrofit ("scenario") site notes
    PDFs are read and parsed, the valuation uplift, bill savings and carbon savings are derived,
    and a PDF is requested from Placid. Finally an HTML report is generated from a template.

    NOTE: the payback period calculation is not implemented yet, so this function currently
    raises ``NotImplementedError`` while processing the first property.

    :return:
    """
    # Imported inside the function as in the original code, but once rather than on every
    # loop iteration.
    from backend.ml_models.Valuation import PropertyValuation

    # SECURITY: the API key should not live in source control. It is read from the environment
    # when available; the literal fallback is kept only so existing runs keep working and
    # should be removed (and the key rotated).
    PLACID_API_KEY = os.environ.get("PLACID_API_KEY", "placid-mpkwidzer2mens9h-hifa3dmbxpfeghpa")
    TEMPLATE_UUID = "5bst9mh1q9lk9"
    placid_api = PlacidApi(PLACID_API_KEY)

    current_property_value = 250000  # Needs to be an input

    EPC_COLOURS = {
        "A": "#117d58",
        "B": "#2da55c",
        "C": "#8dbd40",
        "D": "#f7cd14",
        "E": "#f3a96a",
        "F": "#ef8026",
        "G": "#e41e3b",
    }

    folders = [
        {
            "site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1/3 "
                          "WILLIS ROAD FLAT 1 PRE EPR SITE NOTES.pdf",
            "epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1/3 WILLIS "
                   "ROAD FLAT 1 PRE EPR PDF.pdf",
            "scenario_site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data"
                                   "/Flat 1/3 WILLIS ROAD FLAT 1 POST EPR SITE NOTES.pdf"
        },
        {
            "site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2/3 "
                          "WILLIS ROAD FLAT 2 PRE EPR SITE NOTES.pdf",
            "epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2/3 WILLIS "
                   "ROAD FLAT 2 PRE EPR PDF.pdf",
            "scenario_site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data"
                                   "/Flat 2/3 WILLIS ROAD FLAT 2 POST EPR SITE NOTES.pdf"
        },
        {
            "site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 "
                          "WILLIS ROAD FLAT 3 PRE EPR SITE NOTES.pdf",
            "epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 WILLIS "
                   "ROAD FLAT 3 PRE EPR PDF.pdf",
            "scenario_site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data"
                                   "/Flat 3/3 WILLIS ROAD FLAT 3 POST EPR SITE NOTES.pdf"
        },
    ]

    data = []
    for data_config in folders:

        file_mapping = {}
        for filename, filepath in data_config.items():
            first_page, text = _read_pdf_text(filepath)

            # Check the report type; documents of an unrecognised type are left out
            report_type = detect_report_type(first_page)
            if report_type is not None:
                file_mapping[filename] = text

        # All three documents are required below; fail with a clear message rather than a
        # bare KeyError when one of them was not recognised.
        missing = [name for name in data_config if name not in file_mapping]
        if missing:
            raise ValueError(f"Unrecognised report type for required document(s): {', '.join(missing)}")

        # This is only set up to work with quido site notes so we must have it
        site_notes_extractor = SiteNotesExtractor(file_mapping["site_notes"])
        site_notes = site_notes_extractor.extract_all()

        # We also must have an EPR
        epr_extractor = EPRExtractor(file_mapping["epr"])
        epr = epr_extractor.extract_all()

        # Valuation simulation
        scenario_site_notes_extractor = SiteNotesExtractor(file_mapping["scenario_site_notes"])
        scenario_site_notes = scenario_site_notes_extractor.extract_all()

        valuation_uplift = PropertyValuation.estimate_valuation_improvement(
            current_value=current_property_value,
            current_epc=site_notes["Current EPC Band"],
            target_epc=scenario_site_notes["Current EPC Band"],
        )
        # TODO - should convert this, when it's more than 5 figures and we should certainly stringify this

        valuation_difference = round(valuation_uplift["average_increased_value"] - current_property_value)

        # Prepare the data for output
        bill_savings = round(
            site_notes['Estimated Annual Energy Cost (£)'] - scenario_site_notes['Estimated Annual Energy Cost (£)']
        )

        carbon_savings = round(
            site_notes["Current Carbon Emissions (TCO2)"] - scenario_site_notes["Current Carbon Emissions (TCO2)"],
            2
        )

        # Deliberate placeholder: the payback period still has to be implemented. Until then
        # nothing below this point runs.
        payback_period = None
        if payback_period is None:
            raise NotImplementedError("Implement me")

        # We extract the measures from the site notes

        report_data = {
            "current_epc_rating": site_notes["Current EPC Band"],
            "current_epc_rating_colour": EPC_COLOURS[site_notes["Current EPC Band"]],
            "post_retrofit_epc_rating": scenario_site_notes["Current EPC Band"],
            "post_retrofit_epc_rating_colour": EPC_COLOURS[scenario_site_notes["Current EPC Band"]],
            "bill_savings": stringify_number(bill_savings),
            "valuation_improvement": stringify_number(valuation_difference),
            "carbon_savings": carbon_savings,

        }
        # Collect one row per property; previously `data` was never populated
        data.append(report_data)

        # We now produce the combined data sheet which is the starting figure.
        # This was previously commented out, leaving `data_sheet` undefined where the HTML
        # report is generated below.
        # NOTE(review): assumes both extractors return plain dicts — confirm
        data_sheet = {**epr, **site_notes}
        # TODO - still to be restored from the original draft:
        # del data_sheet['Building Dimensions']
        # # We unnest the Total Building Dimensions
        # data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"]
        # data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"]
        # del data_sheet["Total Building Dimensions"]

        # NOTE(review): bill_savings and valuation_improvement are prepared above but
        # create_pdf has no parameters for them, so this call raises TypeError — confirm
        # which fields the Placid template should receive.
        create_pdf_response = placid_api.create_pdf(
            template_uuid=TEMPLATE_UUID, **report_data
        )
        # {'id': 769832, 'type': 'pdf', 'status': 'queued', 'pdf_url': None, 'transfer_url': None, 'passthrough': None}
        # Download locally
        # NOTE(review): every property is downloaded to the same file, so later ones overwrite earlier ones
        placid_api.get_pdf(create_pdf_response["id"])

    data = pd.DataFrame(data)

    # Generate the HTML report
    # Placeholder locations
    # NOTE(review): `data_sheet` here is the one built for the last property in `folders`;
    # confirm whether one report per property is intended.
    template_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/template.html"
    output_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/output/report.html"
    logo_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/assets/logo.png"
    generate_html_report(
        template_path, output_path,
        data={
            "address": data_sheet["Address"],
            "logo_path": logo_path,
            "current_epc": data_sheet["Current EPC Band"],
            "current_sap": data_sheet["Current SAP Rating"],
            "potential_epc": "A",  # TODO PLACEHOLDER
            "potential_sap": 91,  # TODO PLACEHOLDER
        }
    )