import os
import time
from string import Template

import pandas as pd
import PyPDF2
import requests

from survey_report.extraction.detect_report_type import detect_report_type
from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor


# Hex colours used to render each EPC band in the generated PDF.
EPC_COLOURS = {
    "A": "#117d58",
    "B": "#2da55c",
    "C": "#8dbd40",
    "D": "#f7cd14",
    "E": "#f3a96a",
    "F": "#ef8026",
    "G": "#e41e3b",
}


def generate_html_report(template_path, output_path, data):
    """
    Reads an HTML template file, injects dynamic values, and generates a final HTML report.

    Args:
    - template_path (str): Path to the HTML template file.
    - output_path (str): Path to save the generated HTML file.
    - data (dict): Dictionary containing dynamic values for the report.
    """
    # The template uses ``string.Template`` placeholders ($name / ${name}).
    with open(template_path, "r", encoding="utf-8") as f:
        html_template = Template(f.read())

    # safe_substitute leaves unknown placeholders untouched instead of raising KeyError.
    final_html = html_template.safe_substitute(data)

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_html)

    print(f"HTML report generated successfully: {output_path}")


def stringify_number(num: int, rounding: bool = True) -> str:
    """
    Formats a number for display in the report.

    Values below 100,000 are rendered with thousands separators (e.g. "1,300");
    larger values are rendered in thousands with a "k" suffix (e.g. "251k").

    Args:
    - num (int): The value to format.
    - rounding (bool): When True, round *up* to the next multiple of 100 (small values)
      or 1,000 (large values) before formatting. When False, small values are shown
      exactly and large values are truncated to whole thousands.

    Returns:
    - str: The formatted number.
    """
    if num < 100000:  # 5 figures or fewer
        rounded_num = ((num + 99) // 100) * 100 if rounding else num
        return f"{rounded_num:,}"

    # More than 5 figures
    rounded_num = ((num + 999) // 1000) * 1000 if rounding else num
    return f"{rounded_num // 1000}k"


class PlacidApi:
    """Minimal client for the Placid REST API endpoints used to render PDFs."""

    # Errors as defined by docs: https://placid.app/docs/2.0/rest/errors
    ERROR_CODES = {
        400: "Bad request",
        401: "Unauthorized",
        404: "Template Not found",
        422: "Validation error",
        429: "Rate limit exceeded",
        500: "Internal server error",
    }

    BASE_URL = "https://api.placid.app/api/rest"

    # Seconds to wait for any single HTTP request before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key):
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

    def _parse_response(self, response):
        """
        Returns the decoded JSON body of ``response``.

        Raises:
        - requests.HTTPError: if the response carries a 4xx/5xx status. The message
          includes the reason from ``ERROR_CODES`` when the status is a known one.
        """
        if response.status_code >= 400:
            reason = self.ERROR_CODES.get(response.status_code, "Unexpected error")
            raise requests.HTTPError(
                f"Placid API request failed ({response.status_code} {reason}): {response.text}",
                response=response,
            )
        return response.json()

    def create_pdf(
        self,
        template_uuid: str,
        current_epc_rating: str,
        current_epc_rating_colour: str,
        post_retrofit_epc_rating: str,
        post_retrofit_epc_rating_colour: str,
    ):
        """
        Queues a single-page PDF render and returns the API's JSON response.

        Args:
        - template_uuid (str): UUID of the Placid template to render.
        - current_epc_rating (str): Text for the "current_epc_rating" layer.
        - current_epc_rating_colour (str): Text colour for that layer.
        - post_retrofit_epc_rating (str): Text for the "post_retrofit_epc_rating" layer.
        - post_retrofit_epc_rating_colour (str): Text colour for that layer.

        Returns:
        - dict: The decoded response body; it contains the "id" to pass to ``get_pdf``.

        Raises:
        - requests.HTTPError: if the API answers with an error status.
        """
        url = f"{self.BASE_URL}/pdfs"
        body = {
            "webhook_success": None,
            "passthrough": None,
            "pages": [
                {
                    "template_uuid": template_uuid,
                    "layers": {
                        "current_epc_rating": {
                            "text": current_epc_rating,
                            "text_color": current_epc_rating_colour,
                        },
                        "post_retrofit_epc_rating": {
                            "text": post_retrofit_epc_rating,
                            "text_color": post_retrofit_epc_rating_colour,
                        },
                    },
                },
            ],
        }

        response = requests.post(
            url, headers=self.headers, json=body, timeout=self.REQUEST_TIMEOUT
        )
        return self._parse_response(response)

    def get_pdf(
        self,
        pdf_id: str,
        output_path: str = "survey_report/example_data/output.pdf",
        poll_interval: float = 5,
        timeout: float = 300,
    ):
        """
        Polls the API until the PDF is ready, then downloads it to ``output_path``.

        A freshly created PDF is only queued and has no "pdf_url" yet, so the
        status endpoint is re-queried every ``poll_interval`` seconds until a
        URL is available.

        Args:
        - pdf_id (str): The "id" returned by ``create_pdf``.
        - output_path (str): Where to write the downloaded PDF.
        - poll_interval (float): Seconds to wait between status checks.
        - timeout (float): Maximum total seconds to wait for the render.

        Returns:
        - str: ``output_path``, once the file has been written.

        Raises:
        - requests.HTTPError: if the API or the download answers with an error status.
        - RuntimeError: if Placid reports that rendering failed.
        - TimeoutError: if the PDF is not ready within ``timeout`` seconds.
        """
        url = f"{self.BASE_URL}/pdfs/{pdf_id}"
        deadline = time.monotonic() + timeout

        while True:
            response = requests.get(
                url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
            )
            response_body = self._parse_response(response)

            pdf_url = response_body.get("pdf_url")
            if pdf_url:
                break

            # NOTE(review): "error" is the failure status listed in the Placid
            # REST docs - confirm against the current API version.
            if response_body.get("status") == "error":
                raise RuntimeError(f"Placid failed to render PDF {pdf_id}: {response_body}")

            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"PDF {pdf_id} was not ready after {timeout} seconds"
                )

            time.sleep(poll_interval)

        # Download the PDF from this URL
        pdf_download = requests.get(pdf_url, timeout=self.REQUEST_TIMEOUT)
        pdf_download.raise_for_status()

        with open(output_path, "wb") as f:
            f.write(pdf_download.content)

        return output_path


def _extract_pdf_text(filepath):
    """Returns ``(first_page_text, full_text)`` for the PDF at ``filepath``."""
    with open(filepath, "rb") as f:
        pdf = PyPDF2.PdfReader(f)
        # Extract each page once; the first page is reused for type detection.
        page_texts = [page.extract_text() for page in pdf.pages]

    return page_texts[0], "".join(page_texts)


def _load_report_texts(data_config):
    """
    Reads every PDF in ``data_config`` and returns ``{name: full_text}``.

    Files whose report type cannot be detected from their first page are left out.
    """
    file_mapping = {}
    for filename, filepath in data_config.items():
        first_page, text = _extract_pdf_text(filepath)
        if detect_report_type(first_page) is not None:
            file_mapping[filename] = text
    return file_mapping


def _build_data_sheet(epr, site_notes):
    """
    Merges the EPR and site-notes extractions into one flat data sheet.

    Site-notes values win when both extractions share a key. The nested
    building-dimension entries are dropped or un-nested.

    NOTE(review): the dimension key names come from earlier commented-out code
    and are accessed defensively - verify them against the extractors' output.
    """
    data_sheet = {**epr, **site_notes}
    data_sheet.pop("Building Dimensions", None)

    # We unnest the Total Building Dimensions
    total_dimensions = data_sheet.pop("Total Building Dimensions", None)
    if isinstance(total_dimensions, dict):
        data_sheet["Total Building Floor Area (m2)"] = total_dimensions.get("floor_area")
        data_sheet["Total Building Heat Loss Area (m2)"] = total_dimensions.get("heat_loss_area")

    return data_sheet


def handler():
    """
    Performs the data extraction process for the survey report.

    For each property it reads the site notes, EPR and post-retrofit scenario
    PDFs, derives the savings figures, renders a PDF through Placid, and
    finally writes an HTML report for the last property processed.

    :return: A DataFrame with one combined data-sheet row per property.
    """
    # Imported here rather than at module level, as in the original code, but
    # hoisted out of the per-property loop.
    from backend.ml_models.Valuation import PropertyValuation

    # NOTE(security): the fallback literal is a credential committed to source.
    # Prefer setting PLACID_API_KEY in the environment; rotate the key and drop
    # the fallback once every caller does so.
    PLACID_API_KEY = os.environ.get(
        "PLACID_API_KEY", "placid-mpkwidzer2mens9h-hifa3dmbxpfeghpa"
    )
    TEMPLATE_UUID = "5bst9mh1q9lk9"

    placid_api = PlacidApi(PLACID_API_KEY)

    current_property_value = 250000  # Needs to be an input

    # Placeholder locations
    base_dir = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report"
    example_data_dir = f"{base_dir}/example_data"

    folders = [
        {
            "site_notes": f"{example_data_dir}/Flat {flat}/3 WILLIS ROAD FLAT {flat} PRE EPR SITE NOTES.pdf",
            "epr": f"{example_data_dir}/Flat {flat}/3 WILLIS ROAD FLAT {flat} PRE EPR PDF.pdf",
            "scenario_site_notes": (
                f"{example_data_dir}/Flat {flat}/3 WILLIS ROAD FLAT {flat} POST EPR SITE NOTES.pdf"
            ),
        }
        for flat in (1, 2, 3)
    ]

    data = []
    data_sheet = None
    for data_config in folders:
        file_mapping = _load_report_texts(data_config)

        # All three inputs are mandatory; fail with a clear message if any
        # report could not be recognised.
        missing = [name for name in data_config if name not in file_mapping]
        if missing:
            raise KeyError(
                f"Could not detect the report type for: {', '.join(missing)}"
            )

        # This is only set up to work with quido site notes so we must have it
        site_notes = SiteNotesExtractor(file_mapping["site_notes"]).extract_all()

        # We also must have an EPR
        epr = EPRExtractor(file_mapping["epr"]).extract_all()

        # Valuation simulation
        scenario_site_notes = SiteNotesExtractor(
            file_mapping["scenario_site_notes"]
        ).extract_all()

        valuation_uplift = PropertyValuation.estimate_valuation_improvement(
            current_value=current_property_value,
            current_epc=site_notes["Current EPC Band"],
            target_epc=scenario_site_notes["Current EPC Band"],
        )
        # Rounded to an int here; stringify_number formats it for display below.
        valuation_difference = round(
            valuation_uplift["average_increased_value"] - current_property_value
        )

        # Prepare the data for output
        bill_savings = round(
            site_notes["Estimated Annual Energy Cost (£)"]
            - scenario_site_notes["Estimated Annual Energy Cost (£)"]
        )
        carbon_savings = round(
            site_notes["Current Carbon Emissions (TCO2)"]
            - scenario_site_notes["Current Carbon Emissions (TCO2)"],
            2,
        )

        # The payback period calculation does not exist yet. This deliberate
        # stub stops the run here until it is implemented.
        payback_period = None
        if payback_period is None:
            raise NotImplementedError("Implement me")

        # We extract the measures from the site notes
        report_data = {
            "current_epc_rating": site_notes["Current EPC Band"],
            "current_epc_rating_colour": EPC_COLOURS[site_notes["Current EPC Band"]],
            "post_retrofit_epc_rating": scenario_site_notes["Current EPC Band"],
            "post_retrofit_epc_rating_colour": EPC_COLOURS[scenario_site_notes["Current EPC Band"]],
            "bill_savings": stringify_number(bill_savings),
            "valuation_improvement": stringify_number(valuation_difference),
            "carbon_savings": carbon_savings,
        }

        # We now produce the combined data sheet which is the starting figure
        data_sheet = _build_data_sheet(epr, site_notes)
        data.append(data_sheet)

        # create_pdf only accepts the EPC rating layers, so pass exactly those.
        # TODO: bill_savings, valuation_improvement and carbon_savings need
        # matching create_pdf parameters (and template layers) before they can
        # be sent.
        create_pdf_response = placid_api.create_pdf(
            template_uuid=TEMPLATE_UUID,
            current_epc_rating=report_data["current_epc_rating"],
            current_epc_rating_colour=report_data["current_epc_rating_colour"],
            post_retrofit_epc_rating=report_data["post_retrofit_epc_rating"],
            post_retrofit_epc_rating_colour=report_data["post_retrofit_epc_rating_colour"],
        )
        # {'id': 769832, 'type': 'pdf', 'status': 'queued', 'pdf_url': None, 'transfer_url': None, 'passthrough': None}

        # Download locally.
        # NOTE: every property is written to get_pdf's default output path, so
        # later properties overwrite earlier ones.
        placid_api.get_pdf(create_pdf_response["id"])

    data_frame = pd.DataFrame(data)

    # Generate the HTML report for the last property processed.
    if data_sheet is not None:
        template_path = f"{base_dir}/template.html"
        output_path = f"{base_dir}/output/report.html"
        logo_path = f"{base_dir}/assets/logo.png"

        generate_html_report(
            template_path,
            output_path,
            data={
                "address": data_sheet["Address"],
                "logo_path": logo_path,
                "current_epc": data_sheet["Current EPC Band"],
                "current_sap": data_sheet["Current SAP Rating"],
                "potential_epc": "A",  # TODO PLACEHOLDER
                "potential_sap": 91,  # TODO PLACEHOLDER
            },
        )

    return data_frame