Model/survey_report/app.py
2025-02-18 19:49:29 +00:00

270 lines
10 KiB
Python

import os
import time
from string import Template

import pandas as pd
import PyPDF2
import requests

from survey_report.extraction.detect_report_type import detect_report_type
from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor
def generate_html_report(template_path, output_path, data):
    """
    Reads an HTML template file, injects dynamic values, and generates a final HTML report.

    Placeholders use ``string.Template`` syntax (``$name`` / ``${name}``). Placeholders
    with no matching key in ``data`` are left in the output untouched rather than
    raising. Values are inserted verbatim; no HTML escaping is applied.

    Args:
    - template_path (str): Path to the HTML template file.
    - output_path (str): Path to save the generated HTML file. Missing parent
      directories are created.
    - data (dict): Dictionary containing dynamic values for the report.
    """
    # Read the template file
    with open(template_path, "r", encoding="utf-8") as f:
        html_template = Template(f.read())

    # safe_substitute (rather than substitute) so a missing key does not raise KeyError
    final_html = html_template.safe_substitute(data)

    # Make sure the destination folder exists; a bare filename has no directory part
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the generated HTML file
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_html)

    print(f"HTML report generated successfully: {output_path}")
def stringify_number(num: int, rounding: bool = True) -> str:
    """
    Format a number as a short display string.

    Values below 100,000 are shown with thousands separators (e.g. ``"1,300"``);
    values of 100,000 and above are shown as whole thousands with a ``k`` suffix
    (e.g. ``"250k"``).

    Args:
    - num (int): The number to format.
    - rounding (bool): When True, round *up* to the next multiple of 100 (values
      below 100,000) or 1,000 (values of 100,000 and above) before formatting.
      When False the value is not rounded, although the ``k`` form still drops
      anything below a whole thousand.

    Returns:
    - str: The formatted number.
    """
    if num < 100000:  # 5 figures or fewer
        rounded_num = ((num + 99) // 100) * 100 if rounding else num
        if rounded_num < 100000:
            return f"{rounded_num:,}"
        # Rounding up carried the value to six figures (99,901-99,999 -> 100,000),
        # so format it like any other six-figure value instead of "100,000".
        num = rounded_num

    # More than 5 figures
    rounded_num = ((num + 999) // 1000) * 1000 if rounding else num
    return f"{rounded_num // 1000}k"
class PlacidApi:
    """
    Small client for the PDF endpoints of the Placid REST API.

    Sends bearer-token authenticated JSON requests to create a PDF from a template
    and to download the rendered file once it is available.
    """

    # Errors as defined by docs: https://placid.app/docs/2.0/rest/errors
    ERROR_CODES = {
        400: "Bad request",
        401: "Unauthorized",
        404: "Template Not found",
        422: "Validation error",
        429: "Rate limit exceeded",
        500: "Internal server error",
    }

    # Seconds to wait for any single HTTP request before giving up
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key):
        """Store the API key and build the headers sent with every API request."""
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

    @classmethod
    def _error_message(cls, status_code: int) -> str:
        """Return a readable failure message for an HTTP status code, using ERROR_CODES."""
        reason = cls.ERROR_CODES.get(status_code, "Unexpected error")
        return f"Placid API request failed with status {status_code}: {reason}"

    def _check_response(self, response):
        """Raise ``requests.HTTPError`` if the API answered with a 4xx/5xx status."""
        if response.status_code >= 400:
            raise requests.HTTPError(
                self._error_message(response.status_code), response=response
            )

    def create_pdf(
        self,
        template_uuid: str,
        current_epc_rating: str,
        current_epc_rating_colour: str,
        post_retrofit_epc_rating: str,
        post_retrofit_epc_rating_colour: str,
    ):
        """
        Request a single-page PDF rendered from a template.

        The two EPC ratings are written to the template layers named
        ``current_epc_rating`` and ``post_retrofit_epc_rating``, each with its own
        text colour.

        :param template_uuid: UUID of the template to render
        :param current_epc_rating: Text for the ``current_epc_rating`` layer
        :param current_epc_rating_colour: Text colour for the ``current_epc_rating`` layer
        :param post_retrofit_epc_rating: Text for the ``post_retrofit_epc_rating`` layer
        :param post_retrofit_epc_rating_colour: Text colour for the
            ``post_retrofit_epc_rating`` layer
        :return: The decoded JSON response body describing the created PDF job
        :raises requests.HTTPError: If the API responds with an error status
        """
        url = "https://api.placid.app/api/rest/pdfs"
        body = {
            "webhook_success": None,
            "passthrough": None,
            "pages": [
                {
                    "template_uuid": template_uuid,
                    "layers": {
                        "current_epc_rating": {
                            "text": current_epc_rating,
                            "text_color": current_epc_rating_colour,
                        },
                        "post_retrofit_epc_rating": {
                            "text": post_retrofit_epc_rating,
                            "text_color": post_retrofit_epc_rating_colour,
                        },
                    },
                },
            ],
        }
        response = requests.post(
            url,
            headers=self.headers,
            json=body,
            timeout=self.REQUEST_TIMEOUT,
        )
        self._check_response(response)
        return response.json()

    def get_pdf(
        self,
        pdf_id: str,
        poll_interval: float = 5,
        max_wait: float = 300,
        output_path: str = "survey_report/example_data/output.pdf",
    ):
        """
        Poll the API every ``poll_interval`` seconds until the PDF is ready, then
        download it to ``output_path``.

        :param pdf_id: Identifier of the PDF job, as returned by ``create_pdf``
        :param poll_interval: Seconds to sleep between status checks
        :param max_wait: Maximum total seconds to wait for the PDF to become available
        :param output_path: Where the downloaded PDF is written
        :raises requests.HTTPError: If a status check or the download fails
        :raises RuntimeError: If the API reports that rendering failed
        :raises TimeoutError: If the PDF is not ready within ``max_wait`` seconds
        """
        url = f"https://api.placid.app/api/rest/pdfs/{pdf_id}"
        deadline = time.monotonic() + max_wait

        while True:
            response = requests.get(
                url,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            self._check_response(response)
            response_body = response.json()

            # A freshly queued job carries "pdf_url": None, so keep waiting until it is set
            pdf_url = response_body.get("pdf_url")
            if pdf_url:
                break

            # NOTE(review): "error" as the failed-job status is taken from the Placid
            # docs - confirm against the current API version.
            if response_body.get("status") == "error":
                raise RuntimeError(f"Placid failed to render PDF {pdf_id}: {response_body}")

            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"PDF {pdf_id} was not ready after {max_wait} seconds"
                )
            time.sleep(poll_interval)

        # Download the PDF from this URL
        pdf_download = requests.get(pdf_url, timeout=self.REQUEST_TIMEOUT)
        pdf_download.raise_for_status()
        with open(output_path, "wb") as f:
            f.write(pdf_download.content)
def _read_pdf_text(filepath):
    """Return ``(first_page_text, full_text)`` extracted from the PDF at ``filepath``."""
    with open(filepath, "rb") as f:
        pdf = PyPDF2.PdfReader(f)
        # Extract every page once, while the file is still open
        page_texts = [page.extract_text() for page in pdf.pages]
    return page_texts[0], "".join(page_texts)


def handler(current_property_value: int = 250000):
    """
    Performs the data extraction process for the survey report.

    For each configured property this reads the pre-retrofit site notes, the EPR and
    the post-retrofit (scenario) site notes, estimates the valuation uplift and the
    bill / carbon savings, and requests and downloads a PDF via the Placid API.
    Afterwards a single HTML report is generated.

    :param current_property_value: Current value of the property, used as the baseline
        for the valuation uplift estimate
    :return: A pandas DataFrame with one row per processed property, whose columns are
        the headline report figures
    :raises KeyError: If a required document is missing or its report type is not
        recognised
    """
    # Kept function-local rather than at module level, but hoisted out of the loop
    from backend.ml_models.Valuation import PropertyValuation

    # NOTE(review): the literal fallback is a credential committed to source control.
    # Rotate it and remove the fallback once PLACID_API_KEY is set in the environment.
    PLACID_API_KEY = os.environ.get("PLACID_API_KEY", "placid-mpkwidzer2mens9h-hifa3dmbxpfeghpa")
    TEMPLATE_UUID = "5bst9mh1q9lk9"
    placid_api = PlacidApi(PLACID_API_KEY)

    EPC_COLOURS = {
        "A": "#117d58",
        "B": "#2da55c",
        "C": "#8dbd40",
        "D": "#f7cd14",
        "E": "#f3a96a",
        "F": "#ef8026",
        "G": "#e41e3b",
    }

    # NOTE(review): machine-specific absolute paths - placeholder locations
    survey_report_dir = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report"
    example_dir = f"{survey_report_dir}/example_data"
    folders = [
        {
            "site_notes": f"{example_dir}/Flat {flat}/3 WILLIS ROAD FLAT {flat} PRE EPR SITE NOTES.pdf",
            "epr": f"{example_dir}/Flat {flat}/3 WILLIS ROAD FLAT {flat} PRE EPR PDF.pdf",
            "scenario_site_notes": f"{example_dir}/Flat {flat}/3 WILLIS ROAD FLAT {flat} POST EPR SITE NOTES.pdf",
        }
        for flat in (1, 2, 3)
    ]
    required_documents = ("site_notes", "epr", "scenario_site_notes")

    data = []
    data_sheet = None
    for data_config in folders:
        file_mapping = {}
        for document_kind, filepath in data_config.items():
            first_page, text = _read_pdf_text(filepath)
            # Only keep documents whose report type is recognised from the first page
            if detect_report_type(first_page) is not None:
                file_mapping[document_kind] = text

        # This is only set up to work with quidos site notes and we also must have an EPR,
        # so fail with a clear message instead of a bare KeyError further down
        missing = [kind for kind in required_documents if kind not in file_mapping]
        if missing:
            raise KeyError(f"Missing or unrecognised documents: {', '.join(missing)}")

        site_notes = SiteNotesExtractor(file_mapping["site_notes"]).extract_all()
        epr = EPRExtractor(file_mapping["epr"]).extract_all()
        # The scenario site notes describe the property after the retrofit
        scenario_site_notes = SiteNotesExtractor(file_mapping["scenario_site_notes"]).extract_all()

        # Valuation simulation
        valuation_uplift = PropertyValuation.estimate_valuation_improvement(
            current_value=current_property_value,
            current_epc=site_notes["Current EPC Band"],
            target_epc=scenario_site_notes["Current EPC Band"],
        )
        valuation_difference = round(valuation_uplift["average_increased_value"] - current_property_value)

        # Prepare the data for output
        bill_savings = round(
            site_notes['Estimated Annual Energy Cost (£)'] - scenario_site_notes['Estimated Annual Energy Cost (£)']
        )
        carbon_savings = round(
            site_notes["Current Carbon Emissions (TCO2)"] - scenario_site_notes["Current Carbon Emissions (TCO2)"],
            2
        )

        # TODO: compute the payback period. No report field consumes it yet, so its
        # absence must not abort the run.

        report_data = {
            "current_epc_rating": site_notes["Current EPC Band"],
            "current_epc_rating_colour": EPC_COLOURS[site_notes["Current EPC Band"]],
            "post_retrofit_epc_rating": scenario_site_notes["Current EPC Band"],
            "post_retrofit_epc_rating_colour": EPC_COLOURS[scenario_site_notes["Current EPC Band"]],
            "bill_savings": stringify_number(bill_savings),
            "valuation_improvement": stringify_number(valuation_difference),
            "carbon_savings": carbon_savings,
        }
        data.append(report_data)

        # Combined data sheet (EPR fields overlaid with site-notes fields) for the HTML report
        data_sheet = {**epr, **site_notes}

        # create_pdf only accepts the four EPC arguments, so pass those explicitly;
        # unpacking the whole of report_data would raise TypeError on the extra keys
        create_pdf_response = placid_api.create_pdf(
            template_uuid=TEMPLATE_UUID,
            current_epc_rating=report_data["current_epc_rating"],
            current_epc_rating_colour=report_data["current_epc_rating_colour"],
            post_retrofit_epc_rating=report_data["post_retrofit_epc_rating"],
            post_retrofit_epc_rating_colour=report_data["post_retrofit_epc_rating_colour"],
        )
        # {'id': 769832, 'type': 'pdf', 'status': 'queued', 'pdf_url': None, 'transfer_url': None, 'passthrough': None}

        # Download locally
        # NOTE(review): each call saves the PDF to the same location, so later
        # properties overwrite earlier downloads.
        placid_api.get_pdf(create_pdf_response["id"])

    data = pd.DataFrame(data)
    if data_sheet is None:
        # Nothing was processed, so there is nothing to render
        return data

    # Generate the HTML report
    # NOTE(review): only one HTML report is written, from the last property's data
    # sheet - confirm whether one report per property is intended.
    # NOTE(review): assumes the extractors provide "Address" and "Current SAP Rating" -
    # verify against the extractor output.
    template_path = f"{survey_report_dir}/template.html"
    output_path = f"{survey_report_dir}/output/report.html"
    logo_path = f"{survey_report_dir}/assets/logo.png"
    generate_html_report(
        template_path, output_path,
        data={
            "address": data_sheet["Address"],
            "logo_path": logo_path,
            "current_epc": data_sheet["Current EPC Band"],
            "current_sap": data_sheet["Current SAP Rating"],
            "potential_epc": "A",  # TODO PLACEHOLDER
            "potential_sap": 91,  # TODO PLACEHOLDER
        }
    )
    return data