setting up EPC data extraction process for creation of reports

This commit is contained in:
Khalim Conn-Kowlessar 2025-02-18 12:20:04 +00:00
parent c09b693922
commit 764dc7901f
5 changed files with 151 additions and 35 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Stonewater-wave-3" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Stonewater-wave-3" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -4,7 +4,7 @@ from dotenv import load_dotenv
from utils.s3 import save_csv_to_s3
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
PORTFOLIO_ID = 132
PORTFOLIO_ID = 133
USER_ID = 8
load_dotenv(dotenv_path="backend/.env")
@ -19,11 +19,9 @@ def app():
asset_list = [
{
"address": "3",
"postcode": "BB8 0JF",
"uprn": 100010509503,
"property_type": "House",
"built_form": "End-Terrace",
"address": "40",
"postcode": "PE4 5BB",
"uprn": 100090220519,
}
]
asset_list = pd.DataFrame(asset_list)
@ -54,8 +52,8 @@ def app():
valuation_data = [
{
"uprn": 100010509503,
"valuation": 116_000
"uprn": 100090220519,
"valuation": 135_000
}
]
# Store valuation data to s3

View file

@ -258,16 +258,16 @@ def app():
# - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
# - Or the insulation required is loft/cavity (floors should be solid)
DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater"
DATA_FILENAME = "Stonewater All Props for EPC Check 10.02.25.xlsx"
SHEET_NAME = "stonewater sap, insta"
POSTCODE_COLUMN = "Post Code"
FULLADDRESS_COLUMN = "Name"
ADDRESS1_COLUMN = "Name"
ADDRESS1_METHOD = None
DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing"
DATA_FILENAME = "Community Housing PV data pull.xlsx"
SHEET_NAME = "Community Housing"
POSTCODE_COLUMN = "Postcode"
FULLADDRESS_COLUMN = "Full Address"
ADDRESS1_COLUMN = None
ADDRESS1_METHOD = "first_word"
ADDRESS_COLS_TO_CONCAT = []
MISSING_POSTCODES_METHOD = None
PROPERTY_YEAR_BUILT = None
PROPERTY_YEAR_BUILT = "Build_Date"
# Maps addresses to uprn in problematic cases
MANUAL_UPRN_MAP = {}

View file

@ -1,4 +1,5 @@
import os
import requests
import PyPDF2
from string import Template
@ -31,31 +32,135 @@ def generate_html_report(template_path, output_path, data):
print(f"HTML report generated successfully: {output_path}")
class PlacidApi:
    """
    Small client for the PDF endpoints of the Placid REST API.

    It can request a PDF render from a template (``create_pdf``) and wait for
    that render to finish before downloading the file (``get_pdf``).
    """

    # Errors as defined by docs: https://placid.app/docs/2.0/rest/errors
    ERROR_CODES = {
        400: "Bad request",
        401: "Unauthorized",
        404: "Template Not found",
        422: "Validation error",
        429: "Rate limit exceeded",
        500: "Internal server error",
    }

    PDFS_URL = "https://api.placid.app/api/rest/pdfs"

    # Seconds to wait on any single HTTP call so a stalled connection cannot hang the run
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key):
        """
        :param api_key: Placid API token, sent as a bearer token on every request
        """
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

    def _parse_response(self, response):
        """
        Decode a Placid API response, turning HTTP errors into a readable exception.

        :param response: object exposing ``status_code`` and ``json()``
        :return: the decoded JSON body
        :raises RuntimeError: if the status code is 400 or above
        """
        status_code = response.status_code
        if status_code >= 400:
            reason = self.ERROR_CODES.get(status_code, "Unexpected error")
            raise RuntimeError(f"Placid API request failed ({status_code}): {reason}")
        return response.json()

    def create_pdf(
        self,
        template_uuid: str,
        current_epc_rating: str,
        current_epc_rating_colour: str,
        post_retrofit_epc_rating: str,
        post_retrofit_epc_rating_colour: str,
    ):
        """
        Ask Placid to render a single-page PDF from the given template.

        :param template_uuid: identifier of the Placid template to render
        :param current_epc_rating: text for the ``current_epc_rating`` layer
        :param current_epc_rating_colour: text colour for the ``current_epc_rating`` layer
        :param post_retrofit_epc_rating: text for the ``post_retrofit_epc_rating`` layer
        :param post_retrofit_epc_rating_colour: text colour for the ``post_retrofit_epc_rating`` layer
        :return: the ``id`` field of the API response, to be passed to ``get_pdf``
        :raises RuntimeError: if the API answers with an error status
        """
        body = {
            "webhook_success": None,
            "passthrough": None,
            "pages": [
                {
                    "template_uuid": template_uuid,
                    "layers": {
                        "current_epc_rating": {
                            "text": current_epc_rating,
                            "text_color": current_epc_rating_colour,
                        },
                        "post_retrofit_epc_rating": {
                            "text": post_retrofit_epc_rating,
                            "text_color": post_retrofit_epc_rating_colour,
                        },
                    },
                },
            ],
        }

        response = requests.post(
            self.PDFS_URL,
            headers=self.headers,
            json=body,
            timeout=self.REQUEST_TIMEOUT,
        )
        response_body = self._parse_response(response)
        # Hand the id back to the caller; without it the render cannot be retrieved
        return response_body["id"]

    def get_pdf(self, pdf_id: str, output_path: str = "output.pdf", poll_interval: float = 5, max_attempts: int = 60):
        """
        Poll the API every ``poll_interval`` seconds until the PDF is ready, then download it.

        :param pdf_id: id returned by ``create_pdf``
        :param output_path: where the downloaded PDF is written
        :param poll_interval: seconds to sleep between polls
        :param max_attempts: number of polls before giving up
        :return: ``output_path``
        :raises RuntimeError: if the API answers with an error status or reports a failed render
        :raises TimeoutError: if no ``pdf_url`` is available after ``max_attempts`` polls
        """
        import time

        url = f"{self.PDFS_URL}/{pdf_id}"

        for attempt in range(max_attempts):
            # Only sleep between polls, never before the first one
            if attempt:
                time.sleep(poll_interval)

            response = requests.get(
                url,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            response_body = self._parse_response(response)

            if response_body.get("status") == "error":
                raise RuntimeError(f"Placid failed to render PDF {pdf_id}")

            # The download link is only populated once the render has finished
            pdf_url = response_body.get("pdf_url")
            if pdf_url:
                break
        else:
            raise TimeoutError(f"PDF {pdf_id} was not ready after {max_attempts} attempts")

        # Download the PDF from this url
        pdf_download = requests.get(pdf_url, timeout=self.REQUEST_TIMEOUT)
        pdf_download.raise_for_status()
        with open(output_path, "wb") as f:
            f.write(pdf_download.content)

        return output_path
def handle():
"""
Performs the data extraction process for the survey report
:return:
"""
PLACID_API_KEY = "placid-mpkwidzer2mens9h-hifa3dmbxpfeghpa"
TEMPLATE_UUID = "hnwqgtumckfbf"
placid_api = PlacidApi(PLACID_API_KEY)
EPC_COLOURS = {
"A": "#117d58",
"B": "#2da55c",
"C": "#8dbd40",
"D": "#f7cd14",
"E": "#f3a96a",
"F": "#ef8026",
"G": "#e41e3b",
}
folders = [
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1",
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2",
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3",
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 4",
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 5",
{
"site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1/3 "
"WILLIS ROAD FLAT 1 PRE EPR SITE NOTES.pdf",
"epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1/3 WILLIS "
"ROAD FLAT 1 PRE EPR PDF.pdf",
"scenario_epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1/3 "
"WILLIS ROAD FLAT 1 POST EPR PDF.pdf"
},
{
"site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2/3 "
"WILLIS ROAD FLAT 2 PRE EPR SITE NOTES.pdf",
"epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2/3 WILLIS "
"ROAD FLAT 2 PRE EPR PDF.pdf",
"scenario_epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2/3 "
"WILLIS ROAD FLAT 2 POST EPR PDF.pdf"
},
{
"site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 "
"WILLIS ROAD FLAT 3 PRE EPR SITE NOTES.pdf",
"epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 WILLIS "
"ROAD FLAT 3 PRE EPR PDF.pdf",
"scenario_epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 "
"WILLIS ROAD FLAT 3 POST EPR PDF.pdf"
},
]
data = []
for data_folder in folders:
for data_config in folders:
folder_contents = os.listdir(data_folder)
# We look for the following files:
# Site notes
file_mapping = {}
for file in folder_contents:
# Check if it's a pdf file
if not file.endswith(".pdf"):
continue
filepath = os.path.join(data_folder, file)
for filename, filepath in data_config.items():
with (open(filepath, "rb") as f):
pdf = PyPDF2.PdfReader(f)
first_page = pdf.pages[0].extract_text()
@ -66,16 +171,27 @@ def handle():
# Check the report type
report_type = detect_report_type(first_page)
if report_type is not None:
file_mapping[report_type] = text
file_mapping[filename] = text
# This is only set up to work with quido site notes so we must have it
site_notes_extractor = SiteNotesExtractor(file_mapping["quidos_site_notes"])
site_notes_extractor = SiteNotesExtractor(file_mapping["site_notes"])
site_notes = site_notes_extractor.extract_all()
# We also must have an EPR
epr_extractor = EPRExtractor(file_mapping["quidos_epr"])
epr_extractor = EPRExtractor(file_mapping["epr"])
epr = epr_extractor.extract_all()
scenario_epr = EPRExtractor(file_mapping["scenario_epr"])
scenario_epr = scenario_epr.extract_all()
report_data = {
"template_uuid": TEMPLATE_UUID,
"current_epc_rating": site_notes["Current EPC Band"],
"current_epc_rating_colour": EPC_COLOURS[site_notes["Current EPC Band"]],
post_retrofit_epc_rating: str,
post_retrofit_epc_rating_colour: str,
}
# We now produce the combined data sheet which is the starting figure:
data_sheet = {**epr, **site_notes}
del data_sheet['Building Dimensions']
@ -83,7 +199,9 @@ def handle():
data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"]
data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"]
del data_sheet["Total Building Dimensions"]
data.append(data_sheet)
data = pd.DataFrame(data)
# Generate the HTML report