mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
setting up EPC data extraction process for creation of reports
This commit is contained in:
parent
c09b693922
commit
764dc7901f
5 changed files with 151 additions and 35 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Stonewater-wave-3" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyNamespacePackagesService">
|
||||
|
|
|
|||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Stonewater-wave-3" project-jdk-type="Python SDK" />
|
||||
<component name="PyCharmProfessionalAdvertiser">
|
||||
<option name="shown" value="true" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from dotenv import load_dotenv
|
|||
from utils.s3 import save_csv_to_s3
|
||||
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
|
||||
|
||||
PORTFOLIO_ID = 132
|
||||
PORTFOLIO_ID = 133
|
||||
USER_ID = 8
|
||||
|
||||
load_dotenv(dotenv_path="backend/.env")
|
||||
|
|
@ -19,11 +19,9 @@ def app():
|
|||
|
||||
asset_list = [
|
||||
{
|
||||
"address": "3",
|
||||
"postcode": "BB8 0JF",
|
||||
"uprn": 100010509503,
|
||||
"property_type": "House",
|
||||
"built_form": "End-Terrace",
|
||||
"address": "40",
|
||||
"postcode": "PE4 5BB",
|
||||
"uprn": 100090220519,
|
||||
}
|
||||
]
|
||||
asset_list = pd.DataFrame(asset_list)
|
||||
|
|
@ -54,8 +52,8 @@ def app():
|
|||
|
||||
valuation_data = [
|
||||
{
|
||||
"uprn": 100010509503,
|
||||
"valuation": 116_000
|
||||
"uprn": 100090220519,
|
||||
"valuation": 135_000
|
||||
}
|
||||
]
|
||||
# Store valuation data to s3
|
||||
|
|
|
|||
|
|
@ -258,16 +258,16 @@ def app():
|
|||
# - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
|
||||
# - Or the insulation required is loft/cavity (floors should be solid)
|
||||
|
||||
DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater"
|
||||
DATA_FILENAME = "Stonewater All Props for EPC Check 10.02.25.xlsx"
|
||||
SHEET_NAME = "stonewater sap, insta"
|
||||
POSTCODE_COLUMN = "Post Code"
|
||||
FULLADDRESS_COLUMN = "Name"
|
||||
ADDRESS1_COLUMN = "Name"
|
||||
ADDRESS1_METHOD = None
|
||||
DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing"
|
||||
DATA_FILENAME = "Community Housing PV data pull.xlsx"
|
||||
SHEET_NAME = "Community Housing"
|
||||
POSTCODE_COLUMN = "Postcode"
|
||||
FULLADDRESS_COLUMN = "Full Address"
|
||||
ADDRESS1_COLUMN = None
|
||||
ADDRESS1_METHOD = "first_word"
|
||||
ADDRESS_COLS_TO_CONCAT = []
|
||||
MISSING_POSTCODES_METHOD = None
|
||||
PROPERTY_YEAR_BUILT = None
|
||||
PROPERTY_YEAR_BUILT = "Build_Date"
|
||||
|
||||
# Maps addresses to uprn in problematic cases
|
||||
MANUAL_UPRN_MAP = {}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import os
|
||||
import requests
|
||||
import PyPDF2
|
||||
from string import Template
|
||||
|
||||
|
|
@ -31,31 +32,135 @@ def generate_html_report(template_path, output_path, data):
|
|||
print(f"HTML report generated successfully: {output_path}")
|
||||
|
||||
|
||||
class PlacidApi:
    """Small client for the Placid REST API, used to render PDF reports.

    Usage is two-step: ``create_pdf`` submits a render job and returns its id,
    then ``get_pdf`` polls that job until a download URL is available and
    saves the file locally.
    """

    PDFS_URL = "https://api.placid.app/api/rest/pdfs"

    # Seconds to wait on any single HTTP call; requests has no default timeout
    # and would otherwise block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Errors as defined by docs: https://placid.app/docs/2.0/rest/errors
    ERROR_CODES = {
        400: "Bad request",
        401: "Unauthorized",
        404: "Template Not found",
        422: "Validation error",
        429: "Rate limit exceeded",
        500: "Internal server error",
    }

    def __init__(self, api_key):
        """Store the API key and build the headers sent with every request."""
        self.api_key = api_key

        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

    def _parse_response(self, response):
        """Return the decoded JSON body of ``response``.

        :raises RuntimeError: if the HTTP status is 400 or above; the message
            uses the description from ``ERROR_CODES`` when the status is known.
        """
        status_code = response.status_code
        if status_code >= 400:
            reason = self.ERROR_CODES.get(status_code, "Unexpected error")
            raise RuntimeError(
                f"Placid API request failed ({status_code}): {reason}"
            )
        return response.json()

    def create_pdf(
        self,
        template_uuid: str,
        current_epc_rating: str,
        current_epc_rating_colour: str,
        post_retrofit_epc_rating: str,
        post_retrofit_epc_rating_colour: str,
    ):
        """Submit a single-page PDF render job and return its id.

        :param template_uuid: Placid template to render.
        :param current_epc_rating: text for the ``current_epc_rating`` layer.
        :param current_epc_rating_colour: text colour for that layer.
        :param post_retrofit_epc_rating: text for the
            ``post_retrofit_epc_rating`` layer.
        :param post_retrofit_epc_rating_colour: text colour for that layer.
        :return: the ``id`` field of the API response, to be passed to
            ``get_pdf``.
        :raises RuntimeError: if the API answers with an error status.
        """
        body = {
            "webhook_success": None,
            "passthrough": None,
            "pages": [
                {
                    "template_uuid": template_uuid,
                    "layers": {
                        "current_epc_rating": {
                            "text": current_epc_rating,
                            "text_color": current_epc_rating_colour,
                        },
                        "post_retrofit_epc_rating": {
                            "text": post_retrofit_epc_rating,
                            "text_color": post_retrofit_epc_rating_colour,
                        },
                    },
                },
            ],
        }

        response = requests.post(
            self.PDFS_URL,
            headers=self.headers,
            json=body,
            timeout=self.REQUEST_TIMEOUT,
        )

        # Previously the id was computed and dropped, leaving callers with no
        # handle on the job they had just created.
        return self._parse_response(response)["id"]

    def get_pdf(
        self,
        pdf_id: str,
        output_path: str = "output.pdf",
        poll_interval: float = 5,
        max_wait: float = 300,
    ):
        """Poll the API every ``poll_interval`` seconds until the PDF is ready,
        then download it to ``output_path``.

        :param pdf_id: id returned by ``create_pdf``.
        :param output_path: where the downloaded PDF is written.
        :param poll_interval: seconds to sleep between polls.
        :param max_wait: give up after roughly this many seconds of polling.
        :return: ``output_path``.
        :raises RuntimeError: if the API answers with an error status or
            reports that rendering failed.
        :raises TimeoutError: if no download URL appears within ``max_wait``.
        """
        # Local import keeps this block self-contained without touching the
        # file-level import section.
        import time

        url = f"{self.PDFS_URL}/{pdf_id}"
        deadline = time.monotonic() + max_wait

        while True:
            response = requests.get(
                url,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            response_body = self._parse_response(response)

            # A job that is still rendering has no usable download URL yet.
            pdf_url = response_body.get("pdf_url")
            if pdf_url:
                break

            # NOTE(review): assumes the body carries a "status" field that
            # becomes "error" on a failed render - confirm against Placid docs.
            if response_body.get("status") == "error":
                raise RuntimeError(
                    f"Placid failed to render PDF {pdf_id}: "
                    f"{response_body.get('errors')}"
                )

            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"PDF {pdf_id} was not ready after {max_wait} seconds"
                )
            time.sleep(poll_interval)

        # Download the PDF from this URL
        pdf_download = requests.get(pdf_url, timeout=self.REQUEST_TIMEOUT)
        pdf_download.raise_for_status()
        with open(output_path, "wb") as f:
            f.write(pdf_download.content)

        return output_path
|
||||
|
||||
|
||||
def handle():
|
||||
"""
|
||||
Performs the data extraction process for the survey report
|
||||
:return:
|
||||
"""
|
||||
|
||||
PLACID_API_KEY = "placid-mpkwidzer2mens9h-hifa3dmbxpfeghpa"
|
||||
TEMPLATE_UUID = "hnwqgtumckfbf"
|
||||
placid_api = PlacidApi(PLACID_API_KEY)
|
||||
|
||||
EPC_COLOURS = {
|
||||
"A": "#117d58",
|
||||
"B": "#2da55c",
|
||||
"C": "#8dbd40",
|
||||
"D": "#f7cd14",
|
||||
"E": "#f3a96a",
|
||||
"F": "#ef8026",
|
||||
"G": "#e41e3b",
|
||||
}
|
||||
|
||||
folders = [
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1",
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2",
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3",
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 4",
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 5",
|
||||
{
|
||||
"site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1/3 "
|
||||
"WILLIS ROAD FLAT 1 PRE EPR SITE NOTES.pdf",
|
||||
"epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1/3 WILLIS "
|
||||
"ROAD FLAT 1 PRE EPR PDF.pdf",
|
||||
"scenario_epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1/3 "
|
||||
"WILLIS ROAD FLAT 1 POST EPR PDF.pdf"
|
||||
},
|
||||
{
|
||||
"site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2/3 "
|
||||
"WILLIS ROAD FLAT 2 PRE EPR SITE NOTES.pdf",
|
||||
"epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2/3 WILLIS "
|
||||
"ROAD FLAT 2 PRE EPR PDF.pdf",
|
||||
"scenario_epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2/3 "
|
||||
"WILLIS ROAD FLAT 2 POST EPR PDF.pdf"
|
||||
},
|
||||
{
|
||||
"site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 "
|
||||
"WILLIS ROAD FLAT 3 PRE EPR SITE NOTES.pdf",
|
||||
"epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 WILLIS "
|
||||
"ROAD FLAT 3 PRE EPR PDF.pdf",
|
||||
"scenario_epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 "
|
||||
"WILLIS ROAD FLAT 3 POST EPR PDF.pdf"
|
||||
},
|
||||
]
|
||||
data = []
|
||||
for data_folder in folders:
|
||||
for data_config in folders:
|
||||
|
||||
folder_contents = os.listdir(data_folder)
|
||||
# We look for the following files:
|
||||
# Site notes
|
||||
file_mapping = {}
|
||||
for file in folder_contents:
|
||||
# Check if it's a pdf file
|
||||
if not file.endswith(".pdf"):
|
||||
continue
|
||||
filepath = os.path.join(data_folder, file)
|
||||
for filename, filepath in data_config.items():
|
||||
with (open(filepath, "rb") as f):
|
||||
pdf = PyPDF2.PdfReader(f)
|
||||
first_page = pdf.pages[0].extract_text()
|
||||
|
|
@ -66,16 +171,27 @@ def handle():
|
|||
# Check the report type
|
||||
report_type = detect_report_type(first_page)
|
||||
if report_type is not None:
|
||||
file_mapping[report_type] = text
|
||||
file_mapping[filename] = text
|
||||
|
||||
# This is only set up to work with Quidos site notes, so they must be present
|
||||
site_notes_extractor = SiteNotesExtractor(file_mapping["quidos_site_notes"])
|
||||
site_notes_extractor = SiteNotesExtractor(file_mapping["site_notes"])
|
||||
site_notes = site_notes_extractor.extract_all()
|
||||
|
||||
# We also must have an EPR
|
||||
epr_extractor = EPRExtractor(file_mapping["quidos_epr"])
|
||||
epr_extractor = EPRExtractor(file_mapping["epr"])
|
||||
epr = epr_extractor.extract_all()
|
||||
|
||||
scenario_epr = EPRExtractor(file_mapping["scenario_epr"])
|
||||
scenario_epr = scenario_epr.extract_all()
|
||||
|
||||
report_data = {
|
||||
"template_uuid": TEMPLATE_UUID,
|
||||
"current_epc_rating": site_notes["Current EPC Band"],
|
||||
"current_epc_rating_colour": EPC_COLOURS[site_notes["Current EPC Band"]],
|
||||
post_retrofit_epc_rating: str,
|
||||
post_retrofit_epc_rating_colour: str,
|
||||
}
|
||||
|
||||
# We now produce the combined data sheet which is the starting figure:
|
||||
data_sheet = {**epr, **site_notes}
|
||||
del data_sheet['Building Dimensions']
|
||||
|
|
@ -83,7 +199,9 @@ def handle():
|
|||
data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"]
|
||||
data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"]
|
||||
del data_sheet["Total Building Dimensions"]
|
||||
|
||||
data.append(data_sheet)
|
||||
|
||||
data = pd.DataFrame(data)
|
||||
|
||||
# Generate the HTML report
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue