diff --git a/.idea/Model.iml b/.idea/Model.iml index df6c4faa..762580d9 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..c916a158 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/customers/remote_assessments/app.py b/etl/customers/remote_assessments/app.py index ad97fd41..15f59c5e 100644 --- a/etl/customers/remote_assessments/app.py +++ b/etl/customers/remote_assessments/app.py @@ -4,7 +4,7 @@ from dotenv import load_dotenv from utils.s3 import save_csv_to_s3 from etl.find_my_epc.AssetListEpcData import AssetListEpcData -PORTFOLIO_ID = 132 +PORTFOLIO_ID = 133 USER_ID = 8 load_dotenv(dotenv_path="backend/.env") @@ -19,11 +19,9 @@ def app(): asset_list = [ { - "address": "3", - "postcode": "BB8 0JF", - "uprn": 100010509503, - "property_type": "House", - "built_form": "End-Terrace", + "address": "40", + "postcode": "PE4 5BB", + "uprn": 100090220519, } ] asset_list = pd.DataFrame(asset_list) @@ -54,8 +52,8 @@ def app(): valuation_data = [ { - "uprn": 100010509503, - "valuation": 116_000 + "uprn": 100090220519, + "valuation": 135_000 } ] # Store valuation data to s3 diff --git a/etl/route_march_data_pull/app.py b/etl/route_march_data_pull/app.py index 1b937b2d..f9cb7cbb 100644 --- a/etl/route_march_data_pull/app.py +++ b/etl/route_march_data_pull/app.py @@ -258,16 +258,16 @@ def app(): # - We want: fully insulated property (all wall types), EPC D or below (floors should be solid) # - Or the insulation required is loft/cavity (floors should be solid) - DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater" - DATA_FILENAME = "Stonewater All Props for EPC Check 10.02.25.xlsx" - SHEET_NAME = "stonewater sap, insta" - POSTCODE_COLUMN = "Post Code" - FULLADDRESS_COLUMN = "Name" - ADDRESS1_COLUMN = "Name" - ADDRESS1_METHOD = None + DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing" + 
class PlacidApi:
    """Small client for the PDF endpoints of the Placid REST API.

    The client can request a PDF render from a template (``create_pdf``) and
    then wait for that render and download the file (``get_pdf``).

    :param api_key: Placid API token, sent as a bearer token on API calls.
    :param session: Optional object exposing ``get``/``post`` with the same
        signature as the ``requests`` module (e.g. a ``requests.Session``).
        Defaults to the ``requests`` module itself.
    """

    # Errors as defined by docs: https://placid.app/docs/2.0/rest/errors
    ERROR_CODES = {
        400: "Bad request",
        401: "Unauthorized",
        404: "Template Not found",
        422: "Validation error",
        429: "Rate limit exceeded",
        500: "Internal server error",
    }

    BASE_URL = "https://api.placid.app/api/rest"
    # Seconds to wait on any single HTTP call before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key, session=None):
        self.api_key = api_key

        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

        # Injectable transport: lets callers reuse a connection pool.
        self._http = requests if session is None else session

    def _parse_response(self, response):
        """Return the decoded JSON body, raising on an HTTP error status.

        :raises RuntimeError: if the status code is 400 or above; the message
            uses the description from ``ERROR_CODES`` when one is known.
        """
        status = response.status_code
        if status >= 400:
            reason = self.ERROR_CODES.get(status, "Unexpected error")
            raise RuntimeError(f"Placid API request failed ({status}): {reason}")
        return response.json()

    def create_pdf(
        self,
        template_uuid: str,
        current_epc_rating: str,
        current_epc_rating_colour: str,
        post_retrofit_epc_rating: str,
        post_retrofit_epc_rating_colour: str,
    ):
        """Request a single-page PDF render and return its id.

        The two EPC ratings are written into the template layers named
        ``current_epc_rating`` and ``post_retrofit_epc_rating``, each with its
        own text colour.

        :return: The ``id`` field of the API response, to be passed to
            ``get_pdf``.
        :raises RuntimeError: if the API answers with an error status.
        """
        body = {
            "webhook_success": None,
            "passthrough": None,
            "pages": [
                {
                    "template_uuid": template_uuid,
                    "layers": {
                        "current_epc_rating": {
                            "text": current_epc_rating,
                            "text_color": current_epc_rating_colour,
                        },
                        "post_retrofit_epc_rating": {
                            "text": post_retrofit_epc_rating,
                            "text_color": post_retrofit_epc_rating_colour,
                        },
                    },
                },
            ],
        }

        response = self._http.post(
            f"{self.BASE_URL}/pdfs",
            headers=self.headers,
            json=body,
            timeout=self.REQUEST_TIMEOUT,
        )

        # Previously the id was computed and then dropped; callers need it.
        return self._parse_response(response)["id"]

    def get_pdf(
        self,
        pdf_id: str,
        output_path: str = "output.pdf",
        poll_interval: float = 5,
        max_attempts: int = 60,
    ):
        """Poll the API until the PDF is ready, then download it.

        :param pdf_id: Id returned by ``create_pdf``.
        :param output_path: Where the downloaded file is written.
        :param poll_interval: Seconds to sleep between polls.
        :param max_attempts: Maximum number of polls before giving up.
        :return: ``output_path``.
        :raises RuntimeError: if the API or the download answers with an error
            status, or the render is reported as failed.
        :raises TimeoutError: if no download URL appears within
            ``max_attempts`` polls.
        """
        import time

        url = f"{self.BASE_URL}/pdfs/{pdf_id}"

        for attempt in range(max_attempts):
            if attempt:
                time.sleep(poll_interval)

            response = self._http.get(
                url,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            response_body = self._parse_response(response)

            # NOTE(review): "error" is presumably Placid's failed-render
            # status - confirm against the API docs.
            if response_body.get("status") == "error":
                raise RuntimeError(f"Placid failed to render PDF {pdf_id}")

            # Readiness is keyed on the download URL rather than on a status
            # string, so an unfinished render simply triggers another poll.
            pdf_url = response_body.get("pdf_url")
            if pdf_url:
                break
        else:
            raise TimeoutError(
                f"PDF {pdf_id} was not ready after {max_attempts} attempts"
            )

        # Download the PDF from this URL. The API headers are left out so the
        # bearer token is not sent to whichever host serves the file.
        pdf_download = self._http.get(pdf_url, timeout=self.REQUEST_TIMEOUT)
        if pdf_download.status_code >= 400:
            raise RuntimeError(
                f"Downloading PDF {pdf_id} failed ({pdf_download.status_code})"
            )

        with open(output_path, "wb") as f:
            f.write(pdf_download.content)

        return output_path
WILLIS " + "ROAD FLAT 2 PRE EPR PDF.pdf", + "scenario_epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2/3 " + "WILLIS ROAD FLAT 2 POST EPR PDF.pdf" + }, + { + "site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 " + "WILLIS ROAD FLAT 3 PRE EPR SITE NOTES.pdf", + "epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 WILLIS " + "ROAD FLAT 3 PRE EPR PDF.pdf", + "scenario_epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 " + "WILLIS ROAD FLAT 3 POST EPR PDF.pdf" + }, ] data = [] - for data_folder in folders: + for data_config in folders: - folder_contents = os.listdir(data_folder) - # We look for the following files: - # Site notes file_mapping = {} - for file in folder_contents: - # Check if it's a pdf file - if not file.endswith(".pdf"): - continue - filepath = os.path.join(data_folder, file) + for filename, filepath in data_config.items(): with (open(filepath, "rb") as f): pdf = PyPDF2.PdfReader(f) first_page = pdf.pages[0].extract_text() @@ -66,16 +171,27 @@ def handle(): # Check the report type report_type = detect_report_type(first_page) if report_type is not None: - file_mapping[report_type] = text + file_mapping[filename] = text # This is only set up to work with quido site notes so we must have it - site_notes_extractor = SiteNotesExtractor(file_mapping["quidos_site_notes"]) + site_notes_extractor = SiteNotesExtractor(file_mapping["site_notes"]) site_notes = site_notes_extractor.extract_all() # We also must have an EPR - epr_extractor = EPRExtractor(file_mapping["quidos_epr"]) + epr_extractor = EPRExtractor(file_mapping["epr"]) epr = epr_extractor.extract_all() + scenario_epr = EPRExtractor(file_mapping["scenario_epr"]) + scenario_epr = scenario_epr.extract_all() + + report_data = { + "template_uuid": TEMPLATE_UUID, + "current_epc_rating": site_notes["Current EPC Band"], + 
"current_epc_rating_colour": EPC_COLOURS[site_notes["Current EPC Band"]], + post_retrofit_epc_rating: str, + post_retrofit_epc_rating_colour: str, + } + # We now produce the combined data sheet which is the starting figure: data_sheet = {**epr, **site_notes} del data_sheet['Building Dimensions'] @@ -83,7 +199,9 @@ def handle(): data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"] data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"] del data_sheet["Total Building Dimensions"] + data.append(data_sheet) + data = pd.DataFrame(data) # Generate the HTML report