diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 70c531c0..d9b5c41d 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -3078,6 +3078,13 @@ def revised_model(): retrofit_assessment_data = pd.DataFrame(extracted_data) + # retrofit_assessment_data.to_csv( + # os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 2.csv"), index=False + # ) + retrofit_assessment_data = pd.read_csv( + os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 2.csv"), + ) + # Remove some definite duplicates dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].duplicated()]["Address"] dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].isin(dupes)] @@ -3097,10 +3104,6 @@ def revised_model(): # Replace \n with "" retrofit_assessment_data["Postcode"] = retrofit_assessment_data["Postcode"].str.replace("\n", "") - # retrofit_assessment_data.to_csv( - # os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet.csv"), index=False - # ) - # We can read in the data as needed # Next Step: Read in the coordinated measures and match to the extracted data @@ -3108,24 +3111,59 @@ def revised_model(): # CCS ############################################################# ccs_coordination_sheet = pd.read_excel( - os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "CCS_Installation_Compliance_CCS.xlsx"), + os.path.join( + CUSTOMER_FOLDER_PATH, + "Jan 2025 Project", + "CCS_Installation_Compliance_-_Stonewater_SHDF_2_1_1738228227.xlsx" + ), header=4 ) + ccs_postcodes = pd.read_excel( + os.path.join( + CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "CCS_Installation_Compliance_CCS.xlsx" + ), + header=4 + ) + ccs_coordination_sheet = ccs_postcodes[['Name', 'Post Code', 'Asset ID', 'Asset ID.1']].merge( + ccs_coordination_sheet, how="left", on="Name" + ) + ccs_coordination_sheet = ccs_coordination_sheet[~pd.isnull(ccs_coordination_sheet["Name"])] ccs_coordination_sheet["contractor"] = "CCS" # We split ccs into two sections - the first being ccs_coordination_removed_from_programme = ccs_coordination_sheet.tail(21) ccs_coordination_sheet = ccs_coordination_sheet.head(87) ccs_coordination = pd.concat([ccs_coordination_removed_from_programme, ccs_coordination_sheet]) + from urllib import parse + def extract_sharepoint_url(x): + if pd.isnull(x): + return "" + return "/".join(parse.urlparse( + x.split(" - http")[1] + ).path.replace("%20", " ").split("/")[-2:]) + + ccs_coordination["folder_path"] = ccs_coordination["Sharepoint Link"].apply(lambda x: extract_sharepoint_url(x)) + ############################################################ # WATES ############################################################# wates_coordination_sheet = pd.read_excel( + os.path.join( + CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "Stonewater_SAP_Installation_Compliance_1738229226.xlsx" + ), + header=4 + ) + wates_postcodes = pd.read_excel( os.path.join( CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "Stonewater_SAP_Installation_Compliance_Vinci-Wates.xlsx" ), header=4 ) + wates_postcodes = wates_postcodes[~pd.isnull(wates_postcodes["Post Code"])] + wates_coordination_sheet = wates_coordination_sheet.merge( + wates_postcodes[['Name', 'Post Code', 'Asset ID']].drop_duplicates(), how="left", on="Name" + ) + wates_coordination_sheet["contractor"] = "Wates" # Break into the different sites: # Wiltshire @@ -3136,7 +3174,7 @@ def revised_model(): wates_coordination_sheet_bournemouth = wates_coordination_sheet.iloc[524:567, :] wates_coordination_sheet_cambridgeshire = wates_coordination_sheet.iloc[571:581, :] wates_coordination_sheet_removed_from_programme = wates_coordination_sheet.iloc[586:926, :] - wates_coordination_sheet_abeyance = wates_coordination_sheet.iloc[928:972, :] + wates_coordination_sheet_abeyance = wates_coordination_sheet.iloc[930:972, :] wates_coordination = pd.concat( [ @@ -3151,12 +3189,15 @@ def revised_model(): ] ) + wates_coordination["folder_path"] = wates_coordination["Sharepoint Folder"].apply( + lambda x: extract_sharepoint_url(x) + ) + # Combine the data back ############################################################ # NEW 450 COORDINATED RETROFIT ASSESSMENTS ############################################################# - retrofit_packages_board = pd.read_excel( os.path.join( CUSTOMER_FOLDER_PATH, @@ -3361,17 +3402,49 @@ def revised_model(): wates_coordination = wates_coordination[ wates_coordination["Retrofit Assessment"].isin(["Completed"]) ] + wates_coordination = wates_coordination[ + ~pd.isnull(wates_coordination["Postcode"]) + ] wates_manual_filters = { - "24 Rabley Wood View": "Wave 2.1 Surveys/3. Wiltshire/24-25 Rabley Wood View" + "24 Rabley Wood View": "Wave 2.1 Surveys/3. Wiltshire/24-25 Rabley Wood View", + "14 Edencroft": "Wave 2.1 Surveys/3. Wiltshire/14 Edencroft", + "Flat 31 Rabley Wood View": "Wave 2.1 Surveys/3. Wiltshire/Flat 31 Rabley Wood View", + 'Flat 13, Manor Fields': 'Wave 2.1 Surveys/1. Herefordshire/(038) Manor Fields Flat 13', + "4 Kittys Lane": "Wave 2.1 Surveys/1. Herefordshire/(005) Kittys Lane 4", + '1 Jephson Court': 'Wave 2.1 Surveys/5. Coventry/Jesphson Court 1', + '2 Jephson Court': 'Wave 2.1 Surveys/5. Coventry/Jesphson Court 2', } wates_matching_lookup = [] # Examples to skip when we cannot get the data wates_to_skip = [ "66 Abbatt Close", # File type is unusual, couldn't extract the data + "Flat 69 Goddard Road", # Doesn't exist + "19 Garth House", # # File type is unusual, couldn't extract the data + '5 Gilpin Close', # No properly formatted EPR + '49 The Hide, Netherfield', # TODO: TEMP HERE + '19 Chanders Rd', + '5 Chanders Rd', + '23 Chanders Rd', + '3 Chanders Rd', + '1 Orchard Close', ] for _, home in tqdm(wates_coordination.iterrows(), total=len(wates_coordination)): + # Search the folder + filtered = retrofit_assessment_data[ + retrofit_assessment_data["survey_folder"].str.contains(home["folder_path"], regex=False) + ] + if len(filtered) == 1: + wates_matching_lookup.append( + { + "survey_folder": filtered["survey_folder"].values[0], + "Asset ID": home["Asset ID"], + "Name": home["Name"] + } + ) + continue + if home["Name"] in wates_to_skip: continue diff --git a/etl/customers/stonewater/data_cleaning.py b/etl/customers/stonewater/data_cleaning.py index 7ee06fcd..010902ce 100644 --- a/etl/customers/stonewater/data_cleaning.py +++ b/etl/customers/stonewater/data_cleaning.py @@ -86,7 +86,6 @@ def download_data_from_sharepoint(): folder_path="Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders" ) - len(contents["value"]) folders_to_pull = [ folder for folder in contents["value"] if folder["name"] in ["3. Wiltshire", "4. Bournemouth", "5. Coventry"] ] @@ -108,6 +107,8 @@ def download_data_from_sharepoint(): folder_path="Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders" + "/" + folder_to_pull["name"] + "/" + property_folder["name"] ) + if not property_folder_contents.get("value"): + continue # We look for the retrofit assessment folder: property_sub_folders = [ f for f in property_folder_contents["value"] if "ra coordinator info" in f["name"].lower() @@ -138,5 +139,5 @@ def download_data_from_sharepoint(): drive_id=sharepoint_client.document_drive["id"], folder_path=property_folder_path, download_dir=download_dir, - excluded_file_types=["MOV"] + excluded_file_types=["MOV", "jpg"] ) diff --git a/survey_report/app.py b/survey_report/app.py index f59c9984..87ce7864 100644 --- a/survey_report/app.py +++ b/survey_report/app.py @@ -1,9 +1,33 @@ import os import PyPDF2 +from string import Template from survey_report.extraction.detect_report_type import detect_report_type from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor +def generate_html_report(template_path, output_path, data): + """ + Reads an HTML template file, injects dynamic values, and generates a final HTML report. + + Args: + - template_path (str): Path to the HTML template file. + - output_path (str): Path to save the generated HTML file. + - data (dict): Dictionary containing dynamic values for the report. + """ + # Read the template file + with open(template_path, "r", encoding="utf-8") as f: + html_template = Template(f.read()) # Use Template from string module + + # Replace placeholders with actual data + final_html = html_template.safe_substitute(data) # Use safe_substitute to prevent missing key errors + + # Save the generated HTML file + with open(output_path, "w", encoding="utf-8") as f: + f.write(final_html) + + print(f"HTML report generated successfully: {output_path}") + + def handle(): """ Performs the data extraction process for the survey report @@ -48,3 +72,20 @@ def handle(): data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"] data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"] del data_sheet["Total Building Dimensions"] + + # Generate the HTML report + # Placeholder locations + template_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/template.html" + output_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/output/report.html" + logo_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/assets/logo.png" + generate_html_report( + template_path, output_path, + data={ + "address": data_sheet["Address"], + "logo_path": logo_path, + "current_epc": data_sheet["Current EPC Band"], + "current_sap": data_sheet["Current SAP Rating"], + "potential_epc": "A", # TODO PLACEHOLDER + "potential_sap": 91, # TODO PLACEHOLDER + } + ) diff --git a/survey_report/template.html b/survey_report/template.html new file mode 100644 index 00000000..5d3b6c63 --- /dev/null +++ b/survey_report/template.html @@ -0,0 +1,123 @@ + + + + + + Domna Energy Report + + + + +
+ +
+
+

Domna Energy Report

+

${address}

+
+ +
+ + +
+
+
Current EPC Rating
+
${current_epc}
+
SAP ${current_sap}
+
+ +
+
Potential EPC Rating
+
${potential_epc}
+
SAP ${potential_sap}
+
+
+ +
+ + +