completed matching

2026-07-27 23:35:01 +00:00 · 2025-01-30 17:30:17 +00:00 · 2025-01-30 17:30:17 +00:00 · f6d8688698
commit f6d8688698
parent daabf2a586
4 changed files with 248 additions and 10 deletions
--- a/etl/customers/stonewater/Wave
+++ b/etl/customers/stonewater/Wave
@ -3078,6 +3078,13 @@ def revised_model():

    retrofit_assessment_data = pd.DataFrame(extracted_data)

+    # retrofit_assessment_data.to_csv(
+    #     os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 2.csv"), index=False
+    # )
+    retrofit_assessment_data = pd.read_csv(
+        os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 2.csv"),
+    )
+
    # Remove some definite duplicates
    dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].duplicated()]["Address"]
    dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].isin(dupes)]
@ -3097,10 +3104,6 @@ def revised_model():
    # Replace \n with ""
    retrofit_assessment_data["Postcode"] = retrofit_assessment_data["Postcode"].str.replace("\n", "")

-    # retrofit_assessment_data.to_csv(
-    #     os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet.csv"), index=False
-    # )
-
    # We can read in the data as needed

    # Next Step: Read in the coordinated measures and match to the extracted data
@ -3108,24 +3111,59 @@ def revised_model():
    # CCS
    #############################################################
    ccs_coordination_sheet = pd.read_excel(
-        os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "CCS_Installation_Compliance_CCS.xlsx"),
+        os.path.join(
+            CUSTOMER_FOLDER_PATH,
+            "Jan 2025 Project",
+            "CCS_Installation_Compliance_-_Stonewater_SHDF_2_1_1738228227.xlsx"
+        ),
        header=4
    )
+    ccs_postcodes = pd.read_excel(
+        os.path.join(
+            CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "CCS_Installation_Compliance_CCS.xlsx"
+        ),
+        header=4
+    )
+    ccs_coordination_sheet = ccs_postcodes[['Name', 'Post Code', 'Asset ID', 'Asset ID.1']].merge(
+        ccs_coordination_sheet, how="left", on="Name"
+    )
+    ccs_coordination_sheet = ccs_coordination_sheet[~pd.isnull(ccs_coordination_sheet["Name"])]
    ccs_coordination_sheet["contractor"] = "CCS"
    # We split ccs into two sections - the first being
    ccs_coordination_removed_from_programme = ccs_coordination_sheet.tail(21)
    ccs_coordination_sheet = ccs_coordination_sheet.head(87)
    ccs_coordination = pd.concat([ccs_coordination_removed_from_programme, ccs_coordination_sheet])

+    from urllib import parse
+    def extract_sharepoint_url(x):
+        """
+        Return the last two folder names of the SharePoint URL held in a coordination-sheet cell.
+
+        The cell is expected to look like "<label> - https://host/.../<site folder>/<property folder>";
+        a cell holding only the URL is accepted as well.
+
+        Args:
+        - x: Cell value (str) or a null value (None / NaN).
+
+        Returns:
+        - str: "<parent folder>/<folder>" with percent-encoding decoded, or "" for a null cell.
+        """
+        if pd.isnull(x):
+            return ""
+        pieces = x.split(" - http")
+        # The split swallows the "http" prefix, so put it back; if the cell has no
+        # "<label> - " prefix, parse the whole value rather than raising IndexError.
+        url = "http" + pieces[1] if len(pieces) > 1 else x
+        # Drop a trailing slash so the final segment is the folder name, not "".
+        segments = parse.urlparse(url).path.rstrip("/").split("/")[-2:]
+        # Decode each segment after splitting so an encoded "/" (%2F) cannot create extra
+        # segments, and so every escape (not just %20) matches the on-disk folder names.
+        return "/".join(parse.unquote(segment) for segment in segments)
+
+    ccs_coordination["folder_path"] = ccs_coordination["Sharepoint Link"].apply(lambda x: extract_sharepoint_url(x))
+
    ############################################################
    # WATES
    #############################################################
    wates_coordination_sheet = pd.read_excel(
+        os.path.join(
+            CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "Stonewater_SAP_Installation_Compliance_1738229226.xlsx"
+        ),
+        header=4
+    )
+    wates_postcodes = pd.read_excel(
        os.path.join(
            CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "Stonewater_SAP_Installation_Compliance_Vinci-Wates.xlsx"
        ),
        header=4
    )
+    wates_postcodes = wates_postcodes[~pd.isnull(wates_postcodes["Post Code"])]
+    wates_coordination_sheet = wates_coordination_sheet.merge(
+        wates_postcodes[['Name', 'Post Code', 'Asset ID']].drop_duplicates(), how="left", on="Name"
+    )
+
    wates_coordination_sheet["contractor"] = "Wates"
    # Break into the different sites:
    # Wiltshire
@ -3136,7 +3174,7 @@ def revised_model():
    wates_coordination_sheet_bournemouth = wates_coordination_sheet.iloc[524:567, :]
    wates_coordination_sheet_cambridgeshire = wates_coordination_sheet.iloc[571:581, :]
    wates_coordination_sheet_removed_from_programme = wates_coordination_sheet.iloc[586:926, :]
-    wates_coordination_sheet_abeyance = wates_coordination_sheet.iloc[928:972, :]
+    wates_coordination_sheet_abeyance = wates_coordination_sheet.iloc[930:972, :]

    wates_coordination = pd.concat(
        [
@ -3151,12 +3189,15 @@ def revised_model():
        ]
    )

+    wates_coordination["folder_path"] = wates_coordination["Sharepoint Folder"].apply(
+        lambda x: extract_sharepoint_url(x)
+    )
+
    # Combine the data back

    ############################################################
    # NEW 450 COORDINATED RETROFIT ASSESSMENTS
    #############################################################
-
    retrofit_packages_board = pd.read_excel(
        os.path.join(
            CUSTOMER_FOLDER_PATH,
@ -3361,17 +3402,49 @@ def revised_model():
    wates_coordination = wates_coordination[
        wates_coordination["Retrofit Assessment"].isin(["Completed"])
    ]
+    wates_coordination = wates_coordination[
+        ~pd.isnull(wates_coordination["Postcode"])
+    ]

    wates_manual_filters = {
-        "24 Rabley Wood View": "Wave 2.1 Surveys/3. Wiltshire/24-25 Rabley Wood View"
+        "24 Rabley Wood View": "Wave 2.1 Surveys/3. Wiltshire/24-25 Rabley Wood View",
+        "14 Edencroft": "Wave 2.1 Surveys/3. Wiltshire/14 Edencroft",
+        "Flat 31 Rabley Wood View": "Wave 2.1 Surveys/3. Wiltshire/Flat 31  Rabley Wood View",
+        'Flat 13, Manor Fields': 'Wave 2.1 Surveys/1. Herefordshire/(038) Manor Fields Flat 13',
+        "4 Kittys Lane": "Wave 2.1 Surveys/1. Herefordshire/(005) Kittys Lane 4",
+        '1 Jephson Court': 'Wave 2.1 Surveys/5. Coventry/Jesphson Court 1',
+        '2 Jephson Court': 'Wave 2.1 Surveys/5. Coventry/Jesphson Court 2',
    }
    wates_matching_lookup = []
    # Examples to skip when we cannot get the data
    wates_to_skip = [
        "66 Abbatt Close",  # File type is unusual, couldn't extract the data
+        "Flat 69 Goddard Road",  # Doesn't exist
+        "19 Garth House",  # # File type is unusual, couldn't extract the data
+        '5 Gilpin Close',  # No properly formatted EPR
+        '49 The Hide, Netherfield',  # TODO: TEMP HERE
+        '19 Chanders Rd',
+        '5 Chanders Rd',
+        '23 Chanders Rd',
+        '3 Chanders Rd',
+        '1 Orchard Close',
    ]
    for _, home in tqdm(wates_coordination.iterrows(), total=len(wates_coordination)):

+        # Search the folder
+        filtered = retrofit_assessment_data[
+            retrofit_assessment_data["survey_folder"].str.contains(home["folder_path"], regex=False)
+        ]
+        if len(filtered) == 1:
+            wates_matching_lookup.append(
+                {
+                    "survey_folder": filtered["survey_folder"].values[0],
+                    "Asset ID": home["Asset ID"],
+                    "Name": home["Name"]
+                }
+            )
+            continue
+
        if home["Name"] in wates_to_skip:
            continue

--- a/etl/customers/stonewater/data_cleaning.py
+++ b/etl/customers/stonewater/data_cleaning.py
@ -86,7 +86,6 @@ def download_data_from_sharepoint():
        folder_path="Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders"
    )

-    len(contents["value"])
    folders_to_pull = [
        folder for folder in contents["value"] if folder["name"] in ["3. Wiltshire", "4. Bournemouth", "5. Coventry"]
    ]
@ -108,6 +107,8 @@ def download_data_from_sharepoint():
                folder_path="Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders" + "/" +
                            folder_to_pull["name"] + "/" + property_folder["name"]
            )
+            if not property_folder_contents.get("value"):
+                continue
            # We look for the retrofit assessment folder:
            property_sub_folders = [
                f for f in property_folder_contents["value"] if "ra coordinator info" in f["name"].lower()
@ -138,5 +139,5 @@ def download_data_from_sharepoint():
                drive_id=sharepoint_client.document_drive["id"],
                folder_path=property_folder_path,
                download_dir=download_dir,
-                excluded_file_types=["MOV"]
+                excluded_file_types=["MOV", "jpg"]
            )
--- a/survey_report/app.py
+++ b/survey_report/app.py
@ -1,9 +1,33 @@
 import os
 import PyPDF2
+from string import Template
 from survey_report.extraction.detect_report_type import detect_report_type
 from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor


+def generate_html_report(template_path, output_path, data, *, escape=True):
+    """
+    Reads an HTML template file, injects dynamic values, and generates a final HTML report.
+
+    The template uses ``string.Template`` placeholders (``$name`` / ``${name}``). Placeholders
+    that have no matching key in ``data`` are left in the output unchanged.
+
+    Args:
+    - template_path (str): Path to the HTML template file.
+    - output_path (str): Path to save the generated HTML file. Missing parent directories
+      are created.
+    - data (dict): Dictionary containing dynamic values for the report.
+    - escape (bool): When True (default), each value is converted to ``str`` and HTML-escaped
+      before substitution so characters such as ``&``, ``<`` and ``"`` cannot break or inject
+      markup. Pass False only when the values are trusted, pre-rendered HTML.
+    """
+    from html import escape as html_escape
+
+    # Read the template file
+    with open(template_path, "r", encoding="utf-8") as f:
+        html_template = Template(f.read())  # Use Template from string module
+
+    # Values come from extracted survey documents, so treat them as text rather than markup
+    if escape:
+        data = {key: html_escape(str(value)) for key, value in data.items()}
+
+    # Replace placeholders with actual data
+    final_html = html_template.safe_substitute(data)  # Use safe_substitute to prevent missing key errors
+
+    # Make sure the destination folder exists before writing
+    os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
+
+    # Save the generated HTML file
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(final_html)
+
+    print(f"HTML report generated successfully: {output_path}")
+
+
 def handle():
    """
    Performs the data extraction process for the survey report
@ -48,3 +72,20 @@ def handle():
    data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"]
    data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"]
    del data_sheet["Total Building Dimensions"]
+
+    # Generate the HTML report
+    # Placeholder locations
+    template_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/template.html"
+    output_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/output/report.html"
+    logo_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/assets/logo.png"
+    generate_html_report(
+        template_path, output_path,
+        data={
+            "address": data_sheet["Address"],
+            "logo_path": logo_path,
+            "current_epc": data_sheet["Current EPC Band"],
+            "current_sap": data_sheet["Current SAP Rating"],
+            "potential_epc": "A",  # TODO PLACEHOLDER
+            "potential_sap": 91,  # TODO PLACEHOLDER
+        }
+    )
--- a/survey_report/template.html
+++ b/survey_report/template.html
@ -0,0 +1,123 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Domna Energy Report</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            background-color: #ffffff;
+            color: #333;
+            margin: 0;
+            padding: 0;
+            display: flex;
+            justify-content: center;
+        }
+        .container {
+            width: 100%;
+            max-width: 1300px;
+            margin: 20px auto;
+        }
+        .header {
+            background-color: #1B1F3B;
+            color: white;
+            padding: 30px;
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            border-radius: 12px;
+        }
+        .header h1 {
+            margin: 5px; /* a non-zero length needs a unit; bare "5" is invalid and was ignored */
+            font-size: 24px;
+        }
+        .header p {
+            margin: 5px 0 0;
+            font-size: 16px;
+            color: #d1d5db;
+        }
+        .logo img {
+            height: 60px;
+        }
+
+        /* EPC Rating Cards */
+        .epc-container {
+            display: flex;
+            justify-content: space-between;
+            gap: 20px;
+            margin-top: 30px;
+        }
+        .epc-card {
+            background-color: white;
+            border: 2px solid #ccc;
+            border-radius: 10px;
+            padding: 20px;
+            flex: 1;
+            display: flex;
+            flex-direction: column;
+            justify-content: space-between; /* Spreads the in-flow children (title, rating); .sap-rating is absolutely positioned and not affected */
+            align-items: center;
+            text-align: center;
+            box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
+            position: relative;
+            height: 160px;
+        }
+        .epc-title {
+            font-size: 18px;
+            font-weight: bold;
+            color: #666;
+        }
+        .epc-rating {
+            font-size: 50px;
+            font-weight: bold;
+        }
+        .sap-rating {
+            font-size: 18px;
+            color: #555;
+            position: absolute;
+            bottom: 10px;
+            right: 20px;
+        }
+        .before .epc-rating {
+            color: #1B1F3B; /* Dark navy (same as the header background) */
+        }
+        .after .epc-rating {
+            color: #D4AF37; /* Gold */
+        }
+
+    </style>
+</head>
+<body>
+
+    <div class="container">
+        <!-- Header Section -->
+        <div class="header">
+            <div>
+                <h1>Domna Energy Report</h1>
+                <p>${address}</p> <!-- Address Placeholder -->
+            </div>
+            <div class="logo">
+                <img src="${logo_path}" alt="Domna Logo">
+            </div>
+        </div>
+
+        <!-- EPC Rating Cards -->
+        <div class="epc-container">
+            <div class="epc-card before">
+                <div class="epc-title">Current EPC Rating</div>
+                <div class="epc-rating">${current_epc}</div>
+                <div class="sap-rating">SAP ${current_sap}</div>
+            </div>
+
+            <div class="epc-card after">
+                <div class="epc-title">Potential EPC Rating</div>
+                <div class="epc-rating">${potential_epc}</div>
+                <div class="sap-rating">SAP ${potential_sap}</div>
+            </div>
+        </div>
+
+    </div>
+
+</body>
+</html>