done with stonewater for now

2026-07-27 23:35:01 +00:00 · 2024-10-31 12:03:17 +00:00 · 2024-10-31 12:03:17 +00:00 · a9ea89d2ae
commit a9ea89d2ae
parent 7e26fb4b86
1 changed files with 133 additions and 11 deletions
--- a/etl/customers/stonewater/Wave
+++ b/etl/customers/stonewater/Wave
@ -76,10 +76,13 @@ def extract_summary_report(pdf_path):
        'First Extension Wall Area (m2)': None,
        "Number of Light Fittings": None,
        "Number of LEL Fittings": None,
-        "Number of fittings needing LEL": None
+        "Number of fittings needing LEL": None,
+        "Main Roof Type": None,
+        "Main Roof Insulation": None,
+        "Main Roof Insulation Thickness": None,
    }

-    with open(pdf_path, "rb") as file:
+    with (open(pdf_path, "rb") as file):
        reader = PyPDF2.PdfReader(file)
        text = ""
        for page in reader.pages:
@ -205,6 +208,27 @@ def extract_summary_report(pdf_path):
        data["Number of LEL Fittings"] = int(re.search(r"Total number of L.E.L. fittings\s*(\d+)", text).group(1))
        data["Number of fittings needing LEL"] = data["Number of Light Fittings"] - data["Number of LEL Fittings"]

+        roof_section = re.search(r"8\.0 Roofs:\n(.*?)\n9\.0 Floors:", text, re.DOTALL)
+        roof_text = roof_section.group(1).strip()
+        roof_type_match = re.search(r"Type\s*([A-Za-z0-9\s]+)", roof_text)
+        data["Main Roof Type"] = roof_type_match.group(1).strip() if roof_type_match else None
+
+        # Check if "Insulation" exists between Type and Insulation Thickness
+        insulation_search = re.search(
+            r"Type\s+.*?\n(Insulation\s+(.*?)\n)?(Insulation Thickness\s+(.*?)\n)", roof_text, re.DOTALL
+        )
+
+        if insulation_search:
+            # Insulation match will be present if it exists, otherwise it will be None
+            insulation_match = insulation_search.group(2)  # Optional group for Insulation
+            insulation_thickness_match = insulation_search.group(4)  # Required group for Insulation Thickness
+
+            # Populate insulation fields
+            data["Main Roof Insulation"] = insulation_match.strip() if insulation_match else None
+            data["Main Roof Insulation Thickness"] = (
+                insulation_thickness_match.strip() if insulation_thickness_match else None
+            )
+
    return data


@ -434,6 +458,49 @@ def extract_building_parts_summary(text):
    return dimensions


+import re
+
+
+def extract_roof_details_epr(text):
+    """
+    Extracts roof type, insulation, and insulation thickness for each building part
+    in the provided EPR PDF text.
+    """
+    # Define data structure to hold results
+    roof_data = []
+
+    # Locate each building part section
+    building_part_pattern = re.compile(
+        r"Construction details: Building part: (.*?)\n(.*?)(?=Conservatory|Construction details|$)",
+        re.DOTALL
+    )
+
+    # Extract each building part's data, including roof details
+    for match in building_part_pattern.finditer(text):
+        part_name = match.group(1).strip()
+
+        # Clean up the building part name
+        cleaned_part_name = re.sub(r" - built in.*|Room\(s\) in Roof area:.*", "", part_name).strip()
+
+        part_details = match.group(2)
+
+        # Extract Roof Type, Roof Insulation, and Roof Insulation Thickness
+        roof_type_match = re.search(r"Roof Type:\s*(.*?)(?=\n|$)", part_details)
+        roof_insulation_match = re.search(r"Roof Insulation:\s*(.*?)(?=\n|$)", part_details)
+        roof_insulation_thickness_match = re.search(r"Roof Insulation Thickness:\s*(.*?)(?=\n|$)", part_details)
+
+        # Store results for this building part
+        roof_data.append({
+            "Building Part": cleaned_part_name,
+            "Roof Type": roof_type_match.group(1).strip() if roof_type_match else None,
+            "Roof Insulation": roof_insulation_match.group(1).strip() if roof_insulation_match else None,
+            "Roof Insulation Thickness": roof_insulation_thickness_match.group(
+                1).strip() if roof_insulation_thickness_match else None,
+        })
+
+    return roof_data
+
+
 def extract_epr(pdf_path):
    """
    Extracts specific data from an Energy Report (EPR) PDF file.
@ -471,7 +538,10 @@ def extract_epr(pdf_path):
        'First Extension Wall Area (m2)': None,
        "Number of Light Fittings": None,
        "Number of LEL Fittings": None,
-        "Number of fittings needing LEL": None
+        "Number of fittings needing LEL": None,
+        "Main Roof Type": None,
+        "Main Roof Insulation": None,
+        "Main Roof Insulation Thickness": None,
    }

    with open(pdf_path, "rb") as file:
@ -590,6 +660,13 @@ def extract_epr(pdf_path):
        data["Number of LEL Fittings"] = int(lel_fittings_match.group(1))
        data["Number of fittings needing LEL"] = data["Number of Light Fittings"] - data["Number of LEL Fittings"]

+        roof_details = extract_roof_details_epr(text)
+        # Get from the main building
+        main_roof_details = [r for r in roof_details if "Main" in r["Building Part"]]
+        data["Main Roof Type"] = main_roof_details[0]["Roof Type"]
+        data["Main Roof Insulation"] = main_roof_details[0]["Roof Insulation"]
+        data["Main Roof Insulation Thickness"] = main_roof_details[0]["Roof Insulation Thickness"]
+
    return data


@ -1077,13 +1154,11 @@ def main():
    # Save cost sheet - ideally this will be used as a secondary sheet for Stonewater
    cost_sheet.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - cost sheet.xlsx", index=False)

-    stonewater_data["Room in Roof"].value_counts()
-
    # stonewater_data[~pd.isnull(stonewater_data["Room in Roof"])]["survey_folder"].values

    create_proposed_wave_3_bid(
        costed_packages_filepath=os.path.join(
-            CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP).xlsx"
+            CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) V2.xlsx"
        ),
        archetypes_sheet_filepath=os.path.join(
            CUSTOMER_FOLDER_PATH, "Stonewater SHDF_3_0_Board Triage 22.05.24 - Archetyped V3.1.xlsx"
@ -1098,11 +1173,30 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
    archetypes_to_cost = costed_packages[
        [
            "Name", "Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Modelled SAP Band",
-            "Modelled SAP Rating", 'Total Cost of Measures', 'Contingency Cost',
-            'Total Cost of Measures inc Contingency'
+            "Modelled SAP Rating", "Package Ref", 'Total Cost of Measures', 'Contingency Cost',
+            'Total Cost of Measures inc Contingency', 'Main Roof Type', 'Main Roof Insulation',
+            'Main Roof Insulation Thickness', 'Existing Primary Heating System',
+            'Existing Primary Heating PCDF Reference'
        ]
    ].copy()

+    # Combine 'Main Roof Type', 'Main Roof Insulation', 'Main Roof Insulation Thickness', separating by colons!
+    archetypes_to_cost['Surveyed Main Roof'] = (
+        archetypes_to_cost['Main Roof Type'] + ': ' + archetypes_to_cost['Main Roof Insulation'] + ': ' +
+        archetypes_to_cost['Main Roof Insulation Thickness'].astype(str)
+    )
+
+    # Combine the heating systems, separating by colons!
+    archetypes_to_cost['Surveyed Main Heating'] = (
+        archetypes_to_cost['Existing Primary Heating System'] + ': code - ' + archetypes_to_cost[
+        'Existing Primary Heating PCDF Reference'].astype(str)
+    )
+
+    archetypes_to_cost = archetypes_to_cost.drop(
+        columns=['Main Roof Type', 'Main Roof Insulation', 'Main Roof Insulation Thickness',
+                 'Existing Primary Heating System',
+                 'Existing Primary Heating PCDF Reference'])
+
    # We take properties that are EPC D and below (61% of units)
    archetypes_to_cost = archetypes_to_cost[archetypes_to_cost["Current EPC Band"].isin(["D", "E", "F", "G"])]

@ -1139,7 +1233,19 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa

    match_classification = []
    for _, home in tqdm(proposed_sample.iterrows(), total=len(proposed_sample)):
-        surveyed = archetypes_to_cost[archetypes_to_cost["Archetype ID"] == home["Archetype ID"]]
+
+        surveyed = archetypes_to_cost[archetypes_to_cost["Archetype ID"] == home["Archetype ID"]].copy()
+        surveyed["Package Ref"] = surveyed["Package Ref"].astype(str)
+
+        package = " or ".join(sorted([x for x in surveyed["Package Ref"].unique() if x.strip()]))
+        package = package.replace("\n", "")
+
+        surveyed_roofs = " or ".join(sorted([x for x in surveyed["Surveyed Main Roof"].unique() if x.strip()]))
+        surveyed_roofs = surveyed_roofs.replace("\n", "")
+
+        surveyed_heating = " or ".join(sorted([x for x in surveyed["Surveyed Main Heating"].unique() if x.strip()]))
+        surveyed_heating = surveyed_heating.replace("\n", "")
+
        # We now check if we have a perfect match
        surveyed = surveyed[
            (surveyed["Property Type"] == home["Property Type"]) &
@ -1149,17 +1255,33 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
            ]

        if surveyed.empty:
+            if package == "2B2A":
+                raise Exception("Fix me")
            match_classification.append(
                {
                    "Address ID": home["Address ID"],
-                    "Match to Surveyed": "Approximate"
+                    "Match to Surveyed": "Approximate",
+                    "Proposed Package Ref": package,
+                    "Surveyed Archetype Roofs": surveyed_roofs,
+                    "Surveyed Archetype Heating": surveyed_heating
                }
            )
            continue
+        # Re-do
+        package = " or ".join(sorted([x for x in surveyed["Package Ref"].unique() if x.strip()]))
+        package = package.replace("\n", "")
+        surveyed_roofs = " or ".join(sorted([x for x in surveyed["Surveyed Main Roof"].unique() if x.strip()]))
+        surveyed_roofs = surveyed_roofs.replace("\n", "")
+        surveyed_heating = " or ".join(sorted([x for x in surveyed["Surveyed Main Heating"].unique() if x.strip()]))
+        surveyed_heating = surveyed_heating.replace("\n", "")
+
        match_classification.append(
            {
                "Address ID": home["Address ID"],
-                "Match to Surveyed": "Exact"
+                "Match to Surveyed": "Exact",
+                "Proposed Package Ref": package,
+                "Surveyed Archetype Roofs": surveyed_roofs,
+                "Surveyed Archetype Heating": surveyed_heating
            }
        )