From 69b3ec7961eda55e7b2fe36d17353f215a1bf068 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 13 Nov 2024 23:35:02 +0000 Subject: [PATCH] fixing alternative wall extraction for summary report --- .../stonewater/Wave 3 Preparation.py | 50 +++++++++++-------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 0af3ffbb..8791912a 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -50,18 +50,23 @@ def extract_wall_details_summary(text): # Define pattern to match each building part's wall entry within the section building_part_pattern = re.compile( - r"(Main Property|1st Extension|2nd Extension|[\w\s]+)\n" # Matches each building part + r"(Main Property|1st Extension|2nd Extension|[\w\s]+)\n" # Matches each building part label r"Type\s+(.*?)\n" # Matches main wall Type r"Insulation\s+(.*?)\n" # Matches main wall Insulation - r"(Dry-lining\s+(.*?)\n)?" # Optional Dry-lining + r"(Dry-lining\s+(.*?)\n)?" # Optional main wall Dry-lining r"Wall Thickness Unknown\s+(.*?)\n" # Matches main wall Thickness Unknown - r"Wall Thickness \[mm\]\s+(\d+)" # Matches main wall Thickness - r"(?:\nAlternative Wall Area.*?\n" # Starts matching alternative wall section if present + r"Wall Thickness \[mm\]\s+(\d+)", # Matches main wall Thickness + re.DOTALL + ) + + # Define pattern to capture alternative wall details, if present + alternative_wall_pattern = re.compile( + r"Alternative Wall Area.*?\n" # Matches start of alternative wall section r"Alternative Type\s+(.*?)\n" # Matches alternative wall Type r"Alternative Insulation\s+(.*?)\n" # Matches alternative wall Insulation r"(Alternative Dry-lining\s+(.*?)\n)?" 
# Optional Alternative Dry-lining r"Alternative Wall Thickness Unknown\s+(.*?)\n" # Matches alternative wall Thickness Unknown - r"Alternative Wall Thickness\s+(\d+))?", # Matches alternative wall Thickness + r"Alternative Wall Thickness\s+(\d+)", # Matches alternative wall Thickness re.DOTALL ) @@ -74,27 +79,32 @@ def extract_wall_details_summary(text): main_wall_thickness_unknown = match.group(6).strip() main_wall_thickness = int(match.group(7)) - # Optional alternative wall fields - alt_wall_type = match.group(8).strip() if match.group(8) else None - alt_wall_insulation = match.group(9).strip() if match.group(9) else None - alt_wall_dry_lining = match.group(10).strip() if match.group(10) else None - alt_wall_thickness_unknown = match.group(11).strip() if match.group(11) else None - alt_wall_thickness = int(match.group(12)) if match.group(12) else None - - # Append each building part as a dictionary in the wall_data list - wall_data.append({ + # Initialize dictionary for this wall entry + wall_entry = { "Building Part": wall_label, "Wall Type": main_wall_type, "Wall Insulation": main_wall_insulation, "Wall Dry-lining": main_wall_dry_lining, "Wall Thickness Unknown": main_wall_thickness_unknown, "Wall Thickness (mm)": main_wall_thickness, - "Alternative Wall Type": alt_wall_type, - "Alternative Wall Insulation": alt_wall_insulation, - "Alternative Wall Dry-lining": alt_wall_dry_lining, - "Alternative Wall Thickness Unknown": alt_wall_thickness_unknown, - "Alternative Wall Thickness (mm)": alt_wall_thickness, - }) + "Alternative Wall Type": None, + "Alternative Wall Insulation": None, + "Alternative Wall Dry-lining": "N/A", + "Alternative Wall Thickness Unknown": None, + "Alternative Wall Thickness (mm)": None, + } + + # Check if there's an alternative wall section following this wall entry + alt_match = alternative_wall_pattern.search(wall_section, match.end()) + if alt_match: + wall_entry["Alternative Wall Type"] = alt_match.group(1).strip() + 
wall_entry["Alternative Wall Insulation"] = alt_match.group(2).strip() + wall_entry["Alternative Wall Dry-lining"] = alt_match.group(4).strip() if alt_match.group(4) else "N/A" + wall_entry["Alternative Wall Thickness Unknown"] = alt_match.group(5).strip() + wall_entry["Alternative Wall Thickness (mm)"] = int(alt_match.group(6)) + + # Append each building part as a dictionary in the wall_data list + wall_data.append(wall_entry) return wall_data