From d0cf88af6498d73a1155af320e5d6b899e3f94fa Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 30 Oct 2024 14:09:42 +0000 Subject: [PATCH] added RIR area search for epr --- .../stonewater/Wave 3 Preparation.py | 43 ++++++++++++++----- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 6cf26df8..ee5cd1ca 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -256,8 +256,9 @@ def extract_window_age_description(windows_text): def extract_building_parts_epr(text): """ - Extracts building parts and associated dimensions from the provided PDF file. + Extracts building parts and associated dimensions from the provided PDF text. Each building part (main and extensions) includes floor area, room height, perimeter, and party wall length. + Handles cases where 'Room(s) in Roof area' appears within the part_name with only the Floor Area information. """ data = [] @@ -271,12 +272,28 @@ def extract_building_parts_epr(text): # Extract each building part for match in building_part_pattern.finditer(text): part_name = match.group(1).strip() - # Clean up building part name to keep only the descriptor (e.g., "Main" or "1st Extension") - cleaned_part_name = re.sub(r" - built in.*", "", part_name) - floor_data = match.group(2) - # Pattern to match each floor's measurements + # Check for "Room(s) in Roof area" within the part_name + room_in_roof_match = re.search(r"Room\(s\) in Roof area:\s*([\d.]+)", part_name) + if room_in_roof_match: + # Extract Room in Roof area and add it as a separate entry + floor_area = float(room_in_roof_match.group(1)) + # Clean up part name to exclude "Room(s) in Roof area" from the building part name + cleaned_part_name = re.sub(r" - built in.*|Room\(s\) in Roof area:.*", "", part_name).strip() + data.append({ + "Building Part": cleaned_part_name, + "Floor Level": "Room in Roof", + "Floor Area (m2)": floor_area, + "Room Height (m)": None, # Placeholder for missing data + "Perimeter (m)": None, # Placeholder for missing data + "Party Wall Length (m)": None # Placeholder for missing data + }) + else: + # Clean up part name to keep only the descriptor (e.g., "Main" or "1st Extension") + cleaned_part_name = re.sub(r" - built in.*", "", part_name).strip() + + # Pattern to match each floor's measurements in standard cases floor_pattern = re.compile( r"(Lowest floor|First floor|Second floor)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)" ) @@ -299,8 +316,7 @@ def extract_building_parts_epr(text): "Party Wall Length (m)": party_wall_length }) - # We now extract out the aggregated data - + # Aggregated data calculation main_building = [part for part in data if "Main" in part["Building Part"]] first_extension = [part for part in data if "1st Extension" in part["Building Part"]] dimensions = { @@ -308,10 +324,17 @@ def extract_building_parts_epr(text): "Total Ground Floor Area (m2)": sum( [part["Floor Area (m2)"] for part in data if "Lowest floor" in part["Floor Level"]] ), - "RIR Floor Area": 0, - "Main Building Wall Area (m2)": sum([x["Perimeter (m)"] * x["Room Height (m)"] for x in main_building]), + "RIR Floor Area": sum( + [part["Floor Area (m2)"] for part in data if "Room in Roof" in part["Floor Level"]] + ), + "Main Building Wall Area (m2)": sum( + [x["Perimeter (m)"] * x["Room Height (m)"] for x in main_building if + x["Perimeter (m)"] and x["Room Height (m)"]] + ), "First Extension Wall Area (m2)": sum( - [x["Perimeter (m)"] * x["Room Height (m)"] for x in first_extension]) if first_extension else 0, + [x["Perimeter (m)"] * x["Room Height (m)"] for x in first_extension if + x["Perimeter (m)"] and x["Room Height (m)"]] + ) if first_extension else 0, } return dimensions