diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 67362865..6cf26df8 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -320,7 +320,7 @@ def extract_building_parts_epr(text): def extract_building_parts_summary(text): """ Extracts building parts and associated dimensions from the summary report PDF. - This includes Main Property and multiple extensions if they exist. + This includes Main Property, multiple extensions if they exist, and Room in Roof areas. """ data = [] @@ -368,6 +368,20 @@ def extract_building_parts_summary(text): "Party Wall Length (m)": party_wall_length }) + # Check specifically for "Room(s) in Roof" entries, which only have Floor Area + room_in_roof_pattern = re.compile(r"Room\(s\) in Roof:\s*([\d.]+)") + room_in_roof_match = room_in_roof_pattern.search(floor_data) + if room_in_roof_match: + floor_area = float(room_in_roof_match.group(1)) + data.append({ + "Building Part": part_name, + "Floor Level": "Room in Roof", + "Floor Area (m2)": floor_area, + "Room Height (m)": None, # Placeholder for missing data + "Perimeter (m)": None, # Placeholder for missing data + "Party Wall Length (m)": None # Placeholder for missing data + }) + # Calculate aggregated dimensions main_property = [part for part in data if "Main Property" in part["Building Part"]] first_extensions = [part for part in data if "1st Extension" in part["Building Part"]] @@ -376,10 +390,14 @@ def extract_building_parts_summary(text): "Total Ground Floor Area (m2)": sum( [part["Floor Area (m2)"] for part in data if "Lowest Floor" in part["Floor Level"]] ), - "RIR Floor Area": 0, - "Main Building Wall Area (m2)": sum([x["Perimeter (m)"] * x["Room Height (m)"] for x in main_property]), + "RIR Floor Area": sum( + [part["Floor Area (m2)"] for part in data if "Room in Roof" in part["Floor Level"]] + ), + "Main Building Wall Area (m2)": sum([x["Perimeter (m)"] * x["Room Height (m)"] for x in main_property if + x["Perimeter (m)"] and x["Room Height (m)"]]), "First Extension Wall Area (m2)": sum( - [x["Perimeter (m)"] * x["Room Height (m)"] for x in first_extensions] + [x["Perimeter (m)"] * x["Room Height (m)"] for x in first_extensions if + x["Perimeter (m)"] and x["Room Height (m)"]] ), } @@ -887,6 +905,9 @@ def main(): how="left" ) + if stonewater_data["Address ID"].duplicated().sum(): + raise Exception("Duplicate Address IDs") + # Create a section for costs for measure in measure_columns: stonewater_data[f"Cost of {measure}"] = None @@ -933,5 +954,10 @@ def main(): windows_data["Address ID"] = windows_data["Address ID"].astype(float) stonewater_data = stonewater_data.merge(windows_data, on="Address ID", how="left") + if stonewater_data["Address ID"].duplicated().sum(): + raise Exception("Duplicate Address IDs") + + # stonewater_data[~pd.isnull(stonewater_data["Room in Roof"])]["survey_folder"].values + # if __name__ == "__main__": # main()