extracted roof and walls

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-28 09:20:44 +00:00
parent d489b4346f
commit 26e0206f37

View file

@ -587,6 +587,220 @@ class ElmhurstSummaryReportExtractor:
return output
@staticmethod
def extract_building_parts(text):
    """
    Aggregates floor and wall dimensions from the Dimensions section of the summary report PDF text.

    The text between "Dimensions: Dimension type: Internal" and "5.0 Conservatory:" is split into
    building parts ("Main Property" and "<N>st/nd/rd/th Extension"). For each part, every
    "1st Floor" / "Lowest Floor" / "Second floor" row (floor area, room height, perimeter, party wall
    length) is read, plus an optional "Room(s) in Roof" row, which only carries a floor area.

    Args:
        text: Text of the summary report.

    Returns:
        dict with the keys "Total Floor Area (m2)", "Total Ground Floor Area (m2)", "RIR Floor Area",
        "Main Building Wall Area (m2)" and "First Extension Wall Area (m2)". Wall areas are computed as
        perimeter * room height, summed over the floors of the relevant building part.

    Raises:
        ValueError: If the Dimensions section cannot be located in the text.
    """
    # Locate the Dimensions section
    dimensions_section = re.search(
        r"Dimensions:\s*Dimension type: Internal\n(.*?)\n5\.0 Conservatory:", text, re.DOTALL
    )
    if not dimensions_section:
        raise ValueError("Failed to locate dimensions section in the text.")
    dimensions_text = dimensions_section.group(1)

    # All patterns are compiled once, outside the loops that use them.
    # Each building part runs from its label up to the next extension label (or the end of the section).
    building_part_pattern = re.compile(
        r"(Main Property|\d+(?:st|nd|rd|th) Extension)\s*"
        r"(.*?)(?=\d+(?:st|nd|rd|th) Extension|5\.0 Conservatory|$)",
        re.DOTALL
    )
    # Floor Level, Floor Area, Room Height, Perimeter, Party Wall Length
    floor_pattern = re.compile(
        r"(1st Floor|Lowest Floor|Second floor):\s*([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)"
    )
    # "Room(s) in Roof" rows only have a Floor Area
    room_in_roof_pattern = re.compile(r"Room\(s\) in Roof:\s*([\d.]+)")

    data = []
    for match in building_part_pattern.finditer(dimensions_text):
        part_name, floor_data = match.group(1), match.group(2)

        for floor_match in floor_pattern.finditer(floor_data):
            data.append(
                {
                    "Building Part": part_name,
                    "Floor Level": floor_match.group(1),
                    "Floor Area (m2)": float(floor_match.group(2)),
                    "Room Height (m)": float(floor_match.group(3)),
                    "Perimeter (m)": float(floor_match.group(4)),
                    "Party Wall Length (m)": float(floor_match.group(5)),
                }
            )

        room_in_roof_match = room_in_roof_pattern.search(floor_data)
        if room_in_roof_match:
            data.append(
                {
                    "Building Part": part_name,
                    "Floor Level": "Room in Roof",
                    "Floor Area (m2)": float(room_in_roof_match.group(1)),
                    "Room Height (m)": None,  # Not reported for rooms in roof
                    "Perimeter (m)": None,  # Not reported for rooms in roof
                    "Party Wall Length (m)": None,  # Not reported for rooms in roof
                }
            )

    def floor_area_total(level=None):
        # Sum of floor areas, optionally restricted to rows whose Floor Level contains `level`.
        return sum(
            row["Floor Area (m2)"] for row in data if level is None or level in row["Floor Level"]
        )

    def wall_area_total(part_label):
        # Perimeter * room height over the floors of one building part; rows where either value is
        # missing or zero (e.g. Room in Roof) are skipped.
        return sum(
            row["Perimeter (m)"] * row["Room Height (m)"]
            for row in data
            if part_label in row["Building Part"] and row["Perimeter (m)"] and row["Room Height (m)"]
        )

    return {
        "Total Floor Area (m2)": floor_area_total(),
        "Total Ground Floor Area (m2)": floor_area_total("Lowest Floor"),
        "RIR Floor Area": floor_area_total("Room in Roof"),
        "Main Building Wall Area (m2)": wall_area_total("Main Property"),
        "First Extension Wall Area (m2)": wall_area_total("1st Extension"),
    }
@staticmethod
def extract_roof_details(text):
    """
    Extracts roof type, insulation, and insulation thickness for each building part
    in the 8.0 Roofs section of the summary report.

    Each entry is expected to be a label line ("Main Property", "<N>st/nd/rd/th Extension", or any
    other single line of word characters) followed by a "Type ..." line and, optionally,
    "Insulation ..." and "Insulation Thickness ..." lines.

    Args:
        text: Text of the summary report.

    Returns:
        list of dicts, one per building part, with the keys "Building Part", "Roof Type",
        "Roof Insulation" and "Roof Insulation Thickness". The two insulation values are None when
        the corresponding line is absent. An empty list is returned when no "8.0 Roofs:" section is
        found.
    """
    # Locate the entire 8.0 Roofs section
    roof_section_match = re.search(r"8\.0 Roofs:\n(.*?)(?=\n9\.0 Floors:|$)", text, re.DOTALL)
    if not roof_section_match:
        return []  # Return empty if no roof section is found

    # Re-append "9.0 Floors:" so the last field always has a boundary to stop at
    roof_section = roof_section_match.group(1).strip() + "\n9.0 Floors:"

    extension_label = r"\d+(?:st|nd|rd|th) Extension"
    # A field value ends before the next line that starts a new field or a new building part.
    # Extension labels start with a digit, so they must be listed explicitly: relying on [A-Z] alone
    # lets "1st Extension" leak into the preceding value and hides the extension's own entry.
    field_end = r"(?=\n(?:" + extension_label + r"|9\.0 Floors:|[A-Z]))"

    building_part_pattern = re.compile(
        # Building part label; the generic fallback is limited to a single line
        r"(Main Property|" + extension_label + r"|[\w \t]+)\s*\n"
        # Roof Type
        + r"Type\s+(.*?)" + field_end
        # Optional Insulation (must not consume an "Insulation Thickness" line)
        + r"(?:\nInsulation(?! Thickness)\s+(.*?)" + field_end + r")?"
        # Optional Insulation Thickness
        + r"(?:\nInsulation Thickness\s+(.*?)" + field_end + r")?",
        re.DOTALL
    )

    roof_data = []
    for match in building_part_pattern.finditer(roof_section):
        label, roof_type, insulation, thickness = match.groups()
        roof_type = roof_type.strip()
        # Kept as a safety net for values that still carry trailing text, e.g.
        # 'A Another dwelling above\n1st Extension'
        if roof_type.startswith("A Another dwelling above"):
            roof_type = "A Another dwelling above"
        roof_data.append(
            {
                "Building Part": label.strip(),
                "Roof Type": roof_type,
                "Roof Insulation": insulation.strip() if insulation else None,
                "Roof Insulation Thickness": thickness.strip() if thickness else None,
            }
        )
    return roof_data
@staticmethod
def extract_wall_details(text):
    """
    Extracts wall type, insulation, dry-lining, and thickness for each building part,
    including any alternative wall details within the 7.0 Walls section of the summary PDF text.

    Each entry is expected to be a label line ("Main Property", "<N>st/nd/rd/th Extension", or any
    other single line of word characters) followed by "Type", "Insulation", optional "Dry-lining",
    "Wall Thickness Unknown" and "Wall Thickness [mm]" lines. An "Alternative Wall Area" block is
    attached to a building part only if it appears before the next building part's entry; only the
    first such block per part is read.

    Args:
        text: Text of the summary report.

    Returns:
        list of dicts, one per building part, with the keys "Building Part", "Wall Type",
        "Wall Insulation", "Wall Dry-lining", "Wall Thickness Unknown", "Wall Thickness (mm)" and the
        matching "Alternative ..." keys. Dry-lining values default to "N/A" when the line is absent;
        the other alternative values are None when the part has no alternative wall.

    Raises:
        ValueError: If the 7.0 Walls section cannot be located in the text.
    """
    # Locate the entire 7.0 Walls section
    wall_section_match = re.search(r"7\.0 Walls:\n(.*?)\n8\.0 Roofs:", text, re.DOTALL)
    if not wall_section_match:
        raise ValueError("Failed to locate walls section in the text.")
    wall_section = wall_section_match.group(1)

    # Pattern for each building part's main wall entry. The generic label fallback is limited to a
    # single line so it cannot absorb preceding lines into the label.
    building_part_pattern = re.compile(
        r"(Main Property|\d+(?:st|nd|rd|th) Extension|[\w \t]+)\s*\n"  # Building part label
        r"Type\s+(.*?)\n"  # Main wall Type
        r"Insulation\s+(.*?)\n"  # Main wall Insulation
        r"(Dry-lining\s+(.*?)\n)?"  # Optional main wall Dry-lining
        r"Wall Thickness Unknown\s+(.*?)\n"  # Main wall Thickness Unknown
        r"Wall Thickness \[mm\]\s+(\d+)",  # Main wall Thickness
        re.DOTALL
    )
    # Pattern for the alternative wall details, if present
    alternative_wall_pattern = re.compile(
        r"Alternative Wall Area.*?\n"  # Start of alternative wall block
        r"Alternative Type\s+(.*?)\n"  # Alternative wall Type
        r"Alternative Insulation\s+(.*?)\n"  # Alternative wall Insulation
        r"(Alternative Dry-lining\s+(.*?)\n)?"  # Optional Alternative Dry-lining
        r"Alternative Wall Thickness Unknown\s+(.*?)\n"  # Alternative wall Thickness Unknown
        r"Alternative Wall Thickness\s+(\d+)",  # Alternative wall Thickness
        re.DOTALL
    )

    part_matches = list(building_part_pattern.finditer(wall_section))
    wall_data = []
    for index, match in enumerate(part_matches):
        wall_entry = {
            "Building Part": match.group(1).strip(),
            "Wall Type": match.group(2).strip(),
            "Wall Insulation": match.group(3).strip(),
            "Wall Dry-lining": match.group(5).strip() if match.group(5) else "N/A",
            "Wall Thickness Unknown": match.group(6).strip(),
            "Wall Thickness (mm)": int(match.group(7)),
            "Alternative Wall Type": None,
            "Alternative Wall Insulation": None,
            "Alternative Wall Dry-lining": "N/A",
            "Alternative Wall Thickness Unknown": None,
            "Alternative Wall Thickness (mm)": None,
        }

        # Only look for an alternative wall between this entry and the next building part; searching
        # to the end of the section would attribute a later part's alternative wall to this one.
        if index + 1 < len(part_matches):
            search_end = part_matches[index + 1].start()
        else:
            search_end = len(wall_section)
        alt_match = alternative_wall_pattern.search(wall_section, match.end(), search_end)
        if alt_match:
            wall_entry["Alternative Wall Type"] = alt_match.group(1).strip()
            wall_entry["Alternative Wall Insulation"] = alt_match.group(2).strip()
            wall_entry["Alternative Wall Dry-lining"] = (
                alt_match.group(4).strip() if alt_match.group(4) else "N/A"
            )
            wall_entry["Alternative Wall Thickness Unknown"] = alt_match.group(5).strip()
            wall_entry["Alternative Wall Thickness (mm)"] = int(alt_match.group(6))

        wall_data.append(wall_entry)
    return wall_data
def extract(self):
"""
Extracts specific data from the provided PDF file.
@ -687,25 +901,16 @@ class ElmhurstSummaryReportExtractor:
data["Primary Heating"] = self.extract_primary_heating(text)
data["Secondary Heating"] = self.extract_secondary_heating_details(text)
# Extract Secondary Heating Section
# Extract Secondary Heating and Water Heating Codes
data["Building Parts"] = self.extract_building_parts(text)
data["Roof Details"] = self.extract_roof_details(text)
data["Wall Details"] = self.extract_wall_details(text)
water_heating_code_match = re.search(r"Water Heating Code\s*(.*?)\n", text)
if not water_heating_code_match:
raise ValueError("Failed to extract water heating code.")
data["Water Heating Code"] = water_heating_code_match.group(1).strip()
dimensions = extract_building_parts_summary(text)
data.update(dimensions)
extracted_roof_data = extract_roof_details_summary(text)
main_roof_data = [roof for roof in extracted_roof_data if "Main" in roof["Building Part"]][0]
data["Main Roof Type"] = main_roof_data["Roof Type"]
data["Main Roof Insulation"] = main_roof_data["Roof Insulation"]
data["Main Roof Insulation Thickness"] = main_roof_data["Roof Insulation Thickness"]
walls_data = extract_wall_details_summary(text)
# Get the main building wall data
main_building_walls = [wall for wall in walls_data if "Main" in wall["Building Part"]][0]
data["Main Wall Type"] = main_building_walls["Wall Type"]