mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
extracted roof and walls
This commit is contained in:
parent
d489b4346f
commit
26e0206f37
1 changed files with 219 additions and 14 deletions
|
|
@ -587,6 +587,220 @@ class ElmhurstSummaryReportExtractor:
|
|||
|
||||
return output
|
||||
|
||||
@staticmethod
def extract_building_parts(text):
    """
    Extracts building parts and associated dimensions from the summary report PDF
    text and returns the aggregated dimensions.

    The Dimensions section (everything between "Dimension type: Internal" and
    "5.0 Conservatory:") is split into building parts (Main Property and numbered
    extensions). For every part, each floor line contributes a floor area, room
    height, perimeter and party wall length; a "Room(s) in Roof" line contributes
    a floor area only.

    Args:
        text: Full text of the summary report.

    Returns:
        A dict with the keys "Total Floor Area (m2)",
        "Total Ground Floor Area (m2)", "RIR Floor Area",
        "Main Building Wall Area (m2)" and "First Extension Wall Area (m2)".

    Raises:
        ValueError: If the dimensions section cannot be located in the text.
    """
    # Locate the Dimensions section
    dimensions_section = re.search(
        r"Dimensions:\s*Dimension type: Internal\n(.*?)\n5\.0 Conservatory:", text, re.DOTALL
    )
    if not dimensions_section:
        raise ValueError("Failed to locate dimensions section in the text.")

    dimensions_text = dimensions_section.group(1)

    # Each building part runs from its label up to the next extension label
    # (or the end of the section).
    building_part_pattern = re.compile(
        r"(Main Property|\d+(?:st|nd|rd|th) Extension)\s*"
        r"(.*?)(?=\d+(?:st|nd|rd|th) Extension|5\.0 Conservatory|$)",
        re.DOTALL,
    )
    # Floor Level, Floor Area, Room Height, Perimeter, Party Wall Length.
    # Compiled once here rather than on every building part iteration.
    floor_pattern = re.compile(
        r"(1st Floor|Lowest Floor|Second floor):\s*([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)"
    )
    # "Room(s) in Roof" entries only carry a Floor Area.
    room_in_roof_pattern = re.compile(r"Room\(s\) in Roof:\s*([\d.]+)")

    data = []
    for match in building_part_pattern.finditer(dimensions_text):
        part_name = match.group(1)
        floor_data = match.group(2)

        for floor_match in floor_pattern.finditer(floor_data):
            data.append(
                {
                    "Building Part": part_name,
                    "Floor Level": floor_match.group(1),
                    "Floor Area (m2)": float(floor_match.group(2)),
                    "Room Height (m)": float(floor_match.group(3)),
                    "Perimeter (m)": float(floor_match.group(4)),
                    "Party Wall Length (m)": float(floor_match.group(5)),
                }
            )

        room_in_roof_match = room_in_roof_pattern.search(floor_data)
        if room_in_roof_match:
            data.append(
                {
                    "Building Part": part_name,
                    "Floor Level": "Room in Roof",
                    "Floor Area (m2)": float(room_in_roof_match.group(1)),
                    "Room Height (m)": None,  # Not reported for rooms in roof
                    "Perimeter (m)": None,  # Not reported for rooms in roof
                    "Party Wall Length (m)": None,  # Not reported for rooms in roof
                }
            )

    def floor_area(floor_level):
        # Sum of floor areas over every building part for one floor level.
        return sum(row["Floor Area (m2)"] for row in data if row["Floor Level"] == floor_level)

    def wall_area(building_part):
        # Perimeter x height summed over the floors of one building part.
        # The label is compared exactly so that e.g. "21st Extension" is not
        # mistaken for "1st Extension"; rows without a perimeter or height
        # (rooms in roof) are skipped.
        return sum(
            row["Perimeter (m)"] * row["Room Height (m)"]
            for row in data
            if row["Building Part"] == building_part
            and row["Perimeter (m)"]
            and row["Room Height (m)"]
        )

    # Calculate aggregated dimensions
    dimensions = {
        "Total Floor Area (m2)": sum(row["Floor Area (m2)"] for row in data),
        "Total Ground Floor Area (m2)": floor_area("Lowest Floor"),
        "RIR Floor Area": floor_area("Room in Roof"),
        "Main Building Wall Area (m2)": wall_area("Main Property"),
        "First Extension Wall Area (m2)": wall_area("1st Extension"),
    }

    return dimensions
|
||||
|
||||
@staticmethod
def extract_roof_details(text):
    """
    Extracts roof type, insulation, and insulation thickness for each building part
    in the 8.0 Roofs section of the summary report.

    Args:
        text: Full text of the summary report.

    Returns:
        A list with one dict per building part, holding the keys
        "Building Part", "Roof Type", "Roof Insulation" and
        "Roof Insulation Thickness". The two insulation values are None when
        the report has no such line for that part. An empty list is returned
        when no "8.0 Roofs:" section is found.
    """
    roof_data = []

    # Locate the entire 8.0 Roofs section
    roof_section_match = re.search(r"8\.0 Roofs:\n(.*?)(?=\n9\.0 Floors:|$)", text, re.DOTALL)
    if not roof_section_match:
        return roof_data  # Return empty if no roof section is found

    # Re-append "9.0 Floors:" so the last field always has a terminating line
    roof_section = roof_section_match.group(1).strip() + "\n9.0 Floors:"

    # A field value ends where the next line starts a new field or a new building
    # part label. Numbered extension labels ("1st Extension", ...) start with a
    # digit, so they must be listed explicitly: the capital-letter test alone
    # lets them be swallowed into the preceding value, which also hides that
    # extension's own roof entry.
    boundary = r"9\.0 Floors:|\d+(?:st|nd|rd|th) Extension|[A-Z]"
    label_re = r"(Main Property|1st Extension|2nd Extension|[\w\s]+)\n"
    type_re = r"Type\s+(.*?)(?=\n(?:Insulation|" + boundary + r"))"
    insulation_re = (
        r"(?:\nInsulation\s+(.*?)(?=\n(?:Insulation Thickness|" + boundary + r")))?"
    )
    thickness_re = r"(?:\nInsulation Thickness\s+(.*?)(?=\n(?:" + boundary + r")))?"
    building_part_pattern = re.compile(
        label_re + type_re + insulation_re + thickness_re, re.DOTALL
    )

    # Extract each building part's data
    for match in building_part_pattern.finditer(roof_section):
        part_name = match.group(1).strip()
        roof_type = match.group(2).strip()
        roof_insulation = match.group(3).strip() if match.group(3) else None
        roof_insulation_thickness = match.group(4).strip() if match.group(4) else None

        # Kept as a safety net for the case where a following line is still
        # captured together with this roof type, e.g.
        # 'A Another dwelling above\n1st Extension'
        if roof_type.startswith("A Another dwelling above"):
            roof_type = "A Another dwelling above"

        roof_data.append(
            {
                "Building Part": part_name,
                "Roof Type": roof_type,
                "Roof Insulation": roof_insulation,
                "Roof Insulation Thickness": roof_insulation_thickness,
            }
        )

    return roof_data
|
||||
|
||||
@staticmethod
def extract_wall_details(text):
    """
    Extracts wall type, insulation, dry-lining, and thickness for each building part,
    including any alternative wall details within the 7.0 Walls section of the summary PDF text.

    Args:
        text: Full text of the summary report.

    Returns:
        A list with one dict per building part. Main wall values are stored under
        "Wall Type", "Wall Insulation", "Wall Dry-lining", "Wall Thickness Unknown"
        and "Wall Thickness (mm)"; the same keys prefixed with "Alternative " hold
        the alternative wall, and stay None ("N/A" for dry-lining) when the part
        has no alternative wall.

    Raises:
        ValueError: If the section between "7.0 Walls:" and "8.0 Roofs:" cannot be
            located in the text.
    """
    # Locate the entire 7.0 Walls section
    wall_section_match = re.search(r"7\.0 Walls:\n(.*?)\n8\.0 Roofs:", text, re.DOTALL)
    if not wall_section_match:
        raise ValueError("Failed to locate walls section in the text.")
    wall_section = wall_section_match.group(1)

    # Pattern to match each building part's main wall entry within the section
    building_part_pattern = re.compile(
        r"(Main Property|1st Extension|2nd Extension|[\w\s]+)\n"  # Building part label
        r"Type\s+(.*?)\n"  # Main wall Type
        r"Insulation\s+(.*?)\n"  # Main wall Insulation
        r"(Dry-lining\s+(.*?)\n)?"  # Optional main wall Dry-lining
        r"Wall Thickness Unknown\s+(.*?)\n"  # Main wall Thickness Unknown
        r"Wall Thickness \[mm\]\s+(\d+)",  # Main wall Thickness
        re.DOTALL,
    )

    # Pattern to capture alternative wall details, if present
    alternative_wall_pattern = re.compile(
        r"Alternative Wall Area.*?\n"  # Start of alternative wall section
        r"Alternative Type\s+(.*?)\n"  # Alternative wall Type
        r"Alternative Insulation\s+(.*?)\n"  # Alternative wall Insulation
        r"(Alternative Dry-lining\s+(.*?)\n)?"  # Optional Alternative Dry-lining
        r"Alternative Wall Thickness Unknown\s+(.*?)\n"  # Alternative wall Thickness Unknown
        r"Alternative Wall Thickness\s+(\d+)",  # Alternative wall Thickness
        re.DOTALL,
    )

    wall_data = []
    matches = list(building_part_pattern.finditer(wall_section))
    for index, match in enumerate(matches):
        wall_entry = {
            "Building Part": match.group(1).strip(),
            "Wall Type": match.group(2).strip(),
            "Wall Insulation": match.group(3).strip(),
            "Wall Dry-lining": match.group(5).strip() if match.group(5) else "N/A",
            "Wall Thickness Unknown": match.group(6).strip(),
            "Wall Thickness (mm)": int(match.group(7)),
            "Alternative Wall Type": None,
            "Alternative Wall Insulation": None,
            "Alternative Wall Dry-lining": "N/A",
            "Alternative Wall Thickness Unknown": None,
            "Alternative Wall Thickness (mm)": None,
        }

        # Only look for an alternative wall between the end of this entry and the
        # "Type" line of the next building part. An unbounded search would hand
        # this part the alternative wall that belongs to a later part.
        if index + 1 < len(matches):
            search_end = matches[index + 1].end(1)
        else:
            search_end = len(wall_section)

        alt_match = alternative_wall_pattern.search(wall_section, match.end(), search_end)
        if alt_match:
            wall_entry["Alternative Wall Type"] = alt_match.group(1).strip()
            wall_entry["Alternative Wall Insulation"] = alt_match.group(2).strip()
            wall_entry["Alternative Wall Dry-lining"] = (
                alt_match.group(4).strip() if alt_match.group(4) else "N/A"
            )
            wall_entry["Alternative Wall Thickness Unknown"] = alt_match.group(5).strip()
            wall_entry["Alternative Wall Thickness (mm)"] = int(alt_match.group(6))

        wall_data.append(wall_entry)

    return wall_data
|
||||
|
||||
def extract(self):
|
||||
"""
|
||||
Extracts specific data from the provided PDF file.
|
||||
|
|
@ -687,25 +901,16 @@ class ElmhurstSummaryReportExtractor:
|
|||
|
||||
data["Primary Heating"] = self.extract_primary_heating(text)
|
||||
data["Secondary Heating"] = self.extract_secondary_heating_details(text)
|
||||
|
||||
# Extract Secondary Heating Section
|
||||
|
||||
# Extract Secondary Heating and Water Heating Codes
|
||||
data["Building Parts"] = self.extract_building_parts(text)
|
||||
data["Roof Details"] = self.extract_roof_details(text)
|
||||
data["Wall Details"] = self.extract_wall_details(text)
|
||||
|
||||
water_heating_code_match = re.search(r"Water Heating Code\s*(.*?)\n", text)
|
||||
if not water_heating_code_match:
|
||||
raise ValueError("Failed to extract water heating code.")
|
||||
|
||||
data["Water Heating Code"] = water_heating_code_match.group(1).strip()
|
||||
|
||||
dimensions = extract_building_parts_summary(text)
|
||||
data.update(dimensions)
|
||||
|
||||
extracted_roof_data = extract_roof_details_summary(text)
|
||||
main_roof_data = [roof for roof in extracted_roof_data if "Main" in roof["Building Part"]][0]
|
||||
data["Main Roof Type"] = main_roof_data["Roof Type"]
|
||||
data["Main Roof Insulation"] = main_roof_data["Roof Insulation"]
|
||||
data["Main Roof Insulation Thickness"] = main_roof_data["Roof Insulation Thickness"]
|
||||
|
||||
walls_data = extract_wall_details_summary(text)
|
||||
# Get the main building wall data
|
||||
main_building_walls = [wall for wall in walls_data if "Main" in wall["Building Part"]][0]
|
||||
data["Main Wall Type"] = main_building_walls["Wall Type"]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue