mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
done with summary report extraction
This commit is contained in:
parent
26e0206f37
commit
8b875cbccf
1 changed files with 59 additions and 16 deletions
|
|
@ -801,6 +801,64 @@ class ElmhurstSummaryReportExtractor:
|
|||
|
||||
return wall_data
|
||||
|
||||
@staticmethod
def extract_conservatory(text):
    """
    Extracts conservatory data from the provided text.

    The section is located between "5.0 Conservatory:" and "7.0 Walls:".

    Args:
        text (str): The full text of the Summary Report PDF.

    Returns:
        dict: A dictionary with conservatory details:
            - "Conservatory Present": "Yes" or "No"
            - "Conservatory Separated": matched Yes/No text, or "" if absent
            - "Conservatory Floor Area": float, or 0 if absent/unparseable
            - "Conservatory Double Glazed": matched Yes/No text, or "" if absent
            - "Conservatory Glazed Perimeter": float, or 0 if absent/unparseable
            - "Heated Conservatory Height": remainder of the "Room Height"
              line, or "" if absent

    Raises:
        ValueError: If the conservatory section cannot be located in the text.
    """
    # Extract the section between "5.0 Conservatory:" and "7.0 Walls:"
    section_match = re.search(r"5\.0 Conservatory:(.*?)7\.0 Walls:", text, re.DOTALL)
    if section_match is None:
        logger.error("Failed to extract conservatory data.")
        raise ValueError("Could not extract conservatory data.")

    section = section_match.group(1)

    # A missing answer is treated the same as an explicit "No".
    present = re.search(r"Is there a conservatory\?\s*(Yes|No)", section, re.IGNORECASE)
    if present is None or present.group(1).strip().lower() == "no":
        return {
            "Conservatory Present": "No",
            "Conservatory Separated": "",
            "Conservatory Floor Area": 0,
            "Conservatory Double Glazed": "",
            "Conservatory Glazed Perimeter": 0,
            "Heated Conservatory Height": "",
        }

    # NOTE: the "Yes" path below has not yet been validated against a real
    # report containing a conservatory, so the field patterns are best-effort.

    # Strict decimal pattern: accepts "12", "12.", "12.5" and ".5", but not a
    # bare "." (which float() would reject with a ValueError).
    number = r"(\d+(?:\.\d*)?|\.\d+)"

    def _yes_no(pattern):
        # Return the matched Yes/No text, or "" when the field is absent.
        found = re.search(pattern, section, re.IGNORECASE)
        return found.group(1).strip() if found else ""

    def _number(label_pattern):
        # Return the numeric value following the label, or 0 when absent.
        found = re.search(label_pattern + r"\s*" + number, section, re.IGNORECASE)
        return float(found.group(1)) if found else 0

    height = re.search(r"Room Height\s*(.*?)(?=\n|$)", section, re.IGNORECASE)

    return {
        "Conservatory Present": "Yes",
        "Conservatory Separated": _yes_no(r"Is it thermally separated\?\s*(Yes|No)"),
        "Conservatory Floor Area": _number(r"Floor Area \[m2\]"),
        "Conservatory Double Glazed": _yes_no(r"Double Glazed\s*(Yes|No)"),
        "Conservatory Glazed Perimeter": _number(r"Glazed Perimeter \[m\]"),
        "Heated Conservatory Height": height.group(1).strip() if height else "",
    }
|
||||
|
||||
def extract(self):
|
||||
"""
|
||||
Extracts specific data from the provided PDF file.
|
||||
|
|
@ -810,11 +868,6 @@ class ElmhurstSummaryReportExtractor:
|
|||
- Address
|
||||
"""
|
||||
|
||||
# Expected keys:
|
||||
# dict_keys([
|
||||
# 'Primary Heating', 'Secondary Heating', 'Building Parts', 'Roof Details', 'Wall Details', 'Conservatory',
|
||||
# 'Water Heating Code'])
|
||||
|
||||
data = {}
|
||||
|
||||
with (open(self.file_path, "rb") as file):
|
||||
|
|
@ -904,6 +957,7 @@ class ElmhurstSummaryReportExtractor:
|
|||
data["Building Parts"] = self.extract_building_parts(text)
|
||||
data["Roof Details"] = self.extract_roof_details(text)
|
||||
data["Wall Details"] = self.extract_wall_details(text)
|
||||
data["Conservatory"] = self.extract_conservatory(text)
|
||||
|
||||
water_heating_code_match = re.search(r"Water Heating Code\s*(.*?)\n", text)
|
||||
if not water_heating_code_match:
|
||||
|
|
@ -911,15 +965,4 @@ class ElmhurstSummaryReportExtractor:
|
|||
|
||||
data["Water Heating Code"] = water_heating_code_match.group(1).strip()
|
||||
|
||||
# Get the main building wall data
|
||||
main_building_walls = [wall for wall in walls_data if "Main" in wall["Building Part"]][0]
|
||||
data["Main Wall Type"] = main_building_walls["Wall Type"]
|
||||
data["Main Wall Insulation"] = main_building_walls["Wall Insulation"]
|
||||
data["Main Wall Dry-lining"] = main_building_walls["Wall Dry-lining"]
|
||||
data["Main Wall Thickness"] = main_building_walls["Wall Thickness (mm)"]
|
||||
data["Main Building Alternative Wall Type"] = main_building_walls["Alternative Wall Type"]
|
||||
data["Main Building Alternative Wall Insulation"] = main_building_walls["Alternative Wall Insulation"]
|
||||
data["Main Building Alternative Wall Dry-lining"] = main_building_walls["Alternative Wall Dry-lining"]
|
||||
data["Main Building Alternative Wall Thickness"] = main_building_walls["Alternative Wall Thickness (mm)"]
|
||||
|
||||
return data
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue