mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
extracting windows
This commit is contained in:
parent
bcbb43ed8f
commit
f141aa4d84
1 changed files with 62 additions and 9 deletions
|
|
@ -478,6 +478,59 @@ class ElmhurstSummaryReportExtractor:
|
|||
def __init__(self, file_path):
|
||||
self.file_path = file_path
|
||||
|
||||
@staticmethod
def extract_window_age_description(windows_text):
    """
    Summarises the glazing descriptions found in the windows section of a report.

    Each occurrence of a known glazing description counts as one window. The
    two most frequent descriptions are reported with their share of the total.
    Ties are resolved in favour of the description listed first in
    `window_descriptions`.

    Parameters:
    windows_text (str): The text section containing window data.

    Returns:
    dict: A dictionary with the keys
        "Window Age Description" (str): the most frequent description,
        "Window Age Description Proportion (%)" (float): its share of all windows,
        "Secondary Window Age Description" (str or None): the runner-up, or
            None when every window has the same description,
        "Secondary Window Age Description Proportion (%)" (float or 0): the
            runner-up's share, 0 when there is no runner-up,
        "Number of Windows" (int): total number of descriptions matched.

    Raises:
    ValueError: If windows_text is empty or contains none of the known descriptions.
    """
    if not windows_text:
        raise ValueError("Failed to extract window data.")

    # Clean up windows_text by removing line breaks for better pattern matching
    flattened_text = windows_text.replace("\n", "")

    # Define possible window age descriptions
    window_descriptions = [
        "Double post or during 2002",
        "Double pre 2002",
        "Double with unknown install date",
        "Secondary glazing",
        "Triple glazing",
        "Single glazing",
    ]

    # Count occurrences of each description
    description_counts = Counter()
    for description in window_descriptions:
        # Removing a line break fuses the words on either side of it when the
        # line had no trailing space ("Double pre\n2002" -> "Double pre2002"),
        # so whitespace between the words of a description is optional here.
        pattern = r"\s*".join(re.escape(word) for word in description.split())
        description_counts[description] = len(re.findall(pattern, flattened_text))

    total_windows = sum(description_counts.values())
    if not total_windows:
        raise ValueError("Failed to extract window data.")

    # Every description has an entry (possibly 0), so two results always exist
    (most_common_description, window_count), (runner_up, runner_up_count) = (
        description_counts.most_common(2)
    )
    window_proportion = window_count / total_windows * 100

    # A runner-up with no matches means the top description covers 100%
    if runner_up_count:
        second_most_common_description = runner_up
        second_most_common_proportion = runner_up_count / total_windows * 100
    else:
        second_most_common_description = None
        second_most_common_proportion = 0

    return {
        "Window Age Description": most_common_description,
        "Window Age Description Proportion (%)": window_proportion,
        "Secondary Window Age Description": second_most_common_description,
        "Secondary Window Age Description Proportion (%)": second_most_common_proportion,
        "Number of Windows": total_windows,
    }
|
||||
|
||||
def extract(self):
|
||||
"""
|
||||
Extracts specific data from the provided PDF file.
|
||||
|
|
@ -488,8 +541,7 @@ class ElmhurstSummaryReportExtractor:
|
|||
"""
|
||||
|
||||
# Expected keys:
|
||||
# dict_keys(['Total Number of Doors', 'Number of Insulated
|
||||
# Doors', 'Number of Light Fittings', 'Number of LEL Fittings', 'Number of fittings needing LEL', 'Windows',
|
||||
# dict_keys(['Windows',
|
||||
# 'Primary Heating', 'Secondary Heating', 'Building Parts', 'Roof Details', 'Wall Details', 'Conservatory',
|
||||
# 'Water Heating Code'])
|
||||
|
||||
|
|
@ -569,10 +621,15 @@ class ElmhurstSummaryReportExtractor:
|
|||
raise ValueError("Could not extract number of insulated doors")
|
||||
data["Number of Insulated Doors"] = int(insulated_doors_match.group(1))
|
||||
|
||||
# lighting
|
||||
data["Number of Light Fittings"] = int(re.search(r"Total number of light fittings\s*(\d+)", text).group(1))
|
||||
data["Number of LEL Fittings"] = int(re.search(r"Total number of L.E.L. fittings\s*(\d+)", text).group(1))
|
||||
data["Number of fittings needing LEL"] = data["Number of Light Fittings"] - data["Number of LEL Fittings"]
|
||||
|
||||
windows_section = re.search(r"Windows\s*(.*?)\s*Draught Proofing", text, re.DOTALL)
|
||||
windows_text = windows_section.group(1)
|
||||
window_data = extract_window_age_description(windows_text)
|
||||
data.update(window_data)
|
||||
if not windows_section:
|
||||
raise ValueError("Failed to extract window data.")
|
||||
data["Windows"] = self.extract_window_age_description(windows_section.group(1))
|
||||
|
||||
# Extract heating system
|
||||
# Extract Primary Heating Data
|
||||
|
|
@ -636,10 +693,6 @@ class ElmhurstSummaryReportExtractor:
|
|||
dimensions = extract_building_parts_summary(text)
|
||||
data.update(dimensions)
|
||||
|
||||
data["Number of Light Fittings"] = int(re.search(r"Total number of light fittings\s*(\d+)", text).group(1))
|
||||
data["Number of LEL Fittings"] = int(re.search(r"Total number of L.E.L. fittings\s*(\d+)", text).group(1))
|
||||
data["Number of fittings needing LEL"] = data["Number of Light Fittings"] - data["Number of LEL Fittings"]
|
||||
|
||||
extracted_roof_data = extract_roof_details_summary(text)
|
||||
main_roof_data = [roof for roof in extracted_roof_data if "Main" in roof["Building Part"]][0]
|
||||
data["Main Roof Type"] = main_roof_data["Roof Type"]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue