mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
extracting secondary heating
This commit is contained in:
parent
f141aa4d84
commit
d489b4346f
1 changed files with 61 additions and 55 deletions
|
|
@ -531,6 +531,62 @@ class ElmhurstSummaryReportExtractor:
|
|||
"Number of Windows": sum(description_counts.values())
|
||||
}
|
||||
|
||||
@staticmethod
def extract_primary_heating(text):
    """
    Extracts the primary ("Main Heating1") heating details from the report text.

    The section is taken as the text between "Main Heating1" and
    "Main Heating2"; when that pair is not found, the text between
    "Main Heating1" and "Water Heating" is used instead.

    :param text: Text of the report to search.
    :return: dict with keys 'System', 'PCDF Reference', 'Controls' (strings)
        and '% of Heat' (int).
    :raises ValueError: if no primary heating section can be located.
    :raises AttributeError: if a label is missing inside the located section
        (the failed regex match is None).
    """
    # A match object is always truthy, so `or` selects the fallback only
    # when the "Main Heating1 ... Main Heating2" pattern did not match.
    section_match = (
        re.search(r"Main\s*Heating1\s*(.*?)\s*Main\s*Heating2", text, re.DOTALL)
        or re.search(r"Main\s*Heating1\s*(.*?)\s*Water\s*Heating", text, re.DOTALL)
    )
    if section_match is None:
        raise ValueError("Failed to extract primary heating data.")

    section_body = section_match.group(1)

    # The fields are read in the same order as the returned dict's keys.
    system = re.search(r"Main Heating Code\s*(.*?)\n", section_body).group(1).strip()
    pcdf_reference = re.search(r"PCDF boiler Reference\s*(\d+)", section_body).group(1)
    controls = re.search(r"Main Heating Controls\s*(.*?)\n", section_body).group(1).strip()
    heat_share = int(re.search(r"Percentage of Heat\s*(\d+)\s*%", section_body).group(1))

    return {
        'System': system,
        'PCDF Reference': pcdf_reference,
        'Controls': controls,
        '% of Heat': heat_share,
    }
|
||||
|
||||
@staticmethod
def extract_secondary_heating_details(text):
    """
    Extracts the secondary ("Main Heating2") heating details from the report text.

    The section is the text between "Main Heating2" and "Water Heating".
    When it is absent, the defaults below are returned unchanged.

    The returned dict always has the same keys, whether or not the section
    was found: the extracted controls are stored under "Controls", the same
    key the defaults declare.

    :param text: Text of the report to search.
    :return: dict with keys "System", "PCDF Reference", "Controls",
        "Heating Code" (strings) and "% of Heat" (int).
    :raises AttributeError: if the section exists but lacks the
        "Main Heating Code", "PCDF boiler Reference" or "Percentage of Heat"
        label (the failed regex match is None).
    """
    # Defaults, returned as-is when there is no secondary heating section.
    output = {
        "System": "",
        "PCDF Reference": "",
        "Controls": "",
        "% of Heat": 0,
        "Heating Code": "",
    }

    secondary_heating_section = re.search(
        r"Main\s*Heating2\s*(.*?)\s*Water\s*Heating", text, re.DOTALL
    )
    if secondary_heating_section is None:
        return output

    # Overwrite defaults
    secondary_text = secondary_heating_section.group(1)

    # The lookahead ends the system name at a newline or at the
    # "Percentage of Heat" label, whichever comes first.
    main_heating_code_match_secondary = re.search(
        r"Main Heating Code\s*(.*?)(?=\n|Percentage of Heat)", secondary_text
    )
    output["System"] = main_heating_code_match_secondary.group(1).strip()
    output["PCDF Reference"] = re.search(
        r"PCDF boiler Reference\s*(\d+)", secondary_text
    ).group(1)

    # Controls are optional: a missing label leaves an empty string.
    second_heating_controls_match = re.search(
        r"Main Heating Controls\s*(.*?)\n", secondary_text
    )
    output["Controls"] = (
        second_heating_controls_match.group(1).strip()
        if second_heating_controls_match
        else ""
    )
    output["% of Heat"] = int(
        re.search(r"Percentage of Heat\s*(\d+)\s*%", secondary_text).group(1)
    )

    # The secondary heating code is searched for in the whole text, not just
    # the section, and is only reported when a secondary system was found.
    if output["System"] != "":
        secondary_heating_code_match = re.search(
            r"Secondary Heating Code\s*(.*?)\n", text
        )
        output["Heating Code"] = (
            secondary_heating_code_match.group(1).strip()
            if secondary_heating_code_match
            else ""
        )

    return output
|
||||
|
||||
def extract(self):
|
||||
"""
|
||||
Extracts specific data from the provided PDF file.
|
||||
|
|
@ -541,13 +597,11 @@ class ElmhurstSummaryReportExtractor:
|
|||
"""
|
||||
|
||||
# Expected keys:
|
||||
# dict_keys(['Windows',
|
||||
# dict_keys([
|
||||
# 'Primary Heating', 'Secondary Heating', 'Building Parts', 'Roof Details', 'Wall Details', 'Conservatory',
|
||||
# 'Water Heating Code'])
|
||||
|
||||
data = {
|
||||
|
||||
}
|
||||
data = {}
|
||||
|
||||
with (open(self.file_path, "rb") as file):
|
||||
reader = PyPDF2.PdfReader(file)
|
||||
|
|
@ -631,62 +685,14 @@ class ElmhurstSummaryReportExtractor:
|
|||
raise ValueError("Failed to extract window data.")
|
||||
data["Windows"] = self.extract_window_age_description(windows_section.group(1))
|
||||
|
||||
# Extract heating system
|
||||
# Extract Primary Heating Data
|
||||
# Extract Primary Heating Section
|
||||
primary_heating_section1 = re.search(r"Main\s*Heating1\s*(.*?)\s*Main\s*Heating2", text, re.DOTALL)
|
||||
primary_heating_section2 = re.search(r"Main\s*Heating1\s*(.*?)\s*Water\s*Heating", text, re.DOTALL)
|
||||
primary_heating_section = primary_heating_section1 if primary_heating_section1 else primary_heating_section2
|
||||
|
||||
primary_text = primary_heating_section.group(1)
|
||||
|
||||
data["Existing Primary Heating System"] = re.search(r"Main Heating Code\s*(.*?)\n", primary_text).group(
|
||||
1).strip()
|
||||
data["Existing Primary Heating PCDF Reference"] = re.search(
|
||||
r"PCDF boiler Reference\s*(\d+)", primary_text
|
||||
).group(1)
|
||||
data["Existing Primary Heating Controls"] = re.search(
|
||||
r"Main Heating Controls\s*(.*?)\n", primary_text
|
||||
).group(1).strip()
|
||||
data["Existing Primary Heating % of Heat"] = int(
|
||||
re.search(r"Percentage of Heat\s*(\d+)\s*%", primary_text).group(1)
|
||||
)
|
||||
data["Primary Heating"] = self.extract_primary_heating(text)
|
||||
data["Secondary Heating"] = self.extract_secondary_heating_details(text)
|
||||
|
||||
# Extract Secondary Heating Section
|
||||
secondary_heating_section = re.search(r"Main\s*Heating2\s*(.*?)\s*Water\s*Heating", text, re.DOTALL)
|
||||
|
||||
if secondary_heating_section is None:
|
||||
data["Existing Secondary Heating System"] = ""
|
||||
data["Existing Secondary Heating PCDF Reference"] = ""
|
||||
data["Existing Secondary Heating Controls"] = ""
|
||||
data["Existing Secondary Heating % of Heat"] = 0
|
||||
|
||||
else:
|
||||
secondary_text = secondary_heating_section.group(1)
|
||||
|
||||
main_heating_code_match_secondary = re.search(
|
||||
r"Main Heating Code\s*(.*?)(?=\n|Percentage of Heat)", secondary_text
|
||||
)
|
||||
data["Existing Secondary Heating System"] = main_heating_code_match_secondary.group(1).strip()
|
||||
data["Existing Secondary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)",
|
||||
secondary_text).group(1)
|
||||
second_heating_controls_match = re.search(r"Main Heating Controls\s*(.*?)\n", secondary_text)
|
||||
data["Existing Secondary Heating Controls"] = (
|
||||
second_heating_controls_match.group(1).strip() if second_heating_controls_match else ""
|
||||
)
|
||||
data["Existing Secondary Heating % of Heat"] = int(
|
||||
re.search(r"Percentage of Heat\s*(\d+)\s*%", secondary_text).group(1)
|
||||
)
|
||||
|
||||
# Extract Secondary Heating and Water Heating Codes
|
||||
secondary_heating_code_match = re.search(r"Secondary Heating Code\s*(.*?)\n", text)
|
||||
water_heating_code_match = re.search(r"Water Heating Code\s*(.*?)\n", text)
|
||||
|
||||
if data["Existing Secondary Heating System"] == "":
|
||||
data["Secondary Heating Code"] = ""
|
||||
else:
|
||||
data["Secondary Heating Code"] = secondary_heating_code_match.group(
|
||||
1).strip() if secondary_heating_code_match else ""
|
||||
water_heating_code_match = re.search(r"Water Heating Code\s*(.*?)\n", text)
|
||||
|
||||
data["Water Heating Code"] = water_heating_code_match.group(1).strip()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue