Fix bug extracting heating data from EPR when the Main Heating 2 section is absent

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-29 14:59:32 +00:00
parent 9eb4720c91
commit b74b8823d1

View file

@@ -261,36 +261,50 @@ def extract_epr(pdf_path):
data["Number of Insulated Doors"] = int(insulated_doors_match.group(1))
# Extract Primary Heating Section (Main Heating 1)
primary_heating_section = re.search(r"Main\s*Heating\s*1\s*(.*?)\s*Main\s*Heating\s*2", text, re.DOTALL)
primary_heating_section1 = re.search(r"Main\s*Heating\s*1\s*(.*?)\s*Main\s*Heating\s*2", text, re.DOTALL)
# We may not have a secondary heating
primary_heating_section2 = re.search(r"Main\s*Heating\s*1\s*(.*?)\s*Secondary\s*Heating", text, re.DOTALL)
primary_heating_section = primary_heating_section1 if primary_heating_section1 else primary_heating_section2
primary_text = primary_heating_section.group(1)
data["Existing Primary Heating System"] = re.search(r"Main Heating Code\s*(.*?)\n", primary_text).group(
1).strip()
data["Existing Primary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)",
primary_text).group(1)
data["Existing Primary Heating Controls"] = re.search(r"Main Heating Controls\s*(.*?)\n", primary_text).group(
1).strip()
data["Existing Primary Heating System"] = re.search(
r"Main Heating Code\s*(.*?)\n", primary_text
).group(1).strip()
data["Existing Primary Heating PCDF Reference"] = re.search(
r"PCDF boiler Reference\s*(\d+)", primary_text
).group(1)
data["Existing Primary Heating Controls"] = re.search(
r"Main Heating Controls\s*(.*?)\n", primary_text
).group(1).strip()
data["Existing Primary Heating % of Heat"] = int(
re.search(r"Percentage of Heat\s*(\d+)\s*%?", primary_text).group(1)
)
# Extract Secondary Heating Section (Main Heating 2)
secondary_heating_section = re.search(r"Main\s*Heating\s*2\s*(.*?)\s*Secondary Heating", text, re.DOTALL)
secondary_text = secondary_heating_section.group(1)
data["Existing Secondary Heating System"] = re.search(r"Main Heating Code\s*(.*?)\n", secondary_text).group(
1).strip()
data["Existing Secondary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)",
secondary_text).group(1)
if data["Existing Secondary Heating System"] == "":
if secondary_heating_section is None:
data["Existing Secondary Heating System"] = ""
data["Existing Secondary Heating PCDF Reference"] = ""
data["Existing Secondary Heating Controls"] = ""
data["Existing Secondary Heating % of Heat"] = 0
else:
data["Existing Secondary Heating Controls"] = re.search(r"Main Heating Controls\s*(.*?)\n",
secondary_text).group(1).strip()
data["Existing Secondary Heating % of Heat"] = int(
re.search(r"Percentage of Heat\s*(\d+)\s*%?", secondary_text).group(1)
)
secondary_text = secondary_heating_section.group(1)
data["Existing Secondary Heating System"] = re.search(
r"Main Heating Code\s*(.*?)\n", secondary_text
).group(1).strip()
data["Existing Secondary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)",
secondary_text).group(1)
if data["Existing Secondary Heating System"] == "":
data["Existing Secondary Heating Controls"] = ""
else:
data["Existing Secondary Heating Controls"] = re.search(r"Main Heating Controls\s*(.*?)\n",
secondary_text).group(1).strip()
data["Existing Secondary Heating % of Heat"] = int(
re.search(r"Percentage of Heat\s*(\d+)\s*%?", secondary_text).group(1)
)
# Extract Secondary Heating and Water Heating Codes
secondary_heating_code_match = re.search(r"Secondary Heating Code\s*(.*?)\n", text)