Debug missing secondary heating in the summary report; complete extraction for files

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-29 17:52:51 +00:00
parent c0d896cd59
commit 4160ec4dcb

View file

@ -109,7 +109,10 @@ def extract_summary_report(pdf_path):
# Extract heating system
# Extract Primary Heating Data
# Extract Primary Heating Section
primary_heating_section = re.search(r"Main\s*Heating1\s*(.*?)\s*Main\s*Heating2", text, re.DOTALL)
primary_heating_section1 = re.search(r"Main\s*Heating1\s*(.*?)\s*Main\s*Heating2", text, re.DOTALL)
primary_heating_section2 = re.search(r"Main\s*Heating1\s*(.*?)\s*Water\s*Heating", text, re.DOTALL)
primary_heating_section = primary_heating_section1 if primary_heating_section1 else primary_heating_section2
primary_text = primary_heating_section.group(1)
data["Existing Primary Heating System"] = re.search(r"Main Heating Code\s*(.*?)\n", primary_text).group(
@ -126,21 +129,29 @@ def extract_summary_report(pdf_path):
# Extract Secondary Heating Section
secondary_heating_section = re.search(r"Main\s*Heating2\s*(.*?)\s*Water\s*Heating", text, re.DOTALL)
secondary_text = secondary_heating_section.group(1)
main_heating_code_match_secondary = re.search(
r"Main Heating Code\s*(.*?)(?=\n|Percentage of Heat)", secondary_text
)
data["Existing Secondary Heating System"] = main_heating_code_match_secondary.group(1).strip()
data["Existing Secondary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)",
secondary_text).group(1)
second_heating_controls_match = re.search(r"Main Heating Controls\s*(.*?)\n", secondary_text)
data["Existing Secondary Heating Controls"] = (
second_heating_controls_match.group(1).strip() if second_heating_controls_match else ""
)
data["Existing Secondary Heating % of Heat"] = int(
re.search(r"Percentage of Heat\s*(\d+)\s*%", secondary_text).group(1)
)
if secondary_heating_section is None:
data["Existing Secondary Heating System"] = ""
data["Existing Secondary Heating PCDF Reference"] = ""
data["Existing Secondary Heating Controls"] = ""
data["Existing Secondary Heating % of Heat"] = 0
else:
secondary_text = secondary_heating_section.group(1)
main_heating_code_match_secondary = re.search(
r"Main Heating Code\s*(.*?)(?=\n|Percentage of Heat)", secondary_text
)
data["Existing Secondary Heating System"] = main_heating_code_match_secondary.group(1).strip()
data["Existing Secondary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)",
secondary_text).group(1)
second_heating_controls_match = re.search(r"Main Heating Controls\s*(.*?)\n", secondary_text)
data["Existing Secondary Heating Controls"] = (
second_heating_controls_match.group(1).strip() if second_heating_controls_match else ""
)
data["Existing Secondary Heating % of Heat"] = int(
re.search(r"Percentage of Heat\s*(\d+)\s*%", secondary_text).group(1)
)
# Extract Secondary Heating and Water Heating Codes
secondary_heating_code_match = re.search(r"Secondary Heating Code\s*(.*?)\n", text)
@ -638,6 +649,9 @@ def main():
extracted_data.append(summary_data)
extracted_data = pd.DataFrame(extracted_data)
extracted_data["Primary Energy Use (kWh/yr)"] = (
extracted_data["Primary Energy Use Intensity (kWh/m2/yr)"] * extracted_data["Total Floor Area (m2)"]
)
# Save this as a csv
# extracted_data.to_csv("Wave 3 Summary Data - first 200 files.csv", index=False)