From 4160ec4dcbae01b438010cc75e0d6eb157d76df2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 29 Oct 2024 17:52:51 +0000 Subject: [PATCH] debugging missing secondary heating for summary report, completed extraction for files --- .../stonewater/Wave 3 Preparation.py | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index dc01ef6f..7bedef29 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -109,7 +109,10 @@ def extract_summary_report(pdf_path): # Extract heating system # Extract Primary Heating Data # Extract Primary Heating Section - primary_heating_section = re.search(r"Main\s*Heating1\s*(.*?)\s*Main\s*Heating2", text, re.DOTALL) + primary_heating_section1 = re.search(r"Main\s*Heating1\s*(.*?)\s*Main\s*Heating2", text, re.DOTALL) + primary_heating_section2 = re.search(r"Main\s*Heating1\s*(.*?)\s*Water\s*Heating", text, re.DOTALL) + primary_heating_section = primary_heating_section1 if primary_heating_section1 else primary_heating_section2 + primary_text = primary_heating_section.group(1) data["Existing Primary Heating System"] = re.search(r"Main Heating Code\s*(.*?)\n", primary_text).group( @@ -126,21 +129,29 @@ def extract_summary_report(pdf_path): # Extract Secondary Heating Section secondary_heating_section = re.search(r"Main\s*Heating2\s*(.*?)\s*Water\s*Heating", text, re.DOTALL) - secondary_text = secondary_heating_section.group(1) - main_heating_code_match_secondary = re.search( - r"Main Heating Code\s*(.*?)(?=\n|Percentage of Heat)", secondary_text - ) - data["Existing Secondary Heating System"] = main_heating_code_match_secondary.group(1).strip() - data["Existing Secondary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)", - secondary_text).group(1) - second_heating_controls_match = re.search(r"Main Heating Controls\s*(.*?)\n", secondary_text) - data["Existing Secondary Heating Controls"] = ( - second_heating_controls_match.group(1).strip() if second_heating_controls_match else "" - ) - data["Existing Secondary Heating % of Heat"] = int( - re.search(r"Percentage of Heat\s*(\d+)\s*%", secondary_text).group(1) - ) + if secondary_heating_section is None: + data["Existing Secondary Heating System"] = "" + data["Existing Secondary Heating PCDF Reference"] = "" + data["Existing Secondary Heating Controls"] = "" + data["Existing Secondary Heating % of Heat"] = 0 + + else: + secondary_text = secondary_heating_section.group(1) + + main_heating_code_match_secondary = re.search( + r"Main Heating Code\s*(.*?)(?=\n|Percentage of Heat)", secondary_text + ) + data["Existing Secondary Heating System"] = main_heating_code_match_secondary.group(1).strip() + data["Existing Secondary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)", + secondary_text).group(1) + second_heating_controls_match = re.search(r"Main Heating Controls\s*(.*?)\n", secondary_text) + data["Existing Secondary Heating Controls"] = ( + second_heating_controls_match.group(1).strip() if second_heating_controls_match else "" + ) + data["Existing Secondary Heating % of Heat"] = int( + re.search(r"Percentage of Heat\s*(\d+)\s*%", secondary_text).group(1) + ) # Extract Secondary Heating and Water Heating Codes secondary_heating_code_match = re.search(r"Secondary Heating Code\s*(.*?)\n", text) @@ -638,6 +649,9 @@ def main(): extracted_data.append(summary_data) extracted_data = pd.DataFrame(extracted_data) + extracted_data["Primary Energy Use (kWh/yr)"] = ( + extracted_data["Primary Energy Use Intensity (kWh/m2/yr)"] * extracted_data["Total Floor Area (m2)"] + ) # Save this as a csv # extracted_data.to_csv("Wave 3 Summary Data - first 200 files.csv", index=False)