From bccf3c621bbec73ac35a18f123ba73b456c695df Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 30 Oct 2024 14:17:20 +0000 Subject: [PATCH] lighting fitting extraction from summary report --- etl/customers/stonewater/Wave 3 Preparation.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 16970803..ccd062e2 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -73,7 +73,10 @@ def extract_summary_report(pdf_path): 'Total Ground Floor Area (m2)': None, 'RIR Floor Area': None, 'Main Building Wall Area (m2)': None, - 'First Extension Wall Area (m2)': None + 'First Extension Wall Area (m2)': None, + "Number of Light Fittings": None, + "Number of LEL Fittings": None, + "Number of fittings needing LEL": None } with open(pdf_path, "rb") as file: @@ -198,6 +201,10 @@ def extract_summary_report(pdf_path): dimensions = extract_building_parts_summary(text) data.update(dimensions) + data["Number of Light Fittings"] = int(re.search(r"Total number of light fittings\s*(\d+)", text).group(1)) + data["Number of LEL Fittings"] = int(re.search(r"Total number of L.E.L. fittings\s*(\d+)", text).group(1)) + data["Number of fittings needing LEL"] = data["Number of Light Fittings"] - data["Number of LEL Fittings"] + return data @@ -771,8 +778,6 @@ def main(): extracted_data["Current SAP Rating"] = extracted_data["Current SAP Rating"].astype(int) extracted_data["Current EPC Band"] = extracted_data["Current SAP Rating"].apply(sap_to_epc) - # TODO: RIR floor area!!! - # Remove some definite duplicates dupes = extracted_data[extracted_data["Address"].duplicated()]["Address"] dupes = extracted_data[extracted_data["Address"].isin(dupes)]