From b7f402ba9d699ede3693068f8bec9e2087c0a8aa Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 29 Oct 2024 13:55:18 +0000 Subject: [PATCH] added # Storeys --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- etl/customers/stonewater/Wave 3 Preparation.py | 11 +++++++---- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index 850c0cda..762580d9 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index e4070118..c916a158 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index d8d01b22..b1b48cec 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -19,6 +19,7 @@ def extract_summary_report(pdf_path): data = { "Address": None, "Current SAP Rating": None, + "Number of Storeys": None, "Fuel Bill": None, "Window Age Description": None, "Window Age Description Proportion (%)": None, @@ -32,13 +33,15 @@ def extract_summary_report(pdf_path): # Extract Current SAP rating sap_match = re.search(r"Current SAP rating:\s*([A-Z] \d+)", text) - if sap_match: - data["Current SAP Rating"] = sap_match.group(1) + data["Current SAP Rating"] = sap_match.group(1) + + # Number of storeys + storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text) + data["Number of Storeys"] = int(storeys_match.group(1)) # Extract Fuel Bill fuel_bill_match = re.search(r"Fuel Bill:\s*£(\d+)", text) - if fuel_bill_match: - data["Fuel Bill"] = f"£{fuel_bill_match.group(1)}" + data["Fuel Bill"] = f"£{fuel_bill_match.group(1)}" # Extract individual address components postcode = re.search(r"Postcode:\s*(.*?)\nRegion:", text)