diff --git a/.idea/Model.iml b/.idea/Model.iml
index df6c4faa..762580d9 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..c916a158 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py
index b2a92e4c..24a8e9bb 100644
--- a/etl/customers/stonewater/Wave 3 Preparation.py
+++ b/etl/customers/stonewater/Wave 3 Preparation.py
@@ -125,6 +125,7 @@ def extract_summary_report(pdf_path):
- Address
"""
+ blah
data = {
"Address": None,
"Postcode": None,
@@ -701,6 +702,7 @@ def extract_epr(pdf_path):
"Primary Energy Use (kWh/yr)": None,
"Primary Energy Use Intensity (kWh/m2/yr)": None,
"Number of Storeys": None,
+ "Main Building Age Band": None,
"Fuel Bill": None,
"Window Age Description": None,
"Window Age Description Proportion (%)": None,
@@ -779,6 +781,10 @@ def extract_epr(pdf_path):
floor_area = re.search(r"Total Floor Area\s(?P<floor_area>\d+)\s?m2", text).group("floor_area")
data["Primary Energy Use Intensity (kWh/m2/yr)"] = data["Primary Energy Use (kWh/yr)"] / int(floor_area)
+ # Extract age band
+ age_band_match = re.search(r"Building part:\s*Main\s*-\s*built in\s*(?:[A-Z]\s*)?(\d{4}-\d{4})", text)
+ data["Main Building Age Band"] = age_band_match.group(1)
+
# Extract Number of Storeys
storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text)
data["Number of Storeys"] = int(storeys_match.group(1))
@@ -3022,7 +3028,6 @@ def revised_model():
# We now do a large pull of all of the data
extracted_data = []
for survey_folder in tqdm(survey_folders):
-
survey_folder_path = os.path.join(CUSTOMER_FOLDER_PATH, survey_folder)
# Check that the survey folder is actually a folder