diff --git a/.idea/Model.iml b/.idea/Model.iml index df6c4faa..762580d9 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..c916a158 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index b2a92e4c..24a8e9bb 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -125,6 +125,7 @@ def extract_summary_report(pdf_path): - Address """ + blah data = { "Address": None, "Postcode": None, @@ -701,6 +702,7 @@ def extract_epr(pdf_path): "Primary Energy Use (kWh/yr)": None, "Primary Energy Use Intensity (kWh/m2/yr)": None, "Number of Storeys": None, + "Main Building Age Band": None, "Fuel Bill": None, "Window Age Description": None, "Window Age Description Proportion (%)": None, @@ -779,6 +781,10 @@ def extract_epr(pdf_path): floor_area = re.search(r"Total Floor Area\s(?P\d+)\s?m2", text).group("floor_area") data["Primary Energy Use Intensity (kWh/m2/yr)"] = data["Primary Energy Use (kWh/yr)"] / int(floor_area) + # Extract age band + age_band_match = re.search(r"Building part:\s*Main\s*-\s*built in\s*(?:[A-Z]\s*)?(\d{4}-\d{4})", text) + data["Main Building Age Band"] = age_band_match.group(1) + # Extract Number of Storeys storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text) data["Number of Storeys"] = int(storeys_match.group(1)) @@ -3022,7 +3028,6 @@ def revised_model(): # We now do a large pull of all of the data extracted_data = [] for survey_folder in tqdm(survey_folders): - survey_folder_path = os.path.join(CUSTOMER_FOLDER_PATH, survey_folder) # Check that the survey folder is actually a folder