diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 4ab33732..1b7b1bcd 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -20,9 +20,8 @@ def extract_summary_report(pdf_path): "Address": None, "Postcode": None, "Current SAP Rating": None, - "Space Heating": None, - "Water Heating": None, "Fuel Bill": None, + "Number of Storeys": None, "Window Age Description": None, "Window Age Description Proportion (%)": None, "Secondary Window Age Description": None, @@ -203,9 +202,8 @@ def extract_epr(pdf_path): "Address": None, "Postcode": None, "Current SAP Rating": None, - "Potential SAP Rating": None, - "Space Heating": None, - "Water Heating": None, + "Primary Energy Use Intensity (kWh/m2/yr)": None, + "Number of Storeys": None, "Fuel Bill": None, "Window Age Description": None, "Window Age Description Proportion (%)": None, @@ -242,6 +240,14 @@ def extract_epr(pdf_path): current_sap, _ = int(sap_match.group(1)), int(sap_match.group(2)) data["Current SAP Rating"] = current_sap + # Extract the primary energy use intensity + additional_rating_match = re.search(r"Additional ratings for your home\s*([\d.]+)", text) + data["Primary Energy Use Intensity (kWh/m2/yr)"] = float(additional_rating_match.group(1)) + + # Extract Number of Storeys + storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text) + data["Number of Storeys"] = int(storeys_match.group(1)) + # Extract Fuel Bill fuel_bill_match = re.search(r"TOTAL\s*£(\d+)", text) data["Fuel Bill"] = f"£{fuel_bill_match.group(1)}"