diff --git a/.idea/Model.iml b/.idea/Model.iml
index df6c4faa..762580d9 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..c916a158 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/etl/customers/stonewater/potential_eco_properties.py b/etl/customers/stonewater/potential_eco_properties.py
index 6ea6962b..26321a41 100644
--- a/etl/customers/stonewater/potential_eco_properties.py
+++ b/etl/customers/stonewater/potential_eco_properties.py
@@ -1,4 +1,278 @@
+import os
+import time
+import json
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+from utils.s3 import read_from_s3, read_pickle_from_s3
+
+load_dotenv(dotenv_path="backend/.env")  # relative path: presumably expects to be run from the repository root - confirm
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")  # None when the variable is not set; passed to SearchEpc in get_data
+
+
+def get_data(asset_list):
+    """
+    Look up the newest EPC, and its recommendations, for every home in asset_list.
+
+    asset_list is iterated with iterrows() and must carry "Postcode", "Number", "Full Address" and "row_id".
+    Returns (epc_data, errors): one EPC dict per home found, and the row_ids whose lookup raised.
+    """
+    epc_data = []
+    errors = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        try:
+            searcher = SearchEpc(
+                address1=str(home["Number"]),
+                postcode=home["Postcode"],
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=home["Full Address"],
+                max_retries=5
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+            if searcher.newest_epc is None:
+                continue
+
+            # Look for EPC recommendations; a failed lookup is treated as "no recommendations"
+            try:
+                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
+            except Exception:
+                property_recommendations = {"rows": []}
+            epc_data.append({
+                "row_id": home["row_id"],
+                **searcher.newest_epc.copy(),
+                "recommendations": property_recommendations["rows"]
+            })
+        except Exception:
+            # Best effort: record the failed row, then pause 5 seconds before the next home
+            errors.append(home["row_id"])
+            time.sleep(5)
+
+    return epc_data, errors
+
+
def app():
"""
This code creates a list of cavity properties, for review
"""
+
+ archetyped_properties = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 - "
+ "Archetyped V3.1.xlsx",
+ header=4
+ )
+
+ cavity_descriptions = [
+ "Cavity: AsBuilt (1983-1995)",
+ "Cavity: AsBuilt (Post 1995)",
+ "Cavity: AsBuilt (Pre 1976)",
+ "Cavity: AsBuilt (1976-1982)",
+ ]
+
+ archetyped_properties["Is Cavity Property"] = archetyped_properties["Wall Type"].isin(cavity_descriptions)
+ # We also identify costed-package properties whose "Main Wall Insulation" entry is CWI or extract-and-refill
+
+ costed_packages = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater - Costed Retrofit Packages "
+ "20241030 (WIP) Single Model V2.xlsx",
+ sheet_name="Modelled Packages",
+ header=13
+ )
+
+ needs_cwi = costed_packages[
+ costed_packages["Main Wall Insulation"].isin(
+ [
+ "Poss Extract CWI & Refill (issues identified)",
+ "CWI RdSAP Default"
+ ]
+ )
+ ][["Address ID", "Address", "Current SAP Rating", "Current EPC Band", "Postcode", "Archetype ID",
+ "Main Wall Insulation",
+ "Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness"]]
+
+ # We flag every archetyped property whose Archetype ID appears in the needs_cwi list
+ archetyped_properties["Survey shows CWI needed for Archetype"] = archetyped_properties["Archetype ID"].isin(
+ needs_cwi["Archetype ID"]
+ )
+
+ archetyped_properties = archetyped_properties[~pd.isnull(archetyped_properties["Address ID"])]
+ archetyped_properties = archetyped_properties[archetyped_properties["Address ID"] != "Address ID"]
+
+ # This is the big list: the master sheet of property features, read as a latin1-encoded CSV
+ features = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
+ "master sheet.csv",
+ encoding='latin1'
+ )
+ features["Address ID"] = features["Address ID"].astype(str)
+
+ features_to_merge = features[
+ [
+ "Address ID", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating", "Main Fuel", "Hot Water",
+ "Renewables", "Total Floor Area"
+ ]
+ ]
+
+ stonewater_cavity_properties = archetyped_properties[
+ ["Name", "Postcode", "Osm. ID", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no", "Street name",
+ "Address line 2", "City/Town", "Is Cavity Property", "Survey shows CWI needed for Archetype"]
+ ].merge(
+ features_to_merge, how="left", on="Address ID"
+ )
+
+ # We keep only properties that are as-built cavity, or whose archetype was flagged as needing CWI
+ stonewater_cavity_properties = stonewater_cavity_properties[
+ stonewater_cavity_properties["Is Cavity Property"] |
+ stonewater_cavity_properties["Survey shows CWI needed for Archetype"]
+ ]
+
+ stonewater_cavity_properties["Reason Included"] = "As Built Cavity Property"
+ stonewater_cavity_properties["Reason Included"] = np.where(
+ stonewater_cavity_properties["Survey shows CWI needed for Archetype"] &
+ ~stonewater_cavity_properties["Is Cavity Property"],
+ "Survey revealed potential need for CWI or extract and re-fill",
+ stonewater_cavity_properties["Reason Included"]
+ )
+ stonewater_cavity_properties["Reason Included"] = np.where(
+ stonewater_cavity_properties["Survey shows CWI needed for Archetype"] &
+ stonewater_cavity_properties["Is Cavity Property"],
+ "Surveyed revealed potential need for CWI or extract and re-fill and is an as built cavity property",
+ stonewater_cavity_properties["Reason Included"]
+ )
+ # We indicate the exact properties that need CWI, based on survey findings (Address ID is cast int -> str to match)
+ stonewater_cavity_properties["Reason Included"] = np.where(
+ stonewater_cavity_properties["Address ID"].isin(
+ needs_cwi[needs_cwi["Main Wall Insulation"] == "CWI RdSAP Default"]["Address ID"].astype(int).astype(
+ str).values
+ ),
+ "Survey showed this property needs CWI",
+ stonewater_cavity_properties["Reason Included"]
+ )
+
+ stonewater_cavity_properties["Reason Included"] = np.where(
+ stonewater_cavity_properties["Address ID"].isin(
+ needs_cwi[needs_cwi["Main Wall Insulation"] == "Poss Extract CWI & Refill (issues identified)"][
+ "Address ID"].astype(int).astype(str).values
+ ),
+ "Survey showed this property could need extract and re-fill",
+ stonewater_cavity_properties["Reason Included"]
+ )
+
+ # We get the EPC data from S3 (a JSON file plus a pickled second batch) and stack the two
+ epc_data = json.loads(
+ read_from_s3(
+ bucket_name="retrofit-data-dev",
+ s3_file_name="customers/Stonewater/clustering/epc_data.json"
+ )
+ )
+ epc_data = pd.DataFrame(epc_data)
+
+ epc_data["uprn"] = np.where(
+ epc_data["internal_id"] == 1091,
+ 83143766,
+ epc_data["uprn"]
+ )
+
+ epc_data_batch_2 = read_pickle_from_s3(
+ s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
+ bucket_name="retrofit-data-dev"
+ )
+ epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
+
+ complete_epcs = pd.concat([epc_data, epc_data_batch_2])
+
+ epcs_to_merge = complete_epcs[
+ [
+ "uprn",
+ "address",
+ "postcode",
+ "property-type",
+ "built-form",
+ "inspection-date",
+ "current-energy-rating",
+ "current-energy-efficiency",
+ "roof-description",
+ "walls-description",
+ "transaction-type",
+ "secondheat-description",
+ "total-floor-area",
+ "construction-age-band",
+ "floor-height",
+ "number-habitable-rooms",
+ "mainheat-description",
+ "energy-consumption-current"
+ ]
+ ].rename(
+ columns={
+ "address": "Address",
+ "postcode": "Postcode",
+ "inspection-date": "Date of last EPC",
+ "current-energy-efficiency": "SAP score on register",
+ "current-energy-rating": "EPC rating on register",
+ "property-type": "Property Type",
+ "built-form": "Archetype",
+ "total-floor-area": "Property Floor Area",
+ "construction-age-band": "Property Age Band",
+ "floor-height": "Property Floor Height",
+ "number-habitable-rooms": "Number of Habitable Rooms",
+ "walls-description": "Wall Construction",
+ "roof-description": "Roof Construction",
+ "mainheat-description": "Heating Type",
+ "secondheat-description": "Secondary Heating",
+ "transaction-type": "Reason for last EPC",
+ "energy-consumption-current": "Heat Demand (kWh/m2)",
+ }
+ )
+ # We de-dupe on uprn, keeping the row with the newest inspection date (sorted descending, first row kept)
+ epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"])
+ epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False)
+ epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn")
+
+ # Prefix the Parity feature columns so they stay distinct, then left-merge the EPCs on UPRN
+ stonewater_cavity_properties = stonewater_cavity_properties.rename(
+ columns={
+ "Age": "Parity - Build Age",
+ "Property Type": "Parity - Property Type",
+ "Walls": "Parity - Wall Construction",
+ "Roofs": "Parity - Roof Construction",
+ "Glazing": "Parity - Glazing Type",
+ "Heating": "Parity - Heating Type",
+ "Main Fuel": "Parity - Main Fuel",
+ "Hot Water": "Parity - Hot Water",
+ "Renewables": "Parity - Renewables",
+ "Total Floor Area": "Parity - Total Floor Area"
+ }
+ ).merge(
+ epcs_to_merge,
+ how="left",
+ left_on="UPRN",
+ right_on="uprn"
+ )
+
+ # We now pick out the additional properties: rows of the master sheet whose Address ID is not in the archetyped list
+
+ additional_properties = features[
+ ~features["Address ID"].isin(archetyped_properties["Address ID"].values)
+ ]
+
+ # Filter on cavity properties: the as-built descriptions plus filled, external and internal cavity walls
+ additional_properties = additional_properties[
+ additional_properties["Walls"].isin(
+ cavity_descriptions +
+ ["Cavity: FilledCavity", "Cavity: External", "Cavity: Internal"]
+ )
+ ]
+
+ # Pull the EPCs for these properties - NOTE(review): the loop only derives the address parts so far, no lookup yet
+ for _, home in tqdm(additional_properties.iterrows()):
+ full_address = home["Address"]
+ postcode = home["Postcode"]
+ address1 = full_address.split(",")[0]
diff --git a/etl/customers/stonewater/requirements/requirements-wave-3-prep.txt b/etl/customers/stonewater/requirements/requirements-wave-3-prep.txt
index 97314b32..102f5930 100644
--- a/etl/customers/stonewater/requirements/requirements-wave-3-prep.txt
+++ b/etl/customers/stonewater/requirements/requirements-wave-3-prep.txt
@@ -2,3 +2,7 @@ PyPDF2
pandas
tqdm
openpyxl
+boto3
+epc-api-python==1.0.2
+usaddress==0.5.11
+fuzzywuzzy==0.18.0