diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 7dde8926..52e9422c 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -34,9 +34,6 @@ def app():
low_memory=False
)
- z = epc_data.groupby(["WALLS_DESCRIPTION", "WALLS_ENERGY_EFF"]).size().reset_index(name="count")
- z = z[z["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
-
# Filter on entries where we have a UPRN
epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
diff --git a/etl/customers/vander_elliot/initial_scoping.py b/etl/customers/vander_elliot/initial_scoping.py
new file mode 100644
index 00000000..de212c7c
--- /dev/null
+++ b/etl/customers/vander_elliot/initial_scoping.py
@@ -0,0 +1,39 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+
+def app():
+    """Return Birmingham properties whose newest EPC is F/G and under a year old.
+
+    Reads the Birmingham domestic EPC certificates CSV from ``local_data``,
+    drops rows without a UPRN, keeps the most recently lodged certificate per
+    UPRN, and then keeps only those rated F or G that were lodged within the
+    last 365 days.
+
+    Returns:
+        pandas.DataFrame: one row per matching UPRN; ``len()`` of it is the
+        number of properties in scope.
+    """
+    epc_data = pd.read_csv(
+        "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
+        low_memory=False
+    )
+
+    # Filter on entries where we have a UPRN. Copy so the column assignment
+    # below writes to an independent frame rather than a slice of the raw data
+    # (avoids pandas' SettingWithCopyWarning).
+    epc_data = epc_data[~pd.isnull(epc_data["UPRN"])].copy()
+
+    # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
+    epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
+    epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
+
+    # Only the worst-rated properties are of interest
+    epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["F", "G"])]
+
+    # Keep certificates lodged within the last year (approximated as 365 days)
+    one_year_ago = pd.Timestamp.now() - pd.DateOffset(days=365)
+    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= one_year_ago]
+
+    # Hand the result back to the caller; previously it was computed and discarded
+    return epc_data