diff --git a/.idea/Model.iml b/.idea/Model.iml
index df6c4faa..0e963140 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..35513387 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index f9e978c6..2d658c04 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -2,6 +2,7 @@ import os
import time
import re
+from urllib.parse import urlencode
import usaddress
import pandas as pd
import numpy as np
@@ -257,6 +258,8 @@ class SearchEpc:
params = {"address": self.address1, "postcode": self.postcode}
url = os.path.join(self.client.domestic.host, "search")
+ if size:
+ url += "?" + urlencode({k: v for k, v in {"size": size}.items() if v})
for retry in range(self.max_retries):
try:
diff --git a/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py b/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py
new file mode 100644
index 00000000..345f0afe
--- /dev/null
+++ b/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py
@@ -0,0 +1,122 @@
+import os
+import pandas as pd
+import numpy as np
+from dotenv import load_dotenv
+from urllib.parse import urlencode
+from epc_api.client import EpcClient
+
+# Load the backend's dotenv file so the EPC API token can be read from the environment.
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+# All of the route-march tabs listed below are read from this single workbook.
+_ROUTE_MARCH_WORKBOOK = (
+    "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
+    "11.11.2024.xlsx"
+)
+
+# Worksheet names, kept character-for-character (note the trailing space on the first entry).
+_ROUTE_MARCH_TABS = (
+    "SETTLE GBIS x 242 ",
+    "ACIS GBIS x 76",
+    "SOUTHERN GBIS x 150",
+    "COMMUNITY HOUSING GBIS x 199",
+    "EASTLIGHT GBIS x 42",
+)
+
+# One entry per worksheet: the workbook to open, the tab to read and the column holding the postcodes.
+CONFIG = [
+    {"filepath": _ROUTE_MARCH_WORKBOOK, "tab": tab_name, "postcode_column": "Postcode"}
+    for tab_name in _ROUTE_MARCH_TABS
+]
+
+# "walls-description" values that main() accepts when flagging "Is Cavity Property".
+CAVITY_WALL_DESCRIPTIONS = [
+    "Cavity wall, as built, no insulation (assumed)",
+    "Cavity wall, as built, partial insulation (assumed)",
+    "Cavity wall, as built, insulated (assumed)",
+    "Cavity wall, with internal insulation",
+    "Cavity wall, with external insulation",
+]
+
+# "roof-description" values that main() accepts when flagging "Solar and Loft".
+ROOF_DESCRIPTIONS = [
+    "Pitched, no insulation",
+    "Pitched, no insulation (assumed)",
+    "Pitched, 25 mm loft insulation",
+    "Pitched, 50 mm loft insulation",
+    "Pitched, 75 mm loft insulation",
+    "Pitched, 100 mm loft insulation",
+    "Pitched, 150 mm loft insulation",
+    "Pitched, limited insulation (assumed)",
+    "Pitched, insulated (assumed)",
+]
+
+# "tenure" values whose rows main() removes from the results.
+SOCIAL_TENURES = ["Rented (social)", "rental (social)"]
+
+
+
+def _latest_epc_per_property(postcode_epcs):
+    """
+    Reduce a postcode's EPC rows to the most recently lodged certificate for each property.
+
+    :param postcode_epcs: DataFrame of EPC search rows with "uprn", "address" and "lodgement-date" columns
+    :return: DataFrame with one row per distinct "uprn" value, where a blank UPRN has been replaced by the address
+    """
+    uprn = postcode_epcs["uprn"]
+    # Fall back to the address when there is no UPRN. Empty strings count as missing as well as nulls: the
+    # photo-supply check below lists "" as a value, so blanks presumably arrive as empty strings, and leaving
+    # them in place would make drop_duplicates collapse every UPRN-less property in the postcode into one row.
+    uprn_missing = uprn.isna() | uprn.astype(str).str.strip().eq("")
+    postcode_epcs = postcode_epcs.assign(uprn=uprn.where(~uprn_missing, postcode_epcs["address"]))
+
+    # Newest first, so keep="first" retains the latest certificate for each property
+    postcode_epcs = postcode_epcs.sort_values("lodgement-date", ascending=False)
+    return postcode_epcs.drop_duplicates("uprn", keep="first")
+
+
+def _eligible_private_epcs(postcode_epcs):
+    """
+    Flag cavity-wall and solar/loft candidates, keep only flagged rows and drop social tenures.
+
+    :param postcode_epcs: DataFrame with "walls-description", "roof-description", "photo-supply",
+        "current-energy-efficiency" and "tenure" columns
+    :return: the matching rows, with boolean "Is Cavity Property" and "Solar and Loft" columns added
+    """
+    # Coerce instead of astype(int): a blank or malformed rating becomes NaN, which fails both threshold
+    # comparisons, rather than raising and aborting the whole run. Computed once and shared by both flags.
+    efficiency = pd.to_numeric(postcode_epcs["current-energy-efficiency"], errors="coerce")
+
+    is_cavity = postcode_epcs["walls-description"].isin(CAVITY_WALL_DESCRIPTIONS) & (efficiency <= 72)
+    solar_and_loft = (
+        postcode_epcs["roof-description"].isin(ROOF_DESCRIPTIONS)
+        # Zero or blank photo-supply; presumably means no existing solar PV (confirm against the EPC schema)
+        & postcode_epcs["photo-supply"].isin(["0", "", "0.0"])
+        & (efficiency <= 68)
+    )
+
+    postcode_epcs = postcode_epcs.assign(**{"Is Cavity Property": is_cavity, "Solar and Loft": solar_and_loft})
+    postcode_epcs = postcode_epcs[is_cavity | solar_and_loft]
+
+    # Remove any social properties
+    return postcode_epcs[~postcode_epcs["tenure"].isin(SOCIAL_TENURES)]
+
+
+def main():
+    """
+    This application is used to identify additional units that are private rentals or owner-occupied and that can
+    be included in the route marches.
+
+    Required inputs are the following:
+    - An excel file that contains one or many tabs that include the addresses to be visited (see CONFIG)
+
+    :return: list of DataFrames, one per CONFIG entry and in the same order, holding the candidate EPC rows found
+        in that tab's postcodes. A tab with no candidates yields an empty DataFrame.
+    """
+    # One client and one search URL serve every request, so build them once rather than per postcode.
+    client = EpcClient(auth_token=EPC_AUTH_TOKEN)
+    # Plain string formatting rather than os.path.join, which uses the OS path separator (a backslash on
+    # Windows). The page size stays in the query string, as before.
+    search_url = f"{client.domestic.host.rstrip('/')}/search?{urlencode({'size': 1000})}"
+
+    results = []
+    for config in CONFIG:
+        # Read in the data
+        route_march_addresses = pd.read_excel(
+            config["filepath"],
+            sheet_name=config["tab"],
+            engine="openpyxl",
+        )
+
+        # Blank cells would otherwise be sent to the API as a NaN postcode
+        postcodes = route_march_addresses[config["postcode_column"]].dropna().astype(str).str.strip()
+        postcodes = postcodes[postcodes != ""].unique()
+
+        epcs = []
+        for postcode in postcodes:
+            # Get the EPCs in this postcode
+            response = client.domestic.call(method="get", url=search_url, params={"postcode": postcode})
+            rows = response.get("rows") if isinstance(response, dict) else None
+            if not rows:
+                # Nothing lodged for this postcode; an empty frame has none of the columns used below
+                continue
+
+            postcode_epcs = _latest_epc_per_property(pd.DataFrame(rows))
+            epcs.append(_eligible_private_epcs(postcode_epcs))
+
+        # pd.concat raises ValueError on an empty list, so fall back to an empty frame
+        results.append(pd.concat(epcs) if epcs else pd.DataFrame())
+
+    return results
diff --git a/etl/route_march/oo_prs_additional_units/requirements.txt b/etl/route_march/oo_prs_additional_units/requirements.txt
new file mode 100644
index 00000000..fd763a3b
--- /dev/null
+++ b/etl/route_march/oo_prs_additional_units/requirements.txt
@@ -0,0 +1,9 @@
+openpyxl
+epc-api-python==1.0.2
+numpy==2.1.2
+pandas==2.2.3
+usaddress==0.5.11
+fuzzywuzzy==0.18.0
+boto3==1.35.44
+python-dotenv
+tqdm
\ No newline at end of file