diff --git a/.idea/Model.iml b/.idea/Model.iml
index df6c4faa..0e963140 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..35513387 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index f9e978c6..2d658c04 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -2,6 +2,7 @@ import os
import time
import re
+from urllib.parse import urlencode
import usaddress
import pandas as pd
import numpy as np
@@ -257,6 +258,8 @@ class SearchEpc:
params = {"address": self.address1, "postcode": self.postcode}
url = os.path.join(self.client.domestic.host, "search")
+ if size:
+ url += "?" + urlencode({k: v for k, v in {"size": size}.items() if v})
for retry in range(self.max_retries):
try:
diff --git a/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py b/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py
new file mode 100644
index 00000000..3bd87a8c
--- /dev/null
+++ b/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py
@@ -0,0 +1,240 @@
+import os
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+from dotenv import load_dotenv
+from urllib.parse import urlencode
+from epc_api.client import EpcClient
+from utils.logger import setup_logger
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+ estimate_perimeter,
+ estimate_external_wall_area,
+ estimate_number_of_floors
+)
+
+logger = setup_logger()  # module-level logger built by the project's utils.logger.setup_logger
+load_dotenv(dotenv_path="backend/.env")  # relative path - NOTE(review): presumably needs the repo root as cwd; confirm
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")  # None when the variable is not set
+
+# Route-march workbook shared by every CONFIG entry below; only the tab differs between entries.
+# NOTE: this is an absolute, machine-specific path and has to be edited before running elsewhere.
+_ROUTE_MARCH_WORKBOOK = (
+    "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
+    "11.11.2024.xlsx"
+)
+
+# Tabs to process, in order. Each name is passed verbatim as the sheet name when the workbook is
+# read, so the spelling (including the trailing space on the first entry) is kept exactly as written.
+_ROUTE_MARCH_TABS = (
+    "SETTLE GBIS x 242 ",
+    "ACIS GBIS x 76",
+    "SOUTHERN GBIS x 150",
+    "COMMUNITY HOUSING GBIS x 199",
+    "EASTLIGHT GBIS x 42",
+)
+
+# Name of the column that holds the postcodes; the same in every tab.
+_POSTCODE_COLUMN = "Postcode"
+
+# One dict per tab with the keys main() reads:
+#   "filepath"         - workbook to open
+#   "tab"              - sheet to read, also used (truncated) as the output sheet name
+#   "postcode_column"  - column whose unique values are looked up via the EPC API client
+CONFIG = [
+    {
+        "filepath": _ROUTE_MARCH_WORKBOOK,
+        "tab": tab_name,
+        "postcode_column": _POSTCODE_COLUMN,
+    }
+    for tab_name in _ROUTE_MARCH_TABS
+]
+
+# Wall descriptions that mark a property as a cavity-wall candidate in filter_and_prepare_epcs.
+CAVITY_WALL_DESCRIPTIONS = [
+    "Cavity wall, " + detail
+    for detail in (
+        "as built, no insulation (assumed)",
+        "as built, partial insulation (assumed)",
+        "as built, insulated (assumed)",
+        "with internal insulation",
+        "with external insulation",
+    )
+]
+
+# Roof descriptions accepted by the "Solar and Loft" flag in filter_and_prepare_epcs.
+ROOF_DESCRIPTIONS = (
+    ["Pitched, no insulation", "Pitched, no insulation (assumed)"]
+    + [f"Pitched, {depth_mm} mm loft insulation" for depth_mm in (25, 50, 75, 100, 150)]
+    + ["Pitched, limited insulation (assumed)", "Pitched, insulated (assumed)"]
+)
+
+# Tenure values that filter_and_prepare_epcs removes from the result.
+SOCIAL_TENURES = ["Rented (social)", "rental (social)"]
+
+
+def process_postcode_epcs(postcode, client):
+    """Return the latest EPC per property for a postcode (empty DataFrame if none; up to 1000 rows requested).
+
+    Rows are keyed on "uprn" (falling back to "address" when it is null or ""); the newest "lodgement-date" wins.
+    """
+    if not isinstance(postcode, str) or not postcode.strip():
+        logger.warning("Skipping blank or non-text postcode %r", postcode)
+        return pd.DataFrame()
+    # Build the URL with "/" directly: os.path.join uses the OS path separator, which breaks URLs on Windows.
+    url = client.domestic.host.rstrip("/") + "/search?" + urlencode({"size": 1000})
+    response = client.domestic.call(method="get", url=url, params={"postcode": postcode.strip()})
+    if "rows" not in response:
+        logger.warning("No EPCs found for postcode %s", postcode)
+        return pd.DataFrame()
+    postcode_epcs = pd.DataFrame(response["rows"])
+    missing_uprn = postcode_epcs["uprn"].isnull() | (postcode_epcs["uprn"] == "")
+    postcode_epcs["uprn"] = np.where(missing_uprn, postcode_epcs["address"], postcode_epcs["uprn"])
+    return postcode_epcs.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn", keep="first")
+
+
+def filter_and_prepare_epcs(epcs):
+    """Flag cavity-wall and solar/loft candidates, then keep only flagged rows that are not social tenure.
+
+    Adds boolean "Is Cavity Property" (listed wall description, SAP <= 72) and "Solar and Loft" (listed
+    roof description, photo-supply of "0", "" or "0.0", SAP <= 68) columns to a copy of the input.
+    """
+    sap_score = epcs["current-energy-efficiency"].astype(int)  # cast once, shared by both flags
+    is_cavity = epcs["walls-description"].isin(CAVITY_WALL_DESCRIPTIONS) & (sap_score <= 72)
+    no_solar = epcs["photo-supply"].isin(["0", "", "0.0"])
+    solar_and_loft = epcs["roof-description"].isin(ROOF_DESCRIPTIONS) & no_solar & (sap_score <= 68)
+    epcs = epcs.copy()  # work on a copy so the caller's DataFrame does not gain the flag columns
+    epcs["Is Cavity Property"] = is_cavity
+    epcs["Solar and Loft"] = solar_and_loft
+    return epcs[(is_cavity | solar_and_loft) & ~epcs["tenure"].isin(SOCIAL_TENURES)]
+
+
+def rename_and_add_columns(epcs):
+    """Keep the EPC fields needed for the report, rename them to report headings and add derived estimates.
+
+    Args:
+        epcs: EPC rows holding the raw register fields listed below plus the "Is Cavity Property" and
+            "Solar and Loft" flag columns.
+
+    Returns:
+        A new DataFrame with the renamed columns followed by "Estimated Number of Floors", "Estimated
+        Perimeter (m)", "Estimated Heat Loss Perimeter (m2)" and "Roof Insulation Thickness". An input
+        with no rows gives the same columns with no rows.
+    """
+    # EPC field -> report heading, in output column order. None means the name is kept as it is.
+    headings = {
+        "uprn": None,
+        "address": "Address",
+        "postcode": "Postcode",
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "inspection-date": "Date of last EPC",
+        "current-energy-rating": "EPC rating on register",
+        "current-energy-efficiency": "SAP score on register",
+        "roof-description": "Roof Construction",
+        "walls-description": "Wall Construction",
+        "transaction-type": "Reason for last EPC",
+        # New fields needed
+        "secondheat-description": "Secondary Heating",
+        "total-floor-area": "Property Floor Area",
+        "construction-age-band": "Property Age Band",
+        "floor-height": "Property Floor Height",
+        "number-habitable-rooms": "Number of Habitable Rooms",
+        "mainheat-description": "Heating Type",
+        "energy-consumption-current": "Heat Demand (kWh/m2)",
+        "tenure": "Tenure",
+        # Flag columns that must already be present on the input
+        "Is Cavity Property": None,
+        "Solar and Loft": None,
+    }
+    epcs = epcs[list(headings)].rename(columns={field: name for field, name in headings.items() if name})
+
+    # NOTE(review): astype raises ValueError on blank strings; confirm that these two fields are
+    # always populated in the API response.
+    epcs["Number of Habitable Rooms"] = epcs["Number of Habitable Rooms"].astype(int)
+    epcs["Property Floor Area"] = epcs["Property Floor Area"].astype(float)
+
+    # The derived columns are built as plain per-row lists instead of DataFrame.apply(axis=1): on a frame
+    # with no rows apply returns a DataFrame rather than a Series, and assigning that to one column raises.
+    epcs["Estimated Number of Floors"] = [
+        estimate_number_of_floors(property_type) if pd.notnull(property_type) else None
+        for property_type in epcs["Property Type"]
+    ]
+
+    # Floor area and habitable rooms are both divided by the floor count before estimating the perimeter.
+    epcs["Estimated Perimeter (m)"] = [
+        estimate_perimeter(floor_area / floors, rooms / floors)
+        for floor_area, rooms, floors in zip(
+            epcs["Property Floor Area"],
+            epcs["Number of Habitable Rooms"],
+            epcs["Estimated Number of Floors"],
+        )
+    ]
+
+    # The heading says "Perimeter", but the value is whatever estimate_external_wall_area returns.
+    epcs["Estimated Heat Loss Perimeter (m2)"] = [
+        # A falsy floor height (e.g. an empty string) falls back to 2.4.
+        estimate_external_wall_area(floors, float(height) if height else 2.4, perimeter, archetype)
+        for floors, height, perimeter, archetype in zip(
+            epcs["Estimated Number of Floors"],
+            epcs["Property Floor Height"],
+            epcs["Estimated Perimeter (m)"],
+            epcs["Archetype"],
+        )
+    ]
+
+    # Null roof descriptions give None; otherwise RoofAttributes processes the description text.
+    epcs["Roof Insulation Thickness"] = [
+        RoofAttributes(description=roof).process()["insulation_thickness"] if pd.notnull(roof) else None
+        for roof in epcs["Roof Construction"]
+    ]
+
+    return epcs
+
+
+def main():
+    """
+    This application is used to identify additional units that are private rentals or owner occupied that can be
+    included in the route marches
+
+    Required inputs are the following:
+    - An excel file that contains one or many tabs that include the addresses to be visited
+
+    For every CONFIG entry the tab's postcodes are looked up through the EPC client, the results are filtered
+    and reshaped, and one sheet per tab is written to a single workbook. A tab with no EPCs, or with none
+    left after filtering, is logged and skipped.
+    """
+    # This should be set:
+    output_filepath = (
+        "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/PRS and OO properties - WC 11.11.2024.xlsx"
+    )
+    client = EpcClient(auth_token=EPC_AUTH_TOKEN)
+
+    # The with-block closes the workbook even when a tab raises part-way through.
+    with pd.ExcelWriter(output_filepath, engine="xlsxwriter") as writer:
+        for config in CONFIG:
+            logger.info("Processing %s", config["tab"])
+            route_march_addresses = pd.read_excel(config["filepath"], sheet_name=config["tab"], engine="openpyxl")
+            # Blank cells are read as NaN, which is not a postcode: drop them before the lookups.
+            postcodes = route_march_addresses[config["postcode_column"]].dropna().unique()
+
+            postcode_frames = [process_postcode_epcs(postcode, client) for postcode in tqdm(postcodes)]
+            postcode_frames = [frame for frame in postcode_frames if not frame.empty]
+            if not postcode_frames:
+                # pd.concat raises ValueError when it is given nothing to concatenate.
+                logger.warning("No EPCs found for any postcode in %s", config["tab"])
+                continue
+
+            # Concatenate all postcodes' data and filter it
+            epcs = filter_and_prepare_epcs(pd.concat(postcode_frames))
+            # Nothing to report for this tab, so skip the reshaping step and write no sheet for it.
+            if epcs.empty:
+                logger.warning("No qualifying properties in %s", config["tab"])
+                continue
+            epcs = rename_and_add_columns(epcs)
+
+            sheet_name = config["tab"][:31]  # Excel sheet names max length of 31 characters
+            epcs.to_excel(writer, sheet_name=sheet_name, index=False)
+
+    logger.info("Data successfully written to %s", output_filepath)
diff --git a/etl/route_march/oo_prs_additional_units/requirements.txt b/etl/route_march/oo_prs_additional_units/requirements.txt
new file mode 100644
index 00000000..e2f4832c
--- /dev/null
+++ b/etl/route_march/oo_prs_additional_units/requirements.txt
@@ -0,0 +1,10 @@
+openpyxl
+epc-api-python==1.0.2
+numpy==2.1.2
+pandas==2.2.3
+usaddress==0.5.11
+fuzzywuzzy==0.18.0
+boto3==1.35.44
+python-dotenv
+tqdm
+xlsxwriter
\ No newline at end of file