diff --git a/.idea/Model.iml b/.idea/Model.iml
index df6c4faa..0e963140 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..35513387 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index f9e978c6..2d658c04 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -2,6 +2,7 @@
 import os
 import time
 import re
+from urllib.parse import urlencode
 import usaddress
 import pandas as pd
 import numpy as np
@@ -257,6 +258,8 @@ class SearchEpc:
 
         params = {"address": self.address1, "postcode": self.postcode}
         url = os.path.join(self.client.domestic.host, "search")
+        if size:
+            url += "?" + urlencode({k: v for k, v in {"size": size}.items() if v})
 
         for retry in range(self.max_retries):
             try:
diff --git a/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py b/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py
new file mode 100644
index 00000000..345f0afe
--- /dev/null
+++ b/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py
@@ -0,0 +1,154 @@
+import os
+import pandas as pd
+import numpy as np
+from dotenv import load_dotenv
+from urllib.parse import urlencode
+from epc_api.client import EpcClient
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+# One entry per workbook tab to process: the workbook path, the tab name and the column holding the postcodes
+CONFIG = [
+    {
+        "filepath": "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
+                    "11.11.2024.xlsx",
+        "tab": "SETTLE GBIS x 242 ",
+        "postcode_column": "Postcode",
+    },
+    {
+        "filepath": "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
+                    "11.11.2024.xlsx",
+        "tab": "ACIS GBIS x 76",
+        "postcode_column": "Postcode",
+    },
+    {
+        "filepath": "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
+                    "11.11.2024.xlsx",
+        "tab": "SOUTHERN GBIS x 150",
+        "postcode_column": "Postcode",
+    },
+    {
+        "filepath": "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
+                    "11.11.2024.xlsx",
+        "tab": "COMMUNITY HOUSING GBIS x 199",
+        "postcode_column": "Postcode",
+    },
+    {
+        "filepath": "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
+                    "11.11.2024.xlsx",
+        "tab": "EASTLIGHT GBIS x 42",
+        "postcode_column": "Postcode",
+    },
+]
+
+# "walls-description" values that count towards the "Is Cavity Property" flag
+CAVITY_WALL_DESCRIPTIONS = [
+    "Cavity wall, as built, no insulation (assumed)",
+    "Cavity wall, as built, partial insulation (assumed)",
+    "Cavity wall, as built, insulated (assumed)",
+    "Cavity wall, with internal insulation",
+    "Cavity wall, with external insulation",
+]
+
+# "roof-description" values that count towards the "Solar and Loft" flag
+ROOF_DESCRIPTIONS = [
+    "Pitched, no insulation",
+    "Pitched, no insulation (assumed)",
+    "Pitched, 25 mm loft insulation",
+    "Pitched, 50 mm loft insulation",
+    "Pitched, 75 mm loft insulation",
+    "Pitched, 100 mm loft insulation",
+    "Pitched, 150 mm loft insulation",
+    "Pitched, limited insulation (assumed)",
+    "Pitched, insulated (assumed)",
+]
+
+# "tenure" values that mark a property as social, which are removed from the results
+SOCIAL_TENURES = ["Rented (social)", "rental (social)"]
+
+# Value of the "size" query parameter sent with every EPC search
+EPC_SEARCH_SIZE = 1000
+
+
+def _filter_additional_units(postcode_epcs):
+    """
+    Reduce the EPC rows returned for one postcode to the non-social properties that pass the cavity or solar checks
+
+    :param postcode_epcs: DataFrame built from the "rows" of an EPC search response
+    :return: DataFrame with the newest EPC per property, limited to rows flagged "Is Cavity Property" or
+        "Solar and Loft" whose tenure is not in SOCIAL_TENURES
+    """
+    postcode_epcs = postcode_epcs.copy()
+
+    # Get the newest EPC, per UPRN, falling back to the address where there is no UPRN. Blank strings are treated
+    # as missing as well as nulls (photo-supply is matched against "" below, so blanks do occur in these rows);
+    # otherwise every property with a blank UPRN would share one key and collapse into a single row
+    has_no_uprn = postcode_epcs["uprn"].isna() | (postcode_epcs["uprn"].astype(str).str.strip() == "")
+    postcode_epcs["uprn"] = np.where(has_no_uprn, postcode_epcs["address"], postcode_epcs["uprn"])
+    postcode_epcs = postcode_epcs.sort_values("lodgement-date", ascending=False)
+    postcode_epcs = postcode_epcs.drop_duplicates("uprn", keep="first")
+
+    # A blank or non-numeric rating becomes NaN, which fails both threshold checks below instead of raising
+    energy_efficiency = pd.to_numeric(postcode_epcs["current-energy-efficiency"], errors="coerce")
+
+    postcode_epcs["Is Cavity Property"] = postcode_epcs["walls-description"].isin(
+        CAVITY_WALL_DESCRIPTIONS
+    ) & (energy_efficiency <= 72)
+
+    postcode_epcs["Solar and Loft"] = (
+        postcode_epcs["roof-description"].isin(ROOF_DESCRIPTIONS)
+        & postcode_epcs["photo-supply"].isin(["0", "", "0.0"])
+        & (energy_efficiency <= 68)
+    )
+
+    postcode_epcs = postcode_epcs[postcode_epcs["Is Cavity Property"] | postcode_epcs["Solar and Loft"]]
+
+    # Remove any social properties
+    return postcode_epcs[~postcode_epcs["tenure"].isin(SOCIAL_TENURES)]
+
+
+def main():
+    """
+    This application is used to identify additional units that are private rentals or owner occupied that can be
+    included in the route marches
+
+    Required inputs are the following:
+    - An excel file that contains one or many tabs that include the addresses to be visited
+
+    :return: list with one DataFrame per CONFIG entry, in CONFIG order, holding the additional units found in that
+        tab's postcodes (an empty DataFrame when none qualify)
+    """
+
+    # The client and the search url are the same for every request, so build them once rather than per postcode
+    client = EpcClient(auth_token=EPC_AUTH_TOKEN)
+    url = os.path.join(client.domestic.host, "search") + "?" + urlencode({"size": EPC_SEARCH_SIZE})
+
+    additional_units = []
+    for config in CONFIG:
+        # Read in the data
+        route_march_addresses = pd.read_excel(
+            config["filepath"],
+            sheet_name=config["tab"],
+            engine="openpyxl"
+        )
+
+        # Blank cells in the postcode column cannot be searched for
+        postcodes = route_march_addresses[config["postcode_column"]].dropna().unique()
+
+        epcs = []
+        for postcode in postcodes:
+            # Get the EPCs in this postcode
+            response = client.domestic.call(method="get", url=url, params={"postcode": postcode})
+
+            # Nothing to filter when the search comes back empty
+            rows = response["rows"] if response else []
+            if not rows:
+                continue
+
+            epcs.append(_filter_additional_units(pd.DataFrame(rows)))
+
+        # pd.concat raises on an empty list, so fall back to an empty frame
+        additional_units.append(pd.concat(epcs) if epcs else pd.DataFrame())
+
+    return additional_units
diff --git a/etl/route_march/oo_prs_additional_units/requirements.txt b/etl/route_march/oo_prs_additional_units/requirements.txt
new file mode 100644
index 00000000..fd763a3b
--- /dev/null
+++ b/etl/route_march/oo_prs_additional_units/requirements.txt
@@ -0,0 +1,9 @@
+openpyxl
+epc-api-python==1.0.2
+numpy==2.1.2
+pandas==2.2.3
+usaddress==0.5.11
+fuzzywuzzy==0.18.0
+boto3==1.35.44
+python-dotenv
+tqdm
\ No newline at end of file