diff --git a/.idea/Model.iml b/.idea/Model.iml
index df6c4faa..0e963140 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..35513387 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index f9e978c6..2d658c04 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -2,6 +2,7 @@
 import os
 import time
 import re
+from urllib.parse import urlencode
 import usaddress
 import pandas as pd
 import numpy as np
@@ -257,6 +258,8 @@ class SearchEpc:
         params = {"address": self.address1, "postcode": self.postcode}
         url = os.path.join(self.client.domestic.host, "search")
+        if size:
+            url += "?" + urlencode({k: v for k, v in {"size": size}.items() if v})
         for retry in range(self.max_retries):
             try:
diff --git a/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py b/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py
new file mode 100644
index 00000000..3bd87a8c
--- /dev/null
+++ b/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py
@@ -0,0 +1,299 @@
+import os
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+from dotenv import load_dotenv
+from urllib.parse import urlencode
+from epc_api.client import EpcClient
+from utils.logger import setup_logger
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+logger = setup_logger()
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+# Workbook holding the route march addresses, one tab per entry in ROUTE_MARCH_TABS. This should be set.
+ROUTE_MARCH_WORKBOOK = (
+    "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
+    "11.11.2024.xlsx"
+)
+
+# Workbook the selected properties are written to. This should be set.
+OUTPUT_FILEPATH = (
+    "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/PRS and OO properties - WC 11.11.2024.xlsx"
+)
+
+# Tab names must match the workbook's sheet names exactly, including any trailing whitespace.
+ROUTE_MARCH_TABS = [
+    "SETTLE GBIS x 242 ",
+    "ACIS GBIS x 76",
+    "SOUTHERN GBIS x 150",
+    "COMMUNITY HOUSING GBIS x 199",
+    "EASTLIGHT GBIS x 42",
+]
+
+CONFIG = [
+    {"filepath": ROUTE_MARCH_WORKBOOK, "tab": tab, "postcode_column": "Postcode"}
+    for tab in ROUTE_MARCH_TABS
+]
+
+CAVITY_WALL_DESCRIPTIONS = [
+    "Cavity wall, as built, no insulation (assumed)",
+    "Cavity wall, as built, partial insulation (assumed)",
+    "Cavity wall, as built, insulated (assumed)",
+    "Cavity wall, with internal insulation",
+    "Cavity wall, with external insulation",
+]
+
+ROOF_DESCRIPTIONS = [
+    "Pitched, no insulation",
+    "Pitched, no insulation (assumed)",
+    "Pitched, 25 mm loft insulation",
+    "Pitched, 50 mm loft insulation",
+    "Pitched, 75 mm loft insulation",
+    "Pitched, 100 mm loft insulation",
+    "Pitched, 150 mm loft insulation",
+    "Pitched, limited insulation (assumed)",
+    "Pitched, insulated (assumed)",
+]
+
+SOCIAL_TENURES = ["Rented (social)", "rental (social)"]
+
+# Highest current-energy-efficiency score at which a property is still flagged for each measure.
+CAVITY_MAX_SAP_SCORE = 72
+SOLAR_AND_LOFT_MAX_SAP_SCORE = 68
+
+# Floor height assumed when the EPC does not record a usable one.
+DEFAULT_FLOOR_HEIGHT = 2.4
+
+# Value of the "size" query parameter sent to the EPC search endpoint.
+EPC_SEARCH_SIZE = 1000
+
+# EPC fields kept for the output, in output column order.
+EPC_FIELDS = [
+    "uprn",
+    "address",
+    "postcode",
+    "property-type",
+    "built-form",
+    "inspection-date",
+    "current-energy-rating",
+    "current-energy-efficiency",
+    "roof-description",
+    "walls-description",
+    "transaction-type",
+    # New fields needed
+    "secondheat-description",
+    "total-floor-area",
+    "construction-age-band",
+    "floor-height",
+    "number-habitable-rooms",
+    "mainheat-description",
+    #
+    "energy-consumption-current",  # kwh/m2
+    "tenure",
+    "Is Cavity Property",
+    "Solar and Loft",
+]
+
+# EPC field name -> column heading used in the output workbook.
+COLUMN_RENAMES = {
+    "address": "Address",
+    "postcode": "Postcode",
+    "inspection-date": "Date of last EPC",
+    "current-energy-efficiency": "SAP score on register",
+    "current-energy-rating": "EPC rating on register",
+    "property-type": "Property Type",
+    "built-form": "Archetype",
+    "total-floor-area": "Property Floor Area",
+    "construction-age-band": "Property Age Band",
+    "floor-height": "Property Floor Height",
+    "number-habitable-rooms": "Number of Habitable Rooms",
+    "walls-description": "Wall Construction",
+    "roof-description": "Roof Construction",
+    "mainheat-description": "Heating Type",
+    "secondheat-description": "Secondary Heating",
+    "transaction-type": "Reason for last EPC",
+    "energy-consumption-current": "Heat Demand (kWh/m2)",
+    "tenure": "Tenure",
+}
+
+
+def process_postcode_epcs(postcode, client):
+    """
+    Fetch the EPC search results for a postcode and keep the latest certificate per property.
+
+    :param postcode: Postcode to search for; surrounding whitespace is ignored
+    :param client: EPC API client exposing ``domestic.host`` and ``domestic.call``
+    :return: DataFrame with one row per property, or an empty DataFrame if the search returned no rows
+    """
+    # Values read from Excel are not guaranteed to be strings, so coerce before stripping
+    params = {"postcode": str(postcode).strip()}
+    # Build the URL by hand: os.path.join would use backslashes on Windows
+    url = client.domestic.host.rstrip("/") + "/search?" + urlencode({"size": EPC_SEARCH_SIZE})
+    response = client.domestic.call(method="get", url=url, params=params)
+    if not response or "rows" not in response or not response["rows"]:
+        logger.warning("No EPCs found for postcode %s", postcode)
+        return pd.DataFrame()
+    postcode_epcs = pd.DataFrame(response["rows"])
+
+    # Fall back to the address as the property key when there is no UPRN. A blank string counts as
+    # missing too, otherwise every UPRN-less property in the postcode would collapse into one row below
+    uprn_missing = postcode_epcs["uprn"].isna() | (postcode_epcs["uprn"].astype(str).str.strip() == "")
+    postcode_epcs["uprn"] = np.where(uprn_missing, postcode_epcs["address"], postcode_epcs["uprn"])
+
+    # Newest lodgement first so that de-duplicating keeps the most recent certificate
+    postcode_epcs = postcode_epcs.sort_values("lodgement-date", ascending=False)
+    return postcode_epcs.drop_duplicates("uprn", keep="first")
+
+
+def filter_and_prepare_epcs(epcs):
+    """
+    Flag cavity wall and solar/loft candidates and drop everything else, along with social tenures.
+
+    :param epcs: EPC search results; the frame passed in is not modified
+    :return: New DataFrame of the matching rows with "Is Cavity Property" and "Solar and Loft" flag columns
+    """
+    epcs = epcs.copy()
+    # Blank or non-numeric scores become NaN, which fails both comparisons instead of raising
+    sap_score = pd.to_numeric(epcs["current-energy-efficiency"], errors="coerce")
+
+    epcs["Is Cavity Property"] = (
+        epcs["walls-description"].isin(CAVITY_WALL_DESCRIPTIONS)
+        & (sap_score <= CAVITY_MAX_SAP_SCORE)
+    )
+    epcs["Solar and Loft"] = (
+        epcs["roof-description"].isin(ROOF_DESCRIPTIONS)
+        & epcs["photo-supply"].isin(["0", "", "0.0"])
+        & (sap_score <= SOLAR_AND_LOFT_MAX_SAP_SCORE)
+    )
+
+    is_candidate = epcs["Is Cavity Property"] | epcs["Solar and Loft"]
+    is_social = epcs["tenure"].isin(SOCIAL_TENURES)
+    return epcs[is_candidate & ~is_social]
+
+
+def _to_float(value):
+    """Return value as a float, or None when it is blank, missing or not numeric."""
+    try:
+        number = float(value)
+    except (TypeError, ValueError):
+        return None
+    return None if np.isnan(number) else number
+
+
+def _estimate_perimeter(floor_area, habitable_rooms, floors):
+    """Call estimate_perimeter with per-floor area and rooms, or return None if an input is missing or zero."""
+    if pd.isnull(floor_area) or pd.isnull(habitable_rooms) or pd.isnull(floors) or floors == 0:
+        return None
+    return estimate_perimeter(floor_area / floors, habitable_rooms / floors)
+
+
+def rename_and_add_columns(epcs):
+    """
+    Reduce the EPC data to the output columns, rename them and add the estimated geometry columns.
+
+    :param epcs: Filtered EPC data, as returned by filter_and_prepare_epcs
+    :return: DataFrame ready to be written to the output workbook
+    """
+    # Retrieve just the data we need
+    epcs = epcs[EPC_FIELDS].rename(columns=COLUMN_RENAMES)
+
+    # Blank or non-numeric values become missing values rather than raising
+    habitable_rooms = pd.to_numeric(epcs["Number of Habitable Rooms"], errors="coerce")
+    epcs["Number of Habitable Rooms"] = habitable_rooms.round().astype("Int64")
+    epcs["Property Floor Area"] = pd.to_numeric(epcs["Property Floor Area"], errors="coerce")
+
+    # The derived columns are built as plain lists: DataFrame.apply(axis=1) may return a DataFrame
+    # instead of a Series when the frame is empty, which cannot be assigned to a single column
+    floors = [
+        estimate_number_of_floors(property_type) if pd.notnull(property_type) else None
+        for property_type in epcs["Property Type"]
+    ]
+    perimeters = [
+        _estimate_perimeter(floor_area, rooms, floor_count)
+        for floor_area, rooms, floor_count in zip(
+            epcs["Property Floor Area"], epcs["Number of Habitable Rooms"], floors
+        )
+    ]
+    wall_areas = [
+        estimate_external_wall_area(
+            floor_count,
+            # A blank, unparseable or zero height falls back to the default
+            _to_float(floor_height) or DEFAULT_FLOOR_HEIGHT,
+            perimeter,
+            archetype
+        ) if pd.notnull(perimeter) else None
+        for floor_count, floor_height, perimeter, archetype in zip(
+            floors, epcs["Property Floor Height"], perimeters, epcs["Archetype"]
+        )
+    ]
+    roof_insulation = [
+        RoofAttributes(description=description).process()["insulation_thickness"]
+        if pd.notnull(description) else None
+        for description in epcs["Roof Construction"]
+    ]
+
+    epcs["Estimated Number of Floors"] = floors
+    epcs["Estimated Perimeter (m)"] = perimeters
+    epcs["Estimated Heat Loss Perimeter (m2)"] = wall_areas
+    epcs["Roof Insulation Thickness"] = roof_insulation
+    return epcs
+
+
+def main():
+    """
+    This application is used to identify additional units that are private rentals or owner occupied that can be
+    included in the route marches
+
+    Required inputs are the following:
+    - An excel file that contains one or many tabs that include the addresses to be visited
+    """
+    client = EpcClient(auth_token=EPC_AUTH_TOKEN)
+
+    # The context manager closes the workbook even if processing a tab raises
+    with pd.ExcelWriter(OUTPUT_FILEPATH, engine="xlsxwriter") as writer:
+        for config in CONFIG:
+            logger.info("Processing %s", config["tab"])
+            # Read in the data
+            route_march_addresses = pd.read_excel(
+                config["filepath"],
+                sheet_name=config["tab"],
+                engine="openpyxl"
+            )
+
+            # Drop blank cells and normalise whitespace so each postcode is only requested once
+            postcodes = route_march_addresses[config["postcode_column"]].dropna().astype(str).str.strip()
+            postcodes = postcodes[postcodes != ""].unique()
+
+            epcs = []
+            for postcode in tqdm(postcodes):
+                postcode_epcs = process_postcode_epcs(postcode, client)
+                if postcode_epcs.empty:
+                    continue
+                epcs.append(postcode_epcs)
+
+            # pd.concat raises on an empty list, so skip tabs that produced no EPCs at all
+            if not epcs:
+                logger.warning("No EPCs found for any postcode in %s", config["tab"])
+                continue
+
+            # Concatenate all postcodes' data and filter it
+            epcs = pd.concat(epcs, ignore_index=True)
+            epcs = filter_and_prepare_epcs(epcs)
+            epcs = rename_and_add_columns(epcs)
+
+            sheet_name = config["tab"][:31]  # Excel sheet names max length of 31 characters
+            epcs.to_excel(writer, sheet_name=sheet_name, index=False)
+
+    logger.info("Data successfully written to %s", OUTPUT_FILEPATH)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/etl/route_march/oo_prs_additional_units/requirements.txt b/etl/route_march/oo_prs_additional_units/requirements.txt
new file mode 100644
index 00000000..e2f4832c
--- /dev/null
+++ b/etl/route_march/oo_prs_additional_units/requirements.txt
@@ -0,0 +1,10 @@
+openpyxl
+epc-api-python==1.0.2
+numpy==2.1.2
+pandas==2.2.3
+usaddress==0.5.11
+fuzzywuzzy==0.18.0
+boto3==1.35.44
+python-dotenv
+tqdm
+xlsxwriter
\ No newline at end of file