diff --git a/.idea/Model.iml b/.idea/Model.iml index 762580d9..850c0cda 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index c916a158..e4070118 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 367d8c85..f9e978c6 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -256,16 +256,12 @@ class SearchEpc: else: params = {"address": self.address1, "postcode": self.postcode} + url = os.path.join(self.client.domestic.host, "search") + for retry in range(self.max_retries): try: - if "uprn" in params: - # We use the direct call method inside, since we need to implement uprn as a valid - # parameter for the search function - url = os.path.join(self.client.domestic.host, "search") - response = self.client.domestic.call(method="get", url=url, params=params) - else: - response = self.client.domestic.search(params=params, size=size) + response = self.client.domestic.call(method="get", url=url, params=params) if response: self.data = response diff --git a/etl/customers/livewest/route_march_2024_10_28.py b/etl/customers/livewest/route_march_2024_10_28.py new file mode 100644 index 00000000..fff1e7e7 --- /dev/null +++ b/etl/customers/livewest/route_march_2024_10_28.py @@ -0,0 +1,171 @@ +import os + +import pandas as pd +from tqdm import tqdm + +from dotenv import load_dotenv +from utils.s3 import read_excel_from_s3 +from backend.SearchEpc import SearchEpc +from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes + +from recommendations.recommendation_utils import ( + estimate_perimeter, + estimate_external_wall_area, + estimate_number_of_floors +) + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + + +def app(): + """ + This app is EPC pulling data for some properties owned by Livewest + + Data request contents: + Date of last EPC + Reason for EPC + SAP score on register + Property Type + Property Area + Property Age + Any Dimensions (HLP,PW,RH) + Property Wall Construction + Heating Type + Secondary Heating + Loft Insulation Depth + + Additional if possible: + Heat loss calculations + EPC recommendations + Property UPRN + + """ + asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/LIVEWEST 3578 ECO4 ECO PLUS GBIS.xlsx", header=0 + ) + + epc_data = [] + for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)): + + postcode = home["Postcode"] + house_number = home["Number"] + full_address = home["Full Address"] + + searcher = SearchEpc( + address1=str(house_number), + postcode=postcode, + auth_token=EPC_AUTH_TOKEN, + os_api_key="", + property_type=None, + fast=True, + full_address=full_address + ) + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None + + searcher.find_property(skip_os=True) + if searcher.newest_epc is None: + continue + + # Look for EPC recommendatons + try: + property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"]) + except: + property_recommendations = {"rows": []} + + epc = { + "asset_list_address": full_address, + **searcher.newest_epc.copy(), + "recommendations": property_recommendations["rows"] + } + + epc_data.append(epc) + + epc_df = pd.DataFrame(epc_data) + + # Retrieve just the data we need + epc_df = epc_df[ + [ + "asset_list_address", + "uprn", + "property-type", + "built-form", + "inspection-date", + "current-energy-rating", + "current-energy-efficiency", + "roof-description", + "walls-description", + "transaction-type", + # New fields needed + "secondheat-description", + "total-floor-area", + "construction-age-band", + "floor-height", + "number-habitable-rooms", + "mainheat-description" + # + "energy-consumption-current", # kwh/m2 + ] + ] + + asset_list = asset_list.merge( + epc_df, + how="left", + left_on=["ADDRESS"], + right_on=["asset_list_address"] + ) + + asset_list = asset_list.drop(columns=["asset_list_address"]) + + # Rename the columns + asset_list = asset_list.rename(columns={ + "inspection-date": "Date of last EPC", + "current-energy-efficiency": "SAP score on register", + "current-energy-rating": "EPC rating on register", + "property-type": "Property Type", + "built-form": "Archetype", + "total-floor-area": "Property Floor Area", + "construction-age-band": "Property Age Band", + "floor-height": "Property Floor Height", + "number-habitable-rooms": "Number of Habitable Rooms", + "walls-description": "Wall Construction", + "roof-description": "Roof Construction", + "mainheat-description": "Heating Type", + "secondheat-description": "Secondary Heating", + "transaction-type": "Reason for last EPC" + }) + + asset_list["Estimated Number of Floors"] = asset_list.apply( + lambda x: estimate_number_of_floors(property_type=x["Property Type"]), axis=1 + ) + + asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float) + asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float) + + asset_list["Estimated Perimeter (m)"] = asset_list.apply( + lambda x: estimate_perimeter( + floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"], + num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"], + ), axis=1 + ) + + asset_list["Estimated Heat Loss Perimeter (m)"] = asset_list.apply( + lambda x: estimate_external_wall_area( + num_floors=x["Estimated Number of Floors"], + floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5, + perimeter=x["Estimated Perimeter (m)"], + built_form=x["Archetype"] + ), + axis=1 + ) + + asset_list["Roof Insulation Thickness"] = asset_list.apply( + lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"], + axis=1 + ) + + # Store as an excel + filename = "LHP EPC Data pull.xlsx" + asset_list.to_excel(filename, index=False) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 988a544a..8e1a7fdb 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -226,5 +226,7 @@ def main(): extracted_data = pd.DataFrame(extracted_data) + missed = [f for f in survey_folders if f not in extracted_data["survey_folder"].tolist()] + # if __name__ == "__main__": # main()