From e22baed16fcf6ce86e38266d557aab3cc529953d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 29 Oct 2024 12:29:24 +0000 Subject: [PATCH] sorted livewest data pull --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- .../livewest/route_march_2024_10_28.py | 148 ++++++++++++------ .../stonewater/Wave 3 Preparation.py | 2 + 4 files changed, 102 insertions(+), 52 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index 762580d9..850c0cda 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index c916a158..e4070118 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/customers/livewest/route_march_2024_10_28.py b/etl/customers/livewest/route_march_2024_10_28.py index c19c78b1..1b259fba 100644 --- a/etl/customers/livewest/route_march_2024_10_28.py +++ b/etl/customers/livewest/route_march_2024_10_28.py @@ -19,6 +19,53 @@ load_dotenv(dotenv_path="backend/.env") EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") +def get_data(asset_list): + epc_data = [] + errors = [] + for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)): + try: + postcode = home["Postcode"] + house_number = home["Number"] + full_address = home["Full Address"] + + searcher = SearchEpc( + address1=str(house_number), + postcode=postcode, + auth_token=EPC_AUTH_TOKEN, + os_api_key="", + property_type=None, + fast=True, + full_address=full_address, + max_retries=5 + ) + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None + + searcher.find_property(skip_os=True) + if searcher.newest_epc is None: + continue + + # Look for EPC recommendatons + try: + property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"]) + except: + property_recommendations = {"rows": []} + + epc = { + "row_id": home["row_id"], + **searcher.newest_epc.copy(), + "recommendations": property_recommendations["rows"] + } + + epc_data.append(epc) + except Exception as e: + errors.append(home["row_id"]) + time.sleep(5) + + return epc_data, errors + + def app(): """ This app is EPC pulling data for some properties owned by Livewest @@ -45,56 +92,49 @@ def app(): asset_list = pd.read_excel( "/Users/khalimconn-kowlessar/Downloads/LIVEWEST 3578 ECO4 ECO PLUS GBIS.xlsx", header=0 ) + asset_list["row_id"] = asset_list.index - epc_data = [] - errors = [] - for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)): - try: - postcode = home["Postcode"] - house_number = home["Number"] - full_address = home["Full Address"] + epc_data, errors = get_data(asset_list) - searcher = SearchEpc( - address1=str(house_number), - postcode=postcode, - auth_token=EPC_AUTH_TOKEN, - os_api_key="", - property_type=None, - fast=True, - full_address=full_address, - max_retries=3 - ) - # Force the skipping of estimating the EPC - searcher.ordnance_survey_client.property_type = None - searcher.ordnance_survey_client.built_form = None + # We now retrieve any failed properties + asset_list_failed = asset_list[asset_list["row_id"].isin(errors)] + epc_data_failed, _ = get_data(asset_list_failed) - searcher.find_property(skip_os=True) - if searcher.newest_epc is None: - continue - - # Look for EPC recommendatons - try: - property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"]) - except: - property_recommendations = {"rows": []} - - epc = { - "asset_list_address": full_address, - **searcher.newest_epc.copy(), - "recommendations": property_recommendations["rows"] - } - - epc_data.append(epc) - except Exception as e: - errors.append(e) - time.sleep(5) + # Append the failed data to the main data + epc_data.extend(epc_data_failed) epc_df = pd.DataFrame(epc_data) + # We expand out the recommendations + recommendations_df = epc_df[["row_id", "recommendations"]] + + unique_recommendations = set() + for _, row in recommendations_df.iterrows(): + unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]]) + + columns = ["row_id"] + list(unique_recommendations) + transformed_data = [] + for _, row in recommendations_df.iterrows(): + # Initialize a dictionary for this row with False for all recommendations + row_data = {col: False for col in columns} + row_data["row_id"] = row["row_id"] + + # Set True for each recommendation present in this row + for rec in row["recommendations"]: + recommendation_text = rec["improvement-summary-text"] + row_data[recommendation_text] = True + + # Append the row data to transformed_data + transformed_data.append(row_data) + + transformed_df = pd.DataFrame(transformed_data) + # Drop the column that is "" + transformed_df = transformed_df.drop(columns=[""]) + # Retrieve just the data we need epc_df = epc_df[ [ - "asset_list_address", + "row_id", "uprn", "property-type", "built-form", @@ -110,7 +150,7 @@ def app(): "construction-age-band", "floor-height", "number-habitable-rooms", - "mainheat-description" + "mainheat-description", # "energy-consumption-current", # kwh/m2 ] @@ -119,11 +159,14 @@ def app(): asset_list = asset_list.merge( epc_df, how="left", - left_on=["ADDRESS"], - right_on=["asset_list_address"] + on="row_id" + ).merge( + transformed_df, + how="left", + on="row_id" ) - asset_list = asset_list.drop(columns=["asset_list_address"]) + asset_list = asset_list.drop(columns=["row_id"]) # Rename the columns asset_list = asset_list.rename(columns={ @@ -140,14 +183,18 @@ def app(): "roof-description": "Roof Construction", "mainheat-description": "Heating Type", "secondheat-description": "Secondary Heating", - "transaction-type": "Reason for last EPC" + "transaction-type": "Reason for last EPC", + "energy-consumption-current": "Heat Demand (kWh/m2)" }) asset_list["Estimated Number of Floors"] = asset_list.apply( - lambda x: estimate_number_of_floors(property_type=x["Property Type"]), axis=1 + lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull( + x["Property Type"]) else None, axis=1 ) asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float) + # Replace "" value with None + asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].replace("", None) asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float) asset_list["Estimated Perimeter (m)"] = asset_list.apply( @@ -157,7 +204,7 @@ def app(): ), axis=1 ) - asset_list["Estimated Heat Loss Perimeter (m)"] = asset_list.apply( + asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply( lambda x: estimate_external_wall_area( num_floors=x["Estimated Number of Floors"], floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5, @@ -168,10 +215,11 @@ def app(): ) asset_list["Roof Insulation Thickness"] = asset_list.apply( - lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"], + lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull( + x["Roof Construction"]) else None, axis=1 ) # Store as an excel - filename = "LHP EPC Data pull.xlsx" + filename = "livewest EPC Data pull - 29 Oct.xlsx" asset_list.to_excel(filename, index=False) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index a8e06416..d8d01b22 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -283,6 +283,8 @@ def main(): extracted_data.append(summary_data) extracted_data = pd.DataFrame(extracted_data) + # Save this as a csv + # extracted_data.to_csv("Wave 3 Summary Data - first 200 files.csv", index=False) missed = [f for f in survey_folders if f not in extracted_data["survey_folder"].tolist()]