# NOTE: Reconstructed from a whitespace-collapsed git diff of
# `etl/customers/stonewater/potential_eco_properties.py`.
# The same patch also touched `.idea/Model.iml` and `.idea/misc.xml` (IDE
# settings; their changed lines were not recoverable) and appended these lines
# to `etl/customers/stonewater/requirements/requirements-wave-3-prep.txt`:
#     boto3
#     epc-api-python==1.0.2
#     usaddress==0.5.11
#     fuzzywuzzy==0.18.0
import json
import os
import time

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm

from backend.SearchEpc import SearchEpc
from utils.s3 import read_from_s3, read_pickle_from_s3

load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

# Default location of the Stonewater spreadsheets read by `app`.
STONEWATER_DATA_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater"

# "Wall Type" / "Walls" values that identify an as-built cavity wall.
CAVITY_DESCRIPTIONS = [
    "Cavity: AsBuilt (1983-1995)",
    "Cavity: AsBuilt (Post 1995)",
    "Cavity: AsBuilt (Pre 1976)",
    "Cavity: AsBuilt (1976-1982)",
]

# "Main Wall Insulation" values in the costed packages sheet that flag a CWI need.
CWI_DEFAULT = "CWI RdSAP Default"
CWI_EXTRACT_AND_REFILL = "Poss Extract CWI & Refill (issues identified)"

# Columns taken from the costed packages sheet for properties flagged for CWI.
NEEDS_CWI_COLUMNS = [
    "Address ID", "Address", "Current SAP Rating", "Current EPC Band", "Postcode", "Archetype ID",
    "Main Wall Insulation",
    "Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness",
]

# Columns taken from the Parity feature export before merging.
FEATURE_COLUMNS = [
    "Address ID", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating", "Main Fuel", "Hot Water",
    "Renewables", "Total Floor Area",
]

# Columns taken from the archetyped property list.
ARCHETYPED_COLUMNS = [
    "Name", "Postcode", "Osm. ID", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no", "Street name",
    "Address line 2", "City/Town", "Is Cavity Property", "Survey shows CWI needed for Archetype",
]

# EPC columns kept for the merge, in output order.
EPC_COLUMNS = [
    "uprn",
    "address",
    "postcode",
    "property-type",
    "built-form",
    "inspection-date",
    "current-energy-rating",
    "current-energy-efficiency",
    "roof-description",
    "walls-description",
    "transaction-type",
    "secondheat-description",
    "total-floor-area",
    "construction-age-band",
    "floor-height",
    "number-habitable-rooms",
    "mainheat-description",
    "energy-consumption-current",
]

# EPC register column name -> report column name.
EPC_COLUMN_RENAMES = {
    "address": "Address",
    "postcode": "Postcode",
    "inspection-date": "Date of last EPC",
    "current-energy-efficiency": "SAP score on register",
    "current-energy-rating": "EPC rating on register",
    "property-type": "Property Type",
    "built-form": "Archetype",
    "total-floor-area": "Property Floor Area",
    "construction-age-band": "Property Age Band",
    "floor-height": "Property Floor Height",
    "number-habitable-rooms": "Number of Habitable Rooms",
    "walls-description": "Wall Construction",
    "roof-description": "Roof Construction",
    "mainheat-description": "Heating Type",
    "secondheat-description": "Secondary Heating",
    "transaction-type": "Reason for last EPC",
    "energy-consumption-current": "Heat Demand (kWh/m2)",
}

# Parity feature column name -> report column name (prefixed so the columns do
# not collide with the EPC columns of the same name).
PARITY_COLUMN_RENAMES = {
    "Age": "Parity - Build Age",
    "Property Type": "Parity - Property Type",
    "Walls": "Parity - Wall Construction",
    "Roofs": "Parity - Roof Construction",
    "Glazing": "Parity - Glazing Type",
    "Heating": "Parity - Heating Type",
    "Main Fuel": "Parity - Main Fuel",
    "Hot Water": "Parity - Hot Water",
    "Renewables": "Parity - Renewables",
    "Total Floor Area": "Parity - Total Floor Area",
}


def get_data(asset_list):
    """
    Look up the newest EPC, and its recommendations, for every row of `asset_list`.

    :param asset_list: DataFrame with the columns "Postcode", "Number",
        "Full Address" and "row_id".
    :return: tuple `(epc_data, errors)`. `epc_data` is a list of dicts, one per
        property for which an EPC was found, holding the "row_id", every field
        of the newest EPC and a "recommendations" list. `errors` is the list of
        "row_id" values whose lookup raised.
    """
    epc_data = []
    errors = []
    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        try:
            searcher = SearchEpc(
                address1=str(home["Number"]),
                postcode=home["Postcode"],
                auth_token=EPC_AUTH_TOKEN,
                os_api_key="",
                property_type=None,
                fast=True,
                full_address=home["Full Address"],
                max_retries=5,
            )
            # Force the skipping of estimating the EPC
            searcher.ordnance_survey_client.property_type = None
            searcher.ordnance_survey_client.built_form = None

            searcher.find_property(skip_os=True)
            if searcher.newest_epc is None:
                continue

            # Look for EPC recommendations. This is best effort: a failed
            # lookup yields an empty list. `Exception` (rather than a bare
            # `except`) keeps Ctrl-C and interpreter shutdown working.
            try:
                property_recommendations = searcher.client.domestic.recommendations(
                    searcher.newest_epc["lmk-key"]
                )
            except Exception:
                property_recommendations = {"rows": []}

            epc_data.append(
                {
                    "row_id": home["row_id"],
                    **searcher.newest_epc.copy(),
                    "recommendations": property_recommendations["rows"],
                }
            )
        except Exception:
            # Record the failure and carry on with the remaining properties.
            errors.append(home["row_id"])
            # NOTE(review): indentation was lost in the source this was
            # recovered from; the pause is assumed to be a back-off on the
            # error path only - confirm it was not meant for every iteration.
            time.sleep(5)

    return epc_data, errors


def _address_ids_with_insulation(needs_cwi, wall_insulation):
    """Return the Address IDs, as integer-formatted strings, whose "Main Wall Insulation" is `wall_insulation`."""
    matches = needs_cwi[needs_cwi["Main Wall Insulation"] == wall_insulation]
    return matches["Address ID"].astype(int).astype(str).values


def app(data_dir=STONEWATER_DATA_DIR):
    """
    This code creates a list of cavity properties, for review

    It reads the archetyped property list, the costed retrofit packages and the
    Parity feature export from `data_dir`, flags cavity properties and those
    whose archetype was surveyed as needing CWI, and merges on the newest EPC
    per UPRN read from S3.

    :param data_dir: directory holding the three Stonewater source files.
        Defaults to the location that was previously hard-coded.
    :return: None. NOTE(review): the assembled frames are not yet returned or
        written anywhere - the function looks unfinished.
    """
    archetyped_properties = pd.read_excel(
        os.path.join(data_dir, "Stonewater SHDF_3_0_Board Triage 22.05.24 - Archetyped V3.1.xlsx"),
        header=4
    )

    archetyped_properties["Is Cavity Property"] = archetyped_properties["Wall Type"].isin(CAVITY_DESCRIPTIONS)

    # We also identify any properties where properties were found to need cavity wall insulation
    costed_packages = pd.read_excel(
        os.path.join(data_dir, "Stonewater - Costed Retrofit Packages 20241030 (WIP) Single Model V2.xlsx"),
        sheet_name="Modelled Packages",
        header=13
    )

    needs_cwi = costed_packages[
        costed_packages["Main Wall Insulation"].isin([CWI_EXTRACT_AND_REFILL, CWI_DEFAULT])
    ][NEEDS_CWI_COLUMNS]

    # We flag these properties
    archetyped_properties["Survey shows CWI needed for Archetype"] = archetyped_properties["Archetype ID"].isin(
        needs_cwi["Archetype ID"]
    )

    # Drop rows without an Address ID and rows that merely repeat the header
    archetyped_properties = archetyped_properties[~pd.isnull(archetyped_properties["Address ID"])]
    archetyped_properties = archetyped_properties[archetyped_properties["Address ID"] != "Address ID"]

    # this is the big list!!!
    features = pd.read_csv(
        os.path.join(data_dir, "Osmosis Reviewed - Parity Download 18.7 - master sheet.csv"),
        encoding='latin1'
    )
    features["Address ID"] = features["Address ID"].astype(str)

    stonewater_cavity_properties = archetyped_properties[ARCHETYPED_COLUMNS].merge(
        features[FEATURE_COLUMNS], how="left", on="Address ID"
    )

    # We filter this down to the properties that are cavity properties.
    # `.copy()` makes the result an independent frame, so the column
    # assignments below do not raise SettingWithCopyWarning.
    stonewater_cavity_properties = stonewater_cavity_properties[
        stonewater_cavity_properties["Is Cavity Property"] |
        stonewater_cavity_properties["Survey shows CWI needed for Archetype"]
    ].copy()

    is_cavity = stonewater_cavity_properties["Is Cavity Property"]
    survey_needs_cwi = stonewater_cavity_properties["Survey shows CWI needed for Archetype"]

    # Each step below overrides the previous one where its condition holds, so
    # the order runs from the least to the most specific reason.
    stonewater_cavity_properties["Reason Included"] = "As Built Cavity Property"
    stonewater_cavity_properties["Reason Included"] = np.where(
        survey_needs_cwi & ~is_cavity,
        "Survey revealed potential need for CWI or extract and re-fill",
        stonewater_cavity_properties["Reason Included"]
    )
    stonewater_cavity_properties["Reason Included"] = np.where(
        survey_needs_cwi & is_cavity,
        "Surveyed revealed potential need for CWI or extract and re-fill and is an as built cavity property",
        stonewater_cavity_properties["Reason Included"]
    )
    # We indicate the exact properties that need CWI, based on survey findings
    stonewater_cavity_properties["Reason Included"] = np.where(
        stonewater_cavity_properties["Address ID"].isin(
            _address_ids_with_insulation(needs_cwi, CWI_DEFAULT)
        ),
        "Survey showed this property needs CWI",
        stonewater_cavity_properties["Reason Included"]
    )
    stonewater_cavity_properties["Reason Included"] = np.where(
        stonewater_cavity_properties["Address ID"].isin(
            _address_ids_with_insulation(needs_cwi, CWI_EXTRACT_AND_REFILL)
        ),
        "Survey showed this property could need extract and re-fill",
        stonewater_cavity_properties["Reason Included"]
    )

    # We get the EPC data
    epc_data = json.loads(
        read_from_s3(
            bucket_name="retrofit-data-dev",
            s3_file_name="customers/Stonewater/clustering/epc_data.json"
        )
    )
    epc_data = pd.DataFrame(epc_data)

    # Manual UPRN override for a single record
    epc_data["uprn"] = np.where(
        epc_data["internal_id"] == 1091,
        83143766,
        epc_data["uprn"]
    )

    epc_data_batch_2 = read_pickle_from_s3(
        s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
        bucket_name="retrofit-data-dev"
    )
    epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)

    complete_epcs = pd.concat([epc_data, epc_data_batch_2])

    epcs_to_merge = complete_epcs[EPC_COLUMNS].rename(columns=EPC_COLUMN_RENAMES)

    # We de-dupe, keeping the EPC with the newest inspection date for each UPRN
    epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"])
    epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False)
    epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn")

    # Merge the EPCs on, with the data we need
    # NOTE(review): "UPRN" comes from a spreadsheet and "uprn" from the EPC
    # data - confirm both keys share a dtype, otherwise the merge will not match.
    stonewater_cavity_properties = stonewater_cavity_properties.rename(
        columns=PARITY_COLUMN_RENAMES
    ).merge(
        epcs_to_merge,
        how="left",
        left_on="UPRN",
        right_on="uprn"
    )

    # We now flag the additional properties in the as built list
    additional_properties = features[
        ~features["Address ID"].isin(archetyped_properties["Address ID"].values)
    ]

    # Filter on as built cavity properties
    additional_properties = additional_properties[
        additional_properties["Walls"].isin(
            CAVITY_DESCRIPTIONS +
            ["Cavity: FilledCavity", "Cavity: External", "Cavity: Internal"]
        )
    ]

    # Pull the EPCs for these properties
    # TODO: the lookup itself is not implemented yet - the loop only derives
    # the search inputs for each property and then discards them.
    for _, home in tqdm(additional_properties.iterrows(), total=len(additional_properties)):
        full_address = home["Address"]
        postcode = home["Postcode"]
        address1 = full_address.split(",")[0]