import os import time import re from etl.epc.settings import EARLIEST_EPC_DATE from dotenv import load_dotenv from tqdm import tqdm import pandas as pd import numpy as np from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc from etl.spatial.OpenUprnClient import OpenUprnClient from backend.SearchEpc import SearchEpc from utils.s3 import save_csv_to_s3 load_dotenv(dotenv_path="backend/.env") EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") USER_ID = 8 PORTFOLIO_ID = 117 CAHA_PORTFOLIO_ID = 118 def hornsey(): """ This script prepares the asset lists for the additional housing associations, CAHA and Hornsey Housing Trust, that are forming a consortium led by AIHA :return: """ hornsey_asset_list = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/SHDF - Template - EOI - Hornsey Housing " "Trust.xlsx", sheet_name="Ksquared-All units information", header=3 ) # We don't need the first row hornsey_asset_list = hornsey_asset_list.iloc[1:] # Fill NA values with empty strings hornsey_asset_list = hornsey_asset_list.fillna("") hornsey_asset_list["Address letter or number"] = hornsey_asset_list["Address letter or number"].astype( str ).str.strip() hornsey_asset_list["Postcode"] = hornsey_asset_list["Postcode"].astype(str).str.strip() hornsey_asset_list["Street address"] = hornsey_asset_list["Street address"].astype(str).str.strip() # Replace double spaces for col in ["Address letter or number", "Street address", "Postcode"]: hornsey_asset_list[col] = hornsey_asset_list[col].str.replace(" ", " ") hornsey_asset_list = hornsey_asset_list[hornsey_asset_list["Address letter or number"] != ""] hornsey_asset_list["Wall Type Cleaned"] = np.where( "Cavity" in hornsey_asset_list["Wall type"], "Cavity", "Solid" ) missed_uprns = { "Flat 13A Stowell House": 100021213098, "Flat 24 Stowell House": 100021213110, "Flat 1 36 Haringey Park": None } extracted_data = [] asset_list = [] hornsey_asset_list["row_id"] = hornsey_asset_list.index for _, home in tqdm(hornsey_asset_list.iterrows(), total=len(hornsey_asset_list)): if home["Address letter or number"] == "Flat 1 36 Haringey Park": continue # Some properties do not have an epc if not home["Energy starting band (EPC)"]: asset_list.append( { "uprn": missed_uprns[home["Address letter or number"]], "address": home["Address letter or number"], "postcode": home["Postcode"], "property_type": "Flat", # They're all flats } ) continue unit_number = home["Address letter or number"] street = home["Street address"] postcode = home["Postcode"] address = ", ".join([x for x in [unit_number, street] if x]) find_epc_searcher = RetrieveFindMyEpc(address=address, postcode=postcode) find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data() time.sleep(0.5) # We need uprn searcher = SearchEpc( address1=address, postcode=postcode, auth_token=EPC_AUTH_TOKEN, os_api_key="", full_address=address, ) searcher.find_property(skip_os=True) newest_epc = searcher.newest_epc if newest_epc["current-energy-efficiency"] != home["Energy starting band (EPC)"].split("-")[1]: raise Exception("Something went wrong with the EPC data") extracted_data.append( { "uprn": newest_epc["uprn"], **find_epc_data, "hotwater-description": newest_epc["hotwater-description"], } ) asset_list.append( { "uprn": newest_epc["uprn"], "row_id": home["row_id"], "address": home["Address letter or number"], "postcode": home["Postcode"], "property_type": "Flat", # They're all flats } ) # Get conservation area data # uprns = [x["uprn"] for x in extracted_data] # conservation_area_data = OpenUprnClient.get_spatial_data(uprns, "retrofit-data-dev") # # addresses = pd.DataFrame(asset_list) # addresses["uprn"] = addresses["uprn"].astype(int) # conservation_area_df = conservation_area_data.merge(addresses, how="left", right_on="uprn", left_on="UPRN") # conservation_area_df.to_csv( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/hornsey_conservation_area_data.csv" # ) # We format the extracted data so that is has the same structure as non-intrusive recommendations # We then get the UPRNs and create the asset list non_invasive_recommendations = [ { "uprn": r["uprn"], "recommendations": r["recommendations"] } for r in extracted_data ] for r in non_invasive_recommendations: new_recommendations = [] extracted = [r for r in extracted_data if r["uprn"] == r["uprn"]][0] for rec in r["recommendations"]: if extracted["hotwater-description"] == "Gas boiler/circulator, no cylinder thermostat": if rec["type"] in ["hot_water_tank_insulation", "cylinder_thermostat"]: continue rec["survey"] = False new_recommendations.append(rec) r["recommendations"] = new_recommendations # Store the asset list in s3 filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv" save_csv_to_s3( dataframe=pd.DataFrame(asset_list), bucket_name="retrofit-plan-inputs-dev", file_name=filename ) # Store the non-invasive recommendations in s3 non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv" save_csv_to_s3( dataframe=pd.DataFrame(non_invasive_recommendations), bucket_name="retrofit-plan-inputs-dev", file_name=non_invasive_recommendations_filename ) body = { "portfolio_id": str(PORTFOLIO_ID), "housing_type": "Social", "goal": "Increasing EPC", "goal_value": "C", "trigger_file_path": filename, "already_installed_file_path": "", "patches_file_path": "", "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, "valuation_file_path": "", "scenario_name": "Wave 3 Packages", "multi_plan": True, "budget": None, "exclusions": ["boiler_upgrade"] } print(body) def caha(): caha_asset_list = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/Copy of AIHA - WHSHF Wave 3 bid - Consortium " "member properties - CAHA.xlsx", sheet_name="Ksquared-All units information", header=3 ) caha_asset_list = caha_asset_list.iloc[1:] # Fill NA values with empty strings caha_asset_list = caha_asset_list.fillna("") caha_asset_list["Address letter or number"] = caha_asset_list["Address letter or number"].astype( str ).str.strip() # We Add POstcode as it wasn't populated - split on space and take the last two entries and re-concatenate on space caha_asset_list["Street address"] = caha_asset_list["Street address"].str.strip() caha_asset_list["Postcode"] = caha_asset_list["Street address"].str.split(" ").str[-2:].str.join(" ") # Take just the columns we need caha_asset_list = caha_asset_list[["Address letter or number", "Street address", "Postcode"]] for col in ["Address letter or number", "Street address", "Postcode"]: caha_asset_list[col] = caha_asset_list[col].str.replace(" ", " ") # Pull the data from find my epc remap = { "Flat A, 50 Talbot Road N6 4QP": "50a Talbot Road", "Flat A, 51 First Avenue EN1 1BN": "51a, First Avenue", "Flat B, 51 First Avenue EN1 1BN": "51b, First Avenue" } def remap_address(address): # Match patterns like 'Flat A, 30 Grove Park Road' match = re.match(r'Flat (\w), (\d+) (.+)', address) if match: flat_letter = match.group(1) # e.g., 'A' number = match.group(2) # e.g., '30' rest_of_address = match.group(3) # e.g., 'Grove Park Road' # Format the new address as '30A Grove Park Road' return f"{number}{flat_letter} {rest_of_address}" # If pattern doesn't match, return original address return address caha_asset_list["row_id"] = caha_asset_list.index extracted_data = [] asset_list = [] for _, home in tqdm(caha_asset_list.iterrows(), total=len(caha_asset_list)): if home["Street address"] == "35 Stanford road N11 3HY" and home["Address letter or number"] == "": continue if home["Street address"] == "29 Victoria Avenue N3 1BD" and home["Address letter or number"] == "": continue if home["Street address"] == "11 Victoria Avenue N3 1BD" and home["Address letter or number"] == "Flat A": continue if home["Street address"] == "11 Victoria Avenue N3 1BD" and home["Address letter or number"] == "Flat C": continue if home["Street address"] == "10 Forest Gardens N17 6XA" and home["Address letter or number"] == "Flat C": continue if home["Street address"] == "219 Cann Hall Road E11 3NJ" and home["Address letter or number"] == "Flat B": continue unit_number = home["Address letter or number"] street = home["Street address"] postcode = home["Postcode"] address = ", ".join([x for x in [unit_number, street] if x]) address = remap.get(address, address) address = address.replace(postcode, "").strip() if "Victoria Avenue" not in address: address = remap_address(address) find_epc_searcher = RetrieveFindMyEpc(address=address, postcode=postcode) find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data(sap_2012_date=EARLIEST_EPC_DATE) time.sleep(0.5) # We need uprn searcher = SearchEpc( address1=address, postcode=postcode, auth_token=EPC_AUTH_TOKEN, os_api_key="", full_address=address, ) searcher.find_property(skip_os=True) newest_epc = searcher.newest_epc uprn = newest_epc["uprn"] if address in ["Flat D, 11 Victoria Avenue", "Flat B, 11 Victoria Avenue"]: uprn = None extracted_data.append( { "uprn": uprn, **find_epc_data, } ) asset_list.append( { "row_id": home["row_id"], "uprn": uprn, "address": address, "postcode": home["Postcode"], "property_type": newest_epc["property-type"], "wall_type": newest_epc["walls-description"], "built_form": newest_epc["built-form"], "flat_storey_count": newest_epc['flat-storey-count'], } ) # Missing row ids missed = [r for r in caha_asset_list["row_id"].tolist() if r not in [x["row_id"] for x in asset_list]] no_data = [x for x in asset_list if x["uprn"] in [None, ""]] no_data = pd.DataFrame(no_data) # Get conservation area data uprns = [x["uprn"] for x in extracted_data if x["uprn"] not in ["", None]] conservation_area_data = OpenUprnClient.get_spatial_data([36284], "retrofit-data-dev") addresses = pd.DataFrame(asset_list) addresses["uprn"] = addresses["uprn"].astype(str) conservation_area_data["UPRN"] = conservation_area_data["UPRN"].astype(str) conservation_area_df = conservation_area_data.merge(addresses, how="left", right_on="uprn", left_on="UPRN") conservation_area_df.to_csv( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_conservation_area_data.csv" ) non_invasive_recommendations = [ { "uprn": r["uprn"], "recommendations": r["recommendations"] } for r in extracted_data ] # for r in non_invasive_recommendations: # new_recommendations = [] # extracted = [r for r in extracted_data if r["uprn"] == r["uprn"]][0] # for rec in r["recommendations"]: # if extracted["hotwater-description"] == "Gas boiler/circulator, no cylinder thermostat": # if rec["type"] in ["hot_water_tank_insulation", "cylinder_thermostat"]: # continue # rec["survey"] = False # new_recommendations.append(rec) # r["recommendations"] = new_recommendations # We model the two properties separately asset_list = pd.DataFrame(asset_list) # Drop Flat D, 11 Victoria Avenue asset_list1 = asset_list[asset_list["address"] != "Flat D, 11 Victoria Avenue"] asset_list2 = asset_list[asset_list["address"] == "Flat D, 11 Victoria Avenue"] # Store the asset list in s3 filename = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/asset_list1.csv" save_csv_to_s3( dataframe=asset_list1, bucket_name="retrofit-plan-inputs-dev", file_name=filename ) filename2 = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/asset_list2.csv" save_csv_to_s3( dataframe=asset_list2, bucket_name="retrofit-plan-inputs-dev", file_name=filename2 ) # Store the non-invasive recommendations in s3 non_invasive_recommendations_filename = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/non_invasive_recommendations.csv" save_csv_to_s3( dataframe=pd.DataFrame(non_invasive_recommendations), bucket_name="retrofit-plan-inputs-dev", file_name=non_invasive_recommendations_filename ) body = { "portfolio_id": str(CAHA_PORTFOLIO_ID), "housing_type": "Social", "goal": "Increasing EPC", "goal_value": "C", "trigger_file_path": filename, "already_installed_file_path": "", "patches_file_path": "", "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, "valuation_file_path": "", "scenario_name": "Wave 3 Packages", "multi_plan": True, "budget": None, "exclusions": ["boiler_upgrade"] } print(body) body2 = { "portfolio_id": str(CAHA_PORTFOLIO_ID), "housing_type": "Social", "goal": "Increasing EPC", "goal_value": "C", "trigger_file_path": filename2, "already_installed_file_path": "", "patches_file_path": "", "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, "valuation_file_path": "", "scenario_name": "Wave 3 Packages", "multi_plan": True, "budget": None, "exclusions": ["boiler_upgrade"] } print(body2) # asset_list3 = [ { "address": "10b Forest Gardens", "postcode": "N17 6XA", "uprn": 100021180197 } ] filename3 = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/asset_list3.csv" save_csv_to_s3( dataframe=pd.DataFrame(asset_list3), bucket_name="retrofit-plan-inputs-dev", file_name=filename3 ) body3 = { "portfolio_id": str(119), "housing_type": "Social", "goal": "Increasing EPC", "goal_value": "C", "trigger_file_path": filename3, "already_installed_file_path": "", "patches_file_path": "", "non_invasive_recommendations_file_path": "", "valuation_file_path": "", "scenario_name": "Wave 3 Packages", "multi_plan": True, "budget": None, "exclusions": ["boiler_upgrade"] } print(body3)