""" This script prepares the data for the principal pitch modelling """ import os import pandas as pd from dotenv import load_dotenv from utils.s3 import save_csv_to_s3 from etl.find_my_epc.AssetListEpcData import AssetListEpcData load_dotenv(dotenv_path="backend/.env") EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") PORTFOLIO_ID = 206 USER_ID = 8 EPC_TARGET = "C" # Read the input file properties = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Birmingham_price_top300.xlsx" ) # Keep just the D's and below properties = properties[properties["current_energy_rating"].isin(["D", "E", "F", "G"])].copy() # Focus on houses properties = properties[properties["property_type_std"] != "Flat"] properties = properties[properties["property_type"] != "flat"] # Rename the key columns properties = properties.rename( columns={ "address1": "address", "number_of_bathrooms": "n_bathrooms", "num_beds": "n_bedrooms" } ) properties["patch"] = True # Pull the non-invasive recommendations asset_list_epc_client = AssetListEpcData( asset_list=properties, epc_auth_token=EPC_AUTH_TOKEN ) asset_list_epc_client.get_data() asset_list_epc_client.get_non_invasive_recommendations() asset_list_epc_client.get_patch() extracted_df = pd.DataFrame(asset_list_epc_client.extracted_data) epc_df = pd.DataFrame(asset_list_epc_client.epc_data) # Find examples where patches are different to the api compare_epc = [] for patch in asset_list_epc_client.patches: extracted = extracted_df[extracted_df["uprn"] == patch["uprn"]].squeeze() epc = epc_df[epc_df["uprn"] == patch["uprn"]].squeeze() compare_epc.append( { "uprn": extracted["uprn"], "address": extracted["address"], "postcode": extracted["postcode"], "api_epc": int(extracted["current_epc_efficiency"]), "fme_epc": int(epc["current-energy-efficiency"]), } ) compare_epc = pd.DataFrame(compare_epc) diff = compare_epc[compare_epc["api_epc"] != compare_epc["fme_epc"]] # Compare matched addresses to make sure they are the same compare_addresses = extracted_df[["address", "postcode", "uprn"]].merge( epc_df[["uprn", "address1", "postcode"]].rename(columns={"address1": "epc_address1", "postcode": "epc_postcode"}), how="left", on=["uprn"] ) # Add on uprn properties = properties.merge( extracted_df[["address", "postcode", "uprn"]], how="left", on=["address", "postcode"] ) # Store the asset list in s3 filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv" save_csv_to_s3( dataframe=properties, bucket_name="retrofit-plan-inputs-dev", file_name=filename ) # Store non-invasive recommendations in S3 non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv" save_csv_to_s3( dataframe=pd.DataFrame(asset_list_epc_client.non_invasive_recommendations), bucket_name="retrofit-plan-inputs-dev", file_name=non_invasive_recommendations_filename ) # Store patches in S3 patches_filename = "" if asset_list_epc_client.patches: patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.csv" save_csv_to_s3( dataframe=pd.DataFrame(asset_list_epc_client.patches), bucket_name="retrofit-plan-inputs-dev", file_name=patches_filename ) body = { "portfolio_id": str(PORTFOLIO_ID), "housing_type": "Private", "goal": "Increasing EPC", "goal_value": "C", "trigger_file_path": filename, "already_installed_file_path": "", "patches_file_path": patches_filename, "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, "valuation_file_path": "", "scenario_name": "EPC C", "multi_plan": True, "budget": None, "ashp_cop": 3.5, # This is new - when optimising, we drop scores by a few points to account for SAP 10 "simulate_sap_10": True, "exclusions": ["external_wall_insulation"], "required_measures": ["cavity_wall_insulation", "loft_insulation"] } print(body)