""" This script will create an input csv for the recommendation engine and upload it to S3, which can be used for testing """ import os import numpy as np import pandas as pd from epc_api.client import EpcClient from utils.s3 import save_csv_to_s3 FILE_SIZE = 250 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None) USER_ID = 8 PORTFOLIO_ID = 48 def app(): starting_csv = pd.read_csv("input_property_list.csv") remaining_files_to_sample = FILE_SIZE - len(starting_csv) # For the remaining addresses, 80% of them will be EPC D and below and the remaining 20% will be EPC A and above n_epc_d_below = int(remaining_files_to_sample * 0.8) n_epc_c_above = remaining_files_to_sample - n_epc_d_below n_g = int(np.ceil(n_epc_d_below / 4)) n_f = int(np.ceil(n_epc_d_below / 4)) n_e = int(np.ceil(n_epc_d_below / 4)) n_d = int(np.ceil(n_epc_d_below / 4)) n_c = int(np.ceil(n_epc_c_above / 3)) n_b = int(np.ceil(n_epc_c_above / 3)) n_a = int(np.ceil(n_epc_c_above / 3)) epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN) g_data = epc_client.domestic.search(params={"energy-band": "g"}, size=n_g) f_data = epc_client.domestic.search(params={"energy-band": "f"}, size=n_f) e_data = epc_client.domestic.search(params={"energy-band": "e"}, size=n_e) d_data = epc_client.domestic.search(params={"energy-band": "d"}, size=n_d) c_data = epc_client.domestic.search(params={"energy-band": "c"}, size=n_c) b_data = epc_client.domestic.search(params={"energy-band": "b"}, size=n_b) a_data = epc_client.domestic.search(params={"energy-band": "a"}, size=n_a) # Combine the final data final_data = ( g_data["rows"] + f_data["rows"] + e_data["rows"] + d_data["rows"] + c_data["rows"] + b_data["rows"] + a_data["rows"] ) # TODO: We also take homes with just a specific type of wall final_data = [ x for x in final_data if ("cavity wall" in x["walls-description"].lower()) or ( "solid brick" in x["walls-description"].lower() ) or ("average thermal transmittance" in x["walls-description"].lower()) ] # TODO: For the moment, don't use park homes final_csv_data = pd.DataFrame( [{"address": x["address"], "postcode": x["postcode"], "Notes": None} for x in final_data if x["property-type"] not in ["Park home"]] ) final_csv_data = pd.concat([starting_csv, final_csv_data]).reset_index(drop=True) # Store the data in s3 filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv" save_csv_to_s3( dataframe=final_csv_data, bucket_name="retrofit-plan-inputs-dev", file_name=filename ) body = { "portfolio_id": str(PORTFOLIO_ID), "housing_type": "Social", "goal": "Increase EPC", "goal_value": "B", "trigger_file_path": filename } print(body)