# Model/etl/testing_data/engine_inputs.py

"""
Build an input CSV for the recommendation engine and upload it to S3 so that it can be used for testing.
"""
import os

import numpy as np
import pandas as pd
from epc_api.client import EpcClient
from utils.s3 import save_csv_to_s3

# Target number of rows in the generated input CSV (seed rows + sampled rows)
FILE_SIZE = 100
# Auth token handed to the EPC API client; None when the variable is not set
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")
# Identifiers used to build the S3 key and the printed request body
USER_ID = 2
PORTFOLIO_ID = 47


def _split_evenly(total, parts):
    """Split ``total`` into ``parts`` integers that sum exactly to ``total``.

    Any remainder is handed out one unit at a time to the leading parts, so
    the result never overshoots ``total`` (unlike rounding every share up).
    """
    base, extra = divmod(total, parts)
    return [base + 1 if i < extra else base for i in range(parts)]


def app():
    """Build the test input CSV, upload it to S3 and print the request body.

    Steps:
      1. Read the seed addresses from ``input_property_list.csv``.
      2. Top the list up towards ``FILE_SIZE`` rows with addresses returned by
         the EPC client's domestic search: 80% of the top-up is spread over
         energy bands G-D, the remaining 20% over bands C-A.
      3. Drop park homes, append the sampled rows to the seed rows and save
         the result to the ``retrofit-plan-inputs-dev`` bucket.
      4. Print the body that references the uploaded file.

    The final row count can be below ``FILE_SIZE`` when park homes are
    filtered out or a search returns fewer rows than requested.
    """
    starting_csv = pd.read_csv("input_property_list.csv")

    # Clamp at zero: the seed list may already hold FILE_SIZE rows or more, and
    # a negative count would otherwise be sent to the API as a negative size.
    remaining_files_to_sample = max(FILE_SIZE - len(starting_csv), 0)

    # For the remaining addresses, 80% of them will be EPC D and below and the
    # remaining 20% will be EPC C and above. Integer arithmetic avoids float
    # rounding surprises when truncating the 80% share.
    n_epc_d_below = remaining_files_to_sample * 4 // 5
    n_epc_c_above = remaining_files_to_sample - n_epc_d_below

    low_bands = ("g", "f", "e", "d")
    high_bands = ("c", "b", "a")
    band_sizes = [
        *zip(low_bands, _split_evenly(n_epc_d_below, len(low_bands))),
        *zip(high_bands, _split_evenly(n_epc_c_above, len(high_bands))),
    ]

    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # Combine the rows of every band, in G -> A order
    final_data = []
    for band, size in band_sizes:
        if size == 0:
            # Nothing to fetch for this band; skip the API round-trip
            continue
        response = epc_client.domestic.search(params={"energy-band": band}, size=size)
        final_data.extend(response["rows"])

    # TODO: For the moment, don't use park homes
    final_csv_data = pd.DataFrame(
        [
            {"address": row["address"], "postcode": row["postcode"], "Notes": None}
            for row in final_data
            if row["property-type"] != "Park home"
        ]
    )

    final_csv_data = pd.concat([starting_csv, final_csv_data]).reset_index(drop=True)

    # Store the data in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
    save_csv_to_s3(
        dataframe=final_csv_data,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "B",
        "trigger_file_path": filename
    }
    print(body)