# Model/etl/testing_data/birmingham_pilot.py

"""
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
testing
"""
import os

import numpy as np
import pandas as pd
from epc_api.client import EpcClient
from utils.s3 import save_csv_to_s3

# Sample-size setting for the generated input file.
# NOTE(review): confirm whether this is meant per energy band or as a total.
FILE_SIZE = 5

# Token handed to EpcClient; None when the environment variable is unset.
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")

# Together these form the S3 key prefix "<USER_ID>/<PORTFOLIO_ID>/" of the
# uploaded CSV; PORTFOLIO_ID is also echoed in the printed request body.
USER_ID, PORTFOLIO_ID = 8, 54


def app(starting_csv=None):
    """
    Build a test input csv for the recommendation engine and upload it to S3.

    EPC records are requested for every energy band (G through A), filtered down to a fixed set of wall
    descriptions, stripped of park homes and reduced to the address/postcode/Notes columns. The resulting
    csv is stored in the "retrofit-plan-inputs-dev" bucket under "<USER_ID>/<PORTFOLIO_ID>/test_inputs.csv"
    and the request body that references the uploaded file is printed.

    :param starting_csv: optional DataFrame whose rows are placed ahead of the sampled EPC rows. When
        omitted, only the sampled rows are written.
    :return: None
    """
    # For this dataset, we want 3 properties, all houses. A mid-terrace, an end-terrace and a semi-detached
    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # Birmingham has a Local Authority Code of E08000025.
    # NOTE(review): an earlier search with {"local-authority": "E08000025", "property-type": "house"} was
    # issued here but its result was never used, so the call has been dropped. The band searches below are
    # NOT restricted to Birmingham houses - confirm whether those params should be merged into them.

    # NOTE(review): the per-band sizes (n_g ... n_a) were never defined, so the function raised a NameError.
    # FILE_SIZE is the only size setting in scope and is used for every band - confirm the intended split.
    final_data = []
    for band in ("g", "f", "e", "d", "c", "b", "a"):
        band_data = epc_client.domestic.search(params={"energy-band": band}, size=FILE_SIZE)
        final_data.extend(band_data["rows"])

    # TODO: We also take homes with just a specific type of wall
    accepted_walls = ("cavity wall", "solid brick", "average thermal transmittance")
    final_data = [
        row for row in final_data
        # `or ""` keeps a missing (None) description from crashing the filter; such rows are dropped
        if any(wall in (row["walls-description"] or "").lower() for wall in accepted_walls)
    ]

    # TODO: For the moment, don't use park homes
    final_csv_data = pd.DataFrame(
        [
            {"address": row["address"], "postcode": row["postcode"], "Notes": None}
            for row in final_data
            if row["property-type"] != "Park home"
        ],
        # Pin the columns so the csv keeps its header even when no rows survive the filters
        columns=["address", "postcode", "Notes"],
    )

    # `starting_csv` used to be an undefined name; it is now an optional argument
    if starting_csv is not None:
        final_csv_data = pd.concat([starting_csv, final_csv_data])
    final_csv_data = final_csv_data.reset_index(drop=True)

    # Store the data in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
    save_csv_to_s3(
        dataframe=final_csv_data,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "B",
        "trigger_file_path": filename
    }
    print(body)