# Model/etl/testing_data/bills_model_testing.py

# We use a sample of properties from Newhaven as a testing dataset for implementing the model fixes


import inspect
import pandas as pd
from etl.epc.settings import EARLIEST_EPC_DATE
from pathlib import Path
from utils.s3 import save_csv_to_s3

# Identifiers that make up the S3 key of the uploaded asset list
USER_ID = 8
PORTFOLIO_ID = -1

# Locate this source file through the code object of a throwaway lambda
src_file_path = inspect.getfile(lambda: None)
# EPC exports are expected in "local_data/all-domestic-certificates" next to this file
EPC_DIRECTORY = Path(src_file_path).parent.joinpath("local_data", "all-domestic-certificates")


def app():
    """
    Build a small pilot asset list from the local Lewes EPC export and upload it to S3.

    Steps:
      1. Read the Lewes certificates csv found under EPC_DIRECTORY.
      2. Keep certificates lodged on or after EARLIEST_EPC_DATE that have a uprn and a
         current energy efficiency score below 52.
      3. Randomly sample up to 10 of them and save their uprn / address / postcode to the
         "retrofit-plan-inputs-dev" bucket under "<USER_ID>/<PORTFOLIO_ID>/pilot.csv".
      4. Print the request body that references the uploaded file.

    :return: None
    :raises ValueError: if no certificate passes the filters
    """

    sample_size = 10
    max_energy_efficiency = 52

    certificates_file = EPC_DIRECTORY / "domestic-E07000063-Lewes/certificates.csv"

    data = pd.read_csv(certificates_file, low_memory=False)
    # Rename the columns to the same format as the api returns
    data.columns = [c.replace("_", "-").lower() for c in data.columns]

    # Keep only certificates lodged on or after the date threshold
    data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]

    # Drop certificates without a uprn, then keep only the low scoring properties
    data = data[data["uprn"].notna()]
    data = data[data["current-energy-efficiency"].astype(float) < max_energy_efficiency]

    if data.empty:
        raise ValueError("No EPC certificates left after filtering; cannot build the asset list")

    # DataFrame.sample raises if asked for more rows than exist, so cap the sample size
    data = data.sample(n=min(sample_size, len(data)))

    # Create an asset list
    asset_list = data[["uprn", "address1", "postcode"]].rename(columns={"address1": "address"})
    # A numeric csv column containing blanks is read as float64, so casting straight to str
    # would produce values such as "100012345.0". Go through int64 first.
    # NOTE(review): assumes every uprn is purely numeric - astype raises otherwise
    asset_list["uprn"] = asset_list["uprn"].astype("int64").astype(str)

    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
    save_csv_to_s3(
        dataframe=asset_list,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    # Request body pointing at the uploaded asset list; printed so it can be copied
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "B",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": "",
        "non_invasive_recommendations_file_path": "",
        "budget": None,
    }
    print(body)