Model/etl/testing_data/bills_model_testing.py

60 lines
1.8 KiB
Python

# A sample of properties from the Lewes EPC extract (the local authority area covering Newhaven) serves as a
# testing dataset for implementing the model fixes
import inspect
import pandas as pd
from etl.epc.settings import EARLIEST_EPC_DATE
from pathlib import Path
from utils.s3 import save_csv_to_s3
# Identifiers used to build the S3 key of the pilot asset list: "{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
USER_ID = 8
PORTFOLIO_ID = -1

# inspect.getfile on a lambda defined in this module returns the path of this source file
src_file_path = inspect.getfile(lambda: None)
# Bulk EPC certificate downloads are expected in local_data/all-domestic-certificates next to this file
EPC_DIRECTORY = Path(src_file_path).parent.joinpath("local_data", "all-domestic-certificates")
def app():
    """
    Build a small pilot asset list from the local Lewes EPC extract and upload it to S3.

    The Lewes certificates CSV is read from EPC_DIRECTORY and reduced to certificates that were lodged on or
    after EARLIEST_EPC_DATE, have a UPRN, and have a current energy efficiency score below 52. Up to 10 of the
    remaining rows are sampled at random and their uprn / address / postcode are saved as
    "{USER_ID}/{PORTFOLIO_ID}/pilot.csv" in the "retrofit-plan-inputs-dev" bucket. Finally a `body` dict that
    points at the uploaded file through "trigger_file_path" is printed.

    :raises ValueError: if no certificate passes the filters
    :return: None
    """
    sample_size = 10

    lewes_directory = EPC_DIRECTORY / "domestic-E07000063-Lewes/certificates.csv"
    data = pd.read_csv(lewes_directory, low_memory=False)

    # Rename the columns to the same format as the api returns
    data.columns = [c.replace("_", "-").lower() for c in data.columns]

    # Keep only certificates lodged on or after the date threshold
    data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
    data = data[data["uprn"].notna()]
    data = data[data["current-energy-efficiency"].astype(float) < 52]

    if data.empty:
        raise ValueError("No Lewes certificates match the pilot filters; cannot build an asset list")

    # DataFrame.sample raises when asked for more rows than are available, so cap the request
    data = data.sample(min(sample_size, len(data)))

    # Create an asset list
    asset_list = data[["uprn", "address1", "postcode"]].copy().rename(columns={"address1": "address"})
    # A uprn column with gaps is parsed as float64, so a direct astype(str) would write "1234.0";
    # go through int64 first to get the plain integer identifier
    asset_list["uprn"] = asset_list["uprn"].astype("int64").astype(str)

    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
    save_csv_to_s3(
        dataframe=asset_list,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename,
    )

    # NOTE(review): presumably the payload used to trigger the model run for this file - confirm with caller
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "B",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": "",
        "non_invasive_recommendations_file_path": "",
        "budget": None,
    }
    print(body)