# We use some sample properties from Newhaven as a testing dataset for
# implementing the model fixes.
import inspect
from pathlib import Path

import pandas as pd

from etl.epc.settings import EARLIEST_EPC_DATE
from utils.s3 import save_csv_to_s3

# Locate this source file through a throwaway lambda so the data directory is
# resolved relative to the script rather than the current working directory.
src_file_path = inspect.getfile(lambda: None)
EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
USER_ID = 8
PORTFOLIO_ID = -1
# Maximum number of properties to place in the pilot asset list.
SAMPLE_SIZE = 10


def app() -> None:
    """Build a small pilot asset list from the local Lewes EPC extract.

    Reads the Lewes certificates CSV under ``EPC_DIRECTORY``, keeps rows that
    are lodged on or after ``EARLIEST_EPC_DATE``, have a UPRN, and have a
    current energy efficiency score below 52, then draws a random sample of at
    most ``SAMPLE_SIZE`` rows. The sampled ``uprn`` / ``address`` / ``postcode``
    columns are uploaded as ``{USER_ID}/{PORTFOLIO_ID}/pilot.csv`` to the
    ``retrofit-plan-inputs-dev`` bucket, and a request body referencing that
    file is printed.

    Raises:
        ValueError: If no rows remain after filtering.
    """
    lewes_certificates_path = EPC_DIRECTORY / "domestic-E07000063-Lewes/certificates.csv"
    data = pd.read_csv(lewes_certificates_path, low_memory=False)

    # Rename the columns to the same format as the api returns
    data.columns = [c.replace("_", "-").lower() for c in data.columns]

    # Keep only certificates lodged on or after the earliest accepted date
    data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
    data = data[~pd.isnull(data["uprn"])]
    data = data[data["current-energy-efficiency"].astype(float) < 52]

    if data.empty:
        raise ValueError("No EPC records left after filtering; cannot build an asset list")
    # Clamp the sample size: DataFrame.sample raises when asked for more rows
    # than are available.
    data = data.sample(min(SAMPLE_SIZE, len(data)))

    # Create an asset list
    asset_list = (
        data[["uprn", "address1", "postcode"]]
        .copy()
        .rename(columns={"address1": "address"})
    )
    # A CSV column containing blanks is parsed as float64, so a direct
    # astype(str) would produce "12345.0". Go through int64 first so the UPRN
    # is written as plain digits.
    asset_list["uprn"] = pd.to_numeric(asset_list["uprn"]).astype("int64").astype(str)

    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
    save_csv_to_s3(
        dataframe=asset_list,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename,
    )

    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "B",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": "",
        "non_invasive_recommendations_file_path": "",
        "budget": None,
    }
    print(body)