mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
83 lines
2.8 KiB
Python
83 lines
2.8 KiB
Python
"""
This script creates an input CSV for the recommendation engine and uploads it to S3, where it can be used for
testing.
"""
|
|
import os
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from epc_api.client import EpcClient
|
|
from utils.s3 import save_csv_to_s3
|
|
|
|
# Target number of rows for the generated CSV; app() samples EPC addresses to
# make up the difference between this and the size of the starting list.
FILE_SIZE = 250
# EPC API auth token, read from the environment (None when the variable is unset).
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
# Used by app() to build the S3 key "<USER_ID>/<PORTFOLIO_ID>/test_inputs.csv".
USER_ID = 8
PORTFOLIO_ID = 48
|
|
|
|
|
|
def app():
    """Build a test input CSV for the recommendation engine and upload it to S3.

    Steps:
      1. Read ``input_property_list.csv`` from the working directory.
      2. Work out how many extra addresses are needed to reach ``FILE_SIZE``
         and split that number across EPC bands (80% across D-G, 20% across
         A-C, each share rounded up per band).
      3. Fetch that many rows per band from the EPC API.
      4. Keep only rows with a recognised wall description and drop park homes.
      5. Append the sampled addresses to the starting list and save the result
         to the ``retrofit-plan-inputs-dev`` bucket under
         ``<USER_ID>/<PORTFOLIO_ID>/test_inputs.csv``.
      6. Print a request body dict that references the uploaded file.

    Because filtering happens after sampling, the final file can contain fewer
    than ``FILE_SIZE`` rows.
    """
    starting_csv = pd.read_csv("input_property_list.csv")

    # Clamp at zero: if the starting list already has FILE_SIZE rows or more,
    # there is nothing left to sample and we must not request negative sizes.
    remaining_files_to_sample = max(FILE_SIZE - len(starting_csv), 0)

    # For the remaining addresses, 80% of them will be EPC D and below and the
    # remaining 20% will be EPC C and above
    n_epc_d_below = int(remaining_files_to_sample * 0.8)
    n_epc_c_above = remaining_files_to_sample - n_epc_d_below

    # Each group is split evenly across its bands, rounding up per band.
    per_low_band = int(np.ceil(n_epc_d_below / 4))
    per_high_band = int(np.ceil(n_epc_c_above / 3))

    # Ordered worst-to-best so the combined rows keep the order G, F, E, D, C, B, A.
    band_sizes = {
        "g": per_low_band,
        "f": per_low_band,
        "e": per_low_band,
        "d": per_low_band,
        "c": per_high_band,
        "b": per_high_band,
        "a": per_high_band,
    }

    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # Combine the final data
    final_data = []
    for band, size in band_sizes.items():
        if size == 0:
            # Nothing requested for this band, so skip the API call entirely.
            continue
        response = epc_client.domestic.search(params={"energy-band": band}, size=size)
        final_data.extend(response["rows"])

    # TODO: We also take homes with just a specific type of wall
    accepted_wall_keywords = ("cavity wall", "solid brick", "average thermal transmittance")

    def has_accepted_wall(row):
        # Lower-case once, then look for any of the accepted substrings.
        walls = row["walls-description"].lower()
        return any(keyword in walls for keyword in accepted_wall_keywords)

    final_data = [row for row in final_data if has_accepted_wall(row)]

    # TODO: For the moment, don't use park homes
    final_csv_data = pd.DataFrame(
        [
            {"address": row["address"], "postcode": row["postcode"], "Notes": None}
            for row in final_data
            if row["property-type"] not in ["Park home"]
        ]
    )

    final_csv_data = pd.concat([starting_csv, final_csv_data]).reset_index(drop=True)

    # Store the data in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
    save_csv_to_s3(
        dataframe=final_csv_data,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename,
    )

    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "B",
        "trigger_file_path": filename,
    }
    print(body)