diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 3b05c6ac..ca0e1cd9 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,6 +1,6 @@
-
+
diff --git a/etl/testing_data/__init__.py b/etl/testing_data/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/etl/testing_data/engine_inputs.py b/etl/testing_data/engine_inputs.py
new file mode 100644
index 00000000..507208e3
--- /dev/null
+++ b/etl/testing_data/engine_inputs.py
@@ -0,0 +1,71 @@
+"""
+Create an input CSV for the recommendation engine and upload it to S3, where it can be used
+for testing.
+"""
+import os
+
+import numpy as np
+import pandas as pd
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+# Target number of rows in the generated CSV; app() tops the seed CSV up to this size.
+FILE_SIZE = 100
+# Token handed to EpcClient; None when the EPC_AUTH_TOKEN environment variable is not set.
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
+# USER_ID and PORTFOLIO_ID form the S3 key prefix ("<USER_ID>/<PORTFOLIO_ID>/...") used by app().
+USER_ID = 2
+PORTFOLIO_ID = 47
+
+
+def _split_evenly(total, parts):
+    """Split ``total`` into ``parts`` non-negative integers that sum exactly to ``total``.
+
+    Any remainder is handed out one at a time to the leading parts, so the
+    sizes differ by at most one. A negative ``total`` is treated as zero.
+    """
+    base, extra = divmod(max(total, 0), parts)
+    return [base + 1 if position < extra else base for position in range(parts)]
+
+
+def app():
+    """
+    Build the test input CSV and upload it to S3.
+
+    Reads the seed list from "input_property_list.csv" (relative to the current working
+    directory), tops it up to FILE_SIZE rows with records fetched from the EPC client, and
+    uploads the combined CSV to the "retrofit-plan-inputs-dev" bucket under
+    "<USER_ID>/<PORTFOLIO_ID>/test_inputs.csv".
+
+    Of the rows that are added, 80% are requested from energy bands G-D and the remaining
+    20% from bands C-A, split as evenly as possible within each group.
+
+    Returns:
+    dict: The payload describing the uploaded file (portfolio id, housing type, goal and the
+    S3 key). NOTE(review): presumably the request body used to trigger the recommendation
+    engine - confirm against the consumer.
+
+    Raises:
+    RuntimeError: If save_csv_to_s3 reports that the upload failed.
+    """
+    starting_csv = pd.read_csv("input_property_list.csv")
+
+    # Clamp at zero so a seed file that is already larger than FILE_SIZE never produces
+    # negative request sizes.
+    remaining_files_to_sample = max(FILE_SIZE - len(starting_csv), 0)
+
+    # For the remaining addresses, 80% of them will be EPC D and below and the remaining 20%
+    # will be EPC C and above
+    n_epc_d_below = int(remaining_files_to_sample * 0.8)
+    n_epc_c_above = remaining_files_to_sample - n_epc_d_below
+
+    # Rounding every band up independently would overshoot the target (e.g. 10 rows over
+    # three bands -> 4 + 4 + 4), so the remainder is distributed explicitly instead.
+    low_bands = ("g", "f", "e", "d")
+    high_bands = ("c", "b", "a")
+    band_sizes = dict(zip(low_bands, _split_evenly(n_epc_d_below, len(low_bands))))
+    band_sizes.update(zip(high_bands, _split_evenly(n_epc_c_above, len(high_bands))))
+
+    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)
+
+    # Combine the final data, worst band first (g, f, e, d, c, b, a)
+    final_data = []
+    for band, size in band_sizes.items():
+        if size == 0:
+            # Nothing is needed from this band, so no request is made.
+            continue
+        response = epc_client.domestic.search(params={"energy-band": band}, size=size)
+        final_data.extend(response["rows"])
+
+    # Explicit columns keep the frame well-formed even when no extra rows were fetched.
+    final_csv_data = pd.DataFrame(
+        [{"address": x["address"], "postcode": x["postcode"], "Notes": None} for x in final_data],
+        columns=["address", "postcode", "Notes"],
+    )
+
+    final_csv_data = pd.concat([starting_csv, final_csv_data]).reset_index(drop=True)
+
+    # Store the data in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
+    uploaded = save_csv_to_s3(
+        dataframe=final_csv_data,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+    if not uploaded:
+        # save_csv_to_s3 reports failure through its return value; surface it rather than
+        # carrying on as if the file existed.
+        raise RuntimeError(f"Failed to upload {filename} to S3")
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename
+    }
+    # Previously built and discarded; returning it makes the payload available to callers.
+    return body
diff --git a/utils/s3.py b/utils/s3.py
index 7414da3f..e63b7192 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -1,5 +1,5 @@
import boto3
-from io import BytesIO
+from io import BytesIO, StringIO
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
import pandas as pd
from utils.logger import setup_logger
@@ -113,3 +113,33 @@ def read_dataframe_from_s3_parquet(bucket_name, file_key):
df = pd.read_parquet(parquet_buffer)
return df
+
+
+def save_csv_to_s3(dataframe, bucket_name, file_name):
+    """
+    Serialise a Pandas DataFrame as CSV and upload it to an S3 bucket.
+
+    The CSV is rendered in memory (without the index column) and sent with a single
+    put_object call; nothing is written to local disk.
+
+    Parameters:
+    dataframe (pd.DataFrame): The DataFrame to upload.
+    bucket_name (str): The destination S3 bucket.
+    file_name (str): The object key to write inside the bucket.
+
+    Returns:
+    bool: True when the upload call completed, False when it raised (the error is printed).
+    """
+    client = boto3.client('s3')
+
+    # Render the CSV into an in-memory text buffer
+    text_stream = StringIO()
+    dataframe.to_csv(text_stream, index=False)
+
+    # Any failure during the upload is reported and turned into a False result
+    try:
+        client.put_object(
+            Body=text_stream.getvalue(),
+            Bucket=bucket_name,
+            Key=file_name,
+        )
+    except Exception as err:
+        print(f"An error occurred: {err}")
+        return False
+    else:
+        return True