Created script to create sample input file

This commit is contained in:
Khalim Conn-Kowlessar 2023-10-11 13:19:23 +08:00
parent b2142a7f8e
commit fec6fab42a
5 changed files with 104 additions and 3 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>

View file

View file

@ -0,0 +1,71 @@
"""
This script creates an input CSV for the recommendation engine and uploads it to S3, so that the uploaded
file can be used for testing
"""
import os
import numpy as np
import pandas as pd
from epc_api.client import EpcClient
from utils.s3 import save_csv_to_s3
# Target number of rows for the generated input file; the script samples
# enough extra addresses to top the starting CSV up to this size.
FILE_SIZE = 100

# Token passed to the EPC API client; None when the variable is not set.
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")

# Used to build the S3 key of the uploaded file and the portfolio id of the
# request body.
USER_ID = 2
PORTFOLIO_ID = 47
def _split_evenly(total, parts):
    """Split ``total`` into ``parts`` non-negative integers that sum to ``total``.

    The remainder is handed out one unit at a time to the leading parts, so no
    two counts differ by more than one. A negative ``total`` is treated as 0.
    """
    base, extra = divmod(max(total, 0), parts)
    return [base + 1 if position < extra else base for position in range(parts)]


def _band_sample_sizes(n_rows):
    """Map each EPC band letter to the number of rows to request for it.

    80% of ``n_rows`` (rounded down) is shared between bands G-D and the rest
    between bands C-A. The counts always sum to exactly ``max(n_rows, 0)``.
    Keys are ordered g, f, e, d, c, b, a.
    """
    n_rows = max(n_rows, 0)
    n_epc_d_below = (n_rows * 4) // 5
    n_epc_c_above = n_rows - n_epc_d_below
    sizes = dict(zip("gfed", _split_evenly(n_epc_d_below, 4)))
    sizes.update(zip("cba", _split_evenly(n_epc_c_above, 3)))
    return sizes


def app():
    """
    Build a test input CSV of FILE_SIZE rows and upload it to S3.

    The rows of "input_property_list.csv" are kept as they are. The file is
    then topped up with addresses from the EPC API: 80% of the extra rows come
    from bands G-D and 20% from bands C-A, spread evenly within each group.
    If the starting CSV already has FILE_SIZE rows or more, nothing is sampled.

    Returns:
    dict: The request body describing the uploaded file (portfolio id, goal
    settings and the S3 key of the CSV).

    Raises:
    RuntimeError: If save_csv_to_s3 reports that the upload failed.
    """
    starting_csv = pd.read_csv("input_property_list.csv")
    remaining_files_to_sample = FILE_SIZE - len(starting_csv)

    # For the remaining addresses, 80% of them will be EPC D and below and the
    # remaining 20% will be EPC C and above. The per-band counts sum to exactly
    # the number of rows still needed, so the file never exceeds FILE_SIZE.
    band_sizes = _band_sample_sizes(remaining_files_to_sample)

    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    final_data = []
    for band, size in band_sizes.items():
        if size == 0:
            # Nothing to fetch for this band; avoid a pointless API call.
            continue
        response = epc_client.domestic.search(params={"energy-band": band}, size=size)
        # Slice defensively in case the API returns more rows than requested.
        final_data.extend(response["rows"][:size])

    # Combine the final data
    final_csv_data = pd.DataFrame(
        [{"address": row["address"], "postcode": row["postcode"], "Notes": None} for row in final_data]
    )
    final_csv_data = pd.concat([starting_csv, final_csv_data]).reset_index(drop=True)

    # Store the data in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
    uploaded = save_csv_to_s3(
        dataframe=final_csv_data,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )
    if not uploaded:
        # save_csv_to_s3 signals failure through its return value only; do not
        # carry on as if the test file existed.
        raise RuntimeError(f"Failed to upload {filename} to S3")

    # NOTE(review): the original built this body and then discarded it. It is
    # returned here so a caller can use it; presumably it is the payload that
    # triggers the recommendation engine - confirm.
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "B",
        "trigger_file_path": filename
    }
    return body

View file

@ -1,5 +1,5 @@
import boto3
from io import BytesIO
from io import BytesIO, StringIO
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
import pandas as pd
from utils.logger import setup_logger
@ -113,3 +113,33 @@ def read_dataframe_from_s3_parquet(bucket_name, file_key):
df = pd.read_parquet(parquet_buffer)
return df
def save_csv_to_s3(dataframe, bucket_name, file_name):
    """
    Serialise a Pandas DataFrame as CSV and store it as an object in S3.

    Parameters:
    dataframe (pd.DataFrame): The DataFrame to write out (its index is not written).
    bucket_name (str): The S3 bucket that receives the object.
    file_name (str): The object key to store the CSV under.

    Returns:
    bool: True when the upload call completes, False when it raises.
    """
    client = boto3.client('s3')

    # With no target given, to_csv returns the CSV text as a string.
    csv_text = dataframe.to_csv(index=False)

    try:
        client.put_object(Body=csv_text, Bucket=bucket_name, Key=file_name)
    except Exception as err:
        # Upload failures are printed and signalled through the return value;
        # they are never re-raised.
        print(f"An error occurred: {err}")
        return False
    return True