mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Created script to create sample input file
This commit is contained in:
parent
b2142a7f8e
commit
fec6fab42a
5 changed files with 104 additions and 3 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyNamespacePackagesService">
|
||||
|
|
|
|||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -1,6 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
|
||||
<component name="PythonCompatibilityInspectionAdvertiser">
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
|
|
|
|||
0
etl/testing_data/__init__.py
Normal file
0
etl/testing_data/__init__.py
Normal file
71
etl/testing_data/engine_inputs.py
Normal file
71
etl/testing_data/engine_inputs.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
"""
|
||||
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
|
||||
testing
|
||||
"""
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from epc_api.client import EpcClient
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
# Target total number of rows in the generated CSV; app() samples FILE_SIZE minus the seed-file row count from the EPC API.
FILE_SIZE = 100
|
||||
# Token handed to EpcClient; read from the EPC_AUTH_TOKEN environment variable, None when it is unset.
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
|
||||
# First path segment of the S3 key the generated CSV is uploaded under.
USER_ID = 2
|
||||
# Second path segment of the S3 key; also sent as "portfolio_id" in the request body built by app().
PORTFOLIO_ID = 47
|
||||
|
||||
|
||||
def _split_evenly(total, parts):
    """Split ``total`` into ``parts`` non-negative integers that sum exactly to ``total``."""
    base, extra = divmod(max(total, 0), parts)
    # The first ``extra`` parts take one more item so nothing is lost or over-allocated
    return [base + 1 if i < extra else base for i in range(parts)]


def app():
    """
    Build a test input CSV for the recommendation engine and upload it to S3.

    The seed rows are read from ``input_property_list.csv`` and topped up to FILE_SIZE rows with addresses
    fetched from the EPC API: 80% of the top-up comes from energy bands G-D and 20% from bands C-A, spread
    evenly across the bands in each group.

    Returns:
    dict: The request body that references the uploaded file (portfolio id, housing type, goal and the S3 key).

    Raises:
    RuntimeError: If the CSV could not be uploaded to S3.
    """
    starting_csv = pd.read_csv("input_property_list.csv")

    # Clamp at zero so a seed file that already holds FILE_SIZE rows or more never yields negative sample sizes
    remaining_files_to_sample = max(FILE_SIZE - len(starting_csv), 0)

    # For the remaining addresses, 80% of them will be EPC D and below and the remaining 20% will be EPC C and above
    n_epc_d_below = int(remaining_files_to_sample * 0.8)
    n_epc_c_above = remaining_files_to_sample - n_epc_d_below

    # Exact split per band; rounding every band up could push the total past FILE_SIZE
    band_counts = dict(zip("gfed", _split_evenly(n_epc_d_below, 4)))
    band_counts.update(zip("cba", _split_evenly(n_epc_c_above, 3)))

    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # Query the bands in G -> A order and combine the returned rows
    final_data = []
    for band, count in band_counts.items():
        if count == 0:
            # Nothing to sample for this band, so skip the API call entirely
            continue
        band_data = epc_client.domestic.search(params={"energy-band": band}, size=count)
        final_data.extend(band_data["rows"])

    final_csv_data = pd.DataFrame(
        [{"address": x["address"], "postcode": x["postcode"], "Notes": None} for x in final_data],
        # Explicit columns keep the frame well-formed even when no rows were sampled
        columns=["address", "postcode", "Notes"],
    )

    final_csv_data = pd.concat([starting_csv, final_csv_data]).reset_index(drop=True)

    # Store the data in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
    uploaded = save_csv_to_s3(
        dataframe=final_csv_data,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )
    if not uploaded:
        # save_csv_to_s3 reports failure through its return value; surface it instead of carrying on silently
        raise RuntimeError(f"Failed to upload {filename} to S3")

    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "B",
        "trigger_file_path": filename
    }

    # Hand the payload back so the caller can use it; it was previously built and then discarded
    return body
|
||||
32
utils/s3.py
32
utils/s3.py
|
|
@ -1,5 +1,5 @@
|
|||
import boto3
|
||||
from io import BytesIO
|
||||
from io import BytesIO, StringIO
|
||||
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
|
||||
import pandas as pd
|
||||
from utils.logger import setup_logger
|
||||
|
|
@ -113,3 +113,33 @@ def read_dataframe_from_s3_parquet(bucket_name, file_key):
|
|||
df = pd.read_parquet(parquet_buffer)
|
||||
|
||||
return df
|
||||
|
||||
|
||||
def save_csv_to_s3(dataframe, bucket_name, file_name):
    """
    Serialise a Pandas DataFrame as CSV (without the index) and write it to an S3 object.

    Parameters:
    dataframe (pd.DataFrame): The data to serialise.
    bucket_name (str): Destination S3 bucket.
    file_name (str): Object key to write inside the bucket.

    Returns:
    bool: True when the upload call completed, False when it raised an error.
    """
    client = boto3.client('s3')

    # Render the CSV into an in-memory text buffer rather than a file on disk
    text_stream = StringIO()
    dataframe.to_csv(text_stream, index=False)

    try:
        client.put_object(Body=text_stream.getvalue(), Bucket=bucket_name, Key=file_name)
    except Exception as e:
        # Failure is reported through the return value, not by propagating the error
        print(f"An error occurred: {e}")
        return False
    else:
        return True
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue