fixed minor bug with reading in model

This commit is contained in:
Khalim Conn-Kowlessar 2023-09-05 12:18:42 +01:00
parent abb0e23c9f
commit 244665c27f
4 changed files with 41 additions and 20 deletions

View file

@ -0,0 +1 @@
local_model/*

View file

@ -68,7 +68,7 @@ class AutogluonModel:
else:
logger.info(f"Loading model from s3")
client.download_model(filepath=filepath, model_folder=model_folder)
self.model = TabularPredictor.load(path=model_folder)
self.model = TabularPredictor.load(path=str(Path(model_folder) / filepath))
def save_model(self, output_filepath: Path, client: BotoClient) -> None:
"""

View file

@ -1,4 +1,7 @@
import boto3
from botocore.exceptions import NoCredentialsError
import json
from io import StringIO
import os
import logging
@ -10,6 +13,33 @@ logger.setLevel(logging.INFO)
RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev")
def upload_dataframe_to_s3(df, bucket, s3_file_name):
    """
    Upload a pandas DataFrame to an S3 bucket as a CSV object.

    The frame is serialised in memory (index column omitted) and sent with a
    single ``put_object`` call, so nothing is written to local disk.

    :param df: DataFrame to upload
    :param bucket: Name of the bucket to upload to
    :param s3_file_name: S3 object key
    :return: True if the object was uploaded, False if no AWS credentials
        were available. Any other error raised while uploading is NOT
        swallowed and propagates to the caller.
    """
    # Initialize the S3 client
    s3 = boto3.client("s3")

    # Write the DataFrame to an in-memory buffer as CSV
    csv_buffer = StringIO()
    df.to_csv(csv_buffer, index=False)

    try:
        # put_object documents Body as bytes or a file-like object, so encode
        # the CSV text explicitly rather than relying on implicit conversion.
        s3.put_object(
            Bucket=bucket,
            Key=s3_file_name,
            Body=csv_buffer.getvalue().encode("utf-8"),
        )
    except NoCredentialsError:
        # Use the module logger (not print) so the failure is recorded at the
        # right level alongside the rest of this module's log output.
        logger.error("Credentials not available")
        return False

    logger.info(f"Successfully uploaded DataFrame to {bucket}/{s3_file_name}")
    return True
def handler(event, context):
"""
Take in event and trigger the prediction pipeline
@ -32,9 +62,14 @@ def handler(event, context):
outputs = prediction(model_path=None, data_path=data_path)
# Store into s3, with key of {portfolio_id}/{property_id}/{created_at}.csv
storage_filepath = f"s3://retrofit-sap-predictions-{RUNTIME_ENVIRONMENT}/{portfolio_id}/{property_id}/" \
f"{created_at}.csv"
outputs.to_csv(storage_filepath)
storage_filepath = f"{portfolio_id}/{property_id}/{created_at}.csv"
upload_dataframe_to_s3(
df=outputs,
bucket=f"retrofit-sap-predictions-{RUNTIME_ENVIRONMENT}",
s3_file_name=storage_filepath
)
return storage_filepath
except (Exception, KeyError, ValueError) as e:

View file

@ -80,17 +80,6 @@ def prediction(
Main pipeline function
"""
if RUNTIME_ENVIRONMENT == "local":
registry_path = BASE_REGISTRY_PATH / target_column / REGISTRY_FILE
if registry_path is None or not registry_path.exists():
logger.error("No registry path provided or registry doesn't exist")
exit(1)
elif RUNTIME_ENVIRONMENT in ["local-mock", "dev"]:
registry_path = "s3://retrofit-model-directory-dev/model_directory/RDSAP_CHANGE/model_registry.csv"
else:
raise NotImplemented("TO be implemented")
if model_path is not None:
logger.info("User specified a model to load - ignoring registry")
model_location = model_path
@ -133,13 +122,10 @@ def prediction(
if data is None:
raise ValueError("No data loaded")
# # TODO: DOWNSAMPLING DOWN TO JUST USE ONE FOR PREDICTION
# data = data.sample(1)
else:
logger.info("Using data provided")
data = json.loads(str(data))
data = pd.DataFrame([data])
print(data)
logger.info("--- Loading Model ---")
@ -147,8 +133,7 @@ def prediction(
logger.info("Using an Autogluon model")
model = AutogluonModel()
else:
logger.error("No other model currently")
exit(1)
raise ValueError("No other model currently")
model.load_model(filepath=model_location, client=CLIENT, model_folder="local_model")