diff --git a/model_data/simulation_system/.gitignore b/model_data/simulation_system/.gitignore new file mode 100644 index 00000000..5ec28995 --- /dev/null +++ b/model_data/simulation_system/.gitignore @@ -0,0 +1 @@ +local_model/* \ No newline at end of file diff --git a/model_data/simulation_system/MLModel/Models.py b/model_data/simulation_system/MLModel/Models.py index 4a4f195e..ac8697d5 100644 --- a/model_data/simulation_system/MLModel/Models.py +++ b/model_data/simulation_system/MLModel/Models.py @@ -68,7 +68,7 @@ class AutogluonModel: else: logger.info(f"Loading model from s3") client.download_model(filepath=filepath, model_folder=model_folder) - self.model = TabularPredictor.load(path=model_folder) + self.model = TabularPredictor.load(path=str(Path(model_folder) / filepath)) def save_model(self, output_filepath: Path, client: BotoClient) -> None: """ diff --git a/model_data/simulation_system/handlers/predictions_app.py b/model_data/simulation_system/handlers/predictions_app.py index c12c3f61..5ea0d997 100644 --- a/model_data/simulation_system/handlers/predictions_app.py +++ b/model_data/simulation_system/handlers/predictions_app.py @@ -1,4 +1,7 @@ +import boto3 +from botocore.exceptions import NoCredentialsError import json +from io import StringIO import os import logging @@ -10,6 +13,33 @@ logger.setLevel(logging.INFO) RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev") +def upload_dataframe_to_s3(df, bucket, s3_file_name): + """ + Upload a pandas DataFrame to an S3 bucket as CSV + + :param df: DataFrame to upload + :param bucket: Bucket to upload to + :param s3_file_name: S3 object name + :return: True if file was uploaded, else False + """ + + # Initialize the S3 client + s3 = boto3.client('s3') + csv_buffer = StringIO() + + # Write the DataFrame to the buffer as CSV + df.to_csv(csv_buffer, index=False) + + try: + # Upload the CSV from the buffer to S3 + s3.put_object(Bucket=bucket, Key=s3_file_name, Body=csv_buffer.getvalue()) + print(f"Successfully uploaded DataFrame to {bucket}/{s3_file_name}") + return True + except NoCredentialsError: + print("Credentials not available") + return False + + def handler(event, context): """ Take in event and trigger the prediction pipeline @@ -32,9 +62,14 @@ def handler(event, context): outputs = prediction(model_path=None, data_path=data_path) # Store into s3, with key of {portfolio_id}-{property_id} - storage_filepath = f"s3://retrofit-sap-predictions-{RUNTIME_ENVIRONMENT}/{portfolio_id}/{property_id}/" \ - f"{created_at}.csv" - outputs.to_csv(storage_filepath) + storage_filepath = f"{portfolio_id}/{property_id}/{created_at}.csv" + + upload_dataframe_to_s3( + df=outputs, + bucket=f"retrofit-sap-predictions-{RUNTIME_ENVIRONMENT}", + s3_file_name=storage_filepath + ) + return storage_filepath except (Exception, KeyError, ValueError) as e: diff --git a/model_data/simulation_system/predictions.py b/model_data/simulation_system/predictions.py index 9f1ff344..f158af1e 100644 --- a/model_data/simulation_system/predictions.py +++ b/model_data/simulation_system/predictions.py @@ -80,17 +80,6 @@ def prediction( Main pipeline function """ - if RUNTIME_ENVIRONMENT == "local": - registry_path = BASE_REGISTRY_PATH / target_column / REGISTRY_FILE - - if registry_path is None or not registry_path.exists(): - logger.error("No registry path provided or registry doesn't exist") - exit(1) - elif RUNTIME_ENVIRONMENT in ["local-mock", "dev"]: - registry_path = "s3://retrofit-model-directory-dev/model_directory/RDSAP_CHANGE/model_registry.csv" - else: - raise NotImplemented("TO be implemented") - if model_path is not None: logger.info("User specified a model to load - ignoring registry") model_location = model_path @@ -133,13 +122,10 @@ def prediction( if data is None: raise ValueError("No data loaded") - # # TODO: DOWNSAMPLING DOWN TO JUST USE ONE FOR PREDICTION - # data = data.sample(1) else: logger.info("Using data provided") data = json.loads(str(data)) data = pd.DataFrame([data]) - print(data) logger.info("--- Loading Model ---") @@ -147,8 +133,7 @@ def prediction( logger.info("Using an Autogluon model") model = AutogluonModel() else: - logger.error("No other model currently") - exit(1) + raise ValueError("No other model currently") model.load_model(filepath=model_location, client=CLIENT, model_folder="local_model")