fixed minor bug with reading in model

2026-07-27 23:35:01 +00:00 · 2023-09-05 12:18:42 +01:00 · 2023-09-05 12:18:42 +01:00 · 244665c27f
commit 244665c27f
parent abb0e23c9f
4 changed files with 41 additions and 20 deletions
--- a/model_data/simulation_system/.gitignore
+++ b/model_data/simulation_system/.gitignore
@ -0,0 +1 @@
+local_model/*
--- a/model_data/simulation_system/MLModel/Models.py
+++ b/model_data/simulation_system/MLModel/Models.py
@ -68,7 +68,7 @@ class AutogluonModel:
        else:
            logger.info(f"Loading model from s3")
            client.download_model(filepath=filepath, model_folder=model_folder)
-            self.model = TabularPredictor.load(path=model_folder)
+            self.model = TabularPredictor.load(path=str(Path(model_folder) / filepath))

    def save_model(self, output_filepath: Path, client: BotoClient) -> None:
        """
--- a/model_data/simulation_system/handlers/predictions_app.py
+++ b/model_data/simulation_system/handlers/predictions_app.py
@ -1,4 +1,7 @@
+import boto3
+from botocore.exceptions import NoCredentialsError
 import json
+from io import StringIO
 import os
 import logging

@ -10,6 +13,33 @@ logger.setLevel(logging.INFO)
 RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev")


+def upload_dataframe_to_s3(df, bucket, s3_file_name):
+    """
+    Upload a pandas DataFrame to an S3 bucket as CSV
+
+    :param df: DataFrame to upload
+    :param bucket: Bucket to upload to
+    :param s3_file_name: S3 object name
+    :return: True if the upload succeeded, False if AWS credentials were unavailable (other errors propagate)
+    """
+
+    # Initialize the S3 client
+    s3 = boto3.client('s3')
+    csv_buffer = StringIO()
+
+    # Write the DataFrame to the buffer as CSV
+    df.to_csv(csv_buffer, index=False)
+
+    try:
+        # Upload the CSV from the buffer to S3
+        s3.put_object(Bucket=bucket, Key=s3_file_name, Body=csv_buffer.getvalue())
+        print(f"Successfully uploaded DataFrame to {bucket}/{s3_file_name}")
+        return True
+    except NoCredentialsError:
+        print("Credentials not available")
+        return False
+
+
 def handler(event, context):
    """
    Take in event and trigger the prediction pipeline
@ -32,9 +62,14 @@ def handler(event, context):
        outputs = prediction(model_path=None, data_path=data_path)
        # Store into s3, with key of {portfolio_id}/{property_id}/{created_at}.csv

-        storage_filepath = f"s3://retrofit-sap-predictions-{RUNTIME_ENVIRONMENT}/{portfolio_id}/{property_id}/" \
-                           f"{created_at}.csv"
-        outputs.to_csv(storage_filepath)
+        storage_filepath = f"{portfolio_id}/{property_id}/{created_at}.csv"
+
+        upload_dataframe_to_s3(
+            df=outputs,
+            bucket=f"retrofit-sap-predictions-{RUNTIME_ENVIRONMENT}",
+            s3_file_name=storage_filepath
+        )
+
        return storage_filepath

    except (Exception, KeyError, ValueError) as e:
--- a/model_data/simulation_system/predictions.py
+++ b/model_data/simulation_system/predictions.py
@ -80,17 +80,6 @@ def prediction(
    Main pipeline function
    """

-    if RUNTIME_ENVIRONMENT == "local":
-        registry_path = BASE_REGISTRY_PATH / target_column / REGISTRY_FILE
-
-        if registry_path is None or not registry_path.exists():
-            logger.error("No registry path provided or registry doesn't exist")
-            exit(1)
-    elif RUNTIME_ENVIRONMENT in ["local-mock", "dev"]:
-        registry_path = "s3://retrofit-model-directory-dev/model_directory/RDSAP_CHANGE/model_registry.csv"
-    else:
-        raise NotImplemented("TO be implemented")
-
    if model_path is not None:
        logger.info("User specified a model to load - ignoring registry")
        model_location = model_path
@ -133,13 +122,10 @@ def prediction(
        if data is None:
            raise ValueError("No data loaded")

-        # # TODO: DOWNSAMPLING DOWN TO JUST USE ONE FOR PREDICTION
-        # data = data.sample(1)
    else:
        logger.info("Using data provided")
        data = json.loads(str(data))
        data = pd.DataFrame([data])
-        print(data)

    logger.info("--- Loading Model ---")

@ -147,8 +133,7 @@ def prediction(
        logger.info("Using an Autogluon model")
        model = AutogluonModel()
    else:
-        logger.error("No other model currently")
-        exit(1)
+        raise ValueError("No other model currently")

    model.load_model(filepath=model_location, client=CLIENT, model_folder="local_model")