Merge pull request #54 from Hestia-Homes/master

Got deployment working
2026-08-02 12:58:26 +00:00 · 2023-10-03 17:09:36 +01:00 · 2023-10-03 17:09:36 +01:00 · f8409ac63b
commit f8409ac63b
parent 97c1469451 fd07605502
6 changed files with 98 additions and 70 deletions
--- a/deployment/Dockerfile.prediction.lambda
+++ b/deployment/Dockerfile.prediction.lambda
@ -18,7 +18,8 @@ RUN pip install --no-cache-dir -r ./requirements.txt
 # Copy the project code
 COPY modules/ml-pipeline/src/pipeline ./pipeline
 # Copy the handler
-COPY deployment/handlers/prediction_app.py prediction_app.py
+COPY deployment/handlers/prediction_app.py ./pipeline/prediction_app.py
+WORKDIR ${LAMBDA_TASK_ROOT}/pipeline


 CMD [ "prediction_app.handler" ]
--- a/deployment/handlers/prediction_app.py
+++ b/deployment/handlers/prediction_app.py
@ -9,12 +9,14 @@ import json
 from io import StringIO
 import os
 import logging
+from generate_predictions import generate_predictions
+from core.MLModels import model_factory
+from config import settings
+from core.DataClient import dataclient_factory

 logger = logging.getLogger()
 logger.setLevel(logging.INFO)

-RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev")
-

 def upload_dataframe_to_s3(df, bucket, s3_file_name):
    """
@ -57,7 +59,6 @@ def handler(event, context):
            else event["body"]
        )

-        data_path = body["file_location"]
        property_id = body["property_id"]
        portfolio_id = body["portfolio_id"]
        created_at = body["created_at"]
@ -66,11 +67,39 @@ def handler(event, context):

        storage_filepath = f"{portfolio_id}/{property_id}/{created_at}.csv"

-        # upload_dataframe_to_s3(
-        #     df=outputs,
-        #     bucket=f"retrofit-sap-predictions-{RUNTIME_ENVIRONMENT}",
-        #     s3_file_name=storage_filepath
-        # )
+        logger.info("-------------------------")
+        logger.info(f"--- Initiate MLModel ---")
+        logger.info("-------------------------")
+
+        build_model_params = settings.build_model
+        client_params = settings.client
+        feature_process_params = settings.feature_processor
+        generate_predictions_params = settings.generate_predictions
+
+        model = model_factory(build_model_params["model_type"])
+
+        input_dataclient = dataclient_factory(
+            dataclient_type="aws-s3",
+            dataclient_config=client_params["aws-s3"],
+        )
+
+        output_dataclient = dataclient_factory(
+            dataclient_type="aws-s3",
+            dataclient_config=client_params["aws-s3"],
+        )
+
+        generate_predictions(
+            input_dataclient=input_dataclient,
+            output_dataclient=output_dataclient,
+            model=model,
+            target=feature_process_params["feature_processor_config"]["target"],
+            model_filepath=build_model_params["model_save_filepath"],
+            test_data_filepath=body["file_location"],
+            predictions_output_filepath=storage_filepath,
+            predictions_column_name=generate_predictions_params[
+                "predictions_column_name"
+            ],
+        )

        return {
            "statusCode": 200,
--- a/modules/ml-pipeline/.gitignore
+++ b/modules/ml-pipeline/.gitignore
@ -3,3 +3,4 @@
 __pycache__/
 .DS_Store
 .vscode/
+data/
--- a/modules/ml-pipeline/src/pipeline/3_generate_predictions.py
+++ b/modules/ml-pipeline/src/pipeline/3_generate_predictions.py
@ -4,16 +4,11 @@ After the model is built, we can evaluate its performance
 """

 import os
-import yaml
-import pandas as pd
-from pathlib import Path
-from core.interface.InterfaceModels import MLModel
-from core.interface.InterfaceDataClient import DataClient
 from core.DataClient import dataclient_factory
 from core.MLModels import model_factory
 from core.Logger import logger
-from configs.post_prediction_logic import post_prediction_logic
 from config import settings
+from generate_predictions import generate_predictions

 logger.info("----------------------------")
 logger.info(f"--- Initiate Parameters ---")
@ -62,58 +57,6 @@ output_dataclient = dataclient_factory(
 )


-def generate_predictions(
-    input_dataclient: DataClient,
-    output_dataclient: DataClient,
-    model: MLModel,
-    target: str,
-    model_filepath: str,
-    test_data_filepath: str,
-    predictions_output_filepath: str,
-    predictions_column_name: str,
-):
-    """
-    For a given model, we generate prediction and evaluate this against the true target
-    """
-
-    logger.info("-------------------------")
-    logger.info("--- Loading test data ---")
-    logger.info("-------------------------")
-
-    test_data = input_dataclient.load_data(
-        location=test_data_filepath, load_config=None
-    )
-
-    logger.info("---------------------")
-    logger.info("--- Loading model ---")
-    logger.info("---------------------")
-
-    model.load_model(model_filepath)
-
-    logger.info("------------------------------")
-    logger.info("--- Generating predictions ---")
-    logger.info("------------------------------")
-
-    prediction_data = (
-        test_data.drop(columns=target) if target in test_data.columns else test_data
-    )
-
-    predictions = model.predict(
-        data=prediction_data, post_prediction_logic=post_prediction_logic
-    )
-
-    logger.info("--------------------------")
-    logger.info("--- Saving predictions ---")
-    logger.info("--------------------------")
-
-    predictions_df = pd.DataFrame(predictions)
-    predictions_df.columns = [predictions_column_name]
-
-    output_dataclient.save_data(
-        obj=predictions_df, location=predictions_output_filepath, save_config=None
-    )
-
-
 if __name__ == "__main__":

    logger.info("----------------------------")
--- a/modules/ml-pipeline/src/pipeline/data/.gitignore
+++ b/modules/ml-pipeline/src/pipeline/data/.gitignore
@ -1,3 +0,0 @@
-/prepared_data
-/model
-/predictions
--- a/modules/ml-pipeline/src/pipeline/generate_predictions.py
+++ b/modules/ml-pipeline/src/pipeline/generate_predictions.py
@ -0,0 +1,57 @@
+import pandas as pd
+from configs.post_prediction_logic import post_prediction_logic
+from core.interface.InterfaceModels import MLModel
+from core.interface.InterfaceDataClient import DataClient
+from core.Logger import logger
+
+
+def generate_predictions(
+    input_dataclient: DataClient,
+    output_dataclient: DataClient,
+    model: MLModel,
+    target: str,
+    model_filepath: str,
+    test_data_filepath: str,
+    predictions_output_filepath: str,
+    predictions_column_name: str,
+):
+    """
+    Load the test data and the saved model, predict (dropping the target column if present), and save the predictions
+    """
+
+    logger.info("-------------------------")
+    logger.info("--- Loading test data ---")
+    logger.info("-------------------------")
+
+    test_data = input_dataclient.load_data(
+        location=test_data_filepath, load_config=None
+    )
+
+    logger.info("---------------------")
+    logger.info("--- Loading model ---")
+    logger.info("---------------------")
+
+    model.load_model(model_filepath)
+
+    logger.info("------------------------------")
+    logger.info("--- Generating predictions ---")
+    logger.info("------------------------------")
+
+    prediction_data = (
+        test_data.drop(columns=target) if target in test_data.columns else test_data
+    )
+
+    predictions = model.predict(
+        data=prediction_data, post_prediction_logic=post_prediction_logic
+    )
+
+    logger.info("--------------------------")
+    logger.info("--- Saving predictions ---")
+    logger.info("--------------------------")
+
+    predictions_df = pd.DataFrame(predictions)
+    predictions_df.columns = [predictions_column_name]
+
+    output_dataclient.save_data(
+        obj=predictions_df, location=predictions_output_filepath, save_config=None
+    )