From 17b5464acf17883cd37ea405b2ba3f8f9bd6acb0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Sep 2023 14:38:34 +0100 Subject: [PATCH] got the dockerfile working completely --- .../Dockerfiles/Dockerfile.prediction.lambda | 8 ++++---- .../simulation_system/handlers/predictions_app.py | 7 ++----- model_data/simulation_system/predictions.py | 10 ++++++---- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda b/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda index 95da89c4..f3708ed9 100644 --- a/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda +++ b/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda @@ -2,14 +2,14 @@ FROM public.ecr.aws/lambda/python:3.10 # Set the working directory WORKDIR ${LAMBDA_TASK_ROOT}/simulation_system +ENV PYTHONPATH "${PYTHONPATH}:${LAMBDA_TASK_ROOT}/simulation_system" # Install necessary build tools - required to test locally RUN yum install -y gcc python3-devel # Install python packages -COPY requirements/predictions/predictions.txt ./simulation_system/requirements.txt -COPY requirements/predictions/predictions-dev.txt ./simulation_system/requirements.txt -RUN pip install --no-cache-dir -r ./simulation_system/requirements.txt +COPY requirements/predictions/predictions.txt ./requirements.txt +RUN pip install --no-cache-dir -r ./requirements.txt # Copy the project code to the working directory COPY ./core ./core @@ -19,4 +19,4 @@ COPY ./handlers/predictions_app.py ./predictions_app.py COPY ./__init__.py ./__init__.py # Run off a lambda trigger -CMD [ "predictions_app.handler" ] +CMD [ "simulation_system.predictions_app.handler" ] diff --git a/model_data/simulation_system/handlers/predictions_app.py b/model_data/simulation_system/handlers/predictions_app.py index 6e827388..c7e61338 100644 --- a/model_data/simulation_system/handlers/predictions_app.py +++ b/model_data/simulation_system/handlers/predictions_app.py @@ -5,12 +5,10 @@ from predictions import prediction RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev") -def handler(): +def handler(event, context): """ Take in event and trigger the prediction pipeline """ - print("HIHIHI") - event = {} # Assuming a file in a bucket landing for now? # Assuming we have a model to use @@ -19,7 +17,6 @@ def handler(): # key = urllib.parse.unquote_plus( # event["Records"][0]["s3"]["bucket"]["key"], encoding="utf-8" # ) - payload = event["body"] data_path = payload["file_location"] property_id = payload["property_id"] @@ -32,7 +29,7 @@ def handler(): # model_path = os.environ.get("MODEL_PATH", "http://minio:9000/data/model_directory/") model_path = os.environ.get( "MODEL_PATH", - "s3://retrofit-model-directory-{RUNTIME_ENVIRONMENT}/RDSAP_CHANGE/autogluon/rdsap_change-medium_quality-30" + f"s3://retrofit-model-directory-{RUNTIME_ENVIRONMENT}/RDSAP_CHANGE/autogluon/rdsap_change-medium_quality-30" "-2023-08-30_11-43-41/deployment/", ) diff --git a/model_data/simulation_system/predictions.py b/model_data/simulation_system/predictions.py index 22104993..285edafb 100644 --- a/model_data/simulation_system/predictions.py +++ b/model_data/simulation_system/predictions.py @@ -23,6 +23,7 @@ from core.Settings import ( TIMESTAMP = datetime.now().strftime(TIMESTAMP_FORMAT) RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev") + # FOR TESTING # For now just loading data first and then passing into function (i.e. as if we receive json data and convert to # DataFrame) @@ -76,7 +77,7 @@ def prediction( exit(1) elif RUNTIME_ENVIRONMENT == "dev": registry_path = ( - "s3://retrofit-model-directory-dev/RDSAP_CHANGE/model_registry.csv" + "s3://retrofit-model-directory-dev/model_directory/RDSAP_CHANGE/model_registry.csv" ) else: raise NotImplemented("TO be implemented") @@ -108,7 +109,7 @@ def prediction( if data_path and data is None: logger.info("Loading data from provided path") dataloader = dataloader_factory(runtime_environment=RUNTIME_ENVIRONMENT) - data = dataloader.load(filepath=data_path, index_col="UPRN") + data = dataloader.load(filepath=data_path, index_col="id") if data is None: raise ValueError("No data loaded") @@ -134,7 +135,7 @@ def prediction( logger.info("--- Generating Predictions ---") prediction = model.generate_predictions(data=data) - return pd.concat([data["recommendation_id"], prediction], axis=1) + return pd.concat([data["id"], prediction], axis=1) # Save prediction some where? # prediction.to_csv("s3?") @@ -167,7 +168,8 @@ if __name__ == "__main__": args = ingest_arguments() # Data can be passed in as JSON string: python3 predictions.py --data '{"TOTAL_FLOOR_AREA": 1}' - # Data path can be passed as so: python3 predictions.py --data-path ./model_build_data/change_data/rdsap_full/test_data.parquet + # Data path can be passed as so: python3 predictions.py --data-path + # ./model_build_data/change_data/rdsap_full/test_data.parquet prediction( target_column=args.target_column, model_path=args.model_path,