From 058194fc14128302708e8ed880110ef13b3ebd35 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Sep 2023 12:06:14 +0100 Subject: [PATCH 1/5] working on dockerfile and handler and added new s3 bucket --- infrastructure/terraform/main.tf | 6 ++++++ .../Dockerfiles/Dockerfile.prediction.lambda | 16 +++++++++------- .../handlers/predictions_app.py | 10 ++++++++-- .../requirements/predictions/predictions.txt | 2 ++ 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index a90c4a78..5f623313 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -108,6 +108,12 @@ module "retrofit_sap_predictions" { allowed_origins = var.allowed_origins } +module "retrofit_sap_data" { + source = "./modules/s3" + bucketname = "retrofit-data-${var.stage}" + allowed_origins = var.allowed_origins +} + # Set up the route53 record for the API module "route53" { diff --git a/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda b/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda index bd7479e6..95da89c4 100644 --- a/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda +++ b/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda @@ -1,20 +1,22 @@ FROM public.ecr.aws/lambda/python:3.10 # Set the working directory -WORKDIR ${LAMBDA_TASK_ROOT} +WORKDIR ${LAMBDA_TASK_ROOT}/simulation_system # Install necessary build tools - required to test locally -# RUN yum install -y gcc python3-devel +RUN yum install -y gcc python3-devel # Install python packages COPY requirements/predictions/predictions.txt ./simulation_system/requirements.txt +COPY requirements/predictions/predictions-dev.txt ./simulation_system/requirements.txt RUN pip install --no-cache-dir -r ./simulation_system/requirements.txt # Copy the project code to the working directory -COPY ./core ./simulation_system/core -COPY ./MLModel ./simulation_system/MLModel -COPY ./predictions.py ./simulation_system/predictions.py -COPY ./handlers/predictions_app.py ./simulation_system/predictions_app.py +COPY ./core ./core +COPY ./MLModel ./MLModel +COPY ./predictions.py ./predictions.py +COPY ./handlers/predictions_app.py ./predictions_app.py +COPY ./__init__.py ./__init__.py # Run off a lambda trigger -CMD [ "simulation_system.predictions_app.handler" ] +CMD [ "predictions_app.handler" ] diff --git a/model_data/simulation_system/handlers/predictions_app.py b/model_data/simulation_system/handlers/predictions_app.py index 02b1673f..6e827388 100644 --- a/model_data/simulation_system/handlers/predictions_app.py +++ b/model_data/simulation_system/handlers/predictions_app.py @@ -1,14 +1,16 @@ import os import urllib.parse -from ..predictions import prediction +from predictions import prediction RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev") -def handler(event, context): +def handler(): """ Take in event and trigger the prediction pipeline """ + print("HIHIHI") + event = {} # Assuming a file in a bucket landing for now? # Assuming we have a model to use @@ -43,3 +45,7 @@ def handler(event, context): except (Exception, KeyError, ValueError): print("Prediction failed") + + +if __name__ == "__main__": + handler() diff --git a/model_data/simulation_system/requirements/predictions/predictions.txt b/model_data/simulation_system/requirements/predictions/predictions.txt index 07b3e6d2..5d545ccb 100644 --- a/model_data/simulation_system/requirements/predictions/predictions.txt +++ b/model_data/simulation_system/requirements/predictions/predictions.txt @@ -1,3 +1,5 @@ autogluon==0.8.2 pandas==1.5.3 s3fs==2023.6.0 +seaborn==0.12.2 +matplotlib==3.7.2 From 17b5464acf17883cd37ea405b2ba3f8f9bd6acb0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Sep 2023 14:38:34 +0100 Subject: [PATCH 2/5] got the dockerfile working completely --- .../Dockerfiles/Dockerfile.prediction.lambda | 8 ++++---- .../simulation_system/handlers/predictions_app.py | 7 ++----- model_data/simulation_system/predictions.py | 10 ++++++---- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda b/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda index 95da89c4..f3708ed9 100644 --- a/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda +++ b/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda @@ -2,14 +2,14 @@ FROM public.ecr.aws/lambda/python:3.10 # Set the working directory WORKDIR ${LAMBDA_TASK_ROOT}/simulation_system +ENV PYTHONPATH "${PYTHONPATH}:${LAMBDA_TASK_ROOT}/simulation_system" # Install necessary build tools - required to test locally RUN yum install -y gcc python3-devel # Install python packages -COPY requirements/predictions/predictions.txt ./simulation_system/requirements.txt -COPY requirements/predictions/predictions-dev.txt ./simulation_system/requirements.txt -RUN pip install --no-cache-dir -r ./simulation_system/requirements.txt +COPY requirements/predictions/predictions.txt ./requirements.txt +RUN pip install --no-cache-dir -r ./requirements.txt # Copy the project code to the working directory COPY ./core ./core @@ -19,4 +19,4 @@ COPY ./handlers/predictions_app.py ./predictions_app.py COPY ./__init__.py ./__init__.py # Run off a lambda trigger -CMD [ "predictions_app.handler" ] +CMD [ "simulation_system.predictions_app.handler" ] diff --git a/model_data/simulation_system/handlers/predictions_app.py b/model_data/simulation_system/handlers/predictions_app.py index 6e827388..c7e61338 100644 --- a/model_data/simulation_system/handlers/predictions_app.py +++ b/model_data/simulation_system/handlers/predictions_app.py @@ -5,12 +5,10 @@ from predictions import prediction RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev") -def handler(): +def handler(event, context): """ Take in event and trigger the prediction pipeline """ - print("HIHIHI") - event = {} # Assuming a file in a bucket landing for now? # Assuming we have a model to use @@ -19,7 +17,6 @@ def handler(): # key = urllib.parse.unquote_plus( # event["Records"][0]["s3"]["bucket"]["key"], encoding="utf-8" # ) - payload = event["body"] data_path = payload["file_location"] property_id = payload["property_id"] @@ -32,7 +29,7 @@ def handler(): # model_path = os.environ.get("MODEL_PATH", "http://minio:9000/data/model_directory/") model_path = os.environ.get( "MODEL_PATH", - "s3://retrofit-model-directory-{RUNTIME_ENVIRONMENT}/RDSAP_CHANGE/autogluon/rdsap_change-medium_quality-30" + f"s3://retrofit-model-directory-{RUNTIME_ENVIRONMENT}/RDSAP_CHANGE/autogluon/rdsap_change-medium_quality-30" "-2023-08-30_11-43-41/deployment/", ) diff --git a/model_data/simulation_system/predictions.py b/model_data/simulation_system/predictions.py index 22104993..285edafb 100644 --- a/model_data/simulation_system/predictions.py +++ b/model_data/simulation_system/predictions.py @@ -23,6 +23,7 @@ from core.Settings import ( TIMESTAMP = datetime.now().strftime(TIMESTAMP_FORMAT) RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev") + # FOR TESTING # For now just loading data first and then passing into function (i.e. as if we receive json data and convert to # DataFrame) @@ -76,7 +77,7 @@ def prediction( exit(1) elif RUNTIME_ENVIRONMENT == "dev": registry_path = ( - "s3://retrofit-model-directory-dev/RDSAP_CHANGE/model_registry.csv" + "s3://retrofit-model-directory-dev/model_directory/RDSAP_CHANGE/model_registry.csv" ) else: raise NotImplemented("TO be implemented") @@ -108,7 +109,7 @@ def prediction( if data_path and data is None: logger.info("Loading data from provided path") dataloader = dataloader_factory(runtime_environment=RUNTIME_ENVIRONMENT) - data = dataloader.load(filepath=data_path, index_col="UPRN") + data = dataloader.load(filepath=data_path, index_col="id") if data is None: raise ValueError("No data loaded") @@ -134,7 +135,7 @@ def prediction( logger.info("--- Generating Predictions ---") prediction = model.generate_predictions(data=data) - return pd.concat([data["recommendation_id"], prediction], axis=1) + return pd.concat([data["id"], prediction], axis=1) # Save prediction some where? # prediction.to_csv("s3?") @@ -167,7 +168,8 @@ if __name__ == "__main__": args = ingest_arguments() # Data can be passed in as JSON string: python3 predictions.py --data '{"TOTAL_FLOOR_AREA": 1}' - # Data path can be passed as so: python3 predictions.py --data-path ./model_build_data/change_data/rdsap_full/test_data.parquet + # Data path can be passed as so: python3 predictions.py --data-path + # ./model_build_data/change_data/rdsap_full/test_data.parquet prediction( target_column=args.target_column, model_path=args.model_path, From 118fec54f6698d56563d8a92d2a7cd88ba50d142 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Sep 2023 14:51:13 +0100 Subject: [PATCH 3/5] testing streamlined approach to pushing and caching in github actions --- .github/workflows/deploy_sap_model_lambda.yml | 33 +++++-------------- 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/.github/workflows/deploy_sap_model_lambda.yml b/.github/workflows/deploy_sap_model_lambda.yml index c9be9083..113a3ecf 100644 --- a/.github/workflows/deploy_sap_model_lambda.yml +++ b/.github/workflows/deploy_sap_model_lambda.yml @@ -22,10 +22,6 @@ jobs: npm install -g serverless npm install -g serverless-domain-manager - - name: Install dependencies - run: | - python -m pip install --upgrade pip - - name: AWS credentials for dev if: github.ref == 'refs/heads/dev' uses: aws-actions/configure-aws-credentials@v1 @@ -54,28 +50,15 @@ jobs: - name: Setup Docker uses: docker/setup-buildx-action@v1 - - name: Cache Docker layers - uses: actions/cache@v2 + # Building and pushing Docker image with caching + - name: Build and push Docker image + uses: docker/build-push-action@v4 with: - path: /tmp/.buildx-cache - key: ${{ runner.os }}-buildx-${{ github.sha }} - restore-keys: | - ${{ runner.os }}-buildx- - - - name: Build Docker Image - run: | - cd model_data/simulation_system - docker buildx create --use - docker buildx build --load --cache-from=type=local,src=/tmp/.buildx-cache --cache-to=type=local,dest=/tmp/.buildx-cache --build-arg BUILDKIT_INLINE_CACHE=1 --tag lambda-sap-prediction-image:${{ github.sha }} -f Dockerfiles/Dockerfile.prediction.lambda . - - - name: Login to ECR - run: | - aws ecr get-login-password --region eu-west-2 | docker login --username AWS --password-stdin ${{ steps.set_ecr_credentials.outputs.ecr_uri }} - - - name: Tag and Push Docker Image to ECR - run: | - docker tag lambda-sap-prediction-image:${{ github.sha }} ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }} - docker push ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }} + context: ./model_data/simulation_system + push: true + tags: ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }} + cache-from: type=gha + cache-to: type=gha,mode=max - name: Deploy to AWS Lambda via Serverless env: From e0a7e6dd4b77ecf098ff335749ea3e31e08a2a22 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Sep 2023 14:56:00 +0100 Subject: [PATCH 4/5] specifying the dockerfile --- .github/workflows/deploy_sap_model_lambda.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/deploy_sap_model_lambda.yml b/.github/workflows/deploy_sap_model_lambda.yml index 113a3ecf..f8b8fde3 100644 --- a/.github/workflows/deploy_sap_model_lambda.yml +++ b/.github/workflows/deploy_sap_model_lambda.yml @@ -55,6 +55,7 @@ jobs: uses: docker/build-push-action@v4 with: context: ./model_data/simulation_system + file: Dockerfiles/Dockerfile.prediction.lambda push: true tags: ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }} cache-from: type=gha From 28942a9cd6b0e48de6ff700f162635b654797205 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Sep 2023 15:25:03 +0100 Subject: [PATCH 5/5] specifying full path --- .github/workflows/deploy_sap_model_lambda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_sap_model_lambda.yml b/.github/workflows/deploy_sap_model_lambda.yml index f8b8fde3..7a9939aa 100644 --- a/.github/workflows/deploy_sap_model_lambda.yml +++ b/.github/workflows/deploy_sap_model_lambda.yml @@ -55,7 +55,7 @@ jobs: uses: docker/build-push-action@v4 with: context: ./model_data/simulation_system - file: Dockerfiles/Dockerfile.prediction.lambda + file: ./model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda push: true tags: ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }} cache-from: type=gha