From e2ce04aa0db275c8ce0b26bc419fb06e744f0281 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Tue, 3 Oct 2023 11:45:51 +0100
Subject: [PATCH] formatted

---
Review notes (this section is ignored by git am):
 - Deploy.yml: credential steps now match the *-dev / *-prod branch suffix,
   step outputs use $GITHUB_OUTPUT, the S3 bucket outputs are read from the
   correct step id, the image is built from the repository root and sls runs
   from deployment/.
 - Dockerfile: CMD points at the module that is actually copied in.
 - prediction_app.py: malformed requests return 400, failures log a traceback.
 - serverless.yml: S3 permissions narrowed to what the comments describe.
 - The blob ids on the "index" lines are those of the original commit.

 .github/workflows/Deploy.yml            | 119 ++++++++++++++++++++++++
 README.md                               |  25 +++++-
 deployment/Dockerfile.prediction.lambda |  21 +++++
 deployment/handlers/prediction_app.py   | 106 +++++++++++++++++++++
 deployment/serverless.yml               |  60 ++++++++++++
 5 files changed, 328 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/Deploy.yml
 create mode 100644 deployment/Dockerfile.prediction.lambda
 create mode 100644 deployment/handlers/prediction_app.py
 create mode 100644 deployment/serverless.yml

diff --git a/.github/workflows/Deploy.yml b/.github/workflows/Deploy.yml
new file mode 100644
index 0000000..f97bddc
--- /dev/null
+++ b/.github/workflows/Deploy.yml
@@ -0,0 +1,119 @@
+name: Sap Change Model Deploy
+
+on:
+  push:
+    branches: [ sap_change-dev, sap_change-prod ]
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.10.12
+
+      - name: Install Serverless and plugins
+        run: |
+          npm install -g serverless
+          npm install -g serverless-domain-manager
+
+      # Branches are named <model>-dev / <model>-prod, so select credentials by suffix
+      - name: AWS credentials for dev
+        if: endsWith(github.ref_name, '-dev')
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+          aws-region: eu-west-2
+
+      - name: AWS credentials for prod
+        if: endsWith(github.ref_name, '-prod')
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}
+          aws-region: eu-west-2
+
+      # Set up all of the secrets required for the deployment
+      - name: set secret prefix which is used across multiple steps
+        id: secret_prefix
+        run: |
+          # Convert branch name to uppercase and replace hyphens with underscores
+          echo "secret_prefix=$(echo "${{ github.ref_name }}" | tr 'a-z-' 'A-Z_')" >> "$GITHUB_OUTPUT"
+
+      - name: Set domain name
+        id: set_domain
+        run: echo "domain=${{ secrets[format('{0}_DOMAIN_NAME', steps.secret_prefix.outputs.secret_prefix)] }}" >> "$GITHUB_OUTPUT"
+
+      - name: Set ECR credentials
+        id: set_ecr_credentials
+        run: |
+          # Fetch the secret using the secret prefix
+          echo "ecr_uri=${{ secrets[format('{0}_ECR_URI', steps.secret_prefix.outputs.secret_prefix)] }}" >> "$GITHUB_OUTPUT"
+
+      - name: Set S3 buckets
+        id: set_s3_buckets
+        run: |
+          # Fetch the secret using the secret prefix
+          echo "data_bucket=${{ secrets[format('{0}_DATA_BUCKET', steps.secret_prefix.outputs.secret_prefix)] }}" >> "$GITHUB_OUTPUT"
+          echo "predictions_bucket=${{ secrets[format('{0}_PREDICTIONS_BUCKET', steps.secret_prefix.outputs.secret_prefix)] }}" >> "$GITHUB_OUTPUT"
+          echo "model_directory_bucket=${{ secrets[format('{0}_MODEL_DIRECTORY_BUCKET', steps.secret_prefix.outputs.secret_prefix)] }}" >> "$GITHUB_OUTPUT"
+
+      - name: Set stack_name
+        id: set_stack_name
+        run: |
+          if [[ "${{ github.ref_name }}" == "sap_change-dev" || "${{ github.ref_name }}" == "sap_change-prod" ]]; then
+            echo "stack_name=sapmodel" >> "$GITHUB_OUTPUT"
+          else
+            echo "stack_name=" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Set runtime_environment
+        id: set_runtime_environment
+        run: |
+          # Extract the suffix after the hyphen from the branch name
+          runtime_environment=$(echo "${{ github.ref_name }}" | awk -F'-' '{print $NF}')
+          echo "runtime_environment=$runtime_environment" >> "$GITHUB_OUTPUT"
+
+
+      - name: Setup Docker
+        uses: docker/setup-buildx-action@v1
+
+      - name: Login to ECR
+        run: |
+          aws ecr get-login-password --region eu-west-2 | docker login --username AWS --password-stdin ${{ steps.set_ecr_credentials.outputs.ecr_uri }}
+
+      # Building and pushing Docker image with caching
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v3
+        with:
+          # Build from the repository root: the Dockerfile COPYs from modules/ and deployment/
+          context: .
+          file: ./deployment/Dockerfile.prediction.lambda
+          push: true
+          tags: ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          platforms: linux/amd64
+          provenance: false
+
+      - name: Deploy to AWS Lambda via Serverless
+        # serverless.yml lives in deployment/, so sls has to run from there
+        working-directory: deployment
+        env:
+          RUNTIME_ENVIRONMENT: ${{ steps.set_runtime_environment.outputs.runtime_environment }}
+          MODEL_DIRECTORY_BUCKET: ${{ steps.set_s3_buckets.outputs.model_directory_bucket }}
+          PREDICTIONS_BUCKET: ${{ steps.set_s3_buckets.outputs.predictions_bucket }}
+          DATA_BUCKET: ${{ steps.set_s3_buckets.outputs.data_bucket }}
+          DOMAIN_NAME: ${{ steps.set_domain.outputs.domain }}
+          ECR_URI: ${{ steps.set_ecr_credentials.outputs.ecr_uri }}
+          GITHUB_SHA: ${{ github.sha }}
+          STACK_NAME: ${{ steps.set_stack_name.outputs.stack_name }}
+        run: |
+          # Deploy to AWS Lambda via Serverless
+          sls deploy --config serverless.yml --stage ${{ steps.set_runtime_environment.outputs.runtime_environment }} --verbose
diff --git a/README.md b/README.md
index 35242a0..638520a 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 Creating a ML-toolkit that can be reused:
 
 - ML pipeline:
-  - A generic pipeline that has data version control, experiment 
+  - A generic pipeline that has data version control, experiment
     tracking and a model registry
 
 - ML monitoring:
@@ -17,7 +17,26 @@ There are multiple protected branches which adapt the generic pipeline to produc
 These branches will differ by the configuration files that define the data used and the outputs of the ML-pipeline
 - There can be different additional logic for each branch but the pipeline will be the same.
 
-# Deployment 
+# Deployment
 
-TBD
+Scripts associated with deployment can be found in the deployment/ folder.
+Deployment is automated via GitHub Actions, where a deployment is triggered by a push to one of the
+protected branches, with one of dev or prod as the suffix, describing the target environment.
+
+The GitHub Actions workflow will build and push a docker image to ECR and then deploy a lambda
+which produces predictions for the relevant model.
+
+In order for this to be set up, some key environment variables need to be inserted into GitHub
+secrets. Each different model and protected branch has its own set of secrets which allows for flexibility
+between different pipelines.
+
+For example, for the branch sap_change-dev, the prefix=SAP_CHANGE_DEV, and the following secrets are required:
+
+- {prefix}_ECR_URI, which is the URI of the ECR repository to push to. For example, for the
+  sap change model this is the lambda-sap-prediction-dev repository.
+- {prefix}_DOMAIN_NAME, is the custom domain name. This is likely going to be the same across the different
+  models, but is still included in the secrets for flexibility.
+- {prefix}_DATA_BUCKET, is the name of the s3 data bucket where data to be scored by the model is stored
+- {prefix}_MODEL_DIRECTORY_BUCKET, is the name of the s3 bucket where the model is stored
+- {prefix}_PREDICTIONS_BUCKET, is the name of the s3 bucket where the predictions are stored
 
diff --git a/deployment/Dockerfile.prediction.lambda b/deployment/Dockerfile.prediction.lambda
new file mode 100644
index 0000000..8a759fd
--- /dev/null
+++ b/deployment/Dockerfile.prediction.lambda
@@ -0,0 +1,21 @@
+FROM public.ecr.aws/lambda/python:3.10
+
+# Set the working directory
+WORKDIR ${LAMBDA_TASK_ROOT}
+ENV PYTHONPATH="${PYTHONPATH}:${LAMBDA_TASK_ROOT}"
+
+# Install necessary build tools - required to test locally
+RUN yum install -y gcc python3-devel
+
+# Install python packages
+COPY modules/ml-pipeline/src/requirements/predictions/requirements.txt ./requirements.txt
+RUN pip install --no-cache-dir -r ./requirements.txt
+
+# Copy the project code
+COPY modules/ml-pipeline/src/pipeline ./pipeline
+# Copy the handler
+COPY deployment/handlers/prediction_app.py prediction_app.py
+# Get the model
+# RUN dev pull -r ${RUNTIME_ENVIRONMENT}
+
+CMD [ "prediction_app.handler" ]
diff --git a/deployment/handlers/prediction_app.py b/deployment/handlers/prediction_app.py
new file mode 100644
index 0000000..da73742
--- /dev/null
+++ b/deployment/handlers/prediction_app.py
@@ -0,0 +1,106 @@
+"""
+This script is the handler for the lambda prediction function, responsible
+for producing predictions for a model
+"""
+
+import boto3
+from botocore.exceptions import NoCredentialsError
+import json
+from io import StringIO
+import os
+import logging
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev")
+
+
+def upload_dataframe_to_s3(df, bucket, s3_file_name):
+    """
+    Upload a pandas DataFrame to an S3 bucket as CSV
+
+    :param df: DataFrame to upload
+    :param bucket: Bucket to upload to
+    :param s3_file_name: S3 object name
+    :return: True if file was uploaded, False if AWS credentials are missing
+    """
+
+    # Initialize the S3 client
+    s3 = boto3.client("s3")
+    csv_buffer = StringIO()
+
+    # Write the DataFrame to the buffer as CSV
+    df.to_csv(csv_buffer, index=False)
+
+    try:
+        # Upload the CSV from the buffer to S3
+        s3.put_object(Bucket=bucket, Key=s3_file_name, Body=csv_buffer.getvalue())
+        logger.info("Successfully uploaded DataFrame to %s/%s", bucket, s3_file_name)
+        return True
+    except NoCredentialsError:
+        logger.error("Credentials not available")
+        return False
+
+
+def _response(status_code, payload):
+    """
+    Build the API Gateway proxy response for a JSON payload
+    """
+
+    return {"statusCode": status_code, "body": json.dumps(payload)}
+
+
+def handler(event, context):
+    """
+    Take in event and trigger the prediction pipeline
+
+    :param event: Lambda event whose "body" is a JSON string or a dict holding
+        file_location, property_id, portfolio_id and created_at
+    :param context: Lambda context object (unused)
+    :return: Response dict with statusCode 200 on success, 400 for a malformed
+        request and 500 for any other failure
+    """
+
+    logger.info("received event: %s", event)
+
+    # Validate the request separately so caller mistakes are reported as 400
+    # rather than being lumped together with genuine prediction failures
+    try:
+        raw_body = event["body"]
+        body = raw_body if isinstance(raw_body, dict) else json.loads(raw_body)
+
+        logger.info("Inside handler with body: %s", body)
+
+        data_path = body["file_location"]  # noqa: F841 - consumed by the TODO below
+        property_id = body["property_id"]
+        portfolio_id = body["portfolio_id"]
+        created_at = body["created_at"]
+    except (KeyError, TypeError, ValueError) as e:
+        # KeyError: missing field, ValueError: invalid JSON, TypeError: no body
+        logger.warning("Invalid prediction request: %r", e)
+        return _response(400, {"message": "Invalid request", "error": str(e)})
+
+    try:
+        # TODO: Implement the loading of the model and prediction
+
+        storage_filepath = f"{portfolio_id}/{property_id}/{created_at}.csv"
+
+        # upload_dataframe_to_s3(
+        #     df=outputs,
+        #     bucket=f"retrofit-sap-predictions-{RUNTIME_ENVIRONMENT}",
+        #     s3_file_name=storage_filepath
+        # )
+
+        return _response(
+            200,
+            {
+                "message": "Successfully processed input",
+                "storage_filepath": storage_filepath,
+            },
+        )
+
+    except Exception as e:
+        # logger.exception keeps the traceback in the logs for debugging
+        logger.exception("Prediction failed")
+        return _response(500, {"message": "Prediction failed", "error": str(e)})
diff --git a/deployment/serverless.yml b/deployment/serverless.yml
new file mode 100644
index 0000000..e546f13
--- /dev/null
+++ b/deployment/serverless.yml
@@ -0,0 +1,60 @@
+service: ${env:STACK_NAME}
+
+provider:
+  name: aws
+  region: eu-west-2
+  architecture: x86_64
+  environment:
+    RUNTIME_ENVIRONMENT: ${env:RUNTIME_ENVIRONMENT}
+    MODEL_DIRECTORY_BUCKET: ${env:MODEL_DIRECTORY_BUCKET}
+    PREDICTIONS_BUCKET: ${env:PREDICTIONS_BUCKET}
+    DATA_BUCKET: ${env:DATA_BUCKET}
+    DOMAIN_NAME: ${env:DOMAIN_NAME}
+    ECR_URI: ${env:ECR_URI}
+    GITHUB_SHA: ${env:GITHUB_SHA}
+  iam:
+    role:
+      name: ${env:STACK_NAME}_s3_access
+      statements:
+        # Allow reading from MODEL_DIRECTORY_BUCKET and DATA_BUCKET
+        - Effect: Allow
+          Action:
+            - s3:GetObject
+            - s3:ListBucket
+          Resource:
+            - arn:aws:s3:::${env:MODEL_DIRECTORY_BUCKET}
+            - arn:aws:s3:::${env:MODEL_DIRECTORY_BUCKET}/*
+            - arn:aws:s3:::${env:DATA_BUCKET}
+            - arn:aws:s3:::${env:DATA_BUCKET}/*
+        # Allow reading and writing to PREDICTIONS_BUCKET
+        - Effect: Allow
+          Action:
+            - s3:GetObject
+            - s3:PutObject
+            - s3:ListBucket
+          Resource:
+            - arn:aws:s3:::${env:PREDICTIONS_BUCKET}
+            - arn:aws:s3:::${env:PREDICTIONS_BUCKET}/*
+
+
+
+plugins:
+  - serverless-domain-manager
+
+custom:
+  customDomain:
+    domainName: api.${self:provider.environment.DOMAIN_NAME}
+    basePath: ${env:STACK_NAME}
+    createRoute53Record: true
+    certificateArn: ${ssm:/ssl_certificate_arn}
+
+functions:
+  sap_prediction_lambda:
+    image:
+      uri: ${env:ECR_URI}:${env:GITHUB_SHA}
+    events:
+      - http:
+          path: /predict
+          method: POST
+    # NOTE: API Gateway ends HTTP integrations after 29s by default, so callers get a 504 long before this limit
+    timeout: 120 # Set max run time to 2 minutes - we shouldn't need this much time so this can be reviewed