diff --git a/.github/workflows/deploy_sap_model_lambda.yml b/.github/workflows/deploy_sap_model_lambda.yml index c9be9083..7a9939aa 100644 --- a/.github/workflows/deploy_sap_model_lambda.yml +++ b/.github/workflows/deploy_sap_model_lambda.yml @@ -22,10 +22,6 @@ jobs: npm install -g serverless npm install -g serverless-domain-manager - - name: Install dependencies - run: | - python -m pip install --upgrade pip - - name: AWS credentials for dev if: github.ref == 'refs/heads/dev' uses: aws-actions/configure-aws-credentials@v1 @@ -54,28 +50,16 @@ jobs: - name: Setup Docker uses: docker/setup-buildx-action@v1 - - name: Cache Docker layers - uses: actions/cache@v2 + # Building and pushing Docker image with caching + - name: Build and push Docker image + uses: docker/build-push-action@v4 with: - path: /tmp/.buildx-cache - key: ${{ runner.os }}-buildx-${{ github.sha }} - restore-keys: | - ${{ runner.os }}-buildx- - - - name: Build Docker Image - run: | - cd model_data/simulation_system - docker buildx create --use - docker buildx build --load --cache-from=type=local,src=/tmp/.buildx-cache --cache-to=type=local,dest=/tmp/.buildx-cache --build-arg BUILDKIT_INLINE_CACHE=1 --tag lambda-sap-prediction-image:${{ github.sha }} -f Dockerfiles/Dockerfile.prediction.lambda . - - - name: Login to ECR - run: | - aws ecr get-login-password --region eu-west-2 | docker login --username AWS --password-stdin ${{ steps.set_ecr_credentials.outputs.ecr_uri }} - - - name: Tag and Push Docker Image to ECR - run: | - docker tag lambda-sap-prediction-image:${{ github.sha }} ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }} - docker push ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }} + context: ./model_data/simulation_system + file: ./model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda + push: true + tags: ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }} + cache-from: type=gha + cache-to: type=gha,mode=max - name: Deploy to AWS Lambda via Serverless env: diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index a90c4a78..5f623313 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -108,6 +108,12 @@ module "retrofit_sap_predictions" { allowed_origins = var.allowed_origins } +module "retrofit_sap_data" { + source = "./modules/s3" + bucketname = "retrofit-data-${var.stage}" + allowed_origins = var.allowed_origins +} + # Set up the route53 record for the API module "route53" { diff --git a/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda b/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda index bd7479e6..f3708ed9 100644 --- a/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda +++ b/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda @@ -1,20 +1,22 @@ FROM public.ecr.aws/lambda/python:3.10 # Set the working directory -WORKDIR ${LAMBDA_TASK_ROOT} +WORKDIR ${LAMBDA_TASK_ROOT}/simulation_system +ENV PYTHONPATH "${PYTHONPATH}:${LAMBDA_TASK_ROOT}/simulation_system" # Install necessary build tools - required to test locally -# RUN yum install -y gcc python3-devel +RUN yum install -y gcc python3-devel # Install python packages -COPY requirements/predictions/predictions.txt ./simulation_system/requirements.txt -RUN pip install --no-cache-dir -r ./simulation_system/requirements.txt +COPY requirements/predictions/predictions.txt ./requirements.txt +RUN pip install --no-cache-dir -r ./requirements.txt # Copy the project code to the working directory -COPY ./core ./simulation_system/core -COPY ./MLModel ./simulation_system/MLModel -COPY ./predictions.py ./simulation_system/predictions.py -COPY ./handlers/predictions_app.py ./simulation_system/predictions_app.py +COPY ./core ./core +COPY ./MLModel ./MLModel +COPY ./predictions.py ./predictions.py +COPY ./handlers/predictions_app.py ./predictions_app.py +COPY ./__init__.py ./__init__.py # Run off a lambda trigger CMD [ "simulation_system.predictions_app.handler" ] diff --git a/model_data/simulation_system/handlers/predictions_app.py b/model_data/simulation_system/handlers/predictions_app.py index 2b47c0db..c7e61338 100644 --- a/model_data/simulation_system/handlers/predictions_app.py +++ b/model_data/simulation_system/handlers/predictions_app.py @@ -1,6 +1,6 @@ import os import urllib.parse -from ..predictions import prediction +from predictions import prediction RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev") @@ -17,7 +17,6 @@ def handler(event, context): # key = urllib.parse.unquote_plus( # event["Records"][0]["s3"]["bucket"]["key"], encoding="utf-8" # ) - payload = event["body"] data_path = payload["file_location"] property_id = payload["property_id"] @@ -43,3 +42,7 @@ def handler(event, context): except (Exception, KeyError, ValueError): print("Prediction failed") + + +if __name__ == "__main__": + handler() diff --git a/model_data/simulation_system/predictions.py b/model_data/simulation_system/predictions.py index da987978..9ff8ebc2 100644 --- a/model_data/simulation_system/predictions.py +++ b/model_data/simulation_system/predictions.py @@ -114,7 +114,7 @@ def prediction( if data_path and data is None: logger.info("Loading data from provided path") dataloader = dataloader_factory(runtime_environment=RUNTIME_ENVIRONMENT) - data = dataloader.load(filepath=data_path, index_col="UPRN") + data = dataloader.load(filepath=data_path, index_col="id") if data is None: raise ValueError("No data loaded") @@ -142,7 +142,7 @@ def prediction( logger.info("--- Generating Predictions ---") prediction = model.generate_predictions(data=data) - return pd.concat([data["recommendation_id"], prediction], axis=1) + return pd.concat([data["id"], prediction], axis=1) # Save prediction some where? # prediction.to_csv("s3?") @@ -175,7 +175,8 @@ if __name__ == "__main__": args = ingest_arguments() # Data can be passed in as JSON string: python3 predictions.py --data '{"TOTAL_FLOOR_AREA": 1}' - # Data path can be passed as so: python3 predictions.py --data-path ./model_build_data/change_data/rdsap_full/test_data.parquet + # Data path can be passed as so: python3 predictions.py --data-path + # ./model_build_data/change_data/rdsap_full/test_data.parquet prediction( target_column=args.target_column, model_path=args.model_path, diff --git a/model_data/simulation_system/requirements/predictions/predictions.txt b/model_data/simulation_system/requirements/predictions/predictions.txt index 07b3e6d2..5d545ccb 100644 --- a/model_data/simulation_system/requirements/predictions/predictions.txt +++ b/model_data/simulation_system/requirements/predictions/predictions.txt @@ -1,3 +1,5 @@ autogluon==0.8.2 pandas==1.5.3 s3fs==2023.6.0 +seaborn==0.12.2 +matplotlib==3.7.2