diff --git a/.github/workflows/deploy_sap_model_lambda.yml b/.github/workflows/deploy_sap_model_lambda.yml index c9be9083..113a3ecf 100644 --- a/.github/workflows/deploy_sap_model_lambda.yml +++ b/.github/workflows/deploy_sap_model_lambda.yml @@ -22,10 +22,6 @@ jobs: npm install -g serverless npm install -g serverless-domain-manager - - name: Install dependencies - run: | - python -m pip install --upgrade pip - - name: AWS credentials for dev if: github.ref == 'refs/heads/dev' uses: aws-actions/configure-aws-credentials@v1 @@ -54,28 +50,17 @@ jobs: - name: Setup Docker uses: docker/setup-buildx-action@v1 - - name: Cache Docker layers - uses: actions/cache@v2 + # Building and pushing Docker image with caching + - name: Build and push Docker image + uses: docker/build-push-action@v4 with: - path: /tmp/.buildx-cache - key: ${{ runner.os }}-buildx-${{ github.sha }} - restore-keys: | - ${{ runner.os }}-buildx- - - - name: Build Docker Image - run: | - cd model_data/simulation_system - docker buildx create --use - docker buildx build --load --cache-from=type=local,src=/tmp/.buildx-cache --cache-to=type=local,dest=/tmp/.buildx-cache --build-arg BUILDKIT_INLINE_CACHE=1 --tag lambda-sap-prediction-image:${{ github.sha }} -f Dockerfiles/Dockerfile.prediction.lambda . 
- - name: Login to ECR - run: | - aws ecr get-login-password --region eu-west-2 | docker login --username AWS --password-stdin ${{ steps.set_ecr_credentials.outputs.ecr_uri }} - - - name: Tag and Push Docker Image to ECR - run: | - docker tag lambda-sap-prediction-image:${{ github.sha }} ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }} - docker push ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }} + context: ./model_data/simulation_system + # The Dockerfile is not named "Dockerfile" at the context root, so point the action at it explicitly + file: ./model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda + push: true + tags: ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }} + cache-from: type=gha + cache-to: type=gha,mode=max - name: Deploy to AWS Lambda via Serverless env: diff --git a/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda b/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda index 95da89c4..f3708ed9 100644 --- a/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda +++ b/model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda @@ -2,14 +2,14 @@ FROM public.ecr.aws/lambda/python:3.10 # Set the working directory WORKDIR ${LAMBDA_TASK_ROOT}/simulation_system +ENV PYTHONPATH "${PYTHONPATH}:${LAMBDA_TASK_ROOT}/simulation_system" # Install necessary build tools - required to test locally RUN yum install -y gcc python3-devel # Install python packages -COPY requirements/predictions/predictions.txt ./simulation_system/requirements.txt -COPY requirements/predictions/predictions-dev.txt ./simulation_system/requirements.txt -RUN pip install --no-cache-dir -r ./simulation_system/requirements.txt +COPY requirements/predictions/predictions.txt ./requirements.txt +RUN pip install --no-cache-dir -r ./requirements.txt # Copy the project code to the working directory COPY ./core ./core @@ -19,4 +19,4 @@ COPY ./handlers/predictions_app.py ./predictions_app.py COPY ./__init__.py ./__init__.py # Run off a lambda trigger -CMD [ "predictions_app.handler" ] +CMD [ "simulation_system.predictions_app.handler" ] diff --git 
a/model_data/simulation_system/handlers/predictions_app.py b/model_data/simulation_system/handlers/predictions_app.py index 6e827388..c7e61338 100644 --- a/model_data/simulation_system/handlers/predictions_app.py +++ b/model_data/simulation_system/handlers/predictions_app.py @@ -5,12 +5,10 @@ from predictions import prediction RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev") -def handler(): +def handler(event, context): """ Take in event and trigger the prediction pipeline """ - print("HIHIHI") - event = {} # Assuming a file in a bucket landing for now? # Assuming we have a model to use @@ -19,7 +17,6 @@ def handler(): # key = urllib.parse.unquote_plus( # event["Records"][0]["s3"]["bucket"]["key"], encoding="utf-8" # ) - payload = event["body"] data_path = payload["file_location"] property_id = payload["property_id"] @@ -32,7 +29,7 @@ def handler(): # model_path = os.environ.get("MODEL_PATH", "http://minio:9000/data/model_directory/") model_path = os.environ.get( "MODEL_PATH", - "s3://retrofit-model-directory-{RUNTIME_ENVIRONMENT}/RDSAP_CHANGE/autogluon/rdsap_change-medium_quality-30" + f"s3://retrofit-model-directory-{RUNTIME_ENVIRONMENT}/RDSAP_CHANGE/autogluon/rdsap_change-medium_quality-30" "-2023-08-30_11-43-41/deployment/", ) diff --git a/model_data/simulation_system/predictions.py b/model_data/simulation_system/predictions.py index 22104993..285edafb 100644 --- a/model_data/simulation_system/predictions.py +++ b/model_data/simulation_system/predictions.py @@ -23,6 +23,7 @@ from core.Settings import ( TIMESTAMP = datetime.now().strftime(TIMESTAMP_FORMAT) RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev") + # FOR TESTING # For now just loading data first and then passing into function (i.e. 
as if we receive json data and convert to # DataFrame) @@ -76,7 +77,7 @@ def prediction( exit(1) elif RUNTIME_ENVIRONMENT == "dev": registry_path = ( - "s3://retrofit-model-directory-dev/RDSAP_CHANGE/model_registry.csv" + "s3://retrofit-model-directory-dev/model_directory/RDSAP_CHANGE/model_registry.csv" ) else: - raise NotImplemented("TO be implemented") + raise NotImplementedError("TO be implemented") @@ -108,7 +109,7 @@ def prediction( if data_path and data is None: logger.info("Loading data from provided path") dataloader = dataloader_factory(runtime_environment=RUNTIME_ENVIRONMENT) - data = dataloader.load(filepath=data_path, index_col="UPRN") + data = dataloader.load(filepath=data_path, index_col="id") if data is None: raise ValueError("No data loaded") @@ -134,7 +135,7 @@ def prediction( logger.info("--- Generating Predictions ---") prediction = model.generate_predictions(data=data) - return pd.concat([data["recommendation_id"], prediction], axis=1) + return pd.concat([data["id"], prediction], axis=1) # Save prediction some where? # prediction.to_csv("s3?") @@ -167,7 +168,8 @@ if __name__ == "__main__": args = ingest_arguments() # Data can be passed in as JSON string: python3 predictions.py --data '{"TOTAL_FLOOR_AREA": 1}' - # Data path can be passed as so: python3 predictions.py --data-path ./model_build_data/change_data/rdsap_full/test_data.parquet + # Data path can be passed as so: python3 predictions.py --data-path + # ./model_build_data/change_data/rdsap_full/test_data.parquet prediction( target_column=args.target_column, model_path=args.model_path,