Added download and load for predictions

This commit is contained in:
Michael Duong 2023-09-01 15:43:13 +01:00
commit 3dfe05f77d
6 changed files with 36 additions and 38 deletions

View file

@ -22,10 +22,6 @@ jobs:
npm install -g serverless
npm install -g serverless-domain-manager
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- name: AWS credentials for dev
if: github.ref == 'refs/heads/dev'
uses: aws-actions/configure-aws-credentials@v1
@ -54,28 +50,16 @@ jobs:
- name: Setup Docker
uses: docker/setup-buildx-action@v1
- name: Cache Docker layers
uses: actions/cache@v2
# Building and pushing Docker image with caching
- name: Build and push Docker image
uses: docker/build-push-action@v4
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-
- name: Build Docker Image
run: |
cd model_data/simulation_system
docker buildx create --use
docker buildx build --load --cache-from=type=local,src=/tmp/.buildx-cache --cache-to=type=local,dest=/tmp/.buildx-cache --build-arg BUILDKIT_INLINE_CACHE=1 --tag lambda-sap-prediction-image:${{ github.sha }} -f Dockerfiles/Dockerfile.prediction.lambda .
- name: Login to ECR
run: |
aws ecr get-login-password --region eu-west-2 | docker login --username AWS --password-stdin ${{ steps.set_ecr_credentials.outputs.ecr_uri }}
- name: Tag and Push Docker Image to ECR
run: |
docker tag lambda-sap-prediction-image:${{ github.sha }} ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }}
docker push ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }}
context: ./model_data/simulation_system
file: ./model_data/simulation_system/Dockerfiles/Dockerfile.prediction.lambda
push: true
tags: ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Deploy to AWS Lambda via Serverless
env:

View file

@ -108,6 +108,12 @@ module "retrofit_sap_predictions" {
allowed_origins = var.allowed_origins
}
# Instantiates the local ./modules/s3 module for the retrofit data bucket.
# The bucket name carries the deployment stage as a suffix, and the allowed
# origins are passed straight through from the root variable
# (presumably used for CORS inside the module; confirm in modules/s3).
module "retrofit_sap_data" {
  source          = "./modules/s3"
  bucketname      = "retrofit-data-${var.stage}"
  allowed_origins = var.allowed_origins
}
# Set up the route53 record for the API
module "route53" {

View file

@ -1,20 +1,22 @@
FROM public.ecr.aws/lambda/python:3.10
# Set the working directory
WORKDIR ${LAMBDA_TASK_ROOT}
WORKDIR ${LAMBDA_TASK_ROOT}/simulation_system
ENV PYTHONPATH "${PYTHONPATH}:${LAMBDA_TASK_ROOT}/simulation_system"
# Install necessary build tools - required to test locally
# RUN yum install -y gcc python3-devel
RUN yum install -y gcc python3-devel
# Install python packages
COPY requirements/predictions/predictions.txt ./simulation_system/requirements.txt
RUN pip install --no-cache-dir -r ./simulation_system/requirements.txt
COPY requirements/predictions/predictions.txt ./requirements.txt
RUN pip install --no-cache-dir -r ./requirements.txt
# Copy the project code to the working directory
COPY ./core ./simulation_system/core
COPY ./MLModel ./simulation_system/MLModel
COPY ./predictions.py ./simulation_system/predictions.py
COPY ./handlers/predictions_app.py ./simulation_system/predictions_app.py
COPY ./core ./core
COPY ./MLModel ./MLModel
COPY ./predictions.py ./predictions.py
COPY ./handlers/predictions_app.py ./predictions_app.py
COPY ./__init__.py ./__init__.py
# Run off a lambda trigger
CMD [ "simulation_system.predictions_app.handler" ]

View file

@ -1,6 +1,6 @@
import os
import urllib.parse
from ..predictions import prediction
from predictions import prediction
RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev")
@ -17,7 +17,6 @@ def handler(event, context):
# key = urllib.parse.unquote_plus(
# event["Records"][0]["s3"]["bucket"]["key"], encoding="utf-8"
# )
payload = event["body"]
data_path = payload["file_location"]
property_id = payload["property_id"]
@ -43,3 +42,7 @@ def handler(event, context):
except (Exception, KeyError, ValueError):
print("Prediction failed")
if __name__ == "__main__":
    # Local entry point for running the handler outside of AWS Lambda.
    #
    # handler(event, context) requires both arguments; calling it with none
    # raises TypeError before any prediction work starts. The event built here
    # mirrors the keys the handler reads: event["body"]["file_location"] and
    # event["body"]["property_id"].
    #
    # Usage: python predictions_app.py <file_location> <property_id>
    import sys

    if len(sys.argv) != 3:
        sys.exit("usage: predictions_app.py <file_location> <property_id>")

    # NOTE(review): context is passed as None on the assumption that the
    # handler does not use the Lambda context object; confirm.
    handler(
        {
            "body": {
                "file_location": sys.argv[1],
                "property_id": sys.argv[2],
            }
        },
        None,
    )

View file

@ -114,7 +114,7 @@ def prediction(
if data_path and data is None:
logger.info("Loading data from provided path")
dataloader = dataloader_factory(runtime_environment=RUNTIME_ENVIRONMENT)
data = dataloader.load(filepath=data_path, index_col="UPRN")
data = dataloader.load(filepath=data_path, index_col="id")
if data is None:
raise ValueError("No data loaded")
@ -142,7 +142,7 @@ def prediction(
logger.info("--- Generating Predictions ---")
prediction = model.generate_predictions(data=data)
return pd.concat([data["recommendation_id"], prediction], axis=1)
return pd.concat([data["id"], prediction], axis=1)
# Save prediction some where?
# prediction.to_csv("s3?")
@ -175,7 +175,8 @@ if __name__ == "__main__":
args = ingest_arguments()
# Data can be passed in as JSON string: python3 predictions.py --data '{"TOTAL_FLOOR_AREA": 1}'
# Data path can be passed as so: python3 predictions.py --data-path ./model_build_data/change_data/rdsap_full/test_data.parquet
# Data path can be passed as so: python3 predictions.py --data-path
# ./model_build_data/change_data/rdsap_full/test_data.parquet
prediction(
target_column=args.target_column,
model_path=args.model_path,

View file

@ -1,3 +1,5 @@
autogluon==0.8.2
pandas==1.5.3
s3fs==2023.6.0
seaborn==0.12.2
matplotlib==3.7.2