From d0687987bbf7ebf0f6b2cad9361ae3dd63002b1a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Sep 2023 16:01:13 +0100 Subject: [PATCH 1/3] added back in the login to ecr --- .github/workflows/deploy_sap_model_lambda.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/deploy_sap_model_lambda.yml b/.github/workflows/deploy_sap_model_lambda.yml index 7a9939aa..dec16f6d 100644 --- a/.github/workflows/deploy_sap_model_lambda.yml +++ b/.github/workflows/deploy_sap_model_lambda.yml @@ -50,6 +50,10 @@ jobs: - name: Setup Docker uses: docker/setup-buildx-action@v1 + - name: Login to ECR + run: | + aws ecr get-login-password --region eu-west-2 | docker login --username AWS --password-stdin ${{ steps.set_ecr_credentials.outputs.ecr_uri }} + # Building and pushing Docker image with caching - name: Build and push Docker image uses: docker/build-push-action@v4 From 7bd15357d607685d878521ba60a1b231fbcf35bc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Sep 2023 17:52:11 +0100 Subject: [PATCH 2/3] docker working woo --- .../simulation_system/core/DataLoader.py | 1 - .../handlers/predictions_app.py | 28 ++++++------------- model_data/simulation_system/predictions.py | 4 ++- .../requirements/predictions/predictions.txt | 5 ++-- 4 files changed, 15 insertions(+), 23 deletions(-) diff --git a/model_data/simulation_system/core/DataLoader.py b/model_data/simulation_system/core/DataLoader.py index a6c9cfc0..9cb82a8a 100644 --- a/model_data/simulation_system/core/DataLoader.py +++ b/model_data/simulation_system/core/DataLoader.py @@ -127,7 +127,6 @@ class S3DataLoader: @staticmethod def load(filepath: str, index_col: str | None = None) -> pd.DataFrame: - filepath_split = filepath.split("s3://")[-1].split("/", 1) bucket = filepath_split[0] key = filepath_split[1] diff --git a/model_data/simulation_system/handlers/predictions_app.py b/model_data/simulation_system/handlers/predictions_app.py index c7e61338..2ecdd522 100644 --- a/model_data/simulation_system/handlers/predictions_app.py +++ b/model_data/simulation_system/handlers/predictions_app.py @@ -13,35 +13,25 @@ def handler(event, context): # Assuming a file in a bucket landing for now? # Assuming we have a model to use - # bucket = event["Records"][0]["s3"]["bucket"]["name"] - # key = urllib.parse.unquote_plus( - # event["Records"][0]["s3"]["bucket"]["key"], encoding="utf-8" - # ) payload = event["body"] data_path = payload["file_location"] property_id = payload["property_id"] portfolio_id = payload["portfolio_id"] created_at = payload["created_at"] - # prediction_file = bucket + "/" + key - - # TODO: put a model into s3, both locally and in aws - # model_path = os.environ.get("MODEL_PATH", "http://minio:9000/data/model_directory/") - model_path = os.environ.get( - "MODEL_PATH", - f"s3://retrofit-model-directory-{RUNTIME_ENVIRONMENT}/RDSAP_CHANGE/autogluon/rdsap_change-medium_quality-30" - "-2023-08-30_11-43-41/deployment/", - ) - try: - outputs = prediction(model_path=model_path, data_path=data_path) + # We could fix the model path but for the moment, we just take the best model path based on the registry + outputs = prediction(model_path=None, data_path=data_path) # Store into s3, with key of {portfolio_id}-{property_id} - outputs.to_csv( - f"s3://retrofit-sap-prediction-{RUNTIME_ENVIRONMENT}/{portfolio_id}/{property_id}/{created_at}.csv" - ) - except (Exception, KeyError, ValueError): + storage_filepath = f"s3://retrofit-sap-predictions-{RUNTIME_ENVIRONMENT}/{portfolio_id}/{property_id}/" \ + f"{created_at}.csv" + outputs.to_csv(storage_filepath) + return storage_filepath + + except (Exception, KeyError, ValueError) as e: print("Prediction failed") + print(e) if __name__ == "__main__": diff --git a/model_data/simulation_system/predictions.py b/model_data/simulation_system/predictions.py index 9ff8ebc2..1aa3defa 100644 --- a/model_data/simulation_system/predictions.py +++ b/model_data/simulation_system/predictions.py @@ -26,6 +26,7 @@ RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev") CLIENT = S3FSClient(runtime_environment=RUNTIME_ENVIRONMENT) + # FOR TESTING # For now just loading data first and then passing into function (i.e. as if we receive json data and convert to # DataFrame) @@ -142,7 +143,8 @@ def prediction( logger.info("--- Generating Predictions ---") prediction = model.generate_predictions(data=data) - return pd.concat([data["id"], prediction], axis=1) + + return pd.concat([pd.Series(data.index, name='id'), prediction], axis=1) # Save prediction some where? # prediction.to_csv("s3?") diff --git a/model_data/simulation_system/requirements/predictions/predictions.txt b/model_data/simulation_system/requirements/predictions/predictions.txt index 5d545ccb..6aeeaa45 100644 --- a/model_data/simulation_system/requirements/predictions/predictions.txt +++ b/model_data/simulation_system/requirements/predictions/predictions.txt @@ -1,5 +1,6 @@ +boto3 autogluon==0.8.2 pandas==1.5.3 -s3fs==2023.6.0 +s3fs seaborn==0.12.2 -matplotlib==3.7.2 +matplotlib==3.7.2 \ No newline at end of file From c3fac38388c8ff9c690ff11a580150fb610fff3b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 1 Sep 2023 18:03:42 +0100 Subject: [PATCH 3/3] changing platform for github actions deployment --- .github/workflows/deploy_sap_model_lambda.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/deploy_sap_model_lambda.yml b/.github/workflows/deploy_sap_model_lambda.yml index dec16f6d..0f72e3e3 100644 --- a/.github/workflows/deploy_sap_model_lambda.yml +++ b/.github/workflows/deploy_sap_model_lambda.yml @@ -64,6 +64,7 @@ jobs: tags: ${{ steps.set_ecr_credentials.outputs.ecr_uri }}:${{ github.sha }} cache-from: type=gha cache-to: type=gha,mode=max + platform: linux/amd64 - name: Deploy to AWS Lambda via Serverless env: