mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-30 13:10:43 +00:00
commit
ff09528228
5 changed files with 22 additions and 6 deletions
|
|
@ -31,7 +31,7 @@ In order for this to be set up, some key environment variables needs to be inser
|
||||||
secrets. Each different model and protected branch has its own set of secrets which allows for flexibility
|
secrets. Each different model and protected branch has its own set of secrets which allows for flexibility
|
||||||
between different pipelines.
|
between different pipelines.
|
||||||
|
|
||||||
For example, for the branch sap_change-dev, the prefix=SAP_CHANGE_DEV, and the following secrets are:
|
For example, for the branch sap-dev, the prefix is SAP_DEV, and the following secrets are required:
|
||||||
|
|
||||||
- {prefix}_ECR_URI, which is the URI of the ECR repository to push to. For example, for the
|
- {prefix}_ECR_URI, which is the URI of the ECR repository to push to. For example, for the
|
||||||
sap change model this is the lambda-sap-prediction-dev repository.
|
sap change model this is the lambda-sap-prediction-dev repository.
|
||||||
|
|
@ -58,7 +58,7 @@ First, navigate to the root directory of the repository. Open a terminal and exe
|
||||||
2. command to build the Docker image:
|
2. command to build the Docker image:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build -t sap_change -f deployment/Dockerfile.prediction.lambda .
|
docker build -t sap -f deployment/Dockerfile.prediction.lambda .
|
||||||
```
|
```
|
||||||
|
|
||||||
This will build a Docker image tagged as sap_change using the Dockerfile.prediction.lambda located
|
This will build a Docker image tagged as sap using the Dockerfile.prediction.lambda located
|
||||||
|
|
@ -68,7 +68,7 @@ in the deployment directory.
|
||||||
Once the image is built, you can run it using the following command:
|
Once the image is built, you can run it using the following command:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker run -p 9000:8080 -v ~/.aws/credentials:/root/.aws/credentials:ro -e RUNTIME_ENVIRONMENT=dev sap_change
|
docker run -p 9000:8080 -v ~/.aws/credentials:/root/.aws/credentials:ro -e RUNTIME_ENVIRONMENT=dev -e PREDICTIONS_BUCKET=retrofit-sap-predictions-dev sap
|
||||||
```
|
```
|
||||||
This command does the following:
|
This command does the following:
|
||||||
|
|
||||||
|
|
@ -79,6 +79,7 @@ Sets the RUNTIME_ENVIRONMENT variable to dev.
|
||||||
To test the Lambda function, use the following curl command:
|
To test the Lambda function, use the following curl command:
|
||||||
|
|
||||||
```json
|
```bash
|
||||||
curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/test_data_with_id.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\"}"'
|
curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/one_sample_test_dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\"}"}'
|
||||||
```
|
```
|
||||||
|
|
||||||
This will send a POST request to the running Lambda function and pass in the required data as JSON.
|
This will send a POST request to the running Lambda function and pass in the required data as JSON.
|
||||||
|
|
|
||||||
|
|
@ -107,6 +107,7 @@ def handler(event, context):
|
||||||
predictions_column_name=generate_predictions_params[
|
predictions_column_name=generate_predictions_params[
|
||||||
"predictions_column_name"
|
"predictions_column_name"
|
||||||
],
|
],
|
||||||
|
identifier_column=generate_predictions_params["identifier_column"],
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,7 @@ default:
|
||||||
test_data_filepath: ./data/prepared_data/test.parquet
|
test_data_filepath: ./data/prepared_data/test.parquet
|
||||||
predictions_output_filepath: ./data/predictions/predictions.parquet
|
predictions_output_filepath: ./data/predictions/predictions.parquet
|
||||||
predictions_column_name: predictions
|
predictions_column_name: predictions
|
||||||
|
identifier_column: id
|
||||||
|
|
||||||
generate_metrics:
|
generate_metrics:
|
||||||
dataclient_type: local
|
dataclient_type: local
|
||||||
|
|
|
||||||
|
|
@ -142,9 +142,15 @@ class AWSS3Client:
|
||||||
buffer = BytesIO()
|
buffer = BytesIO()
|
||||||
obj.to_parquet(buffer, index=False)
|
obj.to_parquet(buffer, index=False)
|
||||||
|
|
||||||
|
# Reset the buffer position to the beginning
|
||||||
|
buffer.seek(0)
|
||||||
|
|
||||||
bucket, key = location.strip("s3://").split("/", 1)
|
bucket, key = location.strip("s3://").split("/", 1)
|
||||||
self.client.upload_fileobj(buffer, bucket, key)
|
self.client.upload_fileobj(buffer, bucket, key)
|
||||||
|
|
||||||
|
# Close the buffer
|
||||||
|
buffer.close()
|
||||||
|
|
||||||
def _load_parquet(self, location: str, load_config: dict) -> pd.DataFrame:
|
def _load_parquet(self, location: str, load_config: dict) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
Load a parquet file
|
Load a parquet file
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ def generate_predictions(
|
||||||
test_data_filepath: str,
|
test_data_filepath: str,
|
||||||
predictions_output_filepath: str,
|
predictions_output_filepath: str,
|
||||||
predictions_column_name: str,
|
predictions_column_name: str,
|
||||||
|
identifier_column: str = "id",
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
For a given model, we generate prediction and evaluate this against the true target
|
For a given model, we generate prediction and evaluate this against the true target
|
||||||
|
|
@ -52,6 +53,12 @@ def generate_predictions(
|
||||||
predictions_df = pd.DataFrame(predictions)
|
predictions_df = pd.DataFrame(predictions)
|
||||||
predictions_df.columns = [predictions_column_name]
|
predictions_df.columns = [predictions_column_name]
|
||||||
|
|
||||||
output_dataclient.save_data(
|
output_df = (
|
||||||
obj=predictions_df, location=predictions_output_filepath, save_config=None
|
pd.concat([test_data[identifier_column], predictions_df], axis=1)
|
||||||
|
if identifier_column in test_data.columns
|
||||||
|
else predictions_df
|
||||||
|
)
|
||||||
|
|
||||||
|
output_dataclient.save_data(
|
||||||
|
obj=output_df, location=predictions_output_filepath, save_config=None
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue