mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-08 11:17:25 +00:00
commit
ae474fedb4
5 changed files with 22 additions and 6 deletions
|
|
@ -31,7 +31,7 @@ In order for this to be set up, some key environment variables needs to be inser
|
|||
secrets. Each different model and protected branch has its own set of secrets which allows for flexibility
|
||||
between different pipelines.
|
||||
|
||||
For example, for the branch sap_change-dev, the prefix=SAP_CHANGE_DEV, and the following secrets are:
|
||||
For example, for the branch sap-dev, the prefix is SAP_DEV, and the following secrets are required:
|
||||
|
||||
- {prefix}_ECR_URI, which is the URI of the ECR repository to push to. For example, for the
|
||||
sap change model this is the lambda-sap-prediction-dev repository.
|
||||
|
|
@ -58,7 +58,7 @@ First, navigate to the root directory of the repository. Open a terminal and exe
|
|||
2. command to build the Docker image:
|
||||
|
||||
```bash
|
||||
docker build -t sap_change -f deployment/Dockerfile.prediction.lambda .
|
||||
docker build -t sap -f deployment/Dockerfile.prediction.lambda .
|
||||
```
|
||||
|
||||
This will build a Docker image tagged as sap using the Dockerfile.prediction.lambda located
|
||||
|
|
@ -68,7 +68,7 @@ in the deployment directory.
|
|||
Once the image is built, you can run it using the following command:
|
||||
|
||||
```bash
|
||||
docker run -p 9000:8080 -v ~/.aws/credentials:/root/.aws/credentials:ro -e RUNTIME_ENVIRONMENT=dev sap_change
|
||||
docker run -p 9000:8080 -v ~/.aws/credentials:/root/.aws/credentials:ro -e RUNTIME_ENVIRONMENT=dev -e PREDICTIONS_BUCKET=retrofit-sap-predictions-dev sap
|
||||
```
|
||||
This command does the following:
|
||||
|
||||
|
|
@ -79,6 +79,7 @@ Sets the RUNTIME_ENVIRONMENT variable to dev.
|
|||
To test the Lambda function, use the following curl command:
|
||||
|
||||
```bash
|
||||
curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/test_data_with_id.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\"}"'
|
||||
curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/one_sample_test_dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\"}"}'
|
||||
```
|
||||
|
||||
This will send a POST request to the running Lambda function and pass in the required data as JSON.
|
||||
|
|
|
|||
|
|
@ -107,6 +107,7 @@ def handler(event, context):
|
|||
predictions_column_name=generate_predictions_params[
|
||||
"predictions_column_name"
|
||||
],
|
||||
identifier_column=generate_predictions_params["identifier_column"],
|
||||
)
|
||||
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ default:
|
|||
test_data_filepath: ./data/prepared_data/test.parquet
|
||||
predictions_output_filepath: ./data/predictions/predictions.parquet
|
||||
predictions_column_name: predictions
|
||||
identifier_column: id
|
||||
|
||||
generate_metrics:
|
||||
dataclient_type: local
|
||||
|
|
|
|||
|
|
@ -142,9 +142,15 @@ class AWSS3Client:
|
|||
buffer = BytesIO()
|
||||
obj.to_parquet(buffer, index=False)
|
||||
|
||||
# Reset the buffer position to the beginning
|
||||
buffer.seek(0)
|
||||
|
||||
bucket, key = location.strip("s3://").split("/", 1)
|
||||
self.client.upload_fileobj(buffer, bucket, key)
|
||||
|
||||
# Close the buffer
|
||||
buffer.close()
|
||||
|
||||
def _load_parquet(self, location: str, load_config: dict) -> pd.DataFrame:
|
||||
"""
|
||||
Load a parquet file
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ def generate_predictions(
|
|||
test_data_filepath: str,
|
||||
predictions_output_filepath: str,
|
||||
predictions_column_name: str,
|
||||
identifier_column: str = "id",
|
||||
):
|
||||
"""
|
||||
For a given model, we generate prediction and evaluate this against the true target
|
||||
|
|
@ -52,6 +53,12 @@ def generate_predictions(
|
|||
predictions_df = pd.DataFrame(predictions)
|
||||
predictions_df.columns = [predictions_column_name]
|
||||
|
||||
output_dataclient.save_data(
|
||||
obj=predictions_df, location=predictions_output_filepath, save_config=None
|
||||
output_df = (
|
||||
pd.concat([test_data[identifier_column], predictions_df], axis=1)
|
||||
if identifier_column in test_data.columns
|
||||
else predictions_df
|
||||
)
|
||||
|
||||
output_dataclient.save_data(
|
||||
obj=output_df, location=predictions_output_filepath, save_config=None
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue