Got the Dockerfile working completely

This commit is contained in:
Khalim Conn-Kowlessar 2023-09-01 14:38:34 +01:00
parent 058194fc14
commit 17b5464acf
3 changed files with 12 additions and 13 deletions

View file

@ -2,14 +2,14 @@ FROM public.ecr.aws/lambda/python:3.10
# Set the working directory
WORKDIR ${LAMBDA_TASK_ROOT}/simulation_system
ENV PYTHONPATH "${PYTHONPATH}:${LAMBDA_TASK_ROOT}/simulation_system"
# Install necessary build tools - required to test locally
RUN yum install -y gcc python3-devel
# Install python packages
COPY requirements/predictions/predictions.txt ./simulation_system/requirements.txt
COPY requirements/predictions/predictions-dev.txt ./simulation_system/requirements.txt
RUN pip install --no-cache-dir -r ./simulation_system/requirements.txt
COPY requirements/predictions/predictions.txt ./requirements.txt
RUN pip install --no-cache-dir -r ./requirements.txt
# Copy the project code to the working directory
COPY ./core ./core
@ -19,4 +19,4 @@ COPY ./handlers/predictions_app.py ./predictions_app.py
COPY ./__init__.py ./__init__.py
# Run off a lambda trigger
CMD [ "predictions_app.handler" ]
CMD [ "simulation_system.predictions_app.handler" ]

View file

@ -5,12 +5,10 @@ from predictions import prediction
RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev")
def handler():
def handler(event, context):
"""
Take in event and trigger the prediction pipeline
"""
print("HIHIHI")
event = {}
# Assuming a file landing in a bucket for now?
# Assuming we have a model to use
@ -19,7 +17,6 @@ def handler():
# key = urllib.parse.unquote_plus(
# event["Records"][0]["s3"]["bucket"]["key"], encoding="utf-8"
# )
payload = event["body"]
data_path = payload["file_location"]
property_id = payload["property_id"]
@ -32,7 +29,7 @@ def handler():
# model_path = os.environ.get("MODEL_PATH", "http://minio:9000/data/model_directory/")
model_path = os.environ.get(
"MODEL_PATH",
"s3://retrofit-model-directory-{RUNTIME_ENVIRONMENT}/RDSAP_CHANGE/autogluon/rdsap_change-medium_quality-30"
f"s3://retrofit-model-directory-{RUNTIME_ENVIRONMENT}/RDSAP_CHANGE/autogluon/rdsap_change-medium_quality-30"
"-2023-08-30_11-43-41/deployment/",
)

View file

@ -23,6 +23,7 @@ from core.Settings import (
TIMESTAMP = datetime.now().strftime(TIMESTAMP_FORMAT)
RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "dev")
# FOR TESTING
# For now, just load the data first and then pass it into the function (i.e. as if we receive JSON data and
# convert it to a DataFrame)
@ -76,7 +77,7 @@ def prediction(
exit(1)
elif RUNTIME_ENVIRONMENT == "dev":
registry_path = (
"s3://retrofit-model-directory-dev/RDSAP_CHANGE/model_registry.csv"
"s3://retrofit-model-directory-dev/model_directory/RDSAP_CHANGE/model_registry.csv"
)
else:
raise NotImplemented("TO be implemented")
@ -108,7 +109,7 @@ def prediction(
if data_path and data is None:
logger.info("Loading data from provided path")
dataloader = dataloader_factory(runtime_environment=RUNTIME_ENVIRONMENT)
data = dataloader.load(filepath=data_path, index_col="UPRN")
data = dataloader.load(filepath=data_path, index_col="id")
if data is None:
raise ValueError("No data loaded")
@ -134,7 +135,7 @@ def prediction(
logger.info("--- Generating Predictions ---")
prediction = model.generate_predictions(data=data)
return pd.concat([data["recommendation_id"], prediction], axis=1)
return pd.concat([data["id"], prediction], axis=1)
# Save prediction somewhere?
# prediction.to_csv("s3?")
@ -167,7 +168,8 @@ if __name__ == "__main__":
args = ingest_arguments()
# Data can be passed in as JSON string: python3 predictions.py --data '{"TOTAL_FLOOR_AREA": 1}'
# Data path can be passed as so: python3 predictions.py --data-path ./model_build_data/change_data/rdsap_full/test_data.parquet
# Data path can be passed as so: python3 predictions.py --data-path
# ./model_build_data/change_data/rdsap_full/test_data.parquet
prediction(
target_column=args.target_column,
model_path=args.model_path,