change logging style

This commit is contained in:
Michael Duong 2023-10-20 15:45:04 +01:00
parent 56b7139b41
commit 867f4e0bf0
6 changed files with 3 additions and 91 deletions

View file

@ -16,13 +16,9 @@ def run_cleanup(artefacts_directory: str, metrics_directory: str) -> None:
Remove the directory where artefacts are stored
"""
logger.info("---------------------")
logger.info(f"--- Run Clean up ---")
logger.info("---------------------")
logger.info("-------------------------")
logger.info(f"--- Delete artefacts ---")
logger.info("-------------------------")
artefact_directory_path = Path(artefacts_directory)
@ -31,9 +27,7 @@ def run_cleanup(artefacts_directory: str, metrics_directory: str) -> None:
logger.info(f"Removing the directory: {artefacts_directory}")
shutil.rmtree(artefact_directory_path)
logger.info("-----------------------")
logger.info(f"--- Delete metrics ---")
logger.info("-----------------------")
metrics_directory_path = Path(metrics_directory)
@ -45,15 +39,11 @@ def run_cleanup(artefacts_directory: str, metrics_directory: str) -> None:
if __name__ == "__main__":
logger.info("----------------------------")
logger.info(f"--- {__file__} - Start! ---")
logger.info("----------------------------")
run_cleanup(
artefacts_directory=startup_cleanup_params["artefacts"],
metrics_directory=startup_cleanup_params["metrics"],
)
logger.info("-------------------------------")
logger.info(f"--- {__file__} - Complete! ---")
logger.info("-------------------------------")

View file

@ -17,9 +17,7 @@ from core.DataClient import dataclient_factory
from core.FeatureProcessor import feature_processor_factory
from config import settings
logger.info("----------------------------")
logger.info(f"--- Initiate Parameters ---")
logger.info("----------------------------")
RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local")
@ -33,9 +31,7 @@ output_train_filepath = prepare_data_params["output_train_filepath"]
output_test_filepath = prepare_data_params["output_test_filepath"]
feature_processor_config = feature_process_params["feature_processor_config"]
logger.info("----------------------------")
logger.info(f"--- Initiate DataClient ---")
logger.info("----------------------------")
input_dataclient_type = prepare_data_params["input_dataclient_type"]
output_dataclient_type = prepare_data_params["output_dataclient_type"]
@ -49,9 +45,7 @@ output_dataclient = dataclient_factory(
dataclient_config=client_params[output_dataclient_type],
)
logger.info("----------------------------------")
logger.info(f"--- Initiate FeatureProcessor ---")
logger.info("----------------------------------")
feature_processor = feature_processor_factory(
feature_process_params["feature_processor_type"]
@ -76,15 +70,11 @@ def prepare_data(
:param pipeline_mode: bool, default False. When False, the output files are cached for experimentation; in pipeline mode, the objects are returned instead.
"""
logger.info("--------------------")
logger.info("--- Loading data ---")
logger.info("--------------------")
data = input_dataclient.load_data(location=data_filepath, load_config={})
logger.info("--------------------------")
logger.info("--- Feature Processing ---")
logger.info("--------------------------")
data = feature_processor.feature_process(
data,
@ -93,9 +83,7 @@ def prepare_data(
new_feature_funcs=new_feature_funcs,
)
logger.info("----------------------")
logger.info("--- Splitting data ---")
logger.info("----------------------")
if train_proportion == 1:
train = data
@ -108,9 +96,7 @@ def prepare_data(
train = train.reset_index(drop=True)
logger.info("-----------------------")
logger.info("--- Outputting data ---")
logger.info("-----------------------")
output_dataclient.save_data(
obj=train, location=output_train_filepath, save_config=None
@ -126,13 +112,9 @@ def prepare_data(
if __name__ == "__main__":
logger.info("----------------------------")
logger.info(f"--- {__file__} - Start! ---")
logger.info("----------------------------")
logger.info("---------------------------")
logger.info(f"--- Prepare Data Stage ---")
logger.info("---------------------------")
prepare_data(
input_dataclient=input_dataclient,
@ -147,6 +129,4 @@ if __name__ == "__main__":
new_feature_funcs=new_feature_funcs,
)
logger.info("-------------------------------")
logger.info(f"--- {__file__} - Complete! ---")
logger.info("-------------------------------")

View file

@ -18,9 +18,7 @@ from core.MLMetrics import metrics_factory
from configs.post_prediction_logic import post_prediction_logic
from config import settings
logger.info("----------------------------")
logger.info(f"--- Initiate Parameters ---")
logger.info("----------------------------")
RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local")
@ -40,22 +38,16 @@ train_filepath = prepare_data_params["output_train_filepath"]
test_filepath = prepare_data_params["output_test_filepath"]
fit_metrics_filepath = build_model_params["fit_metrics_filepath"]
logger.info("----------------------------")
logger.info(f"--- Initiate DataClient ---")
logger.info("----------------------------")
# The data lives wherever the previous prepare-data step wrote its output, so reuse that step's output data client type
dataclient = dataclient_factory(prepare_data_params["output_dataclient_type"])
logger.info("-------------------------")
logger.info(f"--- Initiate MLModel ---")
logger.info("-------------------------")
model = model_factory(model_type)
logger.info("-------------------------")
logger.info(f"--- Initiate Metrics ---")
logger.info("-------------------------")
metrics = metrics_factory(generate_metrics_params["metrics_type"])
@ -75,9 +67,8 @@ def build_model(
test_data: Union[pd.DataFrame, None] = None,
pipeline_mode: bool = False,
):
logger.info("--------------------------------------")
logger.info("--- Loading Data for build process ---")
logger.info("--------------------------------------")
if train_data is None:
if train_filepath is None:
@ -89,9 +80,7 @@ def build_model(
raise ValueError(f"Need {test_filepath} if no data supplied")
test_data = dataclient.load_data(location=test_filepath, load_config=None)
logger.info("----------------------")
logger.info("--- Training model ---")
logger.info("----------------------")
model.train_model(
data=train_data.drop(columns=identifier_columns),
@ -99,32 +88,24 @@ def build_model(
model_hyperparameters=model_hyperparameters,
)
logger.info("----------------------------------")
logger.info("--- Generating fit predictions ---")
logger.info("----------------------------------")
fit_predictions = model.predict(
data=train_data, post_prediction_logic=post_prediction_logic
)
logger.info("------------------------------")
logger.info("--- Generating fit metrics ---")
logger.info("------------------------------")
metrics_output = metrics.generate_metrics(
target=train_data[target],
predictions=pd.Series(fit_predictions),
)
logger.info("--------------------")
logger.info("--- Saving model ---")
logger.info("--------------------")
model.save_model(path=Path(model_save_location))
logger.info("--------------------------")
logger.info("--- Saving fit metrics ---")
logger.info("--------------------------")
dataclient.save_data(
obj=metrics_output, location=fit_metrics_filepath, save_config=None
@ -133,13 +114,9 @@ def build_model(
if __name__ == "__main__":
logger.info("----------------------------")
logger.info(f"--- {__file__} - Start! ---")
logger.info("----------------------------")
logger.info("--------------------------")
logger.info(f"--- Build Model Stage ---")
logger.info("--------------------------")
build_model(
dataclient=dataclient,
@ -154,6 +131,4 @@ if __name__ == "__main__":
fit_metrics_filepath=fit_metrics_filepath,
)
logger.info("-------------------------------")
logger.info(f"--- {__file__} - Complete! ---")
logger.info("-------------------------------")

View file

@ -10,9 +10,7 @@ from core.Logger import logger
from config import settings
from generate_predictions import generate_predictions
logger.info("----------------------------")
logger.info(f"--- Initiate Parameters ---")
logger.info("----------------------------")
RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local")
@ -33,15 +31,11 @@ model_filepath = build_model_params["model_save_filepath"]
predictions_output_filepath = generate_predictions_params["predictions_output_filepath"]
predictions_column_name = generate_predictions_params["predictions_column_name"]
logger.info("-------------------------")
logger.info(f"--- Initiate MLModel ---")
logger.info("-------------------------")
model = model_factory(build_model_params["model_type"])
logger.info("----------------------------")
logger.info(f"--- Initiate DataClient ---")
logger.info("----------------------------")
# Data may be loaded from different locations, so we use the data client specified in generate_predictions.yaml.
# E.g. for metric runs, this will be a local data client
@ -59,13 +53,9 @@ output_dataclient = dataclient_factory(
if __name__ == "__main__":
logger.info("----------------------------")
logger.info(f"--- {__file__} - Start! ---")
logger.info("----------------------------")
logger.info("----------------------------------")
logger.info(f"--- Generate Predictions Stage---")
logger.info("----------------------------------")
generate_predictions(
input_dataclient=input_dataclient,
@ -78,6 +68,4 @@ if __name__ == "__main__":
predictions_column_name=predictions_column_name,
)
logger.info("-------------------------------")
logger.info(f"--- {__file__} - Complete! ---")
logger.info("-------------------------------")

View file

@ -16,9 +16,7 @@ from core.MLMetrics import metrics_factory
from core.Logger import logger
from config import settings
logger.info("----------------------------")
logger.info(f"--- Initiate Parameters ---")
logger.info("----------------------------")
RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local")
@ -35,16 +33,11 @@ predictions_output_filepath = generate_predictions_params["predictions_output_fi
predictions_column_name = generate_predictions_params["predictions_column_name"]
metrics_output_filepath = generate_metrics_params["metrics_output_filepath"]
logger.info("-------------------------")
logger.info(f"--- Initiate MLModel ---")
logger.info("-------------------------")
model = model_factory(build_model_params["model_type"])
logger.info("----------------------------")
logger.info(f"--- Initiate DataClient ---")
logger.info("----------------------------")
# Use one data client for both input and output, as DVC is used later to cache to the cloud
dataclient_type = generate_metrics_params["dataclient_type"]
@ -53,9 +46,7 @@ dataclient = dataclient_factory(
dataclient_config=client_params[dataclient_type],
)
logger.info("---------------------------")
logger.info(f"--- Initiate MLMetrics ---")
logger.info("---------------------------")
metrics = metrics_factory(generate_metrics_params["metrics_type"])
@ -75,34 +66,26 @@ def generate_metrics(
For a given model, we generate predictions and evaluate them against the true target
"""
logger.info("-------------------------")
logger.info("--- Loading test data ---")
logger.info("-------------------------")
test_data = input_dataclient.load_data(
location=test_data_filepath, load_config=None
)
logger.info("---------------------------")
logger.info("--- Loading predictions ---")
logger.info("---------------------------")
predictions = input_dataclient.load_data(
location=predictions_output_filepath, load_config=None
)
logger.info("--------------------------")
logger.info("--- Generating metrics ---")
logger.info("--------------------------")
metrics_output = metrics.generate_metrics(
target=test_data[target],
predictions=pd.Series(predictions[predictions_column_name]),
)
logger.info("----------------------")
logger.info("--- Saving metrics ---")
logger.info("----------------------")
output_dataclient.save_data(
obj=metrics_output, location=metrics_output_filepath, save_config=None
@ -111,13 +94,9 @@ def generate_metrics(
if __name__ == "__main__":
logger.info("----------------------------")
logger.info(f"--- {__file__} - Start! ---")
logger.info("----------------------------")
logger.info("------------------------------")
logger.info(f"--- Generate Metrics Stage---")
logger.info("------------------------------")
generate_metrics(
input_dataclient=dataclient,
@ -131,6 +110,4 @@ if __name__ == "__main__":
metrics_output_filepath=metrics_output_filepath,
)
logger.info("-------------------------------")
logger.info(f"--- {__file__} - Complete! ---")
logger.info("-------------------------------")

View file

@ -22,6 +22,8 @@ def setup_logger():
# Add the stream handler to the logger
logger.addHandler(stream_handler)
logger.propagate = False
return logger