Model/model_data/simulation_system/training.py
2023-08-12 19:11:22 +00:00

47 lines
No EOL
1 KiB
Python

import os
import pandas as pd
import argparse
from Logger import logger
from autogluon.tabular import TabularDataset, TabularPredictor
def ingest_arguments() -> argparse.Namespace:
    """
    Parse the command-line arguments supplied at script start.

    Returns:
        The parsed arguments; ``filepath`` holds the location of the
        Parquet dataset to load.

    Raises:
        SystemExit: Raised by argparse, after printing a usage message,
            when ``--filepath`` is missing or the arguments are malformed.
    """
    parser = argparse.ArgumentParser(description='Inputs for training script')
    # Required: without it ``filepath`` would be None and the failure would
    # only surface later, when the dataset is read.
    parser.add_argument(
        '--filepath',
        type=str,
        required=True,
        help='Location of Parquet dataset to load',
    )
    return parser.parse_args()
def training(filepath: str) -> None:
    """
    Pipeline to run training on the dataset.

    Only data loading and column selection are implemented here; the
    split, model-building and evaluation stages currently just log
    their name.

    Args:
        filepath: Location of the Parquet dataset to load.
    """
    logger.info('Loading data')
    data = pd.read_parquet(filepath)

    logger.info('Feature selection')
    # Every column is selected for now, so the frame keeps its full
    # column set.
    feature_columns = data.columns
    data = data[feature_columns]

    # The remaining stages have no implementation yet.
    logger.info('Split data into train and validation')
    logger.info('Build Model')
    logger.info('Evaluate metrics')
if __name__ == '__main__':
    # Script entry point: announce the run, read the command-line
    # arguments, then hand the dataset location to the training pipeline.
    logger.info('---Begin Pipeline---')
    logger.info('---Ingest Arguments---')
    cli_args = ingest_arguments()
    training(filepath=cli_args.filepath)