mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
47 lines
No EOL
1 KiB
Python
47 lines
No EOL
1 KiB
Python
import os
|
|
import pandas as pd
|
|
import argparse
|
|
from Logger import logger
|
|
from autogluon.tabular import TabularDataset, TabularPredictor
|
|
|
|
def ingest_arguments() -> argparse.Namespace:
    """
    Parse the command-line arguments supplied when the script is started.

    Returns:
        argparse.Namespace: the parsed arguments; ``filepath`` holds the
        value passed to ``--filepath``.

    Raises:
        SystemExit: raised by argparse when ``--filepath`` is missing or an
        unrecognised argument is supplied.
    """
    parser = argparse.ArgumentParser(description='Inputs for training script')
    # Without required=True a missing flag silently parses to None, and the
    # failure only shows up later when that None is used as a dataset path.
    parser.add_argument(
        '--filepath',
        type=str,
        required=True,
        help='Location of Parquet dataset to load',
    )
    return parser.parse_args()
|
|
|
|
def training(filepath: str) -> None:
    """
    Run the training pipeline stages against the dataset at ``filepath``.

    Only loading and column selection are implemented so far; the split,
    model-building and evaluation stages currently just log their name.

    Args:
        filepath: location of the Parquet dataset, passed to
            ``pd.read_parquet``.
    """
    # Stage 1: read the Parquet dataset into a DataFrame.
    logger.info('Loading data')
    frame = pd.read_parquet(filepath)

    # Stage 2: keep the selected columns (currently every column).
    logger.info('Feature selection')
    selected = frame.columns
    frame = frame[selected]

    # Stage 3: train/validation split -- not implemented yet.
    logger.info('Split data into train and validation')

    # Stage 4: model construction -- not implemented yet.
    logger.info('Build Model')

    # Stage 5: evaluation -- not implemented yet.
    # NOTE(review): 'matrics' looks like a typo for 'metrics'; the message is
    # left as-is so the emitted log output does not change.
    logger.info('Evaluate matrics')
|
|
|
|
if __name__ == "__main__":
    # Script entry point: announce the run, parse the CLI arguments, then
    # pass the dataset path to the training pipeline.
    logger.info('---Begin Pipeline---')

    logger.info('---Ingest Arguments---')
    cli_args = ingest_arguments()

    dataset_path = cli_args.filepath
    training(filepath=dataset_path)