mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
add protocol for ml class
This commit is contained in:
parent
18673e3147
commit
f0809966b7
3 changed files with 178 additions and 18 deletions
113
model_data/simulation_system/MLModel.py
Normal file
113
model_data/simulation_system/MLModel.py
Normal file
|
|
@ -0,0 +1,113 @@
|
||||||
|
"""
|
||||||
|
MLModel class
|
||||||
|
Key tasks:
|
||||||
|
- Template Model class for different model types
|
||||||
|
- Save model
|
||||||
|
- Load Model
|
||||||
|
- Generate Inference
|
||||||
|
"""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Protocol, NamedTuple
|
||||||
|
import pandas as pd
|
||||||
|
from autogluon import TabularPredictor
|
||||||
|
|
||||||
|
# Hyperparameter keys that AutogluonModel.train_model checks the supplied
# hyperparameter dict against before training.
AUTOGLUON_HYPERPARAMETERS = [
    'problem_type',
    'eval_metric',
    'time_limit',
    'presets',
    'excluded_model_types',
]
|
||||||
|
|
||||||
|
class MLModel(Protocol):
    """
    Structural interface that every ML model wrapper is expected to offer.

    The methods below carry no implementation; they only declare the
    signatures a conforming model class provides.
    """

    def load_model(self, filepath: Path) -> None:
        """
        Load the model stored at ``filepath`` and keep it in an internal
        variable so it can be used afterwards.
        """
        ...

    def save_model(self, output_filepath: Path) -> None:
        """
        Save the model to ``output_filepath``.
        """
        ...

    def train_model(
        self,
        data: pd.DataFrame,
        target: str,
        hyperparameter: dict,
    ) -> None:
        """
        Train a model on ``data`` using the model-specific ``hyperparameter``
        settings.

        NOTE(review): AutogluonModel in this file names these parameters
        ``target_column`` and ``hyperparameters`` - confirm which spelling
        is intended.
        """
        ...

    def generate_predictions(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Generate predictions for the rows of ``data`` with the loaded model.
        """
        ...

    def model_evaluation(self, validation_data: pd.DataFrame) -> NamedTuple:
        """
        Return a set of predictions and metrics for ``validation_data``.
        """
        ...
|
||||||
|
|
||||||
|
class AutogluonModel(MLModel):
    """
    Autogluon model that implements the MLModel Protocol.

    The underlying ``TabularPredictor`` is kept in ``self.model``, which is
    ``None`` until ``load_model`` or ``train_model`` has been called.
    """

    def __init__(self) -> None:
        self.model = None

    def load_model(self, filepath: Path) -> None:
        """
        Load a saved predictor from ``filepath`` into ``self.model``.

        Args:
            filepath: Location of the saved predictor. Converted with
                ``str()`` so that both ``Path`` objects and plain strings
                can be passed on to ``TabularPredictor.load``.
        """
        self.model = TabularPredictor.load(path=str(filepath))

    def save_model(self, output_filepath: Path) -> None:
        """
        Save the model to ``output_filepath``.

        Not implemented yet: this method currently does nothing.
        """

    def train_model(
            self,
            data: pd.DataFrame,
            target_column: str,
            hyperparameters: dict = None) -> None:
        """
        Train a ``TabularPredictor`` on ``data`` and store it in
        ``self.model``.

        Args:
            data: Training data handed to ``TabularPredictor.fit``.
            target_column: Name of the label column.
            hyperparameters: Must contain every key listed in
                ``AUTOGLUON_HYPERPARAMETERS``. It may additionally contain
                ``'output_path'``, which is passed to ``TabularPredictor``
                as ``path``; when absent, ``None`` is passed instead.

        Raises:
            SystemExit: With status 1 when ``hyperparameters`` is missing,
                lacks a required key, or contains an unknown key.
        """
        required_keys = set(AUTOGLUON_HYPERPARAMETERS)
        supplied_keys = set(hyperparameters or {})
        # 'output_path' is read below but is not part of the required list,
        # so it is accepted as the only optional extra key.
        unknown_keys = supplied_keys - required_keys - {'output_path'}

        if not required_keys <= supplied_keys or unknown_keys:
            print("Hyperparameters (dict) is incorrectly defined - please check what hyperparameters are required")
            # Same effect as exit(1), without relying on the site-injected helper.
            raise SystemExit(1)

        predictor = TabularPredictor(
            label=target_column,
            path=hyperparameters.get('output_path'),
            problem_type=hyperparameters['problem_type'],
            eval_metric=hyperparameters['eval_metric'],
        )
        self.model = predictor.fit(
            data,
            time_limit=hyperparameters['time_limit'],
            presets=hyperparameters['presets'],
            excluded_model_types=hyperparameters['excluded_model_types'],
        )

    def generate_predictions(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Generate predictions for ``data`` with the loaded/trained model.

        Raises:
            SystemExit: With status 1 when no model has been loaded or
                trained yet.
        """
        if self.model is None:
            print("No model loaded/ trained")
            raise SystemExit(1)

        # NOTE(review): TabularPredictor.predict may return a Series rather
        # than a DataFrame - confirm the return annotation.
        return self.model.predict(data)

    def model_evaluation(self, validation_data: pd.DataFrame) -> NamedTuple:
        """
        Return a set of predictions and metrics for ``validation_data``.

        Not implemented yet: this method currently returns ``None``.
        """
|
||||||
|
|
||||||
64
model_data/simulation_system/predictions.py
Normal file
64
model_data/simulation_system/predictions.py
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
"""
|
||||||
|
Script to load MLModel class and generate predictions
|
||||||
|
"""
|
||||||
|
|
||||||
|
from Logger import logger
|
||||||
|
from MLModel import AutogluonModel
|
||||||
|
from DataLoader import DataLoader
|
||||||
|
from pathlib import Path
|
||||||
|
import pandas as pd
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
# Placeholder values; these are meant to come from some configuration setup.
HYPERPARAMETERS = dict(
    problem_type='regression',
    output_path='agModels-predictRDSAP',
    eval_metric='mean_absolute_error',
    time_limit=8000,
    presets='best_quality',
    excluded_model_types=['KNN'],
)
|
||||||
|
|
||||||
|
def main(model_path: str = None, data: pd.DataFrame = None, data_path: Optional[str] = None,
         output_path: str = "s3?"):
    """
    Main pipeline function: load data if needed, load the model, generate
    predictions and write them out as CSV.

    Args:
        model_path: Location of the saved model. Required.
        data: Data to predict on. Takes precedence over ``data_path``.
        data_path: File to load the data from when ``data`` is not given.
        output_path: Target handed to ``prediction.to_csv``. The default is
            the placeholder value used so far; the final destination is
            still to be decided.

    Raises:
        SystemExit: With status 1 when ``model_path`` is missing or when
            neither ``data`` nor ``data_path`` is provided.
    """
    if model_path is None:
        logger.error("No model path provided")
        raise SystemExit(1)

    if data is None and data_path is None:
        logger.error("No Data/Data Path passed")
        raise SystemExit(1)

    if data is None:
        logger.info("--- Loading Data ---")
        # DataLoader.load needs the file path to read from.
        data = DataLoader.load(filepath=data_path)
    elif data_path:
        # Only warn when a data_path was actually supplied and is being skipped.
        logger.warning('Ignoring data_path and loading data provided')

    logger.info("--- Loading Model ---")
    model = AutogluonModel()
    model.load_model(filepath=model_path)

    # model.train_model(
    #     data=data,
    #     target_column='RDSAP_CHANGE',
    #     hyperparameters=HYPERPARAMETERS
    # )

    logger.info("--- Generating Predictions ---")
    prediction = model.generate_predictions(data=data)

    # Save prediction some where?
    prediction.to_csv(output_path)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # For now just loading data first and then passing into function
    # (i.e. as if we receive json data and convert to DataFrame)
    data = DataLoader.load(filepath="../simulation_system/preprocessed_data/dataset.parquet")
    data_for_prediction = data.sample(1)

    # main() names its first parameter `model_path`; it has no `filepath`
    # parameter, so passing filepath= raised a TypeError.
    main(model_path="", data=data_for_prediction)
|
||||||
|
|
@ -3,6 +3,7 @@ import pandas as pd
|
||||||
import argparse
|
import argparse
|
||||||
from typing import List
|
from typing import List
|
||||||
from Logger import logger
|
from Logger import logger
|
||||||
|
from DataLoader import DataLoader
|
||||||
from autogluon.tabular import TabularDataset, TabularPredictor
|
from autogluon.tabular import TabularDataset, TabularPredictor
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -28,24 +29,6 @@ def ingest_arguments() -> argparse.Namespace:
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
|
||||||
class DataLoader():
    """Load datasets from disk into pandas DataFrames."""

    @staticmethod
    def load(filepath: str) -> pd.DataFrame:
        """
        Load different datasets, choosing the reader from the file suffix.

        Args:
            filepath: Path to a ``.parquet`` or ``.csv`` file.

        Returns:
            The file contents as a DataFrame.

        Raises:
            SystemExit: With status 1 when the suffix is not supported.
        """
        path = str(filepath)

        if path.endswith('.parquet'):
            return pd.read_parquet(path)
        # The suffix was previously spelled '.csv.' and so never matched a
        # normal CSV file name.
        if path.endswith('.csv'):
            return pd.read_csv(path)

        logger.error('Not implemented!')
        raise SystemExit(1)
|
|
||||||
|
|
||||||
class FeatureProcessor:
|
class FeatureProcessor:
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue