Model/model_data/simulation_system/predictions.py
2023-08-15 18:18:55 +01:00

64 lines
No EOL
1.8 KiB
Python

"""
Script that loads a saved model through the AutogluonModel wrapper and generates predictions
"""
from Logger import logger
from MLModel import AutogluonModel
from DataLoader import DataLoader
from pathlib import Path
import pandas as pd
from typing import Optional
# Placeholder settings; these are expected to come from a configuration setup
# rather than being hard-coded in this module.
HYPERPARAMETERS = {
    "problem_type": "regression",
    "output_path": "agModels-predictRDSAP",
    "eval_metric": "mean_absolute_error",
    "time_limit": 8000,
    "presets": "best_quality",
    "excluded_model_types": ["KNN"],
}
def main(
    model_path: Optional[str] = None,
    data: Optional[pd.DataFrame] = None,
    data_path: Optional[str] = None,
):
    """
    Main pipeline function: load a saved model and generate predictions.

    Args:
        model_path: Location handed to ``AutogluonModel.load_model``. Required.
        data: Rows to predict on. When given, it is used as-is and
            ``data_path`` is ignored.
        data_path: Location handed to ``DataLoader`` when ``data`` is not
            supplied.

    Raises:
        SystemExit: With status 1 when ``model_path`` is missing, or when
            neither ``data`` nor ``data_path`` is supplied.
    """
    # `raise SystemExit` is what `exit()` does, but it does not depend on the
    # `site` module having injected the `exit` builtin.
    if model_path is None:
        logger.error("No model path provided")
        raise SystemExit(1)

    if data is None and data_path is None:
        logger.error("No Data/Data Path passed")
        raise SystemExit(1)

    if data is None:
        logger.info("--- Loading Data ---")
        # Forward the requested path to the loader; previously it was
        # validated but never used.
        # NOTE(review): the `filepath` keyword mirrors the
        # `DataLoader.load(filepath=...)` call in this script's entry point -
        # confirm against DataLoader's signature.
        data = DataLoader().load(filepath=data_path)
    elif data_path is not None:
        # Only warn when a path was actually supplied and is being overridden.
        logger.warning('Ignoring data_path and loading data provided')

    logger.info("--- Loading Model ---")
    model = AutogluonModel()
    model.load_model(filepath=model_path)

    # Training is currently disabled; kept for reference.
    # model.train_model(
    #     data=data,
    #     target_column='RDSAP_CHANGE',
    #     hyperparameters=HYPERPARAMETERS
    # )

    logger.info("--- Generating Predictions ---")
    prediction = model.generate_predictions(data=data)

    # TODO: decide where predictions should be saved. "s3?" is a placeholder
    # destination and is passed to `to_csv` verbatim.
    prediction.to_csv("s3?")
if __name__ == "__main__":
    # For now, load the data first and pass it into main(), i.e. as if we had
    # received JSON data and converted it to a DataFrame.
    data = DataLoader.load(filepath="../simulation_system/preprocessed_data/dataset.parquet")
    # Predict on a single randomly sampled row.
    data_for_prediction = data.sample(1)
    # main() has no `filepath` parameter; the model location is `model_path`.
    # The empty string is still a placeholder for the real model location.
    main(model_path="", data=data_for_prediction)