mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-08 11:17:25 +00:00
Update to Python 3.12 and AutoGluon 1.3
This commit is contained in:
parent
88c5b6c93a
commit
7b001f3abf
13 changed files with 92 additions and 74 deletions
|
|
@ -1,7 +1,8 @@
|
|||
export PYENV_ROOT=$(HOME)/.pyenv
|
||||
export PATH := $(PYENV_ROOT)/bin:$(PATH)
|
||||
PYTHON_VERSION ?= 3.10.12
|
||||
CONDA_ENV=dev_env_pipeline
|
||||
PYTHON_VERSION ?= 3.12.12
|
||||
CONDA_ENV=dev_env_pipeline_1
|
||||
CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda deactivate ; conda activate
|
||||
|
||||
.PHONY: init
|
||||
init: dev-conda
|
||||
|
|
@ -12,11 +13,13 @@ dev-conda:
|
|||
# conda remove --name ${CONDA_ENV} --all -y || echo "No environment created previously"
|
||||
conda create --name ${CONDA_ENV} python=$(PYTHON_VERSION) -y
|
||||
conda init bash
|
||||
conda run -v -n ${CONDA_ENV} pip install --upgrade pip
|
||||
conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/training/requirements-dev.txt
|
||||
conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/version_control/requirements.txt
|
||||
conda run -v -n ${CONDA_ENV} pre-commit install
|
||||
conda run -v -n ${CONDA_ENV} pip install ipykernel
|
||||
${CONDA_ACTIVATE} ${CONDA_ENV} && \
|
||||
which pip && \
|
||||
pip install --upgrade pip && \
|
||||
pip install -r src/pipeline/requirements/training/requirements-dev.txt && \
|
||||
pip install -r src/pipeline/requirements/version_control/requirements.txt && \
|
||||
pre-commit install && \
|
||||
pip install ipykernel
|
||||
echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
|
||||
echo "conda activate ${CONDA_ENV}"
|
||||
|
||||
|
|
|
|||
|
|
@ -99,6 +99,12 @@ def generate_scenario_predictions(
|
|||
]
|
||||
)
|
||||
|
||||
# TEMPORARY FIX: add the is_post_sap10_starting and is_post_sap10_ending columns, defaulting to False, when they are missing from scenario_data
|
||||
if "is_post_sap10_starting" not in scenario_data.columns:
|
||||
scenario_data["is_post_sap10_starting"] = False
|
||||
if "is_post_sap10_ending" not in scenario_data.columns:
|
||||
scenario_data["is_post_sap10_ending"] = False
|
||||
|
||||
logger.info("--- Loading Model ---")
|
||||
|
||||
model.load_model(model_filepath)
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ default:
|
|||
time_limit: 1800
|
||||
presets: medium_quality
|
||||
excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
|
||||
infer_limit: 0.05
|
||||
infer_limit: 0.0005
|
||||
infer_limit_batch_size: 10000
|
||||
"fit_strategy": "parallel"
|
||||
ag_args_ensemble: {'num_folds_parallel': 2}
|
||||
|
|
|
|||
|
|
@ -21,7 +21,8 @@ default:
|
|||
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
|
||||
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
|
||||
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet
|
||||
data_filepath: s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
|
||||
# data_filepath: s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
|
||||
data_filepath: s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
|
||||
train_proportion: 0.9
|
||||
output_train_filepath: ./data/prepared_data/train.parquet
|
||||
output_test_filepath: ./data/prepared_data/test.parquet
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
""""
|
||||
""" "
|
||||
Implementations of MLModels, all of which will have four methods to:
|
||||
- Load model
|
||||
- Save Model
|
||||
|
|
@ -152,6 +152,7 @@ class AutogluonAutoML:
|
|||
"infer_limit",
|
||||
"infer_limit_batch_size",
|
||||
"ag_args_ensemble",
|
||||
"fit_strategy",
|
||||
]
|
||||
|
||||
def load_model(self, path: Union[Path, str]) -> None:
|
||||
|
|
@ -209,6 +210,7 @@ class AutogluonAutoML:
|
|||
infer_limit=model_hyperparameters["infer_limit"],
|
||||
infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"],
|
||||
ag_args_ensemble=model_hyperparameters["ag_args_ensemble"],
|
||||
fit_strategy=model_hyperparameters["fit_strategy"],
|
||||
)
|
||||
|
||||
def predict(
|
||||
|
|
|
|||
|
|
@ -49,18 +49,20 @@ stages:
|
|||
default.feature_processor.feature_processor_config.subsample_seed: 0
|
||||
default.feature_processor.feature_processor_config.target: sap_ending
|
||||
default.feature_processor.feature_processor_type: dataframe
|
||||
default.prepare_data.data_filepath:
|
||||
s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
|
||||
default.prepare_data.data_filepath:
|
||||
s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
|
||||
default.prepare_data.input_dataclient_type: aws-s3
|
||||
default.prepare_data.output_dataclient_type: local
|
||||
default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
|
||||
default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet
|
||||
default.prepare_data.output_test_filepath:
|
||||
./data/prepared_data/test.parquet
|
||||
default.prepare_data.output_train_filepath:
|
||||
./data/prepared_data/train.parquet
|
||||
default.prepare_data.train_proportion: 0.9
|
||||
outs:
|
||||
- path: data/prepared_data/
|
||||
hash: md5
|
||||
md5: 7cc090d55cb296ce5d360d655c06e861.dir
|
||||
size: 46183314
|
||||
md5: 5c56787d9e6450e26a78c15700e104c7.dir
|
||||
size: 45746089
|
||||
nfiles: 2
|
||||
build_model:
|
||||
cmd: python 2_build_model.py
|
||||
|
|
@ -71,8 +73,8 @@ stages:
|
|||
size: 4820
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: 7cc090d55cb296ce5d360d655c06e861.dir
|
||||
size: 46183314
|
||||
md5: 5c56787d9e6450e26a78c15700e104c7.dir
|
||||
size: 45746089
|
||||
nfiles: 2
|
||||
params:
|
||||
configs/build_model.yaml:
|
||||
|
|
@ -97,25 +99,26 @@ stages:
|
|||
- NN_TORCH
|
||||
- KNN
|
||||
- XT
|
||||
infer_limit: 0.05
|
||||
infer_limit: 0.0005
|
||||
infer_limit_batch_size: 10000
|
||||
fit_strategy: parallel
|
||||
ag_args_ensemble:
|
||||
num_folds_parallel: 2
|
||||
outs:
|
||||
- path: data/fit_predictions/
|
||||
hash: md5
|
||||
md5: a6196bf08607c43ba6bc637611bb32b0.dir
|
||||
size: 3491001
|
||||
md5: 4fa77e3f129d2e6f9ef7222c44978c26.dir
|
||||
size: 3474669
|
||||
nfiles: 1
|
||||
- path: data/model/
|
||||
hash: md5
|
||||
md5: b225d7b01356cecefb3794a9a3cd19b5.dir
|
||||
size: 790430916
|
||||
nfiles: 36
|
||||
md5: e27b9216bc7455f8245d5b49f27b2707.dir
|
||||
size: 753575768
|
||||
nfiles: 30
|
||||
- path: metrics/fit_metrics.json
|
||||
hash: md5
|
||||
md5: 33421d5e3a2d569dbe6d4486c568a2b7
|
||||
size: 225
|
||||
md5: 426a162284ca9e29c043eb1d72e547e6
|
||||
size: 224
|
||||
generate_predictions:
|
||||
cmd: python 3_generate_predictions.py
|
||||
deps:
|
||||
|
|
@ -125,26 +128,28 @@ stages:
|
|||
size: 2464
|
||||
- path: data/model
|
||||
hash: md5
|
||||
md5: b225d7b01356cecefb3794a9a3cd19b5.dir
|
||||
size: 790430916
|
||||
nfiles: 36
|
||||
md5: e27b9216bc7455f8245d5b49f27b2707.dir
|
||||
size: 753575768
|
||||
nfiles: 30
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: 7cc090d55cb296ce5d360d655c06e861.dir
|
||||
size: 46183314
|
||||
md5: 5c56787d9e6450e26a78c15700e104c7.dir
|
||||
size: 45746089
|
||||
nfiles: 2
|
||||
params:
|
||||
configs/settings.yaml:
|
||||
default.generate_predictions.input_dataclient_type: local
|
||||
default.generate_predictions.output_dataclient_type: local
|
||||
default.generate_predictions.predictions_column_name: predictions
|
||||
default.generate_predictions.predictions_output_filepath: ./data/predictions/predictions.parquet
|
||||
default.generate_predictions.test_data_filepath: ./data/prepared_data/test.parquet
|
||||
default.generate_predictions.predictions_output_filepath:
|
||||
./data/predictions/predictions.parquet
|
||||
default.generate_predictions.test_data_filepath:
|
||||
./data/prepared_data/test.parquet
|
||||
outs:
|
||||
- path: data/predictions/
|
||||
hash: md5
|
||||
md5: bd6821db9abc95af8c74aa20effd7f37.dir
|
||||
size: 487194
|
||||
md5: 6e004c7f4812b5cabbee62fe8fb0d82f.dir
|
||||
size: 484524
|
||||
nfiles: 1
|
||||
generate_metrics:
|
||||
cmd: python 4_generate_metrics.py
|
||||
|
|
@ -155,13 +160,13 @@ stages:
|
|||
size: 3484
|
||||
- path: data/predictions
|
||||
hash: md5
|
||||
md5: bd6821db9abc95af8c74aa20effd7f37.dir
|
||||
size: 487194
|
||||
md5: 6e004c7f4812b5cabbee62fe8fb0d82f.dir
|
||||
size: 484524
|
||||
nfiles: 1
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: 7cc090d55cb296ce5d360d655c06e861.dir
|
||||
size: 46183314
|
||||
md5: 5c56787d9e6450e26a78c15700e104c7.dir
|
||||
size: 45746089
|
||||
nfiles: 2
|
||||
params:
|
||||
configs/settings.yaml:
|
||||
|
|
@ -171,15 +176,15 @@ stages:
|
|||
outs:
|
||||
- path: metrics/metrics.json
|
||||
hash: md5
|
||||
md5: 9c2a7802554f5c2f750b2242c6003026
|
||||
md5: b9ae6d24424f2d5389697577e9076b91
|
||||
size: 223
|
||||
generate_scenerio_metrics:
|
||||
cmd: python 5_generate_scenarios.py
|
||||
deps:
|
||||
- path: 5_generate_scenarios.py
|
||||
hash: md5
|
||||
md5: 40506749fefd926d47c60ff5b16db307
|
||||
size: 5337
|
||||
md5: 872b0c762ce1c8933fcbc5f54d5d4b5d
|
||||
size: 5658
|
||||
params:
|
||||
configs/scenarios.yaml:
|
||||
default.scenarios:
|
||||
|
|
@ -192,9 +197,9 @@ stages:
|
|||
outs:
|
||||
- path: metrics/scenario_metrics.md
|
||||
hash: md5
|
||||
md5: c01524a0cc2e61151c106d7049af3bf9
|
||||
md5: 32d78c20d91fedf2f5dbb4162f323e25
|
||||
size: 356
|
||||
- path: metrics/scenario_table.md
|
||||
hash: md5
|
||||
md5: a995c8ef7ffbe2ca254441150817ae21
|
||||
md5: 52cbd19566151b0c300f9673252704d2
|
||||
size: 872
|
||||
|
|
|
|||
|
|
@ -1,4 +1,2 @@
|
|||
/fit_metrics.json
|
||||
/metrics.json
|
||||
/scenario_table.md
|
||||
/scenario_metrics.md
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
{"mean_absolute_error": 1.2158480882644653, "median_absolute_error": 0.8539352416992188, "mean_squared_error": 3.116994857788086, "mean_absolute_percentage_error": 0.01968802697956562, "symmetric_mape": 0.019615056540152054}
|
||||
1
modules/ml-pipeline/src/pipeline/metrics/metrics.json
Normal file
1
modules/ml-pipeline/src/pipeline/metrics/metrics.json
Normal file
|
|
@ -0,0 +1 @@
|
|||
{"mean_absolute_error": 2.121211290359497, "median_absolute_error": 1.3063621520996094, "mean_squared_error": 11.15064525604248, "mean_absolute_percentage_error": 0.03622421622276306, "symmetric_mape": 0.035541225671999285}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
joblib==1.3.2
|
||||
boto3==1.28.17
|
||||
pandas==2.1.4
|
||||
autogluon.tabular[all]==1.0.0
|
||||
dynaconf==3.2.1
|
||||
pyarrow==13.0.0
|
||||
pre-commit==3.3.3
|
||||
joblib==1.5.2
|
||||
boto3==1.40.61
|
||||
pandas==2.2.3
|
||||
autogluon.tabular[all]==1.3
|
||||
dynaconf==3.2.12
|
||||
pyarrow==22.0.0
|
||||
pre-commit==4.3.0
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
joblib==1.3.2
|
||||
boto3==1.28.17
|
||||
pandas==2.1.4
|
||||
autogluon.tabular[all]==1.0.0
|
||||
dynaconf==3.2.1
|
||||
pyarrow==13.0.0
|
||||
PyYAML==6.0.1
|
||||
joblib==1.5.2
|
||||
boto3==1.40.61
|
||||
pandas==2.2.3
|
||||
autogluon.tabular[all]==1.3
|
||||
dynaconf==3.2.12
|
||||
pyarrow==22.0.0
|
||||
PyYAML==6.0.3
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
joblib==1.3.2
|
||||
boto3==1.28.17
|
||||
pandas==2.1.4
|
||||
autogluon.tabular[all]==1.0.0
|
||||
ray==2.6.3
|
||||
dynaconf==3.2.1
|
||||
alibi==0.9.5
|
||||
shap==0.42.1
|
||||
pyarrow==13.0.0
|
||||
pre-commit==3.3.3
|
||||
joblib==1.5.2
|
||||
boto3==1.40.61
|
||||
pandas==2.2.3
|
||||
autogluon.tabular[all]==1.3
|
||||
ray==2.44.1
|
||||
dynaconf==3.2.12
|
||||
alibi==0.5.5
|
||||
shap==0.49.1
|
||||
pyarrow==22.0.0
|
||||
pre-commit==4.3.0
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
boto3==1.28.41
|
||||
pandas==2.1.4
|
||||
autogluon.tabular[all]==1.0.0
|
||||
dynaconf==3.2.1
|
||||
boto3==1.40.61
|
||||
pandas==2.2.3
|
||||
autogluon.tabular[all]==1.3
|
||||
dynaconf==3.2.12
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue