Compare commits

...

78 commits

Author SHA1 Message Date
quandanrepo
b8dcf626b2
Merge pull request #117 from Hestia-Homes/sap-dev
Sap dev
2024-05-30 20:18:25 +01:00
Github-Bot
d09c534e0d Update Registry 2024-05-30 11:47:46 +00:00
Github-Bot
9925b54af2 Update Registry 2024-05-30 11:47:04 +00:00
KhalimCK
d307d9e093
Merge pull request #116 from Hestia-Homes/sap-dev-assumed
Sap dev assumed
2024-05-30 12:46:28 +01:00
Michael Duong
1944ea1cf1 Merge branch 'sap-dev' of github.com:Hestia-Homes/ML into sap-dev-assumed 2024-05-28 19:59:07 +01:00
Michael Duong
8399092e20 formatting 2024-05-28 19:58:46 +01:00
Github-Bot
dc260fddd0 Update Registry 2024-05-28 15:58:31 +00:00
Github-Bot
6f00d6afb8 Update Registry 2024-05-28 15:57:55 +00:00
quandanrepo
1f0414a905
Merge pull request #115 from Hestia-Homes/sap-dev-assumed
Sap dev assumed
2024-05-28 16:57:22 +01:00
Michael Duong
5e0118ca0b change deployment - pineed serverless pajkage 2024-05-28 16:55:47 +01:00
Michael Duong
7e3a6f7700 Merge branch 'sap-dev' of github.com:Hestia-Homes/ML into sap-dev-assumed 2024-05-26 10:46:38 +01:00
Github-Bot
396a5ffb08 Update Registry 2024-05-26 09:08:23 +00:00
Github-Bot
a78c5a50b0 Update Registry 2024-05-26 09:07:46 +00:00
quandanrepo
dc70b84626
Merge pull request #113 from Hestia-Homes/sap-dev-gto
Sap dev gto
2024-05-26 10:07:07 +01:00
Michael Duong
e0954b52bc Upgrade dvc packages to fix pygit2 error 2024-05-26 09:56:05 +01:00
Michael Duong
9e23eae27a add testing script 2024-05-26 09:54:22 +01:00
Michael Duong
0768ace947 add testing script 2024-05-26 09:50:39 +01:00
Michael Duong
4ff7cfb271 Merge branch 'sap-dev' of github.com:Hestia-Homes/ML into sap-dev-gto 2024-05-26 09:47:23 +01:00
Michael Duong
a4dffe527a add testing script 2024-05-26 09:47:08 +01:00
quandanrepo
8adfa72036
Merge pull request #111 from Hestia-Homes/sap-dev-package
Sap dev package
2024-05-26 09:31:46 +01:00
Michael Duong
29b350e33b Merge branch 'master' of github.com:Hestia-Homes/ML into sap-dev-assumed 2024-05-26 09:28:16 +01:00
Michael Duong
b985bbf753 new model with is_as_built_ending and is assumed columns 2024-05-26 09:28:00 +01:00
Michael Duong
f43d077479 use previous model with new downstream processes 2024-04-22 19:10:40 +01:00
Michael Duong
52f33239f4 Merge branch 'sap-dev-package' of github.com:Hestia-Homes/ML into sap-dev-package 2024-04-22 19:02:13 +01:00
Michael Duong
874b1db5f3 add ignored file to dockerignore 2024-04-22 19:01:56 +01:00
Michael Duong
7a3477c0e1 change to all packages 2024-04-22 13:30:58 +01:00
Michael Duong
87e3cc391e push files to s3 2024-04-19 17:48:15 +01:00
Michael Duong
380bd6b595 correct the dockerignore files and test model with just tabular 2024-04-19 17:34:10 +01:00
Michael Duong
50a3e2d5be correct the dockerignore files and test model with just tabular 2024-04-19 16:25:26 +01:00
Michael Duong
620c1d10a1 correct the dockerignore files and test model with just tabular 2024-04-19 16:22:06 +01:00
Michael Duong
179c334b6e add switch to turn off scenario data (for carbon and heat for now) 2024-04-19 14:38:57 +01:00
quandanrepo
502621e434
Merge pull request #110 from Hestia-Homes/sap-dev
Sap dev
2024-04-19 14:36:45 +01:00
Github-Bot
e97c01c366 Update Registry 2024-03-28 15:23:18 +00:00
Github-Bot
94a6aaa38f Update Registry 2024-03-28 15:22:33 +00:00
quandanrepo
98254555a1
Merge pull request #108 from Hestia-Homes/sap-dev-model
add c++ to docker, fixed dynaconf
2024-03-28 15:21:31 +00:00
Michael Duong
7aeaa9a5f6 add c++ to docker, fixed dynaconf 2024-03-28 15:13:20 +00:00
Github-Bot
a7bb61433a Update Registry 2024-03-28 09:31:07 +00:00
Github-Bot
64a5c93833 Update Registry 2024-03-28 09:30:30 +00:00
KhalimCK
e746352977
Merge pull request #104 from Hestia-Homes/sap-dev-model
Sap dev model
2024-03-28 09:29:53 +00:00
Michael Duong
1bb1f8d61f add metrics for scenarios 2024-03-27 12:30:31 +00:00
Michael Duong
c3985e2104 add metrics for scenarios 2024-03-27 12:22:58 +00:00
Michael Duong
9b6aeae0da medium model with scenario and upgraded autogluon 2024-03-26 22:32:44 +00:00
Michael Duong
96f5b37001 medium model with scenario and upgraded autogluon 2024-03-26 22:32:14 +00:00
Michael Duong
8a9b5877b5 medium model with scenario and upgraded autogluon 2024-03-26 22:30:50 +00:00
Michael Duong
ad2c4d6019 upgrade autogluon 2024-03-21 14:41:58 +00:00
Michael Duong
d5f40a8eb2 only ending 2024-02-17 21:17:34 +00:00
Michael Duong
cec3cc60e7 test less features 2024-02-17 16:26:49 +00:00
Michael Duong
81e7c2a4bd test this version 2024-02-16 16:57:37 +00:00
Michael Duong
fe430c4326 test this version 2024-02-16 16:54:18 +00:00
Michael Duong
49e66411ce test this version 2024-02-16 16:51:43 +00:00
Michael Duong
fdbf339d63 try the scenario cml 2024-02-16 16:44:43 +00:00
Michael Duong
2221283de4 try the scenario cml 2024-02-16 16:43:23 +00:00
Github-Bot
7f2f80af22 Update Registry 2024-02-16 16:36:38 +00:00
Github-Bot
99e883584b Update Registry 2024-02-16 16:35:54 +00:00
KhalimCK
3ee352b719
Merge pull request #103 from Hestia-Homes/sap-dev-revert
revert change on sap-dev-model
2024-02-16 16:35:18 +00:00
Michael Duong
0e2bff9d64 revert changes 2024-02-16 16:30:13 +00:00
Michael Duong
e060aeb4c0 Merge branch 'sap-dev' of github.com:Hestia-Homes/ML into sap-dev-revert 2024-02-16 16:25:57 +00:00
Michael Duong
a9b50c8a2d revert change on sap-dev-model 2024-02-16 16:23:37 +00:00
Github-Bot
6e76716fbc Update Registry 2024-02-16 14:52:15 +00:00
Github-Bot
86352ce0ce Update Registry 2024-02-16 14:51:31 +00:00
KhalimCK
33d0f6b323
Merge pull request #102 from Hestia-Homes/sap-dev-model
Sap dev model
2024-02-16 14:50:51 +00:00
Michael Duong
8363d5f0de Merge branch 'sap-dev' of github.com:Hestia-Homes/ML into sap-dev-model 2024-02-15 15:11:08 +00:00
Michael Duong
603dfe2eab new model with starting and ending rooms 2024-02-15 15:10:49 +00:00
Github-Bot
babbc155e9 Update Registry 2024-02-12 18:34:09 +00:00
Github-Bot
d21fd1c4e8 Update Registry 2024-02-12 18:33:28 +00:00
KhalimCK
6815cfcc66
Merge pull request #101 from Hestia-Homes/sap-dev-model
Sap dev model
2024-02-12 18:32:38 +00:00
Michael Duong
fedcd1ed92 Merge branch 'sap-dev' of github.com:Hestia-Homes/ML into sap-dev-model 2024-02-10 12:30:52 +00:00
Michael Duong
eeb653c041 new model 2024-02-10 11:03:38 +00:00
Github-Bot
8a1e2958b4 Update Registry 2024-02-09 18:54:16 +00:00
Github-Bot
bc44376e07 Update Registry 2024-02-09 18:53:22 +00:00
KhalimCK
89604645d5
Merge pull request #99 from Hestia-Homes/sap-dev-model
Sap dev model
2024-02-09 18:52:45 +00:00
Michael Duong
1e36d6e4f6 Merge branch 'sap-dev' of github.com:Hestia-Homes/ML into sap-dev-model 2024-02-09 18:46:33 +00:00
Michael Duong
778bff37fb 4000 model 2024-02-09 18:46:19 +00:00
Github-Bot
f17119382b Update Registry 2024-02-09 16:27:45 +00:00
Github-Bot
a98fc9d93a Update Registry 2024-02-09 16:27:01 +00:00
KhalimCK
051921ff3f
Merge pull request #97 from Hestia-Homes/sap-dev-model
Sap dev model
2024-02-09 16:26:24 +00:00
Michael Duong
18ea4a2177 Merge branch 'sap-dev' of github.com:Hestia-Homes/ML into sap-dev-model 2024-02-09 16:20:02 +00:00
Michael Duong
f92c97f6cf drop days_starting and days_ending 2024-02-09 16:19:47 +00:00
25 changed files with 378 additions and 89 deletions

9
.dockerignore Normal file
View file

@ -0,0 +1,9 @@
modules/ml-pipeline/src/pipeline/data/predictions
modules/ml-pipeline/src/pipeline/data/fit_predictions
modules/ml-pipeline/src/pipeline/data/prepared_data
modules/ml-pipeline/src/pipeline/data/model/allmodels
modules/ml-pipeline/src/pipeline/metrics
modules/ml-pipeline/src/pipeline/__pycache__
modules/ml-pipeline/src/pipeline/.dvc
modules/ml-pipeline/src/pipeline/analysis
modules/ml-pipeline/src/pipeline/metrics

View file

@ -19,8 +19,8 @@ jobs:
- name: Install Serverless and plugins
run: |
npm install -g serverless
npm install -g serverless-domain-manager
npm install -g serverless@^3.38.0
npm install -g serverless-domain-manager@^7.3.8
- name: Install DVC
run: |

View file

@ -98,6 +98,16 @@ jobs:
git fetch --depth=1 origin ${TARGET_BRANCH}:${TARGET_BRANCH}
dvc metrics diff --md --all ${TARGET_BRANCH} >> report.md
echo "## Scenario comparison" >> report.md
cat metrics/scenario_table.md >> report.md
echo "" >> report.md
echo "## Scenario metrics" >> report.md
cat metrics/scenario_metrics.md >> report.md
cml comment create report.md
# echo "## Residuals plot from model" >> report.md

View file

@ -8,6 +8,14 @@
"active": true
},
"sap": {
"version": "v0.14.0",
"stage": {
"dev": "v0.14.0"
},
"registered": true,
"active": true
},
"heat": {
"version": "v0.5.0",
"stage": {
"dev": "v0.5.0"
@ -15,18 +23,10 @@
"registered": true,
"active": true
},
"heat": {
"version": "v0.3.0",
"stage": {
"dev": "v0.3.0"
},
"registered": true,
"active": true
},
"carbon": {
"version": "v0.3.0",
"version": "v0.5.0",
"stage": {
"dev": "v0.3.0"
"dev": "v0.5.0"
},
"registered": true,
"active": true

View file

@ -1,4 +1,9 @@
modules/ml-pipeline/src/pipeline/data/predictions*
modules/ml-pipeline/src/pipeline/data/prepared_data*
modules/ml-pipeline/src/pipeline/data/model/allmodels*
modules/ml-pipeline/src/pipeline/metrics*
modules/ml-pipeline/src/pipeline/data/predictions
modules/ml-pipeline/src/pipeline/data/fit_predictions
modules/ml-pipeline/src/pipeline/data/prepared_data
modules/ml-pipeline/src/pipeline/data/model/allmodels
modules/ml-pipeline/src/pipeline/metrics
modules/ml-pipeline/src/__pycache__
modules/ml-pipeline/src/.dvc
modules/ml-pipeline/src/analysis
modules/ml-pipeline/src/metrics

View file

@ -9,7 +9,7 @@ ARG RUNTIME_ENVIRONMENT
ENV RUNTIME_ENVIRONMENT=${RUNTIME_ENVIRONMENT}
# Install necessary build tools - required to test locally
RUN yum install -y gcc python3-devel
RUN yum install -y gcc python3-devel gcc-c++
# Install python packages
COPY modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt ./requirements.txt

View file

@ -1,4 +1,8 @@
pipeline/data/predictions*
pipeline/data/prepared_data/train.parquet*
pipeline/data/model/allmodels*
pipeline/metrics*
pipeline/data/predictions
pipeline/data/fit_predictions
pipeline/data/prepared_data/train.parquet
pipeline/data/fit_predictions
pipeline/data/model/allmodels
pipeline/metrics
pipeline/.dvc
pipeline/analysis

View file

@ -1,7 +1,7 @@
# Dockerfile that can be used to test loading a model to generate a prediction (part of CI/CD flow)
FROM python:3.10.12-slim
RUN apt-get update && apt-get install -y libgomp1
RUN apt-get update && apt-get install -y libgomp1 gcc python3-dev
COPY pipeline/requirements/predictions/requirements.txt requirements.txt

View file

@ -0,0 +1,162 @@
"""
Fifth part of the pipeline:
After the model is built and metrics are generated,
we want to test this model against known scenarios
"""
import os
import pandas as pd
from core.interface.InterfaceModels import MLModel
from core.interface.InterfaceDataClient import DataClient
from core.interface.InterfaceMetrics import MLMetrics
from configs.post_prediction_logic import post_prediction_logic
from core.DataClient import dataclient_factory
from core.MLModels import model_factory
from core.MLMetrics import metrics_factory
from core.Logger import logger
from config import settings
# ---------------------------------------------------------------------------
# Module-level setup. Everything below runs at import time: it reads the
# Dynaconf `settings` sections and builds the model, data clients and metrics
# objects that the `__main__` block passes to generate_scenario_predictions().
# ---------------------------------------------------------------------------
logger.info("--- Initiate Parameters ---")
RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local")

# One settings section per pipeline stage / component.
client_params = settings.client
prepare_data_params = settings.prepare_data
build_model_params = settings.build_model
generate_predictions_params = settings.generate_predictions
generate_metrics_params = settings.generate_metrics
feature_process_params = settings.feature_processor
scenarios_params = settings.scenarios

# Individual values pulled out of those sections.
model_filepath = build_model_params["model_save_filepath"]
# NOTE(review): `target` is read here but not referenced by the code visible
# in this file - confirm whether it is still needed.
target = feature_process_params["feature_processor_config"]["target"]
scenario_data_filepaths = scenarios_params["scenario_data_filepaths"]
predictions_column_name = generate_predictions_params["predictions_column_name"]
comparison_output_filepath = scenarios_params["comparison_output_filepath"]
metrics_output_filepath = scenarios_params["metrics_output_filepath"]

logger.info("--- Initiate MLModel ---")
model = model_factory(build_model_params["model_type"])

logger.info("--- Initiate DataClient ---")
# Use data client for input and output, as we use dvc to cache later to the cloud
input_dataclient_type = scenarios_params["input_dataclient_type"]
input_dataclient = dataclient_factory(
    dataclient_type=input_dataclient_type,
    dataclient_config=client_params[input_dataclient_type],
)
output_dataclient_type = scenarios_params["output_dataclient_type"]
output_dataclient = dataclient_factory(
    dataclient_type=output_dataclient_type,
    dataclient_config=client_params[output_dataclient_type],
)

logger.info("--- Initiate MLMetrics ---")
metrics = metrics_factory(generate_metrics_params["metrics_type"])
def generate_scenario_predictions(
    input_dataclient: DataClient,
    output_dataclient: DataClient,
    model: MLModel,
    metrics: MLMetrics,
    model_filepath: str,
    scenario_data_filepaths: list,
    predictions_column_name: str,
    comparison_output_filepath: str,
    metrics_output_filepath: str,
):
    """
    Given the new model, generate predictions for the expected scenarios and
    save a per-row comparison table plus summary metrics.

    Args:
        input_dataclient: client used to load each scenario data file.
        output_dataclient: client used to save the two outputs.
        model: model object; it is loaded from ``model_filepath`` and then
            asked to predict on the combined scenario data.
        metrics: object whose ``generate_metrics`` is called with the
            ``impact`` and ``predicted_impact`` columns.
        model_filepath: location passed to ``model.load_model``.
        scenario_data_filepaths: locations of the scenario data files. When
            ``None`` or empty, empty dataframes are saved to both output
            locations and the function returns early.
        predictions_column_name: column name given to the model predictions.
        comparison_output_filepath: where the ``uprn`` / ``id`` / ``impact`` /
            ``predicted_impact`` table is saved.
        metrics_output_filepath: where the ``metric`` / ``value`` table is saved.

    Returns:
        None. Results are written through ``output_dataclient``.

    The scenario data must contain the columns ``sap_starting``, ``impact``,
    ``uprn`` and ``id``, which are read below.
    """
    logger.info("--- Loading Scenario Data ---")

    # If we have no scenario data (None or an empty list), save empty
    # dataframes so both output locations are still written.
    if not scenario_data_filepaths:
        logger.info("No scenario data filepaths provided")
        empty_frame = pd.DataFrame()
        output_dataclient.save_data(
            obj=empty_frame, location=comparison_output_filepath, save_config=None
        )
        output_dataclient.save_data(
            obj=empty_frame, location=metrics_output_filepath, save_config=None
        )
        return

    # Can have multiple scenario data files. Load them all first and
    # concatenate once; ignore_index gives the combined frame unique 0..n-1
    # row labels, which the column-wise concat with the predictions further
    # down relies on for alignment.
    scenario_frames = [
        input_dataclient.load_data(scenario_data_filepath, load_config=None)
        for scenario_data_filepath in scenario_data_filepaths
    ]
    scenario_data = pd.concat(scenario_frames, ignore_index=True)

    logger.info("--- Loading Model ---")
    model.load_model(model_filepath)

    logger.info("--- Generating Predictions ---")
    predictions = model.predict(
        data=scenario_data, post_prediction_logic=post_prediction_logic
    )

    logger.info("--- Generate Scenario Predicted Impact ---")
    predictions_df = pd.DataFrame(predictions)
    predictions_df.columns = [predictions_column_name]
    scenario_data = pd.concat([scenario_data, predictions_df], axis=1)
    # Predicted impact is the absolute difference between the predicted value
    # and the starting SAP value.
    scenario_data["predicted_impact"] = abs(
        scenario_data[predictions_column_name] - scenario_data["sap_starting"]
    )

    logger.info("--- Generate Metrics ---")
    metrics_dict = metrics.generate_metrics(
        scenario_data["impact"], scenario_data["predicted_impact"]
    )
    # Reshape the metrics mapping into a two-column (metric, value) table.
    metrics_df = pd.DataFrame(metrics_dict, index=[0]).T.reset_index()
    metrics_df.columns = ["metric", "value"]

    logger.info("--- Save prediction into metrics ---")
    output_df = scenario_data[["uprn", "id", "impact", "predicted_impact"]]
    output_dataclient.save_data(
        obj=output_df, location=comparison_output_filepath, save_config=None
    )
    output_dataclient.save_data(
        obj=metrics_df, location=metrics_output_filepath, save_config=None
    )
# Script entry point: run the scenario stage with the objects and settings
# prepared at module level.
if __name__ == "__main__":
    logger.info(f"--- {__file__} - Start! ---")
    logger.info("--- Generate Scenario Predictions ---")
    generate_scenario_predictions(
        input_dataclient=input_dataclient,
        output_dataclient=output_dataclient,
        model=model,
        metrics=metrics,
        model_filepath=model_filepath,
        scenario_data_filepaths=scenario_data_filepaths,
        predictions_column_name=predictions_column_name,
        comparison_output_filepath=comparison_output_filepath,
        metrics_output_filepath=metrics_output_filepath,
    )
    logger.info(f"--- {__file__} - Complete! ---")

View file

@ -37,3 +37,4 @@ Workflow:
- This experiment will have the corresponding .dvc files for the hashed model and data
- Use version control as normal
- git add, git commit etc
- To revert a change, use `git checkout {COMMIT_HASH}`, followed by `git switch -c {NEW_BRANCH_NAME}`

View file

@ -7,6 +7,7 @@ settings = Dynaconf(
"./configs/settings.yaml",
"./configs/build_model.yaml",
"./configs/analysis.yaml",
"./configs/scenarios.yaml",
],
)

View file

@ -14,8 +14,9 @@ default:
output_filepath: ./data/model/allmodels/
problem_type: regression
eval_metric: mean_squared_error #mean_absolute_error
time_limit: 4000
time_limit: 1800
presets: medium_quality
excluded_model_types: ['RF', 'FASTAI', 'CAT', 'NN_TORCH', 'KNN', 'XT']
excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
infer_limit: 0.05
infer_limit_batch_size: 10000
ag_args_ensemble: {'num_folds_parallel': 2}

View file

@ -0,0 +1,13 @@
default:
scenarios:
input_dataclient_type: aws-s3
output_dataclient_type: local
scenario_data_filepaths:
# - s3://retrofit-data-dev/scenario_data/22-03-2024-19-20-09/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/24-03-2024-20-23-25/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/26-05-2024-10-44-53/recommendations_scoring_data.parquet
- s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
comparison_output_filepath: ./metrics/scenario_table.md
metrics_output_filepath: ./metrics/scenario_metrics.md

View file

@ -18,13 +18,11 @@ default:
prepare_data:
input_dataclient_type: aws-s3
output_dataclient_type: local
# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_with_differencing.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/floor_area_clean_test.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_without_differencing.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_test.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet
data_filepath: s3://retrofit-datalake-dev/dataset_with0perm_all.parquet
train_proportion: 1
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-03-22-18-56-53/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-25-08-36-36/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-26-10-31-39/dataset_rooms.parquet
data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
train_proportion: 0.9
output_train_filepath: ./data/prepared_data/train.parquet
output_test_filepath: ./data/prepared_data/test.parquet
@ -35,9 +33,35 @@ default:
subsample_seed: 0
target: sap_ending
identifier_columns: ["uprn"]
drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending"]
# retain_features: ["SAP_STARTING", "TOTAL_FLOOR_AREA_DIFF"]
# drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"]
drop_columns: [
"heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending",
'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending',
'number_habitable_rooms', 'number_heated_rooms']
retain_features: null
# retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
# 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',
# 'walls_energy_eff_ending', 'secondheat_description_ending',
# 'property_type', 'mainheatc_energy_eff_ending', 'built_form',
# 'walls_insulation_thickness_ending', 'potential_energy_efficiency',
# 'transaction_type_ending',
# 'floor_thermal_transmittance_ending',
# 'low_energy_lighting_ending', 'heat_demand_starting',
# 'photo_supply_ending', 'carbon_starting',
# 'walls_thermal_transmittance_ending',
# 'roof_insulation_thickness_ending',
# 'total_floor_area_ending', 'number_open_fireplaces_ending',
# 'windows_energy_eff_ending',
# 'floor_height_ending',
# 'extension_count_ending',
# 'has_air_source_heat_pump_ending',
# 'charging_system_ending', 'construction_age_band', 'glazed_type_ending',
# 'roof_thermal_transmittance_ending',
# 'floor_insulation_thickness_ending', 'has_mains_gas_ending',
# 'estimated_perimeter_starting', 'energy_consumption_potential',
# 'environment_impact_potential', 'heater_type_ending',
# 'multi_glaze_proportion_ending',
# 'lighting_energy_eff_ending', 'fixed_lighting_outlets_count']
generate_predictions:
input_dataclient_type: local

View file

@ -245,7 +245,8 @@ class LocalClient:
save_methods = {
".parquet": self._save_parquet,
".json": self._save_json
".json": self._save_json,
".md": self._save_md,
# "": _save_directory(**save_config),
# ADD MORE save_methods HERE
}
@ -294,3 +295,10 @@ class LocalClient:
# Write the contents of the buffer to the local file
with open(location, "wb") as f:
f.write(buffer.getvalue())
def _save_md(self, obj: pd.DataFrame, location: str, save_config: dict):
"""
Save object as markdown
"""
obj.to_markdown(location, **save_config)

View file

@ -25,7 +25,7 @@ def model_factory(model_type: str) -> MLModel:
models = {
"SKLearnLinearRegression": SKLearnLinearRegression(),
"SKLearnSVMRegression": SKLearnSVMRegression(),
"AutogluonAutoML": AutogluonAutoML()
"AutogluonAutoML": AutogluonAutoML(),
# ADD OTHER MODELS HERE
}
@ -151,6 +151,7 @@ class AutogluonAutoML:
"excluded_model_types",
"infer_limit",
"infer_limit_batch_size",
"ag_args_ensemble",
]
def load_model(self, path: Union[Path, str]) -> None:
@ -207,6 +208,7 @@ class AutogluonAutoML:
excluded_model_types=model_hyperparameters["excluded_model_types"],
infer_limit=model_hyperparameters["infer_limit"],
infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"],
ag_args_ensemble=model_hyperparameters["ag_args_ensemble"],
)
def predict(

View file

@ -1,12 +1,23 @@
schema: '2.0'
stages:
startup_cleanup:
cmd: python 0_startup_cleanup.py
deps:
- path: 0_startup_cleanup.py
hash: md5
md5: b1b12f6b6393fbf8b83d23684df0a3d4
size: 1220
params:
configs/settings.yaml:
default.startup_cleanup.artefacts: ./data
default.startup_cleanup.metrics: ./metrics
prepare_data:
cmd: python 1_prepare_data.py
deps:
- path: 1_prepare_data.py
hash: md5
md5: 1793a35e71751d3c84f9affc67ecb9a8
size: 4296
md5: 11a3b8bfdfe199ab7ecc39ccc5652649
size: 4298
params:
configs/settings.yaml:
default.feature_processor.feature_processor_config.drop_columns:
@ -15,22 +26,31 @@ stages:
- rdsap_change
- heat_demand_ending
- carbon_ending
- days_to_starting
- days_to_ending
- number_habitable_rooms_starting
- number_habitable_rooms_ending
- number_heated_rooms_starting
- number_heated_rooms_ending
- number_habitable_rooms
- number_heated_rooms
default.feature_processor.feature_processor_config.retain_features:
default.feature_processor.feature_processor_config.subsample_amount:
default.feature_processor.feature_processor_config.subsample_seed: 0
default.feature_processor.feature_processor_config.target: sap_ending
default.feature_processor.feature_processor_type: dataframe
default.prepare_data.data_filepath: s3://retrofit-datalake-dev/dataset_with0perm_all.parquet
default.prepare_data.data_filepath:
s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
default.prepare_data.input_dataclient_type: aws-s3
default.prepare_data.output_dataclient_type: local
default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet
default.prepare_data.train_proportion: 1
default.prepare_data.train_proportion: 0.9
outs:
- path: data/prepared_data/
hash: md5
md5: 84fa631bd02686b052d6a7144eafd38e.dir
size: 43859225
md5: 80c9e138146a1d96b9d16091c207e2e8.dir
size: 45056059
nfiles: 2
build_model:
cmd: python 2_build_model.py
@ -41,8 +61,8 @@ stages:
size: 4820
- path: data/prepared_data
hash: md5
md5: 84fa631bd02686b052d6a7144eafd38e.dir
size: 43859225
md5: 80c9e138146a1d96b9d16091c207e2e8.dir
size: 45056059
nfiles: 2
params:
configs/build_model.yaml:
@ -59,32 +79,33 @@ stages:
output_filepath: ./data/model/allmodels/
problem_type: regression
eval_metric: mean_squared_error
time_limit: 4000
time_limit: 1800
presets: medium_quality
excluded_model_types:
- RF
- FASTAI
- CAT
- NN_TORCH
- KNN
- XT
infer_limit: 0.05
infer_limit_batch_size: 10000
ag_args_ensemble:
num_folds_parallel: 2
outs:
- path: data/fit_predictions/
hash: md5
md5: ede187e9d0bffdef054f573f3c2bd222.dir
size: 3578590
md5: d9c9afc05e8780db47c0548b19bf7d19.dir
size: 3349989
nfiles: 1
- path: data/model/
hash: md5
md5: b2ad0b538dc4aef0de3d431fc9c40c4f.dir
size: 814720415
nfiles: 31
md5: 13c3100e1486c27a83a8a47491077842.dir
size: 773523079
nfiles: 36
- path: metrics/fit_metrics.json
hash: md5
md5: c45b84f12971a0156e4f3d85d3e725f5
size: 218
md5: 2ff70a2a45813e1bcdf2ea3aa8e07d4a
size: 224
generate_predictions:
cmd: python 3_generate_predictions.py
deps:
@ -94,13 +115,13 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: b2ad0b538dc4aef0de3d431fc9c40c4f.dir
size: 814720415
nfiles: 31
md5: 13c3100e1486c27a83a8a47491077842.dir
size: 773523079
nfiles: 36
- path: data/prepared_data
hash: md5
md5: 84fa631bd02686b052d6a7144eafd38e.dir
size: 43859225
md5: 80c9e138146a1d96b9d16091c207e2e8.dir
size: 45056059
nfiles: 2
params:
configs/settings.yaml:
@ -112,8 +133,8 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: 5e60ca251af51de6fef3d0c659f8bb27.dir
size: 627416
md5: 5d07bcebf3160a72bb18dfd79106e85c.dir
size: 463197
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
@ -124,13 +145,13 @@ stages:
size: 3484
- path: data/predictions
hash: md5
md5: 5e60ca251af51de6fef3d0c659f8bb27.dir
size: 627416
md5: 5d07bcebf3160a72bb18dfd79106e85c.dir
size: 463197
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 84fa631bd02686b052d6a7144eafd38e.dir
size: 43859225
md5: 80c9e138146a1d96b9d16091c207e2e8.dir
size: 45056059
nfiles: 2
params:
configs/settings.yaml:
@ -140,16 +161,30 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: 033efa4d4044b6b6fc92dd37194727fa
size: 225
startup_cleanup:
cmd: python 0_startup_cleanup.py
md5: 3e08df02fd5c5d094bcf936e1338d596
size: 223
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps:
- path: 0_startup_cleanup.py
- path: 5_generate_scenarios.py
hash: md5
md5: b1b12f6b6393fbf8b83d23684df0a3d4
size: 1220
md5: 40506749fefd926d47c60ff5b16db307
size: 5337
params:
configs/settings.yaml:
default.startup_cleanup.artefacts: ./data
default.startup_cleanup.metrics: ./metrics
configs/scenarios.yaml:
default.scenarios:
input_dataclient_type: aws-s3
output_dataclient_type: local
scenario_data_filepaths:
- s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
comparison_output_filepath: ./metrics/scenario_table.md
metrics_output_filepath: ./metrics/scenario_metrics.md
outs:
- path: metrics/scenario_metrics.md
hash: md5
md5: fa4d6d7bbd7818613800da5f8f37ea96
size: 363
- path: metrics/scenario_table.md
hash: md5
md5: d6baf100a1623cc2467c2f8221d314c9
size: 2133

View file

@ -71,6 +71,17 @@ stages:
outs:
- metrics/metrics.json
always_changed: true
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps:
- 5_generate_scenarios.py
params:
- configs/scenarios.yaml:
- default.scenarios
outs:
- metrics/scenario_table.md
- metrics/scenario_metrics.md
always_changed: true
metrics:
- metrics/metrics.json
- metrics/fit_metrics.json

View file

@ -1,2 +1,4 @@
/fit_metrics.json
/metrics.json
/scenario_table.md
/scenario_metrics.md

View file

@ -1,7 +1,7 @@
joblib==1.3.2
boto3==1.28.17
pandas==1.5.3
autogluon==0.8.2
dynaconf==3.2.0
pandas==2.1.4
autogluon.tabular[all]==1.0.0
dynaconf==3.2.1
pyarrow==13.0.0
pre-commit==3.3.3

View file

@ -1,7 +1,7 @@
joblib==1.3.2
boto3==1.28.17
pandas==1.5.3
autogluon==0.8.2
dynaconf==3.2.0
pandas==2.1.4
autogluon.tabular[all]==1.0.0
dynaconf==3.2.1
pyarrow==13.0.0
PyYAML==6.0.1

View file

@ -1,9 +1,10 @@
joblib==1.3.2
boto3==1.28.17
pandas==1.5.3
autogluon==0.8.2
dynaconf==3.2.0
alibi==0.9.4
pandas==2.1.4
autogluon.tabular[all]==1.0.0
ray==2.6.3
dynaconf==3.2.1
alibi==0.9.5
shap==0.42.1
pyarrow==13.0.0
pre-commit==3.3.3

View file

@ -1,4 +1,4 @@
boto3==1.28.41
pandas==1.5.3
autogluon==0.8.2
dynaconf==3.2.0
pandas==2.1.4
autogluon.tabular[all]==1.0.0
dynaconf==3.2.1

View file

@ -1,4 +1,4 @@
dvc==3.36.0
dvc-s3==3.0.1
gto==1.6.1
dvc==3.51.0
dvc-s3==3.2.0
gto==1.7.1
pyOpenSSL==23.3.0