Compare commits

..

No commits in common. "master" and "sap@v0.6.0" have entirely different histories.

25 changed files with 83 additions and 370 deletions

View file

@ -1,9 +0,0 @@
modules/ml-pipeline/src/pipeline/data/predictions
modules/ml-pipeline/src/pipeline/data/fit_predictions
modules/ml-pipeline/src/pipeline/data/prepared_data
modules/ml-pipeline/src/pipeline/data/model/allmodels
modules/ml-pipeline/src/pipeline/metrics
modules/ml-pipeline/src/pipeline/__pycache__
modules/ml-pipeline/src/pipeline/.dvc
modules/ml-pipeline/src/pipeline/analysis
modules/ml-pipeline/src/pipeline/metrics

View file

@ -19,8 +19,8 @@ jobs:
- name: Install Serverless and plugins - name: Install Serverless and plugins
run: | run: |
npm install -g serverless@^3.38.0 npm install -g serverless
npm install -g serverless-domain-manager@^7.3.8 npm install -g serverless-domain-manager
- name: Install DVC - name: Install DVC
run: | run: |

View file

@ -98,16 +98,6 @@ jobs:
git fetch --depth=1 origin ${TARGET_BRANCH}:${TARGET_BRANCH} git fetch --depth=1 origin ${TARGET_BRANCH}:${TARGET_BRANCH}
dvc metrics diff --md --all ${TARGET_BRANCH} >> report.md dvc metrics diff --md --all ${TARGET_BRANCH} >> report.md
echo "## Scenario comparison" >> report.md
cat metrics/scenario_table.md >> report.md
echo "" >> report.md
echo "## Scenario metrics" >> report.md
cat metrics/scenario_metrics.md >> report.md
cml comment create report.md cml comment create report.md
# echo "## Residuals plot from model" >> report.md # echo "## Residuals plot from model" >> report.md

View file

@ -8,25 +8,25 @@
"active": true "active": true
}, },
"sap": { "sap": {
"version": "v0.14.0", "version": "v0.5.0",
"stage": { "stage": {
"dev": "v0.14.0" "dev": "v0.5.0"
}, },
"registered": true, "registered": true,
"active": true "active": true
}, },
"heat": { "heat": {
"version": "v0.5.0", "version": "v0.3.0",
"stage": { "stage": {
"dev": "v0.5.0" "dev": "v0.3.0"
}, },
"registered": true, "registered": true,
"active": true "active": true
}, },
"carbon": { "carbon": {
"version": "v0.5.0", "version": "v0.3.0",
"stage": { "stage": {
"dev": "v0.5.0" "dev": "v0.3.0"
}, },
"registered": true, "registered": true,
"active": true "active": true

View file

@ -1,9 +1,4 @@
modules/ml-pipeline/src/pipeline/data/predictions modules/ml-pipeline/src/pipeline/data/predictions*
modules/ml-pipeline/src/pipeline/data/fit_predictions modules/ml-pipeline/src/pipeline/data/prepared_data*
modules/ml-pipeline/src/pipeline/data/prepared_data modules/ml-pipeline/src/pipeline/data/model/allmodels*
modules/ml-pipeline/src/pipeline/data/model/allmodels modules/ml-pipeline/src/pipeline/metrics*
modules/ml-pipeline/src/pipeline/metrics
modules/ml-pipeline/src/__pycache__
modules/ml-pipeline/src/.dvc
modules/ml-pipeline/src/analysis
modules/ml-pipeline/src/metrics

View file

@ -9,7 +9,7 @@ ARG RUNTIME_ENVIRONMENT
ENV RUNTIME_ENVIRONMENT=${RUNTIME_ENVIRONMENT} ENV RUNTIME_ENVIRONMENT=${RUNTIME_ENVIRONMENT}
# Install necessary build tools - required to test locally # Install necessary build tools - required to test locally
RUN yum install -y gcc python3-devel gcc-c++ RUN yum install -y gcc python3-devel
# Install python packages # Install python packages
COPY modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt ./requirements.txt COPY modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt ./requirements.txt

View file

@ -1,8 +1,4 @@
pipeline/data/predictions pipeline/data/predictions*
pipeline/data/fit_predictions pipeline/data/prepared_data/train.parquet*
pipeline/data/prepared_data/train.parquet pipeline/data/model/allmodels*
pipeline/data/fit_predictions pipeline/metrics*
pipeline/data/model/allmodels
pipeline/metrics
pipeline/.dvc
pipeline/analysis

View file

@ -1,7 +1,7 @@
# Dockerfile that can be used to test loading a model to generate a prediction (part of CI/CD flow) # Dockerfile that can be used to test loading a model to generate a prediction (part of CI/CD flow)
FROM python:3.10.12-slim FROM python:3.10.12-slim
RUN apt-get update && apt-get install -y libgomp1 gcc python3-dev RUN apt-get update && apt-get install -y libgomp1
COPY pipeline/requirements/predictions/requirements.txt requirements.txt COPY pipeline/requirements/predictions/requirements.txt requirements.txt

View file

@ -1,162 +0,0 @@
"""
Fourth part of the pipeline:
After the model is built and metrics are generated,
we want to test this model against known scenarios
"""
import os
import pandas as pd
from core.interface.InterfaceModels import MLModel
from core.interface.InterfaceDataClient import DataClient
from core.interface.InterfaceMetrics import MLMetrics
from configs.post_prediction_logic import post_prediction_logic
from core.DataClient import dataclient_factory
from core.MLModels import model_factory
from core.MLMetrics import metrics_factory
from core.Logger import logger
from config import settings
logger.info(f"--- Initiate Parameters ---")
RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local")
client_params = settings.client
prepare_data_params = settings.prepare_data
build_model_params = settings.build_model
generate_predictions_params = settings.generate_predictions
generate_metrics_params = settings.generate_metrics
feature_process_params = settings.feature_processor
scenarios_params = settings.scenarios
model_filepath = build_model_params["model_save_filepath"]
target = feature_process_params["feature_processor_config"]["target"]
scenario_data_filepaths = scenarios_params["scenario_data_filepaths"]
predictions_column_name = generate_predictions_params["predictions_column_name"]
comparison_output_filepath = scenarios_params["comparison_output_filepath"]
metrics_output_filepath = scenarios_params["metrics_output_filepath"]
logger.info(f"--- Initiate MLModel ---")
model = model_factory(build_model_params["model_type"])
logger.info(f"--- Initiate DataClient ---")
# Use data client for input and output, as we use dvc to cache later to the cloud
input_dataclient_type = scenarios_params["input_dataclient_type"]
input_dataclient = dataclient_factory(
dataclient_type=input_dataclient_type,
dataclient_config=client_params[input_dataclient_type],
)
output_dataclient_type = scenarios_params["output_dataclient_type"]
output_dataclient = dataclient_factory(
dataclient_type=output_dataclient_type,
dataclient_config=client_params[output_dataclient_type],
)
logger.info(f"--- Initiate MLMetrics ---")
metrics = metrics_factory(generate_metrics_params["metrics_type"])
def generate_scenario_predictions(
input_dataclient: DataClient,
output_dataclient: DataClient,
model: MLModel,
metrics: MLMetrics,
model_filepath: str,
scenario_data_filepaths: list,
predictions_column_name: str,
comparison_output_filepath: str,
metrics_output_filepath: str,
):
"""
Given the new model, we generate prediction for expected scenarios
"""
logger.info("--- Loading Scenario Data ---")
scenario_data = pd.DataFrame()
# If we have no scenario data, we can save empty dataframes
if scenario_data_filepaths is None:
logger.info("No scenario data filepaths provided")
output_dataclient.save_data(
obj=scenario_data, location=comparison_output_filepath, save_config=None
)
output_dataclient.save_data(
obj=scenario_data, location=metrics_output_filepath, save_config=None
)
return
# Can have multiple scenario data files
for scenario_data_filepath in scenario_data_filepaths:
scenario_data = pd.concat(
[
scenario_data,
input_dataclient.load_data(scenario_data_filepath, load_config=None),
]
)
logger.info("--- Loading Model ---")
model.load_model(model_filepath)
logger.info("--- Generating Predictions ---")
predictions = model.predict(
data=scenario_data, post_prediction_logic=post_prediction_logic
)
logger.info("--- Generate Scenario Predicted Impact ---")
predictions_df = pd.DataFrame(predictions)
predictions_df.columns = [predictions_column_name]
scenario_data = pd.concat([scenario_data, predictions_df], axis=1)
scenario_data["predicted_impact"] = abs(
scenario_data[predictions_column_name] - scenario_data["sap_starting"]
)
logger.info("--- Generate Metrics ---")
metrics_dict = metrics.generate_metrics(
scenario_data["impact"], scenario_data["predicted_impact"]
)
metrics_df = pd.DataFrame(metrics_dict, index=[0]).T.reset_index()
metrics_df.columns = ["metric", "value"]
logger.info("--- Save prediction into metrics ---")
output_df = scenario_data[["uprn", "id", "impact", "predicted_impact"]]
output_dataclient.save_data(
obj=output_df, location=comparison_output_filepath, save_config=None
)
output_dataclient.save_data(
obj=metrics_df, location=metrics_output_filepath, save_config=None
)
if __name__ == "__main__":
logger.info(f"--- {__file__} - Start! ---")
logger.info(f"--- Generate Scenario Predictions ---")
generate_scenario_predictions(
input_dataclient=input_dataclient,
output_dataclient=output_dataclient,
model=model,
metrics=metrics,
model_filepath=model_filepath,
scenario_data_filepaths=scenario_data_filepaths,
predictions_column_name=predictions_column_name,
comparison_output_filepath=comparison_output_filepath,
metrics_output_filepath=metrics_output_filepath,
)
logger.info(f"--- {__file__} - Complete! ---")

View file

@ -37,4 +37,3 @@ Workflow:
- This experiment will have the corresponding .dvc files for the hashed model and data - This experiment will have the corresponding .dvc files for the hashed model and data
- Use version control as normal - Use version control as normal
- git add, git commit etc - git add, git commit etc
- To revert change, use `git checkout {COMMIT_HASH}`, followed by `git switch -c {NEW_BRANCH_NAME}`

View file

@ -7,7 +7,6 @@ settings = Dynaconf(
"./configs/settings.yaml", "./configs/settings.yaml",
"./configs/build_model.yaml", "./configs/build_model.yaml",
"./configs/analysis.yaml", "./configs/analysis.yaml",
"./configs/scenarios.yaml",
], ],
) )

View file

@ -14,9 +14,8 @@ default:
output_filepath: ./data/model/allmodels/ output_filepath: ./data/model/allmodels/
problem_type: regression problem_type: regression
eval_metric: mean_squared_error #mean_absolute_error eval_metric: mean_squared_error #mean_absolute_error
time_limit: 1800 time_limit: 400
presets: medium_quality presets: medium_quality
excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT'] excluded_model_types: ['RF', 'FASTAI', 'CAT', 'NN_TORCH', 'KNN', 'XT']
infer_limit: 0.05 infer_limit: 0.05
infer_limit_batch_size: 10000 infer_limit_batch_size: 10000
ag_args_ensemble: {'num_folds_parallel': 2}

View file

@ -1,13 +0,0 @@
default:
scenarios:
input_dataclient_type: aws-s3
output_dataclient_type: local
scenario_data_filepaths:
# - s3://retrofit-data-dev/scenario_data/22-03-2024-19-20-09/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/24-03-2024-20-23-25/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/26-05-2024-10-44-53/recommendations_scoring_data.parquet
- s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
comparison_output_filepath: ./metrics/scenario_table.md
metrics_output_filepath: ./metrics/scenario_metrics.md

View file

@ -18,11 +18,13 @@ default:
prepare_data: prepare_data:
input_dataclient_type: aws-s3 input_dataclient_type: aws-s3
output_dataclient_type: local output_dataclient_type: local
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-03-22-18-56-53/dataset_rooms.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_with_differencing.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-25-08-36-36/dataset_rooms.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/floor_area_clean_test.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-26-10-31-39/dataset_rooms.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_without_differencing.parquet
data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_test.parquet
train_proportion: 0.9 # data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet
data_filepath: s3://retrofit-datalake-dev/dataset_with0perm_all.parquet
train_proportion: 1
output_train_filepath: ./data/prepared_data/train.parquet output_train_filepath: ./data/prepared_data/train.parquet
output_test_filepath: ./data/prepared_data/test.parquet output_test_filepath: ./data/prepared_data/test.parquet
@ -33,35 +35,9 @@ default:
subsample_seed: 0 subsample_seed: 0
target: sap_ending target: sap_ending
identifier_columns: ["uprn"] identifier_columns: ["uprn"]
# drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"] drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"]
drop_columns: [ # retain_features: ["SAP_STARTING", "TOTAL_FLOOR_AREA_DIFF"]
"heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending",
'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending',
'number_habitable_rooms', 'number_heated_rooms']
retain_features: null retain_features: null
# retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
# 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',
# 'walls_energy_eff_ending', 'secondheat_description_ending',
# 'property_type', 'mainheatc_energy_eff_ending', 'built_form',
# 'walls_insulation_thickness_ending', 'potential_energy_efficiency',
# 'transaction_type_ending',
# 'floor_thermal_transmittance_ending',
# 'low_energy_lighting_ending', 'heat_demand_starting',
# 'photo_supply_ending', 'carbon_starting',
# 'walls_thermal_transmittance_ending',
# 'roof_insulation_thickness_ending',
# 'total_floor_area_ending', 'number_open_fireplaces_ending',
# 'windows_energy_eff_ending',
# 'floor_height_ending',
# 'extension_count_ending',
# 'has_air_source_heat_pump_ending',
# 'charging_system_ending', 'construction_age_band', 'glazed_type_ending',
# 'roof_thermal_transmittance_ending',
# 'floor_insulation_thickness_ending', 'has_mains_gas_ending',
# 'estimated_perimeter_starting', 'energy_consumption_potential',
# 'environment_impact_potential', 'heater_type_ending',
# 'multi_glaze_proportion_ending',
# 'lighting_energy_eff_ending', 'fixed_lighting_outlets_count']
generate_predictions: generate_predictions:
input_dataclient_type: local input_dataclient_type: local

View file

@ -245,8 +245,7 @@ class LocalClient:
save_methods = { save_methods = {
".parquet": self._save_parquet, ".parquet": self._save_parquet,
".json": self._save_json, ".json": self._save_json
".md": self._save_md,
# "": _save_directory(**save_config), # "": _save_directory(**save_config),
# ADD MORE save_methods HERE # ADD MORE save_methods HERE
} }
@ -295,10 +294,3 @@ class LocalClient:
# Write the contents of the buffer to the local file # Write the contents of the buffer to the local file
with open(location, "wb") as f: with open(location, "wb") as f:
f.write(buffer.getvalue()) f.write(buffer.getvalue())
def _save_md(self, obj: pd.DataFrame, location: str, save_config: dict):
"""
Save object as markdown
"""
obj.to_markdown(location, **save_config)

View file

@ -25,7 +25,7 @@ def model_factory(model_type: str) -> MLModel:
models = { models = {
"SKLearnLinearRegression": SKLearnLinearRegression(), "SKLearnLinearRegression": SKLearnLinearRegression(),
"SKLearnSVMRegression": SKLearnSVMRegression(), "SKLearnSVMRegression": SKLearnSVMRegression(),
"AutogluonAutoML": AutogluonAutoML(), "AutogluonAutoML": AutogluonAutoML()
# ADD OTHER MODELS HERE # ADD OTHER MODELS HERE
} }
@ -151,7 +151,6 @@ class AutogluonAutoML:
"excluded_model_types", "excluded_model_types",
"infer_limit", "infer_limit",
"infer_limit_batch_size", "infer_limit_batch_size",
"ag_args_ensemble",
] ]
def load_model(self, path: Union[Path, str]) -> None: def load_model(self, path: Union[Path, str]) -> None:
@ -208,7 +207,6 @@ class AutogluonAutoML:
excluded_model_types=model_hyperparameters["excluded_model_types"], excluded_model_types=model_hyperparameters["excluded_model_types"],
infer_limit=model_hyperparameters["infer_limit"], infer_limit=model_hyperparameters["infer_limit"],
infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"], infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"],
ag_args_ensemble=model_hyperparameters["ag_args_ensemble"],
) )
def predict( def predict(

View file

@ -1,16 +1,5 @@
schema: '2.0' schema: '2.0'
stages: stages:
startup_cleanup:
cmd: python 0_startup_cleanup.py
deps:
- path: 0_startup_cleanup.py
hash: md5
md5: b1b12f6b6393fbf8b83d23684df0a3d4
size: 1220
params:
configs/settings.yaml:
default.startup_cleanup.artefacts: ./data
default.startup_cleanup.metrics: ./metrics
prepare_data: prepare_data:
cmd: python 1_prepare_data.py cmd: python 1_prepare_data.py
deps: deps:
@ -28,29 +17,22 @@ stages:
- carbon_ending - carbon_ending
- days_to_starting - days_to_starting
- days_to_ending - days_to_ending
- number_habitable_rooms_starting
- number_habitable_rooms_ending
- number_heated_rooms_starting
- number_heated_rooms_ending
- number_habitable_rooms
- number_heated_rooms
default.feature_processor.feature_processor_config.retain_features: default.feature_processor.feature_processor_config.retain_features:
default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_amount:
default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.subsample_seed: 0
default.feature_processor.feature_processor_config.target: sap_ending default.feature_processor.feature_processor_config.target: sap_ending
default.feature_processor.feature_processor_type: dataframe default.feature_processor.feature_processor_type: dataframe
default.prepare_data.data_filepath: default.prepare_data.data_filepath: s3://retrofit-datalake-dev/dataset_with0perm_all.parquet
s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
default.prepare_data.input_dataclient_type: aws-s3 default.prepare_data.input_dataclient_type: aws-s3
default.prepare_data.output_dataclient_type: local default.prepare_data.output_dataclient_type: local
default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet
default.prepare_data.train_proportion: 0.9 default.prepare_data.train_proportion: 1
outs: outs:
- path: data/prepared_data/ - path: data/prepared_data/
hash: md5 hash: md5
md5: 80c9e138146a1d96b9d16091c207e2e8.dir md5: f85c36a5dfd31a897538b3934d5fb997.dir
size: 45056059 size: 41375196
nfiles: 2 nfiles: 2
build_model: build_model:
cmd: python 2_build_model.py cmd: python 2_build_model.py
@ -61,8 +43,8 @@ stages:
size: 4820 size: 4820
- path: data/prepared_data - path: data/prepared_data
hash: md5 hash: md5
md5: 80c9e138146a1d96b9d16091c207e2e8.dir md5: f85c36a5dfd31a897538b3934d5fb997.dir
size: 45056059 size: 41375196
nfiles: 2 nfiles: 2
params: params:
configs/build_model.yaml: configs/build_model.yaml:
@ -79,33 +61,32 @@ stages:
output_filepath: ./data/model/allmodels/ output_filepath: ./data/model/allmodels/
problem_type: regression problem_type: regression
eval_metric: mean_squared_error eval_metric: mean_squared_error
time_limit: 1800 time_limit: 400
presets: medium_quality presets: medium_quality
excluded_model_types: excluded_model_types:
- RF - RF
- FASTAI
- CAT - CAT
- NN_TORCH - NN_TORCH
- KNN - KNN
- XT - XT
infer_limit: 0.05 infer_limit: 0.05
infer_limit_batch_size: 10000 infer_limit_batch_size: 10000
ag_args_ensemble:
num_folds_parallel: 2
outs: outs:
- path: data/fit_predictions/ - path: data/fit_predictions/
hash: md5 hash: md5
md5: d9c9afc05e8780db47c0548b19bf7d19.dir md5: 991e6c55826953aa7c2be573369ec96f.dir
size: 3349989 size: 3574047
nfiles: 1 nfiles: 1
- path: data/model/ - path: data/model/
hash: md5 hash: md5
md5: 13c3100e1486c27a83a8a47491077842.dir md5: f8a8b7462831bd46b1e2df47d73bb69d.dir
size: 773523079 size: 391430703
nfiles: 36 nfiles: 23
- path: metrics/fit_metrics.json - path: metrics/fit_metrics.json
hash: md5 hash: md5
md5: 2ff70a2a45813e1bcdf2ea3aa8e07d4a md5: 35a66a845854cc6fee9dd10860e216bb
size: 224 size: 225
generate_predictions: generate_predictions:
cmd: python 3_generate_predictions.py cmd: python 3_generate_predictions.py
deps: deps:
@ -115,13 +96,13 @@ stages:
size: 2464 size: 2464
- path: data/model - path: data/model
hash: md5 hash: md5
md5: 13c3100e1486c27a83a8a47491077842.dir md5: f8a8b7462831bd46b1e2df47d73bb69d.dir
size: 773523079 size: 391430703
nfiles: 36 nfiles: 23
- path: data/prepared_data - path: data/prepared_data
hash: md5 hash: md5
md5: 80c9e138146a1d96b9d16091c207e2e8.dir md5: f85c36a5dfd31a897538b3934d5fb997.dir
size: 45056059 size: 41375196
nfiles: 2 nfiles: 2
params: params:
configs/settings.yaml: configs/settings.yaml:
@ -133,8 +114,8 @@ stages:
outs: outs:
- path: data/predictions/ - path: data/predictions/
hash: md5 hash: md5
md5: 5d07bcebf3160a72bb18dfd79106e85c.dir md5: 94b7381ac318b1ca18e0bc086778f7ce.dir
size: 463197 size: 626160
nfiles: 1 nfiles: 1
generate_metrics: generate_metrics:
cmd: python 4_generate_metrics.py cmd: python 4_generate_metrics.py
@ -145,13 +126,13 @@ stages:
size: 3484 size: 3484
- path: data/predictions - path: data/predictions
hash: md5 hash: md5
md5: 5d07bcebf3160a72bb18dfd79106e85c.dir md5: 94b7381ac318b1ca18e0bc086778f7ce.dir
size: 463197 size: 626160
nfiles: 1 nfiles: 1
- path: data/prepared_data - path: data/prepared_data
hash: md5 hash: md5
md5: 80c9e138146a1d96b9d16091c207e2e8.dir md5: f85c36a5dfd31a897538b3934d5fb997.dir
size: 45056059 size: 41375196
nfiles: 2 nfiles: 2
params: params:
configs/settings.yaml: configs/settings.yaml:
@ -161,30 +142,16 @@ stages:
outs: outs:
- path: metrics/metrics.json - path: metrics/metrics.json
hash: md5 hash: md5
md5: 3e08df02fd5c5d094bcf936e1338d596 md5: 4d8681f7c0f41f97be52d6b1ae039c5b
size: 223 size: 224
generate_scenerio_metrics: startup_cleanup:
cmd: python 5_generate_scenarios.py cmd: python 0_startup_cleanup.py
deps: deps:
- path: 5_generate_scenarios.py - path: 0_startup_cleanup.py
hash: md5 hash: md5
md5: 40506749fefd926d47c60ff5b16db307 md5: b1b12f6b6393fbf8b83d23684df0a3d4
size: 5337 size: 1220
params: params:
configs/scenarios.yaml: configs/settings.yaml:
default.scenarios: default.startup_cleanup.artefacts: ./data
input_dataclient_type: aws-s3 default.startup_cleanup.metrics: ./metrics
output_dataclient_type: local
scenario_data_filepaths:
- s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
comparison_output_filepath: ./metrics/scenario_table.md
metrics_output_filepath: ./metrics/scenario_metrics.md
outs:
- path: metrics/scenario_metrics.md
hash: md5
md5: fa4d6d7bbd7818613800da5f8f37ea96
size: 363
- path: metrics/scenario_table.md
hash: md5
md5: d6baf100a1623cc2467c2f8221d314c9
size: 2133

View file

@ -71,17 +71,6 @@ stages:
outs: outs:
- metrics/metrics.json - metrics/metrics.json
always_changed: true always_changed: true
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps:
- 5_generate_scenarios.py
params:
- configs/scenarios.yaml:
- default.scenarios
outs:
- metrics/scenario_table.md
- metrics/scenario_metrics.md
always_changed: true
metrics: metrics:
- metrics/metrics.json - metrics/metrics.json
- metrics/fit_metrics.json - metrics/fit_metrics.json

View file

@ -1,4 +1,2 @@
/fit_metrics.json /fit_metrics.json
/metrics.json /metrics.json
/scenario_table.md
/scenario_metrics.md

View file

@ -1,7 +1,7 @@
joblib==1.3.2 joblib==1.3.2
boto3==1.28.17 boto3==1.28.17
pandas==2.1.4 pandas==1.5.3
autogluon.tabular[all]==1.0.0 autogluon==0.8.2
dynaconf==3.2.1 dynaconf==3.2.0
pyarrow==13.0.0 pyarrow==13.0.0
pre-commit==3.3.3 pre-commit==3.3.3

View file

@ -1,7 +1,7 @@
joblib==1.3.2 joblib==1.3.2
boto3==1.28.17 boto3==1.28.17
pandas==2.1.4 pandas==1.5.3
autogluon.tabular[all]==1.0.0 autogluon==0.8.2
dynaconf==3.2.1 dynaconf==3.2.0
pyarrow==13.0.0 pyarrow==13.0.0
PyYAML==6.0.1 PyYAML==6.0.1

View file

@ -1,10 +1,9 @@
joblib==1.3.2 joblib==1.3.2
boto3==1.28.17 boto3==1.28.17
pandas==2.1.4 pandas==1.5.3
autogluon.tabular[all]==1.0.0 autogluon==0.8.2
ray==2.6.3 dynaconf==3.2.0
dynaconf==3.2.1 alibi==0.9.4
alibi==0.9.5
shap==0.42.1 shap==0.42.1
pyarrow==13.0.0 pyarrow==13.0.0
pre-commit==3.3.3 pre-commit==3.3.3

View file

@ -1,4 +1,4 @@
boto3==1.28.41 boto3==1.28.41
pandas==2.1.4 pandas==1.5.3
autogluon.tabular[all]==1.0.0 autogluon==0.8.2
dynaconf==3.2.1 dynaconf==3.2.0

View file

@ -1,4 +1,4 @@
dvc==3.51.0 dvc==3.36.0
dvc-s3==3.2.0 dvc-s3==3.0.1
gto==1.7.1 gto==1.6.1
pyOpenSSL==23.3.0 pyOpenSSL==23.3.0