diff --git a/services/ml_training_data/src/ml_training_data/train_baseline.py b/services/ml_training_data/src/ml_training_data/train_baseline.py index da8a3ac0..7d6a1017 100644 --- a/services/ml_training_data/src/ml_training_data/train_baseline.py +++ b/services/ml_training_data/src/ml_training_data/train_baseline.py @@ -26,14 +26,13 @@ from ml_training_data.storage import Storage _CERT_NUM_COLUMN = "certificate_number" -# Per-target LightGBM objective overrides (ADR-0008, slice 16g). Defaults to -# 'regression' (MSE); we use 'mape' for sap_score and peui_ucl because the -# default MSE under-weights tail rows relative to the MAPE we report. -# co2_emissions cannot use 'mape' safely (some rows are ~0 from heavy PV). -_OBJECTIVE_OVERRIDES: dict[str, str] = { - "sap_score": "mape", - "peui_ucl": "mape", -} +# Per-target LightGBM objective overrides. Initially (slice 16g) we switched +# sap_score + peui_ucl to 'mape' to align the objective with the reporting metric; +# the 250k v16 ablation (slice 16h) showed 'mape' worsens global MAPE by +# ~0.6 percentage points because it over-weights the low-SAP tail at the +# expense of the body. Reverted to the default 'regression' for all targets. +# Tail bias needs a different fix (sample weights / stratified loss) — slice 16i. +_OBJECTIVE_OVERRIDES: dict[str, str] = {} def train_baseline( diff --git a/services/ml_training_data/tests/unit/test_train_baseline.py b/services/ml_training_data/tests/unit/test_train_baseline.py index aae64092..cc816714 100644 --- a/services/ml_training_data/tests/unit/test_train_baseline.py +++ b/services/ml_training_data/tests/unit/test_train_baseline.py @@ -122,11 +122,12 @@ def test_train_baseline_writes_per_decile_residuals_per_target(tmp_path: Path) - assert true_mins == sorted(true_mins) -def test_train_baseline_uses_mape_objective_for_sap_score_and_peui_ucl(tmp_path: Path) -> None: - # Arrange — sap_score + peui_ucl should use objective="mape" per ADR-0008. 
- # We can't directly inspect LGBMRegressor.objective post-fit reliably, so - # instead we verify the per-target override map is wired and that training - # completes (LightGBM raises if the objective name is unknown). +def test_train_baseline_uses_default_regression_objective_per_slice_16h(tmp_path: Path) -> None: + # Arrange — slice 16g originally switched sap_score + peui_ucl to + # objective='mape'; slice 16h's 250k ablation showed that worsened global + # MAPE by ~0.6 percentage points because 'mape' over-weights the low-SAP tail. Reverted + # to default 'regression' for all targets; tail strategy moves to + # sample weights in slice 16i. storage = LocalStorage(root=tmp_path) df = _synthetic_dataset(n=300) df["peui_ucl"] = df["sap_score"].astype(float) + 5.0 @@ -140,14 +141,11 @@ def test_train_baseline_uses_mape_objective_for_sap_score_and_peui_ucl(tmp_path: seed=42, ) - # Assert — both targets fit successfully under the mape objective. + # Assert assert "sap_score" in metrics assert "peui_ucl" in metrics - # Verify the override map is present and contains both targets. from ml_training_data.train_baseline import _OBJECTIVE_OVERRIDES # noqa: PLC0415 - assert _OBJECTIVE_OVERRIDES.get("sap_score") == "mape" - assert _OBJECTIVE_OVERRIDES.get("peui_ucl") == "mape" - assert _OBJECTIVE_OVERRIDES.get("co2_emissions") is None + assert _OBJECTIVE_OVERRIDES == {} def test_train_baseline_residuals_emitted_per_target_independently(tmp_path: Path) -> None: