revert slice 16g: drop mape objective per 16h ablation

The 250k retrain showed that objective='mape' worsens global sap_score MAPE by ~0.6 percentage points (3.92% with regression vs 4.50% with mape) and by ~0.7 points on peui_ucl. The mape objective over-weights the low-SAP tail (weight ~1/y) and drags the body MAPE up by more than it gains in the tail. Body MAPE on v16 features is already strong (2.38% on deciles 1-8); the remaining tail bias at decile 0 (SAP<58, +3.1 bias) needs a different fix -- sample weights or a stratified loss -- queued as slice 16i.
2026-07-27 23:35:01 +00:00 · 2026-05-17 14:34:04 +00:00 · 2026-05-17 14:34:04 +00:00 · ece1279475
commit ece1279475
parent 05ef54bb02
2 changed files with 15 additions and 18 deletions
--- a/services/ml_training_data/src/ml_training_data/train_baseline.py
+++ b/services/ml_training_data/src/ml_training_data/train_baseline.py
@@ -26,14 +26,13 @@ from ml_training_data.storage import Storage

 _CERT_NUM_COLUMN = "certificate_number"

-# Per-target LightGBM objective overrides (ADR-0008, slice 16g).  Defaults to
-# 'regression' (MSE); we use 'mape' for sap_score and peui_ucl because the
-# default MSE under-weights tail rows relative to the MAPE we report.
-# co2_emissions cannot use 'mape' safely (some rows are ~0 from heavy PV).
-_OBJECTIVE_OVERRIDES: dict[str, str] = {
-    "sap_score": "mape",
-    "peui_ucl": "mape",
-}
+# Per-target LightGBM objective overrides.  Initially (slice 16g) we switched
+# sap_score + peui_ucl to 'mape' to align objective with reporting metric;
+# the 250k v16 ablation (slice 16h) showed 'mape' worsens global MAPE by
+# ~0.6 percentage points because it over-weights the low-SAP tail at the
+# expense of the body. Reverted to the default 'regression' for all targets.
+# Tail bias needs a different fix (sample weights / stratified loss) — slice 16i.
+_OBJECTIVE_OVERRIDES: dict[str, str] = {}


 def train_baseline(
--- a/services/ml_training_data/tests/unit/test_train_baseline.py
+++ b/services/ml_training_data/tests/unit/test_train_baseline.py
@@ -122,11 +122,12 @@ def test_train_baseline_writes_per_decile_residuals_per_target(tmp_path: Path) -
    assert true_mins == sorted(true_mins)


-def test_train_baseline_uses_mape_objective_for_sap_score_and_peui_ucl(tmp_path: Path) -> None:
-    # Arrange — sap_score + peui_ucl should use objective="mape" per ADR-0008.
-    # We can't directly inspect LGBMRegressor.objective post-fit reliably, so
-    # instead we verify the per-target override map is wired and that training
-    # completes (LightGBM raises if the objective name is unknown).
+def test_train_baseline_uses_default_regression_objective_per_slice_16h(tmp_path: Path) -> None:
+    # Arrange — slice 16g originally switched sap_score + peui_ucl to
+    # objective='mape'; slice 16h's 250k ablation showed that worsened global
+    # MAPE by ~0.6 pts because mape over-weights the low-SAP tail. Reverted
+    # to default 'regression' for all targets; tail strategy moves to
+    # sample weights in slice 16i.
    storage = LocalStorage(root=tmp_path)
    df = _synthetic_dataset(n=300)
    df["peui_ucl"] = df["sap_score"].astype(float) + 5.0
@@ -140,14 +141,11 @@ def test_train_baseline_uses_mape_objective_for_sap_score_and_peui_ucl(tmp_path:
        seed=42,
    )

-    # Assert — both targets fit successfully under the mape objective.
+    # Assert
    assert "sap_score" in metrics
    assert "peui_ucl" in metrics
-    # Verify the override map is present and contains both targets.
    from ml_training_data.train_baseline import _OBJECTIVE_OVERRIDES  # noqa: PLC0415
-    assert _OBJECTIVE_OVERRIDES.get("sap_score") == "mape"
-    assert _OBJECTIVE_OVERRIDES.get("peui_ucl") == "mape"
-    assert _OBJECTIVE_OVERRIDES.get("co2_emissions") is None
+    assert _OBJECTIVE_OVERRIDES == {}


 def test_train_baseline_residuals_emitted_per_target_independently(tmp_path: Path) -> None: