feat(modelling): Optimiser core — exact grouped knapsack (#1160)

Slice 1 of #1160. Recycles the GainOptimiser/CostOptimiser formulation (≤1 Option per Recommendation, maximise SAP gain subject to budget) as a clean typed DDD function — but as an exact pure-Python multiple-choice knapsack rather than the legacy `mip` MILP, since mip's CBC backend does not load on aarch64 (so the legacy solver path can't run / be tested here). At retrofit scale the candidate space Π(|group|+1) is tiny, so exhaustive enumeration is exact and instant; ADR-0016 only needs the knapsack as a warm-start signal anyway (the truthful figure comes from the whole-package re-score + repair, next slice). `optimise(groups, budget) -> list[ScoredOption]`: maximise total gain, tie-break toward lower cost, skip-per-group covers "select none". 6 tests (budget-bound selection, ≤1/group, unconstrained, budget-too-small, empty groups, partial-affordability); pyright strict clean. Multi-phase remains descoped (ADR-0005) — single-phase optimiser. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-27 23:35:01 +00:00 · 2026-06-03 12:39:47 +00:00 · 2026-06-03 12:39:47 +00:00 · 77983caed8
commit 77983caed8
parent c7e2aa3755
2 changed files with 197 additions and 0 deletions
--- a/domain/modelling/optimiser.py
+++ b/domain/modelling/optimiser.py
@ -0,0 +1,74 @@
+"""The Optimiser core — a grouped (multiple-choice) knapsack over per-Option
+role-1 scores (ADR-0016).
+
+Recycles the formulation of the legacy ``GainOptimiser`` / ``CostOptimiser``
+(``recommendations/optimiser/``): pick **at most one** Option per Recommendation
+(disjoint groups, no cross-group exclusion constraints — the Recommendation
+partition makes selected overlays collision-free), maximising total SAP gain
+subject to the Scenario budget. The legacy classes solve this as a `mip` MILP;
+here it is an exact pure-Python multiple-choice knapsack — no native solver
+dependency, so it runs everywhere and is deterministically testable.
+
+This is the warm-start **signal** only: per ADR-0016 the role-1 per-Option
+scores are approximate (independent-vs-baseline), so the truthful figure comes
+from the whole-package re-score + greedy repair, not from this selection. Exact
+enumeration is therefore more than adequate, and at retrofit scale (a handful
+of Recommendations, a few Options each) the candidate space — ``Π(|group|+1)``
+— is tiny.
+"""
+
+from __future__ import annotations
+
+import itertools
+from dataclasses import dataclass
+from typing import Optional
+
+from domain.modelling.recommendation import MeasureOption
+
+
+@dataclass(frozen=True)
+class ScoredOption:
+    """A candidate Measure Option paired with its role-1 (independent-vs-
+    baseline) SAP gain — the optimiser's input signal.
+
+    Only the gain is carried here; the cost is read from the Option itself
+    when the optimiser needs it. Instances are immutable (frozen dataclass).
+    """
+
+    # The candidate Option; its ``cost`` is what gets budgeted against.
+    option: MeasureOption
+    # Role-1 SAP gain for this Option, supplied by scoring.
+    sap_gain: float
+
+
+def _option_cost(option: MeasureOption) -> float:
+    if option.cost is None:
+        raise ValueError(
+            f"measure option {option.measure_type!r} has no cost; cannot optimise"
+        )
+    return option.cost.total
+
+
+def optimise(
+    groups: list[list[ScoredOption]], budget: Optional[float]
+) -> list[ScoredOption]:
+    """Select at most one ScoredOption per group to maximise total SAP gain
+    subject to ``budget`` (None = unconstrained). Exact: enumerates every
+    pick-one-or-skip-per-group package, keeps the affordable one with the
+    greatest gain, breaking ties toward lower cost. Returns the selected
+    ScoredOptions (empty if nothing affordable beats selecting none)."""
+    # Each group offers: skip it (None) or take exactly one of its Options.
+    choices_per_group: list[list[Optional[ScoredOption]]] = [
+        [None, *group] for group in groups
+    ]
+
+    best: list[ScoredOption] = []
+    best_gain: float = -1.0
+    best_cost: float = 0.0
+    for combo in itertools.product(*choices_per_group):
+        selected: list[ScoredOption] = [
+            choice for choice in combo if choice is not None
+        ]
+        total_cost: float = sum(_option_cost(s.option) for s in selected)
+        if budget is not None and total_cost > budget:
+            continue
+        total_gain: float = sum(s.sap_gain for s in selected)
+        # Maximise gain; on a tie prefer the cheaper package.
+        if (total_gain, -total_cost) > (best_gain, -best_cost):
+            best, best_gain, best_cost = selected, total_gain, total_cost
+    return best
--- a/tests/domain/modelling/test_optimiser.py
+++ b/tests/domain/modelling/test_optimiser.py
@ -0,0 +1,123 @
+"""Behaviour of the Optimiser core: an exact grouped knapsack over per-Option
+role-1 scores (ADR-0016). Picks at most one Option per Recommendation (disjoint
+groups, no cross-group constraints) to maximise total SAP gain subject to the
+Scenario budget. This is the warm-start *signal* — the truthful figure comes
+from the whole-package re-score + repair (a later slice); here we test the
+selection with synthetic scores and no calculator.
+"""
+
+from __future__ import annotations
+
+from domain.modelling.optimiser import ScoredOption, optimise
+from domain.modelling.recommendation import Cost, MeasureOption
+from domain.modelling.simulation import EpcSimulation
+
+
+def _scored(measure_type: str, *, gain: float, cost: float) -> ScoredOption:
+    return ScoredOption(
+        option=MeasureOption(
+            measure_type=measure_type,
+            description=measure_type,
+            overlay=EpcSimulation(),
+            cost=Cost(total=cost, contingency_rate=0.0),
+        ),
+        sap_gain=gain,
+    )
+
+
+def _selected_types(selection: list[ScoredOption]) -> set[str]:
+    return {scored.option.measure_type for scored in selection}
+
+
+def test_grouped_knapsack_maximises_gain_within_budget() -> None:
+    # Arrange — wall group has two mutually-exclusive options; roof + floor one
+    # each. EWI has the best gain but is unaffordable alongside the rest.
+    groups: list[list[ScoredOption]] = [
+        [
+            _scored("external_wall_insulation", gain=10.0, cost=8000.0),
+            _scored("cavity_wall_insulation", gain=6.0, cost=1000.0),
+        ],
+        [_scored("loft_insulation", gain=4.0, cost=1500.0)],
+        [_scored("suspended_floor_insulation", gain=3.0, cost=2000.0)],
+    ]
+
+    # Act
+    selection: list[ScoredOption] = optimise(groups, budget=5000.0)
+
+    # Assert — cavity + loft + floor (cost 4500, gain 13) beats any package
+    # containing the 8000 EWI option within the 5000 budget.
+    assert _selected_types(selection) == {
+        "cavity_wall_insulation",
+        "loft_insulation",
+        "suspended_floor_insulation",
+    }
+
+
+def test_picks_at_most_one_option_per_group() -> None:
+    # Arrange — both wall options are individually affordable.
+    groups: list[list[ScoredOption]] = [
+        [
+            _scored("external_wall_insulation", gain=10.0, cost=2000.0),
+            _scored("cavity_wall_insulation", gain=6.0, cost=1000.0),
+        ],
+    ]
+
+    # Act
+    selection: list[ScoredOption] = optimise(groups, budget=10000.0)
+
+    # Assert — never both treatments of the same wall; the higher-gain one wins.
+    assert len(selection) == 1
+    assert _selected_types(selection) == {"external_wall_insulation"}
+
+
+def test_no_budget_picks_the_best_option_in_every_group() -> None:
+    # Arrange
+    groups: list[list[ScoredOption]] = [
+        [
+            _scored("external_wall_insulation", gain=10.0, cost=8000.0),
+            _scored("cavity_wall_insulation", gain=6.0, cost=1000.0),
+        ],
+        [_scored("loft_insulation", gain=4.0, cost=1500.0)],
+    ]
+
+    # Act — None budget = unconstrained.
+    selection: list[ScoredOption] = optimise(groups, budget=None)
+
+    # Assert
+    assert _selected_types(selection) == {
+        "external_wall_insulation",
+        "loft_insulation",
+    }
+
+
+def test_budget_too_small_for_any_option_selects_nothing() -> None:
+    # Arrange
+    groups: list[list[ScoredOption]] = [
+        [_scored("cavity_wall_insulation", gain=6.0, cost=1000.0)],
+        [_scored("loft_insulation", gain=4.0, cost=1500.0)],
+    ]
+
+    # Act
+    selection: list[ScoredOption] = optimise(groups, budget=500.0)
+
+    # Assert — nothing affordable; selecting none is the optimum.
+    assert selection == []
+
+
+def test_no_groups_selects_nothing() -> None:
+    # Act / Assert
+    assert optimise([], budget=10000.0) == []
+
+
+def test_within_budget_partial_selection_prefers_the_higher_gain_option() -> None:
+    # Arrange — only one of the two fits the budget; pick the affordable best.
+    groups: list[list[ScoredOption]] = [
+        [_scored("external_wall_insulation", gain=10.0, cost=8000.0)],
+        [_scored("loft_insulation", gain=4.0, cost=1500.0)],
+    ]
+
+    # Act
+    selection: list[ScoredOption] = optimise(groups, budget=2000.0)
+
+    # Assert — EWI is unaffordable; loft alone is the best within £2000.
+    assert _selected_types(selection) == {"loft_insulation"}