mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Migration of the SAP 10.2 calculator package from the uv-workspace
src-layout (`packages/domain/src/domain/sap`) to the root-level layout
(`domain/sap10_calculator`), matching the pattern already used by
`domain.addresses` / `domain.tasks` / `domain.postcode`.
Changes:
- `git mv packages/domain/src/domain/sap → domain/sap10_calculator`
(92 files; git auto-detected all as renames so blame/history is
preserved).
- Subpackage rename: `domain.sap` → `domain.sap10_calculator`. 48
Python files rewritten (`from domain.sap.X` →
`from domain.sap10_calculator.X`); zero remaining `domain.sap` refs
after the sed pass.
- Path-string updates: 3 .py files (test fixtures + xlsx loader) +
6 markdown docs (CONTEXT.md, 2 ADRs, 3 sap-spec docs,
`sap10_calculator/README.md`) had hard-coded
`packages/domain/src/domain/sap/...` paths rewritten to
`domain/sap10_calculator/...`.
- `Path(__file__).parents[N]` rebasing: the old tree was 3 levels
deeper than the new one (`packages/domain/src/`), so 4× `parents[7]`
became `parents[4]` and 1× `parents[6]` became `parents[3]` across
`tables/pcdb/{__init__.py, postcode_weather.py, etl.py}`,
`worksheet/tests/_xlsx_loader.py`, and `tests/test_pcdb_etl.py`.
- PEP 420 namespace package: deleted both `domain/__init__.py`
(root + workspace, both load-bearing only as empty/docstring) so
Python combines `domain.sap10_calculator` (root) and `domain.ml`
(workspace) into one namespace package. Confirmed via
`domain.__path__ == ['/workspaces/model/domain',
'/workspaces/model/packages/domain/src/domain']`. Without this,
the root `domain/__init__.py` shadowed the workspace one and
`domain.ml` was unreachable.
Verified:
- Full sweep
(`backend/documents_parser/tests/test_summary_pdf_mapper_chain.py` +
`domain/sap10_calculator/worksheet/tests/test_e2e_elmhurst_sap_score.py` +
`domain/sap10_calculator/rdsap/tests/test_golden_fixtures.py`):
99 passed / 19 failed — exactly the same counts as pre-refactor. All 19
failures are pre-existing (9 hand-built 001479 + 6 cohort diff +
4 cohort chain non-spec).
- Wider sweep (all sap10_calculator + domain.ml): 1654 passed /
20 failed (the +1 vs the focused sweep is the pre-existing
`test_roof_insulated_assumed_with_ni_thickness_uses_50mm_per_section_5_11_4`,
which was already failing on the previous baseline).
- Pyright net-zero on the three load-bearing baselines:
`heat_transmission.py` 13, `cert_to_inputs.py` 35, `mapper.py` 33.
Lift-and-shift only — no semantic renames (`Sap10Calculator` stays
`Sap10Calculator`), no testpaths edits in pytest.ini (sap tests
continue to be invoked by explicit pytest paths).
Note: `domain.ml` still lives at `packages/domain/src/domain/ml/`.
Migrating it would close out the dual-`domain/` layout but is
out of scope for this commit.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
95 lines
3 KiB
Python
95 lines
3 KiB
Python
"""Parity-validation report for the deterministic SAP 10.2 calculator.
|
||
|
||
ADR-0009 Session B compares `Sap10Calculator.calculate(epc).sap_score`
|
||
to the cert's `energy_rating_current` across a 1000-cert stratified
|
||
sample. The success criterion is MAE ≤ 1.0 SAP-point on the *typical
|
||
subset* (cohort excluding catastrophic-tail certs, multi-heating,
|
||
conservatory, room-in-roof) — those edge cases are themselves the
|
||
backlog Session B iterates against.
|
||
|
||
This module is the pure aggregation step: given a list of per-cert
|
||
`ParityCase` records, it emits a typed `ParityReport` with global +
|
||
typical-subset MAE/RMSE/bias and the worst-N cases by |residual| for
|
||
investigation. The cert→case mapping itself (loading from the corpus,
|
||
running the calculator, looking up the cert's actual sap) lives at a
|
||
higher layer — keeps this report module trivial to test.
|
||
|
||
Reference: ADR-0009 §"Validation" + Session B plan.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from dataclasses import dataclass
|
||
from math import sqrt
|
||
from typing import Final
|
||
|
||
|
||
# Default for `build_parity_report`'s `worst_n`: how many of the
# largest-|residual| cases are kept in `ParityReport.worst_cases`.
_DEFAULT_WORST_N: Final[int] = 25
|
||
|
||
|
||
@dataclass(frozen=True)
class ParityCase:
    """Calculator-vs-certificate SAP comparison for a single certificate.

    `is_typical` flags membership of the typical subset against which
    the Session B success criterion is measured. It is False for
    catastrophic-tail certs (sap ≤ 5 or ≥ 100), multi-heating,
    conservatory, and room-in-roof cases; those still count towards the
    global aggregate but are left out of the typical-subset MAE.
    """

    # Identifier of the certificate this comparison belongs to.
    certificate_number: str
    # SAP score recorded on the certificate.
    actual_sap: int
    # SAP score produced by the calculator for the same certificate.
    predicted_sap: float
    # True when the case counts towards the typical-subset MAE.
    is_typical: bool
|
||
|
||
|
||
@dataclass(frozen=True)
class ParityReport:
    """Immutable aggregate of a batch of `ParityCase` comparisons.

    Residuals are predicted − actual SAP. The "global" statistics cover
    every case in the batch; the "typical" ones cover only the cases
    whose `is_typical` flag is set.
    """

    # Number of cases in the batch.
    case_count: int
    # Number of cases with `is_typical` set.
    typical_case_count: int
    # Mean absolute residual over all cases.
    global_mae: float
    # Mean absolute residual over the typical subset only.
    typical_mae: float
    # Root-mean-square residual over all cases.
    global_rmse: float
    # Mean signed residual over all cases (positive = over-prediction).
    global_bias: float
    # Cases with the largest |residual|, largest first.
    worst_cases: tuple[ParityCase, ...]
|
||
|
||
|
||
def _residual(case: ParityCase) -> float:
    """Return the signed error (predicted − actual) for one case.

    A positive value means the calculator over-predicts the cert's SAP;
    a negative value means it under-predicts.
    """
    predicted, actual = case.predicted_sap, case.actual_sap
    return predicted - actual
|
||
|
||
|
||
def _mean_abs(cases: list[ParityCase]) -> float:
    """Return the mean absolute residual (MAE) over `cases`.

    An empty list yields 0.0 rather than raising, so a 0.0 result is
    only meaningful alongside a non-zero case count.
    """
    if cases:
        abs_errors = [abs(_residual(case)) for case in cases]
        return sum(abs_errors) / len(cases)
    return 0.0
|
||
|
||
|
||
def _rmse(cases: list[ParityCase]) -> float:
    """Return the root-mean-square residual over `cases`.

    An empty list yields 0.0 rather than raising.
    """
    if cases:
        squared_errors = [_residual(case) ** 2 for case in cases]
        mean_square = sum(squared_errors) / len(cases)
        return sqrt(mean_square)
    return 0.0
|
||
|
||
|
||
def _bias(cases: list[ParityCase]) -> float:
    """Return the mean signed residual over `cases`.

    Positive means the calculator over-predicts on average, negative
    that it under-predicts. An empty list yields 0.0 rather than
    raising.
    """
    if cases:
        signed_errors = [_residual(case) for case in cases]
        return sum(signed_errors) / len(cases)
    return 0.0
|
||
|
||
|
||
def build_parity_report(
    cases: list[ParityCase], *, worst_n: int = _DEFAULT_WORST_N
) -> ParityReport:
    """Aggregate a list of `ParityCase` into a typed `ParityReport`.

    Args:
        cases: Per-certificate comparisons to aggregate. May be empty,
            in which case every statistic in the report is 0.0 and
            `worst_cases` is empty.
        worst_n: Maximum number of cases to keep in `worst_cases`.
            Must be non-negative; 0 yields an empty tuple.

    Returns:
        A `ParityReport` with global MAE/RMSE/bias over all cases, the
        MAE over the `is_typical` subset, and the `worst_n` cases with
        the largest |residual|, largest first. Cases with equal
        |residual| keep their input order.

    Raises:
        ValueError: If `worst_n` is negative.
    """
    # A negative bound would be read by the slice below as "drop the
    # last |worst_n| entries", silently returning nearly every case
    # instead of the worst few. Reject it explicitly.
    if worst_n < 0:
        raise ValueError(f"worst_n must be non-negative, got {worst_n}")

    typical = [c for c in cases if c.is_typical]
    # `sorted` is stable even with reverse=True, so ties stay in input order.
    ranked = sorted(cases, key=lambda c: abs(_residual(c)), reverse=True)
    return ParityReport(
        case_count=len(cases),
        typical_case_count=len(typical),
        global_mae=_mean_abs(cases),
        typical_mae=_mean_abs(typical),
        global_rmse=_rmse(cases),
        global_bias=_bias(cases),
        worst_cases=tuple(ranked[:worst_n]),
    )
|