test: recursive persistence-coverage guard sees nested fields 🟪

The ADR-0036 guard only inspected EpcPropertyData's top-level fields, so a
dropped field on a NESTED object (the PV-array list, the floor heat-loss
flags) slipped straight through. Generalise it to walk every domain
dataclass reachable from EpcPropertyData and check each field is
reconstructed by a _compose/_to_* mapper or allow-listed (per-field or
whole-class), keyed by Class.field.

Surfaced 14 pre-existing nested gaps the old guard was blind to: 7 are
calculator-read with no FE column (scoring-relevant silent-drop, same class
as the PV bug — tracked follow-up), the rest dormant or awaiting FE tables.
Each is now explicit and justified.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-26 16:53:04 +00:00
parent 09ba84edf9
commit 68055364a6

View file

@ -1,16 +1,23 @@
"""Structural guard: every `EpcPropertyData` field must round-trip.
"""Structural guard: every `EpcPropertyData` field — top-level AND nested — must
round-trip.
The deep-equality round-trip test (`test_epc_round_trip.py`) only catches a
dropped field when a *fixture populates it* the existing gaps (conservatory,
roof windows, solar-HW collector) were latent precisely because no fixture
exercised them (see `docs/migrations/epc-property-round-trip-fidelity.md`).
roof windows, solar-HW collector, **PV arrays**, **floor heat-loss flags**) were
latent precisely because no fixture exercised them (see
`docs/migrations/epc-property-round-trip-fidelity.md`).
This guard closes that blind spot structurally: it asserts that every field on
`EpcPropertyData` is either reconstructed by `EpcPostgresRepository._compose`
(the DB domain mapper) or listed with a reason in the allow-list below.
Adding a new domain field therefore forces a conscious persist-or-justify
decision; persisting a previously-gapped field means deleting its allow-list
entry. ADR-0036.
This guard closes that blind spot structurally. It walks every domain dataclass
reachable from `EpcPropertyData` (transitively, through Optionals and lists) and
asserts each field is either reconstructed by an `EpcPostgresRepository`
`_compose` / `_to_*` mapper or listed with a reason in the allow-list below.
The earlier version only inspected `EpcPropertyData`'s *top-level* fields, so a
dropped field on a *nested* object (`SapEnergySource.photovoltaic_arrays`,
`SapFloorDimension.is_above_partially_heated_space`) slipped straight through.
Adding a new domain field at any depth therefore forces a conscious
persist-or-justify decision; persisting a previously-gapped field means deleting
its allow-list entry. ADR-0036.
"""
from __future__ import annotations
@ -19,74 +26,166 @@ import ast
import dataclasses
import inspect
import textwrap
import typing
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from repositories.epc.epc_postgres_repository import EpcPostgresRepository
# Fields deliberately NOT reconstructed by `_compose`, each with its reason.
# Fields NOT reconstructed by the repo mappers, each with its reason. Keyed by
# ``ClassName.field`` (a single field) or ``ClassName`` (the whole dataclass is
# an un-persisted, tracked gap awaiting its FE columns / child table).
_UNPERSISTED_ALLOWLIST: dict[str, str] = {
# Redundant top-level field: the calculator reads
# `sap_ventilation.extract_fans_count` (which round-trips via
# `_to_ventilation`), never this top-level duplicate.
"extract_fans_count": "redundant; scoring uses sap_ventilation.extract_fans_count",
"EpcPropertyData.extract_fans_count": "redundant; scoring uses sap_ventilation.extract_fans_count",
# Not read by the calculator (dormant); no DB column yet.
"air_tightness": "dormant — not read by the calculator; no FE column",
"lzc_energy_sources": "dormant — not read by the calculator; no FE column",
# Scoring-relevant round-trip gaps awaiting FE columns / child table
"EpcPropertyData.air_tightness": "dormant — not read by the calculator; no FE column",
"EpcPropertyData.lzc_energy_sources": "dormant — not read by the calculator; no FE column",
# Scoring-relevant round-trip gaps awaiting FE columns / child tables
# (tracked follow-ups; see docs/migrations/epc-property-round-trip-fidelity.md).
"sap_roof_windows": "FE child table pending — tracked round-trip gap",
"solar_hw_collector_orientation": "FE column pending — tracked round-trip gap",
"solar_hw_collector_pitch_deg": "FE column pending — tracked round-trip gap",
"solar_hw_overshading": "FE column pending — tracked round-trip gap",
"EpcPropertyData.sap_roof_windows": "FE child table pending — tracked round-trip gap",
"SapRoofWindow": "FE child table pending — tracked round-trip gap (sap_roof_windows)",
"EpcPropertyData.solar_hw_collector_orientation": "FE column pending — tracked round-trip gap",
"EpcPropertyData.solar_hw_collector_pitch_deg": "FE column pending — tracked round-trip gap",
"EpcPropertyData.solar_hw_overshading": "FE column pending — tracked round-trip gap",
"SapAlternativeWall.u_value": "FE column pending — tracked round-trip gap",
"SapAlternativeWall.wall_thickness_mm": "FE column pending — tracked round-trip gap",
# Room-in-roof detail: only floor_area + age_band reconstruct from the
# inlined columns; the geometry / surface detail awaits an FE child table
# (tracked round-trip gap, docs/migrations §2).
"SapRoomInRoof.common_wall_height_m": "FE child table pending — room-in-roof detail",
"SapRoomInRoof.common_wall_length_m": "FE child table pending — room-in-roof detail",
"SapRoomInRoof.detailed_surfaces": "FE child table pending — room-in-roof detail",
"SapRoomInRoof.gable_1_height_m": "FE child table pending — room-in-roof detail",
"SapRoomInRoof.gable_1_length_m": "FE child table pending — room-in-roof detail",
"SapRoomInRoof.gable_2_height_m": "FE child table pending — room-in-roof detail",
"SapRoomInRoof.gable_2_length_m": "FE child table pending — room-in-roof detail",
"SapRoomInRoofSurface": "FE child table pending — room-in-roof surface detail",
# --- Nested gaps surfaced by the recursive guard (pre-existing; this guard
# only just became able to see them). The calculator READS these, so they are
# scoring-relevant silent-drop gaps awaiting FE columns — tracked follow-up,
# docs/migrations/epc-property-round-trip-fidelity.md §"recursive-guard gaps".
"MainHeatingDetail.community_heating_boiler_fuel_type": "calc-read; no FE column — tracked follow-up",
"MainHeatingDetail.community_heating_chp_fraction": "calc-read; no FE column — tracked follow-up",
"SapAlternativeWall.is_sheltered": "calc-read; no FE column — tracked follow-up",
"SapBuildingPart.wall_insulation_thermal_conductivity": "calc-read; no FE column — tracked follow-up",
"SapEnergySource.pv_diverter_present": "calc-read; no FE column — tracked follow-up",
"SapHeating.cylinder_volume_measured_l": "calc-read; no FE column — tracked follow-up",
"SapVentilation.air_permeability_ap50_m3_h_m2": "calc-read; no FE column — tracked follow-up",
# --- Nested gaps the calculator does NOT read (dormant); no FE column.
"SapAlternativeWall.is_basement": "dormant — not read by the calculator; no FE column",
"SapBuildingPart.floor_u_value": "dormant — not read by the calculator (column exists, unread)",
"SapBuildingPart.rafter_insulation_thickness": "dormant — not read by the calculator; no FE column",
"SapBuildingPart.roof_u_value": "dormant — not read by the calculator; no FE column",
"SapBuildingPart.wall_is_basement": "dormant — not read by the calculator; no FE column",
"SapBuildingPart.wall_u_value": "dormant — not read by the calculator; no FE column",
"SapHeating.cylinder_heat_loss": "dormant — not read by the calculator; no FE column",
}
def _compose_reconstructed_fields() -> set[str]:
"""The keyword names `_compose` passes to the top-level `EpcPropertyData(...)`
constructor i.e. the fields it reconstructs from the DB row."""
source = textwrap.dedent(inspect.getsource(EpcPostgresRepository._compose))
def _unwrap(annotation: object) -> list[object]:
"""The concrete types inside an annotation — peeling `Optional`, `Union` and
`list[...]` so a field typed `Optional[list[SapWindow]]` yields `SapWindow`."""
args = typing.get_args(annotation)
if not args:
return [annotation]
origin = typing.get_origin(annotation)
out: list[object] = []
if origin is list:
out.extend(_unwrap(args[0]))
else: # Union / Optional
for arg in args:
out.extend(_unwrap(arg))
return out
def _reachable_dataclasses() -> dict[str, type]:
"""Every domain dataclass reachable from `EpcPropertyData`, transitively,
keyed by class name."""
seen: dict[str, type] = {}
stack: list[type] = [EpcPropertyData]
while stack:
cls = stack.pop()
if not dataclasses.is_dataclass(cls) or cls.__name__ in seen:
continue
seen[cls.__name__] = cls
for field in dataclasses.fields(cls):
for inner in _unwrap(field.type):
if isinstance(inner, type) and dataclasses.is_dataclass(inner):
stack.append(inner)
return seen
def _reconstructed_by_class() -> dict[str, set[str]]:
"""For each domain-dataclass name, the keyword argument names passed to ANY
`ClassName(...)` construction inside `EpcPostgresRepository` i.e. the fields
its `_compose` / `_to_*` mappers reconstruct from the DB rows."""
source = textwrap.dedent(inspect.getsource(EpcPostgresRepository))
tree = ast.parse(source)
by_class: dict[str, set[str]] = {}
for node in ast.walk(tree):
if (
isinstance(node, ast.Call)
and isinstance(node.func, ast.Name)
and node.func.id == "EpcPropertyData"
):
return {kw.arg for kw in node.keywords if kw.arg is not None}
raise AssertionError("no EpcPropertyData(...) construction found in _compose")
if isinstance(node, ast.Call) and isinstance(node.func, ast.Name):
kwargs = {kw.arg for kw in node.keywords if kw.arg is not None}
by_class.setdefault(node.func.id, set()).update(kwargs)
return by_class
def test_every_epc_property_data_field_is_persisted_or_allowlisted() -> None:
# Arrange
all_fields = {f.name for f in dataclasses.fields(EpcPropertyData)}
reconstructed = _compose_reconstructed_fields()
# Arrange — every field of every reachable domain dataclass, and the fields
# the repo mappers reconstruct.
reachable = _reachable_dataclasses()
reconstructed = _reconstructed_by_class()
# Act — fields neither reconstructed nor explicitly excused.
silently_dropped = all_fields - reconstructed - set(_UNPERSISTED_ALLOWLIST)
# Act — fields neither reconstructed nor explicitly excused (per-field or
# whole-class allow-list entry).
silently_dropped: list[str] = []
for name, cls in reachable.items():
if name in _UNPERSISTED_ALLOWLIST: # whole class excused
continue
recon = reconstructed.get(name, set())
for field in dataclasses.fields(cls):
key = f"{name}.{field.name}"
if field.name not in recon and key not in _UNPERSISTED_ALLOWLIST:
silently_dropped.append(key)
# Assert — nothing falls through the round-trip unnoticed.
assert silently_dropped == set(), (
# Assert — nothing falls through the round-trip unnoticed, at any depth.
assert silently_dropped == [], (
"EpcPropertyData field(s) are dropped on DB round-trip without a "
f"documented reason: {sorted(silently_dropped)}. Either reconstruct them "
"in EpcPostgresRepository._compose, or add them to "
"in EpcPostgresRepository (_compose / a _to_* mapper), or add them to "
"_UNPERSISTED_ALLOWLIST with a justification."
)
def test_allowlist_has_no_stale_entries() -> None:
# Arrange
all_fields = {f.name for f in dataclasses.fields(EpcPropertyData)}
reconstructed = _compose_reconstructed_fields()
reachable = _reachable_dataclasses()
reconstructed = _reconstructed_by_class()
# Act — allow-list entries that are now persisted, or no longer fields.
redundant = {
name
for name in _UNPERSISTED_ALLOWLIST
if name in reconstructed or name not in all_fields
}
# Act — allow-list entries that are now fully reconstructed, or name a
# class/field that no longer exists.
redundant: list[str] = []
for entry in _UNPERSISTED_ALLOWLIST:
class_name, _, field_name = entry.partition(".")
cls = reachable.get(class_name)
if cls is None:
redundant.append(entry)
continue
if not field_name:
# Whole-class entry: stale once ANY field is reconstructed (the class
# is now wired into the round-trip).
if reconstructed.get(class_name):
redundant.append(entry)
continue
field_names = {f.name for f in dataclasses.fields(cls)}
if field_name not in field_names or field_name in reconstructed.get(
class_name, set()
):
redundant.append(entry)
# Assert — the allow-list stays honest as gaps are closed.
assert redundant == set(), (
assert redundant == [], (
f"stale _UNPERSISTED_ALLOWLIST entries (now persisted or removed): "
f"{sorted(redundant)} — delete them."
)