Merge pull request #1333 from Hestia-Homes/fix/baseline-downgrades

fix(epc-mapper): complete full-SAP 17.1 mapper so pre-SAP10 certs rebaseline (no more impossible downgrade)
This commit is contained in:
Daniel Roth 2026-06-26 10:44:52 +01:00 committed by GitHub
commit 04b3cb240a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 597 additions and 20 deletions

View file

@ -35,6 +35,10 @@ _Avoid_: energy rating, EPC grade, EPC score
The versioned RdSAP or SAP schema that describes the structure of an EPC's raw data (e.g. `RdSAP-Schema-21.0.1`).
_Avoid_: schema version, EPC format
**Full-SAP Assessment**:
A cert produced by a **full SAP** calculation rather than the reduced **RdSAP** procedure — lodged with `assessment_type = "SAP"` and a measured-openings structure (`sap_opening_types`, per-opening U-values), typically on an older spec (`SAP-Schema-16.x/17.x`, `sap_version` ~9.9x = SAP 2012). Structurally distinct from an **RdSAP Assessment** (reduced categorical fabric, no measured openings), so it has its own mapper (`from_sap_schema_17_1`), routed to by **structure not label** (a structurally full-SAP cert is mapped here regardless of a mislabelled `schema_type`). Because full-SAP certs lodge pre-10.2 figures they are **rebaselined** to SAP 10.2 like any pre-SAP10 cert (Rebaselining trigger (a)); a mapper that drops their `sap_version` silently suppresses that rebaseline and leaves **Effective Performance** stuck on the lodged value (ADR-0037).
_Avoid_: SAP cert (ambiguous — every EPC is a SAP-derived cert), on-construction EPC (overlaps but not synonymous)
**Domestic Certificate**:
An EPC issued for a residential dwelling, as opposed to a commercial one.
_Avoid_: residential EPC, home EPC

View file

@ -734,6 +734,26 @@ class EpcPropertyDataMapper:
door_count, insulated_door_u = _sap_door_aggregates(schema)
return EpcPropertyData(
uprn=schema.uprn,
assessment_type=schema.assessment_type,
# SAP 2012 (~9.92) for this family — carried so CalculatorRebaseliner
# fires trigger (a) (sap_version < 10.2 → Effective = the SAP-10.2
# calc) instead of keeping the lodged SAP-2012 score (ADR-0037).
sap_version=schema.sap_version,
# gov property_type code → str, mirroring from_rdsap_schema_17_1.
# Feeds the calculator's house/flat heat-transmission split.
property_type=(
str(schema.property_type)
if schema.property_type is not None
else None
),
built_form=(
str(schema.built_form) if schema.built_form is not None else None
),
secondary_heating=(
EpcPropertyDataMapper._map_energy_element(schema.secondary_heating)
if schema.secondary_heating is not None
else None
),
dwelling_type=(
schema.dwelling_type
if isinstance(schema.dwelling_type, str)
@ -791,7 +811,35 @@ class EpcPropertyDataMapper:
roofs=EpcPropertyDataMapper._map_energy_elements(schema.roofs),
walls=EpcPropertyDataMapper._map_energy_elements(schema.walls),
floors=EpcPropertyDataMapper._map_energy_elements(schema.floors),
main_heating=[],
# Display elements for the FE property-details panel (the calculator
# reads the measured sap_* fields below, not these). Parity with
# from_rdsap_schema_17_1; full-SAP lodges windows under plural key.
main_heating=EpcPropertyDataMapper._map_energy_elements(
schema.main_heating
),
# First control system if multiple — mirrors from_rdsap_schema_21_0_1.
main_heating_controls=(
EpcPropertyDataMapper._map_energy_element(
schema.main_heating_controls[0]
)
if schema.main_heating_controls
else None
),
window=(
EpcPropertyDataMapper._map_energy_element(schema.windows)
if schema.windows is not None
else None
),
lighting=(
EpcPropertyDataMapper._map_energy_element(schema.lighting)
if schema.lighting is not None
else None
),
hot_water=(
EpcPropertyDataMapper._map_energy_element(schema.hot_water)
if schema.hot_water is not None
else None
),
# D2: vertical-window openings (opening-type 4) → sap_windows;
# roof-window openings (opening-type 5) → sap_roof_windows.
sap_windows=EpcPropertyDataMapper._sap_17_1_windows(schema),
@ -1046,6 +1094,9 @@ class EpcPropertyDataMapper:
# ADR-0028: sheltered_sides from built_form, else the calculator
# assumes mid-terrace (2) for every dwelling.
sap_ventilation=SapVentilation(
mechanical_ventilation_kind=_api_mechanical_ventilation_kind(
schema.mechanical_ventilation
),
sheltered_sides=_api_sheltered_sides(schema.built_form),
),
# ADR-0028: total + low-energy OUTLET counts, not a bulb split.
@ -1062,6 +1113,14 @@ class EpcPropertyDataMapper:
main_heating=EpcPropertyDataMapper._map_energy_elements(
schema.main_heating
),
# First control system if multiple — mirrors from_rdsap_schema_21_0_1.
main_heating_controls=(
EpcPropertyDataMapper._map_energy_element(
schema.main_heating_controls[0]
)
if schema.main_heating_controls
else None
),
window=EpcPropertyDataMapper._map_energy_element(schema.window),
lighting=EpcPropertyDataMapper._map_energy_element(schema.lighting),
hot_water=EpcPropertyDataMapper._map_energy_element(schema.hot_water),
@ -1236,6 +1295,9 @@ class EpcPropertyDataMapper:
# ADR-0028: sheltered_sides must come from built_form, else the
# calculator assumes mid-terrace (2) for every dwelling.
sap_ventilation=SapVentilation(
mechanical_ventilation_kind=_api_mechanical_ventilation_kind(
schema.mechanical_ventilation
),
sheltered_sides=_api_sheltered_sides(schema.built_form),
),
# ADR-0028: 18.0 gives total + low-energy OUTLET counts, not an
@ -1254,6 +1316,14 @@ class EpcPropertyDataMapper:
main_heating=EpcPropertyDataMapper._map_energy_elements(
schema.main_heating
),
# First control system if multiple — mirrors from_rdsap_schema_21_0_1.
main_heating_controls=(
EpcPropertyDataMapper._map_energy_element(
schema.main_heating_controls[0]
)
if schema.main_heating_controls
else None
),
window=EpcPropertyDataMapper._map_energy_element(schema.window),
lighting=EpcPropertyDataMapper._map_energy_element(schema.lighting),
hot_water=EpcPropertyDataMapper._map_energy_element(schema.hot_water),
@ -1470,6 +1540,14 @@ class EpcPropertyDataMapper:
main_heating=EpcPropertyDataMapper._map_energy_elements(
schema.main_heating
),
# First control system if multiple — mirrors from_rdsap_schema_21_0_1.
main_heating_controls=(
EpcPropertyDataMapper._map_energy_element(
schema.main_heating_controls[0]
)
if schema.main_heating_controls
else None
),
window=EpcPropertyDataMapper._map_energy_element(schema.window),
lighting=EpcPropertyDataMapper._map_energy_element(schema.lighting),
hot_water=EpcPropertyDataMapper._map_energy_element(schema.hot_water),
@ -1674,6 +1752,9 @@ class EpcPropertyDataMapper:
# them via RdSAP Table 5), but sheltered_sides must come from
# built_form — else the calculator assumes mid-terrace (2) for all.
sap_ventilation=SapVentilation(
mechanical_ventilation_kind=_api_mechanical_ventilation_kind(
schema.mechanical_ventilation
),
sheltered_sides=_api_sheltered_sides(schema.built_form),
),
# ADR-0027: 20.0.0 gives total + low-energy OUTLET counts, not an
@ -1693,6 +1774,14 @@ class EpcPropertyDataMapper:
main_heating=EpcPropertyDataMapper._map_energy_elements(
schema.main_heating
),
# First control system if multiple — mirrors from_rdsap_schema_21_0_1.
main_heating_controls=(
EpcPropertyDataMapper._map_energy_element(
schema.main_heating_controls[0]
)
if schema.main_heating_controls
else None
),
window=EpcPropertyDataMapper._map_energy_element(schema.window),
lighting=EpcPropertyDataMapper._map_energy_element(schema.lighting),
hot_water=EpcPropertyDataMapper._map_energy_element(schema.hot_water),
@ -1929,6 +2018,14 @@ class EpcPropertyDataMapper:
main_heating=EpcPropertyDataMapper._map_energy_elements(
schema.main_heating
),
# First control system if multiple — mirrors from_rdsap_schema_21_0_1.
main_heating_controls=(
EpcPropertyDataMapper._map_energy_element(
schema.main_heating_controls[0]
)
if schema.main_heating_controls
else None
),
window=EpcPropertyDataMapper._map_energy_element(schema.window),
lighting=EpcPropertyDataMapper._map_energy_element(schema.lighting),
hot_water=EpcPropertyDataMapper._map_energy_element(schema.hot_water),

View file

@ -2653,3 +2653,79 @@ def test_gov_mappers_split_non_separated_conservatory(fixture: str) -> None:
)
assert len(epc.sap_building_parts) == base_parts
assert all(bp.construction_age_band is not None for bp in epc.sap_building_parts)
@pytest.mark.parametrize(
    "schema_cls, mapper, fixture, expected",
    [
        # (schema class, mapper under test, fixture file, lodged description)
        (
            RdSapSchema17_1,
            EpcPropertyDataMapper.from_rdsap_schema_17_1,
            "17_1.json",
            "Programmer and room thermostat",
        ),
        (
            RdSapSchema18_0,
            EpcPropertyDataMapper.from_rdsap_schema_18_0,
            "18_0.json",
            "Programmer, room thermostat and TRVs",
        ),
        (
            RdSapSchema19_0,
            EpcPropertyDataMapper.from_rdsap_schema_19_0,
            "19_0.json",
            "Programmer, room thermostat and TRVs",
        ),
        (
            RdSapSchema20_0_0,
            EpcPropertyDataMapper.from_rdsap_schema_20_0_0,
            "20_0_0.json",
            "Programmer, room thermostat and TRVs",
        ),
        (
            RdSapSchema21_0_0,
            EpcPropertyDataMapper.from_rdsap_schema_21_0_0,
            "21_0_0.json",
            "Programmer, room thermostat and TRVs",
        ),
    ],
)
def test_rdsap_mappers_carry_main_heating_controls_display(
    schema_cls: Any, mapper: Any, fixture: str, expected: str
) -> None:
    """Each listed RdSAP mapper carries the lodged heating-control display element.

    This is the "Heating Control" panel row. 17.0 and 21.0.1 already map it;
    the five mappers parametrized here dropped it, leaving the FE "Unknown"
    despite the cert lodging it. Display only — the calculator reads the
    control CODE separately (ADR-0037 family).
    """
    epc = mapper(from_dict(schema_cls, load(fixture)))
    controls = epc.main_heating_controls
    assert controls is not None
    assert controls.description == expected
@pytest.mark.parametrize(
    "schema_cls, mapper, fixture",
    [
        (
            RdSapSchema17_1,
            EpcPropertyDataMapper.from_rdsap_schema_17_1,
            "17_1.json",
        ),
        (
            RdSapSchema18_0,
            EpcPropertyDataMapper.from_rdsap_schema_18_0,
            "18_0.json",
        ),
        (
            RdSapSchema20_0_0,
            EpcPropertyDataMapper.from_rdsap_schema_20_0_0,
            "20_0_0.json",
        ),
    ],
)
def test_rdsap_mappers_map_mechanical_ventilation_kind(
    schema_cls: Any, mapper: Any, fixture: str
) -> None:
    """An MVHR cert must surface as MVHR on the mapped ``sap_ventilation``.

    Calc-facing: a cert lodging ``mechanical_ventilation=4`` must reach the §2
    ventilation cascade as MVHR, not be silently treated as natural. These
    three mappers built a bare ``SapVentilation(sheltered_sides=…)`` and
    dropped the top-level ``mechanical_ventilation``; 17.0 + 21.0.1 + full-SAP
    already map it (via ``_sap_17_1_ventilation`` / their own wiring).
    19.0 + 21.0.0 set no ``sap_ventilation`` at all — a bigger, separate gap
    (see followups). Natural certs (code 0/5 → None) are unchanged, so the fix
    only moves genuine MEV/MVHR certs (ADR-0037 follow-up).
    """
    payload = load(fixture)
    # Force the fixture onto the MVHR code before it is parsed into the schema.
    payload.update(mechanical_ventilation=4)
    epc = mapper(from_dict(schema_cls, payload))
    assert epc.sap_ventilation.mechanical_ventilation_kind == "MVHR"

View file

@ -78,6 +78,104 @@ class TestFromSapSchema17_1Tracer:
assert result.total_floor_area_m2 == 68.0
class TestFromSapSchema17_1RebaselineFields:
    """Calc-affecting categoricals that the WIP full-SAP mapper dropped (ADR-0037).

    Without them the full-SAP family was left un-rebaselined and mis-scored,
    producing the portfolio-796 "impossible downgrade".
    """

    @staticmethod
    def _mapped() -> EpcPropertyData:
        """Parse the ``sap_17_1.json`` fixture and run it through the mapper."""
        return EpcPropertyDataMapper.from_sap_schema_17_1(
            from_dict(SapSchema17_1, load("sap_17_1.json"))
        )

    def test_maps_sap_version_so_pre_sap10_rebaseline_can_fire(self) -> None:
        # The full-SAP family lodges SAP 2012 (sap_version 9.92). If it is
        # missing from EpcPropertyData, CalculatorRebaseliner cannot fire
        # trigger (a) (`sap_version < 10.2` → Effective = Calculated), so
        # Effective Performance wrongly stays equal to the lodged SAP-2012
        # score while the plan models the SAP-10.2 calc — the impossible
        # downgrade.
        assert self._mapped().sap_version == 9.92

    def test_maps_property_type_as_gov_code_string(self) -> None:
        # property_type feeds the calculator (heat_transmission `_is_house` /
        # `_is_flat_or_maisonette` drive party-wall + heat-loss handling). Left
        # None, the dwelling is classified as neither → degraded SAP. The
        # fixture is a flat (gov code 2); it is mapped as a string, like
        # from_rdsap_schema_17_1.
        assert self._mapped().property_type == "2"

    def test_maps_built_form_as_gov_code_string(self) -> None:
        # built_form (gov code 1 in the fixture) → str, mirroring
        # from_rdsap_schema_17_1; carried for the calculator's exposed-side /
        # sheltered handling and the property-details panel.
        assert self._mapped().built_form == "1"

    def test_maps_secondary_heating_element(self) -> None:
        # secondary_heating is read by the calculator (cert_to_inputs) —
        # dropping it under-counts a real secondary heater. The fixture lodges
        # "None" (no secondary); check it is carried through rather than
        # silently dropped.
        secondary = self._mapped().secondary_heating
        assert secondary is not None
        assert secondary.description == "None"

    def test_maps_assessment_type(self) -> None:
        # Full-SAP certs lodge assessment_type "SAP"; carried for provenance
        # and the property-details panel, mirroring from_rdsap_schema_17_1.
        assert self._mapped().assessment_type == "SAP"
class TestFromSapSchema17_1DisplayElements:
    """Display EnergyElements that the WIP full-SAP mapper dropped (ADR-0037).

    Their absence left the FE property-details panel "Unknown" for full-SAP
    certs. These tests bring full-SAP to parity with the display coverage of
    from_rdsap_schema_17_1.
    """

    @pytest.fixture
    def result(self) -> EpcPropertyData:
        # One mapped copy of the shared full-SAP fixture per test.
        return EpcPropertyDataMapper.from_sap_schema_17_1(
            from_dict(SapSchema17_1, load("sap_17_1.json"))
        )

    def test_maps_main_heating_display_list(self, result: EpcPropertyData) -> None:
        descriptions = [element.description for element in result.main_heating]
        assert descriptions == ["Boiler and radiators, mains gas"]

    def test_maps_window_display_element(self, result: EpcPropertyData) -> None:
        window = result.window
        assert window is not None
        assert window.description == "High performance glazing"

    def test_maps_lighting_display_element(self, result: EpcPropertyData) -> None:
        lighting = result.lighting
        assert lighting is not None
        assert lighting.description == "Low energy lighting in all fixed outlets"

    def test_maps_hot_water_display_element(self, result: EpcPropertyData) -> None:
        hot_water = result.hot_water
        assert hot_water is not None
        assert hot_water.description == "From main system"

    def test_maps_main_heating_controls_display_element(
        self, result: EpcPropertyData
    ) -> None:
        # "Heating Control" panel row. Full-SAP lodges it as a list; the mapper
        # mirrors 21.0.1 and takes the first control system.
        controls = result.main_heating_controls
        assert controls is not None
        assert controls.description == "Time and temperature zone control"
class TestFullSapHasHotWaterCylinderFallback:
"""Some full-SAP certs (e.g. SAP-Schema-17.0 cert 8265-7433-3220-9736-7902)
omit the top-level `has_hot_water_cylinder` and lodge it only under

View file

@ -198,6 +198,32 @@ class SapSchema17_1:
sap_heating: SapHeating
sap_energy_source: SapEnergySource = field(default_factory=SapEnergySource)
sap_ventilation: SapVentilation = field(default_factory=SapVentilation)
# SAP spec the cert was lodged under (~9.92 = SAP 2012 for this family).
# Drives Rebaselining trigger (a): sap_version < 10.2 → Effective = the
# SAP-10.2 calc, not the lodged figure (ADR-0037). Optional defensively.
sap_version: Optional[float] = None
# gov property_type code (0 house, 1 bungalow, 2 flat, 3 maisonette, 4 park
# home). Feeds the calculator's house/flat heat-transmission split; left
# absent the dwelling is classified as neither → mis-scored (ADR-0037).
property_type: Optional[int] = None
# gov built_form code (1 detached … 6 enclosed mid-terrace). Carried for the
# property-details panel and exposed-side handling (ADR-0037).
built_form: Optional[int] = None
# Lodged secondary heating element (description + ratings); "None" when the
# cert has no secondary. Read by the calculator's cert_to_inputs — dropping
# it under-counts a real secondary heater (ADR-0037).
secondary_heating: Optional[EnergyElement] = None
# "SAP" for this family (vs "RdSAP"); carried for provenance / the panel.
assessment_type: Optional[str] = None
# Human-facing display elements (description + ratings) for the property-
# details panel — distinct from the measured/structural fields the calculator
# reads. Dropped by the WIP mapper → all "Unknown" on the FE (ADR-0037).
# Full-SAP lodges windows under the plural key (a single element, not a list).
main_heating: List[EnergyElement] = field(default_factory=list)
main_heating_controls: List[EnergyElement] = field(default_factory=list)
windows: Optional[EnergyElement] = None
lighting: Optional[EnergyElement] = None
hot_water: Optional[EnergyElement] = None
# measured living-room area (m²); the engine consumes it via a back-solved
# habitable_rooms_count (Table 27). Optional — 100% present in the corpus.
living_area: Optional[Union[int, float]] = None

View file

@ -0,0 +1,26 @@
# Full-SAP cert family rebaselines to SAP 10.2, validated by growing Elmhurst-anchored coverage
## Context
Certs come in two structural families: **RdSAP** (reduced-data) and **full-SAP** (measured openings, `assessment_type = "SAP"`, e.g. `SAP-Schema-16.x/17.x`, lodged under SAP 2012 / `sap_version` ~9.9x). Full-SAP certs route through `_from_full_sap` → `from_sap_schema_17_1`, a WIP mapper that left `sap_version`, `property_type`, `built_form`, `secondary_heating` and the display `EnergyElement`s (`main_heating`/`window`/`lighting`/`hot_water`) unmapped — even though the raw cert carries them all.
Because `sap_version` was `None`, **Rebaselining** trigger (a) (`sap_version < 10.2` → Effective = Calculated, see CONTEXT.md / ADR-0013) could not fire, so **Effective Performance wrongly stayed equal to the lodged SAP-2012 value** while the Plan modelled the SAP-10.2 calc. Result: an "impossible" post-works downgrade (effective B/84 vs post-works C/80.14) on ≈**3,824 of 34,917** portfolio-796 plans (100% `effective == lodged` signature), an all-"Unknown" property-details panel, and zero recommendations. `property_type = None` also mis-classifies the dwelling in `heat_transmission` (`_is_house`/`_is_flat_or_maisonette`), so the calc value itself was degraded.
## Decision
Complete `SapSchema17_1` + `from_sap_schema_17_1` for the **calc-affecting** fields (`sap_version`, `property_type`, `built_form`, `secondary_heating`) **and** the display `EnergyElement`s in **one slice**, mirroring `from_rdsap_schema_17_1` inline (the repo's one-mapper-per-schema convention; equivalence guarded by test, not a shared helper). This makes the full-SAP family rebaseline to SAP 10.2 exactly as pre-10.2 RdSAP certs already do — **restoring documented behaviour, not a new policy**.
The `sap_version` flip and the categorical completion are **coupled**: neither ships alone, because flipping Effective to a calc fed a `None` `property_type` would replace a wrong-high lodged score with a confidently-wrong-low calc one.
## Validation
The full-SAP path has **no accuracy corpus** (the gauge is RdSAP-21.0.1 only) and the lodged SAP-2012 value is not a valid SAP-10.2 target (Validation Cohort rule). We accept the path on **sufficient, growing coverage** rather than full corpus parity:
- mapper-completeness unit tests (SAP-17.1 fixture → fields mapped),
- the RdSAP-21.0.1 accuracy corpus staying green (the main path is untouched — ratchet not loosened),
- **≥1 Elmhurst-anchored full-SAP `RealCertExpectation` gating the merge** (Jun-te's Elmhurst skill provides the accredited SAP-10.2 ground truth), coverage growing cert-by-cert over time,
- a post-fix **population sanity sweep** flagging survivors (Effective ≥2 bands / >15 SAP below Lodged) for separate triage — surfaced, never silently shipped.
## Consequences
Deploying and re-running flips ~3,824 portfolio-796 Effective baselines from lodged-SAP-2012 to our-SAP-10.2; many legitimately drop a band. FE/product should expect a **visible population-wide baseline shift** — this is correct Rebaselining, not a regression. The RdSAP-path downgrades (Case B: ~8,160 plans, avg drop 0.7) are a **separate cause**, not addressed here. Tracked under the `fix/baseline-downgrades` umbrella, this being the first (full-SAP) sub-branch.

View file

@ -0,0 +1,68 @@
# Follow-ups from the full-SAP baseline-downgrade work (ADR-0037)
Open items surfaced while fixing the full-SAP mapper / portfolio-796 downgrades.
Each is **separate from** the full-SAP fix (`fix/baseline-downgrades`).
## 1. RdSAP ventilation mapping is inconsistent across schemas
**Severity: accuracy bug for MEV/MVHR dwellings.** The mappers handle
`sap_ventilation` four different ways:
- `17_0` + full-SAP — build via `_sap_17_1_ventilation`, which **maps**
`mechanical_ventilation_kind`. ✅
- `21_0_1` — rich inline `SapVentilation(...)` incl. the kind. ✅
- **`17_1` / `18_0` / `20_0_0`** — built `SapVentilation(sheltered_sides=…)` and
**dropped** the kind. **Fixed in this PR** (mirror `_api_mechanical_ventilation_kind`).
- **`19_0` / `21_0_0`** — set **no `sap_ventilation` at all** → the dataclass
default empty object. They drop the **entire** ventilation block (sheltered
sides + kind + everything), not just the kind. **Still open** — a bigger,
separate consistency fix (give them a proper `sap_ventilation` construction,
mirroring 21.0.1), not a one-liner.
Either way, an MEV/MVHR cert (`mechanical_ventilation ≠ 0`) is treated as
**natural** by the affected mappers — wrong §2 ventilation cascade (and heat
recovery). Natural certs (code `0`/`5` → `None`) are unaffected.
- For **natural-ventilation** certs (`mechanical_ventilation = 0`, e.g. UPRN
100020603823 / property 726605) it's **benign** — unmapped → `None` defaults to
natural in the §2 cascade.
- For **MEV/MVHR** certs (`mechanical_ventilation ≠ 0`) the calculator treats the
dwelling as **naturally ventilated**, getting the SAP §2 ventilation cascade
(and any heat recovery) wrong → mis-scored SAP.
The granular **counts** (fans/flues/vents) are *not* a bug: older RdSAP open-data
certs don't lodge them, and the calc correctly uses RdSAP Table-5 age defaults.
`percent_draughtproofed` is mapped (top-level) and read by the calc.
**Remaining fix (19.0 / 21.0.0):** give them a proper `sap_ventilation`
construction mirroring 21.0.1. **Calc-facing → validate** with the RdSAP-21.0.1
corpus (must hold 73.3% / MAE 0.774) plus an **Elmhurst-anchored MEV/MVHR
`RealCertExpectation`** (the corpus is natural-vent-dominated, so the kind change
isn't exercised by it). Quantify blast radius: count older-RdSAP certs with
`mechanical_ventilation ≠ 0`. The 17.1/18.0/20.0.0 fix in this PR is guarded by a
mapper-level MVHR test + the corpus/mapper-corpus staying green, with the Elmhurst
MEV/MVHR anchor as the SAP-accuracy fast-follow.
## 2. FE "Main Fuel: Unknown" is FE-side, not a Model mapper gap
`main_fuel_type` (the gov fuel **code**) **is** populated Model-side — UPRN
10093412452 (709791) → `1`, UPRN 100020603823 (726605) → `26` — and is persisted
(`epc_main_heating_detail.main_fuel_type`). So the panel's "Main Fuel: Unknown"
is the **front-end** either not mapping the code → fuel name or reading a field we
don't populate. Needs an FE-repo (Drizzle/Next) check, not a Model change.
## 3. Survivor clusters from the post-fix population sweep
`scripts/hyde/case_a_population_sweep.py`: the representative sample rebaselined
cleanly (0 survivors), but the worst-old-drop sample held **28 survivors**
(lodged A/B → new C/D, 15–25 SAP) in tight UPRN clusters (new-build blocks),
spanning multiple schemas (16.0/16.1/17.1/18.0.0) and heating types. No single
mapper-gap signature → likely genuine SAP-2012→10.2 drops for very-high-lodged
new-builds, but **triage one in Elmhurst** to confirm genuine vs a residual calc
issue before trusting the cohort.
## 4. Predicted-property display path (e.g. property 721167)
721167 has **no lodged EPC** (predicted). Its Heating-Control / Main-Fuel /
Ventilation Unknowns come from the prediction + landlord-override **overlay** not
populating the display fields — a separate path from the lodged-cert mappers.

View file

@ -0,0 +1,125 @@
"""Local population sanity sweep for the full-SAP baseline-downgrade fix (ADR-0037).
For a sample of Case A properties (portfolio 796, full-SAP path: epc_property
assessment_type+sap_version NULL), re-map the real cert with the NEW mapper, run
the SAP-10.2 calculator, and compute the post-fix Effective SAP/band. Compare to
the lodged figures to (a) confirm the fix flips Effective off the stale lodged
value and (b) flag survivors — drops too large to be a 2012→10.2 methodology
shift (>15 SAP or >=2 bands below lodged), candidate deeper-calc/cert bugs.
Read-only. No DB writes.
"""
from __future__ import annotations
import os
import random # noqa: F401 — only used with a fixed seed below
from scripts.e2e_common import load_env, build_engine
from sqlalchemy import text
load_env()
from infrastructure.epc_client.epc_client_service import EpcClientService
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from domain.sap10_calculator.calculator import Sap10Calculator
from datatypes.epc.domain.epc import Epc
_BAND_IDX = {b: i for i, b in enumerate(["G", "F", "E", "D", "C", "B", "A"])}
def _band_letter(epc_band: object) -> str:
s = str(epc_band)
return s.split(".")[-1] if "." in s else s
# Sample queries, kept verbatim. Both select full-SAP-path rows for portfolio
# 796 (epc_property.assessment_type and sap_version both NULL).
# "Worst": the 30 rows with the largest old effective-minus-post-works gap.
_WORST_DROPS_SQL = """
SELECT p.uprn, pbp.lodged_sap_score, pbp.lodged_epc_band,
pbp.effective_sap_score AS old_eff, pl.post_sap_points AS old_post
FROM property p
JOIN epc_property ep ON ep.uprn=p.uprn
JOIN property_baseline_performance pbp ON pbp.property_id=p.id
JOIN plan pl ON pl.property_id=p.id
WHERE p.portfolio_id=796 AND ep.assessment_type IS NULL
AND ep.sap_version IS NULL
ORDER BY (pbp.effective_sap_score - pl.post_sap_points) DESC
LIMIT 30
"""

# "Spread": up to 30 rows whose UPRN is divisible by 7, in UPRN order, as a
# deterministic sample across the cohort.
_SPREAD_SQL = """
SELECT p.uprn, pbp.lodged_sap_score, pbp.lodged_epc_band,
pbp.effective_sap_score AS old_eff, pl.post_sap_points AS old_post
FROM property p
JOIN epc_property ep ON ep.uprn=p.uprn
JOIN property_baseline_performance pbp ON pbp.property_id=p.id
JOIN plan pl ON pl.property_id=p.id
WHERE p.portfolio_id=796 AND ep.assessment_type IS NULL
AND ep.sap_version IS NULL AND (p.uprn % 7) = 0
ORDER BY p.uprn LIMIT 30
"""


def main() -> None:
    """Run the read-only sweep and print per-cert lines plus a summary.

    For every sampled UPRN the latest certificate is fetched, re-mapped with
    ``EpcPropertyDataMapper.from_api_response`` and scored with
    ``Sap10Calculator``. The new score is compared to the lodged score from the
    database; a row is reported as a survivor when the drop exceeds 15 SAP
    points or is two or more bands.

    A failure while fetching, mapping or calculating a single cert is counted
    and reported, and the sweep carries on. Rows whose lodged score is NULL are
    reported and skipped rather than aborting the run.

    Raises:
        KeyError: if ``OPEN_EPC_API_TOKEN`` is not set in the environment.
    """
    eng = build_engine()
    # fetchall() materialises both result sets, so the connection is only held
    # for the two queries and not for the slow per-cert loop below.
    with eng.connect() as c:
        worst = c.execute(text(_WORST_DROPS_SQL)).fetchall()
        spread = c.execute(text(_SPREAD_SQL)).fetchall()

    # One entry per UPRN; a "worst" row wins over a "spread" row for the same
    # UPRN, and a repeated UPRN within "worst" keeps the last row seen.
    rows = {r._mapping["uprn"]: dict(r._mapping) for r in worst}
    for r in spread:
        rows.setdefault(r._mapping["uprn"], dict(r._mapping))

    svc = EpcClientService(os.environ["OPEN_EPC_API_TOKEN"])
    calc = Sap10Calculator()
    flipped = 0
    calc_errors = 0
    survivors = []
    drops = []
    for uprn, row in rows.items():
        lodged = row["lodged_sap_score"]
        lodged_band = row["lodged_epc_band"]
        if lodged is None:
            # Nothing to compare against; `lodged - new_sap` would raise and
            # take the rest of the sweep down with it.
            print(f" uprn={uprn} NO_LODGED_SCORE")
            continue
        try:
            results = svc._search(uprn=int(uprn))
            if not results:
                print(f" uprn={uprn} NO_CERT")
                continue
            latest = max(results, key=lambda x: x.registration_date)
            raw = svc._fetch_certificate(latest.certificate_number)
            epc = EpcPropertyDataMapper.from_api_response(raw)
            new_sap = calc.calculate(epc).sap_score
        except Exception as e:  # noqa: BLE001 — sweep tolerates per-cert failure
            calc_errors += 1
            print(f" uprn={uprn} CALC_ERROR {type(e).__name__}: {str(e)[:90]}")
            continue

        new_band = _band_letter(Epc.from_sap_score(new_sap))
        drop = lodged - new_sap
        drops.append(drop)
        if epc.sap_version is not None and epc.sap_version < 10.2:
            flipped += 1
        # Bands missing from _BAND_IDX fall back to index 0.
        band_drop = _BAND_IDX.get(lodged_band, 0) - _BAND_IDX.get(new_band, 0)
        survivor = drop > 15 or band_drop >= 2
        tag = " *** SURVIVOR" if survivor else ""
        if survivor:
            survivors.append((uprn, lodged, lodged_band, new_sap, new_band, drop))
        # old_post can be NULL in the joined plan row; print None instead of
        # failing on float(None).
        old_post = row["old_post"]
        old_post_shown = round(float(old_post), 1) if old_post is not None else None
        print(
            f" uprn={uprn} lodged={lodged}/{lodged_band} "
            f"new_eff={new_sap}/{new_band} drop={drop} "
            f"(old_eff={row['old_eff']} old_post={old_post_shown}){tag}"
        )

    computed = len(drops)
    print("\n==== SUMMARY ====")
    print(f"sampled={len(rows)} computed={computed} calc_errors={calc_errors}")
    print(f"rebaseline flipped (sap_version<10.2 now set) = {flipped}/{computed}")
    if drops:
        drops_sorted = sorted(drops)
        # For an even count this is the upper of the two middle values.
        print(
            f"lodged-new_eff drop: min={min(drops)} median="
            f"{drops_sorted[len(drops_sorted)//2]} max={max(drops)} "
            f"mean={round(sum(drops)/len(drops),1)}"
        )
    print(f"survivors (>15 SAP or >=2 bands below lodged) = {len(survivors)}")
    for s in survivors:
        print(f" SURVIVOR uprn={s[0]} lodged={s[1]}/{s[2]} new={s[3]}/{s[4]} drop={s[5]}")
if __name__ == "__main__":
main()

View file

@ -41,27 +41,42 @@ _FIXTURE = Path(__file__).parents[3] / "tests" / "fixtures" / "epc_prediction"
# mode tipped, and it tipped entirely inside one near-tie pre-1900↔1900-29 (A↔B)
# cohort. wall_insulation_type / floor_construction / has_hot_water_cylinder / has_pv
# moved 3-6pp the same way. The tighten-only ratchet resumes from these new values.
#
# Re-baselined again under ADR-0037 (full-SAP mapper completion): full-SAP
# (on-construction) certs previously mapped property_type=None, so the hard cohort
# filter (comparable_properties.py — `c.epc.property_type == target.property_type`)
# silently excluded them from EVERY cohort, as donors and as targets. Mapping
# property_type correctly admits these real lodged EPCs as comparables — another
# ground-truth-method change. Net effect over the n=36 fixture: **16 components
# better, 4 worse, 6 unchanged**. The gains are concentrated in the physical /
# geometric characteristics full-SAP certs measure accurately — window_count
# residual 3.83->1.69, total_window_area 3.82->3.72, building_parts 0.33->0.12,
# floor_construction 0.78->0.91, construction_age_band 0.50->0.78, modal_glazing
# 0.56->0.84, walls/room-in-roof/heating-control all up. The 4 that fell are the
# new-build-vs-old-stock service mismatch on 1-2 targets each (heating_main_fuel
# 0.9722->0.9394, water_heating_fuel ->0.9495, cylinder_insulation_type 0.6667->
# 0.3333) plus floor_area (+0.31 MAE). Tighten-only resumes from these values.
_RATE_FLOORS: dict[str, float] = {
"wall_construction": 0.8889,
"wall_insulation_type": 0.7778,
"construction_age_band": 0.5000,
"construction_age_band_pm1": 0.8333,
"wall_construction": 0.9091,
"wall_insulation_type": 0.8687,
"construction_age_band": 0.7778,
"construction_age_band_pm1": 0.9091,
"roof_construction": 0.7222,
"floor_construction": 0.7812,
"heating_main_fuel": 0.9722,
"heating_main_category": 0.9444,
"heating_main_control": 0.8056,
"water_heating_fuel": 0.9722,
"water_heating_code": 0.9444,
"has_hot_water_cylinder": 0.8333,
"cylinder_insulation_type": 0.5000,
"floor_construction": 0.9053,
"heating_main_fuel": 0.9394,
"heating_main_category": 0.9596,
"heating_main_control": 0.9091,
"water_heating_fuel": 0.9495,
"water_heating_code": 0.9798,
"has_hot_water_cylinder": 0.8687,
"cylinder_insulation_type": 0.3333,
"secondary_heating_type": 0.0000,
"roof_insulation_thickness": 0.4118,
"roof_insulation_thickness_pm1": 0.4118,
"floor_insulation": 0.9375,
"has_room_in_roof": 0.8333,
"modal_glazing_type": 0.5556,
"has_pv": 0.9444,
"has_room_in_roof": 0.9495,
"modal_glazing_type": 0.8384,
"has_pv": 0.9798,
"solar_water_heating": 1.0000,
}
@ -77,11 +92,16 @@ _RATE_FLOORS: dict[str, float] = {
# the other way as small-sample noise (one target's shift moves an n=36 MAE more
# than that). The ceiling still pins the new deterministic value exactly, so the
# tighten-only ratchet resumes from here.
# total_window_area / building_parts / door_count all tightened under ADR-0037
# (full-SAP certs admitted as donors — their measured geometry sharpens the
# dimensional predictions); floor_area loosened 12.0378 -> 12.0586 as the one
# physical residual that fell (1-2 targets picking a new-build donor). See the
# _RATE_FLOORS note above.
_RESIDUAL_CEILINGS: dict[str, float] = {
"floor_area": 12.0378,
"total_window_area": 4.4067,
"building_parts": 0.3333,
"door_count": 0.6389,
"floor_area": 12.0586,
"total_window_area": 3.7184,
"building_parts": 0.1212,
"door_count": 0.3131,
}
_TOLERANCE = 1e-3

View file

@ -145,3 +145,40 @@ def test_a_calculator_raise_propagates_and_aborts() -> None:
# Act / Assert
with pytest.raises(UnmappedSapCode):
rebaseliner.rebaseline(property_id=10, effective_epc=epc, lodged=_lodged())
def test_full_sap_mapped_cert_rebaselines_off_the_lodged_sap_2012_value() -> None:
    """A mapped full-SAP fixture takes the calculator score, not the lodged one.

    Regression for the portfolio-796 "impossible downgrade" (ADR-0037). A
    full-SAP cert lodges SAP 2012 (sap_version 9.92). The WIP mapper dropped
    sap_version, so the rebaseliner couldn't fire trigger (a) and Effective
    stayed stuck on the lodged SAP-2012 value while the plan modelled SAP-10.2.
    End-to-end: a real full-SAP fixture, once mapped, now carries sap_version
    so Effective becomes the calc output (not lodged).
    """
    import json
    import os

    from datatypes.epc.domain.mapper import EpcPropertyDataMapper
    from datatypes.epc.schema.sap_schema_17_1 import SapSchema17_1
    from datatypes.epc.schema.tests.helpers import from_dict

    # Arrange
    fixtures = os.path.join(
        os.path.dirname(__file__),
        "../../../datatypes/epc/schema/tests/fixtures",
    )
    # JSON is read as UTF-8 explicitly so the test does not depend on the
    # platform's default text encoding.
    with open(os.path.join(fixtures, "sap_17_1.json"), encoding="utf-8") as f:
        raw = json.load(f)
    effective_epc = EpcPropertyDataMapper.from_sap_schema_17_1(
        from_dict(SapSchema17_1, raw)
    )
    # The mapped cert carries the lodged SAP 2012 version, gating the flip.
    assert effective_epc.sap_version == 9.92
    rebaseliner = CalculatorRebaseliner(_StubCalculator(_sap_result(sap_score=70)))

    # Act
    result = rebaseliner.rebaseline(
        property_id=1, effective_epc=effective_epc, lodged=_lodged()
    )

    # Assert — Effective is the SAP-10.2 calc (70), NOT the lodged SAP-2012 (72).
    assert result.reason == "pre_sap10"
    assert result.effective.sap_score == 70