diff --git a/tests/domain/epc_prediction/test_component_accuracy_gate.py b/tests/domain/epc_prediction/test_component_accuracy_gate.py index 2b0ee9fb..cc262c17 100644 --- a/tests/domain/epc_prediction/test_component_accuracy_gate.py +++ b/tests/domain/epc_prediction/test_component_accuracy_gate.py @@ -41,27 +41,42 @@ _FIXTURE = Path(__file__).parents[3] / "tests" / "fixtures" / "epc_prediction" # mode tipped, and it tipped entirely inside one near-tie pre-1900↔1900-29 (A↔B) # cohort. wall_insulation_type / floor_construction / has_hot_water_cylinder / has_pv # moved 3-6pp the same way. The tighten-only ratchet resumes from these new values. +# +# Re-baselined again under ADR-0037 (full-SAP mapper completion): full-SAP +# (on-construction) certs previously mapped property_type=None, so the hard cohort +# filter (comparable_properties.py — `c.epc.property_type == target.property_type`) +# silently excluded them from EVERY cohort, as donors and as targets. Mapping +# property_type correctly admits these real lodged EPCs as comparables — another +# ground-truth-method change. Net effect over the n=36 fixture: **16 components +# better, 4 worse, 6 unchanged**. The gains are concentrated in the physical / +# geometric characteristics full-SAP certs measure accurately — window_count +# residual 3.83->1.69, total_window_area 3.82->3.72, building_parts 0.33->0.12, +# floor_construction 0.78->0.91, construction_age_band 0.50->0.78, modal_glazing +# 0.56->0.84, walls/room-in-roof/heating-control all up. The 4 that fell are the +# new-build-vs-old-stock service mismatch on 1-2 targets each (heating_main_fuel +# 0.9722->0.9394, water_heating_fuel ->0.9495, cylinder_insulation_type 0.6667-> +# 0.3333) plus floor_area (+0.31 MAE). Tighten-only resumes from these values. _RATE_FLOORS: dict[str, float] = { - "wall_construction": 0.8889, - "wall_insulation_type": 0.7778, - "construction_age_band": 0.5000, - "construction_age_band_pm1": 0.8333, + "wall_construction": 0.9091, + "wall_insulation_type": 0.8687, + "construction_age_band": 0.7778, + "construction_age_band_pm1": 0.9091, "roof_construction": 0.7222, - "floor_construction": 0.7812, - "heating_main_fuel": 0.9722, - "heating_main_category": 0.9444, - "heating_main_control": 0.8056, - "water_heating_fuel": 0.9722, - "water_heating_code": 0.9444, - "has_hot_water_cylinder": 0.8333, - "cylinder_insulation_type": 0.5000, + "floor_construction": 0.9053, + "heating_main_fuel": 0.9394, + "heating_main_category": 0.9596, + "heating_main_control": 0.9091, + "water_heating_fuel": 0.9495, + "water_heating_code": 0.9798, + "has_hot_water_cylinder": 0.8687, + "cylinder_insulation_type": 0.3333, "secondary_heating_type": 0.0000, "roof_insulation_thickness": 0.4118, "roof_insulation_thickness_pm1": 0.4118, "floor_insulation": 0.9375, - "has_room_in_roof": 0.8333, - "modal_glazing_type": 0.5556, - "has_pv": 0.9444, + "has_room_in_roof": 0.9495, + "modal_glazing_type": 0.8384, + "has_pv": 0.9798, "solar_water_heating": 1.0000, } @@ -77,11 +92,16 @@ _RATE_FLOORS: dict[str, float] = { # the other way as small-sample noise (one target's shift moves an n=36 MAE more # than that). The ceiling still pins the new deterministic value exactly, so the # tighten-only ratchet resumes from here. +# total_window_area / building_parts / door_count all tightened under ADR-0037 +# (full-SAP certs admitted as donors — their measured geometry sharpens the +# dimensional predictions); floor_area loosened 12.0378 -> 12.0586 as the one +# physical residual that fell (1-2 targets picking a new-build donor). See the +# _RATE_FLOORS note above. _RESIDUAL_CEILINGS: dict[str, float] = { - "floor_area": 12.0378, - "total_window_area": 4.4067, - "building_parts": 0.3333, - "door_count": 0.6389, + "floor_area": 12.0586, + "total_window_area": 3.7184, + "building_parts": 0.1212, + "door_count": 0.3131, } _TOLERANCE = 1e-3