mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
Re-baseline prediction component-accuracy gate for full-SAP donors (ADR-0037)
Full-SAP certs mapped property_type=None, so the hard cohort filter silently excluded them as comparables. Correctly typing them admits real lodged EPCs as donors — a ground-truth-method change (cf #1245). Net over the n=36 fixture: 16 components better, 4 worse, 6 unchanged; gains concentrated in the physical characteristics full-SAP certs measure (window_count 3.83->1.69, building_parts, total_window_area, floor_construction, construction_age_band, glazing, walls). The 4 that fell are new-build-vs-old-stock service mismatch on 1-2 targets each (heating/water fuel, cylinder insulation) + floor_area. Tighten 16, loosen 4. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b2c74dbf5b
commit
e405928516
1 changed files with 39 additions and 19 deletions
|
|
@ -41,27 +41,42 @@ _FIXTURE = Path(__file__).parents[3] / "tests" / "fixtures" / "epc_prediction"
|
|||
# mode tipped, and it tipped entirely inside one near-tie pre-1900↔1900-29 (A↔B)
|
||||
# cohort. wall_insulation_type / floor_construction / has_hot_water_cylinder / has_pv
|
||||
# moved 3-6pp the same way. The tighten-only ratchet resumes from these new values.
|
||||
#
|
||||
# Re-baselined again under ADR-0037 (full-SAP mapper completion): full-SAP
|
||||
# (on-construction) certs previously mapped property_type=None, so the hard cohort
|
||||
# filter (comparable_properties.py — `c.epc.property_type == target.property_type`)
|
||||
# silently excluded them from EVERY cohort, as donors and as targets. Mapping
|
||||
# property_type correctly admits these real lodged EPCs as comparables — another
|
||||
# ground-truth-method change. Net effect over the n=36 fixture: **16 components
|
||||
# better, 4 worse, 6 unchanged**. The gains are concentrated in the physical /
|
||||
# geometric characteristics full-SAP certs measure accurately — window_count
|
||||
# residual 3.83->1.69, total_window_area 3.82->3.72, building_parts 0.33->0.12,
|
||||
# floor_construction 0.78->0.91, construction_age_band 0.50->0.78, modal_glazing
|
||||
# 0.56->0.84, walls/room-in-roof/heating-control all up. The 4 that fell are the
|
||||
# new-build-vs-old-stock service mismatch on 1-2 targets each (heating_main_fuel
|
||||
# 0.9722->0.9394, water_heating_fuel ->0.9495, cylinder_insulation_type 0.6667->
|
||||
# 0.3333) plus floor_area (+0.31 MAE). The tighten-only ratchet resumes from here.
|
||||
_RATE_FLOORS: dict[str, float] = {
|
||||
"wall_construction": 0.8889,
|
||||
"wall_insulation_type": 0.7778,
|
||||
"construction_age_band": 0.5000,
|
||||
"construction_age_band_pm1": 0.8333,
|
||||
"wall_construction": 0.9091,
|
||||
"wall_insulation_type": 0.8687,
|
||||
"construction_age_band": 0.7778,
|
||||
"construction_age_band_pm1": 0.9091,
|
||||
"roof_construction": 0.7222,
|
||||
"floor_construction": 0.7812,
|
||||
"heating_main_fuel": 0.9722,
|
||||
"heating_main_category": 0.9444,
|
||||
"heating_main_control": 0.8056,
|
||||
"water_heating_fuel": 0.9722,
|
||||
"water_heating_code": 0.9444,
|
||||
"has_hot_water_cylinder": 0.8333,
|
||||
"cylinder_insulation_type": 0.5000,
|
||||
"floor_construction": 0.9053,
|
||||
"heating_main_fuel": 0.9394,
|
||||
"heating_main_category": 0.9596,
|
||||
"heating_main_control": 0.9091,
|
||||
"water_heating_fuel": 0.9495,
|
||||
"water_heating_code": 0.9798,
|
||||
"has_hot_water_cylinder": 0.8687,
|
||||
"cylinder_insulation_type": 0.3333,
|
||||
"secondary_heating_type": 0.0000,
|
||||
"roof_insulation_thickness": 0.4118,
|
||||
"roof_insulation_thickness_pm1": 0.4118,
|
||||
"floor_insulation": 0.9375,
|
||||
"has_room_in_roof": 0.8333,
|
||||
"modal_glazing_type": 0.5556,
|
||||
"has_pv": 0.9444,
|
||||
"has_room_in_roof": 0.9495,
|
||||
"modal_glazing_type": 0.8384,
|
||||
"has_pv": 0.9798,
|
||||
"solar_water_heating": 1.0000,
|
||||
}
|
||||
|
||||
|
|
@ -77,11 +92,16 @@ _RATE_FLOORS: dict[str, float] = {
|
|||
# the other way as small-sample noise (one target's shift moves an n=36 MAE more
|
||||
# than that). The ceiling still pins the new deterministic value exactly, so the
|
||||
# tighten-only ratchet resumes from here.
|
||||
# total_window_area / building_parts / door_count all tightened under ADR-0037
|
||||
# (full-SAP certs admitted as donors — their measured geometry sharpens the
|
||||
# dimensional predictions); floor_area loosened 12.0378 -> 12.0586 as the one
|
||||
# physical residual that worsened (1-2 targets picking a new-build donor). See the
|
||||
# _RATE_FLOORS note above.
|
||||
_RESIDUAL_CEILINGS: dict[str, float] = {
|
||||
"floor_area": 12.0378,
|
||||
"total_window_area": 4.4067,
|
||||
"building_parts": 0.3333,
|
||||
"door_count": 0.6389,
|
||||
"floor_area": 12.0586,
|
||||
"total_window_area": 3.7184,
|
||||
"building_parts": 0.1212,
|
||||
"door_count": 0.3131,
|
||||
}
|
||||
|
||||
_TOLERANCE = 1e-3
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue