mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
P2.1: extract predict_sap_for_cert; swap probe to SAP 10.2 spec prices
ADR-0010 P2: the cert-calibration layer is deleted, so the probe now uses SAP_10_2_SPEC_PRICES (already defined in cert_to_inputs.py). Extracts a pure predict_sap_for_cert(cert_document, *, prices) -> int helper out of main()'s inline pipeline so the spec-prices path is unit-testable in isolation; the helper is also reusable for P3's cohort-filtered probe variant. The pinned regression value (SAP=67 for cert 6035-7729 under spec prices, vs the cert's lodged SAP of 73 under cert-cal prices) lives in services/ml_training_data/tests/unit/test_sap_parity_probe.py. It will drift as P4 (PCDB) and the section sweep land their fixes; that's expected. cert_calibration_prices is still imported by test_golden_fixtures.py, and the table_12_cert_calibration module is intact; P2.2/P2.3 retire those. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
377962f8bd
commit
ac1aa56ab1
2 changed files with 73 additions and 4 deletions
|
|
@ -23,8 +23,11 @@ import pandas as pd
|
|||
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
from domain.sap.calculator import calculate_sap_from_inputs
|
||||
from domain.sap.rdsap.cert_to_inputs import cert_to_inputs
|
||||
from domain.sap.tables.table_12_cert_calibration import cert_calibration_prices
|
||||
from domain.sap.rdsap.cert_to_inputs import (
|
||||
PriceTable,
|
||||
SAP_10_2_SPEC_PRICES,
|
||||
cert_to_inputs,
|
||||
)
|
||||
from ml_training_data.bulk_zip_reader import BulkZipReader
|
||||
from ml_training_data.storage import LocalStorage
|
||||
|
||||
|
|
@ -35,6 +38,19 @@ _BULK = _REPO / "data" / "ml_training" / "bulk"
|
|||
_ZIP_KEYS = ("certificates-2025.json.zip", "certificates-2026.json.zip")
|
||||
|
||||
|
||||
def predict_sap_for_cert(
    cert_document: dict[str, Any], *, prices: PriceTable
) -> int:
    """Predict the SAP score for one cert document under ``prices``.

    The document is mapped with ``EpcPropertyDataMapper.from_api_response``,
    converted to calculator inputs by ``cert_to_inputs`` using the supplied
    price table, and fed to ``calculate_sap_from_inputs``; the ``sap_score``
    of that result is returned.

    This is the pure-function seam shared by the corpus probe and any
    future per-cert dev tools.
    """
    return calculate_sap_from_inputs(
        cert_to_inputs(
            EpcPropertyDataMapper.from_api_response(cert_document),
            prices=prices,
        )
    ).sap_score
|
||||
|
||||
|
||||
def _sample_certs(n: int, seed: int) -> dict[str, int]:
|
||||
df = pd.read_parquet(_PARQUET, columns=["certificate_number", "sap_score"])
|
||||
# Wide range so the sample includes full-SAP new-builds (sap_score 90+)
|
||||
|
|
@ -52,9 +68,12 @@ def main(argv: list[str] | None = None) -> None:
|
|||
seed = int(args[1]) if len(args) > 1 else 7
|
||||
|
||||
targets = _sample_certs(n, seed)
|
||||
print(f"Sampling {len(targets)} certs (seed={seed}) — using cert-calibration prices")
|
||||
print(
|
||||
f"Sampling {len(targets)} certs (seed={seed}) — using SAP 10.2 "
|
||||
f"(14-03-2025) spec prices per ADR-0010"
|
||||
)
|
||||
storage = LocalStorage(_BULK)
|
||||
prices = cert_calibration_prices()
|
||||
prices = SAP_10_2_SPEC_PRICES
|
||||
results: list[dict[str, Any]] = []
|
||||
errors: list[dict[str, Any]] = []
|
||||
remaining = set(targets)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,50 @@
|
|||
"""Tests for sap_parity_probe — the per-cert SAP10 prediction pipeline
|
||||
exercised by the corpus-wide parity probe.
|
||||
|
||||
P2.1 (ADR-0010): the probe now uses SAP 10.2 (14-03-2025) spec prices
|
||||
exclusively. The extracted `predict_sap_for_cert` pure function exists
|
||||
so the spec-prices path is unit-testable in isolation.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, cast
|
||||
|
||||
from domain.sap.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES
|
||||
from ml_training_data.sap_parity_probe import predict_sap_for_cert
|
||||
|
||||
|
||||
_REPO_ROOT = Path(__file__).resolve().parents[4]
|
||||
_GOLDEN_FIXTURES = (
|
||||
_REPO_ROOT
|
||||
/ "packages/domain/src/domain/sap/rdsap/tests/fixtures/golden"
|
||||
)
|
||||
|
||||
|
||||
def _load_cert_document(cert_number: str) -> dict[str, Any]:
    """Load the golden-fixture cert JSON named ``<cert_number>.json``.

    The file is looked up directly under ``_GOLDEN_FIXTURES`` and parsed
    with ``json.loads``. It is decoded explicitly as UTF-8 so the result
    does not depend on the platform's locale default encoding.

    Raises:
        FileNotFoundError: if no fixture exists for ``cert_number``.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    path = _GOLDEN_FIXTURES / f"{cert_number}.json"
    # The parsed value is assumed to be a JSON object; cast() only informs
    # the type checker and performs no runtime validation.
    return cast(dict[str, Any], json.loads(path.read_text(encoding="utf-8")))
|
||||
|
||||
|
||||
def test_predict_sap_for_cert_returns_spec_prices_score_for_6035_7729() -> None:
    """Pin the spec-prices SAP score predicted for golden cert 6035-7729."""
    # Arrange
    # Fixture 6035-7729-2309-0879-2296 describes a mid-terrace dwelling:
    # age band A, gas combi boiler (code 104), TFA 128 m². It is one of
    # the previously-retired golden fixtures whose cert JSON stays on disk
    # as reference data (ADR-0010 §10). This test is new and pins the
    # score computed under spec prices, not the SAP lodged on the cert.
    fixture = _load_cert_document("6035-7729-2309-0879-2296")

    # Act
    predicted = predict_sap_for_cert(fixture, prices=SAP_10_2_SPEC_PRICES)

    # Assert
    # The expected value was pinned in the GREEN step of P2.1. Drift means
    # one of two things: the calculator's spec-correct output for this
    # fixture has genuinely moved (a real behavioural change worth
    # investigating), or the SAP 10.2 spec prices in table_12.py changed.
    assert predicted == _EXPECTED_SCORE_6035_7729
|
||||
|
||||
|
||||
_EXPECTED_SCORE_6035_7729: int = 67
|
||||
Loading…
Add table
Reference in a new issue