mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
P2.1: extract predict_sap_for_cert; swap probe to SAP 10.2 spec prices
ADR-0010 P2: the cert-calibration layer is deleted and the probe uses SAP_10_2_SPEC_PRICES (already defined in cert_to_inputs.py). Extracts a pure predict_sap_for_cert(cert_document, *, prices) -> int helper out of main()'s inline pipeline so the spec-prices path is unit-testable in isolation; the helper is also reusable for P3's cohort-filtered probe variant. The pinned regression value (SAP=67 for cert 6035-7729 under spec prices, vs the cert's lodged SAP of 73 under cert-cal prices) lives in services/ml_training_data/tests/unit/test_sap_parity_probe.py. It will drift as P4 (PCDB) and the section sweep land their fixes; that's expected. cert_calibration_prices is still imported by test_golden_fixtures.py and the table_12_cert_calibration module is intact. P2.2/P2.3 retire those. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
377962f8bd
commit
ac1aa56ab1
2 changed files with 73 additions and 4 deletions
|
|
@ -23,8 +23,11 @@ import pandas as pd
|
||||||
|
|
||||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||||
from domain.sap.calculator import calculate_sap_from_inputs
|
from domain.sap.calculator import calculate_sap_from_inputs
|
||||||
from domain.sap.rdsap.cert_to_inputs import cert_to_inputs
|
from domain.sap.rdsap.cert_to_inputs import (
|
||||||
from domain.sap.tables.table_12_cert_calibration import cert_calibration_prices
|
PriceTable,
|
||||||
|
SAP_10_2_SPEC_PRICES,
|
||||||
|
cert_to_inputs,
|
||||||
|
)
|
||||||
from ml_training_data.bulk_zip_reader import BulkZipReader
|
from ml_training_data.bulk_zip_reader import BulkZipReader
|
||||||
from ml_training_data.storage import LocalStorage
|
from ml_training_data.storage import LocalStorage
|
||||||
|
|
||||||
|
|
@ -35,6 +38,19 @@ _BULK = _REPO / "data" / "ml_training" / "bulk"
|
||||||
_ZIP_KEYS = ("certificates-2025.json.zip", "certificates-2026.json.zip")
|
_ZIP_KEYS = ("certificates-2025.json.zip", "certificates-2026.json.zip")
|
||||||
|
|
||||||
|
|
||||||
|
def predict_sap_for_cert(
    cert_document: dict[str, Any], *, prices: PriceTable
) -> int:
    """Predict the SAP score for one cert document.

    Chains the three pipeline stages in order: the EPC mapper turns the
    raw cert document into property data, ``cert_to_inputs`` converts
    that into calculator inputs under the supplied price table, and the
    SAP calculator produces the result whose ``sap_score`` is returned.

    This is the shared seam for the corpus probe and any per-cert dev
    tooling, so it takes the price table as a keyword-only argument
    instead of choosing one itself.
    """
    property_data = EpcPropertyDataMapper.from_api_response(cert_document)
    calculator_inputs = cert_to_inputs(property_data, prices=prices)
    return calculate_sap_from_inputs(calculator_inputs).sap_score
|
||||||
|
|
||||||
|
|
||||||
def _sample_certs(n: int, seed: int) -> dict[str, int]:
|
def _sample_certs(n: int, seed: int) -> dict[str, int]:
|
||||||
df = pd.read_parquet(_PARQUET, columns=["certificate_number", "sap_score"])
|
df = pd.read_parquet(_PARQUET, columns=["certificate_number", "sap_score"])
|
||||||
# Wide range so the sample includes full-SAP new-builds (sap_score 90+)
|
# Wide range so the sample includes full-SAP new-builds (sap_score 90+)
|
||||||
|
|
@ -52,9 +68,12 @@ def main(argv: list[str] | None = None) -> None:
|
||||||
seed = int(args[1]) if len(args) > 1 else 7
|
seed = int(args[1]) if len(args) > 1 else 7
|
||||||
|
|
||||||
targets = _sample_certs(n, seed)
|
targets = _sample_certs(n, seed)
|
||||||
print(f"Sampling {len(targets)} certs (seed={seed}) — using cert-calibration prices")
|
print(
|
||||||
|
f"Sampling {len(targets)} certs (seed={seed}) — using SAP 10.2 "
|
||||||
|
f"(14-03-2025) spec prices per ADR-0010"
|
||||||
|
)
|
||||||
storage = LocalStorage(_BULK)
|
storage = LocalStorage(_BULK)
|
||||||
prices = cert_calibration_prices()
|
prices = SAP_10_2_SPEC_PRICES
|
||||||
results: list[dict[str, Any]] = []
|
results: list[dict[str, Any]] = []
|
||||||
errors: list[dict[str, Any]] = []
|
errors: list[dict[str, Any]] = []
|
||||||
remaining = set(targets)
|
remaining = set(targets)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,50 @@
|
||||||
|
"""Tests for sap_parity_probe — the per-cert SAP10 prediction pipeline
|
||||||
|
exercised by the corpus-wide parity probe.
|
||||||
|
|
||||||
|
P2.1 (ADR-0010): the probe now uses SAP 10.2 (14-03-2025) spec prices
|
||||||
|
exclusively. The extracted `predict_sap_for_cert` pure function exists
|
||||||
|
so the spec-prices path is unit-testable in isolation.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, cast
|
||||||
|
|
||||||
|
from domain.sap.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES
|
||||||
|
from ml_training_data.sap_parity_probe import predict_sap_for_cert
|
||||||
|
|
||||||
|
|
||||||
|
_REPO_ROOT = Path(__file__).resolve().parents[4]
|
||||||
|
_GOLDEN_FIXTURES = (
|
||||||
|
_REPO_ROOT
|
||||||
|
/ "packages/domain/src/domain/sap/rdsap/tests/fixtures/golden"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _load_cert_document(cert_number: str) -> dict[str, Any]:
    """Load the golden-fixture cert JSON for *cert_number*.

    Reads ``<cert_number>.json`` from the golden fixtures directory and
    returns the parsed document.
    """
    path = _GOLDEN_FIXTURES / f"{cert_number}.json"
    # Decode explicitly as UTF-8: JSON is UTF-8 by specification, and the
    # default for read_text() follows the platform locale, which would make
    # the test environment-dependent for any non-ASCII fixture content.
    return cast(dict[str, Any], json.loads(path.read_text(encoding="utf-8")))
|
||||||
|
|
||||||
|
|
||||||
|
def test_predict_sap_for_cert_returns_spec_prices_score_for_6035_7729() -> None:
    """Pin the spec-prices SAP prediction for golden cert 6035-7729."""
    # Arrange
    # Fixture 6035-7729-2309-0879-2296 is a mid-terrace, age band A, with
    # a gas combi boiler (code 104) and a TFA of 128 m². It is one of the
    # previously-retired golden fixtures: the cert JSON stays on disk as
    # reference data (ADR-0010 §10), while this test is new and pins the
    # score under spec prices, not the cert's own SAP.
    fixture = _load_cert_document("6035-7729-2309-0879-2296")

    # Act
    predicted = predict_sap_for_cert(fixture, prices=SAP_10_2_SPEC_PRICES)

    # Assert
    # The expected value was pinned in GREEN of P2.1. A drift means either
    # the calculator's spec-correct output for this fixture has genuinely
    # moved (a behavioural change worth investigating) or the SAP 10.2
    # spec prices in table_12.py have changed.
    assert predicted == _EXPECTED_SCORE_6035_7729
|
||||||
|
|
||||||
|
|
||||||
|
_EXPECTED_SCORE_6035_7729: int = 67
|
||||||
Loading…
Add table
Reference in a new issue