Model/domain/sap10_calculator/tables/pcdb/__init__.py
Khalim Conn-Kowlessar 34cbd7d66c feat(pcdb): parse Table 323 (Centralised MEV / MVHR) + Table 329 efficiency IUF
MVHR (24a) heat-recovery support, part 1: the PCDB data layer.

PCDB Table 323 (PCDF Spec Rev 6b §A.18, Format 426; pcdb10.dat carries
Format 431, header `$323,431,...`) holds the per-wet-room SFP + heat-
exchanger efficiency for centralised MEV / MVHR units. Added
`MvhrRecord` / `MvhrDataPoint`, `parse_centralised_mv_row` /
`parse_table_323`, the ETL step, the committed jsonl, and the
`mvhr_record(pcdb_id)` runtime lookup (mirrors Table 322).

SAP 10.2 §2.6.4/§2.6.6: "MVHR ... SFP is a single value depending on the
number of wet rooms" — each test group's leading field is the wet-room
count; callers select the group matching the dwelling lodgement.
Worksheet-proven on simulated case 49 (000565, 2 wet rooms, Vent Axia
Sentinel Kinetic B 500140 → flow 21.0, SFP 0.88, efficiency 91%).

Also decoded the MVHR heat-recovery efficiency in-use factor from Table
329 (Format 432): system_type 3 ducts-inside-envelope = 0.90 (case-49
(23c) = 91 × 0.90 = 81.9%), cross-checked against system_type 10 = 0.70
(= SAP 10.2 Table 4g default heat-recovery in-use factor). "Table 4h is
no longer used – data now stored in the PCDB" (SAP 10.2 p.176).

The outside-envelope efficiency columns + with-scheme SFP blocks are
preserved verbatim in `raw` (no fixture exercises them yet).

Note: pyright strict type gate not run locally (pyright not installed).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-23 19:29:17 +00:00

254 lines
10 KiB
Python

"""BRE Product Characteristics Database (PCDB) lookups.
The PCDB (pcdb10.dat) lists manufacturer-declared performance data for
heating and ventilation equipment, keyed by an integer "Index Number"
that RdSAP certs lodge in `MainHeatingDetail.main_heating_index_number`.
Where a cert references a PCDB record, SAP 10.2 Appendix D2.1 mandates
that the PCDB winter seasonal efficiency overrides the Table 4b
category default — closing most of the cert-vs-rating efficiency gap
documented in [ADR-0010 §4](../../../../../../../docs/adr/0010-sap10-calculator-spec-target-and-validation.md#4-pcdb-integration-is-promoted-from-session-c-to-a-prerequisite).
Public surface:
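- `gas_oil_boiler_record(pcdb_id)`: Table 105 lookup.
- `heat_pump_record(pcdb_id)`: Table 362 lookup.
- `decentralised_mev_record(pcdb_id)`: Table 322 lookup.
- `mvhr_record(pcdb_id)`: Table 323 lookup (centralised MEV / MVHR).
- `mv_in_use_factors_record(system_type)`: Table 329 lookup.
- `GasOilBoilerRecord`: typed record dataclass.
- `HeatPumpRecord`, `DecentralisedMevRecord`, `MevFanConfig`, `MvhrRecord`,
`MvhrDataPoint`, `MvInUseFactorsRecord`: typed records re-exported from
`parser.py`.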
- `parser.py`: per-table row parsers (Table 105 typed; raw walker for the
other 7 tables).
- `etl.py`: walks the multi-table `pcdb10.dat` source and writes one
newline-delimited JSON file per table under `domain/sap10_calculator/tables/pcdb/data/`.
Reference: BRE PCDB pcdb10.dat (April 2026 revision); SAP 10.2
specification (14-03-2025) Appendix D2.1.
"""
from __future__ import annotations
import json
from pathlib import Path
from typing import Final, Optional
from domain.sap10_calculator.tables.pcdb.parser import (
DecentralisedMevRecord,
GasOilBoilerRecord,
HeatPumpRecord,
MevFanConfig,
MvhrDataPoint,
MvhrRecord,
MvInUseFactorsRecord,
parse_centralised_mv_row,
parse_decentralised_mev_row,
parse_heat_pump_row_raw,
parse_mv_in_use_factors_row,
)
# Public API of this package: the types imported from `parser` above,
# followed by the per-table lookup functions defined in this module.
__all__ = [
    "DecentralisedMevRecord",
    "GasOilBoilerRecord",
    "HeatPumpRecord",
    "MevFanConfig",
    "MvhrDataPoint",
    "MvhrRecord",
    "MvInUseFactorsRecord",
    "decentralised_mev_record",
    "gas_oil_boiler_record",
    "heat_pump_record",
    "mv_in_use_factors_record",
    "mvhr_record",
]
# Absolute path of the `data/` directory that sits next to this module;
# resolving from `__file__` keeps lookups independent of the working
# directory.
_PCDB_DATA_DIR: Final[Path] = Path(__file__).resolve().parent.joinpath("data")

# One newline-delimited JSON file per PCDB table read by this module.
_TABLE_105_JSONL: Final[Path] = _PCDB_DATA_DIR.joinpath(
    "pcdb_table_105_gas_oil_boilers.jsonl"
)
_TABLE_322_JSONL: Final[Path] = _PCDB_DATA_DIR.joinpath(
    "pcdb_table_322_decentralised_mev.jsonl"
)
_TABLE_323_JSONL: Final[Path] = _PCDB_DATA_DIR.joinpath(
    "pcdb_table_323_centralised_mev_mvhr.jsonl"
)
_TABLE_329_JSONL: Final[Path] = _PCDB_DATA_DIR.joinpath(
    "pcdb_table_329_mv_in_use_factors.jsonl"
)
_TABLE_362_JSONL: Final[Path] = _PCDB_DATA_DIR.joinpath(
    "pcdb_table_362_heat_pumps.jsonl"
)
def _load_table_105() -> dict[int, GasOilBoilerRecord]:
    """Build the Table 105 index from its NDJSON file.

    Every non-blank line of `_TABLE_105_JSONL` is decoded as one JSON
    object and turned into a `GasOilBoilerRecord`. The identity,
    efficiency, output, final-year and `raw` keys are read with `[...]`
    and so must be present; the remaining keys are read with `.get` and
    become None when absent.

    The file is ~5MB / ~4000 rows, a one-off ~50ms cost. The module
    calls this once at import time and keeps the returned dict, so
    subsequent lookups are plain dict accesses.

    Returns:
        Mapping of `pcdb_id` to its record. When an id appears on more
        than one line, the last line wins.
    """
    boilers: dict[int, GasOilBoilerRecord] = {}
    with _TABLE_105_JSONL.open(encoding="utf-8") as handle:
        # Strip each line first so whitespace-only lines are skipped too.
        non_blank = (text for text in map(str.strip, handle) if text)
        for text in non_blank:
            row = json.loads(text)
            boiler = GasOilBoilerRecord(
                pcdb_id=row["pcdb_id"],
                brand_name=row["brand_name"],
                model_name=row["model_name"],
                model_qualifier=row["model_qualifier"],
                winter_efficiency_pct=row["winter_efficiency_pct"],
                summer_efficiency_pct=row["summer_efficiency_pct"],
                comparative_hot_water_efficiency_pct=row[
                    "comparative_hot_water_efficiency_pct"
                ],
                output_kw_max=row["output_kw_max"],
                final_year_of_manufacture=row["final_year_of_manufacture"],
                subsidiary_type=row.get("subsidiary_type"),
                store_type=row.get("store_type"),
                separate_dhw_tests=row.get("separate_dhw_tests"),
                rejected_energy_proportion_r1=row.get(
                    "rejected_energy_proportion_r1"
                ),
                loss_factor_f1_kwh_per_day=row.get("loss_factor_f1_kwh_per_day"),
                loss_factor_f2_kwh_per_day=row.get("loss_factor_f2_kwh_per_day"),
                rejected_factor_f3_per_litre=row.get(
                    "rejected_factor_f3_per_litre"
                ),
                keep_hot_facility=row.get("keep_hot_facility"),
                keep_hot_timer=row.get("keep_hot_timer"),
                raw=tuple(row["raw"]),
            )
            boilers[boiler.pcdb_id] = boiler
    return boilers
# Table 105 index, populated once when this module is imported.
_TABLE_105_BY_ID: Final[dict[int, GasOilBoilerRecord]] = _load_table_105()


def gas_oil_boiler_record(pcdb_id: int) -> Optional[GasOilBoilerRecord]:
    """Return the Table 105 record for a `main_heating_index_number`.

    Args:
        pcdb_id: PCDB index number lodged on the cert.

    Returns:
        The matching `GasOilBoilerRecord`, or None when the cert's index
        number is not in Table 105. On None the caller falls back to the
        Table 4a/4b category defaults via `seasonal_efficiency(...)`.
    """
    found = _TABLE_105_BY_ID.get(pcdb_id)
    return found
def _load_table_362() -> dict[int, HeatPumpRecord]:
    """Build the Table 362 index of typed `HeatPumpRecord`s.

    Each non-blank NDJSON line of `_TABLE_362_JSONL` carries the raw
    field tuple captured at PCDB ETL time under its `"raw"` key. The
    format-465 positions are decoded here with `parse_heat_pump_row_raw`,
    the same helper the parser-layer tests pin. The module calls this
    once at import time.

    Returns:
        Mapping of `pcdb_id` to its record. When an id appears on more
        than one line, the last line wins.
    """
    with _TABLE_362_JSONL.open(encoding="utf-8") as handle:
        # Lazy pipeline: strip -> drop blanks -> decode JSON -> parse row.
        rows = (json.loads(text) for text in map(str.strip, handle) if text)
        pumps = (parse_heat_pump_row_raw(tuple(row["raw"])) for row in rows)
        return {pump.pcdb_id: pump for pump in pumps}
# Table 362 index, populated once when this module is imported.
_TABLE_362_BY_ID: Final[dict[int, HeatPumpRecord]] = _load_table_362()


def heat_pump_record(pcdb_id: int) -> Optional[HeatPumpRecord]:
    """Return the Table 362 record for a `main_heating_index_number`.

    Args:
        pcdb_id: PCDB index number lodged on the cert.

    Returns:
        The matching `HeatPumpRecord`, or None when the cert's index
        number is not in Table 362. On None the caller falls back to a
        Table 4a heat-pump category default (which in turn requires
        gateway work elsewhere in the cascade).
    """
    found = _TABLE_362_BY_ID.get(pcdb_id)
    return found
def _load_table_322() -> dict[int, DecentralisedMevRecord]:
    """Build the Table 322 index of typed `DecentralisedMevRecord`s.

    Each non-blank NDJSON line of `_TABLE_322_JSONL` carries the raw
    field tuple captured at PCDB ETL time under its `"raw"` key. The
    fields are re-joined with commas and decoded by
    `parse_decentralised_mev_row`, following the typed-on-load pattern
    used for Table 362. The module calls this once at import time.

    A missing jsonl file yields an empty dict rather than an error, so
    the ETL can bootstrap from scratch (it re-imports this module before
    the file exists on first ingest). The file is committed in-repo, so
    production callers always observe a populated dict.

    Returns:
        Mapping of `pcdb_id` to its record. When an id appears on more
        than one line, the last line wins.
    """
    fans: dict[int, DecentralisedMevRecord] = {}
    if _TABLE_322_JSONL.exists():
        with _TABLE_322_JSONL.open(encoding="utf-8") as handle:
            for text in map(str.strip, handle):
                if not text:
                    continue
                fields = tuple(json.loads(text)["raw"])
                fan = parse_decentralised_mev_row(",".join(fields))
                fans[fan.pcdb_id] = fan
    return fans
# Table 322 index, populated once when this module is imported.
_TABLE_322_BY_ID: Final[dict[int, DecentralisedMevRecord]] = _load_table_322()


def decentralised_mev_record(pcdb_id: int) -> Optional[DecentralisedMevRecord]:
    """Return the Table 322 record for an `MV PCDF Reference Number`.

    Args:
        pcdb_id: The cert lodgement field value identifying the unit.

    Returns:
        The matching `DecentralisedMevRecord`, or None when the index is
        not in Table 322. On None the caller falls back to the SAP 10.2
        Table 4g default SFP (0.8 W/(litre/sec) for MEV centralised or
        decentralised) per the spec's first-tier cascade rule (§2.6.3 /
        Table 4g note 1).
    """
    found = _TABLE_322_BY_ID.get(pcdb_id)
    return found
def _load_table_323() -> dict[int, MvhrRecord]:
    """Build the Table 323 index of typed `MvhrRecord`s (centralised
    MEV + MVHR).

    Each non-blank NDJSON line of `_TABLE_323_JSONL` supplies its fields
    under the `"raw"` key; they are re-joined with commas and decoded by
    `parse_centralised_mv_row`. The module calls this once at import
    time.

    A missing jsonl file yields an empty dict (ETL bootstrap concession;
    production callers always observe the committed file).

    Returns:
        Mapping of `pcdb_id` to its record. When an id appears on more
        than one line, the last line wins.
    """
    units: dict[int, MvhrRecord] = {}
    if _TABLE_323_JSONL.exists():
        with _TABLE_323_JSONL.open(encoding="utf-8") as handle:
            for text in map(str.strip, handle):
                if not text:
                    continue
                fields = tuple(json.loads(text)["raw"])
                unit = parse_centralised_mv_row(",".join(fields))
                units[unit.pcdb_id] = unit
    return units
# Table 323 index, populated once when this module is imported.
_TABLE_323_BY_ID: Final[dict[int, MvhrRecord]] = _load_table_323()


def mvhr_record(pcdb_id: int) -> Optional[MvhrRecord]:
    """Return the Table 323 record for an `MV PCDF Reference Number`
    (centralised MEV / MVHR systems).

    Args:
        pcdb_id: The cert lodgement field value identifying the unit.

    Returns:
        The matching `MvhrRecord`, or None when the index is not in
        Table 323. On None the caller falls back to the SAP 10.2 Table
        4g default data (MVHR raw SFP 2.0, efficiency 66%).
    """
    found = _TABLE_323_BY_ID.get(pcdb_id)
    return found
def _load_table_329() -> dict[int, MvInUseFactorsRecord]:
    """Build the Table 329 index of typed `MvInUseFactorsRecord`s,
    keyed by system type rather than by PCDB id.

    Each non-blank NDJSON line of `_TABLE_329_JSONL` supplies its fields
    under the `"raw"` key; they are re-joined with commas and decoded by
    `parse_mv_in_use_factors_row`. The module calls this once at import
    time.

    A missing jsonl file yields an empty dict (ETL bootstrap concession;
    production callers always observe the committed file).

    Returns:
        Mapping of `system_type` to its record. When a system type
        appears on more than one line, the last line wins.
    """
    factors: dict[int, MvInUseFactorsRecord] = {}
    if _TABLE_329_JSONL.exists():
        with _TABLE_329_JSONL.open(encoding="utf-8") as handle:
            for text in map(str.strip, handle):
                if not text:
                    continue
                fields = tuple(json.loads(text)["raw"])
                entry = parse_mv_in_use_factors_row(",".join(fields))
                factors[entry.system_type] = entry
    return factors
# Table 329 index keyed by system type, populated once at import time.
_TABLE_329_BY_SYSTEM_TYPE: Final[dict[int, MvInUseFactorsRecord]] = _load_table_329()


def mv_in_use_factors_record(system_type: int) -> Optional[MvInUseFactorsRecord]:
    """Return the Table 329 in-use factors for a ventilation system type.

    Args:
        system_type: SAP 10.2 ventilation system type (1, 2, 3, 5, 10
            per PCDF Spec §A.20).

    Returns:
        The matching `MvInUseFactorsRecord`, or None when the
        system_type is not in Table 329. On None the caller can fall
        back to SAP 10.2 Table 4g defaults (system_type=10) or skip the
        IUF adjustment per spec Note: "If there is no applicable
        approved installation scheme the values for with and without
        scheme are the same".
    """
    found = _TABLE_329_BY_SYSTEM_TYPE.get(system_type)
    return found