"""BRE Product Characteristics Database (PCDB) lookups. The PCDB (pcdb10.dat) lists manufacturer-declared performance data for heating and ventilation equipment, keyed by an integer "Index Number" that RdSAP certs lodge in `MainHeatingDetail.main_heating_index_number`. Where a cert references a PCDB record, SAP 10.2 Appendix D2.1 mandates that the PCDB winter seasonal efficiency overrides the Table 4b category default — closing most of the cert-vs-rating efficiency gap documented in [ADR-0010 §4](../../../../../../../docs/adr/0010-sap10-calculator-spec-target-and-validation.md#4-pcdb-integration-is-promoted-from-session-c-to-a-prerequisite). Public surface: - `gas_oil_boiler_record(pcdb_id)`: Table 105 lookup. - `GasOilBoilerRecord`: typed record dataclass. - `parser.py`: per-table row parsers (Table 105 typed; raw walker for the other 7 tables). - `etl.py`: walks the multi-table `pcdb10.dat` source and writes one newline-delimited JSON file per table under `domain/sap10_calculator/tables/pcdb/data/`. Reference: BRE PCDB pcdb10.dat (April 2026 revision); SAP 10.2 specification (14-03-2025) Appendix D2.1. """ from __future__ import annotations import json from pathlib import Path from typing import Final, Optional from domain.sap10_calculator.tables.pcdb.parser import ( DecentralisedMevRecord, GasOilBoilerRecord, HeatPumpRecord, MevFanConfig, MvInUseFactorsRecord, parse_decentralised_mev_row, parse_heat_pump_row_raw, parse_mv_in_use_factors_row, ) __all__ = [ "DecentralisedMevRecord", "GasOilBoilerRecord", "HeatPumpRecord", "MevFanConfig", "MvInUseFactorsRecord", "decentralised_mev_record", "gas_oil_boiler_record", "heat_pump_record", "mv_in_use_factors_record", ] _PCDB_DATA_DIR: Final[Path] = Path(__file__).resolve().parent / "data" _TABLE_105_JSONL: Final[Path] = ( _PCDB_DATA_DIR / "pcdb_table_105_gas_oil_boilers.jsonl" ) _TABLE_322_JSONL: Final[Path] = ( _PCDB_DATA_DIR / "pcdb_table_322_decentralised_mev.jsonl" ) _TABLE_329_JSONL: Final[Path] = ( _PCDB_DATA_DIR / "pcdb_table_329_mv_in_use_factors.jsonl" ) _TABLE_362_JSONL: Final[Path] = ( _PCDB_DATA_DIR / "pcdb_table_362_heat_pumps.jsonl" ) def _load_table_105() -> dict[int, GasOilBoilerRecord]: """Read the Table 105 NDJSON at import time and build a by-pcdb-id dict. ~5MB / ~4000 rows; one-off ~50ms cost. The Python runtime caches the dict so repeated lookups are O(1).""" records_by_id: dict[int, GasOilBoilerRecord] = {} with _TABLE_105_JSONL.open(encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue data = json.loads(line) record = GasOilBoilerRecord( pcdb_id=data["pcdb_id"], brand_name=data["brand_name"], model_name=data["model_name"], model_qualifier=data["model_qualifier"], winter_efficiency_pct=data["winter_efficiency_pct"], summer_efficiency_pct=data["summer_efficiency_pct"], comparative_hot_water_efficiency_pct=data["comparative_hot_water_efficiency_pct"], output_kw_max=data["output_kw_max"], final_year_of_manufacture=data["final_year_of_manufacture"], subsidiary_type=data.get("subsidiary_type"), store_type=data.get("store_type"), separate_dhw_tests=data.get("separate_dhw_tests"), rejected_energy_proportion_r1=data.get("rejected_energy_proportion_r1"), loss_factor_f1_kwh_per_day=data.get("loss_factor_f1_kwh_per_day"), loss_factor_f2_kwh_per_day=data.get("loss_factor_f2_kwh_per_day"), rejected_factor_f3_per_litre=data.get("rejected_factor_f3_per_litre"), keep_hot_facility=data.get("keep_hot_facility"), keep_hot_timer=data.get("keep_hot_timer"), raw=tuple(data["raw"]), ) records_by_id[record.pcdb_id] = record return records_by_id _TABLE_105_BY_ID: Final[dict[int, GasOilBoilerRecord]] = _load_table_105() def gas_oil_boiler_record(pcdb_id: int) -> Optional[GasOilBoilerRecord]: """Table 105 lookup by `main_heating_index_number`. Returns None when the cert's index number is not in Table 105 — caller falls back to Table 4a/4b category defaults via `seasonal_efficiency(...)`.""" return _TABLE_105_BY_ID.get(pcdb_id) def _load_table_362() -> dict[int, HeatPumpRecord]: """Read the Table 362 NDJSON at import time and build a by-pcdb-id dict of typed `HeatPumpRecord`s. Each NDJSON row carries the raw field tuple parsed once at PCDB ETL time; we decode the format-465 positions here via the same `parse_heat_pump_row_raw` helper that the parser-layer tests pin.""" records_by_id: dict[int, HeatPumpRecord] = {} with _TABLE_362_JSONL.open(encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue data = json.loads(line) raw = tuple(data["raw"]) record = parse_heat_pump_row_raw(raw) records_by_id[record.pcdb_id] = record return records_by_id _TABLE_362_BY_ID: Final[dict[int, HeatPumpRecord]] = _load_table_362() def heat_pump_record(pcdb_id: int) -> Optional[HeatPumpRecord]: """Table 362 lookup by `main_heating_index_number`. Returns None when the cert's index number is not in Table 362 — caller falls back to a Table 4a heat-pump category default (which in turn requires gateway work elsewhere in the cascade).""" return _TABLE_362_BY_ID.get(pcdb_id) def _load_table_322() -> dict[int, DecentralisedMevRecord]: """Read the Table 322 NDJSON at import time and build a by-pcdb-id dict of typed `DecentralisedMevRecord`s. Each NDJSON row carries the raw field tuple parsed once at PCDB ETL time; we re-decode via `parse_decentralised_mev_row` here for consistency with the Table 362 pattern (typed-on-load from raw tuple). Returns an empty dict when the jsonl file is missing — this lets the ETL bootstrap from scratch (the ETL re-imports this module before the jsonl exists on first ingest). The file is committed in-repo so production callers always observe a populated dict. """ records_by_id: dict[int, DecentralisedMevRecord] = {} if not _TABLE_322_JSONL.exists(): return records_by_id with _TABLE_322_JSONL.open(encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue data = json.loads(line) raw_fields = tuple(data["raw"]) record = parse_decentralised_mev_row(",".join(raw_fields)) records_by_id[record.pcdb_id] = record return records_by_id _TABLE_322_BY_ID: Final[dict[int, DecentralisedMevRecord]] = _load_table_322() def decentralised_mev_record(pcdb_id: int) -> Optional[DecentralisedMevRecord]: """Table 322 lookup by `MV PCDF Reference Number` (cert lodgement field). Returns None when the index is not in Table 322 — caller falls back to the SAP 10.2 Table 4g default SFP (0.8 W/(litre/sec) for MEV centralised or decentralised) per the spec's first-tier cascade rule (§2.6.3 / Table 4g note 1).""" return _TABLE_322_BY_ID.get(pcdb_id) def _load_table_329() -> dict[int, MvInUseFactorsRecord]: """Read the Table 329 NDJSON at import time and build a by-system- type dict of typed `MvInUseFactorsRecord`s. Returns empty when the jsonl is missing (ETL bootstrap concession; production callers always observe the committed file).""" records_by_type: dict[int, MvInUseFactorsRecord] = {} if not _TABLE_329_JSONL.exists(): return records_by_type with _TABLE_329_JSONL.open(encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue data = json.loads(line) raw_fields = tuple(data["raw"]) record = parse_mv_in_use_factors_row(",".join(raw_fields)) records_by_type[record.system_type] = record return records_by_type _TABLE_329_BY_SYSTEM_TYPE: Final[dict[int, MvInUseFactorsRecord]] = _load_table_329() def mv_in_use_factors_record(system_type: int) -> Optional[MvInUseFactorsRecord]: """Table 329 lookup by SAP 10.2 ventilation system type (1, 2, 3, 5, 10 per PCDF Spec §A.20). Returns None when the system_type is not in Table 329 — caller can fall back to SAP 10.2 Table 4g defaults (system_type=10) or skip the IUF adjustment per spec Note: "If there is no applicable approved installation scheme the values for with and without scheme are the same".""" return _TABLE_329_BY_SYSTEM_TYPE.get(system_type)