mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
MVHR (24a) heat-recovery support, part 1: the PCDB data layer. PCDB Table 323 (PCDF Spec Rev 6b §A.18, Format 426; pcdb10.dat carries Format 431, header `$323,431,...`) holds the per-wet-room SFP + heat-exchanger efficiency for centralised MEV / MVHR units. Added `MvhrRecord` / `MvhrDataPoint`, `parse_centralised_mv_row` / `parse_table_323`, the ETL step, the committed jsonl, and the `mvhr_record(pcdb_id)` runtime lookup (mirrors Table 322). SAP 10.2 §2.6.4/§2.6.6: "MVHR ... SFP is a single value depending on the number of wet rooms" — each test group's leading field is the wet-room count; callers select the group matching the dwelling lodgement. Worksheet-proven on simulated case 49 (000565, 2 wet rooms, Vent Axia Sentinel Kinetic B 500140 → flow 21.0, SFP 0.88, efficiency 91%). Also decoded the MVHR heat-recovery efficiency in-use factor from Table 329 (Format 432): system_type 3 ducts-inside-envelope = 0.90 (case-49 (23c) = 91 × 0.90 = 81.9%), cross-checked against system_type 10 = 0.70 (= SAP 10.2 Table 4g default heat-recovery in-use factor). "Table 4h is no longer used – data now stored in the PCDB" (SAP 10.2 p.176). The outside-envelope efficiency columns + with-scheme SFP blocks are preserved verbatim in `raw` (no fixture exercises them yet). Note: pyright strict type gate not run locally (pyright not installed). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
148 lines
6 KiB
Python
148 lines
6 KiB
Python
"""ETL: parse BRE PCDB pcdb10.dat into per-table JSON files.
|
|
|
|
Idempotent. Re-run when BRE publishes an updated pcdb10.dat. JSON files
|
|
are committed in-repo alongside the source .dat so callers can load
|
|
without a build step. Run via `python -m domain.sap10_calculator.tables.pcdb.etl`.
|
|
|
|
Reference: BRE PCDB pcdb10.dat (April 2026 revision).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import asdict
|
|
from pathlib import Path
|
|
|
|
from domain.sap10_calculator.tables.pcdb.parser import (
|
|
DecentralisedMevRecord,
|
|
GasOilBoilerRecord,
|
|
MvhrRecord,
|
|
MvInUseFactorsRecord,
|
|
RawPcdbRecord,
|
|
parse_table_105,
|
|
parse_table_322,
|
|
parse_table_323,
|
|
parse_table_329,
|
|
parse_table_raw,
|
|
)
|
|
|
|
|
|
# Output filenames for the tables that have a dedicated typed parser.
_TABLE_105_OUTPUT_FILENAME: str = "pcdb_table_105_gas_oil_boilers.jsonl"
_TABLE_322_OUTPUT_FILENAME: str = "pcdb_table_322_decentralised_mev.jsonl"
_TABLE_323_OUTPUT_FILENAME: str = "pcdb_table_323_centralised_mev_mvhr.jsonl"
_TABLE_329_OUTPUT_FILENAME: str = "pcdb_table_329_mv_in_use_factors.jsonl"

# PCDB table id -> output filename for the tables that are ingested as
# plain `RawPcdbRecord` rows (pcdb_id + raw). Field-level typing for
# these is postponed to later slices, once the cert-side wiring for
# each table exists.
_RAW_TABLES: dict[str, str] = {
    "122": "pcdb_table_122_solid_fuel_boilers.jsonl",
    "143": "pcdb_table_143_micro_cogen.jsonl",
    "313": "pcdb_table_313_flue_gas_heat_recovery.jsonl",
    "353": "pcdb_table_353_waste_water_heat_recovery.jsonl",
    "362": "pcdb_table_362_heat_pumps.jsonl",
    "391": "pcdb_table_391_high_heat_retention_storage_heaters.jsonl",
    "506": "pcdb_table_506_heat_interface_units.jsonl",
}
|
|
|
|
|
|
def _gas_oil_record_to_jsonable(record: GasOilBoilerRecord) -> dict[str, object]:
    """Turn a typed Table 105 record into a dict ready for `json.dumps`.

    Every dataclass field is exported via `asdict`; the `raw` entry is
    then replaced by a plain list built from `record.raw`."""
    return {**asdict(record), "raw": list(record.raw)}
|
|
|
|
|
|
def _raw_record_to_jsonable(record: RawPcdbRecord) -> dict[str, object]:
    """Reduce a generic raw PCDB record to a JSON-safe
    `{"pcdb_id": ..., "raw": [...]}` dict."""
    raw_fields = list(record.raw)
    return {"pcdb_id": record.pcdb_id, "raw": raw_fields}
|
|
|
|
|
|
def _write_ndjson(*, output_path: Path, records: list[dict[str, object]]) -> None:
    """Write `records` to `output_path` as newline-delimited JSON.

    One record per line, no top-level array, no indent, so diffs stay
    line-granular when records are added or changed. Non-ASCII text is
    written literally (`ensure_ascii=False`) and encoded as UTF-8.

    Every record line is terminated by a single `\\n`. An empty
    `records` list therefore yields an empty file rather than a lone
    blank line, which would not be a valid JSON document for a
    line-by-line reader. The payload is written as bytes so the line
    terminator is `\\n` on every platform and re-running the ETL gives
    byte-identical output regardless of the host OS."""
    payload = "".join(
        f"{json.dumps(record, ensure_ascii=False)}\n" for record in records
    )
    # write_bytes bypasses text-mode newline translation (`\n` -> os.linesep).
    output_path.write_bytes(payload.encode("utf-8"))
|
|
|
|
|
|
def run_etl(*, source: Path, output_dir: Path) -> None:
    """Regenerate the per-table `.jsonl` files from `source` (pcdb10.dat).

    `output_dir` is created (parents included) when missing and `source`
    is decoded as latin-1. One newline-delimited JSON file is written
    per table, in this order: Table 105, Table 322, Table 323,
    Table 329, then every table enumerated in `_RAW_TABLES`. Intended
    to be idempotent; records are written in the order the parsers
    yield them, to keep diffs small."""
    output_dir.mkdir(parents=True, exist_ok=True)
    dat_text = source.read_text(encoding="latin-1")

    def emit(filename: str, rows: list[dict[str, object]]) -> None:
        # Single place that maps a table's filename into `output_dir`.
        _write_ndjson(output_path=output_dir / filename, records=rows)

    # Table 105 (gas/oil boilers): typed record, exported field by field.
    emit(
        _TABLE_105_OUTPUT_FILENAME,
        [_gas_oil_record_to_jsonable(rec) for rec in parse_table_105(dat_text)],
    )

    # Table 322 (Decentralised MEV): parsed with `parse_table_322` so the
    # per-fan-configuration block (config_code, flow, SFP triplets) is
    # available to the SAP 10.2 §2.6.4 SFPav cascade. On disk it is the
    # raw row, typed again on load (same pattern noted for Table 362 in
    # `__init__.py`).
    emit(
        _TABLE_322_OUTPUT_FILENAME,
        [
            _decentralised_mev_record_to_jsonable(rec)
            for rec in parse_table_322(dat_text)
        ],
    )

    # Table 323 (Centralised MEV and MVHR): parsed with `parse_table_323`,
    # which exposes the per-wet-room SFP + heat-recovery-efficiency test
    # points for the SAP 10.2 §2.6.4/§2.6.6 MVHR cascade. On disk it is
    # the raw row, typed again on load (as for Table 322).
    emit(
        _TABLE_323_OUTPUT_FILENAME,
        [_mvhr_record_to_jsonable(rec) for rec in parse_table_323(dat_text)],
    )

    # Table 329 (MV In-Use Factors): parsed with `parse_table_329`, which
    # exposes the per-ducting-type SFP IUF multipliers for "no approved
    # scheme" installations (the only variant our cohort exercises). On
    # disk it is the raw row, typed again on load.
    emit(
        _TABLE_329_OUTPUT_FILENAME,
        [
            _mv_in_use_factors_record_to_jsonable(rec)
            for rec in parse_table_329(dat_text)
        ],
    )

    # Remaining tables are carried through untyped as `{pcdb_id, raw}` rows.
    for table_id, filename in _RAW_TABLES.items():
        emit(
            filename,
            [
                _raw_record_to_jsonable(rec)
                for rec in parse_table_raw(dat_text, table_id)
            ],
        )
|
|
|
|
|
|
def _decentralised_mev_record_to_jsonable(
    record: DecentralisedMevRecord,
) -> dict[str, object]:
    """Export a typed Table 322 record as `{pcdb_id, raw}`.

    This is the same shape `_raw_record_to_jsonable` produces, so typed
    and raw tables share one on-disk format. Per the original note, the
    lookup side decodes the row again with `parse_decentralised_mev_row`
    at import time."""
    return {"pcdb_id": record.pcdb_id, "raw": [*record.raw]}
|
|
|
|
|
|
def _mvhr_record_to_jsonable(record: MvhrRecord) -> dict[str, object]:
    """Export a typed Table 323 record as `{pcdb_id, raw}`, the shape
    used by the other typed tables. Per the original note, the lookup
    side decodes the row again with `parse_centralised_mv_row` at
    import time."""
    jsonable: dict[str, object] = {"pcdb_id": record.pcdb_id}
    jsonable["raw"] = list(record.raw)
    return jsonable
|
|
|
|
|
|
def _mv_in_use_factors_record_to_jsonable(
    record: MvInUseFactorsRecord,
) -> dict[str, object]:
    """Export a typed Table 329 record as `{system_type, raw}`.

    Table 329 rows are keyed by `system_type`, not `pcdb_id`, so that
    is the identifier stored here; per the original note, the
    `mv_in_use_factors(system_type)` lookup resolves on the same key."""
    raw_fields = list(record.raw)
    return {"system_type": record.system_type, "raw": raw_fields}
|
|
|
|
|
|
if __name__ == "__main__":  # pragma: no cover — manual ETL invocation
    # Source .dat and generated .jsonl files share the `data/` directory
    # that sits next to this module.
    data_dir = Path(__file__).resolve().with_name("data")
    run_etl(source=data_dir / "pcdb10.dat", output_dir=data_dir)
|