Model/domain/sap10_calculator/tables/pcdb/parser.py
Khalim Conn-Kowlessar 74240f8c44 Slice 102f-prep.1: PCDB Table 362 heating_duration_code field
SAP 10.2 Appendix N3.5 (PDF p.105 line 6099) — heat-pump packages
lodge a "Daily heating duration" field encoded as "24" / "16" / "9"
/ "V" (Variable). Footnote 48 (PDF p.105): "Daily heating durations
of 24, 16 and 9 hours are retained for legacy purposes" — modern
records always lodge "V".

Format-465 position 48 holds the code; cohort ground truth: "V" on
Mitsubishi PUZ-WM50VHA (104568) and Daikin EDLQ05CAV3 (102421).
The field drives Appendix N3.5 + Table N4/N5 day allocation for the
extended-heating MIT cascade (slice 102f-prep.2 onward).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-01 16:28:46 +00:00

397 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Per-table row parsers for BRE PCDB pcdb10.dat records.

Each PCDB table has its own CSV-shaped record format documented by BRE
(format codes in `$<table>,<format>,...` headers of pcdb10.dat). Field
positions are reverse-engineered from sample records and cross-checked
against ground-truth records published at https://www.ncm-pcdb.org.uk.

The parsers expose two layers per record:

- Typed high-confidence fields (pcdb_id, manufacturer, model, winter/
  summer efficiency, etc.) named per BRE's web entry vocabulary.
- The full raw row as a tuple of strings, for forensics on undecoded
  fields and audit trails when BRE bumps the format version.

Reference: BRE PCDB pcdb10.dat April 2026; user-verified web records.
"""
from __future__ import annotations

import math
from dataclasses import dataclass
from typing import Final, Optional
def _parse_optional_float(value: str) -> Optional[float]:
"""Empty PCDB fields are blank strings, not 'null'. Treat blank or
non-numeric (e.g. '>70kW' range indicator on output-power fields) as
None — the raw value is preserved on the record's `raw` tuple."""
value = value.strip()
if not value:
return None
try:
return float(value)
except ValueError:
return None
def _parse_optional_int(value: str) -> Optional[int]:
"""Some PCDB fields carry status strings ('obsolete', 'discontinued')
where a year would otherwise live. Treat any non-numeric value as
missing rather than erroring — the status is preserved on `raw`."""
value = value.strip()
if not value:
return None
try:
return int(value)
except ValueError:
return None
@dataclass(frozen=True)
class GasOilBoilerRecord:
    """SAP 10.2 Appendix D2.1 PCDB record — Table 105 (Gas and Oil Boilers).

    Field positions verified against the ncm-pcdb.org.uk web entry for
    pcdb_id 000098 (Baxi Heating Wm 20/3rs): winter eff = 66.0%, summer
    eff = 56.0%, comparative HW = 40.8%, output 5.86 kW, final-year 1990.

    The record is immutable. `Optional` fields are None when the source
    column is blank or does not hold a number (as decoded by
    `parse_table_105_row`); the untouched column text stays on `raw`.
    """

    # Identity columns.
    pcdb_id: int
    brand_name: str
    model_name: str
    model_qualifier: str

    # Headline efficiencies and output as shown on the BRE web entry.
    winter_efficiency_pct: Optional[float]
    summer_efficiency_pct: Optional[float]
    comparative_hot_water_efficiency_pct: Optional[float]
    output_kw_max: Optional[float]
    final_year_of_manufacture: Optional[int]

    # PCDF Spec Rev 6b field 16 (0-idx 15): 0=normal, 1=integral FGHRS,
    # 2=combined HP+boiler, 3=combined HP+boiler+FGHRS. Gates the Table
    # 3b/3c row selection — only `subsidiary_type=0` exercises the
    # "Instantaneous with non-storage FGHRS or without FGHRS" row 1.
    subsidiary_type: Optional[int]

    # PCDF Spec Rev 6b field 39 (0-idx 38): 0=not storage combi, 1=primary
    # water store, 2=secondary store, 3=CPSU. Gates storage-combi rows in
    # Table 3b/3c (deferred until a fixture exercises).
    store_type: Optional[int]

    # SAP10.2 Appendix J Table 3b/3c — combi-loss fields per BRE PCDF Spec
    # Rev 6b (12 May 2021), Gas and Oil Boiler Table, fields 48 / 51 / 52
    # / 56 / 57 (see `domain/sap10_calculator/docs/specs/PCDF_Spec_Rev-06b_12_May_2021.pdf`
    # pp. 14-15). Populated only for boilers EN 13203-2 / OPS 26 tested;
    # SAP-default boilers leave them all blank → `separate_dhw_tests=0`
    # and (61)m falls back to Table 3a. Field 48 encodes the test
    # schedules: 0=none, 1=schedule 2 only (profile M → Table 3b row 1),
    # 2=schedules 2 and 3 (profiles M+L → Table 3c), 3=schedules 2 and 1
    # (profiles M+S → Table 3c). Field 55 (r2) is lodged but explicitly
    # excluded from SAP assessments ("only r1") so it is not surfaced.
    separate_dhw_tests: Optional[int]
    rejected_energy_proportion_r1: Optional[float]
    loss_factor_f1_kwh_per_day: Optional[float]
    loss_factor_f2_kwh_per_day: Optional[float]
    rejected_factor_f3_per_litre: Optional[float]

    # Every column of the source row, verbatim, in file order.
    raw: tuple[str, ...]
# Line prefixes that structure pcdb10.dat: `$` opens a table section header
# (`$<id>,<format>,...`) and `#` marks a comment line.
_TABLE_HEADER_PREFIX: Final[str] = "$"
_COMMENT_PREFIX: Final[str] = "#"
# Section id of Table 105 (Gas and Oil Boilers).
_TABLE_105_HEADER_ID: Final[str] = "105"
def _walk_table_records(dat_text: str, table_id: str) -> list[str]:
    """Collect the record rows that sit inside one PCDB table section.

    The .dat file opens each table with a `$<id>,<format>,...` header on
    its own line; the section runs until the next `$` header or the end
    of the input. Blank lines and `#`-prefixed comment lines are ignored
    wherever they appear.

    Args:
        dat_text: Full text of a pcdb10.dat file (or a fragment of one).
        table_id: Table id to match against the text between `$` and the
            first comma of each header line.

    Returns:
        The matching rows in file order, as a list. Rows keep any leading
        or trailing spaces; only the line terminator is removed.
    """
    collected: list[str] = []
    in_section = False
    # splitlines() already consumes every line terminator, including "\r".
    for line in dat_text.splitlines():
        content = line.strip()
        if content == "" or content.startswith(_COMMENT_PREFIX):
            continue
        if content.startswith(_TABLE_HEADER_PREFIX):
            # A header both closes the previous section and decides
            # whether the rows that follow belong to the wanted table.
            header_id, _, _ = content[1:].partition(",")
            in_section = header_id == table_id
        elif in_section:
            collected.append(line)
    return collected
@dataclass(frozen=True)
class RawPcdbRecord:
    """Untyped PCDB record — pcdb_id keyed lookup + raw row for future
    per-table typed refinement.

    Used for tables (122/143/362/391/313/353/506) where field positions
    have not yet been ground-truth verified.
    """

    # Integer parsed from the first column of the row.
    pcdb_id: int
    # Every column of the source row, verbatim, in file order.
    raw: tuple[str, ...]
@dataclass(frozen=True)
class PsrEfficiencyGroup:
    """One PSR-dependent group from a Table 362 heat-pump record.

    Format 465 stores each group as 9 raw fields; the three populated
    positions are tabulated here for SAP 10.2 Appendix N interpolation:

        psr              plant size ratio (decimal, e.g. 0.2, 0.5, 1.0)
        eta_space_1_pct  space heating thermal efficiency (% gross)
                         — used by N3.6: (206) = 0.95 × eta_space_1
        eta_water_3_pct  calculated water heating thermal efficiency
                         (% gross) for HPs providing both space + water
                         — used by N3.7(a) + footnote 49: (217) =
                         in_use_factor × eta_water_3 (in_use_factor per
                         N3.7 table — 0.95 or 0.60 depending on whether
                         the cert's cylinder meets the PCDB-lodged
                         criteria of volume / HX area / heat loss).
    """

    psr: float
    eta_space_1_pct: float
    eta_water_3_pct: float
@dataclass(frozen=True)
class HeatPumpRecord:
    """SAP 10.2 Appendix N PCDB record — Table 362 (Heat Pumps).

    Format 465 of pcdb10.dat (April 2026 revision) extends the published
    PCDF Spec Rev 6b §A.23 format 464 with additional header fields and
    a larger PSR-group set (up to 14 groups). Field positions are
    reverse-engineered against the BRE web entry at
    https://www.ncm-pcdb.org.uk/sap/pcdbdetails.jsp?type=362&id=<pcdb_id>;
    Mitsubishi PUZ-WM50VHA (104568) and Daikin EDLQ05CAV3 (102421)
    provide the cohort ground-truth.

    Encoded fields per format 464 §A.23 docs (vocabulary preserved):

        fuel               39 = electricity (Note: SAP 10.2 spec line 5901
                           allows non-electric heat pumps too)
        service_provision  1 = space + water heating all year
                           2 = space + water during heating season only
                           3 = space heating only
                           4 = water heating only
        hw_vessel_mode     1 = integral vessel
                           2 = separate and specified vessel (fields 19-21)
                           3 = separate but unspecified vessel
                           4 = none (service provision code 3)

    vessel_volume_l, vessel_heat_loss_kwh_per_day,
    vessel_heat_exchanger_area_m2: per spec §A.23 field 19/20/21 —
    only populated when `hw_vessel_mode in {1, 2}`.

    `max_output_kw` (spec §A.23 field 30) is the PSR-denominator per
    PDF p.100 line 5946 ("maximum nominal output of the package").

    `heating_duration_code` (format-465 position 48) encodes the
    package's daily heating duration per SAP 10.2 Appendix N3.5 (PDF
    p.105 line 6099): "24", "16", "9", or "V" (Variable). Drives the
    extended-heating-schedule day allocation via Table N4/N5. Per
    footnote 48, modern records always lodge "V"; the fixed durations
    are retained for legacy purposes.

    `psr_groups` carries the PSR-dependent efficiency table (up to 14
    rows) used by SAP 10.2 Appendix N3.6 (space heating) and N3.7(a)
    (water heating), interpolated at the dwelling's PSR per spec PDF
    p.100 line 5957.
    """

    # Identity columns.
    pcdb_id: int
    brand_name: str
    model_name: str
    model_qualifier: str

    # Coded header fields; see the vocabulary in the class docstring.
    fuel: Optional[int]
    service_provision: Optional[int]
    hw_vessel_mode: Optional[int]

    # Hot-water vessel characteristics.
    vessel_volume_l: Optional[float]
    vessel_heat_loss_kwh_per_day: Optional[float]
    vessel_heat_exchanger_area_m2: Optional[float]

    max_output_kw: Optional[float]
    # Stored as the lodged text, not validated against the four codes.
    heating_duration_code: Optional[str]
    psr_groups: tuple[PsrEfficiencyGroup, ...]

    # Every column of the source row, verbatim, in file order.
    raw: tuple[str, ...]
# Format 465 field offsets in the raw row (0-indexed). Derived by
# cross-referencing pcdb10.dat record 104568 (Mitsubishi Ecodan 5.0 kW)
# with the BRE web entry's labelled values.
_HP_IDX_BRAND_NAME: Final[int] = 6
_HP_IDX_MODEL_NAME: Final[int] = 7
_HP_IDX_MODEL_QUALIFIER: Final[int] = 8
_HP_IDX_FUEL: Final[int] = 16
_HP_IDX_SERVICE_PROVISION: Final[int] = 22
_HP_IDX_HW_VESSEL_MODE: Final[int] = 23
# Hot-water vessel fields: three consecutive columns.
_HP_IDX_VESSEL_VOLUME_L: Final[int] = 24
_HP_IDX_VESSEL_HEAT_LOSS_KWH_PER_DAY: Final[int] = 25
_HP_IDX_VESSEL_HEAT_EXCHANGER_AREA_M2: Final[int] = 26
# Maximum nominal output of the package — the PSR denominator.
_HP_IDX_MAX_OUTPUT_KW: Final[int] = 47
# Format 465 position 48 — daily heating duration code per SAP 10.2
# Appendix N3.5 (PDF p.105 line 6099). Cohort ground-truth: "V" lodged
# on Mitsubishi PUZ-WM50VHA (104568) and Daikin EDLQ05CAV3 (102421).
_HP_IDX_HEATING_DURATION_CODE: Final[int] = 48
# Format 465 PSR-group block: idx[58] is the group count; groups start
# at idx[59], 9 fields wide, with PSR / η_space,1 / η_water,3 at the
# offsets below within each group.
_HP_IDX_NUM_PSR_GROUPS: Final[int] = 58
_HP_PSR_GROUP_START: Final[int] = 59
_HP_PSR_GROUP_STRIDE: Final[int] = 9
# Offsets are relative to the first field of a group.
_HP_PSR_GROUP_OFFSET_PSR: Final[int] = 0
_HP_PSR_GROUP_OFFSET_ETA_SPACE_1: Final[int] = 2
_HP_PSR_GROUP_OFFSET_ETA_WATER_3: Final[int] = 6
def _parse_psr_groups(raw: tuple[str, ...]) -> tuple[PsrEfficiencyGroup, ...]:
    """Extract the PSR-dependent efficiency rows from a format-465 record.

    The row lodges a group count at `_HP_IDX_NUM_PSR_GROUPS`; the groups
    follow from `_HP_PSR_GROUP_START`, each `_HP_PSR_GROUP_STRIDE` fields
    wide, with PSR / η_space,1 / η_water,3 at the `_HP_PSR_GROUP_OFFSET_*`
    positions inside the group.

    Args:
        raw: All columns of one Table 362 row.

    Returns:
        The decoded groups in row order. Empty when the row is too short
        to hold the count, or the count is blank, non-numeric, zero or
        negative. Decoding stops at the first group that would read past
        the end of the row; a group with a blank or non-numeric value in
        any of the three decoded positions is left out.
    """
    row_width = len(raw)
    if row_width <= _HP_IDX_NUM_PSR_GROUPS:
        return ()
    declared = _parse_optional_int(raw[_HP_IDX_NUM_PSR_GROUPS])
    if declared is None or declared < 1:
        return ()

    decoded: list[PsrEfficiencyGroup] = []
    for ordinal in range(declared):
        start = _HP_PSR_GROUP_START + ordinal * _HP_PSR_GROUP_STRIDE
        # η_water,3 is the furthest field read, so it bounds the group.
        if start + _HP_PSR_GROUP_OFFSET_ETA_WATER_3 >= row_width:
            break
        psr = _parse_optional_float(raw[start + _HP_PSR_GROUP_OFFSET_PSR])
        eta_space = _parse_optional_float(raw[start + _HP_PSR_GROUP_OFFSET_ETA_SPACE_1])
        eta_water = _parse_optional_float(raw[start + _HP_PSR_GROUP_OFFSET_ETA_WATER_3])
        # NOTE(review): a group with a blank η_water,3 is dropped whole —
        # confirm space-heating-only packages always lodge a value there.
        if psr is not None and eta_space is not None and eta_water is not None:
            decoded.append(
                PsrEfficiencyGroup(
                    psr=psr,
                    eta_space_1_pct=eta_space,
                    eta_water_3_pct=eta_water,
                )
            )
    return tuple(decoded)
def interpolate_heat_pump_efficiency_at_psr(
    psr_groups: tuple[PsrEfficiencyGroup, ...],
    *,
    target_psr: float,
) -> tuple[float, float]:
    """Interpolate heat-pump efficiencies at the dwelling's PSR.

    SAP 10.2 PDF p.100 line 5957 — linear interpolation between the
    two PSR rows enclosing `target_psr`.

    Per spec PDF p.101 lines 6007-6008: clamp to the smallest PSR
    in the record when `target_psr` is below it, and to the largest
    when above ("if the PSR is greater than the largest PSR in the
    database record then the heat pump space and water heating
    fractions for the largest PSR should be used, and if the PSR is
    less than the smallest PSR in the database record then the heat
    pump space and water heating fractions for the smallest PSR
    should be used").

    The groups are ordered by PSR before use, so "smallest" and
    "largest" mean exactly that even if the record lodges its groups
    out of order. The sort is stable: groups already in ascending
    order are used exactly as given.

    Args:
        psr_groups: PSR-dependent efficiency rows of one record.
        target_psr: Plant size ratio to evaluate at.

    Returns:
        `(eta_space_1_pct, eta_water_3_pct)` at `target_psr`.

    Raises:
        ValueError: If `psr_groups` is empty or `target_psr` is NaN.
    """
    if not psr_groups:
        raise ValueError("PSR groups required for interpolation")
    # NaN compares false against everything and would otherwise slip past
    # both clamps and every bracket test.
    if target_psr != target_psr:
        raise ValueError("target_psr must not be NaN")

    ordered = sorted(psr_groups, key=lambda group: group.psr)
    smallest, largest = ordered[0], ordered[-1]
    if target_psr <= smallest.psr:
        return (smallest.eta_space_1_pct, smallest.eta_water_3_pct)
    if target_psr >= largest.psr:
        return (largest.eta_space_1_pct, largest.eta_water_3_pct)

    for low_group, high_group in zip(ordered, ordered[1:]):
        if low_group.psr <= target_psr <= high_group.psr:
            span = high_group.psr - low_group.psr
            # Guard against duplicate PSR rows (zero-width bracket).
            t = (target_psr - low_group.psr) / span if span > 0 else 0.0
            eta_space_1 = (
                low_group.eta_space_1_pct
                + (high_group.eta_space_1_pct - low_group.eta_space_1_pct) * t
            )
            eta_water_3 = (
                low_group.eta_water_3_pct
                + (high_group.eta_water_3_pct - low_group.eta_water_3_pct) * t
            )
            return (eta_space_1, eta_water_3)
    # Unreachable: target_psr is strictly between the smallest and largest
    # PSR of an ordered sequence, so an enclosing bracket exists.
    raise AssertionError("PSR bracket not found despite range check")
def parse_heat_pump_row_raw(raw: tuple[str, ...]) -> HeatPumpRecord:
    """Build a typed `HeatPumpRecord` from one Table 362 format-465 row.

    Short rows (older, partially-populated records) are tolerated: any
    position past the end of the row reads as a blank field, which
    decodes to "" for the text fields and None for the numeric ones.
    Only the first column is mandatory.

    Args:
        raw: All columns of the row, already split.

    Returns:
        The decoded record, with `raw` carried through unchanged.

    Raises:
        IndexError: If `raw` is empty.
        ValueError: If the first column is not an integer.
    """
    width = len(raw)

    def cell(position: int) -> str:
        # Positions beyond a short row behave like a blank field.
        if position < width:
            return raw[position]
        return ""

    def whole(position: int) -> Optional[int]:
        return _parse_optional_int(cell(position))

    def number(position: int) -> Optional[float]:
        return _parse_optional_float(cell(position))

    duration_code = cell(_HP_IDX_HEATING_DURATION_CODE).strip()
    return HeatPumpRecord(
        pcdb_id=int(raw[0]),
        brand_name=cell(_HP_IDX_BRAND_NAME),
        model_name=cell(_HP_IDX_MODEL_NAME),
        model_qualifier=cell(_HP_IDX_MODEL_QUALIFIER),
        fuel=whole(_HP_IDX_FUEL),
        service_provision=whole(_HP_IDX_SERVICE_PROVISION),
        hw_vessel_mode=whole(_HP_IDX_HW_VESSEL_MODE),
        vessel_volume_l=number(_HP_IDX_VESSEL_VOLUME_L),
        vessel_heat_loss_kwh_per_day=number(_HP_IDX_VESSEL_HEAT_LOSS_KWH_PER_DAY),
        vessel_heat_exchanger_area_m2=number(_HP_IDX_VESSEL_HEAT_EXCHANGER_AREA_M2),
        max_output_kw=number(_HP_IDX_MAX_OUTPUT_KW),
        # A blank code is reported as missing, not as an empty string.
        heating_duration_code=duration_code or None,
        psr_groups=_parse_psr_groups(raw),
        raw=raw,
    )
def parse_table_raw(dat_text: str, table_id: str) -> list[RawPcdbRecord]:
    """Extract every record of one PCDB table without decoding its fields.

    Generic positional walker: each row is split on commas, the first
    column becomes `pcdb_id` and the full column tuple is kept as `raw`.
    Typed parsers (e.g. Table 362 heat pumps) refine specific fields
    without changing this contract.

    Args:
        dat_text: Full text of a pcdb10.dat file (or a fragment of one).
        table_id: Id of the table section to read, e.g. "362".

    Returns:
        One `RawPcdbRecord` per row of the section, in file order.

    Raises:
        ValueError: If a row's first column is not an integer.
    """
    records: list[RawPcdbRecord] = []
    for row in _walk_table_records(dat_text, table_id):
        columns = tuple(row.split(","))
        records.append(RawPcdbRecord(pcdb_id=int(columns[0]), raw=columns))
    return records
def parse_table_105(dat_text: str) -> list[GasOilBoilerRecord]:
    """Parse every Table 105 (Gas and Oil Boilers) record in a PCDB dat string.

    Args:
        dat_text: Full text of a pcdb10.dat file (or a fragment of one).

    Returns:
        One record per row of the Table 105 section, in file order, each
        decoded by `parse_table_105_row`.
    """
    boiler_rows = _walk_table_records(dat_text, _TABLE_105_HEADER_ID)
    return list(map(parse_table_105_row, boiler_rows))
def parse_table_105_row(row: str) -> GasOilBoilerRecord:
    """Decode one Table 105 (Gas and Oil Boilers) row into a typed record.

    Field positions (1-indexed): 1 pcdb_id, 6 brand_name, 7 model_name,
    8 model_qualifier, 11 final_year, 16 subsidiary_type, 23 output_kw_max,
    26 winter_efficiency_pct, 27 summer_efficiency_pct, 29 comparative
    hot water efficiency, 39 store_type, 48 separate_dhw_tests, 51 r1,
    52 F1, 56 F2, 57 F3. All columns are preserved verbatim in `raw`.

    Args:
        row: One comma-separated record line; a trailing line terminator
            is removed before splitting.

    Returns:
        The decoded record. Numeric columns that are blank or hold
        non-numeric text decode to None.

    Raises:
        ValueError: If the first column is not an integer.
        IndexError: If the row has fewer than 57 columns.
    """
    columns = tuple(row.rstrip("\r\n").split(","))

    def whole(position: int) -> Optional[int]:
        return _parse_optional_int(columns[position])

    def number(position: int) -> Optional[float]:
        return _parse_optional_float(columns[position])

    # Indices below are 0-based, i.e. one less than the spec positions.
    return GasOilBoilerRecord(
        pcdb_id=int(columns[0]),
        brand_name=columns[5],
        model_name=columns[6],
        model_qualifier=columns[7],
        final_year_of_manufacture=whole(10),
        output_kw_max=number(22),
        winter_efficiency_pct=number(25),
        summer_efficiency_pct=number(26),
        comparative_hot_water_efficiency_pct=number(28),
        subsidiary_type=whole(15),
        store_type=whole(38),
        separate_dhw_tests=whole(47),
        rejected_energy_proportion_r1=number(50),
        loss_factor_f1_kwh_per_day=number(51),
        loss_factor_f2_kwh_per_day=number(55),
        rejected_factor_f3_per_litre=number(56),
        raw=columns,
    )