mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
The calculator tests lived under domain/sap10_calculator/{tests,worksheet/
tests,rdsap/tests,climate/tests,validation/tests}, none of which are in
pytest.ini testpaths — so CI (which collects tests/) never ran them. Relocate
all five dirs to tests/domain/sap10_calculator/{,worksheet,rdsap,climate,
validation}, mirroring the tests/domain/property_baseline/ convention, so the
cascade-pin / golden / e2e conformance suites run in CI.
Mechanics:
- git mv preserves history (110 files).
- Flattening the trailing /tests keeps each file's depth-to-repo-root
identical, so all 16 repo-root parents[4] fixture refs stay valid. Only
test_pcdb_etl.py's parents[1] (→ pcdb data) and one hardcoded absolute
golden-fixture path in test_cert_to_inputs.py needed rebasing.
- Cross-imports rewritten domain.sap10_calculator.worksheet.tests →
tests.domain.sap10_calculator.worksheet (21 files incl. the external
importer backend/documents_parser/tests/test_summary_pdf_mapper_chain.py).
- Golden-fixture path strings in test_summary_pdf_mapper_chain.py +
scripts/fetch_cohort2_api_jsons.py updated to the new location (the JSONs
moved with the rdsap tests).
load_cells / gitignored worksheet xlsx: the xlsx-pinned tests (test_dimensions
/ ventilation / water_heating) read 2026-05-19-17-18 RdSap10Worksheet.xlsx,
which is gitignored (.gitignore `*.xlsx`) and so absent in CI. _xlsx_loader.
load_cells now pytest.skip()s when the file is absent, so those tests run
locally and skip cleanly in CI instead of erroring — no new CI failures from
the move, and the gitignore policy is respected.
Verified: tests/domain/sap10_calculator + backend/documents_parser +
tests/domain/property_baseline = 2248 pass, 1 skipped; pyright resolves the
new import paths with zero import-resolution errors.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
327 lines
12 KiB
Python
327 lines
12 KiB
Python
"""Tests for the BRE PCDB (pcdb10.dat) ETL parser.
|
||
|
||
The PCDB is a multi-table comma-separated data file published by BRE.
|
||
Each table has its own format (`$<table_id>,<format>,...`) and its own
|
||
field schema. This module verifies that the per-table parsers decode
|
||
records into typed dicts matching ground-truth records the user
|
||
verified against https://www.ncm-pcdb.org.uk.
|
||
|
||
Reference: BRE Product Characteristics Database — pcdb10.dat (April 2026).
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from pathlib import Path
|
||
|
||
import pytest
|
||
|
||
from domain.sap10_calculator.tables.pcdb.etl import run_etl
|
||
from domain.sap10_calculator.tables.pcdb.parser import (
|
||
parse_table_105,
|
||
parse_table_105_row,
|
||
parse_table_raw,
|
||
)
|
||
|
||
|
||
# Location of the BRE pcdb10.dat source file read by the real-data tests in
# this module. `parents[3]` is the fourth ancestor directory of this file —
# presumably the repository root when the module lives at
# tests/domain/sap10_calculator/ (re-check the index if the file is moved).
_PCDB_DAT_PATH: Path = (
    Path(__file__).resolve().parents[3]
    / "domain" / "sap10_calculator" / "tables" / "pcdb" / "data" / "pcdb10.dat"
)
|
||
|
||
|
||
# Verified by user against ncm-pcdb.org.uk: Baxi Heating Wm 20/3rs.
# One raw comma-separated Table 105 row, kept verbatim: the parser decodes it
# by field position, so the literal must not be re-flowed or edited.
_BAXI_98_RAW: str = (
    "000098,000005,0,2010/Sep/13 17:03,Baxi Heating,Baxi Heating,Wm,20/3rs,"
    "4107739,,1990,1,0,0,1,0,,,1,2,1,5.86,5.86,,,66.0,56.0,,40.8,,3,,,0,2,0,"
    ",,0,,0,,0,,,,,0,,,,,,,,,,,,,0000,,,,,,,,,,,,,,,"
)
|
||
|
||
# Verified by ground-truth arithmetic against PDF Σ(61) = 337.19 for 000474
# Elmhurst fixture (Vaillant ecoTEC pro 28 VUW GB 286/5-3, pcdb_id 16839):
#   Table 3b row 1 → Σ(61) = (45) × r1 × fu + F1 × 365
#                          = 1680.84 × 0.0025 × 1.0 + 0.91251 × 365 = 337.27.
# Combi-loss fields (BRE PCDF Spec v1.0 §7.11 fields 48/51/52/56/57):
#   separate_dhw_tests = 1 (one test, profile M → Table 3b)
#   rejected_energy_proportion_r1 = 0.0025
#   loss_factor_f1_kwh_per_day = 0.91251
#   loss_factor_f2 / rejected_factor_f3 = blank (Table 3c not used)
# The row is kept verbatim: the parser decodes it by field position, so the
# literal must not be re-flowed or edited.
_VAILLANT_16839_RAW: str = (
    "016839,000031,0,2019/Mar/04 10:28,Vaillant,Vaillant,ecoTEC pro 28,"
    "VUW GB 286/5-3,GC 47-044-45,2005,2015,1,2,1,2,0,,,2,2,2,24.4,24.4,,,"
    "88.7,87.0,,75.1,,2,,,104,1,2,105,2,0,,,,0,,,,,1,7.012,0.133,0.0025,"
    "0.91251,,,,,,1,1,,0045,,,,,,,,,89.0,98.0,,,,,96.3"
)
|
||
|
||
|
||
def test_table_105_parser_extracts_baxi_98_known_fields() -> None:
    """Parse the user-verified Baxi 000098 row and check its typed fields.

    The expected values are the ones cross-checked against the
    ncm-pcdb.org.uk web entry for pcdb_id 98: brand "Baxi Heating",
    model "Wm", qualifier "20/3rs", SAP winter / summer seasonal
    efficiency 66.0% / 56.0%, comparative hot water efficiency 40.8%,
    output 5.86 kW and final year of manufacture 1990.
    """
    # Arrange
    expected_fields = {
        "pcdb_id": 98,
        "brand_name": "Baxi Heating",
        "model_name": "Wm",
        "model_qualifier": "20/3rs",
        "winter_efficiency_pct": 66.0,
        "summer_efficiency_pct": 56.0,
        "comparative_hot_water_efficiency_pct": 40.8,
        "output_kw_max": 5.86,
        "final_year_of_manufacture": 1990,
    }

    # Act
    baxi = parse_table_105_row(_BAXI_98_RAW)

    # Assert — one attribute at a time, in declaration order.
    for field_name, expected_value in expected_fields.items():
        assert getattr(baxi, field_name) == expected_value
|
||
|
||
|
||
# Raw rows for the parametrized (raw_row, expected fields) test. Two
# additional user-verified records are defined here — same field positions as
# the Baxi row, different manufacturers + output power + final year.
# Both literals are kept verbatim because the parser decodes by field position.
_POTTERTON_619_RAW: str = (
    "000619,000034,0,2010/Sep/13 17:03,Potterton Myson,Potterton Myson,"
    "Flamingo 2,cf20/30,4160516,,1986,1,0,0,1,0,,,1,1,1,8.8,8.8,,,66.0,56.0,"
    ",40.8,,3,,,0,2,0,,,0,,0,,0,,,,,0,,,,,,,,,,,,,0000,,,,,,,,,,,,,,,"
)
_SAUNIER_732_RAW: str = (
    "000732,000035,0,2010/Sep/13 17:03,Saunier Duval,Saunier Duval,500,30c,"
    "4192007,,1992,1,0,0,1,0,,,1,1,1,8.8,8.8,,,66.0,56.0,,40.8,,3,,,0,2,0,"
    ",,0,,0,,0,,,,,0,,,,,,,,,,,,,0000,,,,,,,,,,,,,,,"
)
|
||
|
||
|
||
@pytest.mark.parametrize(
    "raw_row, expected",
    [
        (
            _POTTERTON_619_RAW,
            dict(
                pcdb_id=619,
                brand_name="Potterton Myson",
                model_name="Flamingo 2",
                model_qualifier="cf20/30",
                output_kw_max=8.8,
                final_year_of_manufacture=1986,
            ),
        ),
        (
            _SAUNIER_732_RAW,
            dict(
                pcdb_id=732,
                brand_name="Saunier Duval",
                model_name="500",
                model_qualifier="30c",
                output_kw_max=8.8,
                final_year_of_manufacture=1992,
            ),
        ),
    ],
)
def test_table_105_parser_extracts_other_user_verified_records(
    raw_row: str, expected: dict[str, object]
) -> None:
    """Check that the Table 105 field positions hold for other manufacturers.

    Each case pairs a raw user-verified row (Potterton Myson 619, Saunier
    Duval 732) with the attribute values the parsed record must expose:
    id, brand, model, qualifier, maximum output and final year.
    """
    # Arrange — inputs arrive through the parametrize table.
    # Act
    parsed = parse_table_105_row(raw_row)

    # Assert
    for field_name, expected_value in expected.items():
        assert getattr(parsed, field_name) == expected_value, f"field {field_name}"
|
||
|
||
|
||
def test_table_105_parser_extracts_separate_dhw_tests_profile_flag() -> None:
    """Check the "Separate DHW tests" flag decoded from the Vaillant 16839 row.

    Per BRE PCDF Spec v1.0 §7.11, field 48 (0-indexed 47) selects the
    PCDB Table 3b/3c combi-loss path:

    * 0 = none / not applicable
    * 1 = one test, profile M (Table 3b)
    * 2 = two tests, profiles M+L (Table 3c)
    * 3 = two tests, profiles M+S (Table 3c)

    The 16839 row lodges flag 1, i.e. the Table 3b path.
    """
    # Arrange / Act
    vaillant = parse_table_105_row(_VAILLANT_16839_RAW)

    # Assert
    profile_flag = vaillant.separate_dhw_tests
    assert profile_flag == 1
|
||
|
||
|
||
def test_table_105_parser_extracts_table_3b_3c_combi_loss_coefficients() -> None:
    """Check the Table 3b/3c combi-loss coefficients of the Vaillant 16839 row.

    BRE PCDF Spec v1.0 §7.11 fields 51 / 52 / 56 / 57 (0-indexed
    50 / 51 / 55 / 56) hold rejected energy r1, loss factor F1 (Table 3b),
    loss factor F2 (Table 3c) and rejected factor F3 (Table 3c, can be
    negative). 16839 lodges profile M only, so F2 and F3 are blank.

    Cross-verified by arithmetic: Σ(61) = (45) × r1 × fu + F1 × 365
    = 1680.84 × 0.0025 × 1.0 + 0.91251 × 365 = 337.27 kWh/yr, against the
    000474 worksheet's PDF pin Σ(61) = 337.19 (Δ 0.02%).
    """
    # Arrange / Act
    vaillant = parse_table_105_row(_VAILLANT_16839_RAW)

    # Assert — the Table 3b pair (r1, F1) is populated ...
    table_3b_pair = (
        vaillant.rejected_energy_proportion_r1,
        vaillant.loss_factor_f1_kwh_per_day,
    )
    assert table_3b_pair == (0.0025, 0.91251)

    # ... while the Table 3c pair (F2, F3) is left as None.
    table_3c_pair = (
        vaillant.loss_factor_f2_kwh_per_day,
        vaillant.rejected_factor_f3_per_litre,
    )
    assert all(value is None for value in table_3c_pair)
|
||
|
||
|
||
def test_table_105_parser_leaves_combi_loss_fields_none_for_sap_default_boilers() -> None:
    """Check that a SAP-default boiler row yields no combi-loss coefficients.

    Baxi 000098 has no EN 13203-2 / OPS 26 tests, so its Table 3b/3c
    combi-loss fields are blank in pcdb10.dat. The parsed record must
    report them as None, which signals the Table 3a fallback (the
    pre-§4-HW default 600 kWh/yr behaviour).
    """
    # Arrange
    blank_fields = (
        "rejected_energy_proportion_r1",
        "loss_factor_f1_kwh_per_day",
        "loss_factor_f2_kwh_per_day",
        "rejected_factor_f3_per_litre",
    )

    # Act
    baxi = parse_table_105_row(_BAXI_98_RAW)

    # Assert
    assert baxi.separate_dhw_tests == 0
    for field_name in blank_fields:
        assert getattr(baxi, field_name) is None
|
||
|
||
|
||
def test_parse_table_105_walks_section_skipping_headers_and_comments() -> None:
    """Check that the Table 105 walker returns only the Table 105 records.

    The synthetic input mimics the .dat layout: a `$<id>,<format>,...`
    header opens each table, `#`-prefixed comment lines are interspersed,
    and a `# ... end of Table <id>` marker precedes the next section. Only
    the two rows inside the Table 105 section should come back; comments,
    headers and the row under the `$362` header must be ignored.
    """
    # Arrange — one entry per input line; each gets a trailing newline.
    section_lines = [
        "# noise before",
        "$105,211,2,2025,11,28,2",
        "# Table 105 (Gas and Oil Boilers) follows ...",
        "#",
        _BAXI_98_RAW,
        _POTTERTON_619_RAW,
        "#",
        "# ... end of Table 105 Format 211",
        "#",
        "$362,360,1,2025,11,28,1",
        "ignored,record,from,heat,pump,table",
    ]
    dat_section = "".join(f"{line}\n" for line in section_lines)

    # Act
    parsed = parse_table_105(dat_section)

    # Assert
    parsed_ids = [row.pcdb_id for row in parsed]
    assert parsed_ids == [98, 619]
    baxi, potterton = parsed[0], parsed[1]
    assert baxi.brand_name == "Baxi Heating"
    assert potterton.brand_name == "Potterton Myson"
|
||
|
||
|
||
def test_parse_table_105_extracts_user_verified_records_from_real_pcdb_dat() -> None:
    """Run the Table 105 parser over the real BRE pcdb10.dat file.

    The file is described as 7.9 MB, ~23k lines, CRLF endings. The test
    looks up the user-verified records 98 (Baxi), 619 (Potterton Myson)
    and 732 (Saunier Duval) by pcdb_id, so any drift between the parser's
    field positions and the real-world data surfaces here.
    """
    # Arrange — expected attribute values per pcdb_id.
    expected_by_id = {
        98: {
            "brand_name": "Baxi Heating",
            "model_name": "Wm",
            "model_qualifier": "20/3rs",
            "winter_efficiency_pct": 66.0,
            "summer_efficiency_pct": 56.0,
            "comparative_hot_water_efficiency_pct": 40.8,
            "final_year_of_manufacture": 1990,
        },
        619: {
            "brand_name": "Potterton Myson",
            "winter_efficiency_pct": 66.0,
        },
        732: {
            "brand_name": "Saunier Duval",
            "winter_efficiency_pct": 66.0,
        },
    }
    # The file is read as latin-1: manufacturer addresses occasionally
    # carry non-ASCII characters such as the degree sign.
    dat_text = _PCDB_DAT_PATH.read_text(encoding="latin-1")

    # Act
    by_id = {}
    for parsed in parse_table_105(dat_text):
        by_id[parsed.pcdb_id] = parsed

    # Assert
    for pcdb_id, expected_fields in expected_by_id.items():
        record = by_id[pcdb_id]
        for field_name, expected_value in expected_fields.items():
            assert getattr(record, field_name) == expected_value
|
||
|
||
|
||
def test_run_etl_writes_table_105_jsonl_with_decoded_and_raw_fields(tmp_path: Path) -> None:
    """Run the ETL on the real pcdb10.dat and inspect the Table 105 output.

    The ETL is expected to write a newline-delimited JSON file (`.jsonl`),
    one record per line. The test reads it back line by line and checks
    both the decoded fields of record 98 and that the raw row is preserved
    alongside them.
    """
    # Arrange
    import json

    output_dir = tmp_path / "pcdb_json"
    table_105_jsonl = output_dir / "pcdb_table_105_gas_oil_boilers.jsonl"

    # Act
    run_etl(source=_PCDB_DAT_PATH, output_dir=output_dir)

    # Assert
    assert table_105_jsonl.exists()
    by_id = {}
    for line in table_105_jsonl.read_text().splitlines():
        if not line:
            # Skip empty lines (e.g. from a trailing newline).
            continue
        decoded = json.loads(line)
        by_id[decoded["pcdb_id"]] = decoded
    baxi = by_id[98]
    assert baxi["brand_name"] == "Baxi Heating"
    assert baxi["winter_efficiency_pct"] == 66.0
    assert baxi["summer_efficiency_pct"] == 56.0
    assert baxi["raw"][0] == "000098"  # raw[0] = pcdb_id (left-padded)
|
||
|
||
|
||
def test_parse_table_raw_extracts_heat_pump_records_from_real_pcdb_dat() -> None:
    """Run the generic positional walker over Table 362 (Heat Pumps).

    Per-field typing for heat pumps is deferred to a future slice once the
    records are ground-truth verified, so the parser only commits to
    pcdb_id plus the raw row. This test therefore checks shape only: a
    table other than 105 is handled, the output is non-empty, and the
    first record carries a positive integer id and a multi-field raw row.
    """
    # Arrange
    dat_text = _PCDB_DAT_PATH.read_text(encoding="latin-1")

    # Act
    heat_pumps = parse_table_raw(dat_text, table_id="362")

    # Assert
    assert len(heat_pumps) > 0
    head = heat_pumps[0]
    assert isinstance(head.pcdb_id, int)
    assert head.pcdb_id > 0
    id_field = head.raw[0]
    # The id column is zero-padded; an all-zero id is also accepted.
    assert id_field.lstrip("0") == str(head.pcdb_id) or id_field == "000000"
    assert len(head.raw) > 1  # multi-field row
|
||
|
||
|
||
def test_run_etl_writes_all_pcdb_table_jsonl_files(tmp_path: Path) -> None:
    """Check that the ETL emits a JSONL file for every PCDB table of interest.

    Per the user-chosen scope-D ingestion: Table 105 is typed, Table 322 is
    typed via `parse_table_322`, and 122/143/313/353/362/391/506 are
    written as untyped pcdb_id + raw. The expected set also includes the
    Table 329 file. Per-table typed refinement is left to follow-up slices
    when their cert-side wiring lands.

    Extra files in the output directory are tolerated; only missing ones
    fail the test.
    """
    # Arrange
    output_dir = tmp_path / "pcdb_json"
    expected_filenames = {
        "pcdb_table_105_gas_oil_boilers.jsonl",
        "pcdb_table_122_solid_fuel_boilers.jsonl",
        "pcdb_table_143_micro_cogen.jsonl",
        "pcdb_table_313_flue_gas_heat_recovery.jsonl",
        "pcdb_table_322_decentralised_mev.jsonl",
        "pcdb_table_329_mv_in_use_factors.jsonl",
        "pcdb_table_353_waste_water_heat_recovery.jsonl",
        "pcdb_table_362_heat_pumps.jsonl",
        "pcdb_table_391_high_heat_retention_storage_heaters.jsonl",
        "pcdb_table_506_heat_interface_units.jsonl",
    }

    # Act
    run_etl(source=_PCDB_DAT_PATH, output_dir=output_dir)

    # Assert
    written = set()
    for entry in output_dir.iterdir():
        written.add(entry.name)
    missing = expected_filenames.difference(written)
    assert not missing
|