mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
The bulk endpoint 302-redirects to a 15.7 GB S3 ZIP with one NDJSON member per year; each line wraps the per-cert payload in a stringified 'document' that parses to the same RdSAP-Schema-21.0.1 shape from_api_response already handles. parse_bulk_line unwraps a record; is_sap_version filters to SAP 10.2; RangeFile exposes the S3 object as a seekable file so zipfile streams a single year's member (and a sampler stops early) without downloading the whole archive.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
43 lines
1.6 KiB
Python
43 lines
1.6 KiB
Python
"""Parse records from the gov EPC bulk export (NDJSON, stringified `document`)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
|
|
from harness.epc_bulk import is_sap_version, parse_bulk_line
|
|
|
|
|
|
def test_parse_bulk_line_unwraps_the_stringified_document() -> None:
    """A bulk NDJSON record yields its certificate number and decoded `document`."""
    # Arrange — the per-cert payload is JSON-encoded on its own and then
    # embedded as a string under `document`, so the line is doubly serialised.
    expected_cert = "0000-1111-2222-3333-4444"
    payload: dict[str, object] = {
        "schema_type": "RdSAP-Schema-21.0.1",
        "sap_version": 10.2,
        "energy_rating_current": 71,
    }
    record = {"certificate_number": expected_cert, "document": json.dumps(payload)}
    raw_line: str = json.dumps(record)

    # Act
    result = parse_bulk_line(raw_line)

    # Assert — a (certificate number, parsed document) pair is returned, with
    # the inner fields accessible as a mapping rather than as a string.
    assert result is not None
    cert_number, document = result
    assert cert_number == expected_cert
    assert document["schema_type"] == "RdSAP-Schema-21.0.1"
    assert document["energy_rating_current"] == 71
|
|
|
|
|
|
def test_parse_bulk_line_ignores_blank_lines() -> None:
    """Empty and whitespace-only lines produce no record."""
    # Arrange / Act / Assert — blank or trailing NDJSON lines come back as None
    # instead of a parsed pair.
    for blank in ("", " \n"):
        assert parse_bulk_line(blank) is None
|
|
|
|
|
|
def test_is_sap_version_matches_regardless_of_numeric_or_string_form() -> None:
    """`is_sap_version` accepts numeric or string `sap_version` and rejects others."""
    # Arrange — the export carries sap_version as a number, so both the numeric
    # and the string spelling must match; a different version or a missing key
    # must not.
    target = "10.2"
    cases = (
        ({"sap_version": 10.2}, True),
        ({"sap_version": "10.2"}, True),
        ({"sap_version": 10.1}, False),
        ({}, False),
    )

    # Act / Assert — the result must be the exact bool, not merely truthy/falsy.
    for document, expected in cases:
        assert is_sap_version(document, target) is expected
|