mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
ijson use_float fixes Decimal/float coercion when streaming JSON. pyright extraPaths so the new pkg type-checks against domna-domain. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
87 lines
3 KiB
Python
87 lines
3 KiB
Python
"""Tests for build_features() — JSON cert → EpcPropertyData → feature row.
|
|
|
|
build_features wires three existing pieces: BulkZipReader yields JSON cert dicts,
|
|
EpcPropertyDataMapper.from_api_response parses them into EpcPropertyData, and
|
|
EpcMlTransform.to_rows produces the feature+target DataFrame. The function adds a
|
|
leading `certificate_number` column so each row stays traceable to its source cert.
|
|
"""
|
|
|
|
import io
|
|
import json
|
|
import zipfile
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from ml_training_data.bulk_zip_reader import BulkZipReader
|
|
from ml_training_data.build_features import build_features
|
|
from ml_training_data.storage import LocalStorage
|
|
|
|
# Fixture certificate loaded by the helpers in this module.
# NOTE(review): this is a relative path, so it is resolved against the current working
# directory when read — presumably the tests are run from the repository root; confirm.
_FIXTURE_PATH = Path("datatypes/epc/schema/tests/fixtures/21_0_0.json")
|
|
|
|
|
|
def _load_fixture_cert(cert_number: str) -> dict[str, object]:
    """Load the fixture certificate and stamp it with ``cert_number``.

    The fixture file is parsed afresh on every call, so each caller receives its own
    dict and can mutate it without affecting other tests.

    Args:
        cert_number: Value stored under the ``"certificate_number"`` key.

    Returns:
        The parsed fixture with ``"certificate_number"`` set to ``cert_number``.
    """
    # Decode explicitly as UTF-8: Path.read_text() otherwise falls back to the
    # locale's preferred encoding, which is not UTF-8 on every platform.
    cert = json.loads(_FIXTURE_PATH.read_text(encoding="utf-8"))
    cert["certificate_number"] = cert_number
    return cert
|
|
|
|
|
|
def _write_zip(storage: LocalStorage, key: str, entries: dict[str, list[dict[str, object]]]) -> None:
    """Build a deflate-compressed zip in memory and store it under ``key``.

    Every item of ``entries`` becomes one archive member: the dict key is the member
    name and the value is written as its ``json.dumps`` serialisation.
    """
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
        for member_name, certs in entries.items():
            serialised = json.dumps(certs)
            archive.writestr(member_name, serialised)
    # The archive must be closed before its bytes are read, so the write happens
    # only after the ``with`` block has exited.
    zip_bytes = zip_buffer.getvalue()
    storage.write_bytes(key, zip_bytes)
|
|
|
|
|
|
def test_build_features_returns_one_row_per_supported_cert(tmp_path: Path) -> None:
    """One fixture cert in the zip gives one row, led by its certificate number."""
    # Arrange: a bulk zip holding a single fixture certificate.
    zip_key = "bulk.zip"
    cert_number = "CERT-001"
    local_store = LocalStorage(root=tmp_path)
    fixture_cert = _load_fixture_cert(cert_number)
    _write_zip(local_store, zip_key, {"certificates-2024.json": [fixture_cert]})
    zip_reader = BulkZipReader(storage=local_store, zip_key=zip_key)

    # Act
    features = build_features(zip_reader, certificate_numbers={cert_number})

    # Assert: exactly one row, traceable to the cert, with the id column first.
    assert len(features) == 1
    first_row = features.iloc[0]
    assert first_row["certificate_number"] == cert_number
    assert features.columns[0] == "certificate_number"
|
|
|
|
|
|
def test_build_features_skips_unsupported_schemas_by_default(tmp_path: Path) -> None:
    """With default arguments, a cert whose schema is unsupported is left out."""
    # Arrange: one untouched fixture cert plus one relabelled with another schema.
    local_store = LocalStorage(root=tmp_path)
    good_cert = _load_fixture_cert("OK-1")
    bad_cert = _load_fixture_cert("BAD-1")
    # This schema type is not in the mapper's dispatch.
    bad_cert["schema_type"] = "RdSAP-Schema-19.0.0"
    zip_contents = {"certificates-2024.json": [good_cert, bad_cert]}
    _write_zip(local_store, "bulk.zip", zip_contents)
    zip_reader = BulkZipReader(storage=local_store, zip_key="bulk.zip")

    # Act
    features = build_features(zip_reader, certificate_numbers={"OK-1", "BAD-1"})

    # Assert: only the supported cert made it into the frame.
    kept_numbers = features["certificate_number"].tolist()
    assert kept_numbers == ["OK-1"]
|
|
|
|
|
|
def test_build_features_raises_when_skip_unsupported_schemas_is_false(tmp_path: Path) -> None:
    """With skipping turned off, an unsupported schema raises ``ValueError``."""
    # Arrange: a zip whose only cert carries a schema outside the mapper's dispatch.
    local_store = LocalStorage(root=tmp_path)
    bad_cert = _load_fixture_cert("BAD-1")
    bad_cert["schema_type"] = "RdSAP-Schema-19.0.0"
    _write_zip(local_store, "bulk.zip", {"certificates-2024.json": [bad_cert]})
    zip_reader = BulkZipReader(storage=local_store, zip_key="bulk.zip")

    # Act / Assert
    expected_error = pytest.raises(ValueError, match="Unsupported EPC schema")
    with expected_error:
        build_features(zip_reader, certificate_numbers={"BAD-1"}, skip_unsupported_schemas=False)
|