Model/tests/repositories/epc/test_epc_bulk_read.py
Khalim Conn-Kowlessar 8685f8ba3a perf(repos): bulk get_many / get_for_properties — batch reads, not N round-trips (#1138)
Final slice of ADR-0012: collapse the per-property read round-trips a batch
made (Baseline hydrated ~8 queries x 30 properties one at a time) into a
handful of per-table IN queries.

- EpcPostgresRepository: extracted a shared `_compose(rows)` from `get` (the
  windows + floor-dim fetches are now passed in, not fetched inline), so both
  `get` and the new `get_for_properties(property_ids)` build EpcPropertyData
  from pre-fetched rows. `get_for_properties` fetches each child table once
  (`WHERE epc_property_id IN ...`), groups in memory, and composes — load-whole
  per ADR-0002.
- PropertyRepository.get_many(property_ids) -> Properties: one query for the
  property rows + one bulk EPC hydration, composed in input order.
- BaselineOrchestrator / IngestionOrchestrator read the batch via get_many
  instead of N x get.
- Ports + fakes gain the bulk methods.

The #1129 round-trip fidelity test stays green (the compose extraction is
behaviour-preserving). New tests: bulk hydration correctness + round-trips are
constant w.r.t. batch size (one-per-table, proven by query count). 123 pass;
pyright strict clean; AAA.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-31 10:33:24 +00:00

81 lines
2.6 KiB
Python

"""Bulk EPC read: get_for_properties hydrates a batch in a handful of per-table
queries, not N x per-property (ADR-0012, #1138)."""
from __future__ import annotations
import json
from collections.abc import Callable
from pathlib import Path
from typing import Any
from sqlalchemy import Engine, event
from sqlmodel import Session
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from repositories.epc.epc_postgres_repository import EpcPostgresRepository
# Directory holding the sample EPC API JSON payloads. It is located relative to
# this file rather than the working directory: `parents[3]` is the directory
# three levels above the one containing this module.
_JSON_SAMPLES = Path(__file__).resolve().parents[3] / "backend/epc_api/json_samples"
def _load_epc() -> EpcPropertyData:
    """Load the RdSAP-Schema-21.0.0 sample payload and map it to the domain type.

    Returns:
        The result of ``EpcPropertyDataMapper.from_api_response`` applied to the
        parsed contents of ``RdSAP-Schema-21.0.0/epc.json`` under
        ``_JSON_SAMPLES``.
    """
    sample_path = _JSON_SAMPLES / "RdSAP-Schema-21.0.0" / "epc.json"
    # Decode explicitly as UTF-8 so the fixture parses identically on every
    # platform instead of depending on the locale's default encoding.
    raw: dict[str, Any] = json.loads(sample_path.read_text(encoding="utf-8"))
    return EpcPropertyDataMapper.from_api_response(raw)
def _count_queries(engine: Engine, work: Callable[[], None]) -> int:
    """Run ``work`` and report how often ``engine`` fired ``before_cursor_execute``.

    Args:
        engine: The engine the temporary listener is attached to.
        work: Zero-argument callable executed while the listener is active.

    Returns:
        The number of times the listener was invoked during ``work()``.
    """
    executed = 0

    def _on_execute(*_args: Any, **_kwargs: Any) -> None:
        # The event's arguments are irrelevant here; only the call is tallied.
        nonlocal executed
        executed += 1

    event.listen(engine, "before_cursor_execute", _on_execute)
    try:
        work()
    finally:
        # Detach the listener even when work() raises.
        event.remove(engine, "before_cursor_execute", _on_execute)
    return executed
def test_get_for_properties_hydrates_the_whole_batch(db_engine: Engine) -> None:
    """A bulk read returns, per requested property id, an EPC equal to the saved one."""
    # Arrange — one sample EPC stored under two different property ids.
    sample = _load_epc()
    property_ids = (10, 11)
    with Session(db_engine) as write_session:
        writer = EpcPostgresRepository(write_session)
        for property_id in property_ids:
            writer.save(sample, property_id=property_id)
        write_session.commit()

    # Act — read the batch back through a separate session.
    with Session(db_engine) as read_session:
        reader = EpcPostgresRepository(read_session)
        hydrated = reader.get_for_properties(list(property_ids))

    # Assert — both fully hydrated (load-whole, ADR-0002).
    assert hydrated == dict.fromkeys(property_ids, sample)
def test_get_for_properties_round_trips_do_not_scale_with_batch_size(
    db_engine: Engine,
) -> None:
    """A bulk read issues the same number of statements for one or two properties."""
    # Arrange — the same sample EPC persisted for two properties.
    epc = _load_epc()
    with Session(db_engine) as session:
        repo = EpcPostgresRepository(session)
        repo.save(epc, property_id=10)
        repo.save(epc, property_id=11)
        session.commit()

    def _read(property_ids: list[int]) -> None:
        # A fresh session per read, so each measurement starts from a new session.
        with Session(db_engine) as session:
            EpcPostgresRepository(session).get_for_properties(property_ids)

    # Act — count queries for a 1-property batch vs a 2-property batch.
    one = _count_queries(db_engine, lambda: _read([10]))
    two = _count_queries(db_engine, lambda: _read([10, 11]))

    # Assert — the counter must have observed something, otherwise the equality
    # below would hold trivially (0 == 0) and prove nothing.
    assert one > 0, "no statements were observed while reading a 1-property batch"
    # Same number of round-trips regardless of batch size (one query per table,
    # not per property).
    assert one == two