Model/domain/addresses/postcode_batching.py
2026-05-20 13:21:11 +00:00

51 lines
1.4 KiB
Python

from __future__ import annotations
from collections.abc import Iterable, Iterator
from domain.addresses.user_address import UserAddress
from domain.postcode import Postcode
def iter_postcode_grouped_batches(
    addresses: Iterable[UserAddress],
    *,
    max_batch_size: int = 500,
) -> Iterator[list[UserAddress]]:
    """Yield batches of addresses without splitting any postcode group.

    Addresses are grouped by their ``postcode`` attribute, in order of first
    appearance, and whole groups are packed greedily into batches of at most
    ``max_batch_size`` addresses. A group is never split across batches: a
    group holding ``max_batch_size`` or more addresses is yielded as a batch
    of its own, so such a batch may exceed ``max_batch_size``.

    Args:
        addresses: The addresses to batch. The iterable is not touched until
            the returned iterator is first advanced; at that point it is
            consumed in full to build the postcode groups.
        max_batch_size: Upper bound on the number of addresses packed into a
            batch made of several groups. Must be at least 1.

    Returns:
        A lazy iterator of non-empty lists of addresses.

    Raises:
        ValueError: If ``max_batch_size`` is smaller than 1. The check runs
            when this function is called, not when iteration starts.
    """
    # Validate here rather than inside the generator: a generator body does
    # not run until the first ``next()``, which would defer (or, if the
    # iterator is never consumed, completely hide) the error.
    if max_batch_size < 1:
        raise ValueError("max_batch_size must be >= 1")
    return _generate_postcode_batches(addresses, max_batch_size)


def _generate_postcode_batches(
    addresses: Iterable[UserAddress],
    max_batch_size: int,
) -> Iterator[list[UserAddress]]:
    """Generator behind ``iter_postcode_grouped_batches``; expects a validated size."""
    groups = _group_by_postcode_in_order(addresses)
    buffer: list[UserAddress] = []
    for group in groups.values():
        group_len = len(group)
        # Oversize single-Postcode group: flush buffer first, then dispatch
        # the group as its own batch. Mirrors the legacy
        # ``if group_len >= batch_size`` branch.
        if group_len >= max_batch_size:
            if buffer:
                yield buffer
                buffer = []
            yield group
            continue
        # Adding this group would overflow: flush buffer before appending.
        # The buffer cannot be empty here, because group_len < max_batch_size
        # means an empty buffer could never overflow.
        if len(buffer) + group_len > max_batch_size:
            yield buffer
            buffer = []
        buffer.extend(group)
    # Final flush of whatever is still pending.
    if buffer:
        yield buffer
def _group_by_postcode_in_order(
    addresses: Iterable[UserAddress],
) -> dict[Postcode, list[UserAddress]]:
    """Bucket addresses by their ``postcode`` attribute.

    The returned dict is keyed by postcode in order of first appearance, and
    each bucket keeps its addresses in the order they were encountered. The
    input iterable is consumed in full.
    """
    by_postcode: dict[Postcode, list[UserAddress]] = {}
    for entry in addresses:
        key = entry.postcode
        bucket = by_postcode.get(key)
        if bucket is None:
            # First address seen for this postcode: open a new bucket.
            bucket = by_postcode[key] = []
        bucket.append(entry)
    return by_postcode