from __future__ import annotations

from collections.abc import Iterable, Iterator

from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
from domain.postcode import Postcode


def iter_postcode_grouped_batches(
    addresses: Iterable[UnstandardisedAddress],
    *,
    max_batch_size: int = 500,
) -> Iterator[AddressList]:
    """Yield batches of addresses without splitting a postcode across batches.

    Addresses are grouped by ``address.postcode`` (groups keep the order in
    which each postcode first appears) and the groups are packed into
    batches in that order:

    * consecutive groups are accumulated while their combined size stays
      ``<= max_batch_size``;
    * a group whose own size is ``>= max_batch_size`` is yielded alone, so
      such a batch may be larger than ``max_batch_size``.

    No empty batch is ever yielded.

    Args:
        addresses: Addresses to batch. The iterable is consumed in full
            when the first batch is requested, because grouping needs to
            see every address.
        max_batch_size: Target upper bound on batch size; must be ``>= 1``.

    Returns:
        An iterator over the batches.

    Raises:
        ValueError: If ``max_batch_size`` is less than 1. Raised at call
            time rather than on first iteration, so a bad argument is
            reported where the call is made.
    """
    # Validate here, outside the generator body: code inside a generator
    # function does not run until the first ``next()``.
    if max_batch_size < 1:
        raise ValueError("max_batch_size must be >= 1")
    return _iter_batches(addresses, max_batch_size)


def _iter_batches(
    addresses: Iterable[UnstandardisedAddress],
    max_batch_size: int,
) -> Iterator[AddressList]:
    """Generate the batches for ``iter_postcode_grouped_batches``.

    Expects ``max_batch_size`` to have been validated by the caller.
    """
    groups = _group_by_postcode_in_order(addresses)
    buffer: AddressList = AddressList([])

    for group in groups.values():
        group_len = len(group)

        # Oversize single-postcode group: flush the buffer first, then
        # dispatch the group as its own batch. Mirrors the legacy
        # ``if group_len >= batch_size`` branch.
        if group_len >= max_batch_size:
            if buffer:
                yield buffer
                buffer = AddressList([])
            yield group
            continue

        # Adding this group would overflow: flush the buffer before
        # appending. The buffer cannot be empty here, because
        # ``group_len < max_batch_size`` at this point.
        if len(buffer) + group_len > max_batch_size:
            yield buffer
            buffer = AddressList([])

        buffer.extend(group)

    # Final flush of whatever is left over.
    if buffer:
        yield buffer


def _group_by_postcode_in_order(
    addresses: Iterable[UnstandardisedAddress],
) -> dict[Postcode, AddressList]:
    """Group addresses by postcode, keeping first-seen postcode order.

    Relies on ``dict`` preserving insertion order, so iterating the result
    visits postcodes in the order they first appear in ``addresses``.
    Within a group, addresses keep their input order.
    """
    groups: dict[Postcode, AddressList] = {}
    for address in addresses:
        postcode = address.postcode
        group = groups.get(postcode)
        if group is None:
            # Allocate a new list only the first time a postcode is seen,
            # instead of building a throwaway one for every address.
            group = groups[postcode] = AddressList([])
        group.append(address)
    return groups