import pytest

from domain.addresses.postcode_batching import iter_postcode_grouped_batches
from domain.addresses.user_address import UserAddress


def _addrs(postcode: str, n: int) -> list[UserAddress]:
    """Return ``n`` addresses for ``postcode``, each with its own address line."""
    built: list[UserAddress] = []
    for i in range(n):
        line = f"{i} {postcode} Street"
        built.append(UserAddress(user_address=line, postcode=postcode))
    return built


def _postcodes(batch: list[UserAddress]) -> list[str]:
    """Return the ``postcode`` attribute of each address in ``batch``, in order."""
    return [address.postcode for address in batch]


def test_empty_input_yields_no_batches() -> None:
    """An empty input produces no batches at all."""
    batches = list(iter_postcode_grouped_batches([]))
    assert batches == []


def test_single_batch_under_cap() -> None:
    """Five addresses against a cap of 500 come back as one unchanged batch."""
    addresses = [*_addrs("AA1 1AA", 3), *_addrs("BB2 2BB", 2)]
    batches = list(iter_postcode_grouped_batches(addresses, max_batch_size=500))
    assert batches == [addresses]


def test_multiple_postcodes_packed_into_one_batch_up_to_cap() -> None:
    """Two groups whose total exactly equals the cap share a single batch.

    Reaching the cap exactly must not trigger a premature flush.
    """
    first_group = _addrs("AA1 1AA", 3)
    second_group = _addrs("BB2 2BB", 2)
    batches = list(
        iter_postcode_grouped_batches(first_group + second_group, max_batch_size=5)
    )
    assert [len(batch) for batch in batches] == [5]


def test_flush_on_overflow_before_adding_next_postcode() -> None:
    """A group that would overflow the cap starts a fresh batch.

    With a cap of 5, the first group takes 3 slots; the second group of 3
    would overflow, so the buffer is flushed before it is added.
    """
    addresses = [*_addrs("AA1 1AA", 3), *_addrs("BB2 2BB", 3)]
    batches = list(iter_postcode_grouped_batches(addresses, max_batch_size=5))
    assert [_postcodes(batch) for batch in batches] == [
        ["AA11AA"] * 3,
        ["BB22BB"] * 3,
    ]


def test_single_postcode_group_exceeding_cap_is_dispatched_whole() -> None:
    """An oversize single-postcode group goes out as one batch above the cap.

    The cap never splits a postcode.
    """
    oversize = _addrs("AA1 1AA", 7)
    batches = list(iter_postcode_grouped_batches(oversize, max_batch_size=5))
    assert [len(batch) for batch in batches] == [7]


def test_oversize_group_flushes_existing_buffer_first() -> None:
    """Buffered work is emitted before an oversize group, never lost or mixed.

    Mirrors the legacy ``if buffer: flush`` branch taken when an oversize
    group is encountered.
    """
    small = _addrs("AA1 1AA", 2)
    big = _addrs("BB2 2BB", 7)
    tail = _addrs("CC3 3CC", 1)
    everything = small + big + tail
    batches = list(iter_postcode_grouped_batches(everything, max_batch_size=5))
    assert [_postcodes(batch) for batch in batches] == [
        ["AA11AA", "AA11AA"],
        ["BB22BB"] * 7,
        ["CC33CC"],
    ]


def test_final_flush_yields_remaining_buffer() -> None:
    """The trailing buffer is still emitted when no overflow ever happens."""
    addresses = [*_addrs("AA1 1AA", 2), *_addrs("BB2 2BB", 2)]
    batches = list(iter_postcode_grouped_batches(addresses, max_batch_size=500))
    assert len(batches) == 1
    assert batches[0] == addresses


def test_postcode_grouping_preserves_first_seen_order() -> None:
    """Interleaved input is grouped by postcode in first-seen order.

    The order must never be alphabetical: ``ZZ9 9ZZ`` is seen first, so it
    is emitted ahead of ``AA1 1AA``.
    """
    z_first, z_second = _addrs("ZZ9 9ZZ", 2)
    a_first, a_second = _addrs("AA1 1AA", 2)
    interleaved = [z_first, a_first, z_second, a_second]
    batches = list(iter_postcode_grouped_batches(interleaved))
    assert len(batches) == 1
    assert _postcodes(batches[0]) == ["ZZ99ZZ"] * 2 + ["AA11AA"] * 2


def test_invalid_max_batch_size_raises() -> None:
    """A ``max_batch_size`` of zero raises ``ValueError`` naming the argument."""
    with pytest.raises(ValueError, match="max_batch_size"):
        batches = iter_postcode_grouped_batches([], max_batch_size=0)
        list(batches)