import pytest

from domain.addresses.postcode_batching import iter_postcode_grouped_batches
from domain.addresses.user_address import UserAddress


def _addrs(postcode: str, n: int) -> list[UserAddress]:
    """Return ``n`` ``UserAddress`` objects that all carry ``postcode``.

    Each address line embeds its index (``"<i> <postcode> Street"``) so the
    lines differ from one another within the group.
    """
    built: list[UserAddress] = []
    for index in range(n):
        line = f"{index} {postcode} Street"
        built.append(UserAddress(user_address=line, postcode=postcode))
    return built


def test_empty_input_yields_no_batches() -> None:
    """An empty input list produces no batches at all."""
    produced = list(iter_postcode_grouped_batches([]))
    assert produced == []


def test_single_batch_under_cap() -> None:
    """Input far below the cap comes back as one batch equal to the input."""
    first_group = _addrs("AA1 1AA", 3)
    second_group = _addrs("BB2 2BB", 2)
    expected = first_group + second_group

    produced = list(
        iter_postcode_grouped_batches(expected, max_batch_size=500)
    )

    assert len(produced) == 1
    assert produced[0] == expected


def test_multiple_postcodes_packed_into_one_batch_up_to_cap() -> None:
    """Groups totalling exactly the cap share a single batch."""
    cap = 5
    # 3 + 2 == cap: reaching the cap exactly must not trigger an early flush.
    combined = _addrs("AA1 1AA", 3) + _addrs("BB2 2BB", 2)

    produced = list(
        iter_postcode_grouped_batches(combined, max_batch_size=cap)
    )

    assert len(produced) == 1
    assert len(produced[0]) == 5


def test_flush_on_overflow_before_adding_next_postcode() -> None:
    """A group that would overflow the cap starts a new batch instead."""
    # With a cap of 5, the first group takes 3 slots. Adding the second
    # group of 3 would exceed the cap, so the first group is expected on
    # its own and the second group in a fresh batch.
    leading = _addrs("AA1 1AA", 3)
    trailing = _addrs("BB2 2BB", 3)

    produced = list(
        iter_postcode_grouped_batches(leading + trailing, max_batch_size=5)
    )

    assert len(produced) == 2
    first_postcodes = [addr.postcode for addr in produced[0]]
    assert first_postcodes == ["AA11AA", "AA11AA", "AA11AA"]
    second_postcodes = [addr.postcode for addr in produced[1]]
    assert second_postcodes == ["BB22BB", "BB22BB", "BB22BB"]


def test_single_postcode_group_exceeding_cap_is_dispatched_whole() -> None:
    """A postcode group larger than the cap is emitted as one batch."""
    # Seven addresses against a cap of five: the group must stay together,
    # so the single resulting batch is allowed to exceed the cap.
    oversize_group = _addrs("AA1 1AA", 7)

    produced = list(
        iter_postcode_grouped_batches(oversize_group, max_batch_size=5)
    )

    assert len(produced) == 1
    assert len(produced[0]) == 7


def test_oversize_group_flushes_existing_buffer_first() -> None:
    """Buffered addresses go out before an oversize group, never mixed in."""
    # Corresponds to the legacy ``if buffer: flush`` branch taken when an
    # oversize group arrives: earlier buffered addresses must neither be
    # dropped nor interleaved with the big group.
    before = _addrs("AA1 1AA", 2)
    oversize = _addrs("BB2 2BB", 7)
    after = _addrs("CC3 3CC", 1)
    sequence = before + oversize + after

    produced = list(iter_postcode_grouped_batches(sequence, max_batch_size=5))

    assert len(produced) == 3
    assert [addr.postcode for addr in produced[0]] == ["AA11AA"] * 2
    assert [addr.postcode for addr in produced[1]] == ["BB22BB"] * 7
    assert [addr.postcode for addr in produced[2]] == ["CC33CC"] * 1


def test_final_flush_yields_remaining_buffer() -> None:
    """Whatever is still buffered at the end of input is emitted."""
    # The cap is never reached here, so the only way the addresses can come
    # out is through the trailing flush.
    pending = _addrs("AA1 1AA", 2) + _addrs("BB2 2BB", 2)

    produced = list(
        iter_postcode_grouped_batches(pending, max_batch_size=500)
    )

    assert produced == [pending]


def test_postcode_grouping_preserves_first_seen_order() -> None:
    """Interleaved postcodes are regrouped in the order first encountered."""
    # "ZZ9 9ZZ" appears first in the input although it sorts last
    # alphabetically, so an alphabetical ordering would fail this test.
    z_first, z_second = _addrs("ZZ9 9ZZ", 2)
    a_first, a_second = _addrs("AA1 1AA", 2)
    interleaved = [z_first, a_first, z_second, a_second]

    produced = list(iter_postcode_grouped_batches(interleaved))

    assert len(produced) == 1
    seen_postcodes = [addr.postcode for addr in produced[0]]
    assert seen_postcodes == ["ZZ99ZZ"] * 2 + ["AA11AA"] * 2


def test_invalid_max_batch_size_raises() -> None:
    """A cap of zero is rejected with a ``ValueError`` naming the parameter."""
    with pytest.raises(ValueError, match="max_batch_size"):
        batches = iter_postcode_grouped_batches([], max_batch_size=0)
        # Consume the result so the error surfaces even if the call is lazy.
        list(batches)