Rename UserAddress to LandlordAssetList; lower-case description override values

This commit is contained in:
Jun-te Kim 2026-05-21 16:59:57 +00:00
parent 8baa4c82aa
commit 94cbf5f516
10 changed files with 66 additions and 48 deletions

View file

@ -7,7 +7,7 @@ from infrastructure.csv_s3_client import CsvS3Client
from repositories.user_address.user_address_csv_s3_repository import (
UserAddressCsvS3Repository,
)
from domain.addresses.user_address import UserAddress
from domain.addresses.user_address import LandlordAssetList
def handler(
@ -32,7 +32,7 @@ def handler(
user_address_repo=user_address_repo,
)
list_of_user_address: list[UserAddress] = orchestrator.get_user_address(
list_of_user_address: list[LandlordAssetList] = orchestrator.get_user_address(
input_s3_uri=s3_uri
)

View file

@ -2,21 +2,21 @@ from __future__ import annotations
from collections.abc import Iterable, Iterator
from domain.addresses.user_address import UserAddress
from domain.addresses.user_address import LandlordAssetList
from domain.postcode import Postcode
def iter_postcode_grouped_batches(
addresses: Iterable[UserAddress],
addresses: Iterable[LandlordAssetList],
*,
max_batch_size: int = 500,
) -> Iterator[list[UserAddress]]:
) -> Iterator[list[LandlordAssetList]]:
if max_batch_size < 1:
raise ValueError("max_batch_size must be >= 1")
groups = _group_by_postcode_in_order(addresses)
buffer: list[UserAddress] = []
buffer: list[LandlordAssetList] = []
for group in groups.values():
group_len = len(group)
@ -43,9 +43,9 @@ def iter_postcode_grouped_batches(
def _group_by_postcode_in_order(
addresses: Iterable[UserAddress],
) -> dict[Postcode, list[UserAddress]]:
groups: dict[Postcode, list[UserAddress]] = {}
addresses: Iterable[LandlordAssetList],
) -> dict[Postcode, list[LandlordAssetList]]:
groups: dict[Postcode, list[LandlordAssetList]] = {}
for address in addresses:
groups.setdefault(address.postcode, []).append(address)
return groups

View file

@ -11,7 +11,7 @@ def _empty_source_row() -> dict[str, str]:
@dataclass(frozen=True)
class UserAddress:
class LandlordAssetList:
user_address: str
postcode: Postcode
internal_reference: Optional[str] = None

View file

@ -1,5 +1,5 @@
from repositories.user_address.user_address_repository import UserAddressRepository
from domain.addresses.user_address import UserAddress
from domain.addresses.user_address import LandlordAssetList
class LandlordDescriptionOverridesOrchestrator:
@ -9,14 +9,15 @@ class LandlordDescriptionOverridesOrchestrator:
def get_user_address(
self,
input_s3_uri: str,
) -> list[UserAddress]:
) -> list[LandlordAssetList]:
return self._user_address_repo.load_batch(input_s3_uri)
def get_col_to_description_mappings(
self, list_of_user_address: list[UserAddress]
self, list_of_user_address: list[LandlordAssetList]
) -> dict[str, set[str]]:
mappings: dict[str, set[str]] = {}
for user_address in list_of_user_address:
for key, value in user_address.landlord_additional_info.items():
mappings.setdefault(key, set()).add(value)
# Lower-case so case-only typos collapse to one variant.
mappings.setdefault(key, set()).add(value.lower())
return mappings

View file

@ -4,7 +4,7 @@ import uuid
from datetime import datetime, timezone
from typing import Optional
from domain.addresses.user_address import UserAddress
from domain.addresses.user_address import LandlordAssetList
from domain.postcode import Postcode
from infrastructure.csv_s3_client import CsvS3Client
from repositories.user_address.user_address_repository import UserAddressRepository
@ -20,14 +20,14 @@ class UserAddressCsvS3Repository(UserAddressRepository):
self._csv_client = csv_client
self._bucket = bucket
def load_batch(self, s3_uri: str) -> list[UserAddress]:
def load_batch(self, s3_uri: str) -> list[LandlordAssetList]:
rows = self._csv_client.read_rows(s3_uri)
if rows and _POSTCODE_COLUMN not in rows[0]:
raise ValueError(
f"Input CSV {s3_uri} has no {_POSTCODE_COLUMN!r} column; "
f"columns present: {sorted(rows[0])}"
)
addresses: list[UserAddress] = []
addresses: list[LandlordAssetList] = []
for row in rows:
parts = [
row[col].strip()
@ -39,7 +39,7 @@ class UserAddressCsvS3Repository(UserAddressRepository):
raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip()
internal_reference: Optional[str] = raw_ref or None
addresses.append(
UserAddress(
LandlordAssetList(
user_address=user_address,
postcode=Postcode(postcode),
internal_reference=internal_reference,
@ -48,7 +48,7 @@ class UserAddressCsvS3Repository(UserAddressRepository):
)
return addresses
def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str:
def save_batch(self, addresses: list[LandlordAssetList], path_prefix: str) -> str:
rows: list[dict[str, str]] = [
{
**addr.landlord_additional_info,

View file

@ -2,12 +2,14 @@ from __future__ import annotations
from abc import ABC, abstractmethod
from domain.addresses.user_address import UserAddress
from domain.addresses.user_address import LandlordAssetList
class UserAddressRepository(ABC):
@abstractmethod
def load_batch(self, s3_uri: str) -> list[UserAddress]: ...
def load_batch(self, s3_uri: str) -> list[LandlordAssetList]: ...
@abstractmethod
def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str: ...
def save_batch(
self, addresses: list[LandlordAssetList], path_prefix: str
) -> str: ...

View file

@ -1,13 +1,13 @@
import pytest
from domain.addresses.postcode_batching import iter_postcode_grouped_batches
from domain.addresses.user_address import UserAddress
from domain.addresses.user_address import LandlordAssetList
from domain.postcode import Postcode
def _addrs(postcode: str, n: int) -> list[UserAddress]:
def _addrs(postcode: str, n: int) -> list[LandlordAssetList]:
return [
UserAddress(
LandlordAssetList(
user_address=f"{i} {postcode} Street", postcode=Postcode(postcode)
)
for i in range(n)
@ -74,9 +74,7 @@ def test_oversize_group_flushes_existing_buffer_first() -> None:
big = _addrs("BB2 2BB", 7)
tail = _addrs("CC3 3CC", 1)
# act
batches = list(
iter_postcode_grouped_batches(small + big + tail, max_batch_size=5)
)
batches = list(iter_postcode_grouped_batches(small + big + tail, max_batch_size=5))
# assert
assert len(batches) == 3
assert [str(a.postcode) for a in batches[0]] == ["AA11AA", "AA11AA"]

View file

@ -2,13 +2,13 @@ import dataclasses
import pytest
from domain.addresses.user_address import UserAddress
from domain.addresses.user_address import LandlordAssetList
from domain.postcode import Postcode
def test_user_address_holds_postcode_value_object() -> None:
# act
addr = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
# assert
assert addr.postcode == Postcode("SW1A1AA")
@ -17,21 +17,23 @@ def test_user_address_preserves_user_address_verbatim() -> None:
# The free-text user_address string is intentionally NOT normalised --
# only the postcode is canonicalised, and that happens inside Postcode.
# act
addr = UserAddress(user_address=" 1 The Street ", postcode=Postcode("SW1A1AA"))
addr = LandlordAssetList(
user_address=" 1 The Street ", postcode=Postcode("SW1A1AA")
)
# assert
assert addr.user_address == " 1 The Street "
def test_user_address_internal_reference_defaults_to_none() -> None:
# act
addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
# assert
assert addr.internal_reference is None
def test_user_address_internal_reference_accepted() -> None:
# act
addr = UserAddress(
addr = LandlordAssetList(
user_address="1 The Street",
postcode=Postcode("SW1A1AA"),
internal_reference="cust-42",
@ -42,7 +44,7 @@ def test_user_address_internal_reference_accepted() -> None:
def test_user_address_is_frozen() -> None:
# arrange
addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
# act / assert
with pytest.raises(dataclasses.FrozenInstanceError):
addr.postcode = Postcode("OTHER") # type: ignore[misc]
@ -52,15 +54,15 @@ def test_user_address_equality_uses_canonical_postcode() -> None:
# Postcode sanitises eagerly, so addresses built from different surface
# forms of the same postcode compare equal.
# arrange
a = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
b = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
a = LandlordAssetList(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
b = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
# act / assert
assert a == b
def test_user_address_source_row_defaults_to_empty_dict() -> None:
# act
addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
# assert
assert addr.landlord_additional_info == {}
@ -69,7 +71,7 @@ def test_user_address_carries_source_row() -> None:
# arrange
row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
# act
addr = UserAddress(
addr = LandlordAssetList(
user_address="1 The Street",
postcode=Postcode("SW1A 1AA"),
landlord_additional_info=row,
@ -82,12 +84,12 @@ def test_user_address_equality_ignores_source_row() -> None:
# source_row is excluded from equality (and hashing): identity stays
# defined by the parsed fields.
# arrange
a = UserAddress(
a = LandlordAssetList(
user_address="1 The Street",
postcode=Postcode("SW1A1AA"),
landlord_additional_info={"x": "1"},
)
b = UserAddress(
b = LandlordAssetList(
user_address="1 The Street",
postcode=Postcode("SW1A1AA"),
landlord_additional_info={"y": "2"},

View file

@ -1,6 +1,6 @@
from __future__ import annotations
from domain.addresses.user_address import UserAddress
from domain.addresses.user_address import LandlordAssetList
from domain.postcode import Postcode
from orchestration.landlord_description_overrides_orchestrator import (
LandlordDescriptionOverridesOrchestrator,
@ -11,15 +11,15 @@ from repositories.user_address.user_address_repository import UserAddressReposit
class _StubUserAddressRepository(UserAddressRepository):
"""``get_col_to_description_mappings`` never touches the repo."""
def load_batch(self, s3_uri: str) -> list[UserAddress]:
def load_batch(self, s3_uri: str) -> list[LandlordAssetList]:
raise NotImplementedError()
def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str:
def save_batch(self, addresses: list[LandlordAssetList], path_prefix: str) -> str:
raise NotImplementedError()
def _make_user_address(landlord_additional_info: dict[str, str]) -> UserAddress:
return UserAddress(
def _make_user_address(landlord_additional_info: dict[str, str]) -> LandlordAssetList:
return LandlordAssetList(
user_address="1 High St",
postcode=Postcode("AA1 1AA"),
landlord_additional_info=landlord_additional_info,
@ -65,6 +65,21 @@ def test_repeated_values_collapse_to_one_variant() -> None:
assert mappings == {"description": {"cosy", "bright"}}
def test_case_only_variants_collapse_to_one() -> None:
# arrange: the same description typed with inconsistent casing.
addresses = [
_make_user_address({"description": "Cosy"}),
_make_user_address({"description": "cosy"}),
_make_user_address({"description": "COSY"}),
]
# act
mappings = _orchestrator().get_col_to_description_mappings(addresses)
# assert: lower-casing folds the casing typos into one variant.
assert mappings == {"description": {"cosy"}}
def test_empty_address_list_yields_empty_mapping() -> None:
# arrange / act
mappings = _orchestrator().get_col_to_description_mappings([])

View file

@ -3,7 +3,7 @@ from collections.abc import Iterator
import pytest
from moto import mock_aws
from domain.addresses.user_address import UserAddress
from domain.addresses.user_address import LandlordAssetList
from domain.postcode import Postcode
from infrastructure.csv_s3_client import CsvS3Client
from repositories.user_address.user_address_csv_s3_repository import (
@ -173,7 +173,7 @@ def test_save_batch_returns_uri_under_path_prefix(
) -> None:
# arrange
addresses = [
UserAddress(
LandlordAssetList(
user_address="1 High Street",
postcode=Postcode("SW1A 1AA"),
landlord_additional_info={
@ -229,7 +229,7 @@ def test_save_batch_uses_unique_filename_per_call(
) -> None:
# arrange
addresses = [
UserAddress(
LandlordAssetList(
user_address="1 High Street",
postcode=Postcode("SW1A 1AA"),
landlord_additional_info={