From acb306f7b9dc7a67fc9b3d371df08abdbf471961 Mon Sep 17 00:00:00 2001
From: Jun-te Kim
Date: Fri, 22 May 2026 07:34:50 +0000
Subject: [PATCH] asset list from landlord

Rename the LandlordAssetList dataclass to AssetList and move its module
from domain/addresses/user_address.py to domain/addresses/asset_list.py.

Field renames on the dataclass:
  user_address             -> address
  internal_reference       -> org_reference
  landlord_additional_info -> additional_info

Every importer is switched to domain.addresses.asset_list, because the
old module path no longer exists once the rename is applied.

---
Reviewer notes (ignored by git am):
 * The added import lines originally still pointed at
   domain.addresses.user_address, which this patch renames away; they now
   point at domain.addresses.asset_list.
 * Leading whitespace and blank context lines were reconstructed from a
   flattened copy of the patch -- verify against the working tree.
 * The post-image blob ids on the "index" lines predate the import-path
   correction; regenerate the patch if exact ids matter.

 .../landlord_description_overrides/handler.py |  4 +-
 .../{user_address.py => asset_list.py}        |  8 +--
 domain/addresses/postcode_batching.py         | 14 ++---
 ...lord_description_overrides_orchestrator.py |  8 +--
 .../user_address_csv_s3_repository.py         | 18 +++----
 .../user_address/user_address_repository.py   |  8 ++-
 .../addresses/test_postcode_batching.py       |  8 ++-
 tests/domain/addresses/test_user_address.py   | 52 +++++++++----------
 ...lord_description_overrides_orchestrator.py | 14 ++---
 .../test_user_address_csv_s3_repository.py    | 28 +++++-----
 10 files changed, 78 insertions(+), 84 deletions(-)
 rename domain/addresses/{user_address.py => asset_list.py} (68%)

diff --git a/applications/landlord_description_overrides/handler.py b/applications/landlord_description_overrides/handler.py
index 2655beb9..2691d6d2 100644
--- a/applications/landlord_description_overrides/handler.py
+++ b/applications/landlord_description_overrides/handler.py
@@ -7,7 +7,7 @@ from infrastructure.csv_s3_client import CsvS3Client
 from repositories.user_address.user_address_csv_s3_repository import (
     UserAddressCsvS3Repository,
 )
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.asset_list import AssetList
 
 
 def handler(
@@ -32,7 +32,7 @@ def handler(
         user_address_repo=user_address_repo,
     )
 
-    list_of_user_address: list[LandlordAssetList] = orchestrator.get_user_address(
+    list_of_user_address: list[AssetList] = orchestrator.get_user_address(
         input_s3_uri=s3_uri
     )
 
diff --git a/domain/addresses/user_address.py b/domain/addresses/asset_list.py
similarity index 68%
rename from domain/addresses/user_address.py
rename to domain/addresses/asset_list.py
index c93f46e5..1332aa2e 100644
--- a/domain/addresses/user_address.py
+++ b/domain/addresses/asset_list.py
@@ -11,10 +11,10 @@ def _empty_source_row() -> dict[str, str]:
 
 
 @dataclass(frozen=True)
-class LandlordAssetList:
-    user_address: str
+class AssetList:
+    address: str
     postcode: Postcode
-    internal_reference: Optional[str] = None
-    landlord_additional_info: dict[str, str] = field(
+    org_reference: Optional[str] = None
+    additional_info: dict[str, str] = field(
         default_factory=_empty_source_row, compare=False
     )
diff --git a/domain/addresses/postcode_batching.py b/domain/addresses/postcode_batching.py
index d4d04b00..fe63605e 100644
--- a/domain/addresses/postcode_batching.py
+++ b/domain/addresses/postcode_batching.py
@@ -2,21 +2,21 @@ from __future__ import annotations
 
 from collections.abc import Iterable, Iterator
 
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.asset_list import AssetList
 from domain.postcode import Postcode
 
 
 def iter_postcode_grouped_batches(
-    addresses: Iterable[LandlordAssetList],
+    addresses: Iterable[AssetList],
     *,
     max_batch_size: int = 500,
-) -> Iterator[list[LandlordAssetList]]:
+) -> Iterator[list[AssetList]]:
     if max_batch_size < 1:
         raise ValueError("max_batch_size must be >= 1")
 
     groups = _group_by_postcode_in_order(addresses)
 
-    buffer: list[LandlordAssetList] = []
+    buffer: list[AssetList] = []
     for group in groups.values():
         group_len = len(group)
 
@@ -43,9 +43,9 @@ def iter_postcode_grouped_batches(
 
 
 def _group_by_postcode_in_order(
-    addresses: Iterable[LandlordAssetList],
-) -> dict[Postcode, list[LandlordAssetList]]:
-    groups: dict[Postcode, list[LandlordAssetList]] = {}
+    addresses: Iterable[AssetList],
+) -> dict[Postcode, list[AssetList]]:
+    groups: dict[Postcode, list[AssetList]] = {}
     for address in addresses:
         groups.setdefault(address.postcode, []).append(address)
     return groups
diff --git a/orchestration/landlord_description_overrides_orchestrator.py b/orchestration/landlord_description_overrides_orchestrator.py
index 9321994d..18132667 100644
--- a/orchestration/landlord_description_overrides_orchestrator.py
+++ b/orchestration/landlord_description_overrides_orchestrator.py
@@ -1,5 +1,5 @@
 from repositories.user_address.user_address_repository import UserAddressRepository
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.asset_list import AssetList
 
 
 class LandlordDescriptionOverridesOrchestrator:
@@ -9,15 +9,15 @@ class LandlordDescriptionOverridesOrchestrator:
     def get_user_address(
         self,
         input_s3_uri: str,
-    ) -> list[LandlordAssetList]:
+    ) -> list[AssetList]:
         return self._user_address_repo.load_batch(input_s3_uri)
 
     def get_col_to_description_mappings(
-        self, list_of_user_address: list[LandlordAssetList]
+        self, list_of_user_address: list[AssetList]
     ) -> dict[str, set[str]]:
         mappings: dict[str, set[str]] = {}
         for user_address in list_of_user_address:
-            for key, value in user_address.landlord_additional_info.items():
+            for key, value in user_address.additional_info.items():
                 # Lower-case so case-only typos collapse to one variant.
                 mappings.setdefault(key, set()).add(value.lower())
         return mappings
diff --git a/repositories/user_address/user_address_csv_s3_repository.py b/repositories/user_address/user_address_csv_s3_repository.py
index 612a52ec..adbbfe3e 100644
--- a/repositories/user_address/user_address_csv_s3_repository.py
+++ b/repositories/user_address/user_address_csv_s3_repository.py
@@ -4,7 +4,7 @@ import uuid
 from datetime import datetime, timezone
 from typing import Optional
 
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.asset_list import AssetList
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
 from repositories.user_address.user_address_repository import UserAddressRepository
@@ -20,14 +20,14 @@ class UserAddressCsvS3Repository(UserAddressRepository):
         self._csv_client = csv_client
         self._bucket = bucket
 
-    def load_batch(self, s3_uri: str) -> list[LandlordAssetList]:
+    def load_batch(self, s3_uri: str) -> list[AssetList]:
         rows = self._csv_client.read_rows(s3_uri)
         if rows and _POSTCODE_COLUMN not in rows[0]:
             raise ValueError(
                 f"Input CSV {s3_uri} has no {_POSTCODE_COLUMN!r} column; "
                 f"columns present: {sorted(rows[0])}"
             )
-        addresses: list[LandlordAssetList] = []
+        addresses: list[AssetList] = []
         for row in rows:
             parts = [
                 row[col].strip()
@@ -39,19 +39,19 @@ class UserAddressCsvS3Repository(UserAddressRepository):
             raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip()
             internal_reference: Optional[str] = raw_ref or None
             addresses.append(
-                LandlordAssetList(
-                    user_address=user_address,
+                AssetList(
+                    address=user_address,
                     postcode=Postcode(postcode),
-                    internal_reference=internal_reference,
-                    landlord_additional_info=row,
+                    org_reference=internal_reference,
+                    additional_info=row,
                 )
             )
         return addresses
 
-    def save_batch(self, addresses: list[LandlordAssetList], path_prefix: str) -> str:
+    def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str:
         rows: list[dict[str, str]] = [
             {
-                **addr.landlord_additional_info,
+                **addr.additional_info,
                 _POSTCODE_CLEAN_COLUMN: str(addr.postcode),
             }
             for addr in addresses
diff --git a/repositories/user_address/user_address_repository.py b/repositories/user_address/user_address_repository.py
index b89247c5..eafd0e1d 100644
--- a/repositories/user_address/user_address_repository.py
+++ b/repositories/user_address/user_address_repository.py
@@ -2,14 +2,12 @@ from __future__ import annotations
 
 from abc import ABC, abstractmethod
 
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.asset_list import AssetList
 
 
 class UserAddressRepository(ABC):
     @abstractmethod
-    def load_batch(self, s3_uri: str) -> list[LandlordAssetList]: ...
+    def load_batch(self, s3_uri: str) -> list[AssetList]: ...
 
     @abstractmethod
-    def save_batch(
-        self, addresses: list[LandlordAssetList], path_prefix: str
-    ) -> str: ...
+    def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str: ...
diff --git a/tests/domain/addresses/test_postcode_batching.py b/tests/domain/addresses/test_postcode_batching.py
index 82e5ced7..4aaeef10 100644
--- a/tests/domain/addresses/test_postcode_batching.py
+++ b/tests/domain/addresses/test_postcode_batching.py
@@ -1,15 +1,13 @@
 import pytest
 
 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.asset_list import AssetList
 from domain.postcode import Postcode
 
 
-def _addrs(postcode: str, n: int) -> list[LandlordAssetList]:
+def _addrs(postcode: str, n: int) -> list[AssetList]:
     return [
-        LandlordAssetList(
-            user_address=f"{i} {postcode} Street", postcode=Postcode(postcode)
-        )
+        AssetList(address=f"{i} {postcode} Street", postcode=Postcode(postcode))
         for i in range(n)
     ]
 
diff --git a/tests/domain/addresses/test_user_address.py b/tests/domain/addresses/test_user_address.py
index 39c52283..be065995 100644
--- a/tests/domain/addresses/test_user_address.py
+++ b/tests/domain/addresses/test_user_address.py
@@ -2,13 +2,13 @@ import dataclasses
 
 import pytest
 
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.asset_list import AssetList
 from domain.postcode import Postcode
 
 
 def test_user_address_holds_postcode_value_object() -> None:
     # act
-    addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    addr = AssetList(address="1 The Street", postcode=Postcode("sw1a 1aa"))
     # assert
     assert addr.postcode == Postcode("SW1A1AA")
 
@@ -17,34 +17,32 @@ def test_user_address_preserves_user_address_verbatim() -> None:
     # The free-text user_address string is intentionally NOT normalised --
     # only the postcode is canonicalised, and that happens inside Postcode.
     # act
-    addr = LandlordAssetList(
-        user_address=" 1 The Street ", postcode=Postcode("SW1A1AA")
-    )
+    addr = AssetList(address=" 1 The Street ", postcode=Postcode("SW1A1AA"))
     # assert
-    assert addr.user_address == " 1 The Street "
+    assert addr.address == " 1 The Street "
 
 
 def test_user_address_internal_reference_defaults_to_none() -> None:
     # act
-    addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # assert
-    assert addr.internal_reference is None
+    assert addr.org_reference is None
 
 
 def test_user_address_internal_reference_accepted() -> None:
     # act
-    addr = LandlordAssetList(
-        user_address="1 The Street",
+    addr = AssetList(
+        address="1 The Street",
         postcode=Postcode("SW1A1AA"),
-        internal_reference="cust-42",
+        org_reference="cust-42",
     )
     # assert
-    assert addr.internal_reference == "cust-42"
+    assert addr.org_reference == "cust-42"
 
 
 def test_user_address_is_frozen() -> None:
     # arrange
-    addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # act / assert
     with pytest.raises(dataclasses.FrozenInstanceError):
         addr.postcode = Postcode("OTHER")  # type: ignore[misc]
@@ -54,45 +52,45 @@ def test_user_address_equality_uses_canonical_postcode() -> None:
     # Postcode sanitises eagerly, so addresses built from different surface
     # forms of the same postcode compare equal.
     # arrange
-    a = LandlordAssetList(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
-    b = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    a = AssetList(address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    b = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # act / assert
     assert a == b
 
 
 def test_user_address_source_row_defaults_to_empty_dict() -> None:
     # act
-    addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # assert
-    assert addr.landlord_additional_info == {}
+    assert addr.additional_info == {}
 
 
 def test_user_address_carries_source_row() -> None:
     # arrange
     row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
     # act
-    addr = LandlordAssetList(
-        user_address="1 The Street",
+    addr = AssetList(
+        address="1 The Street",
         postcode=Postcode("SW1A 1AA"),
-        landlord_additional_info=row,
+        additional_info=row,
     )
     # assert
-    assert addr.landlord_additional_info == row
+    assert addr.additional_info == row
 
 
 def test_user_address_equality_ignores_source_row() -> None:
     # source_row is excluded from equality (and hashing): identity stays
     # defined by the parsed fields.
     # arrange
-    a = LandlordAssetList(
-        user_address="1 The Street",
+    a = AssetList(
+        address="1 The Street",
         postcode=Postcode("SW1A1AA"),
-        landlord_additional_info={"x": "1"},
+        additional_info={"x": "1"},
     )
-    b = LandlordAssetList(
-        user_address="1 The Street",
+    b = AssetList(
+        address="1 The Street",
         postcode=Postcode("SW1A1AA"),
-        landlord_additional_info={"y": "2"},
+        additional_info={"y": "2"},
     )
     # act / assert
     assert a == b
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
index c7197071..26cf46b4 100644
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.asset_list import AssetList
 from domain.postcode import Postcode
 from orchestration.landlord_description_overrides_orchestrator import (
     LandlordDescriptionOverridesOrchestrator,
@@ -11,18 +11,18 @@ from repositories.user_address.user_address_repository import UserAddressReposit
 class _StubUserAddressRepository(UserAddressRepository):
     """``get_col_to_description_mappings`` never touches the repo."""
 
-    def load_batch(self, s3_uri: str) -> list[LandlordAssetList]:
+    def load_batch(self, s3_uri: str) -> list[AssetList]:
         raise NotImplementedError()
 
-    def save_batch(self, addresses: list[LandlordAssetList], path_prefix: str) -> str:
+    def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str:
         raise NotImplementedError()
 
 
-def _make_user_address(landlord_additional_info: dict[str, str]) -> LandlordAssetList:
-    return LandlordAssetList(
-        user_address="1 High St",
+def _make_user_address(landlord_additional_info: dict[str, str]) -> AssetList:
+    return AssetList(
+        address="1 High St",
         postcode=Postcode("AA1 1AA"),
-        landlord_additional_info=landlord_additional_info,
+        additional_info=landlord_additional_info,
     )
 
 
diff --git a/tests/repositories/user_address/test_user_address_csv_s3_repository.py b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
index 9d53b35b..dc97f0e3 100644
--- a/tests/repositories/user_address/test_user_address_csv_s3_repository.py
+++ b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
@@ -3,7 +3,7 @@ from collections.abc import Iterator
 import pytest
 from moto import mock_aws
 
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.asset_list import AssetList
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
 from repositories.user_address.user_address_csv_s3_repository import (
@@ -50,9 +50,9 @@ def test_load_batch_parses_address_postcode_and_reference(
     # assert
     assert len(addresses) == 1
     address = addresses[0]
-    assert address.user_address == "1 High Street, Flat 2, Townville"
+    assert address.address == "1 High Street, Flat 2, Townville"
     assert address.postcode == Postcode("SW1A1AA")
-    assert address.internal_reference == "REF-001"
+    assert address.org_reference == "REF-001"
 
 
 def test_load_batch_uses_only_address_1_when_others_missing(
@@ -75,9 +75,9 @@ def test_load_batch_uses_only_address_1_when_others_missing(
 
     # assert
     assert len(addresses) == 1
-    assert addresses[0].user_address == "10 Cardiff Road"
+    assert addresses[0].address == "10 Cardiff Road"
     assert addresses[0].postcode == Postcode("CF101AA")
-    assert addresses[0].internal_reference == "REF-002"
+    assert addresses[0].org_reference == "REF-002"
 
 
 def test_load_batch_handles_missing_internal_reference(
@@ -100,9 +100,9 @@ def test_load_batch_handles_missing_internal_reference(
 
     # assert
     assert len(addresses) == 1
-    assert addresses[0].user_address == "5 Park Lane"
+    assert addresses[0].address == "5 Park Lane"
     assert addresses[0].postcode == Postcode("M11AA")
-    assert addresses[0].internal_reference is None
+    assert addresses[0].org_reference is None
 
 
 def test_load_batch_captures_full_source_row(
@@ -124,7 +124,7 @@ def test_load_batch_captures_full_source_row(
     addresses = repo.load_batch(uri)
 
     # assert
-    assert addresses[0].landlord_additional_info == row
+    assert addresses[0].additional_info == row
 
 
 def test_load_batch_raises_when_postcode_column_absent(
@@ -173,10 +173,10 @@ def test_save_batch_returns_uri_under_path_prefix(
 ) -> None:
     # arrange
     addresses = [
-        LandlordAssetList(
-            user_address="1 High Street",
+        AssetList(
+            address="1 High Street",
             postcode=Postcode("SW1A 1AA"),
-            landlord_additional_info={
+            additional_info={
                 "Address 1": "1 High Street",
                 "postcode": "SW1A 1AA",
             },
@@ -229,10 +229,10 @@ def test_save_batch_uses_unique_filename_per_call(
 ) -> None:
     # arrange
     addresses = [
-        LandlordAssetList(
-            user_address="1 High Street",
+        AssetList(
+            address="1 High Street",
             postcode=Postcode("SW1A 1AA"),
-            landlord_additional_info={
+            additional_info={
                 "Address 1": "1 High Street",
                 "postcode": "SW1A 1AA",
             },