asset list from landlord

This commit is contained in:
Jun-te Kim 2026-05-22 07:34:50 +00:00
parent 94cbf5f516
commit acb306f7b9
10 changed files with 78 additions and 84 deletions

View file

@ -7,7 +7,7 @@ from infrastructure.csv_s3_client import CsvS3Client
from repositories.user_address.user_address_csv_s3_repository import (
UserAddressCsvS3Repository,
)
from domain.addresses.user_address import LandlordAssetList
from domain.addresses.user_address import AssetList
def handler(
@ -32,7 +32,7 @@ def handler(
user_address_repo=user_address_repo,
)
list_of_user_address: list[LandlordAssetList] = orchestrator.get_user_address(
list_of_user_address: list[AssetList] = orchestrator.get_user_address(
input_s3_uri=s3_uri
)

View file

@ -11,10 +11,10 @@ def _empty_source_row() -> dict[str, str]:
@dataclass(frozen=True)
class AssetList:
    """Immutable record describing one asset: address, postcode and extras.

    Instances are frozen, so fields cannot be reassigned after construction.
    Equality (and the generated hash) consider ``address``, ``postcode`` and
    ``org_reference`` only; ``additional_info`` is declared ``compare=False``.
    """

    # Free-text address string.
    address: str
    # Postcode value object for this asset.
    postcode: Postcode
    # Optional reference for the asset; ``None`` when not supplied.
    org_reference: Optional[str] = None
    # Extra column -> value data carried with the asset. Built per instance by
    # ``_empty_source_row`` when omitted; excluded from comparisons.
    additional_info: dict[str, str] = field(
        default_factory=_empty_source_row, compare=False
    )

View file

@ -2,21 +2,21 @@ from __future__ import annotations
from collections.abc import Iterable, Iterator
from domain.addresses.user_address import LandlordAssetList
from domain.addresses.user_address import AssetList
from domain.postcode import Postcode
def iter_postcode_grouped_batches(
addresses: Iterable[LandlordAssetList],
addresses: Iterable[AssetList],
*,
max_batch_size: int = 500,
) -> Iterator[list[LandlordAssetList]]:
) -> Iterator[list[AssetList]]:
if max_batch_size < 1:
raise ValueError("max_batch_size must be >= 1")
groups = _group_by_postcode_in_order(addresses)
buffer: list[LandlordAssetList] = []
buffer: list[AssetList] = []
for group in groups.values():
group_len = len(group)
@ -43,9 +43,9 @@ def iter_postcode_grouped_batches(
def _group_by_postcode_in_order(
    addresses: Iterable[AssetList],
) -> dict[Postcode, list[AssetList]]:
    """Bucket addresses by postcode, preserving first-seen order.

    Args:
        addresses: Assets to group; each must expose a hashable ``postcode``.

    Returns:
        A dict mapping each postcode to the addresses carrying it. Keys appear
        in the order their postcode was first encountered, and every list
        keeps the input order of its members.
    """
    groups: dict[Postcode, list[AssetList]] = {}
    for address in addresses:
        # setdefault creates the bucket on first sight; dict insertion order
        # is what keeps the groups in first-seen order.
        groups.setdefault(address.postcode, []).append(address)
    return groups

View file

@ -1,5 +1,5 @@
from repositories.user_address.user_address_repository import UserAddressRepository
from domain.addresses.user_address import LandlordAssetList
from domain.addresses.user_address import AssetList
class LandlordDescriptionOverridesOrchestrator:
@ -9,15 +9,15 @@ class LandlordDescriptionOverridesOrchestrator:
def get_user_address(
    self,
    input_s3_uri: str,
) -> list[AssetList]:
    """Load the asset list stored at ``input_s3_uri``.

    Thin delegation to the injected repository's ``load_batch``.

    Args:
        input_s3_uri: Location of the input, passed through unchanged.

    Returns:
        Whatever the repository's ``load_batch`` returns for that URI.
    """
    return self._user_address_repo.load_batch(input_s3_uri)
def get_col_to_description_mappings(
    self, list_of_user_address: list[AssetList]
) -> dict[str, set[str]]:
    """Collect the distinct lower-cased values seen under each column.

    Args:
        list_of_user_address: Assets whose ``additional_info`` mappings are
            scanned.

    Returns:
        A dict keyed by every ``additional_info`` key encountered, mapping to
        the set of lower-cased values found under that key. Empty input yields
        an empty dict.
    """
    mappings: dict[str, set[str]] = {}
    for asset in list_of_user_address:
        for key, value in asset.additional_info.items():
            # Lower-case so case-only typos collapse to one variant.
            mappings.setdefault(key, set()).add(value.lower())
    return mappings

View file

@ -4,7 +4,7 @@ import uuid
from datetime import datetime, timezone
from typing import Optional
from domain.addresses.user_address import LandlordAssetList
from domain.addresses.user_address import AssetList
from domain.postcode import Postcode
from infrastructure.csv_s3_client import CsvS3Client
from repositories.user_address.user_address_repository import UserAddressRepository
@ -20,14 +20,14 @@ class UserAddressCsvS3Repository(UserAddressRepository):
self._csv_client = csv_client
self._bucket = bucket
def load_batch(self, s3_uri: str) -> list[LandlordAssetList]:
def load_batch(self, s3_uri: str) -> list[AssetList]:
rows = self._csv_client.read_rows(s3_uri)
if rows and _POSTCODE_COLUMN not in rows[0]:
raise ValueError(
f"Input CSV {s3_uri} has no {_POSTCODE_COLUMN!r} column; "
f"columns present: {sorted(rows[0])}"
)
addresses: list[LandlordAssetList] = []
addresses: list[AssetList] = []
for row in rows:
parts = [
row[col].strip()
@ -39,19 +39,19 @@ class UserAddressCsvS3Repository(UserAddressRepository):
raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip()
internal_reference: Optional[str] = raw_ref or None
addresses.append(
LandlordAssetList(
user_address=user_address,
AssetList(
address=user_address,
postcode=Postcode(postcode),
internal_reference=internal_reference,
landlord_additional_info=row,
org_reference=internal_reference,
additional_info=row,
)
)
return addresses
def save_batch(self, addresses: list[LandlordAssetList], path_prefix: str) -> str:
def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str:
rows: list[dict[str, str]] = [
{
**addr.landlord_additional_info,
**addr.additional_info,
_POSTCODE_CLEAN_COLUMN: str(addr.postcode),
}
for addr in addresses

View file

@ -2,14 +2,12 @@ from __future__ import annotations
from abc import ABC, abstractmethod
from domain.addresses.user_address import LandlordAssetList
from domain.addresses.user_address import AssetList
class UserAddressRepository(ABC):
    """Abstract storage interface for batches of ``AssetList`` records.

    Concrete subclasses must implement both methods before they can be
    instantiated.
    """

    @abstractmethod
    def load_batch(self, s3_uri: str) -> list[AssetList]:
        """Return the assets read from ``s3_uri``."""
        ...

    @abstractmethod
    def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str:
        """Persist ``addresses`` under ``path_prefix`` and return a string.

        NOTE(review): presumably the returned string is the location of the
        written batch -- confirm against the concrete implementation.
        """
        ...

View file

@ -1,15 +1,13 @@
import pytest
from domain.addresses.postcode_batching import iter_postcode_grouped_batches
from domain.addresses.user_address import LandlordAssetList
from domain.addresses.user_address import AssetList
from domain.postcode import Postcode
def _addrs(postcode: str, n: int) -> list[AssetList]:
    """Build ``n`` assets that share ``postcode`` but differ in address.

    Each address is ``"<i> <postcode> Street"`` for ``i`` in ``range(n)``.
    """
    return [
        AssetList(address=f"{i} {postcode} Street", postcode=Postcode(postcode))
        for i in range(n)
    ]

View file

@ -2,13 +2,13 @@ import dataclasses
import pytest
from domain.addresses.user_address import LandlordAssetList
from domain.addresses.user_address import AssetList
from domain.postcode import Postcode
def test_user_address_holds_postcode_value_object() -> None:
    """The ``postcode`` field compares equal to the canonical ``Postcode``."""
    # act
    addr = AssetList(address="1 The Street", postcode=Postcode("sw1a 1aa"))

    # assert
    assert addr.postcode == Postcode("SW1A1AA")
@ -17,34 +17,32 @@ def test_user_address_preserves_user_address_verbatim() -> None:
# The free-text address string is intentionally NOT normalised --
# only the postcode is canonicalised, and that happens inside Postcode.
# act
addr = LandlordAssetList(
user_address=" 1 The Street ", postcode=Postcode("SW1A1AA")
)
addr = AssetList(address=" 1 The Street ", postcode=Postcode("SW1A1AA"))
# assert
assert addr.user_address == " 1 The Street "
assert addr.address == " 1 The Street "
def test_user_address_internal_reference_defaults_to_none() -> None:
    """``org_reference`` is ``None`` when not passed to the constructor."""
    # act
    addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))

    # assert
    assert addr.org_reference is None
def test_user_address_internal_reference_accepted() -> None:
    """An explicit ``org_reference`` is stored unchanged."""
    # act
    addr = AssetList(
        address="1 The Street",
        postcode=Postcode("SW1A1AA"),
        org_reference="cust-42",
    )

    # assert
    assert addr.org_reference == "cust-42"
def test_user_address_is_frozen() -> None:
    """Assigning to a field of a constructed ``AssetList`` raises."""
    # arrange
    addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))

    # act / assert
    with pytest.raises(dataclasses.FrozenInstanceError):
        addr.postcode = Postcode("OTHER")  # type: ignore[misc]
@ -54,45 +52,45 @@ def test_user_address_equality_uses_canonical_postcode() -> None:
# Postcode sanitises eagerly, so addresses built from different surface
# forms of the same postcode compare equal.
# arrange
a = LandlordAssetList(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
b = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
a = AssetList(address="1 The Street", postcode=Postcode("sw1a 1aa"))
b = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
# act / assert
assert a == b
def test_user_address_source_row_defaults_to_empty_dict() -> None:
    """``additional_info`` is an empty dict when not supplied."""
    # act
    addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))

    # assert
    assert addr.additional_info == {}
def test_user_address_carries_source_row() -> None:
    """A mapping passed as ``additional_info`` is readable back unchanged."""
    # arrange
    row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}

    # act
    addr = AssetList(
        address="1 The Street",
        postcode=Postcode("SW1A 1AA"),
        additional_info=row,
    )

    # assert
    assert addr.additional_info == row
def test_user_address_equality_ignores_source_row() -> None:
    """Two assets differing only in ``additional_info`` compare equal."""
    # additional_info is excluded from equality (and hashing): identity stays
    # defined by the parsed fields.
    # arrange
    a = AssetList(
        address="1 The Street",
        postcode=Postcode("SW1A1AA"),
        additional_info={"x": "1"},
    )
    b = AssetList(
        address="1 The Street",
        postcode=Postcode("SW1A1AA"),
        additional_info={"y": "2"},
    )

    # act / assert
    assert a == b

View file

@ -1,6 +1,6 @@
from __future__ import annotations
from domain.addresses.user_address import LandlordAssetList
from domain.addresses.user_address import AssetList
from domain.postcode import Postcode
from orchestration.landlord_description_overrides_orchestrator import (
LandlordDescriptionOverridesOrchestrator,
@ -11,18 +11,18 @@ from repositories.user_address.user_address_repository import UserAddressReposit
class _StubUserAddressRepository(UserAddressRepository):
    """``get_col_to_description_mappings`` never touches the repo.

    Both methods raise so that any unexpected repository access fails loudly.
    """

    def load_batch(self, s3_uri: str) -> list[AssetList]:
        raise NotImplementedError()

    def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str:
        raise NotImplementedError()
def _make_user_address(landlord_additional_info: dict[str, str]) -> AssetList:
    """Build an ``AssetList`` with a fixed address/postcode and the given extras.

    Args:
        landlord_additional_info: Mapping stored as the asset's
            ``additional_info``. The parameter keeps its pre-rename name so
            existing keyword callers continue to work.
    """
    return AssetList(
        address="1 High St",
        postcode=Postcode("AA1 1AA"),
        additional_info=landlord_additional_info,
    )

View file

@ -3,7 +3,7 @@ from collections.abc import Iterator
import pytest
from moto import mock_aws
from domain.addresses.user_address import LandlordAssetList
from domain.addresses.user_address import AssetList
from domain.postcode import Postcode
from infrastructure.csv_s3_client import CsvS3Client
from repositories.user_address.user_address_csv_s3_repository import (
@ -50,9 +50,9 @@ def test_load_batch_parses_address_postcode_and_reference(
# assert
assert len(addresses) == 1
address = addresses[0]
assert address.user_address == "1 High Street, Flat 2, Townville"
assert address.address == "1 High Street, Flat 2, Townville"
assert address.postcode == Postcode("SW1A1AA")
assert address.internal_reference == "REF-001"
assert address.org_reference == "REF-001"
def test_load_batch_uses_only_address_1_when_others_missing(
@ -75,9 +75,9 @@ def test_load_batch_uses_only_address_1_when_others_missing(
# assert
assert len(addresses) == 1
assert addresses[0].user_address == "10 Cardiff Road"
assert addresses[0].address == "10 Cardiff Road"
assert addresses[0].postcode == Postcode("CF101AA")
assert addresses[0].internal_reference == "REF-002"
assert addresses[0].org_reference == "REF-002"
def test_load_batch_handles_missing_internal_reference(
@ -100,9 +100,9 @@ def test_load_batch_handles_missing_internal_reference(
# assert
assert len(addresses) == 1
assert addresses[0].user_address == "5 Park Lane"
assert addresses[0].address == "5 Park Lane"
assert addresses[0].postcode == Postcode("M11AA")
assert addresses[0].internal_reference is None
assert addresses[0].org_reference is None
def test_load_batch_captures_full_source_row(
@ -124,7 +124,7 @@ def test_load_batch_captures_full_source_row(
addresses = repo.load_batch(uri)
# assert
assert addresses[0].landlord_additional_info == row
assert addresses[0].additional_info == row
def test_load_batch_raises_when_postcode_column_absent(
@ -173,10 +173,10 @@ def test_save_batch_returns_uri_under_path_prefix(
) -> None:
# arrange
addresses = [
LandlordAssetList(
user_address="1 High Street",
AssetList(
address="1 High Street",
postcode=Postcode("SW1A 1AA"),
landlord_additional_info={
additional_info={
"Address 1": "1 High Street",
"postcode": "SW1A 1AA",
},
@ -229,10 +229,10 @@ def test_save_batch_uses_unique_filename_per_call(
) -> None:
# arrange
addresses = [
LandlordAssetList(
user_address="1 High Street",
AssetList(
address="1 High Street",
postcode=Postcode("SW1A 1AA"),
landlord_additional_info={
additional_info={
"Address 1": "1 High Street",
"postcode": "SW1A 1AA",
},