mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
changed useraddress landlordasset list
This commit is contained in:
parent
8baa4c82aa
commit
94cbf5f516
10 changed files with 66 additions and 48 deletions
|
|
@ -7,7 +7,7 @@ from infrastructure.csv_s3_client import CsvS3Client
|
|||
from repositories.user_address.user_address_csv_s3_repository import (
|
||||
UserAddressCsvS3Repository,
|
||||
)
|
||||
from domain.addresses.user_address import UserAddress
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
|
||||
|
||||
def handler(
|
||||
|
|
@ -32,7 +32,7 @@ def handler(
|
|||
user_address_repo=user_address_repo,
|
||||
)
|
||||
|
||||
list_of_user_address: list[UserAddress] = orchestrator.get_user_address(
|
||||
list_of_user_address: list[LandlordAssetList] = orchestrator.get_user_address(
|
||||
input_s3_uri=s3_uri
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,21 +2,21 @@ from __future__ import annotations
|
|||
|
||||
from collections.abc import Iterable, Iterator
|
||||
|
||||
from domain.addresses.user_address import UserAddress
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.postcode import Postcode
|
||||
|
||||
|
||||
def iter_postcode_grouped_batches(
|
||||
addresses: Iterable[UserAddress],
|
||||
addresses: Iterable[LandlordAssetList],
|
||||
*,
|
||||
max_batch_size: int = 500,
|
||||
) -> Iterator[list[UserAddress]]:
|
||||
) -> Iterator[list[LandlordAssetList]]:
|
||||
if max_batch_size < 1:
|
||||
raise ValueError("max_batch_size must be >= 1")
|
||||
|
||||
groups = _group_by_postcode_in_order(addresses)
|
||||
|
||||
buffer: list[UserAddress] = []
|
||||
buffer: list[LandlordAssetList] = []
|
||||
for group in groups.values():
|
||||
group_len = len(group)
|
||||
|
||||
|
|
@ -43,9 +43,9 @@ def iter_postcode_grouped_batches(
|
|||
|
||||
|
||||
def _group_by_postcode_in_order(
|
||||
addresses: Iterable[UserAddress],
|
||||
) -> dict[Postcode, list[UserAddress]]:
|
||||
groups: dict[Postcode, list[UserAddress]] = {}
|
||||
addresses: Iterable[LandlordAssetList],
|
||||
) -> dict[Postcode, list[LandlordAssetList]]:
|
||||
groups: dict[Postcode, list[LandlordAssetList]] = {}
|
||||
for address in addresses:
|
||||
groups.setdefault(address.postcode, []).append(address)
|
||||
return groups
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ def _empty_source_row() -> dict[str, str]:
|
|||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class UserAddress:
|
||||
class LandlordAssetList:
|
||||
user_address: str
|
||||
postcode: Postcode
|
||||
internal_reference: Optional[str] = None
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from repositories.user_address.user_address_repository import UserAddressRepository
|
||||
from domain.addresses.user_address import UserAddress
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
|
||||
|
||||
class LandlordDescriptionOverridesOrchestrator:
|
||||
|
|
@ -9,14 +9,15 @@ class LandlordDescriptionOverridesOrchestrator:
|
|||
def get_user_address(
|
||||
self,
|
||||
input_s3_uri: str,
|
||||
) -> list[UserAddress]:
|
||||
) -> list[LandlordAssetList]:
|
||||
return self._user_address_repo.load_batch(input_s3_uri)
|
||||
|
||||
def get_col_to_description_mappings(
|
||||
self, list_of_user_address: list[UserAddress]
|
||||
self, list_of_user_address: list[LandlordAssetList]
|
||||
) -> dict[str, set[str]]:
|
||||
mappings: dict[str, set[str]] = {}
|
||||
for user_address in list_of_user_address:
|
||||
for key, value in user_address.landlord_additional_info.items():
|
||||
mappings.setdefault(key, set()).add(value)
|
||||
# Lower-case so case-only typos collapse to one variant.
|
||||
mappings.setdefault(key, set()).add(value.lower())
|
||||
return mappings
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import uuid
|
|||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from domain.addresses.user_address import UserAddress
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.postcode import Postcode
|
||||
from infrastructure.csv_s3_client import CsvS3Client
|
||||
from repositories.user_address.user_address_repository import UserAddressRepository
|
||||
|
|
@ -20,14 +20,14 @@ class UserAddressCsvS3Repository(UserAddressRepository):
|
|||
self._csv_client = csv_client
|
||||
self._bucket = bucket
|
||||
|
||||
def load_batch(self, s3_uri: str) -> list[UserAddress]:
|
||||
def load_batch(self, s3_uri: str) -> list[LandlordAssetList]:
|
||||
rows = self._csv_client.read_rows(s3_uri)
|
||||
if rows and _POSTCODE_COLUMN not in rows[0]:
|
||||
raise ValueError(
|
||||
f"Input CSV {s3_uri} has no {_POSTCODE_COLUMN!r} column; "
|
||||
f"columns present: {sorted(rows[0])}"
|
||||
)
|
||||
addresses: list[UserAddress] = []
|
||||
addresses: list[LandlordAssetList] = []
|
||||
for row in rows:
|
||||
parts = [
|
||||
row[col].strip()
|
||||
|
|
@ -39,7 +39,7 @@ class UserAddressCsvS3Repository(UserAddressRepository):
|
|||
raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip()
|
||||
internal_reference: Optional[str] = raw_ref or None
|
||||
addresses.append(
|
||||
UserAddress(
|
||||
LandlordAssetList(
|
||||
user_address=user_address,
|
||||
postcode=Postcode(postcode),
|
||||
internal_reference=internal_reference,
|
||||
|
|
@ -48,7 +48,7 @@ class UserAddressCsvS3Repository(UserAddressRepository):
|
|||
)
|
||||
return addresses
|
||||
|
||||
def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str:
|
||||
def save_batch(self, addresses: list[LandlordAssetList], path_prefix: str) -> str:
|
||||
rows: list[dict[str, str]] = [
|
||||
{
|
||||
**addr.landlord_additional_info,
|
||||
|
|
|
|||
|
|
@ -2,12 +2,14 @@ from __future__ import annotations
|
|||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from domain.addresses.user_address import UserAddress
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
|
||||
|
||||
class UserAddressRepository(ABC):
|
||||
@abstractmethod
|
||||
def load_batch(self, s3_uri: str) -> list[UserAddress]: ...
|
||||
def load_batch(self, s3_uri: str) -> list[LandlordAssetList]: ...
|
||||
|
||||
@abstractmethod
|
||||
def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str: ...
|
||||
def save_batch(
|
||||
self, addresses: list[LandlordAssetList], path_prefix: str
|
||||
) -> str: ...
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
import pytest
|
||||
|
||||
from domain.addresses.postcode_batching import iter_postcode_grouped_batches
|
||||
from domain.addresses.user_address import UserAddress
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.postcode import Postcode
|
||||
|
||||
|
||||
def _addrs(postcode: str, n: int) -> list[UserAddress]:
|
||||
def _addrs(postcode: str, n: int) -> list[LandlordAssetList]:
|
||||
return [
|
||||
UserAddress(
|
||||
LandlordAssetList(
|
||||
user_address=f"{i} {postcode} Street", postcode=Postcode(postcode)
|
||||
)
|
||||
for i in range(n)
|
||||
|
|
@ -74,9 +74,7 @@ def test_oversize_group_flushes_existing_buffer_first() -> None:
|
|||
big = _addrs("BB2 2BB", 7)
|
||||
tail = _addrs("CC3 3CC", 1)
|
||||
# act
|
||||
batches = list(
|
||||
iter_postcode_grouped_batches(small + big + tail, max_batch_size=5)
|
||||
)
|
||||
batches = list(iter_postcode_grouped_batches(small + big + tail, max_batch_size=5))
|
||||
# assert
|
||||
assert len(batches) == 3
|
||||
assert [str(a.postcode) for a in batches[0]] == ["AA11AA", "AA11AA"]
|
||||
|
|
|
|||
|
|
@ -2,13 +2,13 @@ import dataclasses
|
|||
|
||||
import pytest
|
||||
|
||||
from domain.addresses.user_address import UserAddress
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.postcode import Postcode
|
||||
|
||||
|
||||
def test_user_address_holds_postcode_value_object() -> None:
|
||||
# act
|
||||
addr = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
|
||||
addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
|
||||
# assert
|
||||
assert addr.postcode == Postcode("SW1A1AA")
|
||||
|
||||
|
|
@ -17,21 +17,23 @@ def test_user_address_preserves_user_address_verbatim() -> None:
|
|||
# The free-text user_address string is intentionally NOT normalised --
|
||||
# only the postcode is canonicalised, and that happens inside Postcode.
|
||||
# act
|
||||
addr = UserAddress(user_address=" 1 The Street ", postcode=Postcode("SW1A1AA"))
|
||||
addr = LandlordAssetList(
|
||||
user_address=" 1 The Street ", postcode=Postcode("SW1A1AA")
|
||||
)
|
||||
# assert
|
||||
assert addr.user_address == " 1 The Street "
|
||||
|
||||
|
||||
def test_user_address_internal_reference_defaults_to_none() -> None:
|
||||
# act
|
||||
addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
# assert
|
||||
assert addr.internal_reference is None
|
||||
|
||||
|
||||
def test_user_address_internal_reference_accepted() -> None:
|
||||
# act
|
||||
addr = UserAddress(
|
||||
addr = LandlordAssetList(
|
||||
user_address="1 The Street",
|
||||
postcode=Postcode("SW1A1AA"),
|
||||
internal_reference="cust-42",
|
||||
|
|
@ -42,7 +44,7 @@ def test_user_address_internal_reference_accepted() -> None:
|
|||
|
||||
def test_user_address_is_frozen() -> None:
|
||||
# arrange
|
||||
addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
# act / assert
|
||||
with pytest.raises(dataclasses.FrozenInstanceError):
|
||||
addr.postcode = Postcode("OTHER") # type: ignore[misc]
|
||||
|
|
@ -52,15 +54,15 @@ def test_user_address_equality_uses_canonical_postcode() -> None:
|
|||
# Postcode sanitises eagerly, so addresses built from different surface
|
||||
# forms of the same postcode compare equal.
|
||||
# arrange
|
||||
a = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
|
||||
b = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
a = LandlordAssetList(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
|
||||
b = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
# act / assert
|
||||
assert a == b
|
||||
|
||||
|
||||
def test_user_address_source_row_defaults_to_empty_dict() -> None:
|
||||
# act
|
||||
addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
# assert
|
||||
assert addr.landlord_additional_info == {}
|
||||
|
||||
|
|
@ -69,7 +71,7 @@ def test_user_address_carries_source_row() -> None:
|
|||
# arrange
|
||||
row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
|
||||
# act
|
||||
addr = UserAddress(
|
||||
addr = LandlordAssetList(
|
||||
user_address="1 The Street",
|
||||
postcode=Postcode("SW1A 1AA"),
|
||||
landlord_additional_info=row,
|
||||
|
|
@ -82,12 +84,12 @@ def test_user_address_equality_ignores_source_row() -> None:
|
|||
# source_row is excluded from equality (and hashing): identity stays
|
||||
# defined by the parsed fields.
|
||||
# arrange
|
||||
a = UserAddress(
|
||||
a = LandlordAssetList(
|
||||
user_address="1 The Street",
|
||||
postcode=Postcode("SW1A1AA"),
|
||||
landlord_additional_info={"x": "1"},
|
||||
)
|
||||
b = UserAddress(
|
||||
b = LandlordAssetList(
|
||||
user_address="1 The Street",
|
||||
postcode=Postcode("SW1A1AA"),
|
||||
landlord_additional_info={"y": "2"},
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from domain.addresses.user_address import UserAddress
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.postcode import Postcode
|
||||
from orchestration.landlord_description_overrides_orchestrator import (
|
||||
LandlordDescriptionOverridesOrchestrator,
|
||||
|
|
@ -11,15 +11,15 @@ from repositories.user_address.user_address_repository import UserAddressReposit
|
|||
class _StubUserAddressRepository(UserAddressRepository):
|
||||
"""``get_col_to_description_mappings`` never touches the repo."""
|
||||
|
||||
def load_batch(self, s3_uri: str) -> list[UserAddress]:
|
||||
def load_batch(self, s3_uri: str) -> list[LandlordAssetList]:
|
||||
raise NotImplementedError()
|
||||
|
||||
def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str:
|
||||
def save_batch(self, addresses: list[LandlordAssetList], path_prefix: str) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
def _make_user_address(landlord_additional_info: dict[str, str]) -> UserAddress:
|
||||
return UserAddress(
|
||||
def _make_user_address(landlord_additional_info: dict[str, str]) -> LandlordAssetList:
|
||||
return LandlordAssetList(
|
||||
user_address="1 High St",
|
||||
postcode=Postcode("AA1 1AA"),
|
||||
landlord_additional_info=landlord_additional_info,
|
||||
|
|
@ -65,6 +65,21 @@ def test_repeated_values_collapse_to_one_variant() -> None:
|
|||
assert mappings == {"description": {"cosy", "bright"}}
|
||||
|
||||
|
||||
def test_case_only_variants_collapse_to_one() -> None:
|
||||
# arrange: the same description typed with inconsistent casing.
|
||||
addresses = [
|
||||
_make_user_address({"description": "Cosy"}),
|
||||
_make_user_address({"description": "cosy"}),
|
||||
_make_user_address({"description": "COSY"}),
|
||||
]
|
||||
|
||||
# act
|
||||
mappings = _orchestrator().get_col_to_description_mappings(addresses)
|
||||
|
||||
# assert: lower-casing folds the casing typos into one variant.
|
||||
assert mappings == {"description": {"cosy"}}
|
||||
|
||||
|
||||
def test_empty_address_list_yields_empty_mapping() -> None:
|
||||
# arrange / act
|
||||
mappings = _orchestrator().get_col_to_description_mappings([])
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from collections.abc import Iterator
|
|||
import pytest
|
||||
from moto import mock_aws
|
||||
|
||||
from domain.addresses.user_address import UserAddress
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.postcode import Postcode
|
||||
from infrastructure.csv_s3_client import CsvS3Client
|
||||
from repositories.user_address.user_address_csv_s3_repository import (
|
||||
|
|
@ -173,7 +173,7 @@ def test_save_batch_returns_uri_under_path_prefix(
|
|||
) -> None:
|
||||
# arrange
|
||||
addresses = [
|
||||
UserAddress(
|
||||
LandlordAssetList(
|
||||
user_address="1 High Street",
|
||||
postcode=Postcode("SW1A 1AA"),
|
||||
landlord_additional_info={
|
||||
|
|
@ -229,7 +229,7 @@ def test_save_batch_uses_unique_filename_per_call(
|
|||
) -> None:
|
||||
# arrange
|
||||
addresses = [
|
||||
UserAddress(
|
||||
LandlordAssetList(
|
||||
user_address="1 High Street",
|
||||
postcode=Postcode("SW1A 1AA"),
|
||||
landlord_additional_info={
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue