mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
asset list from landlord
This commit is contained in:
parent
94cbf5f516
commit
acb306f7b9
10 changed files with 78 additions and 84 deletions
|
|
@ -7,7 +7,7 @@ from infrastructure.csv_s3_client import CsvS3Client
|
|||
from repositories.user_address.user_address_csv_s3_repository import (
|
||||
UserAddressCsvS3Repository,
|
||||
)
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.addresses.user_address import AssetList
|
||||
|
||||
|
||||
def handler(
|
||||
|
|
@ -32,7 +32,7 @@ def handler(
|
|||
user_address_repo=user_address_repo,
|
||||
)
|
||||
|
||||
list_of_user_address: list[LandlordAssetList] = orchestrator.get_user_address(
|
||||
list_of_user_address: list[AssetList] = orchestrator.get_user_address(
|
||||
input_s3_uri=s3_uri
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -11,10 +11,10 @@ def _empty_source_row() -> dict[str, str]:
|
|||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class LandlordAssetList:
|
||||
user_address: str
|
||||
class AssetList:
|
||||
address: str
|
||||
postcode: Postcode
|
||||
internal_reference: Optional[str] = None
|
||||
landlord_additional_info: dict[str, str] = field(
|
||||
org_reference: Optional[str] = None
|
||||
additional_info: dict[str, str] = field(
|
||||
default_factory=_empty_source_row, compare=False
|
||||
)
|
||||
|
|
@ -2,21 +2,21 @@ from __future__ import annotations
|
|||
|
||||
from collections.abc import Iterable, Iterator
|
||||
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.addresses.user_address import AssetList
|
||||
from domain.postcode import Postcode
|
||||
|
||||
|
||||
def iter_postcode_grouped_batches(
|
||||
addresses: Iterable[LandlordAssetList],
|
||||
addresses: Iterable[AssetList],
|
||||
*,
|
||||
max_batch_size: int = 500,
|
||||
) -> Iterator[list[LandlordAssetList]]:
|
||||
) -> Iterator[list[AssetList]]:
|
||||
if max_batch_size < 1:
|
||||
raise ValueError("max_batch_size must be >= 1")
|
||||
|
||||
groups = _group_by_postcode_in_order(addresses)
|
||||
|
||||
buffer: list[LandlordAssetList] = []
|
||||
buffer: list[AssetList] = []
|
||||
for group in groups.values():
|
||||
group_len = len(group)
|
||||
|
||||
|
|
@ -43,9 +43,9 @@ def iter_postcode_grouped_batches(
|
|||
|
||||
|
||||
def _group_by_postcode_in_order(
|
||||
addresses: Iterable[LandlordAssetList],
|
||||
) -> dict[Postcode, list[LandlordAssetList]]:
|
||||
groups: dict[Postcode, list[LandlordAssetList]] = {}
|
||||
addresses: Iterable[AssetList],
|
||||
) -> dict[Postcode, list[AssetList]]:
|
||||
groups: dict[Postcode, list[AssetList]] = {}
|
||||
for address in addresses:
|
||||
groups.setdefault(address.postcode, []).append(address)
|
||||
return groups
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from repositories.user_address.user_address_repository import UserAddressRepository
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.addresses.user_address import AssetList
|
||||
|
||||
|
||||
class LandlordDescriptionOverridesOrchestrator:
|
||||
|
|
@ -9,15 +9,15 @@ class LandlordDescriptionOverridesOrchestrator:
|
|||
def get_user_address(
|
||||
self,
|
||||
input_s3_uri: str,
|
||||
) -> list[LandlordAssetList]:
|
||||
) -> list[AssetList]:
|
||||
return self._user_address_repo.load_batch(input_s3_uri)
|
||||
|
||||
def get_col_to_description_mappings(
|
||||
self, list_of_user_address: list[LandlordAssetList]
|
||||
self, list_of_user_address: list[AssetList]
|
||||
) -> dict[str, set[str]]:
|
||||
mappings: dict[str, set[str]] = {}
|
||||
for user_address in list_of_user_address:
|
||||
for key, value in user_address.landlord_additional_info.items():
|
||||
for key, value in user_address.additional_info.items():
|
||||
# Lower-case so case-only typos collapse to one variant.
|
||||
mappings.setdefault(key, set()).add(value.lower())
|
||||
return mappings
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import uuid
|
|||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.addresses.user_address import AssetList
|
||||
from domain.postcode import Postcode
|
||||
from infrastructure.csv_s3_client import CsvS3Client
|
||||
from repositories.user_address.user_address_repository import UserAddressRepository
|
||||
|
|
@ -20,14 +20,14 @@ class UserAddressCsvS3Repository(UserAddressRepository):
|
|||
self._csv_client = csv_client
|
||||
self._bucket = bucket
|
||||
|
||||
def load_batch(self, s3_uri: str) -> list[LandlordAssetList]:
|
||||
def load_batch(self, s3_uri: str) -> list[AssetList]:
|
||||
rows = self._csv_client.read_rows(s3_uri)
|
||||
if rows and _POSTCODE_COLUMN not in rows[0]:
|
||||
raise ValueError(
|
||||
f"Input CSV {s3_uri} has no {_POSTCODE_COLUMN!r} column; "
|
||||
f"columns present: {sorted(rows[0])}"
|
||||
)
|
||||
addresses: list[LandlordAssetList] = []
|
||||
addresses: list[AssetList] = []
|
||||
for row in rows:
|
||||
parts = [
|
||||
row[col].strip()
|
||||
|
|
@ -39,19 +39,19 @@ class UserAddressCsvS3Repository(UserAddressRepository):
|
|||
raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip()
|
||||
internal_reference: Optional[str] = raw_ref or None
|
||||
addresses.append(
|
||||
LandlordAssetList(
|
||||
user_address=user_address,
|
||||
AssetList(
|
||||
address=user_address,
|
||||
postcode=Postcode(postcode),
|
||||
internal_reference=internal_reference,
|
||||
landlord_additional_info=row,
|
||||
org_reference=internal_reference,
|
||||
additional_info=row,
|
||||
)
|
||||
)
|
||||
return addresses
|
||||
|
||||
def save_batch(self, addresses: list[LandlordAssetList], path_prefix: str) -> str:
|
||||
def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str:
|
||||
rows: list[dict[str, str]] = [
|
||||
{
|
||||
**addr.landlord_additional_info,
|
||||
**addr.additional_info,
|
||||
_POSTCODE_CLEAN_COLUMN: str(addr.postcode),
|
||||
}
|
||||
for addr in addresses
|
||||
|
|
|
|||
|
|
@ -2,14 +2,12 @@ from __future__ import annotations
|
|||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.addresses.user_address import AssetList
|
||||
|
||||
|
||||
class UserAddressRepository(ABC):
|
||||
@abstractmethod
|
||||
def load_batch(self, s3_uri: str) -> list[LandlordAssetList]: ...
|
||||
def load_batch(self, s3_uri: str) -> list[AssetList]: ...
|
||||
|
||||
@abstractmethod
|
||||
def save_batch(
|
||||
self, addresses: list[LandlordAssetList], path_prefix: str
|
||||
) -> str: ...
|
||||
def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str: ...
|
||||
|
|
|
|||
|
|
@ -1,15 +1,13 @@
|
|||
import pytest
|
||||
|
||||
from domain.addresses.postcode_batching import iter_postcode_grouped_batches
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.addresses.user_address import AssetList
|
||||
from domain.postcode import Postcode
|
||||
|
||||
|
||||
def _addrs(postcode: str, n: int) -> list[LandlordAssetList]:
|
||||
def _addrs(postcode: str, n: int) -> list[AssetList]:
|
||||
return [
|
||||
LandlordAssetList(
|
||||
user_address=f"{i} {postcode} Street", postcode=Postcode(postcode)
|
||||
)
|
||||
AssetList(address=f"{i} {postcode} Street", postcode=Postcode(postcode))
|
||||
for i in range(n)
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -2,13 +2,13 @@ import dataclasses
|
|||
|
||||
import pytest
|
||||
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.addresses.user_address import AssetList
|
||||
from domain.postcode import Postcode
|
||||
|
||||
|
||||
def test_user_address_holds_postcode_value_object() -> None:
|
||||
# act
|
||||
addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
|
||||
addr = AssetList(address="1 The Street", postcode=Postcode("sw1a 1aa"))
|
||||
# assert
|
||||
assert addr.postcode == Postcode("SW1A1AA")
|
||||
|
||||
|
|
@ -17,34 +17,32 @@ def test_user_address_preserves_user_address_verbatim() -> None:
|
|||
# The free-text user_address string is intentionally NOT normalised --
|
||||
# only the postcode is canonicalised, and that happens inside Postcode.
|
||||
# act
|
||||
addr = LandlordAssetList(
|
||||
user_address=" 1 The Street ", postcode=Postcode("SW1A1AA")
|
||||
)
|
||||
addr = AssetList(address=" 1 The Street ", postcode=Postcode("SW1A1AA"))
|
||||
# assert
|
||||
assert addr.user_address == " 1 The Street "
|
||||
assert addr.address == " 1 The Street "
|
||||
|
||||
|
||||
def test_user_address_internal_reference_defaults_to_none() -> None:
|
||||
# act
|
||||
addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
# assert
|
||||
assert addr.internal_reference is None
|
||||
assert addr.org_reference is None
|
||||
|
||||
|
||||
def test_user_address_internal_reference_accepted() -> None:
|
||||
# act
|
||||
addr = LandlordAssetList(
|
||||
user_address="1 The Street",
|
||||
addr = AssetList(
|
||||
address="1 The Street",
|
||||
postcode=Postcode("SW1A1AA"),
|
||||
internal_reference="cust-42",
|
||||
org_reference="cust-42",
|
||||
)
|
||||
# assert
|
||||
assert addr.internal_reference == "cust-42"
|
||||
assert addr.org_reference == "cust-42"
|
||||
|
||||
|
||||
def test_user_address_is_frozen() -> None:
|
||||
# arrange
|
||||
addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
# act / assert
|
||||
with pytest.raises(dataclasses.FrozenInstanceError):
|
||||
addr.postcode = Postcode("OTHER") # type: ignore[misc]
|
||||
|
|
@ -54,45 +52,45 @@ def test_user_address_equality_uses_canonical_postcode() -> None:
|
|||
# Postcode sanitises eagerly, so addresses built from different surface
|
||||
# forms of the same postcode compare equal.
|
||||
# arrange
|
||||
a = LandlordAssetList(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
|
||||
b = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
a = AssetList(address="1 The Street", postcode=Postcode("sw1a 1aa"))
|
||||
b = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
# act / assert
|
||||
assert a == b
|
||||
|
||||
|
||||
def test_user_address_source_row_defaults_to_empty_dict() -> None:
|
||||
# act
|
||||
addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
# assert
|
||||
assert addr.landlord_additional_info == {}
|
||||
assert addr.additional_info == {}
|
||||
|
||||
|
||||
def test_user_address_carries_source_row() -> None:
|
||||
# arrange
|
||||
row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
|
||||
# act
|
||||
addr = LandlordAssetList(
|
||||
user_address="1 The Street",
|
||||
addr = AssetList(
|
||||
address="1 The Street",
|
||||
postcode=Postcode("SW1A 1AA"),
|
||||
landlord_additional_info=row,
|
||||
additional_info=row,
|
||||
)
|
||||
# assert
|
||||
assert addr.landlord_additional_info == row
|
||||
assert addr.additional_info == row
|
||||
|
||||
|
||||
def test_user_address_equality_ignores_source_row() -> None:
|
||||
# source_row is excluded from equality (and hashing): identity stays
|
||||
# defined by the parsed fields.
|
||||
# arrange
|
||||
a = LandlordAssetList(
|
||||
user_address="1 The Street",
|
||||
a = AssetList(
|
||||
address="1 The Street",
|
||||
postcode=Postcode("SW1A1AA"),
|
||||
landlord_additional_info={"x": "1"},
|
||||
additional_info={"x": "1"},
|
||||
)
|
||||
b = LandlordAssetList(
|
||||
user_address="1 The Street",
|
||||
b = AssetList(
|
||||
address="1 The Street",
|
||||
postcode=Postcode("SW1A1AA"),
|
||||
landlord_additional_info={"y": "2"},
|
||||
additional_info={"y": "2"},
|
||||
)
|
||||
# act / assert
|
||||
assert a == b
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.addresses.user_address import AssetList
|
||||
from domain.postcode import Postcode
|
||||
from orchestration.landlord_description_overrides_orchestrator import (
|
||||
LandlordDescriptionOverridesOrchestrator,
|
||||
|
|
@ -11,18 +11,18 @@ from repositories.user_address.user_address_repository import UserAddressReposit
|
|||
class _StubUserAddressRepository(UserAddressRepository):
|
||||
"""``get_col_to_description_mappings`` never touches the repo."""
|
||||
|
||||
def load_batch(self, s3_uri: str) -> list[LandlordAssetList]:
|
||||
def load_batch(self, s3_uri: str) -> list[AssetList]:
|
||||
raise NotImplementedError()
|
||||
|
||||
def save_batch(self, addresses: list[LandlordAssetList], path_prefix: str) -> str:
|
||||
def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
def _make_user_address(landlord_additional_info: dict[str, str]) -> LandlordAssetList:
|
||||
return LandlordAssetList(
|
||||
user_address="1 High St",
|
||||
def _make_user_address(landlord_additional_info: dict[str, str]) -> AssetList:
|
||||
return AssetList(
|
||||
address="1 High St",
|
||||
postcode=Postcode("AA1 1AA"),
|
||||
landlord_additional_info=landlord_additional_info,
|
||||
additional_info=landlord_additional_info,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from collections.abc import Iterator
|
|||
import pytest
|
||||
from moto import mock_aws
|
||||
|
||||
from domain.addresses.user_address import LandlordAssetList
|
||||
from domain.addresses.user_address import AssetList
|
||||
from domain.postcode import Postcode
|
||||
from infrastructure.csv_s3_client import CsvS3Client
|
||||
from repositories.user_address.user_address_csv_s3_repository import (
|
||||
|
|
@ -50,9 +50,9 @@ def test_load_batch_parses_address_postcode_and_reference(
|
|||
# assert
|
||||
assert len(addresses) == 1
|
||||
address = addresses[0]
|
||||
assert address.user_address == "1 High Street, Flat 2, Townville"
|
||||
assert address.address == "1 High Street, Flat 2, Townville"
|
||||
assert address.postcode == Postcode("SW1A1AA")
|
||||
assert address.internal_reference == "REF-001"
|
||||
assert address.org_reference == "REF-001"
|
||||
|
||||
|
||||
def test_load_batch_uses_only_address_1_when_others_missing(
|
||||
|
|
@ -75,9 +75,9 @@ def test_load_batch_uses_only_address_1_when_others_missing(
|
|||
|
||||
# assert
|
||||
assert len(addresses) == 1
|
||||
assert addresses[0].user_address == "10 Cardiff Road"
|
||||
assert addresses[0].address == "10 Cardiff Road"
|
||||
assert addresses[0].postcode == Postcode("CF101AA")
|
||||
assert addresses[0].internal_reference == "REF-002"
|
||||
assert addresses[0].org_reference == "REF-002"
|
||||
|
||||
|
||||
def test_load_batch_handles_missing_internal_reference(
|
||||
|
|
@ -100,9 +100,9 @@ def test_load_batch_handles_missing_internal_reference(
|
|||
|
||||
# assert
|
||||
assert len(addresses) == 1
|
||||
assert addresses[0].user_address == "5 Park Lane"
|
||||
assert addresses[0].address == "5 Park Lane"
|
||||
assert addresses[0].postcode == Postcode("M11AA")
|
||||
assert addresses[0].internal_reference is None
|
||||
assert addresses[0].org_reference is None
|
||||
|
||||
|
||||
def test_load_batch_captures_full_source_row(
|
||||
|
|
@ -124,7 +124,7 @@ def test_load_batch_captures_full_source_row(
|
|||
addresses = repo.load_batch(uri)
|
||||
|
||||
# assert
|
||||
assert addresses[0].landlord_additional_info == row
|
||||
assert addresses[0].additional_info == row
|
||||
|
||||
|
||||
def test_load_batch_raises_when_postcode_column_absent(
|
||||
|
|
@ -173,10 +173,10 @@ def test_save_batch_returns_uri_under_path_prefix(
|
|||
) -> None:
|
||||
# arrange
|
||||
addresses = [
|
||||
LandlordAssetList(
|
||||
user_address="1 High Street",
|
||||
AssetList(
|
||||
address="1 High Street",
|
||||
postcode=Postcode("SW1A 1AA"),
|
||||
landlord_additional_info={
|
||||
additional_info={
|
||||
"Address 1": "1 High Street",
|
||||
"postcode": "SW1A 1AA",
|
||||
},
|
||||
|
|
@ -229,10 +229,10 @@ def test_save_batch_uses_unique_filename_per_call(
|
|||
) -> None:
|
||||
# arrange
|
||||
addresses = [
|
||||
LandlordAssetList(
|
||||
user_address="1 High Street",
|
||||
AssetList(
|
||||
address="1 High Street",
|
||||
postcode=Postcode("SW1A 1AA"),
|
||||
landlord_additional_info={
|
||||
additional_info={
|
||||
"Address 1": "1 High Street",
|
||||
"postcode": "SW1A 1AA",
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue