added landlord orchestration

This commit is contained in:
Jun-te Kim 2026-05-21 16:32:50 +00:00
parent 4830f82b58
commit b14f98788e
6 changed files with 72 additions and 24 deletions

View file

@ -1,9 +1,45 @@
from typing import Any
import boto3
from orchestration.landlord_description_overrides_orchestrator import (
LandlordDescriptionOverridesOrchestrator,
)
from infrastructure.csv_s3_client import CsvS3Client
from repositories.user_address.user_address_csv_s3_repository import (
UserAddressCsvS3Repository,
)
def handler(
    body: dict[str, Any],
    context: Any,
) -> dict[str, list[str]]:
    """Load landlord user addresses from a CSV in S3 and print the first row's columns.

    This is scaffolding for the landlord description overrides flow: it
    wires an S3-backed CSV client into a ``UserAddressCsvS3Repository``,
    hands that to ``LandlordDescriptionOverridesOrchestrator``, and prints
    the ``landlord_additional_info`` keys of the first address returned.

    Args:
        body: Invocation payload. Not read by the current implementation.
        context: Invocation context. Not read by the current implementation.

    Returns:
        A fixed placeholder mapping, ``{"hello world": ["hello world"]}``.
    """
    print("hello world")

    # Input location is hard-coded while the flow is being built out.
    bucket_name = "retrofit-data-dev"
    input_uri = "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv"

    # boto3.client is overloaded per-service in the installed stubs; annotate
    # the factory as Any so the strict-mode checker treats it as opaque.
    make_client: Any = boto3.client  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
    s3_client: Any = make_client("s3")

    address_repo = UserAddressCsvS3Repository(
        CsvS3Client(s3_client, bucket_name),
        bucket_name,
    )
    overrides_orchestrator = LandlordDescriptionOverridesOrchestrator(
        user_address_repo=address_repo,
    )
    addresses = overrides_orchestrator.get_user_address(input_s3_uri=input_uri)

    # Debug aid: show which columns the first address carries, then stop.
    for address in addresses:
        print(address.landlord_additional_info.keys())
        break

    # Planned next steps (not implemented yet):
    #   1. Read the CSV of user input.
    #   2. For each description column, collect its unique variations,
    #      e.g. {walls: ["wall variation 1", "wall variation 2"]}.
    #   3. Call ChatGPT with (input from landlord, our way of understanding
    #      the mapping) and return the landlord-mapped result.
    return {"hello world": ["hello world"]}

View file

@ -79,17 +79,17 @@ def app():
"""
data_folder = "/workspaces/model/asset_list"
data_filename = "hyde.xlsx"
sheet_name = "AddressProfilingResults"
postcode_column = "Postcode"
address1_column = "Address"
data_filename = "asset_list (8).xlsx"
sheet_name = "Standardised Asset List"
postcode_column = "postcode"
address1_column = "domna_address_1"
address1_method = None
fulladdress_column = "Postcode"
fulladdress_column = "domna_address_1"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Property Type" # Good to include if landlord gave
landlord_property_type = "landlord_property_id" # Good to include if landlord gave
landlord_built_form = None # Good to include if landlord gave
landlord_wall_construction = None
landlord_roof_construction = None
@ -468,4 +468,3 @@ def app():
asset_list.duplicated_addresses.to_excel(
writer, sheet_name="Duplicate Properties", index=False
)

View file

@ -15,4 +15,6 @@ class UserAddress:
user_address: str
postcode: Postcode
internal_reference: Optional[str] = None
source_row: dict[str, str] = field(default_factory=_empty_source_row, compare=False)
landlord_additional_info: dict[str, str] = field(
default_factory=_empty_source_row, compare=False
)

View file

@ -43,14 +43,17 @@ class UserAddressCsvS3Repository(UserAddressRepository):
user_address=user_address,
postcode=Postcode(postcode),
internal_reference=internal_reference,
source_row=row,
landlord_additional_info=row,
)
)
return addresses
def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str:
rows: list[dict[str, str]] = [
{**addr.source_row, _POSTCODE_CLEAN_COLUMN: str(addr.postcode)}
{
**addr.landlord_additional_info,
_POSTCODE_CLEAN_COLUMN: str(addr.postcode),
}
for addr in addresses
]

View file

@ -17,9 +17,7 @@ def test_user_address_preserves_user_address_verbatim() -> None:
# The free-text user_address string is intentionally NOT normalised --
# only the postcode is canonicalised, and that happens inside Postcode.
# act
addr = UserAddress(
user_address=" 1 The Street ", postcode=Postcode("SW1A1AA")
)
addr = UserAddress(user_address=" 1 The Street ", postcode=Postcode("SW1A1AA"))
# assert
assert addr.user_address == " 1 The Street "
@ -64,7 +62,7 @@ def test_user_address_source_row_defaults_to_empty_dict() -> None:
# act
addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
# assert
assert addr.source_row == {}
assert addr.landlord_additional_info == {}
def test_user_address_carries_source_row() -> None:
@ -74,10 +72,10 @@ def test_user_address_carries_source_row() -> None:
addr = UserAddress(
user_address="1 The Street",
postcode=Postcode("SW1A 1AA"),
source_row=row,
landlord_additional_info=row,
)
# assert
assert addr.source_row == row
assert addr.landlord_additional_info == row
def test_user_address_equality_ignores_source_row() -> None:
@ -87,12 +85,12 @@ def test_user_address_equality_ignores_source_row() -> None:
a = UserAddress(
user_address="1 The Street",
postcode=Postcode("SW1A1AA"),
source_row={"x": "1"},
landlord_additional_info={"x": "1"},
)
b = UserAddress(
user_address="1 The Street",
postcode=Postcode("SW1A1AA"),
source_row={"y": "2"},
landlord_additional_info={"y": "2"},
)
# act / assert
assert a == b

View file

@ -124,7 +124,7 @@ def test_load_batch_captures_full_source_row(
addresses = repo.load_batch(uri)
# assert
assert addresses[0].source_row == row
assert addresses[0].landlord_additional_info == row
def test_load_batch_raises_when_postcode_column_absent(
@ -154,7 +154,9 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
# act
saved_uri = repo.save_batch(addresses, "tasks/passthrough")
saved_rows = repo._csv_client.read_rows(saved_uri) # pyright: ignore[reportPrivateUsage]
saved_rows = repo._csv_client.read_rows(
saved_uri
) # pyright: ignore[reportPrivateUsage]
# assert
assert len(saved_rows) == 1
@ -174,7 +176,10 @@ def test_save_batch_returns_uri_under_path_prefix(
UserAddress(
user_address="1 High Street",
postcode=Postcode("SW1A 1AA"),
source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
landlord_additional_info={
"Address 1": "1 High Street",
"postcode": "SW1A 1AA",
},
),
]
@ -207,7 +212,9 @@ def test_save_then_reload_round_trip_preserves_columns(
# act
saved_uri = repo.save_batch(addresses, "tasks/round-trip")
saved_rows = repo._csv_client.read_rows(saved_uri) # pyright: ignore[reportPrivateUsage]
saved_rows = repo._csv_client.read_rows(
saved_uri
) # pyright: ignore[reportPrivateUsage]
# assert
# Original columns come back verbatim; postcode_clean is the only addition.
@ -225,7 +232,10 @@ def test_save_batch_uses_unique_filename_per_call(
UserAddress(
user_address="1 High Street",
postcode=Postcode("SW1A 1AA"),
source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
landlord_additional_info={
"Address 1": "1 High Street",
"postcode": "SW1A 1AA",
},
),
]