From b14f98788e05b6c4964817be27fc83f35725b4e5 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 21 May 2026 16:32:50 +0000 Subject: [PATCH] added landlord orchestration --- .../landlord_description_overrides/handler.py | 38 ++++++++++++++++++- asset_list/app.py | 13 +++---- domain/addresses/user_address.py | 4 +- .../user_address_csv_s3_repository.py | 7 +++- tests/domain/addresses/test_user_address.py | 14 +++---- .../test_user_address_csv_s3_repository.py | 20 +++++++--- 6 files changed, 72 insertions(+), 24 deletions(-) diff --git a/applications/landlord_description_overrides/handler.py b/applications/landlord_description_overrides/handler.py index f998da1d..003bd4d3 100644 --- a/applications/landlord_description_overrides/handler.py +++ b/applications/landlord_description_overrides/handler.py @@ -1,9 +1,45 @@ from typing import Any +import boto3 +from orchestration.landlord_description_overrides_orchestrator import ( + LandlordDescriptionOverridesOrchestrator, +) +from infrastructure.csv_s3_client import CsvS3Client +from repositories.user_address.user_address_csv_s3_repository import ( + UserAddressCsvS3Repository, +) def handler( body: dict[str, Any], context: Any, ) -> dict[str, list[str]]: - print("hello world") + + s3_uri = "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv" + bucket = "retrofit-data-dev" + + # boto3.client is overloaded per-service in the installed stubs; cast + # to Any so the strict-mode checker treats it as opaque. 
+ boto3_client: Any = ( + boto3.client + ) # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + boto_s3: Any = boto3_client("s3") + + csv_client = CsvS3Client(boto_s3, bucket) + user_address_repo = UserAddressCsvS3Repository(csv_client, bucket) + + orchestrator = LandlordDescriptionOverridesOrchestrator( + user_address_repo=user_address_repo, + ) + + list_of_user_address = orchestrator.get_user_address(input_s3_uri=s3_uri) + + for each_user_address in list_of_user_address: + print(each_user_address.landlord_additional_info.keys()) + break + + # Read csv of user input + # get the column and unique variations of each description + # { walls: "wall variation 1", "wall variation 2"} + # Call chatgpt(input from landlord, our way of understanding the mapping) Return -> landlordMapped + return {"hello world": ["hello world"]} diff --git a/asset_list/app.py b/asset_list/app.py index 424f4df6..aef410e6 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -79,17 +79,17 @@ def app(): """ data_folder = "/workspaces/model/asset_list" - data_filename = "hyde.xlsx" - sheet_name = "AddressProfilingResults" - postcode_column = "Postcode" - address1_column = "Address" + data_filename = "asset_list (8).xlsx" + sheet_name = "Standardised Asset List" + postcode_column = "postcode" + address1_column = "domna_address_1" address1_method = None - fulladdress_column = "Postcode" + fulladdress_column = "domna_address_1" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None - landlord_property_type = "Property Type" # Good to include if landlord gave + landlord_property_type = "landlord_property_id" # Good to include if landlord gave landlord_built_form = None # Good to include if landlord gave landlord_wall_construction = None landlord_roof_construction = None @@ -468,4 +468,3 @@ def app(): asset_list.duplicated_addresses.to_excel( writer, sheet_name="Duplicate Properties", index=False ) - diff --git 
a/domain/addresses/user_address.py b/domain/addresses/user_address.py index 9a28751b..b6deb2e4 100644 --- a/domain/addresses/user_address.py +++ b/domain/addresses/user_address.py @@ -15,4 +15,6 @@ class UserAddress: user_address: str postcode: Postcode internal_reference: Optional[str] = None - source_row: dict[str, str] = field(default_factory=_empty_source_row, compare=False) + landlord_additional_info: dict[str, str] = field( + default_factory=_empty_source_row, compare=False + ) diff --git a/repositories/user_address/user_address_csv_s3_repository.py b/repositories/user_address/user_address_csv_s3_repository.py index 058fd5a5..0b54d360 100644 --- a/repositories/user_address/user_address_csv_s3_repository.py +++ b/repositories/user_address/user_address_csv_s3_repository.py @@ -43,14 +43,17 @@ class UserAddressCsvS3Repository(UserAddressRepository): user_address=user_address, postcode=Postcode(postcode), internal_reference=internal_reference, - source_row=row, + landlord_additional_info=row, ) ) return addresses def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str: rows: list[dict[str, str]] = [ - {**addr.source_row, _POSTCODE_CLEAN_COLUMN: str(addr.postcode)} + { + **addr.landlord_additional_info, + _POSTCODE_CLEAN_COLUMN: str(addr.postcode), + } for addr in addresses ] diff --git a/tests/domain/addresses/test_user_address.py b/tests/domain/addresses/test_user_address.py index 8d092df3..21e5050d 100644 --- a/tests/domain/addresses/test_user_address.py +++ b/tests/domain/addresses/test_user_address.py @@ -17,9 +17,7 @@ def test_user_address_preserves_user_address_verbatim() -> None: # The free-text user_address string is intentionally NOT normalised -- # only the postcode is canonicalised, and that happens inside Postcode. 
# act - addr = UserAddress( - user_address=" 1 The Street ", postcode=Postcode("SW1A1AA") - ) + addr = UserAddress(user_address=" 1 The Street ", postcode=Postcode("SW1A1AA")) # assert assert addr.user_address == " 1 The Street " @@ -64,7 +62,7 @@ def test_user_address_source_row_defaults_to_empty_dict() -> None: # act addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA")) # assert - assert addr.source_row == {} + assert addr.landlord_additional_info == {} def test_user_address_carries_source_row() -> None: @@ -74,10 +72,10 @@ def test_user_address_carries_source_row() -> None: addr = UserAddress( user_address="1 The Street", postcode=Postcode("SW1A 1AA"), - source_row=row, + landlord_additional_info=row, ) # assert - assert addr.source_row == row + assert addr.landlord_additional_info == row def test_user_address_equality_ignores_source_row() -> None: @@ -87,12 +85,12 @@ def test_user_address_equality_ignores_source_row() -> None: a = UserAddress( user_address="1 The Street", postcode=Postcode("SW1A1AA"), - source_row={"x": "1"}, + landlord_additional_info={"x": "1"}, ) b = UserAddress( user_address="1 The Street", postcode=Postcode("SW1A1AA"), - source_row={"y": "2"}, + landlord_additional_info={"y": "2"}, ) # act / assert assert a == b diff --git a/tests/repositories/user_address/test_user_address_csv_s3_repository.py b/tests/repositories/user_address/test_user_address_csv_s3_repository.py index 9ffb250a..0f630923 100644 --- a/tests/repositories/user_address/test_user_address_csv_s3_repository.py +++ b/tests/repositories/user_address/test_user_address_csv_s3_repository.py @@ -124,7 +124,7 @@ def test_load_batch_captures_full_source_row( addresses = repo.load_batch(uri) # assert - assert addresses[0].source_row == row + assert addresses[0].landlord_additional_info == row def test_load_batch_raises_when_postcode_column_absent( @@ -154,7 +154,9 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean( # act saved_uri = 
repo.save_batch(addresses, "tasks/passthrough") - saved_rows = repo._csv_client.read_rows(saved_uri) # pyright: ignore[reportPrivateUsage] + saved_rows = repo._csv_client.read_rows( + saved_uri + ) # pyright: ignore[reportPrivateUsage] # assert assert len(saved_rows) == 1 @@ -174,7 +176,10 @@ def test_save_batch_returns_uri_under_path_prefix( UserAddress( user_address="1 High Street", postcode=Postcode("SW1A 1AA"), - source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"}, + landlord_additional_info={ + "Address 1": "1 High Street", + "postcode": "SW1A 1AA", + }, ), ] @@ -207,7 +212,9 @@ def test_save_then_reload_round_trip_preserves_columns( # act saved_uri = repo.save_batch(addresses, "tasks/round-trip") - saved_rows = repo._csv_client.read_rows(saved_uri) # pyright: ignore[reportPrivateUsage] + saved_rows = repo._csv_client.read_rows( + saved_uri + ) # pyright: ignore[reportPrivateUsage] # assert # Original columns come back verbatim; postcode_clean is the only addition. @@ -225,7 +232,10 @@ def test_save_batch_uses_unique_filename_per_call( UserAddress( user_address="1 High Street", postcode=Postcode("SW1A 1AA"), - source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"}, + landlord_additional_info={ + "Address 1": "1 High Street", + "postcode": "SW1A 1AA", + }, ), ]