mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
added landlord orchestration
This commit is contained in:
parent
4830f82b58
commit
b14f98788e
6 changed files with 72 additions and 24 deletions
|
|
@ -1,9 +1,45 @@
|
|||
from typing import Any
|
||||
import boto3
|
||||
from orchestration.landlord_description_overrides_orchestrator import (
|
||||
LandlordDescriptionOverridesOrchestrator,
|
||||
)
|
||||
from infrastructure.csv_s3_client import CsvS3Client
|
||||
from repositories.user_address.user_address_csv_s3_repository import (
|
||||
UserAddressCsvS3Repository,
|
||||
)
|
||||
|
||||
|
||||
def handler(
    body: dict[str, Any],
    context: Any,
) -> dict[str, list[str]]:
    """Load landlord user addresses from a fixed S3 CSV and print a sample.

    Work-in-progress entry point. It wires a boto3 S3 client into
    ``CsvS3Client``, ``UserAddressCsvS3Repository`` and
    ``LandlordDescriptionOverridesOrchestrator``, asks the orchestrator for
    the user addresses behind a hard-coded development URI, and prints the
    ``landlord_additional_info`` keys of the first address only.

    Args:
        body: Invocation payload. Not read by the current implementation.
        context: Invocation context. Not read by the current implementation.

    Returns:
        A fixed placeholder mapping, ``{"hello world": ["hello world"]}``.
    """
    print("hello world")

    # Hard-coded development input; the bucket name is repeated inside the URI.
    source_bucket = "retrofit-data-dev"
    input_uri = "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv"

    # boto3.client is overloaded per service in the installed stubs, so it is
    # bound to an ``Any``-typed name to keep the strict-mode checker from
    # trying to resolve the overloads.
    make_client: Any = (
        boto3.client
    )  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
    s3: Any = make_client("s3")

    # Dependency chain: S3 client -> CSV client -> repository -> orchestrator.
    repository = UserAddressCsvS3Repository(
        CsvS3Client(s3, source_bucket),
        source_bucket,
    )
    overrides_orchestrator = LandlordDescriptionOverridesOrchestrator(
        user_address_repo=repository,
    )

    user_addresses = overrides_orchestrator.get_user_address(input_s3_uri=input_uri)

    # Debug aid: show which landlord columns were captured, first row only.
    for address in user_addresses:
        print(address.landlord_additional_info.keys())
        break

    # Planned next steps:
    #   1. Read the CSV of user input.
    #   2. Collect, per column, the unique variations of each description,
    #      e.g. {walls: "wall variation 1", "wall variation 2"}.
    #   3. Call ChatGPT with (input from landlord, our way of understanding
    #      the mapping) and return the landlord mapping.

    return {"hello world": ["hello world"]}
|
||||
|
|
|
|||
|
|
@ -79,17 +79,17 @@ def app():
|
|||
"""
|
||||
|
||||
data_folder = "/workspaces/model/asset_list"
|
||||
data_filename = "hyde.xlsx"
|
||||
sheet_name = "AddressProfilingResults"
|
||||
postcode_column = "Postcode"
|
||||
address1_column = "Address"
|
||||
data_filename = "asset_list (8).xlsx"
|
||||
sheet_name = "Standardised Asset List"
|
||||
postcode_column = "postcode"
|
||||
address1_column = "domna_address_1"
|
||||
address1_method = None
|
||||
fulladdress_column = "Postcode"
|
||||
fulladdress_column = "domna_address_1"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type" # Good to include if landlord gave
|
||||
landlord_property_type = "landlord_property_id" # Good to include if landlord gave
|
||||
landlord_built_form = None # Good to include if landlord gave
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
|
|
@ -468,4 +468,3 @@ def app():
|
|||
asset_list.duplicated_addresses.to_excel(
|
||||
writer, sheet_name="Duplicate Properties", index=False
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -15,4 +15,6 @@ class UserAddress:
|
|||
user_address: str
|
||||
postcode: Postcode
|
||||
internal_reference: Optional[str] = None
|
||||
source_row: dict[str, str] = field(default_factory=_empty_source_row, compare=False)
|
||||
landlord_additional_info: dict[str, str] = field(
|
||||
default_factory=_empty_source_row, compare=False
|
||||
)
|
||||
|
|
|
|||
|
|
@ -43,14 +43,17 @@ class UserAddressCsvS3Repository(UserAddressRepository):
|
|||
user_address=user_address,
|
||||
postcode=Postcode(postcode),
|
||||
internal_reference=internal_reference,
|
||||
source_row=row,
|
||||
landlord_additional_info=row,
|
||||
)
|
||||
)
|
||||
return addresses
|
||||
|
||||
def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str:
|
||||
rows: list[dict[str, str]] = [
|
||||
{**addr.source_row, _POSTCODE_CLEAN_COLUMN: str(addr.postcode)}
|
||||
{
|
||||
**addr.landlord_additional_info,
|
||||
_POSTCODE_CLEAN_COLUMN: str(addr.postcode),
|
||||
}
|
||||
for addr in addresses
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -17,9 +17,7 @@ def test_user_address_preserves_user_address_verbatim() -> None:
|
|||
# The free-text user_address string is intentionally NOT normalised --
|
||||
# only the postcode is canonicalised, and that happens inside Postcode.
|
||||
# act
|
||||
addr = UserAddress(
|
||||
user_address=" 1 The Street ", postcode=Postcode("SW1A1AA")
|
||||
)
|
||||
addr = UserAddress(user_address=" 1 The Street ", postcode=Postcode("SW1A1AA"))
|
||||
# assert
|
||||
assert addr.user_address == " 1 The Street "
|
||||
|
||||
|
|
@ -64,7 +62,7 @@ def test_user_address_source_row_defaults_to_empty_dict() -> None:
|
|||
# act
|
||||
addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
# assert
|
||||
assert addr.source_row == {}
|
||||
assert addr.landlord_additional_info == {}
|
||||
|
||||
|
||||
def test_user_address_carries_source_row() -> None:
|
||||
|
|
@ -74,10 +72,10 @@ def test_user_address_carries_source_row() -> None:
|
|||
addr = UserAddress(
|
||||
user_address="1 The Street",
|
||||
postcode=Postcode("SW1A 1AA"),
|
||||
source_row=row,
|
||||
landlord_additional_info=row,
|
||||
)
|
||||
# assert
|
||||
assert addr.source_row == row
|
||||
assert addr.landlord_additional_info == row
|
||||
|
||||
|
||||
def test_user_address_equality_ignores_source_row() -> None:
|
||||
|
|
@ -87,12 +85,12 @@ def test_user_address_equality_ignores_source_row() -> None:
|
|||
a = UserAddress(
|
||||
user_address="1 The Street",
|
||||
postcode=Postcode("SW1A1AA"),
|
||||
source_row={"x": "1"},
|
||||
landlord_additional_info={"x": "1"},
|
||||
)
|
||||
b = UserAddress(
|
||||
user_address="1 The Street",
|
||||
postcode=Postcode("SW1A1AA"),
|
||||
source_row={"y": "2"},
|
||||
landlord_additional_info={"y": "2"},
|
||||
)
|
||||
# act / assert
|
||||
assert a == b
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ def test_load_batch_captures_full_source_row(
|
|||
addresses = repo.load_batch(uri)
|
||||
|
||||
# assert
|
||||
assert addresses[0].source_row == row
|
||||
assert addresses[0].landlord_additional_info == row
|
||||
|
||||
|
||||
def test_load_batch_raises_when_postcode_column_absent(
|
||||
|
|
@ -154,7 +154,9 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
|
|||
|
||||
# act
|
||||
saved_uri = repo.save_batch(addresses, "tasks/passthrough")
|
||||
saved_rows = repo._csv_client.read_rows(saved_uri) # pyright: ignore[reportPrivateUsage]
|
||||
saved_rows = repo._csv_client.read_rows(
|
||||
saved_uri
|
||||
) # pyright: ignore[reportPrivateUsage]
|
||||
|
||||
# assert
|
||||
assert len(saved_rows) == 1
|
||||
|
|
@ -174,7 +176,10 @@ def test_save_batch_returns_uri_under_path_prefix(
|
|||
UserAddress(
|
||||
user_address="1 High Street",
|
||||
postcode=Postcode("SW1A 1AA"),
|
||||
source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
|
||||
landlord_additional_info={
|
||||
"Address 1": "1 High Street",
|
||||
"postcode": "SW1A 1AA",
|
||||
},
|
||||
),
|
||||
]
|
||||
|
||||
|
|
@ -207,7 +212,9 @@ def test_save_then_reload_round_trip_preserves_columns(
|
|||
|
||||
# act
|
||||
saved_uri = repo.save_batch(addresses, "tasks/round-trip")
|
||||
saved_rows = repo._csv_client.read_rows(saved_uri) # pyright: ignore[reportPrivateUsage]
|
||||
saved_rows = repo._csv_client.read_rows(
|
||||
saved_uri
|
||||
) # pyright: ignore[reportPrivateUsage]
|
||||
|
||||
# assert
|
||||
# Original columns come back verbatim; postcode_clean is the only addition.
|
||||
|
|
@ -225,7 +232,10 @@ def test_save_batch_uses_unique_filename_per_call(
|
|||
UserAddress(
|
||||
user_address="1 High Street",
|
||||
postcode=Postcode("SW1A 1AA"),
|
||||
source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
|
||||
landlord_additional_info={
|
||||
"Address 1": "1 High Street",
|
||||
"postcode": "SW1A 1AA",
|
||||
},
|
||||
),
|
||||
]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue