"""Tests for ``CsvS3Client`` run against a moto-mocked S3 bucket."""

from collections.abc import Iterator

import pytest
from moto import mock_aws

from infrastructure.s3.csv_s3_client import CsvS3Client
from tests.infrastructure import make_boto_client

BUCKET = "csv-bucket"


@pytest.fixture
def csv_client() -> Iterator[CsvS3Client]:
    """Yield a ``CsvS3Client`` bound to ``BUCKET``, created inside ``mock_aws``.

    The ``yield`` sits inside the ``mock_aws()`` context, so the mock stays
    active for the whole duration of the test that uses this fixture.
    """
    with mock_aws():
        s3 = make_boto_client("s3")
        s3.create_bucket(Bucket=BUCKET)
        client = CsvS3Client(s3, BUCKET)
        yield client


def test_save_rows_returns_s3_uri(csv_client: CsvS3Client) -> None:
    """``save_rows`` returns the ``s3://`` URI of the object it wrote."""
    # arrange
    input_rows = [{"address": "1 High St", "postcode": "AB1 2CD"}]
    expected_uri = f"s3://{BUCKET}/uploads/addresses.csv"

    # act
    saved_uri = csv_client.save_rows(input_rows, "uploads/addresses.csv")

    # assert
    assert saved_uri == expected_uri


def test_round_trip_preserves_rows(csv_client: CsvS3Client) -> None:
    """Rows written with ``save_rows`` come back unchanged from ``read_rows``."""
    # arrange
    first = {"address": "1 High St", "postcode": "AB1 2CD"}
    second = {"address": "2 Low St", "postcode": "XY9 8ZW"}
    input_rows = [first, second]

    # act
    saved_uri = csv_client.save_rows(input_rows, "uploads/addresses.csv")
    round_tripped = csv_client.read_rows(saved_uri)

    # assert
    assert round_tripped == input_rows


def test_save_rows_rejects_empty_list(csv_client: CsvS3Client) -> None:
    """Saving an empty list raises ``ValueError`` with "empty" in the message."""
    # act / assert
    expect_empty_error = pytest.raises(ValueError, match="empty")
    with expect_empty_error:
        csv_client.save_rows([], "uploads/empty.csv")


def test_read_rows_rejects_wrong_bucket(csv_client: CsvS3Client) -> None:
    """Reading a URI that names another bucket raises ``ValueError``."""
    # arrange: a URI whose bucket differs from the one the client was built with.
    foreign_uri = "s3://other-bucket/uploads/addresses.csv"

    # act / assert
    expect_bucket_error = pytest.raises(
        ValueError, match="does not match client bucket"
    )
    with expect_bucket_error:
        csv_client.read_rows(foreign_uri)


def test_read_rows_indexes_duplicate_column_names(csv_client: CsvS3Client) -> None:
    """A repeated header gets an index suffix so both columns survive."""
    # arrange: the Hyde export has two columns both headed "Walls" (a
    # description and a score). Without disambiguation csv.DictReader would
    # collapse them onto one key and the description would be lost.
    csv_text = (
        "Address 1,Walls,Roofs,Walls\n1 High St,Cavity: Filled,Pitched 300mm,9.6\n"
    )
    payload = csv_text.encode("utf-8")
    uploaded_uri = csv_client.put_object("uploads/dup.csv", payload)

    # act
    parsed = csv_client.read_rows(uploaded_uri)

    # assert: the first occurrence keeps its name, the second gets an index.
    expected_row = {
        "Address 1": "1 High St",
        "Walls": "Cavity: Filled",
        "Roofs": "Pitched 300mm",
        "Walls_1": "9.6",
    }
    assert parsed == [expected_row]


def test_read_rows_indexes_each_repeat_of_a_column(csv_client: CsvS3Client) -> None:
    """Every further repeat of a header receives the next index."""
    # arrange: three columns sharing one header.
    csv_text = "Walls,Walls,Walls\nfirst,second,third\n"
    payload = csv_text.encode("utf-8")
    uploaded_uri = csv_client.put_object("uploads/triple.csv", payload)

    # act
    parsed = csv_client.read_rows(uploaded_uri)

    # assert
    expected_row = {"Walls": "first", "Walls_1": "second", "Walls_2": "third"}
    assert parsed == [expected_row]