# Model/tests/infrastructure/test_csv_s3_client.py
#
# 84 lines
# 2.6 KiB
# Python

from collections.abc import Iterator
import pytest
from moto import mock_aws
from infrastructure.s3.csv_s3_client import CsvS3Client
from tests.infrastructure import make_boto_client
# Bucket name shared by the tests: the csv_client fixture creates this bucket
# under mock_aws and passes the same name to CsvS3Client.
BUCKET = "csv-bucket"
@pytest.fixture
def csv_client() -> Iterator[CsvS3Client]:
    """Yield a ``CsvS3Client`` for ``BUCKET`` while ``mock_aws`` is active.

    The bucket is created before the client is handed to the test. The
    ``yield`` sits inside the ``with`` block, so the mock context is still
    open while the test body runs.
    """
    with mock_aws():
        s3 = make_boto_client("s3")
        s3.create_bucket(Bucket=BUCKET)
        client = CsvS3Client(s3, BUCKET)
        yield client
def test_save_rows_returns_s3_uri(csv_client: CsvS3Client) -> None:
    """``save_rows`` returns ``s3://<bucket>/<key>`` for the key it was given."""
    # arrange
    key = "uploads/addresses.csv"
    single_row = [{"address": "1 High St", "postcode": "AB1 2CD"}]
    expected_uri = f"s3://{BUCKET}/uploads/addresses.csv"

    # act
    returned_uri = csv_client.save_rows(single_row, key)

    # assert
    assert returned_uri == expected_uri
def test_round_trip_preserves_rows(csv_client: CsvS3Client) -> None:
    """Rows given to ``save_rows`` come back equal from ``read_rows``."""
    # arrange
    high_street = {"address": "1 High St", "postcode": "AB1 2CD"}
    low_street = {"address": "2 Low St", "postcode": "XY9 8ZW"}
    original_rows = [high_street, low_street]

    # act: read back from the URI that the save call returned.
    round_tripped = csv_client.read_rows(
        csv_client.save_rows(original_rows, "uploads/addresses.csv")
    )

    # assert
    assert round_tripped == original_rows
def test_save_rows_rejects_empty_list(csv_client: CsvS3Client) -> None:
    """Saving an empty list raises ``ValueError`` with a message matching "empty"."""
    # arrange
    key = "uploads/empty.csv"
    expect_empty_error = pytest.raises(ValueError, match="empty")

    # act / assert
    with expect_empty_error:
        csv_client.save_rows([], key)
def test_read_rows_rejects_wrong_bucket(csv_client: CsvS3Client) -> None:
    """Reading a URI that names a different bucket raises ``ValueError``."""
    # arrange: the URI's bucket differs from the BUCKET the client was built with.
    foreign_uri = "s3://other-bucket/uploads/addresses.csv"
    expect_mismatch_error = pytest.raises(
        ValueError, match="does not match client bucket"
    )

    # act / assert
    with expect_mismatch_error:
        csv_client.read_rows(foreign_uri)
def test_read_rows_indexes_duplicate_column_names(csv_client: CsvS3Client) -> None:
    """A header that appears twice keeps its name once and is indexed on the repeat."""
    # arrange: the Hyde export carries two columns both headed "Walls", one
    # holding a description and the other a score. Left alone,
    # csv.DictReader would fold both onto a single key and drop the
    # description.
    csv_text = "Address 1,Walls,Roofs,Walls\n1 High St,Cavity: Filled,Pitched 300mm,9.6\n"
    payload = csv_text.encode("utf-8")
    uri = csv_client.put_object("uploads/dup.csv", payload)
    expected_row = {
        "Address 1": "1 High St",
        "Walls": "Cavity: Filled",
        "Roofs": "Pitched 300mm",
        "Walls_1": "9.6",
    }

    # act
    parsed = csv_client.read_rows(uri)

    # assert: the first "Walls" is untouched; the second carries an index.
    assert parsed == [expected_row]
def test_read_rows_indexes_each_repeat_of_a_column(csv_client: CsvS3Client) -> None:
    """Each further repeat of a header receives the next index suffix."""
    # arrange: one header name used for all three columns.
    csv_text = "Walls,Walls,Walls\nfirst,second,third\n"
    payload = csv_text.encode("utf-8")
    uri = csv_client.put_object("uploads/triple.csv", payload)
    expected_row = {"Walls": "first", "Walls_1": "second", "Walls_2": "third"}

    # act
    parsed = csv_client.read_rows(uri)

    # assert
    assert parsed == [expected_row]