mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
84 lines
2.6 KiB
Python
84 lines
2.6 KiB
Python
from collections.abc import Iterator
|
|
|
|
import pytest
|
|
from moto import mock_aws
|
|
|
|
from infrastructure.s3.csv_s3_client import CsvS3Client
|
|
from tests.infrastructure import make_boto_client
|
|
|
|
# Name of the bucket the csv_client fixture creates and binds the client to;
# also used to build the expected s3:// URI in the assertions below.
BUCKET = "csv-bucket"
|
|
|
|
|
|
@pytest.fixture
def csv_client() -> Iterator[CsvS3Client]:
    """Yield a ``CsvS3Client`` bound to ``BUCKET`` inside a ``mock_aws`` context.

    The bucket is created before the client is handed to the test, and the
    ``mock_aws`` context stays open until the test using this fixture is done,
    because the ``yield`` sits inside the ``with`` block.
    """
    with mock_aws():
        s3 = make_boto_client("s3")
        # The bucket has to exist before the client is used against it.
        s3.create_bucket(Bucket=BUCKET)
        client = CsvS3Client(s3, BUCKET)
        yield client
|
|
|
|
|
|
def test_save_rows_returns_s3_uri(csv_client: CsvS3Client) -> None:
    """``save_rows`` returns the ``s3://`` URI for the bucket and key written."""
    # arrange
    key = "uploads/addresses.csv"
    payload = [{"address": "1 High St", "postcode": "AB1 2CD"}]

    # act
    returned_uri = csv_client.save_rows(payload, key)

    # assert
    assert returned_uri == f"s3://{BUCKET}/uploads/addresses.csv"
|
|
|
|
|
|
def test_round_trip_preserves_rows(csv_client: CsvS3Client) -> None:
    """Rows written with ``save_rows`` come back equal from ``read_rows``."""
    # arrange
    high_st = {"address": "1 High St", "postcode": "AB1 2CD"}
    low_st = {"address": "2 Low St", "postcode": "XY9 8ZW"}
    original = [high_st, low_st]

    # act
    saved_uri = csv_client.save_rows(original, "uploads/addresses.csv")
    loaded = csv_client.read_rows(saved_uri)

    # assert
    assert loaded == original
|
|
|
|
|
|
def test_save_rows_rejects_empty_list(csv_client: CsvS3Client) -> None:
    """``save_rows`` raises ``ValueError`` matching "empty" for an empty list."""
    # arrange
    target_key = "uploads/empty.csv"

    # act / assert
    with pytest.raises(ValueError, match="empty"):
        csv_client.save_rows([], target_key)
|
|
|
|
|
|
def test_read_rows_rejects_wrong_bucket(csv_client: CsvS3Client) -> None:
    """``read_rows`` raises ``ValueError`` for a URI naming a different bucket."""
    # arrange: a URI whose bucket is not the one the client was built with.
    foreign_uri = "s3://other-bucket/uploads/addresses.csv"

    # act / assert
    with pytest.raises(ValueError, match="does not match client bucket"):
        csv_client.read_rows(foreign_uri)
|
|
|
|
|
|
def test_read_rows_indexes_duplicate_column_names(csv_client: CsvS3Client) -> None:
    """A repeated header keeps its name once; the repeat gets a ``_1`` suffix."""
    # arrange: the Hyde export has two columns both headed "Walls" (a
    # description and a score). Without disambiguation csv.DictReader would
    # collapse them onto one key and the description would be lost.
    csv_text = "Address 1,Walls,Roofs,Walls\n1 High St,Cavity: Filled,Pitched 300mm,9.6\n"
    body = csv_text.encode("utf-8")
    uri = csv_client.put_object("uploads/dup.csv", body)

    # act
    parsed = csv_client.read_rows(uri)

    # assert: the first occurrence keeps its name, the second gets an index.
    expected_row = {
        "Address 1": "1 High St",
        "Walls": "Cavity: Filled",
        "Roofs": "Pitched 300mm",
        "Walls_1": "9.6",
    }
    assert parsed == [expected_row]
|
|
|
|
|
|
def test_read_rows_indexes_each_repeat_of_a_column(csv_client: CsvS3Client) -> None:
    """Each further repeat of a header gets the next index: ``_1``, then ``_2``."""
    # arrange: three columns sharing one header.
    csv_text = "Walls,Walls,Walls\nfirst,second,third\n"
    body = csv_text.encode("utf-8")
    uri = csv_client.put_object("uploads/triple.csv", body)

    # act
    parsed = csv_client.read_rows(uri)

    # assert
    expected_row = {"Walls": "first", "Walls_1": "second", "Walls_2": "third"}
    assert parsed == [expected_row]
|