mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
90 lines
2.3 KiB
Python
90 lines
2.3 KiB
Python
"""Tests for LocalStorage — fs-backed Storage protocol for the training pipeline.
|
|
|
|
Storage is the swap-point between local development (LocalStorage rooted at
./data/) and the eventual S3-backed implementation. Downstream stages
(bulk_fetch, write_parquet) talk to the Storage protocol only, never to Path
directly.
|
|
"""
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from ml_training_data.storage import LocalStorage
|
|
|
|
|
|
def test_write_bytes_then_read_bytes_returns_same_data(tmp_path: Path) -> None:
    """Bytes written under a key come back unchanged when read from that key."""
    # Arrange
    key = "greetings/hello.txt"
    expected = b"hello world"
    store = LocalStorage(root=tmp_path)

    # Act
    store.write_bytes(key, expected)
    actual = store.read_bytes(key)

    # Assert
    assert actual == expected
|
|
|
|
|
|
def test_exists_is_false_before_write_and_true_after(tmp_path: Path) -> None:
    """exists() is False for a key before it is written and True afterwards."""
    # Arrange
    key = "a/b.bin"
    store = LocalStorage(root=tmp_path)

    # Act
    existed_beforehand = store.exists(key)
    store.write_bytes(key, b"x")
    exists_afterwards = store.exists(key)

    # Assert — identity checks pin the results to real booleans, not just truthiness.
    assert existed_beforehand is False
    assert exists_afterwards is True
|
|
|
|
|
|
def test_iter_keys_yields_every_written_key(tmp_path: Path) -> None:
    """iter_keys() with no arguments lists every key that was written."""
    # Arrange
    store = LocalStorage(root=tmp_path)
    fixtures = (
        ("certs/a.json", b"1"),
        ("certs/b.json", b"2"),
        ("manifest.json", b"3"),
    )
    for key, payload in fixtures:
        store.write_bytes(key, payload)

    # Act — sort so the assertion does not depend on iteration order.
    listed = list(store.iter_keys())
    listed.sort()

    # Assert
    assert listed == ["certs/a.json", "certs/b.json", "manifest.json"]
|
|
|
|
|
|
def test_iter_keys_filters_by_prefix(tmp_path: Path) -> None:
    """iter_keys(prefix="certs/") omits the key written outside that prefix."""
    # Arrange
    store = LocalStorage(root=tmp_path)
    fixtures = (
        ("certs/a.json", b"1"),
        ("certs/b.json", b"2"),
        ("manifest.json", b"3"),
    )
    for key, payload in fixtures:
        store.write_bytes(key, payload)

    # Act — sort so the assertion does not depend on iteration order.
    listed = list(store.iter_keys(prefix="certs/"))
    listed.sort()

    # Assert
    assert listed == ["certs/a.json", "certs/b.json"]
|
|
|
|
|
|
def test_read_bytes_raises_filenotfound_for_missing_key(tmp_path: Path) -> None:
    """read_bytes() on a key that was never written raises FileNotFoundError."""
    # Arrange — built outside the raises block so only read_bytes is under test.
    store = LocalStorage(root=tmp_path)
    missing_key = "nope.bin"

    # Act / Assert
    with pytest.raises(FileNotFoundError):
        store.read_bytes(missing_key)
|
|
|
|
|
|
def test_open_read_returns_seekable_binary_stream(tmp_path: Path) -> None:
    """open_read() yields a context-managed stream that supports seek() and read()."""
    # Arrange
    key = "big.bin"
    store = LocalStorage(root=tmp_path)
    store.write_bytes(key, b"abcdefghij")

    # Act — skip the first four bytes, then take the next three.
    with store.open_read(key) as stream:
        stream.seek(4)
        window = stream.read(3)

    # Assert
    assert window == b"efg"
|