"""Tests for ``pdf_to_text_list`` run against a sample PDF kept in ``fixtures/``."""

import json
import os

import pytest

from backend.documents_parser.pdf import pdf_to_text_list

# Both paths are anchored to this module's directory so the tests do not
# depend on the working directory pytest is launched from.
PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_1.pdf")
FIXTURE_PATH = os.path.join(
    os.path.dirname(__file__), "fixtures", "pashub_site_notes_1_text.json"
)


@pytest.fixture
def pdf_bytes() -> bytes:
    """Return the raw bytes of the sample PDF located at ``PDF_PATH``."""
    with open(PDF_PATH, "rb") as f:
        return f.read()


class TestPdfToTextList:
    """Checks on the value ``pdf_to_text_list`` returns for the sample PDF."""

    def test_returns_list(self, pdf_bytes: bytes) -> None:
        """The result is a ``list``."""
        result = pdf_to_text_list(pdf_bytes)
        assert isinstance(result, list)

    def test_all_elements_are_strings(self, pdf_bytes: bytes) -> None:
        """Every element of the result is a ``str``."""
        result = pdf_to_text_list(pdf_bytes)
        assert all(isinstance(t, str) for t in result)

    def test_matches_fixture(self, pdf_bytes: bytes) -> None:
        """The result equals the expected value stored in the JSON fixture."""
        # Decode explicitly as UTF-8: the default text encoding of ``open`` is
        # locale-dependent, which would make this comparison platform-specific
        # if the fixture contains non-ASCII characters.
        with open(FIXTURE_PATH, encoding="utf-8") as f:
            expected = json.load(f)
        result = pdf_to_text_list(pdf_bytes)
        assert result == expected