Model/backend/documents_parser/tests/test_pdf.py
2026-04-16 14:45:28 +00:00

31 lines
946 B
Python

import json
import os
import pytest
from backend.documents_parser.pdf import pdf_to_text_list
# Directory next to this test module that holds the test data files.
_FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")
# Example PDF document used as parser input.
PDF_PATH = os.path.join(_FIXTURES_DIR, "ExampleSiteNotes.pdf")
# JSON file holding the expected parser output for the example PDF.
FIXTURE_PATH = os.path.join(_FIXTURES_DIR, "site_notes_example_text.json")
@pytest.fixture
def pdf_bytes() -> bytes:
    """Provide the raw bytes of the example PDF located at ``PDF_PATH``."""
    # Binary mode: the content is returned undecoded.
    with open(PDF_PATH, mode="rb") as pdf_file:
        raw = pdf_file.read()
    return raw
class TestPdfToTextList:
    """Tests for ``pdf_to_text_list`` run against the example PDF bytes."""

    def test_returns_list(self, pdf_bytes: bytes) -> None:
        """The result is a ``list``."""
        result = pdf_to_text_list(pdf_bytes)
        assert isinstance(result, list)

    def test_all_elements_are_strings(self, pdf_bytes: bytes) -> None:
        """Every element of the result is a ``str``."""
        result = pdf_to_text_list(pdf_bytes)
        assert all(isinstance(t, str) for t in result)

    def test_matches_fixture(self, pdf_bytes: bytes) -> None:
        """The result equals the expected value stored in the JSON fixture."""
        # Decode explicitly as UTF-8: without ``encoding`` open() falls back
        # to the locale's preferred encoding, which can mis-decode non-ASCII
        # text on some platforms and make this comparison fail spuriously.
        # NOTE(review): assumes the fixture file is saved as UTF-8 (the
        # standard JSON encoding) -- confirm.
        with open(FIXTURE_PATH, encoding="utf-8") as f:
            expected = json.load(f)
        result = pdf_to_text_list(pdf_bytes)
        assert result == expected