mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
site notes pdf to json 🟥
This commit is contained in:
parent
326ed20015
commit
bc527a039f
3 changed files with 36 additions and 0 deletions
5
backend/documents_parser/pdf.py
Normal file
5
backend/documents_parser/pdf.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
from typing import List
|
||||
|
||||
|
||||
def pdf_to_text_list(pdf_bytes: bytes) -> List[str]:
    """Convert the raw bytes of a PDF document into a list of strings.

    This is a placeholder: no extraction logic exists yet, so every call
    raises.

    NOTE(review): how the text is meant to be split into list items (per
    page, per line, per paragraph) is not defined here -- confirm against
    the expected-output fixture used by the tests.

    Args:
        pdf_bytes: The complete contents of a PDF file.

    Returns:
        The extracted text, as a list of strings (once implemented).

    Raises:
        NotImplementedError: Always, until the parser is written.
    """
    # A message makes the failure self-explanatory in test output and logs.
    raise NotImplementedError("pdf_to_text_list is not implemented yet")
|
||||
BIN
backend/documents_parser/tests/fixtures/ExampleSiteNotes.pdf
vendored
Normal file
BIN
backend/documents_parser/tests/fixtures/ExampleSiteNotes.pdf
vendored
Normal file
Binary file not shown.
31
backend/documents_parser/tests/test_pdf.py
Normal file
31
backend/documents_parser/tests/test_pdf.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
import json
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.documents_parser.pdf import pdf_to_text_list
|
||||
|
||||
# Directory that holds the test data files, located next to this module.
_FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")

# Sample PDF whose bytes are passed to pdf_to_text_list by the tests.
PDF_PATH = os.path.join(_FIXTURES_DIR, "ExampleSiteNotes.pdf")

# JSON file containing the output expected for the sample PDF.
FIXTURE_PATH = os.path.join(_FIXTURES_DIR, "site_notes_example_text.json")
|
||||
|
||||
|
||||
@pytest.fixture
def pdf_bytes() -> bytes:
    """Return the raw contents of the sample PDF located at ``PDF_PATH``."""
    # Binary mode: the PDF is handed to the parser as bytes, undecoded.
    with open(PDF_PATH, "rb") as f:
        return f.read()
|
||||
|
||||
|
||||
class TestPdfToTextList:
    """Tests for ``pdf_to_text_list`` run against the sample PDF."""

    def test_returns_list(self, pdf_bytes: bytes) -> None:
        """The parser returns a ``list``."""
        result = pdf_to_text_list(pdf_bytes)
        assert isinstance(result, list)

    def test_all_elements_are_strings(self, pdf_bytes: bytes) -> None:
        """Every item of the returned list is a ``str``."""
        result = pdf_to_text_list(pdf_bytes)
        assert all(isinstance(t, str) for t in result)

    def test_matches_fixture(self, pdf_bytes: bytes) -> None:
        """The parser output equals the content of the JSON fixture."""
        # Read the JSON explicitly as UTF-8 instead of relying on the
        # platform's locale encoding, which may mis-decode non-ASCII text.
        with open(FIXTURE_PATH, encoding="utf-8") as f:
            expected = json.load(f)
        result = pdf_to_text_list(pdf_bytes)
        assert result == expected
|
||||
Loading…
Add table
Reference in a new issue