mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
site notes pdf to json 🟩
This commit is contained in:
parent
bc527a039f
commit
16a8363a2a
1 changed file with 8 additions and 1 deletion
|
|
@@ -1,5 +1,12 @@
|
|||
from typing import List
|
||||
|
||||
import pymupdf
|
||||
|
||||
|
||||
def pdf_to_text_list(pdf_bytes: bytes) -> List[str]:
    """Return the text of a PDF as a flat list of lines.

    The document is opened from memory with PyMuPDF, its pages are visited
    in iteration order, and the text of each page is split on newline
    characters.

    Args:
        pdf_bytes: Raw contents of the PDF file.

    Returns:
        The lines of every page, concatenated in page order. The split is
        on the newline character only, so empty strings are kept (blank
        lines, and the piece following a trailing newline).
    """
    tokens: List[str] = []
    # The context manager releases the document when the block exits.
    with pymupdf.open(stream=pdf_bytes, filetype="pdf") as doc:
        for page in doc:
            # extend() adds the page's lines in one call instead of
            # appending them one by one; the resulting list is identical.
            tokens.extend(page.get_text().split("\n"))
    return tokens
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue