from typing import List

import pymupdf


def pdf_to_text_list(pdf_bytes: bytes) -> List[str]:
    """Return the text of every page in a PDF as one flat list of lines.

    The PDF is opened from memory with ``pymupdf``; each page's
    ``get_text()`` result is split on ``"\\n"`` and the pieces from all
    pages are concatenated in iteration order.

    Args:
        pdf_bytes: Raw contents of a PDF file.

    Returns:
        The pieces of each page's text, page after page. Because the
        split is on ``"\\n"`` (not ``splitlines``), empty strings are
        preserved, including one after a trailing newline.
    """
    with pymupdf.open(stream=pdf_bytes, filetype="pdf") as document:
        return [
            text_line
            for pdf_page in document
            for text_line in pdf_page.get_text().split("\n")
        ]


def pdf_to_pages(pdf_bytes: bytes) -> List[str]:
    """Return the text of a PDF as a list with one string per page.

    Args:
        pdf_bytes: Raw contents of a PDF file.

    Returns:
        The ``get_text()`` result of each page, in iteration order.
    """
    with pymupdf.open(stream=pdf_bytes, filetype="pdf") as document:
        page_texts = [pdf_page.get_text() for pdf_page in document]
    return page_texts