Made it into a pretty list

This commit is contained in:
Jun-te Kim 2025-03-04 12:32:23 +00:00
parent e5ea08fa0c
commit d25d699000

View file

@@ -7,13 +7,16 @@ class pdfReaderToText():
def __init__(self, file_path):
    """Remember the PDF location and prepare the logger and text buffers.

    Args:
        file_path: Location of the PDF; stored as ``source_path`` and
            later passed to ``pymupdf.open``.
    """
    self.source_path = file_path
    # Build the DEBUG-level 'pdfReader' logger via the project's Logger wrapper.
    pdf_logger = Logger(name='pdfReader', level=logging.DEBUG).get_logger()
    self.logger = pdf_logger
    # Both buffers start out empty.
    self.text = self.all_text = ""
def get_text_from_pdf_file(self):
    """Extract the text of every page and append it to ``self.all_text``.

    Opens ``self.source_path`` with ``pymupdf``, logs each page's text,
    appends the text of all pages to ``self.all_text`` and finally
    pretty-prints the accumulated text as a list of lines.

    Returns:
        None. The extracted text is left in ``self.all_text``.
    """
    # Local import: the file's import header is not visible from this block.
    from pprint import pprint

    self.logger.debug(f"Extrating text from {self.source_path}")
    page_texts = []
    # The context manager closes the document even if extraction fails.
    with pymupdf.open(self.source_path) as pdf:
        for page in pdf:
            # Extract once per page; reuse for both logging and accumulation.
            page_text = page.get_text()
            # Call a logging method: the logger object itself is not callable.
            self.logger.info('###')
            # Logged as UTF-8 bytes, matching the previous log output.
            self.logger.info(page_text.encode("utf8"))
            page_texts.append(page_text)
    # Single join instead of repeated string concatenation inside the loop.
    self.all_text += "".join(page_texts)
    pprint(self.all_text.split('\n'))