mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-30 13:10:56 +00:00
made it into a pretty list
This commit is contained in:
parent
e5ea08fa0c
commit
d25d699000
1 changed files with 7 additions and 4 deletions
|
|
@ -7,13 +7,16 @@ class pdfReaderToText():
|
|||
def __init__(self, file_path):
    """Store the PDF path, create the logger and start with empty text buffers.

    Args:
        file_path: Path of the PDF to read; kept as ``source_path``.
    """
    # Both text buffers start out empty.
    # NOTE(review): ``text`` looks superseded by ``all_text`` -- confirm
    # against callers before removing either attribute.
    self.text = self.all_text = ""
    self.source_path = file_path
    pdf_logger = Logger(name='pdfReader', level=logging.DEBUG)
    self.logger = pdf_logger.get_logger()
|
||||
|
||||
def get_text_from_pdf_file(self):
    """Extract the text of every page and pretty-print it line by line.

    Opens ``self.source_path`` with PyMuPDF, appends the text of each page
    (in page order) to ``self.all_text``, then prints the accumulated text,
    split on newlines, using ``pprint``.

    Returns:
        None. The extracted text is left in ``self.all_text``.
    """
    from pprint import pprint

    self.logger.debug(f"Extrating text from {self.source_path}")

    # The context manager closes the document even if a page fails to
    # extract, so the file handle is not leaked.
    with pymupdf.open(self.source_path) as pdf:
        # One join instead of repeated ``+=`` on a growing string; each page
        # is extracted exactly once.
        self.all_text += "".join(page.get_text() for page in pdf)

    pprint(self.all_text.split('\n'))
|
||||
Loading…
Add table
Reference in a new issue