from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
from backend.documents_parser.pdf import pdf_to_text_list


def parse_pashub_site_notes(file_path: str) -> EpcPropertyData:
    """Build an ``EpcPropertyData`` from the file at *file_path*.

    The file is read in binary mode, its bytes are handed to
    ``pdf_to_text_list``, the result is fed to
    ``PasHubRdSapSiteNotesExtractor`` and the extracted site notes are
    mapped with ``EpcPropertyDataMapper.from_site_notes``.

    Args:
        file_path: Location of the file to read (presumably a PasHub RdSAP
            site-notes PDF, judging by the helper names -- confirm with
            callers).

    Returns:
        Whatever ``EpcPropertyDataMapper.from_site_notes`` produces for the
        extracted site notes.

    Raises:
        OSError: If the file cannot be opened or read. Errors raised by the
            PDF, extractor or mapper helpers propagate unchanged.
    """
    # Read everything up front so the file handle is closed before parsing.
    with open(file_path, "rb") as pdf_file:
        raw_pdf = pdf_file.read()

    extractor = PasHubRdSapSiteNotesExtractor(pdf_to_text_list(raw_pdf))
    return EpcPropertyDataMapper.from_site_notes(extractor.extract())