pdf reader work

This commit is contained in:
Jun-te Kim 2025-03-11 12:19:25 +00:00
parent af338dd02b
commit 5059bc28e9
2 changed files with 65 additions and 10 deletions

View file

@ -25,18 +25,18 @@ def main():
# logger.info(pformat(list_of_house_ass_names))
# POC of downloading each file
south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
south_coast_scraper.download_file_for_each_address()
# south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
# south_coast_scraper.download_file_for_each_address()
# POC of pdf reader
DATA_LOC = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf"
pdfReader = pdfReaderToText(DATA_LOC)
siteNoteReader = pdfReader.get_reader()
logger.warning(siteNoteReader.type)
DATA_LOC_1 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf"
DATA_LOC_2 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/16 Sunningdale Drive TN38 0WB/PRE SITE NOTES.pdf"
# logger.info(south_coast_scraper.surveyor_to_housing_assosications)
pdfReader = pdfReaderToText(DATA_LOC_1)
doc1 = pdfReader.get_reader()
pdfReader = pdfReaderToText(DATA_LOC_2)
doc2 = pdfReader.get_reader()
if __name__ == "__main__":

View file

@ -8,4 +8,59 @@ class SiteNotes():
class QuidosSiteNotes(SiteNotes):
def __init__(self, data_list):
super().__init__(data_list)
self.type = ReportType.QUIDOS_SITE_NOTE
self.type = ReportType.QUIDOS_SITE_NOTE
self.setup()
def setup(self):
"""
Parse the raw Quidos site notes data and populate this object's fields.
"""
# Summary Information
avoid = [
"Reference Number",
"EPC Language",
"UPRN",
"Postcode",
"Region",
"Address",
"Town",
"County",
"Property Tenure",
"Transaction Type",
"Inspection Date",
'Assessors accreditation number',
'Assessors name',
'Company name/trading name',
'Address',
'POST CODE',
'Phone number',
'Fax number',
'E-mail address',
'Related party disclosure',
'Current SAP rating',
'Potential SAP rating',
'Current EI rating',
'Current annual emissions',
'Current annual energy costs',
'Emission figures including 9.92 emission factor of 0.925',
]
get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1]
self.reference_number = get_value('Reference Number')
self.epc_language = get_value('EPC Language')
self.UPRN = get_value('UPRN')
self.postcode = get_value('Postcode')
self.region = get_value('Region')
self.address = get_value('Address')
self.town = get_value('Town')
self.country = get_value('County')
self.property_tenure = get_value('Property Tenure')
self.transaction_type = get_value('Transaction Type')
self.inspection_date = get_value('Inspection Date')
self.assessor_accrediation_number
self.company_trading_name
self.company_post_code
self.company_fax_number
self.company_related_party_disclosure
self.assessor_