mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
pdf reader work
This commit is contained in:
parent
af338dd02b
commit
5059bc28e9
2 changed files with 65 additions and 10 deletions
18
etl/main.py
18
etl/main.py
|
|
@ -25,18 +25,18 @@ def main():
|
|||
# logger.info(pformat(list_of_house_ass_names))
|
||||
|
||||
# POC of downloading each file
|
||||
south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
|
||||
south_coast_scraper.download_file_for_each_address()
|
||||
# south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
|
||||
# south_coast_scraper.download_file_for_each_address()
|
||||
|
||||
# POC of pdf reader
|
||||
DATA_LOC = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf"
|
||||
pdfReader = pdfReaderToText(DATA_LOC)
|
||||
siteNoteReader = pdfReader.get_reader()
|
||||
logger.warning(siteNoteReader.type)
|
||||
DATA_LOC_1 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf"
|
||||
DATA_LOC_2 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/16 Sunningdale Drive TN38 0WB/PRE SITE NOTES.pdf"
|
||||
|
||||
|
||||
|
||||
# logger.info(south_coast_scraper.surveyor_to_housing_assosications)
|
||||
pdfReader = pdfReaderToText(DATA_LOC_1)
|
||||
doc1 = pdfReader.get_reader()
|
||||
pdfReader = pdfReaderToText(DATA_LOC_2)
|
||||
doc2 = pdfReader.get_reader()
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -8,4 +8,59 @@ class SiteNotes():
|
|||
class QuidosSiteNotes(SiteNotes):
    """Site notes extracted from a Quidos site-note report.

    The constructor forwards ``data_list`` to ``SiteNotes.__init__``, tags the
    instance with ``ReportType.QUIDOS_SITE_NOTE`` and then runs :meth:`setup`
    to populate the individual fields.
    """

    # Field labels that can appear in the report. A token equal to one of
    # these is never treated as a value: when it directly follows another
    # label, that field is considered blank. The original list named
    # "Address" twice; one entry is enough for a membership test.
    # Kept as a tuple (not a set) so membership works for any token type.
    _FIELD_LABELS = (
        "Reference Number",
        "EPC Language",
        "UPRN",
        "Postcode",
        "Region",
        "Address",
        "Town",
        "County",
        "Property Tenure",
        "Transaction Type",
        "Inspection Date",
        'Assessor’s accreditation number',
        'Assessor’s name',
        'Company name/trading name',
        'POST CODE',
        'Phone number',
        'Fax number',
        'E-mail address',
        'Related party disclosure',
        'Current SAP rating',
        'Potential SAP rating',
        'Current EI rating',
        'Current annual emissions',
        'Current annual energy costs',
        'Emission figures including 9.92 emission factor of 0.925',
    )

    def __init__(self, data_list):
        """Initialise the base class with ``data_list`` and parse the report."""
        super().__init__(data_list)
        self.type = ReportType.QUIDOS_SITE_NOTE
        self.setup()

    def _value_after(self, label):
        """Return the token that follows the first ``label`` in ``self.raw_data``.

        Returns ``None`` when the label is absent, when it is the last token,
        or when the following token is itself a known field label (i.e. the
        field was left empty in the report).
        """
        # assumes self.raw_data is the flat token sequence set up by
        # SiteNotes.__init__ -- TODO confirm against the base class
        try:
            candidate = self.raw_data[self.raw_data.index(label) + 1]
        except (ValueError, IndexError):
            # Missing label or label at the very end: treat as "no value"
            # instead of aborting construction of the whole object.
            return None
        if candidate in self._FIELD_LABELS:
            return None
        return candidate

    def setup(self):
        """Read the Quidos site report tokens and populate the instance fields.

        Every field is looked up by its printed label; a field that is
        missing or blank is stored as ``None``.
        """
        value_after = self._value_after

        # Summary Information
        self.reference_number = value_after('Reference Number')
        self.epc_language = value_after('EPC Language')
        self.UPRN = value_after('UPRN')
        self.postcode = value_after('Postcode')
        self.region = value_after('Region')
        self.address = value_after('Address')
        self.town = value_after('Town')
        # ``country`` is the attribute name this class has always exposed for
        # the 'County' label; it is kept so existing callers do not break and
        # ``county`` is offered as the correctly named equivalent.
        self.country = value_after('County')
        self.county = self.country
        self.property_tenure = value_after('Property Tenure')
        self.transaction_type = value_after('Transaction Type')
        self.inspection_date = value_after('Inspection Date')

        # Assessor / company details. These attributes used to be bare
        # expressions that assigned nothing and raised AttributeError.
        # NOTE(review): the label chosen for each attribute is inferred from
        # its name -- confirm against a real report.
        self.assessor_accrediation_number = value_after(
            'Assessor’s accreditation number'
        )
        self.company_trading_name = value_after('Company name/trading name')
        self.company_post_code = value_after('POST CODE')
        self.company_fax_number = value_after('Fax number')
        self.company_related_party_disclosure = value_after(
            'Related party disclosure'
        )
        # TODO: the original ended with an unfinished ``self.assessor_``
        # expression; add the remaining assessor fields once they are decided.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue