diff --git a/etl/pdfReader/pdfReaderToText.py b/etl/pdfReader/pdfReaderToText.py index 16adee0..350b31a 100644 --- a/etl/pdfReader/pdfReaderToText.py +++ b/etl/pdfReader/pdfReaderToText.py @@ -31,10 +31,9 @@ class pdfReaderToText(): if len(self.text_list) > 1: if "Quidos Ltd using Argyle software BRE approved calculator".lower() in self.text_list[0].lower(): self.type = ReportType.QUIDOS_PRESITE_NOTE - return self.type else: - return None - # raise NotImplementedError("New type of file - please contact Jun-te Kim") + pass + return self.type def get_reader(self): self.get_file_type() diff --git a/etl/scis_invoice.py b/etl/scis_invoice.py index 5d478cc..dd91364 100644 --- a/etl/scis_invoice.py +++ b/etl/scis_invoice.py @@ -1,5 +1,7 @@ from etl.scraper.scraper import SharePointScraper, SharePointInstaller from pprint import pformat +from etl.pdfReader.pdfReaderToText import pdfReaderToText +from etl.surveyedData.surveryedData import surveyedDataProcessor def get_type_of_file(path): @@ -9,12 +11,8 @@ def main(): south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION, development=True) file_paths = south_coast_scraper.download_file_for_each_address() - print(pformat(file_paths)) - for eachAddress in file_paths: - print(eachAddress) - - # Download presite notes and add it to pdf reader class + survey = surveyedDataProcessor(eachAddress) if __name__ == "__main__": diff --git a/etl/scraper/scraper.py b/etl/scraper/scraper.py index 4568a5a..17ad03f 100644 --- a/etl/scraper/scraper.py +++ b/etl/scraper/scraper.py @@ -243,9 +243,9 @@ class SharePointScraper(): if 'file' not in address: # Only directories allAddress.append(address['name']) - address_paths = {} - for address in allAddress: + for i, address in enumerate(allAddress): path = f"/{name}/{WEEK_COMMENCING}/{house_ass}/{address}" + address_paths = {} files_to_download_sharepoint_info = self.get_folders_in_path(path) if 'value' not in files_to_download_sharepoint_info: raise RuntimeError("Failed to get files to download") diff --git a/etl/surveyedData/__init__.py b/etl/surveyedData/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/etl/surveyedData/surveryedData.py b/etl/surveyedData/surveryedData.py new file mode 100644 index 0000000..a47bfe0 --- /dev/null +++ b/etl/surveyedData/surveryedData.py @@ -0,0 +1,6 @@ +class surveyedDataProcessor(): + def __init__(self, address_to_files): + for key, value in address_to_files.items(): + self.address = key + self.files = value + print(f"Address is {self.address}, with all files at location {self.files}")