diff --git a/etl/pdfReader/reportType.py b/etl/pdfReader/reportType.py index 6433d79..90bd5f4 100644 --- a/etl/pdfReader/reportType.py +++ b/etl/pdfReader/reportType.py @@ -2,4 +2,6 @@ from enum import Enum class ReportType(Enum): - QUIDOS_PRESITE_NOTE = 1 \ No newline at end of file + QUIDOS_PRESITE_NOTE = 1 + CHARTED_SURVEYOR_REPORT = 2 + ENERGY_PERFORMANCE_REPORT = 3 \ No newline at end of file diff --git a/etl/scis_invoice.py b/etl/scis_invoice.py index 605c86e..5d478cc 100644 --- a/etl/scis_invoice.py +++ b/etl/scis_invoice.py @@ -1,9 +1,21 @@ +from etl.scraper.scraper import SharePointScraper, SharePointInstaller +from pprint import pformat + + +def get_type_of_file(path): + pass + def main(): - print("hello world") + south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION, development=True) + file_paths = south_coast_scraper.download_file_for_each_address() + + print(pformat(file_paths)) + + for eachAddress in file_paths: + print(eachAddress) + + # Download presite notes and add it to pdf reader class if __name__ == "__main__": - """ - Hopefully a very small script to generate a CSV and any othe relevant information to work out the invoice for each work - """ main() diff --git a/etl/scraper/scraper.py b/etl/scraper/scraper.py index d5ccbbe..4568a5a 100644 --- a/etl/scraper/scraper.py +++ b/etl/scraper/scraper.py @@ -230,6 +230,7 @@ class SharePointScraper(): @ensure_housing_assosiation_is_loaded def download_file_for_each_address(self): + paths = [] for name in self.surveyor_names: if WEEK_COMMENCING in self.surveyor_to_dates_folder[name]: for house_ass in self.surveyor_to_housing_assosications[name]: @@ -242,7 +243,7 @@ class SharePointScraper(): if 'file' not in address: # Only directories allAddress.append(address['name']) - + address_paths = {} for address in allAddress: path = f"/{name}/{WEEK_COMMENCING}/{house_ass}/{address}" files_to_download_sharepoint_info = self.get_folders_in_path(path) @@ -257,11 +258,15 @@ class SharePointScraper(): if any(file["name"].endswith(ext) for ext in avoid): continue file_names_to_download.update({file["name"]: file['@microsoft.graph.downloadUrl']}) - + each_file = [] for file_name, url in file_names_to_download.items(): self.logger.info(f"Downloading {file_name} from {url}") content = self.get_file_content(url) - self.create_temp_file(content, f"{name}/{WEEK_COMMENCING}/{house_ass}/{address}/{file_name}") + file_path = self.create_temp_file(content, f"{name}/{WEEK_COMMENCING}/{house_ass}/{address}/{file_name}") + each_file.append(file_path) + address_paths.update({address: each_file}) + paths.append(address_paths) + return paths def create_temp_file(self, content, path): # Ensure the path is under /tmp/