diff --git a/etl/db/load.py b/etl/db/load.py
index 1f4fbd4..6763a04 100644
--- a/etl/db/load.py
+++ b/etl/db/load.py
@@ -1,6 +1,8 @@
 from etl.hubSpotClient.hubspot import HubSpotClient, DealStage
 from etl.surveyPrice.surveyPrice import SurveyPrice
 from etl.surveyedData.surveryedData import surveyedDataProcessor
+from etl.scraper.scraper import SharePointScraper, SharePointInstaller
+
 from urllib.parse import unquote
 
 
@@ -36,8 +38,79 @@ class HubspotTodb():
             return final_path
         else:
             return decoded_path.strip('/')
+
+    def get_sharepoint_scraper(self, installer):
+        """Return the SharePointScraper for ``installer``, or None.
+
+        The name is matched case-insensitively, ignoring surrounding
+        whitespace. None is returned when the value is not a string (for
+        example a missing cell in the deals DataFrame) or when no SharePoint
+        site is mapped to that installer.
+        """
+        if not isinstance(installer, str):
+            return None
+
+        installers = {
+            "J & J CRUMP": SharePointInstaller.JJC,
+            "SCIS": SharePointInstaller.SOUTH_COAST_INSULATION,
+        }
+        site = installers.get(installer.strip().upper())
+        return SharePointScraper(site) if site is not None else None
+
+    def create_files_locally(self, sp, path, address):
+        """Download the non-media files found at ``path`` via ``sp``.
+
+        Args:
+            sp: SharePointScraper used to list the folder and fetch files.
+            path: SharePoint folder path passed to ``get_folders_in_path``.
+            address: Deal address; prefixes each created file name and keys
+                the returned dict.
+
+        Returns:
+            A dict mapping ``address`` to the list of paths returned by
+            ``sp.create_temp_file`` for the downloaded files.
+        """
+        # Image and video files are skipped. Matching is done on the
+        # lower-cased name so mixed-case suffixes such as ".Jpg" are caught.
+        avoid = (".jpg", ".jpeg", ".png", ".heic", ".mov", ".mp4")
+
+        microsoft_graph_data = sp.get_folders_in_path(path)
+        file_names_to_download = {
+            item["name"]: item["@microsoft.graph.downloadUrl"]
+            for item in microsoft_graph_data["value"]
+            # Only entries carrying a 'file' key are downloadable files.
+            if "file" in item and not item["name"].lower().endswith(avoid)
+        }
+
+        each_file = [
+            sp.create_temp_file(sp.get_file_content(url), f"{address}/{file_name}")
+            for file_name, url in file_names_to_download.items()
+        ]
+        return {address: each_file}
 
     def gather_data_from_each_sharepoint(self):
+        """Download every deal's SharePoint files and wrap them for parsing.
+
+        Appends one surveyedDataProcessor per deal address to
+        ``self.data_in_sharepoint``. Deals whose installer has no SharePoint
+        scraper are reported and skipped.
+        """
         self.get_all_deals()
         for _, row in self.deals_in_hubspot.iterrows():
-            print(self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"]))
\ No newline at end of file
+            sp = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"])
+            if sp is None:
+                # Without a scraper there is nothing to list or download;
+                # skip the deal rather than fail on ``None.get_folders_in_path``.
+                print(f"Skipping deal, unsupported installer: {row['HUBSPOT_INSTALLER']!r}")
+                continue
+
+            path = self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
+            data_loc = self.create_files_locally(sp, path, row["HUBSPOT_DEAL_ADDRESS"])
+            for add, file_loc in data_loc.items():
+                self.data_in_sharepoint.append(surveyedDataProcessor(add, file_loc))
+
+    def print_address(self):
+        """Gather the SharePoint data, then print each processor's address."""
+        self.gather_data_from_each_sharepoint()
+        for dp in self.data_in_sharepoint:
+            print(dp.address)
diff --git a/etl/hubspot_to_db.py b/etl/hubspot_to_db.py
index 60eff81..c60be8d 100644
--- a/etl/hubspot_to_db.py
+++ b/etl/hubspot_to_db.py
@@ -10,4 +10,4 @@ from etl.db.load import HubspotTodb
 
 
 dbLoader = HubspotTodb()
-dbLoader.gather_data_from_each_sharepoint()
\ No newline at end of file
+dbLoader.print_address()
\ No newline at end of file
diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py
index ce9ed0f..957da4f 100644
--- a/etl/pdfReader/sitenotes.py
+++ b/etl/pdfReader/sitenotes.py
@@ -1,5 +1,5 @@
 from etl.pdfReader.reportType import ReportType
-from etl.transform.types import (
+from etl.transform.preSiteNoteTypes import (
     CompanyInfo, SurverySummaryInfo, AssessorInfo, PropertyDescription,
     PropertyDetail, Dimension, Walls, Roofs, Floors, Door,
     VentilationAndCooling,
diff --git a/etl/simple_load_example.py b/etl/simple_load_example.py
index b8f8c64..aa36924 100644
--- a/etl/simple_load_example.py
+++ b/etl/simple_load_example.py
@@ -2,7 +2,7 @@ import os
 
 from etl.surveyedData.surveryedData import surveyedDataProcessor
 from etl.db.db import get_db_session, init_db
-from etl.transform.types import AssessorInfo
+from etl.transform.preSiteNoteTypes import AssessorInfo
 
 
 pre_site_note_path = os.path.join(os.getcwd(), "..", "example_data", "pre_site_note.pdf")
diff --git a/etl/tests/test_csr_class.py b/etl/tests/test_csr_class.py
index 12ddf1c..4ec1c27 100644
--- a/etl/tests/test_csr_class.py
+++ b/etl/tests/test_csr_class.py
@@ -2,7 +2,7 @@ import os
 
 from etl.surveyedData.surveryedData import surveyedDataProcessor
 from etl.db.db import get_db_session, init_db
-from etl.transform.types import AssessorInfo
+from etl.transform.preSiteNoteTypes import AssessorInfo
 import pytest
 from etl.jjc_old_lewis_manual_way_ import work_out_total_floor_area
 
diff --git a/etl/tests/test_pre_site_note_class.py b/etl/tests/test_pre_site_note_class.py
index ec89a49..21fccef 100644
--- a/etl/tests/test_pre_site_note_class.py
+++ b/etl/tests/test_pre_site_note_class.py
@@ -2,7 +2,7 @@ import os
 
 from etl.surveyedData.surveryedData import surveyedDataProcessor
 from etl.db.db import get_db_session, init_db
-from etl.transform.types import AssessorInfo
+from etl.transform.preSiteNoteTypes import AssessorInfo
 import pytest
 from etl.jjc_old_lewis_manual_way_ import work_out_total_floor_area
 
diff --git a/etl/transform/types.py b/etl/transform/preSiteNoteTypes.py
similarity index 100%
rename from etl/transform/types.py
rename to etl/transform/preSiteNoteTypes.py