# survey-extraction/etl/db/hubSpotLoad.py

from etl.hubSpotClient.hubspot import HubSpotClient, DealStage
from etl.surveyPrice.surveyPrice import SurveyPrice
from etl.surveyedData.surveryedData import surveyedDataProcessor
from etl.scraper.scraper import SharePointScraper, SharePointInstaller
from etl.db.db import get_db_session, init_db
import pandas as pd
from etl.db.db import get_db_session, init_db
from etl.utils.utils import get_sharepoint_path

class HubspotTodb():
    """Load survey data for HubSpot deals from SharePoint into the database.

    The deal list comes from ``SurveyPrice.get_all_surveys_from_hubspot``.
    For each deal the SharePoint folder is listed, its non-media files are
    downloaded, and the resulting ``surveyedDataProcessor`` is used to
    populate the pre-site-note tables.
    """

    # Lower-case extensions of media files that are never downloaded.
    _SKIPPED_EXTENSIONS = (".jpg", ".jpeg", ".png", ".heic", ".mov", ".mp4")

    def __init__(self):
        """Initialise the database and the HubSpot / pricing clients."""
        init_db()
        self.hubspot = HubSpotClient()
        # Filled by get_all_deals(); iterated with .iterrows(), so it is
        # expected to be a pandas DataFrame.
        self.deals_in_hubspot = None
        # surveyedDataProcessor objects collected by
        # gather_data_from_each_sharepoint().
        self.data_in_sharepoint = []
        self.sp = SurveyPrice()

    def get_all_deals(self):
        """Fetch the deals via ``SurveyPrice``, cache and return them."""
        self.deals_in_hubspot = self.sp.get_all_surveys_from_hubspot()
        return self.deals_in_hubspot

    def get_sharepoint_scraper(self, installer):
        """Return a ``SharePointScraper`` for *installer*, or ``None``.

        ``None`` is returned when the installer name is not one of the
        names recognised by :meth:`string_to_installer`.
        """
        member = self.string_to_installer(installer)
        if member is None:
            return None
        return SharePointScraper(member)

    def create_files_locally(self, sp, path, address):
        """Download the non-media files found at *path* to temp files.

        Args:
            sp: Scraper exposing ``get_folders_in_path``,
                ``get_file_content`` and ``create_temp_file``.
            path: SharePoint path whose direct children are listed.
            address: Deal address; used as the temp-file sub-directory and
                as the single key of the returned mapping.

        Returns:
            ``{address: [local_file_path, ...]}``.
        """
        listing = sp.get_folders_in_path(path)
        # Only entries carrying a 'file' facet are downloaded (this skips
        # sub-folders). Media files are skipped whatever the case of their
        # extension.
        download_urls = {
            item["name"]: item["@microsoft.graph.downloadUrl"]
            for item in listing["value"]
            if "file" in item
            and not item["name"].lower().endswith(self._SKIPPED_EXTENSIONS)
        }
        local_paths = [
            sp.create_temp_file(sp.get_file_content(url), f"{address}/{file_name}")
            for file_name, url in download_urls.items()
        ]
        return {address: local_paths}

    def string_to_installer(self, installer):
        """Map a HubSpot installer name to a ``SharePointInstaller`` member.

        Matching is case-insensitive. Unknown names, and values that are
        not strings (for example a missing value), yield ``None``.
        """
        if not isinstance(installer, str):
            return None
        name = installer.upper()
        if name == "J & J CRUMP":
            return SharePointInstaller.JJC
        if name == "SCIS":
            return SharePointInstaller.SOUTH_COAST_INSULATION
        if name == "SGEC":
            return SharePointInstaller.SGEC
        return None

    def work_out_invoice(self, row):
        """Compute the price for one HubSpot deal *row*.

        The deal's SharePoint survey is gathered (and loaded into the
        database as a side effect of ``gather_data_from_sharepoint_url``),
        merged with the HubSpot row, and passed to
        ``SurveyPrice.calculate_all_price``.
        """
        survey = self.gather_data_from_sharepoint_url(row)
        installer = self.string_to_installer(row["HUBSPOT_INSTALLER"])
        survey_pd = pd.DataFrame(
            [self.sp.survey_to_pandas_format(surveyInfo=survey, installer=installer)]
        )
        hubspot_data = pd.DataFrame([row])
        merged_df = self.sp.merge_hub_spot_and_survey_information_from_sharepoint_url(
            hubspot_data, survey_pd
        )
        return self.sp.calculate_all_price(merged_df)

    def _require_scraper(self, row):
        """Return the scraper for *row*'s installer or raise ``ValueError``."""
        installer = row["HUBSPOT_INSTALLER"]
        scraper = self.get_sharepoint_scraper(installer)
        if scraper is None:
            # Fail with an explicit message instead of an AttributeError on
            # None further down the call chain.
            raise ValueError(
                f"No SharePoint scraper for installer {installer!r} "
                f"(deal {row['HUBSPOT_DEAL_ID']!r})"
            )
        return scraper

    def _build_processors(self, row):
        """Download *row*'s SharePoint files and wrap them in processors."""
        scraper = self._require_scraper(row)
        path = get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
        files_by_address = self.create_files_locally(
            scraper, path, row["HUBSPOT_DEAL_ADDRESS"]
        )
        processors = []
        for address, file_paths in files_by_address.items():
            processor = surveyedDataProcessor(address, file_paths)
            processor.hubspot_deal_id = row["HUBSPOT_DEAL_ID"]
            processors.append(processor)
        return processors

    def _hubspot_row_for(self, deal_id):
        """Return the cached HubSpot row whose HUBSPOT_DEAL_ID is *deal_id*.

        Raises:
            LookupError: if the deals have not been fetched yet or no row
                matches *deal_id*.
        """
        deals = self.deals_in_hubspot
        if deals is None:
            raise LookupError(
                "HubSpot deals have not been fetched; call get_all_deals() first"
            )
        matches = deals[deals["HUBSPOT_DEAL_ID"] == deal_id]
        if matches.empty:
            raise LookupError(f"No HubSpot deal with HUBSPOT_DEAL_ID {deal_id!r}")
        return matches.iloc[0]

    def gather_data_from_sharepoint_url(self, row):
        """Gather and load the survey for a single deal *row*.

        Returns:
            The ``surveyedDataProcessor`` built for the deal's address.

        Raises:
            ValueError: if the row's installer has no SharePoint scraper.
        """
        processors = self._build_processors(row)
        if not processors:
            return None
        # create_files_locally yields a single address entry, so the first
        # processor is the one for this deal.
        sdp = processors[0]
        # NOTE(review): unlike load_all(), no explicit session.commit() is
        # issued here - confirm get_db_session() commits on exit.
        with get_db_session() as session:
            self.load_one_pre_site_note(session, sdp, row)
        return sdp

    def gather_data_from_each_sharepoint(self):
        """Refresh the deal list and gather survey data for every deal.

        The results replace the contents of ``self.data_in_sharepoint``.

        Raises:
            ValueError: if a deal's installer has no SharePoint scraper.
        """
        self.get_all_deals()
        # Start from an empty list so repeated runs do not queue the same
        # deals twice.
        self.data_in_sharepoint.clear()
        for _, row in self.deals_in_hubspot.iterrows():
            self.data_in_sharepoint.extend(self._build_processors(row))

    def load_all(self, fast=False):
        """Load every gathered survey into the database and commit.

        Args:
            fast: When true, skip re-gathering from SharePoint and load
                whatever is already in ``self.data_in_sharepoint``.
        """
        if not fast:
            self.gather_data_from_each_sharepoint()
        with get_db_session() as session:
            self.load_all_pre_site_note(session)
            session.commit()

    def load_one_pre_site_note(self, db_session, surveyedData, hubspot_data):
        """Create the pre-site-note rows for one surveyed property.

        Args:
            db_session: Database session handed to every loader call.
            surveyedData: Processor exposing the ``load_*`` / ``create_*``
                table helpers.
            hubspot_data: HubSpot row providing ``HUBSPOT_LANDLORD_ID`` and
                ``HUBSPOT_DOMNA_ID``.
        """
        assessor = surveyedData.load_assessor_table(db_session)

        # Loads the pre site summary information
        summary_info = surveyedData.load_pre_site_notes_summary_table(db_session)

        property_description = surveyedData.load_property_description(db_session)

        # Creates the final pre site note table that links all information
        presitenote = surveyedData.create_pre_site_note_table(
            db_session, assessor, summary_info, property_description
        )

        building_table = surveyedData.create_buildings_table(
            db_session,
            hubspot_data["HUBSPOT_LANDLORD_ID"],
            hubspot_data["HUBSPOT_DOMNA_ID"],
        )
        # Called for its effect on the session; the result is not needed.
        surveyedData.create_document_table_via_pre_site_note(
            db_session, presitenote, assessor, building_table
        )

    def load_all_pre_site_note(self, db_session):
        """Load the pre-site-note rows for every gathered survey.

        Each processor is paired with its HubSpot row (matched on
        ``hubspot_deal_id``) because ``load_one_pre_site_note`` needs the
        landlord and Domna ids from that row.

        Raises:
            LookupError: if a processor's deal id has no cached HubSpot row.
        """
        for surveyedData in self.data_in_sharepoint:
            hubspot_data = self._hubspot_row_for(surveyedData.hubspot_deal_id)
            self.load_one_pre_site_note(
                db_session=db_session,
                surveyedData=surveyedData,
                hubspot_data=hubspot_data,
            )