survey-extraction/etl/db/hubSpotLoad.py
2025-05-23 15:58:04 +00:00

136 lines
5.4 KiB
Python

from etl.hubSpotClient.hubspot import HubSpotClient, DealStage
from etl.surveyPrice.surveyPrice import SurveyPrice
from etl.surveyedData.surveryedData import surveyedDataProcessor
from etl.scraper.scraper import SharePointScraper, SharePointInstaller
from etl.db.db import get_db_session, init_db
import pandas as pd
from etl.db.db import get_db_session, init_db
from etl.utils.utils import get_sharepoint_path
class HubspotTodb():
    """Load survey documents referenced by HubSpot deals into the database.

    For each HubSpot deal the class resolves the installer's SharePoint
    scraper, downloads the non-media files found at the deal's SharePoint
    path, wraps them in a ``surveyedDataProcessor`` and writes the pre-site
    note tables through a database session.

    Deal rows are read by key; the keys used in this class are
    ``HUBSPOT_INSTALLER``, ``HUBSPOT_SHAREPOINT_PATH``,
    ``HUBSPOT_DEAL_ADDRESS``, ``HUBSPOT_DEAL_ID``, ``HUBSPOT_LANDLORD_ID``
    and ``HUBSPOT_DOMNA_ID``.
    """

    # Lower-cased suffixes of media files that are never downloaded.
    _SKIPPED_EXTENSIONS = (".jpg", ".jpeg", ".png", ".heic", ".mov", ".mp4")

    def __init__(self):
        """Initialise the database and create the HubSpot/pricing clients."""
        init_db()
        self.hubspot = HubSpotClient()
        # Result of get_all_deals(); iterated with .iterrows(), so presumably
        # a pandas DataFrame -- confirm against SurveyPrice.
        self.deals_in_hubspot = None
        # surveyedDataProcessor objects collected by
        # gather_data_from_each_sharepoint().
        self.data_in_sharepoint = []
        self.sp = SurveyPrice()
        # HubSpot row for every collected processor, keyed by deal id, so the
        # bulk load can hand the matching row to load_one_pre_site_note().
        self._hubspot_rows_by_deal_id = {}

    def get_all_deals(self):
        """Fetch all surveys from HubSpot, cache them on the instance and return them."""
        self.deals_in_hubspot = self.sp.get_all_surveys_from_hubspot()
        return self.deals_in_hubspot

    def string_to_installer(self, installer):
        """Map a HubSpot installer name to its ``SharePointInstaller`` member.

        The comparison ignores case and surrounding whitespace.

        Returns:
            The matching ``SharePointInstaller`` member, or ``None`` when the
            name is not a string or is not a supported installer.
        """
        if not isinstance(installer, str):
            # Missing values (None / NaN) cannot name an installer.
            return None
        name = installer.strip().upper()
        if name == "J & J CRUMP":
            return SharePointInstaller.JJC
        if name == "SCIS":
            return SharePointInstaller.SOUTH_COAST_INSULATION
        if name == "SGEC":
            return SharePointInstaller.SGEC
        return None

    def get_sharepoint_scraper(self, installer):
        """Return a ``SharePointScraper`` for *installer*, or ``None`` if unsupported."""
        installer_member = self.string_to_installer(installer)
        if installer_member is None:
            return None
        return SharePointScraper(installer_member)

    def create_files_locally(self, sp, path, address):
        """Download the non-media files found at *path* into temporary files.

        Args:
            sp: Scraper providing ``get_folders_in_path``, ``get_file_content``
                and ``create_temp_file``.
            path: SharePoint path whose direct children are listed.
            address: Deal address; used as the key of the result and as the
                folder prefix of each temporary file name.

        Returns:
            ``{address: [temp_file_path, ...]}`` -- always exactly one key,
            with an empty list when nothing was downloadable.
        """
        listing = sp.get_folders_in_path(path)
        # Only entries carrying a 'file' key are downloaded; media files are
        # skipped regardless of the case of their extension. Keying by name
        # keeps a single URL per file name (the last one listed wins).
        download_urls = {
            item["name"]: item["@microsoft.graph.downloadUrl"]
            for item in listing["value"]
            if "file" in item
            and not item["name"].lower().endswith(self._SKIPPED_EXTENSIONS)
        }
        local_paths = []
        for file_name, url in download_urls.items():
            content = sp.get_file_content(url)
            local_paths.append(sp.create_temp_file(content, f"{address}/{file_name}"))
        return {address: local_paths}

    def work_out_invoice(self, row):
        """Load the survey for one HubSpot deal and price it.

        The survey is gathered (and its pre-site note written) via
        ``gather_data_from_sharepoint_url``; it is then merged with the
        HubSpot row and passed to ``SurveyPrice.calculate_all_price``, whose
        result is returned.
        """
        survey = self.gather_data_from_sharepoint_url(row)
        installer = self.string_to_installer(row["HUBSPOT_INSTALLER"])
        survey_pd = pd.DataFrame(
            [self.sp.survey_to_pandas_format(surveyInfo=survey, installer=installer)]
        )
        hubspot_data = pd.DataFrame([row])
        merged_df = self.sp.merge_hub_spot_and_survey_information_from_sharepoint_url(
            hubspot_data, survey_pd
        )
        return self.sp.calculate_all_price(merged_df)

    def _surveyed_data_for_row(self, row):
        """Download one deal's files and wrap them in ``surveyedDataProcessor`` objects.

        Raises:
            ValueError: If the deal's installer has no SharePoint scraper.
        """
        scraper = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"])
        if scraper is None:
            raise ValueError(
                f"Unsupported installer {row['HUBSPOT_INSTALLER']!r} "
                f"for HubSpot deal {row['HUBSPOT_DEAL_ID']!r}"
            )
        path = get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
        files_by_address = self.create_files_locally(
            scraper, path, row["HUBSPOT_DEAL_ADDRESS"]
        )
        deal_id = row["HUBSPOT_DEAL_ID"]
        processors = []
        for address, file_paths in files_by_address.items():
            surveyed = surveyedDataProcessor(address, file_paths)
            surveyed.hubspot_deal_id = deal_id
            processors.append(surveyed)
        return processors

    def gather_data_from_sharepoint_url(self, row):
        """Gather one deal's survey files, write its pre-site note, return the processor.

        NOTE(review): no explicit commit is issued here, unlike ``load_all``;
        confirm whether ``get_db_session`` commits on exit.
        """
        surveyed = None
        for surveyed in self._surveyed_data_for_row(row):
            with get_db_session() as session:
                self.load_one_pre_site_note(session, surveyed, row)
        return surveyed

    def gather_data_from_each_sharepoint(self):
        """Collect survey data for every HubSpot deal into ``data_in_sharepoint``.

        The HubSpot row of each deal is remembered so that
        ``load_all_pre_site_note`` can pass it on later.
        """
        self.get_all_deals()
        for _, row in self.deals_in_hubspot.iterrows():
            for surveyed in self._surveyed_data_for_row(row):
                self._hubspot_rows_by_deal_id[surveyed.hubspot_deal_id] = row
                self.data_in_sharepoint.append(surveyed)

    def load_all(self, fast=False):
        """Gather all SharePoint data (unless *fast*) and load it in one session.

        Args:
            fast: When exactly ``False`` the SharePoint data is gathered
                first; any other value loads only what is already held in
                ``data_in_sharepoint``.
        """
        if fast is False:
            self.gather_data_from_each_sharepoint()
        with get_db_session() as session:
            self.load_all_pre_site_note(session)
            session.commit()

    def load_one_pre_site_note(self, db_session, surveyedData, hubspot_data):
        """Write the pre-site note tables for one surveyed property.

        Args:
            db_session: Database session handed to every table loader.
            surveyedData: Processor exposing the ``load_*`` / ``create_*``
                table methods called below.
            hubspot_data: HubSpot deal row; ``HUBSPOT_LANDLORD_ID`` and
                ``HUBSPOT_DOMNA_ID`` are read from it.
        """
        assessor = surveyedData.load_assessor_table(db_session)
        # Pre-site summary information.
        summary_info = surveyedData.load_pre_site_notes_summary_table(db_session)
        property_description = surveyedData.load_property_description(db_session)
        # Final pre-site note table linking the pieces loaded above.
        presitenote = surveyedData.create_pre_site_note_table(
            db_session, assessor, summary_info, property_description
        )
        building_table = surveyedData.create_buildings_table(
            db_session,
            hubspot_data["HUBSPOT_LANDLORD_ID"],
            hubspot_data["HUBSPOT_DOMNA_ID"],
        )
        surveyedData.create_document_table_via_pre_site_note(
            db_session, presitenote, assessor, building_table
        )

    def load_all_pre_site_note(self, db_session):
        """Write the pre-site note tables for every collected processor.

        Raises:
            LookupError: If no HubSpot row was recorded for a processor's
                deal id.
        """
        for surveyed in self.data_in_sharepoint:
            deal_id = surveyed.hubspot_deal_id
            try:
                hubspot_row = self._hubspot_rows_by_deal_id[deal_id]
            except KeyError:
                raise LookupError(
                    f"No HubSpot row recorded for deal {deal_id!r}"
                ) from None
            self.load_one_pre_site_note(
                db_session=db_session,
                surveyedData=surveyed,
                hubspot_data=hubspot_row,
            )