mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-30 13:10:56 +00:00
136 lines
5.4 KiB
Python
136 lines
5.4 KiB
Python
from etl.hubSpotClient.hubspot import HubSpotClient, DealStage
|
|
from etl.surveyPrice.surveyPrice import SurveyPrice
|
|
from etl.surveyedData.surveryedData import surveyedDataProcessor
|
|
from etl.scraper.scraper import SharePointScraper, SharePointInstaller
|
|
from etl.db.db import get_db_session, init_db
|
|
import pandas as pd
|
|
from etl.db.db import get_db_session, init_db
|
|
from etl.utils.utils import get_sharepoint_path
|
|
|
|
class HubspotTodb:
    """Move survey data for HubSpot deals from SharePoint into the database.

    For each deal row the class picks the SharePoint scraper matching the
    deal's installer, downloads the non-media files in the deal's SharePoint
    folder, wraps them in a ``surveyedDataProcessor`` and loads the pre-site
    note tables through it.

    Deal rows are expected to provide the keys ``HUBSPOT_INSTALLER``,
    ``HUBSPOT_SHAREPOINT_PATH``, ``HUBSPOT_DEAL_ADDRESS``, ``HUBSPOT_DEAL_ID``,
    ``HUBSPOT_LANDLORD_ID`` and ``HUBSPOT_DOMNA_ID``.
    """

    # Media files are never downloaded; matching is case-insensitive.
    _SKIPPED_EXTENSIONS = (".jpg", ".jpeg", ".png", ".heic", ".mov", ".mp4")

    def __init__(self):
        """Initialise the database and the HubSpot / pricing clients."""
        init_db()
        self.hubspot = HubSpotClient()
        # Populated by get_all_deals(); iterated with .iterrows() later on.
        self.deals_in_hubspot = None
        # surveyedDataProcessor instances collected by
        # gather_data_from_each_sharepoint().
        self.data_in_sharepoint = []
        self.sp = SurveyPrice()

    def get_all_deals(self):
        """Fetch all surveys from HubSpot, cache them on the instance and return them."""
        self.deals_in_hubspot = self.sp.get_all_surveys_from_hubspot()
        return self.deals_in_hubspot

    def string_to_installer(self, installer):
        """Map a HubSpot installer label to a ``SharePointInstaller`` member.

        Matching ignores case and surrounding whitespace.

        Returns:
            The matching ``SharePointInstaller`` member, or ``None`` when the
            label is unknown or is not a string (e.g. a missing HubSpot value).
        """
        if not isinstance(installer, str):
            return None
        installers_by_label = {
            "J & J CRUMP": SharePointInstaller.JJC,
            "SCIS": SharePointInstaller.SOUTH_COAST_INSULATION,
            "SGEC": SharePointInstaller.SGEC,
        }
        return installers_by_label.get(installer.strip().upper())

    def get_sharepoint_scraper(self, installer):
        """Return a ``SharePointScraper`` for the installer label, or ``None`` if unknown."""
        # Reuse the single label mapping so the two lookups cannot drift apart.
        member = self.string_to_installer(installer)
        if member is None:
            return None
        return SharePointScraper(member)

    def create_files_locally(self, sp, path, address):
        """Download the non-media files directly under ``path`` to temp files.

        Args:
            sp: Scraper providing ``get_folders_in_path``, ``get_file_content``
                and ``create_temp_file``.
            path: SharePoint folder path to list.
            address: Deal address; used as the temp-file sub-folder and as the
                key of the returned mapping.

        Returns:
            A one-entry dict ``{address: [local_file_path, ...]}``.

        Raises:
            ValueError: If ``sp`` is ``None`` (no scraper for the installer).
        """
        if sp is None:
            raise ValueError(
                f"No SharePoint scraper available for {address!r}: "
                "unknown or missing installer"
            )

        listing = sp.get_folders_in_path(path)

        # Keyed by file name, so a repeated name keeps only the last URL.
        download_urls = {}
        for item in listing["value"]:
            # Only entries carrying a 'file' key are files; the rest are skipped.
            if "file" not in item:
                continue
            name = item["name"]
            if name.lower().endswith(self._SKIPPED_EXTENSIONS):
                continue
            download_urls[name] = item["@microsoft.graph.downloadUrl"]

        local_paths = [
            sp.create_temp_file(sp.get_file_content(url), f"{address}/{name}")
            for name, url in download_urls.items()
        ]
        return {address: local_paths}

    def _build_processors(self, row):
        """Download a deal's files and wrap them in ``surveyedDataProcessor`` objects."""
        scraper = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"])
        path = get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
        files_by_address = self.create_files_locally(
            scraper, path, row["HUBSPOT_DEAL_ADDRESS"]
        )

        processors = []
        for address, file_paths in files_by_address.items():
            processor = surveyedDataProcessor(address, file_paths)
            processor.hubspot_deal_id = row["HUBSPOT_DEAL_ID"]
            processors.append(processor)
        return processors

    def _hubspot_row_for(self, deal_id):
        """Return the cached HubSpot row whose ``HUBSPOT_DEAL_ID`` equals ``deal_id``.

        Raises:
            LookupError: If no deals are cached or none matches ``deal_id``.
        """
        deals = self.deals_in_hubspot
        if deals is None:
            raise LookupError("HubSpot deals have not been fetched yet")
        matches = deals[deals["HUBSPOT_DEAL_ID"] == deal_id]
        if matches.empty:
            raise LookupError(f"No HubSpot deal found with id {deal_id!r}")
        return matches.iloc[0]

    def work_out_invoice(self, row):
        """Load one deal's survey from SharePoint and price it.

        Args:
            row: A single HubSpot deal row.

        Returns:
            Whatever ``SurveyPrice.calculate_all_price`` returns for the
            merged HubSpot + survey frame.
        """
        survey = self.gather_data_from_sharepoint_url(row)
        installer = self.string_to_installer(row["HUBSPOT_INSTALLER"])
        survey_pd = pd.DataFrame(
            [self.sp.survey_to_pandas_format(surveyInfo=survey, installer=installer)]
        )
        hubspot_data = pd.DataFrame([row])
        merged_df = self.sp.merge_hub_spot_and_survey_information_from_sharepoint_url(
            hubspot_data, survey_pd
        )
        return self.sp.calculate_all_price(merged_df)

    def gather_data_from_sharepoint_url(self, row):
        """Download one deal's files, load its pre-site note and return the processor.

        Args:
            row: A single HubSpot deal row.

        Returns:
            The ``surveyedDataProcessor`` built for the deal.
        """
        processors = self._build_processors(row)
        for processor in processors:
            # NOTE(review): unlike load_all(), no explicit commit happens here;
            # confirm that get_db_session() commits on exit.
            with get_db_session() as session:
                self.load_one_pre_site_note(session, processor, row)
        # create_files_locally() yields exactly one address, hence one processor.
        return processors[-1]

    def gather_data_from_each_sharepoint(self):
        """Fetch all HubSpot deals and collect a processor for each into ``data_in_sharepoint``."""
        self.get_all_deals()
        for _, row in self.deals_in_hubspot.iterrows():
            self.data_in_sharepoint.extend(self._build_processors(row))

    def load_all(self, fast=False):
        """Load every gathered survey into the database and commit.

        Args:
            fast: When ``False`` (default) the SharePoint data is gathered
                first; otherwise only what is already in
                ``data_in_sharepoint`` is loaded.
        """
        if fast is False:
            self.gather_data_from_each_sharepoint()
        with get_db_session() as session:
            self.load_all_pre_site_note(session)
            session.commit()

    def load_one_pre_site_note(self, db_session, surveyedData, hubspot_data):
        """Create the pre-site note, building and document records for one survey.

        Args:
            db_session: Open database session handed to every loader call.
            surveyedData: Processor exposing the ``load_*`` / ``create_*`` methods.
            hubspot_data: Deal row providing ``HUBSPOT_LANDLORD_ID`` and
                ``HUBSPOT_DOMNA_ID``.
        """
        assessor = surveyedData.load_assessor_table(db_session)

        # Loads the pre site summary information
        summary_info = surveyedData.load_pre_site_notes_summary_table(db_session)

        property_description = surveyedData.load_property_description(db_session)

        # Creates the final pre site note table that links all information
        presitenote = surveyedData.create_pre_site_note_table(
            db_session, assessor, summary_info, property_description
        )

        building_table = surveyedData.create_buildings_table(
            db_session,
            hubspot_data["HUBSPOT_LANDLORD_ID"],
            hubspot_data["HUBSPOT_DOMNA_ID"],
        )
        surveyedData.create_document_table_via_pre_site_note(
            db_session, presitenote, assessor, building_table
        )

    def load_all_pre_site_note(self, db_session):
        """Load the pre-site note for every processor in ``data_in_sharepoint``.

        Raises:
            LookupError: If a processor's deal id has no row in the cached
                HubSpot deals.
        """
        for surveyedData in self.data_in_sharepoint:
            # The building table needs the deal's landlord/Domna ids, which
            # live on the HubSpot row rather than on the processor.
            hubspot_row = self._hubspot_row_for(surveyedData.hubspot_deal_id)
            self.load_one_pre_site_note(
                db_session=db_session,
                surveyedData=surveyedData,
                hubspot_data=hubspot_row,
            )
|
|
|
|
|