From 348fc055032a1f597811abe13bd082115fbaf571 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 23 May 2025 15:58:04 +0000 Subject: [PATCH] hub spot in --- etl/db/hubSpotLoad.py | 27 ++------------------------- etl/hubSpotClient/types.py | 29 +++++++++++++++++++++++++++-- etl/hubspot_to_invoice_rewrite.py | 1 + etl/scraper/scraper.py | 2 +- etl/utils/utils.py | 23 +++++++++++++++++++++++ 5 files changed, 54 insertions(+), 28 deletions(-) create mode 100644 etl/utils/utils.py diff --git a/etl/db/hubSpotLoad.py b/etl/db/hubSpotLoad.py index c070e83..3840c87 100644 --- a/etl/db/hubSpotLoad.py +++ b/etl/db/hubSpotLoad.py @@ -5,9 +5,7 @@ from etl.scraper.scraper import SharePointScraper, SharePointInstaller from etl.db.db import get_db_session, init_db import pandas as pd from etl.db.db import get_db_session, init_db - -from urllib.parse import unquote - +from etl.utils.utils import get_sharepoint_path class HubspotTodb(): def __init__(self): @@ -21,27 +19,6 @@ class HubspotTodb(): self.deals_in_hubspot = self.sp.get_all_surveys_from_hubspot() return self.deals_in_hubspot - def get_sharepoint_path(self, url): - url_parts = url.split('/') - # Find the index of 'Forms' - forms_index = url_parts.index('Forms') - # Get the part after 'Forms' - after_forms = url_parts[forms_index + 1] - - # Find 'id=' and extract after it - if 'id=' in after_forms: - id_part = after_forms.split('id=')[1] - # Only keep the path before '&' (to ignore other parameters) - id_path = id_part.split('&')[0] - # Decode the path - decoded_path = unquote(id_path) - # Now, remove the leading '/sites/xxx/Shared Documents/' part - parts = decoded_path.split('Shared Documents') - if len(parts) > 1: - final_path = parts[1].strip('/') - return final_path - else: - return decoded_path.strip('/') def get_sharepoint_scraper(self, installer): sp = None @@ -101,7 +78,7 @@ class HubspotTodb(): def gather_data_from_sharepoint_url(self, row): sp = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"]) - path = self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"]) + path = get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"]) data_loc = self.create_files_locally(sp, path, row["HUBSPOT_DEAL_ADDRESS"]) for add, file_loc in data_loc.items(): diff --git a/etl/hubSpotClient/types.py b/etl/hubSpotClient/types.py index 82c9cd3..22ff6ff 100644 --- a/etl/hubSpotClient/types.py +++ b/etl/hubSpotClient/types.py @@ -2,7 +2,20 @@ from sqlmodel import Field, SQLModel from sqlalchemy import Column from sqlalchemy.dialects.postgresql import UUID import uuid -from pydantic import Field, field_validator, ValidationError +from pydantic import Field, field_validator, ValidationError, model_validator +from etl.utils.utils import get_sharepoint_path +from etl.scraper.scraper import SharePointScraper, SharePointInstaller + +def string_to_installer(installer): + if installer.upper() == "J & J CRUMP": + return SharePointInstaller.JJC + elif installer.upper() == "SCIS": + return SharePointInstaller.SOUTH_COAST_INSULATION + elif installer.upper() == "SGEC": + return SharePointInstaller.SGEC + else: + return None + class BaseModel(SQLModel): id: uuid.UUID = Field( @@ -30,4 +43,16 @@ class SubmissionInfoFromDeal(BaseModel): def must_be_non_negative(cls, v): if v < 0: raise ValidationError("Must be non-negative for Post Sap Score") - return v \ No newline at end of file + return v + + @model_validator(mode="after") + def check_submission_folder_path(self): + path = get_sharepoint_path(self.submission_folder_path) + installer = string_to_installer(self.installer) + sp = SharePointScraper(installer) + files = sp.get_folders_in_path(path) + if "value" in files: + if len(files["value"]) > 0: + return self + + raise RuntimeError("Sharepoint URL invalid") diff --git a/etl/hubspot_to_invoice_rewrite.py b/etl/hubspot_to_invoice_rewrite.py index c47ac4e..8bb19cc 100644 --- a/etl/hubspot_to_invoice_rewrite.py +++ b/etl/hubspot_to_invoice_rewrite.py @@ -48,3 +48,4 @@ sp.upload_to_sharepoint(dbLoader.sp.get_master_rate_card_path(), "COPY_OF_RATE_C # Commented out as i don't want to sync up hubspot_to_db just yet sp.move_deals_to_completed(deal_ids) +#TODO what do the installers want \ No newline at end of file diff --git a/etl/scraper/scraper.py b/etl/scraper/scraper.py index de7704d..70a9cb5 100644 --- a/etl/scraper/scraper.py +++ b/etl/scraper/scraper.py @@ -23,7 +23,7 @@ class SharePointInstaller(Enum): # https//{tenant}.sharepoint.com/sites/{site}/_api/site/id SOUTH_COAST_INSULATION = os.getenv("SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID", None) JJC = os.getenv("JJC_SERVICE_SHAREPOINT_ID", None) - SGEC = os.getenv("SGEC_SERVICE_SHAREPOINT_ID", None) + SGEC = os.getenv("SGEC_SERVICE_SHAREPOINT_ID", "52018e5c-3215-4fe4-a4e3-bbf0d0aa7cd9") BAXTER_KELLY = os.getenv("BAXTER_KELLY_SERVICE_SHAREPOINT_ID", "6f930bf3-572d-4f91-b1ae-ec536fa319e2") DOMNA = os.getenv("DOMNA_SHAREPOINT_ID", "8ab64924-ccde-4b56-b0dc-4e11596446e4") OSMOSIS_WAVE_3 = os.getenv("OSMOSIS_SHAREPOINT_ID", "350a3b48-8311-4506-8abb-69bafc280d6f") diff --git a/etl/utils/utils.py b/etl/utils/utils.py new file mode 100644 index 0000000..286b645 --- /dev/null +++ b/etl/utils/utils.py @@ -0,0 +1,23 @@ +from urllib.parse import unquote + +def get_sharepoint_path(url): + url_parts = url.split('/') + # Find the index of 'Forms' + forms_index = url_parts.index('Forms') + # Get the part after 'Forms' + after_forms = url_parts[forms_index + 1] + + # Find 'id=' and extract after it + if 'id=' in after_forms: + id_part = after_forms.split('id=')[1] + # Only keep the path before '&' (to ignore other parameters) + id_path = id_part.split('&')[0] + # Decode the path + decoded_path = unquote(id_path) + # Now, remove the leading '/sites/xxx/Shared Documents/' part + parts = decoded_path.split('Shared Documents') + if len(parts) > 1: + final_path = parts[1].strip('/') + return final_path + else: + return decoded_path.strip('/') \ No newline at end of file