From 348fc055032a1f597811abe13bd082115fbaf571 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <junte@domna.homes>
Date: Fri, 23 May 2025 15:58:04 +0000
Subject: [PATCH] Move get_sharepoint_path to etl.utils and validate HubSpot SharePoint folder URLs

---
 etl/db/hubSpotLoad.py             | 27 ++-------------------------
 etl/hubSpotClient/types.py        | 29 +++++++++++++++++++++++++++--
 etl/hubspot_to_invoice_rewrite.py |  1 +
 etl/scraper/scraper.py            |  2 +-
 etl/utils/utils.py                | 23 +++++++++++++++++++++++
 5 files changed, 54 insertions(+), 28 deletions(-)
 create mode 100644 etl/utils/utils.py

diff --git a/etl/db/hubSpotLoad.py b/etl/db/hubSpotLoad.py
index c070e83..3840c87 100644
--- a/etl/db/hubSpotLoad.py
+++ b/etl/db/hubSpotLoad.py
@@ -5,9 +5,7 @@ from etl.scraper.scraper import SharePointScraper, SharePointInstaller
 from etl.db.db import get_db_session, init_db
 import pandas as pd
 from etl.db.db import get_db_session, init_db
-
-from urllib.parse import unquote
-
+from etl.utils.utils import get_sharepoint_path
 
 class HubspotTodb():
     def __init__(self):
@@ -21,27 +19,6 @@ class HubspotTodb():
         self.deals_in_hubspot = self.sp.get_all_surveys_from_hubspot()
         return self.deals_in_hubspot
     
-    def get_sharepoint_path(self, url):
-        url_parts = url.split('/')
-        # Find the index of 'Forms'
-        forms_index = url_parts.index('Forms')
-        # Get the part after 'Forms'
-        after_forms = url_parts[forms_index + 1]
-        
-        # Find 'id=' and extract after it
-        if 'id=' in after_forms:
-            id_part = after_forms.split('id=')[1]
-            # Only keep the path before '&' (to ignore other parameters)
-            id_path = id_part.split('&')[0]
-            # Decode the path
-            decoded_path = unquote(id_path)
-            # Now, remove the leading '/sites/xxx/Shared Documents/' part
-            parts = decoded_path.split('Shared Documents')
-            if len(parts) > 1:
-                final_path = parts[1].strip('/')
-                return final_path
-            else:
-                return decoded_path.strip('/')
             
     def get_sharepoint_scraper(self, installer):
         sp = None
@@ -101,7 +78,7 @@ class HubspotTodb():
 
     def gather_data_from_sharepoint_url(self, row):
         sp = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"])
-        path = self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
+        path = get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
         data_loc = self.create_files_locally(sp, path, row["HUBSPOT_DEAL_ADDRESS"])
 
         for add, file_loc in data_loc.items():
diff --git a/etl/hubSpotClient/types.py b/etl/hubSpotClient/types.py
index 82c9cd3..22ff6ff 100644
--- a/etl/hubSpotClient/types.py
+++ b/etl/hubSpotClient/types.py
@@ -2,7 +2,20 @@ from sqlmodel import Field, SQLModel
 from sqlalchemy import Column
 from sqlalchemy.dialects.postgresql import UUID
 import uuid
-from pydantic import  Field, field_validator, ValidationError
+from pydantic import  Field, field_validator, ValidationError, model_validator
+from etl.utils.utils import get_sharepoint_path
+from etl.scraper.scraper import SharePointScraper, SharePointInstaller
+
+def string_to_installer(installer):
+    """Map an installer name (compared case-insensitively) to a SharePointInstaller, or None."""
+    # Keys are the upper-cased names; any name not listed here yields None.
+    known_installers = {
+        "J & J CRUMP": SharePointInstaller.JJC,
+        "SCIS": SharePointInstaller.SOUTH_COAST_INSULATION,
+        "SGEC": SharePointInstaller.SGEC,
+    }
+    return known_installers.get(installer.upper())
+
 
 class BaseModel(SQLModel):
     id: uuid.UUID = Field(
@@ -30,4 +43,16 @@ class SubmissionInfoFromDeal(BaseModel):
     def must_be_non_negative(cls, v):
         if v < 0:
             raise ValidationError("Must be non-negative for Post Sap Score")
-        return v
\ No newline at end of file
+        return v
+    
+    @model_validator(mode="after")
+    def check_submission_folder_path(self):
+        """Return self only when get_folders_in_path yields a non-empty "value" for the folder URL."""
+        folder_path = get_sharepoint_path(self.submission_folder_path)
+        resolved_installer = string_to_installer(self.installer)
+        scraper = SharePointScraper(resolved_installer)
+        listing = scraper.get_folders_in_path(folder_path)
+        # A response without "value", or with an empty "value", rejects the model.
+        if "value" not in listing or len(listing["value"]) == 0:
+            raise RuntimeError("Sharepoint URL invalid")
+        return self
diff --git a/etl/hubspot_to_invoice_rewrite.py b/etl/hubspot_to_invoice_rewrite.py
index c47ac4e..8bb19cc 100644
--- a/etl/hubspot_to_invoice_rewrite.py
+++ b/etl/hubspot_to_invoice_rewrite.py
@@ -48,3 +48,4 @@ sp.upload_to_sharepoint(dbLoader.sp.get_master_rate_card_path(), "COPY_OF_RATE_C
 # Commented out as i don't want to sync up hubspot_to_db just yet
 sp.move_deals_to_completed(deal_ids)
 
+# TODO: find out what the installers want
\ No newline at end of file
diff --git a/etl/scraper/scraper.py b/etl/scraper/scraper.py
index de7704d..70a9cb5 100644
--- a/etl/scraper/scraper.py
+++ b/etl/scraper/scraper.py
@@ -23,7 +23,7 @@ class SharePointInstaller(Enum):
     # https//{tenant}.sharepoint.com/sites/{site}/_api/site/id
     SOUTH_COAST_INSULATION = os.getenv("SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID", None)
     JJC = os.getenv("JJC_SERVICE_SHAREPOINT_ID", None)
-    SGEC = os.getenv("SGEC_SERVICE_SHAREPOINT_ID", None)
+    SGEC = os.getenv("SGEC_SERVICE_SHAREPOINT_ID", "52018e5c-3215-4fe4-a4e3-bbf0d0aa7cd9")
     BAXTER_KELLY = os.getenv("BAXTER_KELLY_SERVICE_SHAREPOINT_ID", "6f930bf3-572d-4f91-b1ae-ec536fa319e2")
     DOMNA = os.getenv("DOMNA_SHAREPOINT_ID", "8ab64924-ccde-4b56-b0dc-4e11596446e4")
     OSMOSIS_WAVE_3 = os.getenv("OSMOSIS_SHAREPOINT_ID", "350a3b48-8311-4506-8abb-69bafc280d6f")
diff --git a/etl/utils/utils.py b/etl/utils/utils.py
new file mode 100644
index 0000000..286b645
--- /dev/null
+++ b/etl/utils/utils.py
@@ -0,0 +1,23 @@
+from urllib.parse import unquote
+
+def get_sharepoint_path(url):
+    """Return the folder path carried by the ``id`` parameter of a SharePoint 'Forms' URL.
+
+    The value is URL-decoded and everything up to and including the first
+    'Shared Documents' is dropped; surrounding '/' characters are stripped.
+    Returns None when there is no ``id`` parameter; raises ValueError when the
+    URL has no 'Forms' path segment.
+    """
+    url_parts = url.split('/')
+    after_forms = url_parts[url_parts.index('Forms') + 1]
+    # Match the parameter by exact name: a bare substring search for 'id='
+    # also hits 'viewid=' / 'cid=' when they precede the real ``id``.
+    query = after_forms.partition('?')[2] or after_forms
+    for param in query.split('&'):
+        name, _, value = param.partition('=')
+        if name == 'id':
+            decoded_path = unquote(value)
+            # Cut at the FIRST 'Shared Documents' only, keeping any later occurrence.
+            _, marker, tail = decoded_path.partition('Shared Documents')
+            return (tail if marker else decoded_path).strip('/')
+    return None
\ No newline at end of file