Merge pull request #59 from Hestia-Homes/feature/nextjs

hub spot in
2026-06-08 11:17:29 +00:00 · 2025-05-23 17:01:05 +01:00 · 2025-05-23 17:01:05 +01:00 · 40a1cea8bc
commit 40a1cea8bc
parent 70dee78a84 348fc05503
5 changed files with 54 additions and 28 deletions
--- a/etl/db/hubSpotLoad.py
+++ b/etl/db/hubSpotLoad.py
@ -5,9 +5,7 @@ from etl.scraper.scraper import SharePointScraper, SharePointInstaller
 from etl.db.db import get_db_session, init_db
 import pandas as pd
 from etl.db.db import get_db_session, init_db
-
-from urllib.parse import unquote
-
+from etl.utils.utils import get_sharepoint_path

 class HubspotTodb():
    def __init__(self):
@ -21,27 +19,6 @@ class HubspotTodb():
        self.deals_in_hubspot = self.sp.get_all_surveys_from_hubspot()
        return self.deals_in_hubspot
    
-    def get_sharepoint_path(self, url):
-        url_parts = url.split('/')
-        # Find the index of 'Forms'
-        forms_index = url_parts.index('Forms')
-        # Get the part after 'Forms'
-        after_forms = url_parts[forms_index + 1]
-        
-        # Find 'id=' and extract after it
-        if 'id=' in after_forms:
-            id_part = after_forms.split('id=')[1]
-            # Only keep the path before '&' (to ignore other parameters)
-            id_path = id_part.split('&')[0]
-            # Decode the path
-            decoded_path = unquote(id_path)
-            # Now, remove the leading '/sites/xxx/Shared Documents/' part
-            parts = decoded_path.split('Shared Documents')
-            if len(parts) > 1:
-                final_path = parts[1].strip('/')
-                return final_path
-            else:
-                return decoded_path.strip('/')
            
    def get_sharepoint_scraper(self, installer):
        sp = None
@ -101,7 +78,7 @@ class HubspotTodb():

    def gather_data_from_sharepoint_url(self, row):
        sp = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"])
-        path = self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
+        path = get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
        data_loc = self.create_files_locally(sp, path, row["HUBSPOT_DEAL_ADDRESS"])

        for add, file_loc in data_loc.items():
--- a/etl/hubSpotClient/types.py
+++ b/etl/hubSpotClient/types.py
@ -2,7 +2,20 @@ from sqlmodel import Field, SQLModel
 from sqlalchemy import Column
 from sqlalchemy.dialects.postgresql import UUID
 import uuid
-from pydantic import  Field, field_validator, ValidationError
+from pydantic import  Field, field_validator, ValidationError, model_validator
+from etl.utils.utils import get_sharepoint_path
+from etl.scraper.scraper import SharePointScraper, SharePointInstaller
+
+def string_to_installer(installer):
+    """Map an installer name to its ``SharePointInstaller`` member.
+
+    Matching ignores case and surrounding whitespace.
+
+    Args:
+        installer: Installer name as text, e.g. "J & J Crump", "SCIS" or
+            "SGEC". Non-string values (including ``None``) are accepted.
+
+    Returns:
+        The matching ``SharePointInstaller`` member, or ``None`` when the
+        value is not a string or names no known installer.
+    """
+    # A missing or non-text value would otherwise crash on ``.upper()``.
+    if not isinstance(installer, str):
+        return None
+
+    # Normalise once instead of re-computing ``.upper()`` for every branch.
+    normalised = installer.strip().upper()
+    if normalised == "J & J CRUMP":
+        return SharePointInstaller.JJC
+    if normalised == "SCIS":
+        return SharePointInstaller.SOUTH_COAST_INSULATION
+    if normalised == "SGEC":
+        return SharePointInstaller.SGEC
+    return None
+

 class BaseModel(SQLModel):
    id: uuid.UUID = Field(
@ -30,4 +43,16 @@ class SubmissionInfoFromDeal(BaseModel):
    def must_be_non_negative(cls, v):
        if v < 0:
            raise ValidationError("Must be non-negative for Post Sap Score")
-        return v
+        return v
+    
+    @model_validator(mode="after")
+    def check_submission_folder_path(self):
+        """Check that the submission folder URL points at a non-empty SharePoint folder.
+
+        Converts ``self.submission_folder_path`` with ``get_sharepoint_path``,
+        builds a ``SharePointScraper`` for the installer resolved by
+        ``string_to_installer(self.installer)`` and asks it for the folders
+        in that path.
+
+        Returns:
+            ``self`` when the response has a non-empty "value" entry.
+
+        Raises:
+            RuntimeError: If the response has no "value" entry or it is empty.
+        """
+        # Resolve the path before building the scraper so a malformed URL
+        # fails first, as it always has.
+        folder_path = get_sharepoint_path(self.submission_folder_path)
+        scraper = SharePointScraper(string_to_installer(self.installer))
+        listing = scraper.get_folders_in_path(folder_path)
+
+        if "value" not in listing or len(listing["value"]) == 0:
+            raise RuntimeError("Sharepoint URL invalid")
+        return self
--- a/etl/hubspot_to_invoice_rewrite.py
+++ b/etl/hubspot_to_invoice_rewrite.py
@ -48,3 +48,4 @@ sp.upload_to_sharepoint(dbLoader.sp.get_master_rate_card_path(), "COPY_OF_RATE_C
 # Commented out as i don't want to sync up hubspot_to_db just yet
 sp.move_deals_to_completed(deal_ids)

+# TODO: what do the installers want?
--- a/etl/scraper/scraper.py
+++ b/etl/scraper/scraper.py
@ -23,7 +23,7 @@ class SharePointInstaller(Enum):
    # https//{tenant}.sharepoint.com/sites/{site}/_api/site/id
    SOUTH_COAST_INSULATION = os.getenv("SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID", None)
    JJC = os.getenv("JJC_SERVICE_SHAREPOINT_ID", None)
-    SGEC = os.getenv("SGEC_SERVICE_SHAREPOINT_ID", None)
+    SGEC = os.getenv("SGEC_SERVICE_SHAREPOINT_ID", "52018e5c-3215-4fe4-a4e3-bbf0d0aa7cd9")
    BAXTER_KELLY = os.getenv("BAXTER_KELLY_SERVICE_SHAREPOINT_ID", "6f930bf3-572d-4f91-b1ae-ec536fa319e2")
    DOMNA = os.getenv("DOMNA_SHAREPOINT_ID", "8ab64924-ccde-4b56-b0dc-4e11596446e4")
    OSMOSIS_WAVE_3 = os.getenv("OSMOSIS_SHAREPOINT_ID", "350a3b48-8311-4506-8abb-69bafc280d6f")
--- a/etl/utils/utils.py
+++ b/etl/utils/utils.py
@ -0,0 +1,23 @@
+from urllib.parse import unquote
+
+def get_sharepoint_path(url):
+    """Extract the folder path below 'Shared Documents' from a SharePoint URL.
+
+    The URL is expected to contain a ``Forms`` path segment followed by a
+    page whose query string carries the URL-encoded folder in its ``id``
+    parameter, e.g. ``.../Forms/AllItems.aspx?id=%2Fsites%2F...&viewid=...``.
+
+    Args:
+        url: The SharePoint URL to parse.
+
+    Returns:
+        The decoded ``id`` value with everything up to and including the
+        first 'Shared Documents' removed and surrounding slashes stripped;
+        the whole decoded value (slashes stripped) when it does not contain
+        'Shared Documents'; or ``None`` when there is no ``id`` parameter.
+
+    Raises:
+        ValueError: If the URL has no ``Forms`` path segment.
+        IndexError: If ``Forms`` is the last segment of the URL.
+    """
+    url_parts = url.split('/')
+    # The segment right after 'Forms' holds the page name and query string.
+    after_forms = url_parts[url_parts.index('Forms') + 1]
+
+    # Keep only the query string; use the whole segment if there is no '?'.
+    page, question_mark, query = after_forms.partition('?')
+    if not question_mark:
+        query = page
+
+    # Match the parameter name exactly: a plain substring search for 'id='
+    # also hits 'viewid=' and picks the wrong value when that comes first.
+    for param in query.split('&'):
+        name, equals, value = param.partition('=')
+        if equals and name == 'id':
+            decoded_path = unquote(value)
+            break
+    else:
+        return None
+
+    # Drop the leading '/sites/xxx/Shared Documents' part, cutting at the
+    # first occurrence only so a nested folder of the same name survives.
+    _, marker, remainder = decoded_path.partition('Shared Documents')
+    if marker:
+        return remainder.strip('/')
+    return decoded_path.strip('/')