mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
commit
40a1cea8bc
5 changed files with 54 additions and 28 deletions
|
|
@ -5,9 +5,7 @@ from etl.scraper.scraper import SharePointScraper, SharePointInstaller
|
|||
from etl.db.db import get_db_session, init_db
|
||||
import pandas as pd
|
||||
from etl.db.db import get_db_session, init_db
|
||||
|
||||
from urllib.parse import unquote
|
||||
|
||||
from etl.utils.utils import get_sharepoint_path
|
||||
|
||||
class HubspotTodb():
|
||||
def __init__(self):
|
||||
|
|
@ -21,27 +19,6 @@ class HubspotTodb():
|
|||
self.deals_in_hubspot = self.sp.get_all_surveys_from_hubspot()
|
||||
return self.deals_in_hubspot
|
||||
|
||||
def get_sharepoint_path(self, url):
    """Extract the library-relative folder path from a SharePoint view URL.

    The URL is expected to look like
    ``.../Forms/AllItems.aspx?id=<percent-encoded server path>&...``.

    Args:
        url: SharePoint URL containing a ``Forms`` path segment and an
            ``id`` query parameter.

    Returns:
        The decoded ``id`` value with everything up to and including the
        first ``Shared Documents`` removed and surrounding slashes stripped.
        If the decoded value does not contain ``Shared Documents`` it is
        returned whole (slashes stripped). ``None`` when the URL has no
        ``id`` query parameter.

    Raises:
        ValueError: if the URL has no ``Forms`` path segment.
    """
    address, _, query = url.partition('?')
    if 'Forms' not in address.split('/'):
        raise ValueError(f"No 'Forms' segment in SharePoint URL: {url!r}")

    # Match the parameter by its exact name: a substring search for 'id='
    # would also hit other parameters such as 'viewid='.
    encoded_path = None
    for pair in query.split('&'):
        name, sep, value = pair.partition('=')
        if sep and name == 'id':
            encoded_path = value
            break
    if encoded_path is None:
        return None

    decoded_path = unquote(encoded_path)
    # Cut at the first 'Shared Documents' only, so a sub-folder that happens
    # to carry the same name does not truncate the result.
    _, marker, remainder = decoded_path.partition('Shared Documents')
    return (remainder if marker else decoded_path).strip('/')
|
||||
|
||||
def get_sharepoint_scraper(self, installer):
|
||||
sp = None
|
||||
|
|
@ -101,7 +78,7 @@ class HubspotTodb():
|
|||
|
||||
def gather_data_from_sharepoint_url(self, row):
|
||||
sp = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"])
|
||||
path = self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
|
||||
path = get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
|
||||
data_loc = self.create_files_locally(sp, path, row["HUBSPOT_DEAL_ADDRESS"])
|
||||
|
||||
for add, file_loc in data_loc.items():
|
||||
|
|
|
|||
|
|
@ -2,7 +2,20 @@ from sqlmodel import Field, SQLModel
|
|||
from sqlalchemy import Column
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
import uuid
|
||||
from pydantic import Field, field_validator, ValidationError
|
||||
from pydantic import Field, field_validator, ValidationError, model_validator
|
||||
from etl.utils.utils import get_sharepoint_path
|
||||
from etl.scraper.scraper import SharePointScraper, SharePointInstaller
|
||||
|
||||
def string_to_installer(installer):
    """Map an installer display name to its ``SharePointInstaller`` member.

    Matching ignores letter case and surrounding whitespace.

    Args:
        installer: installer name such as "J & J Crump", "SCIS" or "SGEC".

    Returns:
        The matching ``SharePointInstaller`` member, or ``None`` when the
        name is missing, is not a string, or is not recognised.
    """
    # A missing or non-string value cannot name an installer; report
    # "unknown" instead of failing on .upper().
    if not isinstance(installer, str):
        return None

    name = installer.strip().upper()
    if name == "J & J CRUMP":
        return SharePointInstaller.JJC
    if name == "SCIS":
        return SharePointInstaller.SOUTH_COAST_INSULATION
    if name == "SGEC":
        return SharePointInstaller.SGEC
    return None
|
||||
|
||||
|
||||
class BaseModel(SQLModel):
|
||||
id: uuid.UUID = Field(
|
||||
|
|
@ -30,4 +43,16 @@ class SubmissionInfoFromDeal(BaseModel):
|
|||
def must_be_non_negative(cls, v):
    """Reject negative values for the validated field.

    NOTE(review): the decorators for this method are outside the visible
    hunk; it is presumably registered as a pydantic field validator.

    Args:
        v: the value being validated.

    Returns:
        ``v`` unchanged when it is zero or positive.

    Raises:
        ValueError: if ``v`` is negative.
    """
    if v < 0:
        # Validators signal failure with ValueError; pydantic's
        # ValidationError cannot be built from a bare message string.
        raise ValueError("Must be non-negative for Post Sap Score")
    return v
|
||||
|
||||
@model_validator(mode="after")
def check_submission_folder_path(self):
    """Check that the submission folder URL resolves to a non-empty listing.

    Converts ``submission_folder_path`` with ``get_sharepoint_path``, builds
    a ``SharePointScraper`` for the model's ``installer`` and calls
    ``get_folders_in_path`` on the resulting path.

    Returns:
        The model instance itself when the listing has a non-empty
        ``"value"`` entry.

    Raises:
        RuntimeError: when the listing has no ``"value"`` entry or the entry
            is empty.
    """
    folder_path = get_sharepoint_path(self.submission_folder_path)
    # NOTE(review): string_to_installer returns None for unrecognised names;
    # confirm SharePointScraper copes with a None installer.
    scraper = SharePointScraper(string_to_installer(self.installer))
    listing = scraper.get_folders_in_path(folder_path)

    # NOTE(review): presumably pydantic only wraps ValueError/AssertionError
    # into ValidationError, so this RuntimeError reaches the caller as-is;
    # confirm that is intended.
    if "value" not in listing or len(listing["value"]) == 0:
        raise RuntimeError("Sharepoint URL invalid")
    return self
|
||||
|
|
|
|||
|
|
@ -48,3 +48,4 @@ sp.upload_to_sharepoint(dbLoader.sp.get_master_rate_card_path(), "COPY_OF_RATE_C
|
|||
# Commented out as I don't want to sync up hubspot_to_db just yet
|
||||
sp.move_deals_to_completed(deal_ids)
|
||||
|
||||
# TODO: what do the installers want?
|
||||
|
|
@ -23,7 +23,7 @@ class SharePointInstaller(Enum):
|
|||
# https://{tenant}.sharepoint.com/sites/{site}/_api/site/id
|
||||
SOUTH_COAST_INSULATION = os.getenv("SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID", None)
|
||||
JJC = os.getenv("JJC_SERVICE_SHAREPOINT_ID", None)
|
||||
SGEC = os.getenv("SGEC_SERVICE_SHAREPOINT_ID", None)
|
||||
SGEC = os.getenv("SGEC_SERVICE_SHAREPOINT_ID", "52018e5c-3215-4fe4-a4e3-bbf0d0aa7cd9")
|
||||
BAXTER_KELLY = os.getenv("BAXTER_KELLY_SERVICE_SHAREPOINT_ID", "6f930bf3-572d-4f91-b1ae-ec536fa319e2")
|
||||
DOMNA = os.getenv("DOMNA_SHAREPOINT_ID", "8ab64924-ccde-4b56-b0dc-4e11596446e4")
|
||||
OSMOSIS_WAVE_3 = os.getenv("OSMOSIS_SHAREPOINT_ID", "350a3b48-8311-4506-8abb-69bafc280d6f")
|
||||
|
|
|
|||
23
etl/utils/utils.py
Normal file
23
etl/utils/utils.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
from urllib.parse import unquote
|
||||
|
||||
def get_sharepoint_path(url):
    """Extract the library-relative folder path from a SharePoint view URL.

    The URL is expected to look like
    ``.../Forms/AllItems.aspx?id=<percent-encoded server path>&...``.

    Args:
        url: SharePoint URL containing a ``Forms`` path segment and an
            ``id`` query parameter.

    Returns:
        The decoded ``id`` value with everything up to and including the
        first ``Shared Documents`` removed and surrounding slashes stripped.
        If the decoded value does not contain ``Shared Documents`` it is
        returned whole (slashes stripped). ``None`` when the URL has no
        ``id`` query parameter.

    Raises:
        ValueError: if the URL has no ``Forms`` path segment.
    """
    address, _, query = url.partition('?')
    if 'Forms' not in address.split('/'):
        raise ValueError(f"No 'Forms' segment in SharePoint URL: {url!r}")

    # Match the parameter by its exact name: a substring search for 'id='
    # would also hit other parameters such as 'viewid='.
    encoded_path = None
    for pair in query.split('&'):
        name, sep, value = pair.partition('=')
        if sep and name == 'id':
            encoded_path = value
            break
    if encoded_path is None:
        return None

    decoded_path = unquote(encoded_path)
    # Cut at the first 'Shared Documents' only, so a sub-folder that happens
    # to carry the same name does not truncate the result.
    _, marker, remainder = decoded_path.partition('Shared Documents')
    return (remainder if marker else decoded_path).strip('/')
|
||||
Loading…
Add table
Reference in a new issue