diff --git a/etl/db/load.py b/etl/db/load.py
index e402073..1f4fbd4 100644
--- a/etl/db/load.py
+++ b/etl/db/load.py
@@ -1,12 +1,49 @@
 from etl.hubSpotClient.hubspot import HubSpotClient, DealStage
 from etl.surveyPrice.surveyPrice import SurveyPrice
+from etl.surveyedData.surveryedData import surveyedDataProcessor
+from urllib.parse import unquote, urlsplit
+
 
 class HubspotTodb():
     def __init__(self):
         self.hubspot = HubSpotClient()
         self.deals_in_hubspot = None
+        self.data_in_sharepoint = []
 
     def get_all_deals(self):
         sp = SurveyPrice()
         self.deals_in_hubspot = sp.get_all_surveys_from_hubspot()
         return self.deals_in_hubspot
+
+    def get_sharepoint_path(self, url):
+        """Return the folder path held in a SharePoint view URL's ``id``.
+
+        The ``id`` query parameter is percent-decoded and everything up to
+        and including the first ``Shared Documents`` is dropped; when that
+        marker is absent the whole decoded value is used. Leading and
+        trailing slashes are stripped either way.
+
+        Returns ``None`` when ``url`` is not a string or carries no ``id``
+        parameter.
+        """
+        if not isinstance(url, str):
+            # A missing cell value must not abort the whole run.
+            return None
+
+        for pair in urlsplit(url).query.split('&'):
+            key, _, value = pair.partition('=')
+            # Exact key match, so 'viewid=' is never mistaken for 'id='.
+            if key != 'id':
+                continue
+            # unquote, not parse_qs: a literal '+' in a folder name stays.
+            decoded_path = unquote(value)
+            # Cut at the first marker only; sub-folders may repeat it.
+            _, marker, tail = decoded_path.partition('Shared Documents')
+            return (tail if marker else decoded_path).strip('/')
+        return None
+
+    def gather_data_from_each_sharepoint(self):
+        """Refresh the deals and print each row's SharePoint folder path."""
+        self.get_all_deals()
+        for _, row in self.deals_in_hubspot.iterrows():
+            print(self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"]))
\ No newline at end of file
diff --git a/etl/hubspot_to_db.py b/etl/hubspot_to_db.py
index 626bd0c..60eff81 100644
--- a/etl/hubspot_to_db.py
+++ b/etl/hubspot_to_db.py
@@ -10,4 +10,4 @@ from etl.db.load import HubspotTodb
 
 dbLoader = HubspotTodb()
 
-dbLoader.get_all_deals()
\ No newline at end of file
+dbLoader.gather_data_from_each_sharepoint()
\ No newline at end of file