From e8fa12824d175891ca088e3dd2534d22dc5938e8 Mon Sep 17 00:00:00 2001
From: Jun-te Kim
Date: Wed, 17 Dec 2025 11:25:10 +0000
Subject: [PATCH] Speed up first-time HubSpot ETL by listing deals per company

The first-time ETL script used to fetch every deal in the Operations Social
Housing pipeline and resolve each deal's company before deciding whether to
load it. It now starts from the companies of interest, pages through their
associated deals with the new HubSpotClient.get_deals_from_company(), and
filters by pipeline locally.

- Add the HOMEGROUP company ID; it is now the only company the script loads.
- Request the `pipeline` deal property so the script can filter on it.
- Report progress with tqdm.write() so the progress bar stays intact.
- Add editor-local VS Code Vim/terminal settings.

---
 .vscode/settings.json                         | 22 +++++++
 etl/hubSpotClient/hubspotClient.py            | 35 ++++++++++
 .../scripts/hubspot_abri_etl_first_time.py    | 57 ++++++++---------
 3 files changed, 87 insertions(+), 27 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 27782c1..ce943bf 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -15,5 +15,27 @@
     //     "%load_ext autoreload", "%autoreload 2"
     // ]
 
+    "vim.enableNeovim": false,
+
+    // Allow VSCode native keybindings to override Vim when needed
+    "vim.handleKeys": {
+        "": false,
+        "": false,
+        "": false,
+        "": false,
+        "": false,
+        "": false,
+        "": false,
+        "": false,
+        "": false,
+        "": false
+    },
+
+    // Terminal copy/paste via Ctrl+Shift+C / Ctrl+Shift+V
+    "terminal.integrated.copyOnSelection": false,
+    "terminal.integrated.commandsToSkipShell": [
+        "workbench.action.terminal.copySelection",
+        "workbench.action.terminal.paste"
+    ]
 
 }
\ No newline at end of file
diff --git a/etl/hubSpotClient/hubspotClient.py b/etl/hubSpotClient/hubspotClient.py
index ca26f87..b0d74d0 100644
--- a/etl/hubSpotClient/hubspotClient.py
+++ b/etl/hubSpotClient/hubspotClient.py
@@ -15,6 +15,7 @@ class Companies(Enum):
     SOUTHERN_HOUSING_GROUP = "109343619305"
     LIVEWEST = "86205872354"
     SURESERVE = "301745289413"
+    HOMEGROUP = "94946071794"
 
 class DealStage(Enum):
     SURVEYED_COMPLETE_NEEDS_SIGN_OFF = "1617223914"
@@ -83,6 +84,39 @@ class HubSpotClient():
             self.logger.error(f"Error fetching associated company for deal {deal_id}: {e}")
             return None
 
+    def get_deals_from_company(self, company_id: str) -> list[str]:
+        """
+        Get the IDs of all deals associated with a given company.
+
+        Follows the paging cursor of the company -> deals association
+        listing until HubSpot reports no further page.
+        """
+        associations_api = self.client.crm.associations.v4.basic_api
+
+        deal_ids = []
+        after = None
+
+        while True:
+            response = associations_api.get_page(
+                object_type="companies",
+                object_id=company_id,
+                to_object_type="deals",
+                limit=100,
+                after=after,
+            )
+
+            # Normalise to str so the result matches the annotation.
+            deal_ids.extend(
+                str(assoc.to_object_id) for assoc in response.results
+            )
+
+            if not response.paging or not response.paging.next:
+                break
+
+            after = response.paging.next.after
+
+        return deal_ids
+
     def from_deal_get_associated_listing(self, deal_id: str):
         """
         Get the associated listing information for a given deal.
@@ -126,6 +160,7 @@ class HubSpotClient():
                 properties=[
                     'dealname',
                     'dealstage',
+                    'pipeline',
                     'outcome', #outcome,
                     'outcome_notes', #outcome notes
                     'project_code',
diff --git a/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py b/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py
index 936765a..cd7a52b 100644
--- a/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py
+++ b/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py
@@ -1,33 +1,36 @@
 from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline
 from tqdm import tqdm
 from etl.db.hubSpotLoad import HubspotTodb
 
-# get ALL deals
 hubspot = HubSpotClient()
-
-# All deals from a pipeline_id via filter
-deals = hubspot.get_deal_ids_by_pipeline(
-    pipeline_id=Pipeline.OPERATIONS_SOCIAL_HOUSING.value,
-    )
-
-# deals from companies we care about
-valueable_deals = [
-    Companies.ABRI.value,
-    Companies.SOUTHERN_HOUSING_GROUP.value,
-    Companies.SURESERVE.value,
-    Companies.LIVEWEST.value,
-]
-deals_to_add = []
-
-
-deal_to_companies = {}
 loader = HubspotTodb()
-# Get all deals we care about
-for i,deal in enumerate(tqdm(deals)):
-    company = hubspot.from_deal_get_associated_company_id(deal)
-    if company in valueable_deals:
-        deals_to_add.append(deal)
-        deal_to_companies.update({deal: company})
-        deal_data = hubspot.from_deal_get_info(deal_id=deal)
-        listing_data = hubspot.from_deal_get_associated_listing(deal_id=deal)
-        loader.new_record_to_hubspot_data(deal_data, deal_to_companies[deal], listing_data, hubspot)
\ No newline at end of file
+
+PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value
+
+# Companies whose deals are loaded by this first-time import.
+valuable_companies = [
+    Companies.HOMEGROUP.value,
+]
+
+for company_id in valuable_companies:
+    # Start from the company's deal associations rather than walking every
+    # deal in the pipeline and resolving its company one request at a time.
+    deal_ids = hubspot.get_deals_from_company(company_id)
+
+    for deal_id in tqdm(deal_ids, desc=f"Company {company_id}"):
+        deal_data = hubspot.from_deal_get_info(deal_id)
+
+        # The association lookup is not pipeline-aware, so filter locally.
+        if deal_data.get("pipeline") != PIPELINE_ID:
+            continue
+
+        listing_data = hubspot.from_deal_get_associated_listing(deal_id)
+        loader.new_record_to_hubspot_data(
+            deal_data,
+            company_id,
+            listing_data,
+            hubspot,
+        )
+
+        # tqdm.write() logs without breaking the progress bar.
+        tqdm.write(f"Uploaded deal_id {deal_id} to db")