From 4c4b3b059c40f812564a4419de0643c0fb3fef8d Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 17 Dec 2025 11:27:24 +0000 Subject: [PATCH] gather all deals --- .github/workflows/hubspot_sync.yml | 22 +++--- .../scripts/hubspot_abri_etl_first_time.py | 77 ------------------- .../scripts/hubspot_gather_all_deals.py | 45 +++++++++++ 3 files changed, 56 insertions(+), 88 deletions(-) delete mode 100644 etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py create mode 100644 etl/hubSpotClient/scripts/hubspot_gather_all_deals.py diff --git a/.github/workflows/hubspot_sync.yml b/.github/workflows/hubspot_sync.yml index ce97480..5d57a15 100644 --- a/.github/workflows/hubspot_sync.yml +++ b/.github/workflows/hubspot_sync.yml @@ -21,16 +21,6 @@ jobs: pip install poetry poetry install --no-root - # - name: Run scripts - # env: - # PYTHONPATH: ${{ github.workspace }} - # DATABASE_URL: ${{ secrets.PROD_DATABASE_URL }} - # run: | - # pwd - # ls -la - # poetry run python etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py - - - name: Run scripts env: PYTHONPATH: ${{ github.workspace }} @@ -38,6 +28,16 @@ jobs: run: | pwd ls -la - poetry run python etl/hubSpotClient/scripts/hubspot_update_script.py + poetry run python etl/hubSpotClient/scripts/hubspot_gather_all_deals.py + + + # - name: Run scripts + # env: + # PYTHONPATH: ${{ github.workspace }} + # DATABASE_URL: ${{ secrets.PROD_DATABASE_URL }} + # run: | + # pwd + # ls -la + # poetry run python etl/hubSpotClient/scripts/hubspot_update_script.py \ No newline at end of file diff --git a/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py b/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py deleted file mode 100644 index cd7a52b..0000000 --- a/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py +++ /dev/null @@ -1,77 +0,0 @@ -# from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline -# from tqdm import tqdm -# from etl.db.hubSpotLoad import HubspotTodb - -# # get ALL deals -# hubspot = HubSpotClient() - -# # All deals from a pipeline_id via filter -# deals = hubspot.get_deal_ids_by_pipeline( -# pipeline_id=Pipeline.OPERATIONS_SOCIAL_HOUSING.value, -# ) - -# # deals from companies we care about -# valueable_deals = [ -# # Companies.ABRI.value, -# # Companies.SOUTHERN_HOUSING_GROUP.value, -# # Companies.SURESERVE.value, -# # Companies.LIVEWEST.value, -# Companies.HOMEGROUP.value, -# ] -# deals_to_add = [] - - -# deal_to_companies = {} -# loader = HubspotTodb() -# # Get all deals we care about -# for i,deal in enumerate(tqdm(deals)): -# company = hubspot.from_deal_get_associated_company_id(deal) -# if company in valueable_deals: -# deals_to_add.append(deal) -# deal_to_companies.update({deal: company}) -# deal_data = hubspot.from_deal_get_info(deal_id=deal) -# listing_data = hubspot.from_deal_get_associated_listing(deal_id=deal) -# loader.new_record_to_hubspot_data(deal_data, deal_to_companies[deal], listing_data, hubspot) - - -from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline -from tqdm import tqdm -from etl.db.hubSpotLoad import HubspotTodb - -hubspot = HubSpotClient() -loader = HubspotTodb() - -PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value - -valuable_companies = [ - Companies.HOMEGROUP.value, -] - -deals_to_add = [] -deal_to_companies = {} - -for company_id in valuable_companies: - # 🔥 Cheap: company → deals - deal_ids = hubspot.get_deals_from_company(company_id) - - for deal_id in tqdm(deal_ids, desc=f"Company {company_id}"): - # Fetch minimal deal info once - deal_data = hubspot.from_deal_get_info(deal_id) - print(f"working on deal {deal_id}") - # Filter by pipeline (small local filter) - if deal_data.get("pipeline") != PIPELINE_ID: - continue - - deals_to_add.append(deal_id) - deal_to_companies[deal_id] = company_id - - listing_data = hubspot.from_deal_get_associated_listing(deal_id) - - loader.new_record_to_hubspot_data( - deal_data, - company_id, - listing_data, - hubspot - ) - - print(f"Uploaded deal_id {deal_id} to db") \ No newline at end of file diff --git a/etl/hubSpotClient/scripts/hubspot_gather_all_deals.py b/etl/hubSpotClient/scripts/hubspot_gather_all_deals.py new file mode 100644 index 0000000..eec8ae0 --- /dev/null +++ b/etl/hubSpotClient/scripts/hubspot_gather_all_deals.py @@ -0,0 +1,45 @@ +from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline +from tqdm import tqdm +from etl.db.hubSpotLoad import HubspotTodb + +hubspot = HubSpotClient() +loader = HubspotTodb() + +PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value + +valuable_companies = [ + Companies.HOMEGROUP.value, + Companies.ABRI.value, + Companies.SOUTHERN_HOUSING_GROUP.value, + Companies.SURESERVE.value, + Companies.LIVEWEST.value, +] + +deals_to_add = [] +deal_to_companies = {} + +for company_id in valuable_companies: + # 🔥 Cheap: company → deals + deal_ids = hubspot.get_deals_from_company(company_id) + + for deal_id in tqdm(deal_ids, desc=f"Company {company_id}"): + # Fetch minimal deal info once + deal_data = hubspot.from_deal_get_info(deal_id) + print(f"working on deal {deal_id}") + # Filter by pipeline (small local filter) + if deal_data.get("pipeline") != PIPELINE_ID: + continue + + deals_to_add.append(deal_id) + deal_to_companies[deal_id] = company_id + + listing_data = hubspot.from_deal_get_associated_listing(deal_id) + + loader.new_record_to_hubspot_data( + deal_data, + company_id, + listing_data, + hubspot + ) + + print(f"Uploaded deal_id {deal_id} to db") \ No newline at end of file