diff --git a/.github/workflows/hubspot_sync.yml b/.github/workflows/hubspot_sync.yml
index 86d9551..7c86de3 100644
--- a/.github/workflows/hubspot_sync.yml
+++ b/.github/workflows/hubspot_sync.yml
@@ -21,16 +21,6 @@ jobs:
           pip install poetry
           poetry install --no-root
 
-      # - name: Run scripts
-      #   env:
-      #     PYTHONPATH: ${{ github.workspace }}
-      #     DATABASE_URL: ${{ secrets.PROD_DATABASE_URL }}
-      #   run: |
-      #     pwd
-      #     ls -la
-      #     poetry run python etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py
-
-
       - name: Run scripts
         env:
           PYTHONPATH: ${{ github.workspace }}
@@ -38,7 +28,17 @@ jobs:
         run: |
           pwd
           ls -la
-          poetry run python etl/hubSpotClient/scripts/hubspot_update_script.py
+          poetry run python etl/hubSpotClient/scripts/hubspot_gather_all_deals.py
+
+
+      # - name: Run scripts
+      #   env:
+      #     PYTHONPATH: ${{ github.workspace }}
+      #     DATABASE_URL: ${{ secrets.PROD_DATABASE_URL }}
+      #   run: |
+      #     pwd
+      #     ls -la
+      #     poetry run python etl/hubSpotClient/scripts/hubspot_update_script.py
 
 
 
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 27782c1..ce943bf 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -15,5 +15,27 @@
     //     "%load_ext autoreload", "%autoreload 2"
     // ]
 
+    "vim.enableNeovim": false,
+
+    // Allow VSCode native keybindings to override Vim when needed
+    "vim.handleKeys": {
+        "": false,
+        "": false,
+        "": false,
+        "": false,
+        "": false,
+        "": false,
+        "": false,
+        "": false,
+        "": false,
+        "": false
+    },
+
+    // Terminal copy/paste via Ctrl+Shift+C / Ctrl+Shift+V
+    "terminal.integrated.copyOnSelection": false,
+    "terminal.integrated.commandsToSkipShell": [
+        "workbench.action.terminal.copySelection",
+        "workbench.action.terminal.paste"
+    ],
 
 }
\ No newline at end of file
diff --git a/etl/hubSpotClient/hubspotClient.py b/etl/hubSpotClient/hubspotClient.py
index ca26f87..591b128 100644
--- a/etl/hubSpotClient/hubspotClient.py
+++ b/etl/hubSpotClient/hubspotClient.py
@@ -15,6 +15,8 @@ class Companies(Enum):
     SOUTHERN_HOUSING_GROUP = "109343619305"
     LIVEWEST = "86205872354"
     SURESERVE = "301745289413"
+    HOMEGROUP = "94946071794"
+    APPLE = "184769046716"
 
 class DealStage(Enum):
     SURVEYED_COMPLETE_NEEDS_SIGN_OFF = "1617223914"
@@ -83,6 +85,46 @@ class HubSpotClient():
             self.logger.error(f"Error fetching associated company for deal {deal_id}: {e}")
             return None
 
+    def get_deals_from_company(self, company_id: str) -> list[str]:
+        """
+        Get the IDs of every deal associated with a company.
+
+        Requests the company -> deals associations 100 at a time and
+        follows the paging cursor until no next page is reported.
+
+        Args:
+            company_id: HubSpot object ID of the company.
+
+        Returns:
+            Deal IDs as strings, in the order the API returned them.
+        """
+        associations_api = self.client.crm.associations.v4.basic_api
+
+        deal_ids: list[str] = []
+        after = None
+
+        while True:
+            response = associations_api.get_page(
+                object_type="companies",
+                object_id=company_id,
+                to_object_type="deals",
+                limit=100,
+                after=after,
+            )
+
+            # Cast so the result honours the declared list[str] whatever
+            # type the client model uses for to_object_id.
+            deal_ids.extend(
+                str(assoc.to_object_id) for assoc in response.results
+            )
+
+            if not response.paging or not response.paging.next:
+                break
+
+            after = response.paging.next.after
+
+        return deal_ids
+
     def from_deal_get_associated_listing(self, deal_id: str):
         """
         Get the associated listing information for a given deal.
@@ -126,6 +168,7 @@ class HubSpotClient():
                 properties=[
                     'dealname',
                     'dealstage',
+                    'pipeline',
                     'outcome', #outcome,
                     'outcome_notes', #outcome notes
                     'project_code',
diff --git a/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py b/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py
deleted file mode 100644
index 936765a..0000000
--- a/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline
-from tqdm import tqdm
-from etl.db.hubSpotLoad import HubspotTodb
-
-# get ALL deals
-hubspot = HubSpotClient()
-
-# All deals from a pipeline_id via filter
-deals = hubspot.get_deal_ids_by_pipeline(
-    pipeline_id=Pipeline.OPERATIONS_SOCIAL_HOUSING.value,
-    )
-
-# deals from companies we care about
-valueable_deals = [
-    Companies.ABRI.value,
-    Companies.SOUTHERN_HOUSING_GROUP.value,
-    Companies.SURESERVE.value,
-    Companies.LIVEWEST.value,
-]
-deals_to_add = []
-
-
-deal_to_companies = {}
-loader = HubspotTodb()
-# Get all deals we care about
-for i,deal in enumerate(tqdm(deals)):
-    company = hubspot.from_deal_get_associated_company_id(deal)
-    if company in valueable_deals:
-        deals_to_add.append(deal)
-        deal_to_companies.update({deal: company})
-        deal_data = hubspot.from_deal_get_info(deal_id=deal)
-        listing_data = hubspot.from_deal_get_associated_listing(deal_id=deal)
-        loader.new_record_to_hubspot_data(deal_data, deal_to_companies[deal], listing_data, hubspot)
\ No newline at end of file
diff --git a/etl/hubSpotClient/scripts/hubspot_gather_all_deals.py b/etl/hubSpotClient/scripts/hubspot_gather_all_deals.py
new file mode 100644
index 0000000..f4826a0
--- /dev/null
+++ b/etl/hubSpotClient/scripts/hubspot_gather_all_deals.py
@@ -0,0 +1,59 @@
+"""Load the deals of the selected companies from HubSpot into the database.
+
+For each company the associated deal IDs are listed, each deal is fetched and
+kept only if its pipeline equals PIPELINE_ID, and the remaining deals are
+stored together with their associated listing.
+"""
+from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline
+from tqdm import tqdm
+from etl.db.hubSpotLoad import HubspotTodb
+
+PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value
+
+# Companies whose deals are loaded; uncomment an entry to include it.
+VALUABLE_COMPANIES = [
+    # Companies.HOMEGROUP.value,
+    # Companies.ABRI.value,
+    # Companies.SOUTHERN_HOUSING_GROUP.value,
+    # Companies.SURESERVE.value,
+    # Companies.LIVEWEST.value,
+    Companies.APPLE.value,
+]
+
+
+def main() -> None:
+    """Load the pipeline deals of every company in VALUABLE_COMPANIES."""
+    hubspot = HubSpotClient()
+    loader = HubspotTodb()
+
+    for company_id in VALUABLE_COMPANIES:
+        # List this company's deals via the company -> deals association.
+        deal_ids = hubspot.get_deals_from_company(company_id)
+
+        for deal_id in tqdm(deal_ids, desc=f"Company {company_id}"):
+            deal_data = hubspot.from_deal_get_info(deal_id)
+
+            # Sibling HubSpotClient lookups return None on API errors; skip
+            # such deals rather than crash on .get() below.
+            if not deal_data:
+                print(f"Skipping deal {deal_id}: no deal data returned")
+                continue
+
+            # Only deals in the target pipeline are loaded.
+            if deal_data.get("pipeline") != PIPELINE_ID:
+                continue
+
+            listing_data = hubspot.from_deal_get_associated_listing(deal_id)
+
+            loader.new_record_to_hubspot_data(
+                deal_data,
+                company_id,
+                listing_data,
+                hubspot,
+            )
+
+            print(f"Uploaded deal_id {deal_id} to db")
+
+
+if __name__ == "__main__":
+    main()