From db61f5916811a955fe1e6f67e54d08521a05f9e1 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 4 Nov 2025 14:40:33 +0000 Subject: [PATCH 1/3] save --- etl/hubSpotClient/hubspotClient.py | 62 +++++++++++++++++++ .../scripts/hubspot_update_abri_script.py | 1 + 2 files changed, 63 insertions(+) diff --git a/etl/hubSpotClient/hubspotClient.py b/etl/hubSpotClient/hubspotClient.py index 822aff8..0dc047e 100644 --- a/etl/hubSpotClient/hubspotClient.py +++ b/etl/hubSpotClient/hubspotClient.py @@ -143,3 +143,65 @@ class HubSpotClient(): company_info = company.properties return company_info + def get_all_pipelines(self): + """ + Retrieve all pipelines for deals, returning a list of dicts with pipeline names and IDs. + """ + try: + pipelines_api = self.client.crm.pipelines.pipelines_api + response = pipelines_api.get_all(object_type="deals") + + pipelines = [ + { + "name": pipeline.label, + "id": pipeline.id + } + for pipeline in response.results + ] + + self.logger.info(f"Retrieved {len(pipelines)} pipelines.") + return pipelines + + except Exception as e: + self.logger.error(f"Error retrieving pipelines: {e}") + return [] + + def get_deal_stages(self, pipeline_id=None): + """ + Retrieve all deal stages for a given pipeline. + If no pipeline_id is provided, retrieves all stages for all pipelines. + Returns a list of dicts with pipeline name, stage name, and stage ID. 
+ """ + try: + pipelines_api = self.client.crm.pipelines.pipelines_api + response = pipelines_api.get_all(object_type="deals") + + all_stages = [] + + for pipeline in response.results: + # Skip other pipelines if a specific one is requested + if pipeline_id and pipeline.id != str(pipeline_id): + continue + + stages = [ + { + "pipeline_name": pipeline.label, + "pipeline_id": pipeline.id, + "stage_name": stage.label, + "stage_id": stage.id + } + for stage in pipeline.stages + ] + + all_stages.extend(stages) + + if not all_stages: + self.logger.info(f"No deal stages found for pipeline {pipeline_id if pipeline_id else 'ALL'}") + else: + self.logger.info(f"Retrieved {len(all_stages)} deal stages.") + + return all_stages + + except Exception as e: + self.logger.error(f"Error retrieving deal stages: {e}") + return [] \ No newline at end of file diff --git a/etl/hubSpotClient/scripts/hubspot_update_abri_script.py b/etl/hubSpotClient/scripts/hubspot_update_abri_script.py index 428fd99..3c82de9 100644 --- a/etl/hubSpotClient/scripts/hubspot_update_abri_script.py +++ b/etl/hubSpotClient/scripts/hubspot_update_abri_script.py @@ -3,6 +3,7 @@ from tqdm import tqdm from etl.db.hubSpotLoad import HubspotTodb hubspot = HubSpotClient() +hubspot.get_deal_stages() db = HubspotTodb() records = db.find_all_deals_with_company_id(Companies.ABRI.value) From a4f6bc1f62f341492cfc7f2b6d47688816447283 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 4 Nov 2025 15:15:18 +0000 Subject: [PATCH 2/3] new files --- ...hubspot_abri_sync.yml => hubspot_sync.yml} | 6 ++-- etl/db/hubSpotLoad.py | 2 +- etl/hubSpotClient/hubspotClient.py | 2 ++ .../scripts/hubspot_abri_etl_first_time.py | 5 ++- .../scripts/hubspot_update_abri_script.py | 24 -------------- .../scripts/hubspot_update_script.py | 31 +++++++++++++++++++ 6 files changed, 41 insertions(+), 29 deletions(-) rename .github/workflows/{hubspot_abri_sync.yml => hubspot_sync.yml} (91%) delete mode 100644 
etl/hubSpotClient/scripts/hubspot_update_abri_script.py create mode 100644 etl/hubSpotClient/scripts/hubspot_update_script.py diff --git a/.github/workflows/hubspot_abri_sync.yml b/.github/workflows/hubspot_sync.yml similarity index 91% rename from .github/workflows/hubspot_abri_sync.yml rename to .github/workflows/hubspot_sync.yml index d2e89f9..9c4fd63 100644 --- a/.github/workflows/hubspot_abri_sync.yml +++ b/.github/workflows/hubspot_sync.yml @@ -1,4 +1,4 @@ -name: Hubspot Sync Abri +name: Hubspot Sync on: schedule: @@ -6,7 +6,7 @@ on: workflow_dispatch: jobs: - hubspot-sync-abri: + hubspot-sync: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 @@ -28,4 +28,4 @@ jobs: run: | pwd ls -la - poetry run python etl/hubSpotClient/scripts/hubspot_update_abri_script.py \ No newline at end of file + poetry run python etl/hubSpotClient/scripts/hubspot_update_script.py \ No newline at end of file diff --git a/etl/db/hubSpotLoad.py b/etl/db/hubSpotLoad.py index 4226446..b09e0eb 100644 --- a/etl/db/hubSpotLoad.py +++ b/etl/db/hubSpotLoad.py @@ -8,7 +8,7 @@ class HubspotTodb(): def new_record_to_hubspot_data(self, deal_data, company, listing): print("This has been depreciated using new interface") - self.upsert_hubspot_deal(self, deal_data, company, listing) + self.upsert_hubspot_deal(deal_data, company, listing) def new_record_company(self, company_data): diff --git a/etl/hubSpotClient/hubspotClient.py b/etl/hubSpotClient/hubspotClient.py index 0dc047e..29a40fa 100644 --- a/etl/hubSpotClient/hubspotClient.py +++ b/etl/hubSpotClient/hubspotClient.py @@ -6,6 +6,8 @@ from hubspot.crm.associations import ApiException class Companies(Enum): ABRI = "237615001799" + SOUTHERN_HOUSING_GROUP = "109343619305" + LIVEWEST = "86205872354" class DealStage(Enum): SURVEYED_COMPLETE_NEEDS_SIGN_OFF = "1617223914" diff --git a/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py b/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py index 9dc4e4c..6f35fb5 100644 --- 
a/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py +++ b/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py @@ -20,8 +20,11 @@ deals = hubspot.get_deal_ids_by_pipeline( # deals from companies we care about valueable_deals = [ - Companies.ABRI.value + Companies.ABRI.value, + Companies.LIVEWEST.value, + Companies.SOUTHERN_HOUSING_GROUP.value, ] + deals_to_add = [] diff --git a/etl/hubSpotClient/scripts/hubspot_update_abri_script.py b/etl/hubSpotClient/scripts/hubspot_update_abri_script.py deleted file mode 100644 index 3c82de9..0000000 --- a/etl/hubSpotClient/scripts/hubspot_update_abri_script.py +++ /dev/null @@ -1,24 +0,0 @@ -from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline -from tqdm import tqdm -from etl.db.hubSpotLoad import HubspotTodb - -hubspot = HubSpotClient() -hubspot.get_deal_stages() -db = HubspotTodb() - -records = db.find_all_deals_with_company_id(Companies.ABRI.value) - -updated_count = 0 # Counter for deals that needed updating -checked_count = 0 # Optional: total processed counter - -for deal in tqdm(records, desc="Checking HubSpot deals"): - checked_count += 1 - was_up_to_date = db.update_deal(deal, hubspot) - - # update_deal() returns False when discrepancies are found - if not was_up_to_date: - updated_count += 1 - -print(f"\nβœ… Finished checking {checked_count} deals.") -print(f"🧩 {updated_count} deal(s) were updated.") -print(f"πŸ“ˆ {checked_count - updated_count} deal(s) were already up to date.") \ No newline at end of file diff --git a/etl/hubSpotClient/scripts/hubspot_update_script.py b/etl/hubSpotClient/scripts/hubspot_update_script.py new file mode 100644 index 0000000..9c52d89 --- /dev/null +++ b/etl/hubSpotClient/scripts/hubspot_update_script.py @@ -0,0 +1,31 @@ +from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline +from tqdm import tqdm +from etl.db.hubSpotLoad import HubspotTodb + +hubspot = HubSpotClient() +hubspot.get_deal_stages() +db = HubspotTodb() + 
+companies = [ + Companies.ABRI, + Companies.LIVEWEST, + Companies.SOUTHERN_HOUSING_GROUP, +] + +for company in companies: + records = db.find_all_deals_with_company_id(company.value) + + updated_count = 0 # Counter for deals that needed updating + checked_count = 0 # Optional: total processed counter + + for deal in tqdm(records, desc="Checking HubSpot deals"): + checked_count += 1 + was_up_to_date = db.update_deal(deal, hubspot) + + # update_deal() returns False when discrepancies are found + if not was_up_to_date: + updated_count += 1 + + print(f"\nβœ… Finished checking {checked_count} deals for company {company.name}.") + print(f"🧩 {updated_count} deal(s) were updated.") + print(f"πŸ“ˆ {checked_count - updated_count} deal(s) were already up to date.") \ No newline at end of file From ce2e327510631d1a2d3c7d7d41f943c378b8a58c Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 4 Nov 2025 20:15:44 +0000 Subject: [PATCH 3/3] script makes sure it runs --- .../scripts/hubspot_abri_etl_first_time.py | 95 ++++++++++++------- etl/hubSpotClient/scripts/hubspot_company.py | 2 +- .../scripts/hubspot_update_script.py | 74 +++++++++++++-- 3 files changed, 126 insertions(+), 45 deletions(-) diff --git a/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py b/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py index 6f35fb5..0539f58 100644 --- a/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py +++ b/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py @@ -2,47 +2,72 @@ from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline from tqdm import tqdm from etl.db.hubSpotLoad import HubspotTodb -''' -# TODO: - get one deal from db, from db - for avri only so far - add it to the db - show in frontend -''' - -# get ALL deals hubspot = HubSpotClient() +hubspot.get_deal_stages() +db = HubspotTodb() -# All deals from a pipeline_id via filter -deals = hubspot.get_deal_ids_by_pipeline( - pipeline_id=Pipeline.OPERATIONS_SOCIAL_HOUSING.value, - ) - 
-# deals from companies we care about -valueable_deals = [ - Companies.ABRI.value, - Companies.LIVEWEST.value, - Companies.SOUTHERN_HOUSING_GROUP.value, +companies = [ + Companies.ABRI, + Companies.LIVEWEST, + Companies.SOUTHERN_HOUSING_GROUP, ] -deals_to_add = [] +# Track all failures and summary data +all_failed_deals = [] +summary_report = {} +for company in companies: + records = db.find_all_deals_with_company_id(company.value) -deal_to_companies = {} -loader = HubspotTodb() -# Get all deals we care about -for i,deal in enumerate(tqdm(deals)): - company = hubspot.from_deal_get_associated_company_id(deal) - if company in valueable_deals: - deals_to_add.append(deal) - deal_to_companies.update({deal: company}) - deal_data = hubspot.from_deal_get_info(deal_id=deal) - listing_data = hubspot.from_deal_get_associated_listing(deal_id=deal) - loader.new_record_to_hubspot_data(deal_data, deal_to_companies[deal], listing_data) - + updated_count = 0 + checked_count = 0 + failed_deals = [] + for deal in tqdm(records, desc=f"Checking HubSpot deals for {company.name}"): + checked_count += 1 + try: + print(f"πŸ” Working on deal {deal}") + was_up_to_date = db.update_deal(deal, hubspot) -#TODO check if database has abri data -# make companies table -# make a scrip that updates table + if not was_up_to_date: + updated_count += 1 + except Exception as e: + failed_info = { + "company": company.name, + "deal_id": deal, + "error": str(e) + } + failed_deals.append(failed_info) + all_failed_deals.append(failed_info) + print(f"❌ Failed to update deal {deal}: {e}") + + # Store company-level summary (don’t print yet) + summary_report[company.name] = { + "checked": checked_count, + "updated": updated_count, + "up_to_date": checked_count - updated_count - len(failed_deals), + "failed": len(failed_deals), + } + +# ---- Final Summary Report ---- +print("\n" + "="*100) +print("πŸ“Š FINAL SUMMARY REPORT") +print("="*100) + +for company_name, stats in summary_report.items(): + print(f"\n🏒 
{company_name}") + print(f" - Total deals checked: {stats['checked']}") + print(f" - Updated deals: {stats['updated']}") + print(f" - Up-to-date deals: {stats['up_to_date']}") + print(f" - Failed deals: {stats['failed']}") + +# ---- Global failure details ---- +if all_failed_deals: + print("\n" + "="*100) + print("⚠️ FAILED DEALS DETAILS") + print("="*100) + for f in all_failed_deals: + print(f" - Company: {f['company']:<25} | Deal ID: {f['deal_id']} | Error: {f['error']}") +else: + print("\nπŸŽ‰ No failed deals across any company!") diff --git a/etl/hubSpotClient/scripts/hubspot_company.py b/etl/hubSpotClient/scripts/hubspot_company.py index b8f8342..2e55b76 100644 --- a/etl/hubSpotClient/scripts/hubspot_company.py +++ b/etl/hubSpotClient/scripts/hubspot_company.py @@ -5,7 +5,7 @@ from etl.db.hubSpotLoad import HubspotTodb hubspot = HubSpotClient() # All deals from a pipeline_id via filter -company = hubspot.get_company_information(Companies.ABRI.value) +company = hubspot.get_company_information(Companies.SOUTHERN_HOUSING_GROUP.value) loader = HubspotTodb() loader.new_record_company(company) diff --git a/etl/hubSpotClient/scripts/hubspot_update_script.py b/etl/hubSpotClient/scripts/hubspot_update_script.py index 9c52d89..5705c23 100644 --- a/etl/hubSpotClient/scripts/hubspot_update_script.py +++ b/etl/hubSpotClient/scripts/hubspot_update_script.py @@ -12,20 +12,76 @@ companies = [ Companies.SOUTHERN_HOUSING_GROUP, ] +# Global trackers +all_failed_deals = [] +summary_report = {} + +print("\nπŸš€ Starting HubSpot deal consistency check...\n") + for company in companies: + print(f"\n🏒 Processing company: {company.name}") records = db.find_all_deals_with_company_id(company.value) - updated_count = 0 # Counter for deals that needed updating - checked_count = 0 # Optional: total processed counter + updated_count = 0 + checked_count = 0 + failed_deals = [] - for deal in tqdm(records, desc="Checking HubSpot deals"): + for deal in tqdm(records, desc=f"Checking HubSpot 
deals for {company.name}"): checked_count += 1 - was_up_to_date = db.update_deal(deal, hubspot) + try: + print(f"πŸ” Working on deal {deal}") + was_up_to_date = db.update_deal(deal, hubspot) - # update_deal() returns False when discrepancies are found - if not was_up_to_date: - updated_count += 1 + if not was_up_to_date: + updated_count += 1 + print(f"🧩 Deal {deal} was updated.") + else: + print(f"πŸ“ˆ Deal {deal} already up to date.") + except Exception as e: + failed_info = { + "company": company.name, + "deal_id": deal, + "error": str(e) + } + failed_deals.append(failed_info) + all_failed_deals.append(failed_info) + print(f"❌ Failed to update deal {deal}: {e}") + + # Store per-company summary (don’t print yet) + summary_report[company.name] = { + "checked": checked_count, + "updated": updated_count, + "failed": len(failed_deals), + "up_to_date": checked_count - updated_count - len(failed_deals), + } + + # Company-level quick summary print(f"\nβœ… Finished checking {checked_count} deals for company {company.name}.") - print(f"🧩 {updated_count} deal(s) were updated.") - print(f"πŸ“ˆ {checked_count - updated_count} deal(s) were already up to date.") \ No newline at end of file + print(f" 🧩 {updated_count} deal(s) were updated.") + print(f" πŸ“ˆ {summary_report[company.name]['up_to_date']} deal(s) were already up to date.") + print(f" ⚠️ {len(failed_deals)} deal(s) failed.\n") + +# ---- Final Summary Report ---- +print("\n" + "=" * 100) +print("πŸ“Š FINAL SUMMARY REPORT (ALL COMPANIES)") +print("=" * 100) + +for company_name, stats in summary_report.items(): + print(f"\n🏒 {company_name}") + print(f" - Total deals checked: {stats['checked']}") + print(f" - Updated deals: {stats['updated']}") + print(f" - Up-to-date deals: {stats['up_to_date']}") + print(f" - Failed deals: {stats['failed']}") + +# ---- Global Failed Deals ---- +if all_failed_deals: + print("\n" + "=" * 100) + print("⚠️ FAILED DEALS DETAILS") + print("=" * 100) + for f in all_failed_deals: + print(f" 
- Company: {f['company']:<25} | Deal ID: {f['deal_id']} | Error: {f['error']}") +else: + print("\n🎉 No failed deals across any company!") + +print("\n🏁 HubSpot deal consistency check complete!\n")