From b0974eb583f46f3dfeeb570417db78cd81ade6e1 Mon Sep 17 00:00:00 2001
From: Jun-te Kim
Date: Thu, 4 Dec 2025 19:34:47 +0000
Subject: [PATCH] update script

---
Review notes (this section is ignored when the patch is applied):
* The workflow step added below is named "Run first-time HubSpot ETL" so it
  can be told apart from the existing "Run scripts" step in the job log.
* The first-time script calls HubspotTodb.upsert_hubspot_deal directly.
  new_record_to_hubspot_data prints a deprecation warning on every call and
  only forwards its arguments to upsert_hubspot_deal.
* valueable_deals is renamed to valuable_company_ids: it holds
  Companies.*.value entries and is compared against a company id.
* The unused enumerate() index is dropped and the new script now ends with a
  trailing newline.
* NOTE(review): the added step has no `if:` condition, so the first-time load
  runs against PROD_DATABASE_URL whenever this job runs - confirm intended.
* NOTE(review): the rewritten script no longer wraps each deal in try/except,
  so one failing deal stops the whole load - confirm intended.
* NOTE(review): indentation and blank context lines in this patch were
  reconstructed; regenerate with `git format-patch` before applying.

 .github/workflows/hubspot_sync.yml            | 10 +++
 etl/db/hubSpotLoad.py                         |  4 +-
 .../scripts/hubspot_abri_etl_first_time.py    | 89 +++++--------------
 .../scripts/hubspot_update_script.py          |  1 +
 run_daily_script.sh                           |  2 +-
 5 files changed, 38 insertions(+), 68 deletions(-)

diff --git a/.github/workflows/hubspot_sync.yml b/.github/workflows/hubspot_sync.yml
index 90c682d..2bec11d 100644
--- a/.github/workflows/hubspot_sync.yml
+++ b/.github/workflows/hubspot_sync.yml
@@ -21,6 +21,16 @@ jobs:
           pip install poetry
           poetry install --no-root
 
+      - name: Run first-time HubSpot ETL
+        env:
+          PYTHONPATH: ${{ github.workspace }}
+          DATABASE_URL: ${{ secrets.PROD_DATABASE_URL }}
+        run: |
+          pwd
+          ls -la
+          poetry run python etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py
+
+
       - name: Run scripts
         env:
           PYTHONPATH: ${{ github.workspace }}
diff --git a/etl/db/hubSpotLoad.py b/etl/db/hubSpotLoad.py
index 286aa44..dca83d5 100644
--- a/etl/db/hubSpotLoad.py
+++ b/etl/db/hubSpotLoad.py
@@ -11,9 +11,9 @@ class HubspotTodb:
         init_db()
         self.s3 = S3Uploader()
 
-    def new_record_to_hubspot_data(self, deal_data, company, listing):
+    def new_record_to_hubspot_data(self, deal_data, company, listing, hubspot_client):
         print("⚠️ Deprecated β€” use the new interface instead.")
-        return self.upsert_hubspot_deal(deal_data, company, listing)
+        return self.upsert_hubspot_deal(deal_data, company, listing, hubspot_client)
 
     def new_record_company(self, company_data):
         """Adds a new record to the hubspot_company_data table."""
diff --git a/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py b/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py
index 075d76f..936765a 100644
--- a/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py
+++ b/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py
@@ -2,73 +2,32 @@ from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline
 from tqdm import tqdm
 from etl.db.hubSpotLoad import HubspotTodb
 
 
+# get ALL deals
 hubspot = HubSpotClient()
-hubspot.get_deal_stages()
-db = HubspotTodb()
-companies = [
-    Companies.ABRI,
-    Companies.LIVEWEST,
-    Companies.SOUTHERN_HOUSING_GROUP,
-    Companies.SURESERVE
+# All deals from a pipeline_id via filter
+deals = hubspot.get_deal_ids_by_pipeline(
+    pipeline_id=Pipeline.OPERATIONS_SOCIAL_HOUSING.value,
+)
+
+# company ids whose deals we care about
+valuable_company_ids = [
+    Companies.ABRI.value,
+    Companies.SOUTHERN_HOUSING_GROUP.value,
+    Companies.SURESERVE.value,
+    Companies.LIVEWEST.value,
 ]
+deals_to_add = []
 
 
-# Track all failures and summary data
-all_failed_deals = []
-summary_report = {}
-for company in companies:
-    records = db.find_all_deals_with_company_id(company.value)
-
-    updated_count = 0
-    checked_count = 0
-    failed_deals = []
-
-    for deal in tqdm(records, desc=f"Checking HubSpot deals for {company.name}"):
-        checked_count += 1
-        try:
-            print(f"πŸ” Working on deal {deal}")
-            was_up_to_date = db.update_deal(deal, hubspot)
-
-            if not was_up_to_date:
-                updated_count += 1
-
-        except Exception as e:
-            failed_info = {
-                "company": company.name,
-                "deal_id": deal,
-                "error": str(e)
-            }
-            failed_deals.append(failed_info)
-            all_failed_deals.append(failed_info)
-            print(f"❌ Failed to update deal {deal}: {e}")
-
-    # Store company-level summary (don’t print yet)
-    summary_report[company.name] = {
-        "checked": checked_count,
-        "updated": updated_count,
-        "up_to_date": checked_count - updated_count - len(failed_deals),
-        "failed": len(failed_deals),
-    }
-
-# ---- Final Summary Report ----
-print("\n" + "="*100)
-print("πŸ“Š FINAL SUMMARY REPORT")
-print("="*100)
-
-for company_name, stats in summary_report.items():
-    print(f"\n🏒 {company_name}")
-    print(f" - Total deals checked: {stats['checked']}")
-    print(f" - Updated deals: {stats['updated']}")
-    print(f" - Up-to-date deals: {stats['up_to_date']}")
-    print(f" - Failed deals: {stats['failed']}")
-
-# ---- Global failure details ----
-if all_failed_deals:
-    print("\n" + "="*100)
-    print("⚠️ FAILED DEALS DETAILS")
-    print("="*100)
-    for f in all_failed_deals:
-        print(f" - Company: {f['company']:<25} | Deal ID: {f['deal_id']} | Error: {f['error']}")
-else:
-    print("\nπŸŽ‰ No failed deals across any company!")
+deal_to_companies = {}
+loader = HubspotTodb()
+# Load every deal whose associated company is one we care about
+for deal in tqdm(deals):
+    company = hubspot.from_deal_get_associated_company_id(deal)
+    if company in valuable_company_ids:
+        deals_to_add.append(deal)
+        deal_to_companies[deal] = company
+        deal_data = hubspot.from_deal_get_info(deal_id=deal)
+        listing_data = hubspot.from_deal_get_associated_listing(deal_id=deal)
+        loader.upsert_hubspot_deal(deal_data, company, listing_data, hubspot)
diff --git a/etl/hubSpotClient/scripts/hubspot_update_script.py b/etl/hubSpotClient/scripts/hubspot_update_script.py
index 5705c23..2e1f4e3 100644
--- a/etl/hubSpotClient/scripts/hubspot_update_script.py
+++ b/etl/hubSpotClient/scripts/hubspot_update_script.py
@@ -10,6 +10,7 @@ companies = [
     Companies.ABRI,
     Companies.LIVEWEST,
     Companies.SOUTHERN_HOUSING_GROUP,
+    Companies.SURESERVE,
 ]
 
 # Global trackers
diff --git a/run_daily_script.sh b/run_daily_script.sh
index 5248030..6f8e301 100644
--- a/run_daily_script.sh
+++ b/run_daily_script.sh
@@ -1,2 +1,2 @@
 # Example of how to run python code in this environment
-poetry run python etl/hubSpotClient/scripts/hubspot_company --debug
+poetry run python etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py --debug