Merge pull request #101 from Hestia-Homes/feature/scrape_livewest_and_southern

Feature/scrape livewest and southern
2026-06-08 11:17:29 +00:00 · 2025-11-04 20:17:07 +00:00 · 2025-11-04 20:17:07 +00:00 · 4ff233dbc9
commit 4ff233dbc9
parent 39f94f9420 ce2e327510
7 changed files with 217 additions and 61 deletions
--- a/.github/workflows/hubspot_abri_sync.yml
+++ b/.github/workflows/hubspot_abri_sync.yml
@ -1,4 +1,4 @@
-name: Hubspot Sync Abri
+name: Hubspot Sync

 on:
  schedule:
@ -6,7 +6,7 @@ on:
  workflow_dispatch:

 jobs:
-  hubspot-sync-abri:
+  hubspot-sync:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
@ -28,4 +28,4 @@ jobs:
        run: |
          pwd
          ls -la
-          poetry run python etl/hubSpotClient/scripts/hubspot_update_abri_script.py
+          poetry run python etl/hubSpotClient/scripts/hubspot_update_script.py
--- a/etl/db/hubSpotLoad.py
+++ b/etl/db/hubSpotLoad.py
@ -8,7 +8,7 @@ class HubspotTodb():

    def new_record_to_hubspot_data(self, deal_data, company, listing):
        print("This has been depreciated using new interface")
-        self.upsert_hubspot_deal(self, deal_data, company, listing) 
+        self.upsert_hubspot_deal(deal_data, company, listing) 

        
    def new_record_company(self, company_data):
--- a/etl/hubSpotClient/hubspotClient.py
+++ b/etl/hubSpotClient/hubspotClient.py
@ -6,6 +6,8 @@ from hubspot.crm.associations import ApiException

 class Companies(Enum):
    ABRI = "237615001799"
+    SOUTHERN_HOUSING_GROUP = "109343619305"
+    LIVEWEST = "86205872354"

 class DealStage(Enum):
    SURVEYED_COMPLETE_NEEDS_SIGN_OFF = "1617223914"
@ -143,3 +145,65 @@ class HubSpotClient():
        company_info = company.properties
        return company_info
    
+    def get_all_pipelines(self):
+        """
+        List the deal pipelines returned by the HubSpot pipelines API.
+
+        Returns:
+            A list of dicts, one per pipeline, each holding the pipeline's
+            label under "name" and its identifier under "id". If any
+            exception is raised while fetching or building the list, the
+            error is logged and an empty list is returned instead.
+        """
+        try:
+            fetched = self.client.crm.pipelines.pipelines_api.get_all(object_type="deals")
+
+            found = []
+            for entry in fetched.results:
+                found.append({"name": entry.label, "id": entry.id})
+
+            self.logger.info(f"Retrieved {len(found)} pipelines.")
+            return found
+
+        except Exception as e:
+            # Best effort: callers get an empty list rather than an exception.
+            self.logger.error(f"Error retrieving pipelines: {e}")
+            return []
+        
+    def get_deal_stages(self, pipeline_id=None):
+        """
+        Collect deal stages from the HubSpot deal pipelines.
+
+        Args:
+            pipeline_id: Optional pipeline identifier. When truthy, only the
+                pipeline whose id equals ``str(pipeline_id)`` contributes
+                stages; otherwise every pipeline does.
+
+        Returns:
+            A list of dicts with the keys "pipeline_name", "pipeline_id",
+            "stage_name" and "stage_id". On any exception the error is
+            logged and an empty list is returned.
+        """
+        try:
+            fetched = self.client.crm.pipelines.pipelines_api.get_all(object_type="deals")
+
+            collected = []
+            for entry in fetched.results:
+                # Keep a pipeline when no filter was given or when it is the requested one.
+                if not pipeline_id or entry.id == str(pipeline_id):
+                    for step in entry.stages:
+                        collected.append(
+                            {
+                                "pipeline_name": entry.label,
+                                "pipeline_id": entry.id,
+                                "stage_name": step.label,
+                                "stage_id": step.id,
+                            }
+                        )
+
+            if collected:
+                self.logger.info(f"Retrieved {len(collected)} deal stages.")
+            else:
+                self.logger.info(f"No deal stages found for pipeline {pipeline_id if pipeline_id else 'ALL'}")
+
+            return collected
+
+        except Exception as e:
+            # Best effort: callers get an empty list rather than an exception.
+            self.logger.error(f"Error retrieving deal stages: {e}")
+            return []
--- a/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py
+++ b/etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py
@ -2,44 +2,72 @@ from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline
 from tqdm import tqdm
 from etl.db.hubSpotLoad import HubspotTodb

-'''
-# TODO:
-     get one deal from db, from db
-     for avri only so far
-     add it to the db
-     show in frontend
-'''
-
-# get ALL deals
 hubspot = HubSpotClient()
+hubspot.get_deal_stages()
+db = HubspotTodb()

-# All deals from a pipeline_id via filter
-deals = hubspot.get_deal_ids_by_pipeline(
-    pipeline_id=Pipeline.OPERATIONS_SOCIAL_HOUSING.value,
-    )
-
-# deals from companies we care about
-valueable_deals = [
-    Companies.ABRI.value
+companies = [
+    Companies.ABRI,
+    Companies.LIVEWEST,
+    Companies.SOUTHERN_HOUSING_GROUP,
 ]
-deals_to_add = []

+# Track all failures and summary data
+all_failed_deals = []
+summary_report = {}

-deal_to_companies = {}
-loader = HubspotTodb()
-# Get all deals we care about
-for i,deal in enumerate(tqdm(deals)):
-    company = hubspot.from_deal_get_associated_company_id(deal)
-    if company in valueable_deals:
-        deals_to_add.append(deal)
-        deal_to_companies.update({deal: company})
-        deal_data = hubspot.from_deal_get_info(deal_id=deal)
-        listing_data = hubspot.from_deal_get_associated_listing(deal_id=deal)
-        loader.new_record_to_hubspot_data(deal_data, deal_to_companies[deal], listing_data)
-    
+for company in companies:
+    records = db.find_all_deals_with_company_id(company.value)

+    updated_count = 0
+    checked_count = 0
+    failed_deals = []

-#TODO check if database has abri data
-# make companies table
-# make a scrip that updates table
+    for deal in tqdm(records, desc=f"Checking HubSpot deals for {company.name}"):
+        checked_count += 1
+        try:
+            print(f"🔍 Working on deal {deal}")
+            was_up_to_date = db.update_deal(deal, hubspot)

+            if not was_up_to_date:
+                updated_count += 1
+
+        except Exception as e:
+            failed_info = {
+                "company": company.name,
+                "deal_id": deal,
+                "error": str(e)
+            }
+            failed_deals.append(failed_info)
+            all_failed_deals.append(failed_info)
+            print(f"❌ Failed to update deal {deal}: {e}")
+
+    # Store company-level summary (don’t print yet)
+    summary_report[company.name] = {
+        "checked": checked_count,
+        "updated": updated_count,
+        "up_to_date": checked_count - updated_count - len(failed_deals),
+        "failed": len(failed_deals),
+    }
+
+# ---- Final Summary Report ----
+print("\n" + "="*100)
+print("📊 FINAL SUMMARY REPORT")
+print("="*100)
+
+for company_name, stats in summary_report.items():
+    print(f"\n🏢 {company_name}")
+    print(f"   - Total deals checked: {stats['checked']}")
+    print(f"   - Updated deals:       {stats['updated']}")
+    print(f"   - Up-to-date deals:    {stats['up_to_date']}")
+    print(f"   - Failed deals:        {stats['failed']}")
+
+# ---- Global failure details ----
+if all_failed_deals:
+    print("\n" + "="*100)
+    print("⚠️ FAILED DEALS DETAILS")
+    print("="*100)
+    for f in all_failed_deals:
+        print(f"   - Company: {f['company']:<25} | Deal ID: {f['deal_id']} | Error: {f['error']}")
+else:
+    print("\n🎉 No failed deals across any company!")
--- a/etl/hubSpotClient/scripts/hubspot_company.py
+++ b/etl/hubSpotClient/scripts/hubspot_company.py
@ -5,7 +5,7 @@ from etl.db.hubSpotLoad import HubspotTodb
 hubspot = HubSpotClient()

 # All deals from a pipeline_id via filter
-company = hubspot.get_company_information(Companies.ABRI.value)
+company = hubspot.get_company_information(Companies.SOUTHERN_HOUSING_GROUP.value)

 loader = HubspotTodb()
 loader.new_record_company(company)
--- a/etl/hubSpotClient/scripts/hubspot_update_abri_script.py
+++ b/etl/hubSpotClient/scripts/hubspot_update_abri_script.py
@ -1,23 +0,0 @@
-from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline
-from tqdm import tqdm
-from etl.db.hubSpotLoad import HubspotTodb
-
-hubspot = HubSpotClient()
-db = HubspotTodb()
-
-records = db.find_all_deals_with_company_id(Companies.ABRI.value)
-
-updated_count = 0  # Counter for deals that needed updating
-checked_count = 0  # Optional: total processed counter
-
-for deal in tqdm(records, desc="Checking HubSpot deals"):
-    checked_count += 1
-    was_up_to_date = db.update_deal(deal, hubspot)
-
-    # update_deal() returns False when discrepancies are found
-    if not was_up_to_date:
-        updated_count += 1
-
-print(f"\n✅ Finished checking {checked_count} deals.")
-print(f"🧩 {updated_count} deal(s) were updated.")
-print(f"📈 {checked_count - updated_count} deal(s) were already up to date.")
--- a/etl/hubSpotClient/scripts/hubspot_update_script.py
+++ b/etl/hubSpotClient/scripts/hubspot_update_script.py
@ -0,0 +1,87 @@
+from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline
+from tqdm import tqdm
+from etl.db.hubSpotLoad import HubspotTodb
+
+# Consistency check between the deals stored in the database and HubSpot.
+# For every company listed below, each stored deal is passed to
+# db.update_deal(); per-company and global summaries are printed, and the
+# process exits with a non-zero status when at least one deal failed.
+
+# Clients shared by the whole run.
+hubspot = HubSpotClient()
+# NOTE(review): the result of this call is not used anywhere in this script —
+# confirm whether the call is still needed.
+hubspot.get_deal_stages()
+db = HubspotTodb()
+
+# Companies whose deals are checked, processed in this order.
+companies = [
+    Companies.ABRI,
+    Companies.LIVEWEST,
+    Companies.SOUTHERN_HOUSING_GROUP,
+]
+
+# Global trackers
+all_failed_deals = []  # one dict per failed deal, across all companies
+summary_report = {}  # company name -> counters for that company
+
+print("\n🚀 Starting HubSpot deal consistency check...\n")
+
+for company in companies:
+    print(f"\n🏢 Processing company: {company.name}")
+    records = db.find_all_deals_with_company_id(company.value)
+
+    updated_count = 0
+    checked_count = 0
+    failed_deals = []
+
+    for deal in tqdm(records, desc=f"Checking HubSpot deals for {company.name}"):
+        checked_count += 1
+        try:
+            print(f"🔍 Working on deal {deal}")
+            was_up_to_date = db.update_deal(deal, hubspot)
+
+            # A falsy result is counted as "the deal had to be updated".
+            if not was_up_to_date:
+                updated_count += 1
+                print(f"🧩 Deal {deal} was updated.")
+            else:
+                print(f"📈 Deal {deal} already up to date.")
+
+        except Exception as e:
+            # One bad deal must not stop the run: record it and carry on.
+            failed_info = {
+                "company": company.name,
+                "deal_id": deal,
+                "error": str(e)
+            }
+            failed_deals.append(failed_info)
+            all_failed_deals.append(failed_info)
+            print(f"❌ Failed to update deal {deal}: {e}")
+
+    # Store per-company summary for the final report
+    summary_report[company.name] = {
+        "checked": checked_count,
+        "updated": updated_count,
+        "failed": len(failed_deals),
+        # Failed deals were neither updated nor confirmed up to date.
+        "up_to_date": checked_count - updated_count - len(failed_deals),
+    }
+
+    # Company-level quick summary
+    print(f"\n✅ Finished checking {checked_count} deals for company {company.name}.")
+    print(f"   🧩 {updated_count} deal(s) were updated.")
+    print(f"   📈 {summary_report[company.name]['up_to_date']} deal(s) were already up to date.")
+    print(f"   ⚠️ {len(failed_deals)} deal(s) failed.\n")
+
+# ---- Final Summary Report ----
+print("\n" + "=" * 100)
+print("📊 FINAL SUMMARY REPORT (ALL COMPANIES)")
+print("=" * 100)
+
+for company_name, stats in summary_report.items():
+    print(f"\n🏢 {company_name}")
+    print(f"   - Total deals checked: {stats['checked']}")
+    print(f"   - Updated deals:       {stats['updated']}")
+    print(f"   - Up-to-date deals:    {stats['up_to_date']}")
+    print(f"   - Failed deals:        {stats['failed']}")
+
+# ---- Global Failed Deals ----
+if all_failed_deals:
+    print("\n" + "=" * 100)
+    print("⚠️ FAILED DEALS DETAILS")
+    print("=" * 100)
+    for f in all_failed_deals:
+        print(f"   - Company: {f['company']:<25} | Deal ID: {f['deal_id']} | Error: {f['error']}")
+else:
+    print("\n🎉 No failed deals across any company!")
+
+print("\n🏁 HubSpot deal consistency check complete!\n")
+
+# Exceptions are caught per deal above, so without this the process would
+# always exit 0 and the scheduled workflow would pass even when deals failed.
+if all_failed_deals:
+    raise SystemExit(1)