mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
Merge pull request #101 from Hestia-Homes/feature/scrape_livewest_and_southern
Feature/scrape livewest and southern
This commit is contained in:
commit
4ff233dbc9
7 changed files with 217 additions and 61 deletions
|
|
@ -1,4 +1,4 @@
|
|||
name: Hubspot Sync Abri
|
||||
name: Hubspot Sync
|
||||
|
||||
on:
|
||||
schedule:
|
||||
|
|
@ -6,7 +6,7 @@ on:
|
|||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
hubspot-sync-abri:
|
||||
hubspot-sync:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
|
@ -28,4 +28,4 @@ jobs:
|
|||
run: |
|
||||
pwd
|
||||
ls -la
|
||||
poetry run python etl/hubSpotClient/scripts/hubspot_update_abri_script.py
|
||||
poetry run python etl/hubSpotClient/scripts/hubspot_update_script.py
|
||||
|
|
@ -8,7 +8,7 @@ class HubspotTodb():
|
|||
|
||||
def new_record_to_hubspot_data(self, deal_data, company, listing):
    """Deprecated entry point that forwards to ``upsert_hubspot_deal``.

    Prints a deprecation notice and delegates exactly once, passing the
    arguments through unchanged.

    Args:
        deal_data: Deal payload, handed to ``upsert_hubspot_deal`` as-is.
        company: Company identifier/record associated with the deal.
        listing: Listing payload associated with the deal.
    """
    print("This has been depreciated using new interface")
    # Call through the bound method: passing ``self`` explicitly as well
    # would shift every argument by one position.
    self.upsert_hubspot_deal(deal_data, company, listing)
|
||||
|
||||
|
||||
def new_record_company(self, company_data):
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ from hubspot.crm.associations import ApiException
|
|||
|
||||
class Companies(Enum):
    """Companies whose deals are synced.

    Each member's value is the company ID string that the sync scripts pass
    to ``get_company_information`` and ``find_all_deals_with_company_id``.
    """

    ABRI = "237615001799"
    SOUTHERN_HOUSING_GROUP = "109343619305"
    LIVEWEST = "86205872354"
|
||||
|
||||
class DealStage(Enum):
|
||||
SURVEYED_COMPLETE_NEEDS_SIGN_OFF = "1617223914"
|
||||
|
|
@ -143,3 +145,65 @@ class HubSpotClient():
|
|||
company_info = company.properties
|
||||
return company_info
|
||||
|
||||
def get_all_pipelines(self):
    """Retrieve all deal pipelines.

    Returns:
        A list of ``{"name": <pipeline label>, "id": <pipeline id>}`` dicts,
        one per pipeline in the API response. Any exception raised while
        fetching or reading the response is logged and an empty list is
        returned instead (best-effort lookup).
    """
    try:
        pipelines_api = self.client.crm.pipelines.pipelines_api
        response = pipelines_api.get_all(object_type="deals")

        pipelines = [
            {
                "name": pipeline.label,
                "id": pipeline.id,
            }
            for pipeline in response.results
        ]

        self.logger.info(f"Retrieved {len(pipelines)} pipelines.")
        return pipelines

    except Exception as e:
        # Deliberately broad: callers get an empty list rather than a crash.
        self.logger.error(f"Error retrieving pipelines: {e}")
        return []
|
||||
|
||||
def get_deal_stages(self, pipeline_id=None):
    """Retrieve deal stages for one pipeline, or for every pipeline.

    Args:
        pipeline_id: Pipeline to restrict the result to. It is compared as a
            string against each pipeline's ``id``. A falsy value (the
            default ``None``) means "all pipelines".

    Returns:
        A list of dicts with keys ``pipeline_name``, ``pipeline_id``,
        ``stage_name`` and ``stage_id``. Any exception raised while fetching
        or reading the response is logged and an empty list is returned.
    """
    try:
        pipelines_api = self.client.crm.pipelines.pipelines_api
        response = pipelines_api.get_all(object_type="deals")

        # Normalise the filter once instead of on every loop iteration.
        wanted_id = str(pipeline_id) if pipeline_id else None

        all_stages = []
        for pipeline in response.results:
            # Skip other pipelines if a specific one is requested
            if wanted_id is not None and pipeline.id != wanted_id:
                continue

            all_stages.extend(
                {
                    "pipeline_name": pipeline.label,
                    "pipeline_id": pipeline.id,
                    "stage_name": stage.label,
                    "stage_id": stage.id,
                }
                for stage in pipeline.stages
            )

        if not all_stages:
            self.logger.info(f"No deal stages found for pipeline {pipeline_id if pipeline_id else 'ALL'}")
        else:
            self.logger.info(f"Retrieved {len(all_stages)} deal stages.")

        return all_stages

    except Exception as e:
        # Deliberately broad: callers get an empty list rather than a crash.
        self.logger.error(f"Error retrieving deal stages: {e}")
        return []
|
||||
|
|
@ -2,44 +2,72 @@ from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline
|
|||
from tqdm import tqdm
|
||||
from etl.db.hubSpotLoad import HubspotTodb
|
||||
|
||||
'''
|
||||
# TODO:
|
||||
get one deal from db, from db
|
||||
for Abri only so far
|
||||
add it to the db
|
||||
show in frontend
|
||||
'''
|
||||
|
||||
# get ALL deals
|
||||
hubspot = HubSpotClient()
|
||||
hubspot.get_deal_stages()
|
||||
db = HubspotTodb()
|
||||
|
||||
# All deals from a pipeline_id via filter
|
||||
deals = hubspot.get_deal_ids_by_pipeline(
|
||||
pipeline_id=Pipeline.OPERATIONS_SOCIAL_HOUSING.value,
|
||||
)
|
||||
|
||||
# deals from companies we care about
|
||||
valueable_deals = [
|
||||
Companies.ABRI.value
|
||||
companies = [
|
||||
Companies.ABRI,
|
||||
Companies.LIVEWEST,
|
||||
Companies.SOUTHERN_HOUSING_GROUP,
|
||||
]
|
||||
deals_to_add = []
|
||||
|
||||
# Track all failures and summary data
|
||||
all_failed_deals = []
|
||||
summary_report = {}
|
||||
|
||||
deal_to_companies = {}
|
||||
loader = HubspotTodb()
|
||||
# Get all deals we care about
|
||||
for i,deal in enumerate(tqdm(deals)):
|
||||
company = hubspot.from_deal_get_associated_company_id(deal)
|
||||
if company in valueable_deals:
|
||||
deals_to_add.append(deal)
|
||||
deal_to_companies.update({deal: company})
|
||||
deal_data = hubspot.from_deal_get_info(deal_id=deal)
|
||||
listing_data = hubspot.from_deal_get_associated_listing(deal_id=deal)
|
||||
loader.new_record_to_hubspot_data(deal_data, deal_to_companies[deal], listing_data)
|
||||
|
||||
for company in companies:
|
||||
records = db.find_all_deals_with_company_id(company.value)
|
||||
|
||||
updated_count = 0
|
||||
checked_count = 0
|
||||
failed_deals = []
|
||||
|
||||
#TODO check if database has abri data
|
||||
# make companies table
|
||||
# make a script that updates the table
|
||||
for deal in tqdm(records, desc=f"Checking HubSpot deals for {company.name}"):
|
||||
checked_count += 1
|
||||
try:
|
||||
print(f"🔍 Working on deal {deal}")
|
||||
was_up_to_date = db.update_deal(deal, hubspot)
|
||||
|
||||
if not was_up_to_date:
|
||||
updated_count += 1
|
||||
|
||||
except Exception as e:
|
||||
failed_info = {
|
||||
"company": company.name,
|
||||
"deal_id": deal,
|
||||
"error": str(e)
|
||||
}
|
||||
failed_deals.append(failed_info)
|
||||
all_failed_deals.append(failed_info)
|
||||
print(f"❌ Failed to update deal {deal}: {e}")
|
||||
|
||||
# Store company-level summary (don’t print yet)
|
||||
summary_report[company.name] = {
|
||||
"checked": checked_count,
|
||||
"updated": updated_count,
|
||||
"up_to_date": checked_count - updated_count - len(failed_deals),
|
||||
"failed": len(failed_deals),
|
||||
}
|
||||
|
||||
# ---- Final Summary Report ----
|
||||
print("\n" + "="*100)
|
||||
print("📊 FINAL SUMMARY REPORT")
|
||||
print("="*100)
|
||||
|
||||
for company_name, stats in summary_report.items():
|
||||
print(f"\n🏢 {company_name}")
|
||||
print(f" - Total deals checked: {stats['checked']}")
|
||||
print(f" - Updated deals: {stats['updated']}")
|
||||
print(f" - Up-to-date deals: {stats['up_to_date']}")
|
||||
print(f" - Failed deals: {stats['failed']}")
|
||||
|
||||
# ---- Global failure details ----
|
||||
if all_failed_deals:
|
||||
print("\n" + "="*100)
|
||||
print("⚠️ FAILED DEALS DETAILS")
|
||||
print("="*100)
|
||||
for f in all_failed_deals:
|
||||
print(f" - Company: {f['company']:<25} | Deal ID: {f['deal_id']} | Error: {f['error']}")
|
||||
else:
|
||||
print("\n🎉 No failed deals across any company!")
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from etl.db.hubSpotLoad import HubspotTodb
|
|||
hubspot = HubSpotClient()
|
||||
|
||||
# All deals from a pipeline_id via filter
|
||||
company = hubspot.get_company_information(Companies.ABRI.value)
|
||||
company = hubspot.get_company_information(Companies.SOUTHERN_HOUSING_GROUP.value)
|
||||
|
||||
loader = HubspotTodb()
|
||||
loader.new_record_company(company)
|
||||
|
|
|
|||
|
|
@ -1,23 +0,0 @@
|
|||
from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline
|
||||
from tqdm import tqdm
|
||||
from etl.db.hubSpotLoad import HubspotTodb
|
||||
|
||||
hubspot = HubSpotClient()
|
||||
db = HubspotTodb()
|
||||
|
||||
records = db.find_all_deals_with_company_id(Companies.ABRI.value)
|
||||
|
||||
updated_count = 0 # Counter for deals that needed updating
|
||||
checked_count = 0 # Optional: total processed counter
|
||||
|
||||
for deal in tqdm(records, desc="Checking HubSpot deals"):
|
||||
checked_count += 1
|
||||
was_up_to_date = db.update_deal(deal, hubspot)
|
||||
|
||||
# update_deal() returns False when discrepancies are found
|
||||
if not was_up_to_date:
|
||||
updated_count += 1
|
||||
|
||||
print(f"\n✅ Finished checking {checked_count} deals.")
|
||||
print(f"🧩 {updated_count} deal(s) were updated.")
|
||||
print(f"📈 {checked_count - updated_count} deal(s) were already up to date.")
|
||||
87
etl/hubSpotClient/scripts/hubspot_update_script.py
Normal file
87
etl/hubSpotClient/scripts/hubspot_update_script.py
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline
|
||||
from tqdm import tqdm
|
||||
from etl.db.hubSpotLoad import HubspotTodb
|
||||
|
||||
# Consistency check between stored deals and HubSpot: for every tracked
# company, each stored deal is passed to ``db.update_deal`` and the outcome
# (updated / already up to date / failed) is tallied and reported.
hubspot = HubSpotClient()
# NOTE(review): the return value is not used in this script; the call is
# kept so the script's behaviour is unchanged — confirm whether it is needed.
hubspot.get_deal_stages()
db = HubspotTodb()

companies = [
    Companies.ABRI,
    Companies.LIVEWEST,
    Companies.SOUTHERN_HOUSING_GROUP,
]

# Global trackers
all_failed_deals = []  # one dict per failed deal, across all companies
summary_report = {}    # company name -> per-company counters

print("\n🚀 Starting HubSpot deal consistency check...\n")

for company in companies:
    print(f"\n🏢 Processing company: {company.name}")
    records = db.find_all_deals_with_company_id(company.value)

    updated_count = 0
    checked_count = 0
    failed_deals = []

    for deal in tqdm(records, desc=f"Checking HubSpot deals for {company.name}"):
        checked_count += 1
        try:
            print(f"🔍 Working on deal {deal}")
            was_up_to_date = db.update_deal(deal, hubspot)

            # A falsy result is counted as "the deal had to be updated".
            if not was_up_to_date:
                updated_count += 1
                print(f"🧩 Deal {deal} was updated.")
            else:
                print(f"📈 Deal {deal} already up to date.")

        except Exception as e:
            # One bad deal must not abort the run: record it and carry on.
            failed_info = {
                "company": company.name,
                "deal_id": deal,
                "error": str(e)
            }
            failed_deals.append(failed_info)
            all_failed_deals.append(failed_info)
            print(f"❌ Failed to update deal {deal}: {e}")

    # Store per-company summary (don’t print yet)
    summary_report[company.name] = {
        "checked": checked_count,
        "updated": updated_count,
        "failed": len(failed_deals),
        # Failed deals were checked but are neither updated nor up to date.
        "up_to_date": checked_count - updated_count - len(failed_deals),
    }

    # Company-level quick summary
    print(f"\n✅ Finished checking {checked_count} deals for company {company.name}.")
    print(f" 🧩 {updated_count} deal(s) were updated.")
    print(f" 📈 {summary_report[company.name]['up_to_date']} deal(s) were already up to date.")
    print(f" ⚠️ {len(failed_deals)} deal(s) failed.\n")

# ---- Final Summary Report ----
print("\n" + "=" * 100)
print("📊 FINAL SUMMARY REPORT (ALL COMPANIES)")
print("=" * 100)

for company_name, stats in summary_report.items():
    print(f"\n🏢 {company_name}")
    print(f" - Total deals checked: {stats['checked']}")
    print(f" - Updated deals: {stats['updated']}")
    print(f" - Up-to-date deals: {stats['up_to_date']}")
    print(f" - Failed deals: {stats['failed']}")

# ---- Global Failed Deals ----
if all_failed_deals:
    print("\n" + "=" * 100)
    print("⚠️ FAILED DEALS DETAILS")
    print("=" * 100)
    for f in all_failed_deals:
        print(f" - Company: {f['company']:<25} | Deal ID: {f['deal_id']} | Error: {f['error']}")
else:
    print("\n🎉 No failed deals across any company!")

print("\n🏁 HubSpot deal consistency check complete!\n")
|
||||
Loading…
Add table
Reference in a new issue