update script

This commit is contained in:
Jun-te Kim 2025-12-04 19:34:47 +00:00
parent 47d3a13838
commit b0974eb583
5 changed files with 38 additions and 68 deletions

View file

@ -21,6 +21,16 @@ jobs:
pip install poetry
poetry install --no-root
- name: Run scripts
env:
PYTHONPATH: ${{ github.workspace }}
DATABASE_URL: ${{ secrets.PROD_DATABASE_URL }}
run: |
pwd
ls -la
poetry run python etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py
- name: Run scripts
env:
PYTHONPATH: ${{ github.workspace }}

View file

@ -11,9 +11,9 @@ class HubspotTodb:
init_db()
self.s3 = S3Uploader()
def new_record_to_hubspot_data(self, deal_data, company, listing):
def new_record_to_hubspot_data(self, deal_data, company, listing, hubspot_client):
print("⚠️ Deprecated — use the new interface instead.")
return self.upsert_hubspot_deal(deal_data, company, listing)
return self.upsert_hubspot_deal(deal_data, company, listing, hubspot_client)
def new_record_company(self, company_data):
"""Adds a new record to the hubspot_company_data table."""

View file

@ -2,73 +2,32 @@ from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline
from tqdm import tqdm
from etl.db.hubSpotLoad import HubspotTodb
# get ALL deals
hubspot = HubSpotClient()
hubspot.get_deal_stages()
db = HubspotTodb()
companies = [
Companies.ABRI,
Companies.LIVEWEST,
Companies.SOUTHERN_HOUSING_GROUP,
Companies.SURESERVE
# All deals from a pipeline_id via filter
deals = hubspot.get_deal_ids_by_pipeline(
pipeline_id=Pipeline.OPERATIONS_SOCIAL_HOUSING.value,
)
# deals from companies we care about
valueable_deals = [
Companies.ABRI.value,
Companies.SOUTHERN_HOUSING_GROUP.value,
Companies.SURESERVE.value,
Companies.LIVEWEST.value,
]
deals_to_add = []
# Track all failures and summary data
all_failed_deals = []
summary_report = {}
for company in companies:
records = db.find_all_deals_with_company_id(company.value)
updated_count = 0
checked_count = 0
failed_deals = []
for deal in tqdm(records, desc=f"Checking HubSpot deals for {company.name}"):
checked_count += 1
try:
print(f"🔍 Working on deal {deal}")
was_up_to_date = db.update_deal(deal, hubspot)
if not was_up_to_date:
updated_count += 1
except Exception as e:
failed_info = {
"company": company.name,
"deal_id": deal,
"error": str(e)
}
failed_deals.append(failed_info)
all_failed_deals.append(failed_info)
print(f"❌ Failed to update deal {deal}: {e}")
# Store company-level summary (don't print yet)
summary_report[company.name] = {
"checked": checked_count,
"updated": updated_count,
"up_to_date": checked_count - updated_count - len(failed_deals),
"failed": len(failed_deals),
}
# ---- Final Summary Report ----
print("\n" + "="*100)
print("📊 FINAL SUMMARY REPORT")
print("="*100)
for company_name, stats in summary_report.items():
print(f"\n🏢 {company_name}")
print(f" - Total deals checked: {stats['checked']}")
print(f" - Updated deals: {stats['updated']}")
print(f" - Up-to-date deals: {stats['up_to_date']}")
print(f" - Failed deals: {stats['failed']}")
# ---- Global failure details ----
if all_failed_deals:
print("\n" + "="*100)
print("⚠️ FAILED DEALS DETAILS")
print("="*100)
for f in all_failed_deals:
print(f" - Company: {f['company']:<25} | Deal ID: {f['deal_id']} | Error: {f['error']}")
else:
print("\n🎉 No failed deals across any company!")
deal_to_companies = {}
loader = HubspotTodb()
# Get all deals we care about
for i,deal in enumerate(tqdm(deals)):
company = hubspot.from_deal_get_associated_company_id(deal)
if company in valueable_deals:
deals_to_add.append(deal)
deal_to_companies.update({deal: company})
deal_data = hubspot.from_deal_get_info(deal_id=deal)
listing_data = hubspot.from_deal_get_associated_listing(deal_id=deal)
loader.new_record_to_hubspot_data(deal_data, deal_to_companies[deal], listing_data, hubspot)

View file

@ -10,6 +10,7 @@ companies = [
Companies.ABRI,
Companies.LIVEWEST,
Companies.SOUTHERN_HOUSING_GROUP,
Companies.SURESERVE,
]
# Global trackers

View file

@ -1,2 +1,2 @@
# Example of how to run Python code in this environment
poetry run python etl/hubSpotClient/scripts/hubspot_company --debug
poetry run python etl/hubSpotClient/scripts/hubspot_abri_etl_first_time.py --debug