# Model/etl/hubspot/scripts/scraper/bulk_load.py

from etl.hubspot.hubspotClient import HubspotClient, Companies, Pipeline
from etl.hubspot.scripts.scraper.main import handler
from tqdm import tqdm
import json


# Pipeline identifier used as the filter in bulk_load(): a deal is processed
# only when its "pipeline" field equals this value.
PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value

# Companies handed to bulk_load() when this module is run as a script.
# Comment entries in or out to choose which companies get loaded.
companies = [
    # Companies.THE_GUINESS_PARTNERSHIP,
    # Companies.SOUTHERN_HOUSING_GROUP,
    Companies.CALICO_HOMES,
]


def bulk_load(companies: list[Companies] | None = None) -> None:
    """
    Run the scraper handler for every matching deal of the given companies.

    For each company the deal ids are fetched from HubSpot. A deal whose
    ``pipeline`` field differs from ``PIPELINE_ID`` is skipped; every other
    deal is passed to ``handler`` one at a time. According to the original
    description of this script, the handler is what loads the deal into the
    database. Progress is shown with tqdm bars and a per-company summary line
    is written once the company's deals have been visited.

    Args:
        companies: Companies to process. ``None`` (the default) processes
            every member of the ``Companies`` enum. An empty list processes
            nothing.

    Any exception raised by the HubSpot client or by ``handler`` propagates
    and aborts the remaining work.
    """
    hubspot = HubspotClient()
    # Use an explicit None check rather than `or`: an empty selection must
    # stay empty instead of silently expanding to a load of every company.
    targets = list(Companies) if companies is None else companies

    for company in tqdm(targets, desc="Companies", unit="co", leave=False):
        company_id = company.value
        deal_ids = hubspot.get_deal_ids_from_company(company_id)

        processed = 0
        # NOTE(review): position=0 is presumably also the slot used by the
        # outer "Companies" bar -- confirm the two bars render as intended.
        with tqdm(
            deal_ids, desc=company.name, unit="deal", leave=True, position=0
        ) as deal_bar:
            for deal_id in deal_bar:
                deal_data = hubspot.from_deal_id_get_info(deal_id)
                if deal_data.get("pipeline") != PIPELINE_ID:
                    deal_bar.set_postfix({"status": "skip", "deal": deal_id})
                    continue
                deal_bar.set_postfix({"status": "uploading", "deal": deal_id})
                # The handler takes an event dict with a JSON-encoded body per
                # record (presumably the queue event shape it receives when
                # deployed -- confirm against the handler).
                handler(
                    {"Records": [{"body": json.dumps({"hubspot_deal_id": deal_id})}]},
                    context=None,
                )
                processed += 1
                deal_bar.set_postfix({"status": "done", "deal": deal_id})

        # tqdm.write prints the summary without corrupting active bars.
        tqdm.write(f"[{company.name}] {processed}/{len(deal_ids)} deals in pipeline")


# Script entry point: load only the companies selected in the module-level list.
if __name__ == "__main__":
    bulk_load(companies=companies)