Model/etl/hubspot/scripts/scraper/bulk_load.py
2026-04-02 10:21:22 +00:00

51 lines
1.7 KiB
Python

from etl.hubspot.hubspotClient import HubspotClient, Companies, Pipeline
from etl.hubspot.scripts.scraper.main import handler
from tqdm import tqdm
import json
# Pipeline id a deal's "pipeline" field must equal for the deal to be processed.
PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value

# Companies passed to bulk_load() when this module is run as a script.
# Commented-out members are excluded from the run.
companies = [
    # Companies.THE_GUINESS_PARTNERSHIP,
    # Companies.SOUTHERN_HOUSING_GROUP,
    Companies.CALICO_HOMES,
]
def bulk_load(companies: list[Companies] | None = None) -> None:
    """
    Run the scraper ``handler`` for every deal of the given companies that
    belongs to the pipeline identified by ``PIPELINE_ID``.

    For each company, the deal ids are fetched through ``HubspotClient``. Each
    deal's info is then fetched, and deals whose ``"pipeline"`` value differs
    from ``PIPELINE_ID`` are skipped. Every remaining deal is handed to
    ``handler`` as a single-record event whose body is the JSON document
    ``{"hubspot_deal_id": <deal_id>}``.

    Args:
        companies: Companies whose deals should be processed. ``None`` (the
            default) processes every member of the ``Companies`` enum. An
            empty list processes nothing.

    Returns:
        None. Progress is reported through tqdm bars and a per-company
        summary line.
    """
    hubspot = HubspotClient()

    # Fall back to the full enum only when no list was supplied at all. An
    # explicitly empty list must not silently trigger a load of every company.
    targets = list(Companies) if companies is None else companies

    for company in tqdm(targets, desc="Companies", unit="co", leave=False):
        company_id = company.value
        deal_ids = hubspot.get_deal_ids_from_company(company_id)
        processed = 0

        with tqdm(
            deal_ids, desc=company.name, unit="deal", leave=True, position=0
        ) as deal_bar:
            for deal_id in deal_bar:
                deal_data = hubspot.from_deal_id_get_info(deal_id)

                # Deals from other pipelines are not passed to the handler.
                if deal_data.get("pipeline") != PIPELINE_ID:
                    deal_bar.set_postfix({"status": "skip", "deal": deal_id})
                    continue

                deal_bar.set_postfix({"status": "uploading", "deal": deal_id})
                # NOTE(review): the event shape looks like a queue-triggered
                # invocation payload; confirm against handler's contract.
                handler(
                    {"Records": [{"body": json.dumps({"hubspot_deal_id": deal_id})}]},
                    context=None,
                )
                processed += 1
                deal_bar.set_postfix({"status": "done", "deal": deal_id})

        # tqdm.write keeps the summary from corrupting any active bar.
        tqdm.write(f"[{company.name}] {processed}/{len(deal_ids)} deals in pipeline")
# Script entry point: process the module-level `companies` selection.
if __name__ == "__main__":
    bulk_load(companies=companies)