import json

from tqdm import tqdm

from etl.hubspot.hubspotClient import HubspotClient, Companies, Pipeline
from etl.hubspot.scripts.scraper.main import handler

# Only deals whose "pipeline" field equals this id are forwarded to the handler.
PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value

# Companies processed when this file is run as a script. Comment entries in or
# out to change the selection; an empty selection processes nothing.
companies = [
    # Companies.THE_GUINESS_PARTNERSHIP,
    # Companies.SOUTHERN_HOUSING_GROUP,
    Companies.CALICO_HOMES,
]


def bulk_load(companies: list[Companies] | None = None) -> None:
    """Forward every in-pipeline deal of the given companies to the scraper handler.

    For each company, the deal ids are fetched from HubSpot, each deal's info
    is looked up, and deals whose ``"pipeline"`` value equals ``PIPELINE_ID``
    are passed to ``handler`` one at a time. Deals in any other pipeline are
    skipped. Progress is shown with nested tqdm bars and a one-line summary is
    printed per company.

    NOTE(review): ``handler`` is defined elsewhere; it presumably loads the
    deal into the database -- confirm against the scraper module.

    Args:
        companies: Companies to process. ``None`` (the default) means every
            member of the ``Companies`` enum. An empty list means "process
            nothing"; it is deliberately not treated as "all", so an emptied
            selection cannot trigger a full load by accident.
    """
    hubspot = HubspotClient()
    # Compare against None explicitly: a plain truthiness test would turn an
    # empty selection into "all companies".
    targets = list(Companies) if companies is None else companies

    for company in tqdm(targets, desc="Companies", unit="co", leave=False):
        company_id = company.value
        deal_ids = hubspot.get_deal_ids_from_company(company_id)

        processed = 0
        with tqdm(
            deal_ids, desc=company.name, unit="deal", leave=True, position=0
        ) as deal_bar:
            for deal_id in deal_bar:
                deal_data = hubspot.from_deal_id_get_info(deal_id)

                if deal_data.get("pipeline") != PIPELINE_ID:
                    deal_bar.set_postfix({"status": "skip", "deal": deal_id})
                    continue

                deal_bar.set_postfix({"status": "uploading", "deal": deal_id})
                # The handler is given a "Records" list with one record whose
                # body is a JSON string carrying the deal id.
                # NOTE(review): this looks like a queue-triggered event shape
                # -- confirm against the handler's expected input.
                handler(
                    {"Records": [{"body": json.dumps({"hubspot_deal_id": deal_id})}]},
                    context=None,
                )
                processed += 1
                deal_bar.set_postfix({"status": "done", "deal": deal_id})

        # tqdm.write prints the summary without corrupting active progress bars.
        tqdm.write(f"[{company.name}] {processed}/{len(deal_ids)} deals in pipeline")


if __name__ == "__main__":
    bulk_load(companies)