Made the script faster by fetching deals per company instead of scanning the whole pipeline

This commit is contained in:
Jun-te Kim 2025-12-17 11:25:10 +00:00
parent 4cd4db64d4
commit e8fa12824d
3 changed files with 121 additions and 27 deletions

22
.vscode/settings.json vendored
View file

@ -15,5 +15,27 @@
// "%load_ext autoreload", "%autoreload 2"
// ]
"vim.enableNeovim": false,
// Allow VSCode native keybindings to override Vim when needed
"vim.handleKeys": {
"<C-p>": false,
"<C-P>": false,
"<C-S-p>": false,
"<C-c>": false,
"<C-v>": false,
"<C-S-v>": false,
"<C-S-e>": false,
"<C-b>": false,
"<C-j>": false,
"<C-S-c>": false
},
// Terminal copy/paste via Ctrl+Shift+C / Ctrl+Shift+V
"terminal.integrated.copyOnSelection": false,
"terminal.integrated.commandsToSkipShell": [
"workbench.action.terminal.copySelection",
"workbench.action.terminal.paste"
],
}

View file

@ -15,6 +15,7 @@ class Companies(Enum):
SOUTHERN_HOUSING_GROUP = "109343619305"
LIVEWEST = "86205872354"
SURESERVE = "301745289413"
HOMEGROUP = "94946071794"
class DealStage(Enum):
SURVEYED_COMPLETE_NEEDS_SIGN_OFF = "1617223914"
@ -83,6 +84,32 @@ class HubSpotClient():
self.logger.error(f"Error fetching associated company for deal {deal_id}: {e}")
return None
def get_deals_from_company(self, company_id: str) -> list[str]:
    """
    Return the IDs of every deal associated with the given company.

    Pages through the v4 associations API (companies -> deals) 100 results
    at a time, following the ``paging.next.after`` cursor until the API
    stops returning one.

    Args:
        company_id: ID of the company whose deals are requested.

    Returns:
        The associated deal IDs as strings, in the order the API returned
        them. Empty when the company has no associated deals.
    """
    associations_api = self.client.crm.associations.v4.basic_api
    deal_ids: list[str] = []
    after = None  # paging cursor; None requests the first page

    while True:
        response = associations_api.get_page(
            object_type="companies",
            object_id=company_id,
            to_object_type="deals",
            limit=100,
            after=after,
        )

        # Normalise to str so the result honours the declared list[str]
        # return type even if the SDK model exposes the ID as an int.
        deal_ids.extend(str(assoc.to_object_id) for assoc in response.results)

        paging = response.paging
        if not paging or not paging.next:
            break
        after = paging.next.after

    return deal_ids
def from_deal_get_associated_listing(self, deal_id: str):
"""
Get the associated listing information for a given deal.
@ -126,6 +153,7 @@ class HubSpotClient():
properties=[
'dealname',
'dealstage',
'pipeline',
'outcome', #outcome,
'outcome_notes', #outcome notes
'project_code',

View file

@ -1,33 +1,77 @@
# from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline
# from tqdm import tqdm
# from etl.db.hubSpotLoad import HubspotTodb
# # get ALL deals
# hubspot = HubSpotClient()
# # All deals from a pipeline_id via filter
# deals = hubspot.get_deal_ids_by_pipeline(
# pipeline_id=Pipeline.OPERATIONS_SOCIAL_HOUSING.value,
# )
# # deals from companies we care about
# valueable_deals = [
# # Companies.ABRI.value,
# # Companies.SOUTHERN_HOUSING_GROUP.value,
# # Companies.SURESERVE.value,
# # Companies.LIVEWEST.value,
# Companies.HOMEGROUP.value,
# ]
# deals_to_add = []
# deal_to_companies = {}
# loader = HubspotTodb()
# # Get all deals we care about
# for i,deal in enumerate(tqdm(deals)):
# company = hubspot.from_deal_get_associated_company_id(deal)
# if company in valueable_deals:
# deals_to_add.append(deal)
# deal_to_companies.update({deal: company})
# deal_data = hubspot.from_deal_get_info(deal_id=deal)
# listing_data = hubspot.from_deal_get_associated_listing(deal_id=deal)
# loader.new_record_to_hubspot_data(deal_data, deal_to_companies[deal], listing_data, hubspot)
from etl.hubSpotClient.hubspotClient import HubSpotClient, Companies, Pipeline
from tqdm import tqdm
from etl.db.hubSpotLoad import HubspotTodb
# Client used for every HubSpot call in this script.
hubspot = HubSpotClient()

# Start from every deal ID in the OPERATIONS_SOCIAL_HOUSING pipeline.
deals = hubspot.get_deal_ids_by_pipeline(
    pipeline_id=Pipeline.OPERATIONS_SOCIAL_HOUSING.value,
)

# Company IDs whose deals should be loaded (despite the name, these are
# company IDs, not deal IDs).
valueable_deals = [
    Companies.ABRI.value,
    Companies.SOUTHERN_HOUSING_GROUP.value,
    Companies.SURESERVE.value,
    Companies.LIVEWEST.value,
]

deals_to_add = []
deal_to_companies = {}
loader = HubspotTodb()

# Look up each deal's company and load only the deals owned by a company
# in the list above.
for i, deal in enumerate(tqdm(deals)):
    company = hubspot.from_deal_get_associated_company_id(deal)
    if company not in valueable_deals:
        continue

    deals_to_add.append(deal)
    deal_to_companies[deal] = company

    deal_data = hubspot.from_deal_get_info(deal_id=deal)
    listing_data = hubspot.from_deal_get_associated_listing(deal_id=deal)
    loader.new_record_to_hubspot_data(deal_data, company, listing_data, hubspot)
# Only deals that belong to this pipeline are loaded.
PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value

# Company IDs whose deals should be loaded.
valuable_companies = [
    Companies.HOMEGROUP.value,
]

deals_to_add = []
deal_to_companies = {}

for company_id in valuable_companies:
    # Go company -> deals through the associations API, so only this
    # company's deals are fetched and inspected.
    deal_ids = hubspot.get_deals_from_company(company_id)

    for deal_id in tqdm(deal_ids, desc=f"Company {company_id}"):
        # Fetch the deal once; the same data is used for the pipeline
        # filter and for the database record.
        deal_data = hubspot.from_deal_get_info(deal_id)
        # tqdm.write keeps the message from corrupting the progress bar.
        tqdm.write(f"working on deal {deal_id}")

        # NOTE(review): presumably from_deal_get_info can return None on an
        # API error, like other client helpers do - confirm. Skip instead
        # of crashing on `.get` so one bad deal does not abort the run.
        if not deal_data:
            continue

        # Local filter: ignore this company's deals from other pipelines.
        if deal_data.get("pipeline") != PIPELINE_ID:
            continue

        deals_to_add.append(deal_id)
        deal_to_companies[deal_id] = company_id

        listing_data = hubspot.from_deal_get_associated_listing(deal_id)
        loader.new_record_to_hubspot_data(
            deal_data,
            company_id,
            listing_data,
            hubspot,
        )
        tqdm.write(f"Uploaded deal_id {deal_id} to db")