From c7dc758d28a77c474a798d77b7323bc82fe1109c Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 17 Apr 2025 14:04:19 +0000 Subject: [PATCH] script works? --- etl/dimitra_hubspot_notes_gather.py | 187 +++++++++++++++++----------- etl/hubSpotClient/hubspot.py | 17 ++- etl/hubspot_to_invoice.py | 2 +- 3 files changed, 132 insertions(+), 74 deletions(-) diff --git a/etl/dimitra_hubspot_notes_gather.py b/etl/dimitra_hubspot_notes_gather.py index 0260661..8e19f54 100644 --- a/etl/dimitra_hubspot_notes_gather.py +++ b/etl/dimitra_hubspot_notes_gather.py @@ -1,116 +1,159 @@ import os - -os.environ["SHAREPOINT_CLIENT_ID"] = "6832a4c5-fb8c-4082-a746-4f51e1020f0d" -os.environ["SHAREPOINT_CLIENT_SECRET"] = "xpC8Q~Frww48SM1V-D8lGy5iOY7P_cJ7FF3jgarQ" -os.environ["SHAREPOINT_TENANT_ID"] = "10d5af8b-2cfd-4882-9ccd-b96e4812dacf" - -from etl.scraper.scraper import SharePointScraper, SharePointInstaller, previous_monday -from etl.hubSpotClient.hubspot import HubSpotClient, DealStage +from datetime import timedelta, timezone +import datetime import pandas as pd from bs4 import BeautifulSoup from openpyxl import Workbook from openpyxl.styles import Font - +from etl.scraper.scraper import SharePointScraper, SharePointInstaller, previous_monday +from etl.hubSpotClient.hubspot import HubSpotClient, DealStage +from collections import defaultdict +import time +# Auth credentials +os.environ["SHAREPOINT_CLIENT_ID"] = "6832a4c5-fb8c-4082-a746-4f51e1020f0d" +os.environ["SHAREPOINT_CLIENT_SECRET"] = "xpC8Q~Frww48SM1V-D8lGy5iOY7P_cJ7FF3jgarQ" +os.environ["SHAREPOINT_TENANT_ID"] = "10d5af8b-2cfd-4882-9ccd-b96e4812dacf" hubspot = HubSpotClient() -import time -pipelines_to_include =[ + +# Week calculation +today = datetime.datetime.now(datetime.UTC) +week1_start = today - timedelta(days=7) +week2_start = today - timedelta(days=14) +week3_start = today - timedelta(days=21) + +def get_week_label(created_at_str): + created_at = datetime.datetime.strptime(created_at_str, '%Y-%m-%d %H:%M:%S') + created_at = created_at.replace(tzinfo=timezone.utc) + if week1_start <= created_at <= today: + return "Week 1" + elif week2_start <= created_at < week1_start: + return "Week 2" + elif week3_start <= created_at < week2_start: + return "Week 3" + return None # Ignore notes outside the 3-week range + +# Pipelines to include +pipelines_to_include = [ "SALES - SOCIAL HOUSING", "PVT PAY", "NRLA GENERAL ENQUIRIES", - # "OSMOSIS - SALES", - ] + exclude_stage = { - "SALES - SOCIAL HOUSING" : [ - "HA TO REENGAGE", - "APPOINTMENT SCHEDULED", - "AWAITING ASSET LIST", - "ASSET LIST RECEIVED", - "ASSET LIST STANDARDISED", - "ROUTE MARCH CREATED", + "SALES - SOCIAL HOUSING": [ + "HA TO REENGAGE", "APPOINTMENT SCHEDULED", "AWAITING ASSET LIST", + "ASSET LIST RECEIVED", "ASSET LIST STANDARDISED", "ROUTE MARCH CREATED", "HA WEEKLY REPORTING", ], "PVT PAY": [ - "LIVE OPPORTUNITY", - "CLOSED LOST", - "INVOICED", - "COLD - KIT", - "CLOSED WON", - + "LIVE OPPORTUNITY", "CLOSED LOST", "INVOICED", "COLD - KIT", "CLOSED WON", ], "NRLA GENERAL ENQUIRIES": [ - "CUSTOMER CONTACTED", - "LOST", - "COLD", + "CUSTOMER CONTACTED", "LOST", "COLD", ] } -include_pipeline_upper = [s.upper().strip() for s in pipelines_to_include] -exclude_stage_upper = [s.upper().strip() for s in exclude_stage] -notes_data = [] +notes_data = defaultdict(list) pipelines = hubspot.client.crm.pipelines.pipelines_api.get_all(object_type="deals") + for pipeline in pipelines.results: pipeline_name = pipeline.label.upper().strip() if pipeline_name in pipelines_to_include: for stage in pipeline.stages: - if stage.label.upper().strip() not in exclude_stage[pipeline_name]: - for deal_id in hubspot.get_all_deals_from_stage_id(stage.id): - notes = hubspot.get_notes_from_deals_id(deal_id) + if stage.label.upper().strip() not in [s.upper() for s in exclude_stage.get(pipeline_name, [])]: + for deals in hubspot.get_all_deals_from_stage_id(stage.id): + time.sleep(1) + deal_notes_by_week = {"Week 1": [], "Week 2": [], "Week 3": []} + notes = hubspot.get_notes_from_deals_id(deals["deal_id"]) + for note in notes: - deal_name = hubspot.get_deal_name_by_id(deal_id) + week_label = get_week_label(note["created_at"]) + if not week_label: + continue html_body = note['note'] soup = BeautifulSoup(html_body, "html.parser") - plain_text = soup.get_text(separator="\n") # Keeps line breaks - notes_data.append({ - "note_body": plain_text, - "deal_name": deal_name, # Include deal_id to relate the note to the deal - "pipeline_name": pipeline.label # Add the pipeline name + plain_text = soup.get_text(separator="\n") + deal_notes_by_week[week_label].append(plain_text) + + if any(deal_notes_by_week.values()): + deal_name = hubspot.get_deal_name_by_id(deals["deal_id"]) + owner_name = "not assigned" + if deals["deal_owner"]: + owner_name = hubspot.get_owner_name_from_id(deals['deal_owner']) + + notes_data[pipeline_name].append({ + "Deal Name": deal_name.upper(), + "Deal Owner": owner_name, + "Deal Stage": stage.label.upper(), + "Value": deals["value"], + "Notes Week 1": "\n---\n".join(deal_notes_by_week["Week 1"]), + "Notes Week 2": "\n---\n".join(deal_notes_by_week["Week 2"]), + "Notes Week 3": "\n---\n".join(deal_notes_by_week["Week 3"]), }) - - time.sleep(2) print("delay to not bombard the server") + time.sleep(2) -notes_df = pd.DataFrame(notes_data) -notes_df.to_csv("output.csv") -df = notes_df - +# Create Excel Workbook wb = Workbook() -wb.remove(wb.active) # Remove default sheet +wb.remove(wb.active) -for pipeline, group_df in df.groupby("pipeline_name"): - ws = wb.create_sheet(title=pipeline[:31]) # Excel sheet name limit = 31 chars +for pipeline, deals in notes_data.items(): + ws = wb.create_sheet(title=pipeline[:31]) - # Sort by deal name - group_df = group_df.sort_values("deal_name") + headers = ["Deal Name", "Deal Owner", "Deal Stage", "Value", "Notes Week 1", "Notes Week 2", "Notes Week 3"] + ws.append(headers) + for cell in ws[1]: + cell.font = Font(bold=True) - current_row = 1 - for deal_name, deal_notes in group_df.groupby("deal_name"): - # Bold header for each deal - ws.cell(row=current_row, column=1, value=f"Deal Stage: {deal_name}") - ws.cell(row=current_row, column=1).font = Font(bold=True) - current_row += 1 + for row in deals: + # Normalize notes to always be lists + week_notes = {} + for week in range(1, 4): + key = f"Notes Week {week}" + note_data = row.get(key, []) + if isinstance(note_data, str): + note_data = [note_data] + week_notes[week] = note_data - # Notes for the deal - for note in deal_notes["note_body"]: - ws.cell(row=current_row, column=2, value=note) - current_row += 1 + # Get first note per week (if any) + first_notes = [week_notes[week][0] if len(week_notes[week]) > 0 else "" for week in range(1, 4)] - # Add a blank row between groups - current_row += 1 + # Add main deal row + first notes + ws.append([ + row["Deal Name"], + row["Deal Owner"], + row["Deal Stage"], + row["Value"], + *first_notes + ]) -# Save to Excel -from datetime import datetime, timedelta -today = datetime.today() + # Determine max number of remaining notes + max_additional_notes = max(len(week_notes[week]) for week in range(1, 4)) - 1 + + # Add remaining notes + for i in range(1, max_additional_notes + 1): + note_row = ["", "", "", ""] # Empty deal columns + for week in range(1, 4): + notes = week_notes[week] + note = notes[i] if i < len(notes) else "" + note_row.append(note) + ws.append(note_row) + + +# Generate file name with next Monday’s date days_ahead = (7 - today.weekday()) % 7 -days_ahead = 7 if days_ahead == 0 else days_ahead # If today is Monday, get *next* Monday +days_ahead = 7 if days_ahead == 0 else days_ahead next_monday = today + timedelta(days=days_ahead) - formatted = next_monday.strftime("%d-%m-%Y Monday") - - file_name = f"{formatted} DEAL_NOTES_FROM_HUBSPOT.xlsx" -wb.save(file_name) output_path = os.path.abspath(file_name) +wb.save(output_path) + +# Upload to SharePoint sharepoint_client = SharePointScraper(SharePointInstaller.DOMNA) -sharepoint_client.upload_file(output_path, f"02. Sales and Marketing/02. Deal Notes from Hubspot/{formatted}",file_name) +sharepoint_client.upload_file( + output_path, + f"02. Sales and Marketing/02. Deal Notes from Hubspot/{formatted}", + file_name +) diff --git a/etl/hubSpotClient/hubspot.py b/etl/hubSpotClient/hubspot.py index 4924d27..d8b04e9 100644 --- a/etl/hubSpotClient/hubspot.py +++ b/etl/hubSpotClient/hubspot.py @@ -3,6 +3,7 @@ from enum import Enum from hubspot.crm.deals import PublicObjectSearchRequest from hubspot.crm.deals.models import SimplePublicObjectInput from etl.hubSpotClient.types import SubmissionInfoFromDeal +import time @@ -20,10 +21,17 @@ class HubSpotClient(): def get_all_deals(self): return self.client.crm.deals.get_all() + def get_owner_name_from_id(self, owner_id): + owner = self.client.crm.owners.owners_api.get_by_id(owner_id) + time.sleep(0.5) + first_name = owner.first_name or "" + last_name = owner.last_name or "" + return f"{first_name} {last_name}".strip() def get_deal_name_by_id(self, deal_id): try: deal = self.client.crm.deals.basic_api.get_by_id(deal_id) + time.sleep(0.5) return deal.properties.get("dealname", "No deal name") except Exception as e: return "Unknown Deal" # Fallback if the deal name is not found @@ -66,6 +74,7 @@ class HubSpotClient(): all_notes.append({ "note_id": note.id, "note": note_body, + "created_at": note.created_at.strftime("%Y-%m-%d %H:%M:%S"), }) return all_notes @@ -84,6 +93,8 @@ class HubSpotClient(): }], properties=[ "dealname", + "amount", + "hubspot_owner_id", ], limit=200, after=after, @@ -96,7 +107,11 @@ class HubSpotClient(): all_deals = [] for deal in found_deals: - all_deals.append(deal.id) + all_deals.append({ + "deal_id": deal.id, + "value": deal.properties["amount"], + "deal_owner": deal.properties.get("hubspot_owner_id"), + }) return all_deals def get_deals_from_deal_stage(self, deal_stage: DealStage): diff --git a/etl/hubspot_to_invoice.py b/etl/hubspot_to_invoice.py index fbf490f..d3f742d 100644 --- a/etl/hubspot_to_invoice.py +++ b/etl/hubspot_to_invoice.py @@ -44,7 +44,7 @@ sp.move_deals_to_completed(deal_ids) P3) Write documentation for tech demos from Khalims demo Tuesday - P1) - Get ready for demo, 3 examples of solar ( JJC AND SCIS), 3 examples of cavity wall ( SCIS and JJC) 12 in total + P1) - Get read for demo, 3 examples of solar ( JJC AND SCIS), 3 examples of cavity wall ( SCIS and JJC) 12 in total P2) Review deem score with last weeks deem score values to ensure accuracy P3) Figure out what to do if I see an address that isn't registered but surveyrod """ \ No newline at end of file