script works?

This commit is contained in:
Jun-te Kim 2025-04-17 14:04:19 +00:00
parent 04a27c4353
commit c7dc758d28
3 changed files with 132 additions and 74 deletions

View file

@ -1,116 +1,159 @@
import os
# SECURITY NOTE(review): the SharePoint client id, client secret and tenant id
# below are written as plain-text literals in source, so they are stored in
# version control history. Recommend rotating the secret and supplying all
# three values from the process environment / a secret store instead of
# assigning them here.
os.environ["SHAREPOINT_CLIENT_ID"] = "6832a4c5-fb8c-4082-a746-4f51e1020f0d"
os.environ["SHAREPOINT_CLIENT_SECRET"] = "xpC8Q~Frww48SM1V-D8lGy5iOY7P_cJ7FF3jgarQ"
os.environ["SHAREPOINT_TENANT_ID"] = "10d5af8b-2cfd-4882-9ccd-b96e4812dacf"
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, previous_monday
from etl.hubSpotClient.hubspot import HubSpotClient, DealStage
from datetime import timedelta, timezone
import datetime
import pandas as pd
from bs4 import BeautifulSoup
from openpyxl import Workbook
from openpyxl.styles import Font
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, previous_monday
from etl.hubSpotClient.hubspot import HubSpotClient, DealStage
from collections import defaultdict
import time
# Auth credentials
# SECURITY NOTE(review): these three values are hardcoded as plain-text
# literals, which means the client secret is committed to the repository.
# Recommend rotating the secret and injecting the values from outside the code
# base (deployment environment / secret manager) rather than setting them here.
# The assignments populate os.environ for this process only.
os.environ["SHAREPOINT_CLIENT_ID"] = "6832a4c5-fb8c-4082-a746-4f51e1020f0d"
os.environ["SHAREPOINT_CLIENT_SECRET"] = "xpC8Q~Frww48SM1V-D8lGy5iOY7P_cJ7FF3jgarQ"
os.environ["SHAREPOINT_TENANT_ID"] = "10d5af8b-2cfd-4882-9ccd-b96e4812dacf"
hubspot = HubSpotClient()
import time
pipelines_to_include =[
# Week calculation
today = datetime.datetime.now(datetime.UTC)
week1_start = today - timedelta(days=7)
week2_start = today - timedelta(days=14)
week3_start = today - timedelta(days=21)
def get_week_label(created_at_str, now=None):
    """Map a note's creation timestamp to a reporting-week label.

    Args:
        created_at_str: Timestamp formatted as ``%Y-%m-%d %H:%M:%S``. It is
            interpreted as UTC.
        now: Timezone-aware datetime marking the end of the reporting window.
            Defaults to the module-level ``today`` so existing one-argument
            callers get exactly the same result as before.

    Returns:
        ``"Week 1"`` for notes created at most 7 days before ``now``,
        ``"Week 2"`` for more than 7 and at most 14 days before,
        ``"Week 3"`` for more than 14 and at most 21 days before, and
        ``None`` for anything else (older notes, or notes dated after ``now``).

    Raises:
        ValueError: If ``created_at_str`` does not match the expected format.
    """
    if now is None:
        # Fall back to the script-wide reference time computed at start-up.
        now = today

    created_at = datetime.datetime.strptime(
        created_at_str, "%Y-%m-%d %H:%M:%S"
    ).replace(tzinfo=timezone.utc)

    # Notes stamped later than the reference time are outside every window.
    if created_at > now:
        return None

    age = now - created_at
    # Each window is closed on its older edge, so an age of exactly 7/14/21
    # days still belongs to week 1/2/3 respectively.
    for week_number in (1, 2, 3):
        if age <= timedelta(days=7 * week_number):
            return f"Week {week_number}"
    return None  # Ignore notes outside the 3-week range
# Pipelines to include
pipelines_to_include = [
"SALES - SOCIAL HOUSING",
"PVT PAY",
"NRLA GENERAL ENQUIRIES",
# "OSMOSIS - SALES",
]
exclude_stage = {
"SALES - SOCIAL HOUSING" : [
"HA TO REENGAGE",
"APPOINTMENT SCHEDULED",
"AWAITING ASSET LIST",
"ASSET LIST RECEIVED",
"ASSET LIST STANDARDISED",
"ROUTE MARCH CREATED",
"SALES - SOCIAL HOUSING": [
"HA TO REENGAGE", "APPOINTMENT SCHEDULED", "AWAITING ASSET LIST",
"ASSET LIST RECEIVED", "ASSET LIST STANDARDISED", "ROUTE MARCH CREATED",
"HA WEEKLY REPORTING",
],
"PVT PAY": [
"LIVE OPPORTUNITY",
"CLOSED LOST",
"INVOICED",
"COLD - KIT",
"CLOSED WON",
"LIVE OPPORTUNITY", "CLOSED LOST", "INVOICED", "COLD - KIT", "CLOSED WON",
],
"NRLA GENERAL ENQUIRIES": [
"CUSTOMER CONTACTED",
"LOST",
"COLD",
"CUSTOMER CONTACTED", "LOST", "COLD",
]
}
include_pipeline_upper = [s.upper().strip() for s in pipelines_to_include]
exclude_stage_upper = [s.upper().strip() for s in exclude_stage]
notes_data = []
notes_data = defaultdict(list)
pipelines = hubspot.client.crm.pipelines.pipelines_api.get_all(object_type="deals")
for pipeline in pipelines.results:
pipeline_name = pipeline.label.upper().strip()
if pipeline_name in pipelines_to_include:
for stage in pipeline.stages:
if stage.label.upper().strip() not in exclude_stage[pipeline_name]:
for deal_id in hubspot.get_all_deals_from_stage_id(stage.id):
notes = hubspot.get_notes_from_deals_id(deal_id)
if stage.label.upper().strip() not in [s.upper() for s in exclude_stage.get(pipeline_name, [])]:
for deals in hubspot.get_all_deals_from_stage_id(stage.id):
time.sleep(1)
deal_notes_by_week = {"Week 1": [], "Week 2": [], "Week 3": []}
notes = hubspot.get_notes_from_deals_id(deals["deal_id"])
for note in notes:
deal_name = hubspot.get_deal_name_by_id(deal_id)
week_label = get_week_label(note["created_at"])
if not week_label:
continue
html_body = note['note']
soup = BeautifulSoup(html_body, "html.parser")
plain_text = soup.get_text(separator="\n") # Keeps line breaks
notes_data.append({
"note_body": plain_text,
"deal_name": deal_name, # Include deal_id to relate the note to the deal
"pipeline_name": pipeline.label # Add the pipeline name
plain_text = soup.get_text(separator="\n")
deal_notes_by_week[week_label].append(plain_text)
if any(deal_notes_by_week.values()):
deal_name = hubspot.get_deal_name_by_id(deals["deal_id"])
owner_name = "not assigned"
if deals["deal_owner"]:
owner_name = hubspot.get_owner_name_from_id(deals['deal_owner'])
notes_data[pipeline_name].append({
"Deal Name": deal_name.upper(),
"Deal Owner": owner_name,
"Deal Stage": stage.label.upper(),
"Value": deals["value"],
"Notes Week 1": "\n---\n".join(deal_notes_by_week["Week 1"]),
"Notes Week 2": "\n---\n".join(deal_notes_by_week["Week 2"]),
"Notes Week 3": "\n---\n".join(deal_notes_by_week["Week 3"]),
})
time.sleep(2)
print("delay to not bombard the server")
time.sleep(2)
notes_df = pd.DataFrame(notes_data)
notes_df.to_csv("output.csv")
df = notes_df
# Create Excel Workbook
wb = Workbook()
wb.remove(wb.active) # Remove default sheet
wb.remove(wb.active)
for pipeline, group_df in df.groupby("pipeline_name"):
ws = wb.create_sheet(title=pipeline[:31]) # Excel sheet name limit = 31 chars
for pipeline, deals in notes_data.items():
ws = wb.create_sheet(title=pipeline[:31])
# Sort by deal name
group_df = group_df.sort_values("deal_name")
headers = ["Deal Name", "Deal Owner", "Deal Stage", "Value", "Notes Week 1", "Notes Week 2", "Notes Week 3"]
ws.append(headers)
for cell in ws[1]:
cell.font = Font(bold=True)
current_row = 1
for deal_name, deal_notes in group_df.groupby("deal_name"):
# Bold header for each deal
ws.cell(row=current_row, column=1, value=f"Deal Stage: {deal_name}")
ws.cell(row=current_row, column=1).font = Font(bold=True)
current_row += 1
for row in deals:
# Normalize notes to always be lists
week_notes = {}
for week in range(1, 4):
key = f"Notes Week {week}"
note_data = row.get(key, [])
if isinstance(note_data, str):
note_data = [note_data]
week_notes[week] = note_data
# Notes for the deal
for note in deal_notes["note_body"]:
ws.cell(row=current_row, column=2, value=note)
current_row += 1
# Get first note per week (if any)
first_notes = [week_notes[week][0] if len(week_notes[week]) > 0 else "" for week in range(1, 4)]
# Add a blank row between groups
current_row += 1
# Add main deal row + first notes
ws.append([
row["Deal Name"],
row["Deal Owner"],
row["Deal Stage"],
row["Value"],
*first_notes
])
# Save to Excel
from datetime import datetime, timedelta
today = datetime.today()
# Determine max number of remaining notes
max_additional_notes = max(len(week_notes[week]) for week in range(1, 4)) - 1
# Add remaining notes
for i in range(1, max_additional_notes + 1):
note_row = ["", "", "", ""] # Empty deal columns
for week in range(1, 4):
notes = week_notes[week]
note = notes[i] if i < len(notes) else ""
note_row.append(note)
ws.append(note_row)
# Generate file name with next Mondays date
days_ahead = (7 - today.weekday()) % 7
days_ahead = 7 if days_ahead == 0 else days_ahead # If today is Monday, get *next* Monday
days_ahead = 7 if days_ahead == 0 else days_ahead
next_monday = today + timedelta(days=days_ahead)
formatted = next_monday.strftime("%d-%m-%Y Monday")
file_name = f"{formatted} DEAL_NOTES_FROM_HUBSPOT.xlsx"
wb.save(file_name)
output_path = os.path.abspath(file_name)
wb.save(output_path)
# Upload to SharePoint
sharepoint_client = SharePointScraper(SharePointInstaller.DOMNA)
sharepoint_client.upload_file(output_path, f"02. Sales and Marketing/02. Deal Notes from Hubspot/{formatted}",file_name)
sharepoint_client.upload_file(
output_path,
f"02. Sales and Marketing/02. Deal Notes from Hubspot/{formatted}",
file_name
)

View file

@ -3,6 +3,7 @@ from enum import Enum
from hubspot.crm.deals import PublicObjectSearchRequest
from hubspot.crm.deals.models import SimplePublicObjectInput
from etl.hubSpotClient.types import SubmissionInfoFromDeal
import time
@ -20,10 +21,17 @@ class HubSpotClient():
def get_all_deals(self):
    """Return the result of the HubSpot deals API ``get_all()`` call, unmodified."""
    deals_api = self.client.crm.deals
    return deals_api.get_all()
def get_owner_name_from_id(self, owner_id):
    """Fetch an owner by id and return "<first> <last>" with outer whitespace trimmed.

    A missing (falsy) first or last name is treated as an empty string, so the
    result may be just one of the two names, or an empty string if both are
    missing. Exceptions raised by the owners API call propagate to the caller.
    """
    owners_api = self.client.crm.owners.owners_api
    record = owners_api.get_by_id(owner_id)
    # Fixed pause after every lookup; presumably to stay under the API rate
    # limit (the calling script sleeps between requests for that reason).
    time.sleep(0.5)
    given = record.first_name or ""
    family = record.last_name or ""
    full_name = "{} {}".format(given, family)
    return full_name.strip()
def get_deal_name_by_id(self, deal_id):
    """Return the ``dealname`` property of a deal, with safe fallbacks.

    Args:
        deal_id: Identifier passed to the deals ``basic_api.get_by_id`` call.

    Returns:
        The deal's name as a non-empty string. ``"No deal name"`` is returned
        when the property is missing, ``None`` or empty, so callers can always
        apply string methods to the result. ``"Unknown Deal"`` is returned
        when the lookup itself fails.
    """
    try:
        deal = self.client.crm.deals.basic_api.get_by_id(deal_id)
        # Fixed pause after each successful lookup; presumably rate limiting.
        time.sleep(0.5)
        # ``.get(key, default)`` only covers a missing key; a key that is
        # present with a None/empty value must fall back as well.
        return deal.properties.get("dealname") or "No deal name"
    except Exception:
        # Deliberate best-effort: any lookup failure yields a placeholder
        # rather than aborting the whole report run.
        return "Unknown Deal"  # Fallback if the deal name is not found
@ -66,6 +74,7 @@ class HubSpotClient():
all_notes.append({
"note_id": note.id,
"note": note_body,
"created_at": note.created_at.strftime("%Y-%m-%d %H:%M:%S"),
})
return all_notes
@ -84,6 +93,8 @@ class HubSpotClient():
}],
properties=[
"dealname",
"amount",
"hubspot_owner_id",
],
limit=200,
after=after,
@ -96,7 +107,11 @@ class HubSpotClient():
all_deals = []
for deal in found_deals:
all_deals.append(deal.id)
all_deals.append({
"deal_id": deal.id,
"value": deal.properties["amount"],
"deal_owner": deal.properties.get("hubspot_owner_id"),
})
return all_deals
def get_deals_from_deal_stage(self, deal_stage: DealStage):

View file

@ -44,7 +44,7 @@ sp.move_deals_to_completed(deal_ids)
P3) Write documentation for tech demos from Khalims demo
Tuesday
P1) - Get ready for demo, 3 examples of solar ( JJC AND SCIS), 3 examples of cavity wall ( SCIS and JJC) 12 in total
P1) - Get ready for demo, 3 examples of solar ( JJC AND SCIS), 3 examples of cavity wall ( SCIS and JJC) 12 in total
P2) Review deem score with last weeks deem score values to ensure accuracy
P3) Figure out what to do if I see an address that isn't registered but surveyed
"""