Added a script that allows Domna notes to be extracted from deals

This commit is contained in:
Jun-te Kim 2025-04-16 17:48:01 +00:00
parent 2ccee63a8c
commit 2fafc0d4fb
4 changed files with 226 additions and 1 deletions

View file

@ -0,0 +1,109 @@
import os
import time

import pandas as pd
from bs4 import BeautifulSoup
from openpyxl import Workbook
from openpyxl.styles import Font

# Non-secret SharePoint identifiers. ``setdefault`` keeps these values as
# fallbacks while letting a deployment override them through the real
# environment instead of editing this script.
os.environ.setdefault("SHAREPOINT_CLIENT_ID", "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf")
os.environ.setdefault("SHAREPOINT_TENANT_ID", "c3f7519c-2719-4547-af04-6da6cbfd8f8f")
os.environ.setdefault(
    "SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID",
    "b5a51507-9427-4ee0-b03e-90ec7681e2d3",
)
os.environ.setdefault("JJC_SERVICE_SHAREPOINT_ID", "7fdd0485-bbf3-4b29-b30f-98c81c2a6284")

# SECURITY: the client secret must never be committed to source control.
# It has to be supplied by the environment (shell, .env loader, CI secret
# store). The value that was previously hard-coded here should be treated as
# leaked and rotated.
if not os.environ.get("SHAREPOINT_CLIENT_SECRET"):
    raise RuntimeError(
        "SHAREPOINT_CLIENT_SECRET is not set; export it in the environment "
        "before running this script."
    )

# The project imports stay below the environment setup, as in the original
# ordering. NOTE(review): presumably these modules read the SHAREPOINT_*
# variables at import time - confirm before reordering.
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, previous_monday  # noqa: E402
from etl.hubSpotClient.hubspot import HubSpotClient, DealStage  # noqa: E402

# Shared HubSpot client used by the rest of the script.
hubspot = HubSpotClient()
# Labels of the HubSpot deal pipelines whose notes are exported.
pipelines_to_include = [
    "SALES - SOCIAL HOUSING",
    "PVT PAY",
    "NRLA GENERAL ENQUIRIES",
    # "OSMOSIS - SALES",
]

# Per pipeline label: stage labels whose deals are skipped.
exclude_stage = {
    "SALES - SOCIAL HOUSING": [
        "HA TO REENGAGE",
        "APPOINTMENT SCHEDULED",
        "AWAITING ASSET LIST",
        "ASSET LIST RECEIVED",
        "ASSET LIST STANDARDISED",
        "ROUTE MARCH CREATED",
        "HA WEEKLY REPORTING",
    ],
    "PVT PAY": [
        "LIVE OPPORTUNITY",
        "CLOSED LOST",
        "INVOICED",
        "COLD - KIT",
        "CLOSED WON",
    ],
    "NRLA GENERAL ENQUIRIES": [
        "CUSTOMER CONTACTED",
        "LOST",
        "COLD",
    ],
}


def _normalise_label(label):
    """Return *label* upper-cased and stripped, the form used for every label comparison."""
    return label.upper().strip()


# Normalised lookups. Both sides of each comparison below go through
# ``_normalise_label`` so casing or stray whitespace in the configuration
# cannot cause a silent mismatch.
include_pipeline_upper = [_normalise_label(label) for label in pipelines_to_include]
exclude_stage_upper = {
    _normalise_label(pipeline_label): {_normalise_label(stage_label) for stage_label in stage_labels}
    for pipeline_label, stage_labels in exclude_stage.items()
}

# One dict per note: plain-text body, owning deal name and pipeline label.
notes_data = []
pipelines = hubspot.client.crm.pipelines.pipelines_api.get_all(object_type="deals")
for pipeline in pipelines.results:
    pipeline_name = _normalise_label(pipeline.label)
    if pipeline_name not in include_pipeline_upper:
        continue

    # A pipeline without an exclusion entry simply excludes nothing
    # (previously this was a KeyError).
    excluded_stages = exclude_stage_upper.get(pipeline_name, set())
    for stage in pipeline.stages:
        if _normalise_label(stage.label) in excluded_stages:
            continue

        for deal_id in hubspot.get_all_deals_from_stage_id(stage.id):
            notes = hubspot.get_notes_from_deals_id(deal_id)
            if notes:
                # Look the name up once per deal rather than once per note,
                # and only for deals that actually have notes.
                deal_name = hubspot.get_deal_name_by_id(deal_id)
                for note in notes:
                    # A note without a body would otherwise be passed to
                    # BeautifulSoup as None; treat it as empty markup.
                    soup = BeautifulSoup(note["note"] or "", "html.parser")
                    notes_data.append({
                        "note_body": soup.get_text(separator="\n"),  # keeps line breaks
                        "deal_name": deal_name,
                        "pipeline_name": pipeline.label,
                    })
            # Throttle between deals so the HubSpot API is not flooded.
            time.sleep(0.75)
            print("delay to not bombard the server")
# Tabulate the collected notes. Explicit columns keep the frame well-formed
# (and the groupby calls below valid) even when no notes were collected.
notes_df = pd.DataFrame(notes_data, columns=["note_body", "deal_name", "pipeline_name"])
notes_df.to_csv("output.csv")

wb = Workbook()
wb.remove(wb.active)  # drop openpyxl's default sheet; one sheet per pipeline is added below
bold = Font(bold=True)

# groupby sorts the group keys and keeps the original row order inside each
# group, so pipelines and deals come out alphabetically without a separate
# sort, and the notes of a deal stay in the order they were collected.
for pipeline_label, pipeline_notes in notes_df.groupby("pipeline_name"):
    ws = wb.create_sheet(title=pipeline_label[:31])  # Excel sheet name limit = 31 chars
    current_row = 1
    for deal_name, deal_notes in pipeline_notes.groupby("deal_name"):
        # Bold header row for each deal.
        # NOTE(review): the label says "Deal Stage" but the value is the deal
        # name; the text is left unchanged pending confirmation.
        header = ws.cell(row=current_row, column=1, value=f"Deal Stage: {deal_name}")
        header.font = bold
        current_row += 1

        # One row per note, in the second column.
        for note in deal_notes["note_body"]:
            ws.cell(row=current_row, column=2, value=note)
            current_row += 1

        # Blank row between deals.
        current_row += 1

# A workbook needs at least one sheet to be saved; add a placeholder when no
# notes were found so the report is still produced.
if not wb.sheetnames:
    wb.create_sheet(title="No notes")

# Save to Excel and upload the file to SharePoint.
file_name = "DEAL_NOTES_FROM_HUBSPOT.xlsx"
wb.save(file_name)
output_path = os.path.abspath(file_name)
sharepoint_client = SharePointScraper(SharePointInstaller.DOMNA)
# NOTE(review): previous_monday is interpolated as imported; if it is a
# function rather than a value it must be called here - confirm.
sharepoint_client.upload_file(
    output_path,
    f"/02. Sales and Marketing/02. DEAL Notes from Hubspot/{previous_monday}",
    file_name,
)

View file

@ -4,6 +4,8 @@ from hubspot.crm.deals import PublicObjectSearchRequest
from hubspot.crm.deals.models import SimplePublicObjectInput
from etl.hubSpotClient.types import SubmissionInfoFromDeal
class DealStage(Enum):
SURVEYED_COMPLETE_NEEDS_SIGN_OFF = "1617223914"
SURVEYED_NO_ACCESS_NEED_SIGN_OFF = "1617223915"
@ -17,7 +19,85 @@ class HubSpotClient():
def get_all_deals(self):
    """Return whatever the client's ``crm.deals.get_all()`` call yields."""
    deals_api = self.client.crm.deals
    return deals_api.get_all()
def get_deal_name_by_id(self, deal_id):
    """Return the ``dealname`` property of the deal with id *deal_id*.

    Returns:
        The deal name; ``"No deal name"`` when the property is missing,
        ``None`` or empty; ``"Unknown Deal"`` when the lookup itself fails.
    """
    import logging

    try:
        deal = self.client.crm.deals.basic_api.get_by_id(deal_id)
    except Exception:
        # Deliberately best-effort: one unreadable deal must not abort the
        # caller, but the failure is logged instead of vanishing silently.
        logging.getLogger(__name__).warning(
            "Could not fetch deal %s", deal_id, exc_info=True
        )
        return "Unknown Deal"

    # ``or`` rather than a ``.get`` default: the key can be present with a
    # None value, in which case the default would never apply.
    return (deal.properties or {}).get("dealname") or "No deal name"
def get_notes_from_deals_id(self, deals_id):
    """Return every note associated with the deal *deals_id*.

    Pages through the CRM search endpoint for ``notes`` objects until no
    further page is reported.

    Returns:
        A list of dicts with keys ``"note_id"`` (the note's id) and
        ``"note"`` (the ``hs_note_body`` value, or ``"No content"`` when the
        body is missing, ``None`` or empty).
    """
    from hubspot.crm.objects import PublicObjectSearchRequest

    all_notes = []
    after = None
    while True:
        # Select notes through their association with the given deal.
        search_request = PublicObjectSearchRequest(
            filter_groups=[{
                "filters": [{
                    "propertyName": "associations.deal",
                    "operator": "EQ",
                    "value": deals_id,
                }]
            }],
            properties=["hs_note_body", "hubspot_owner_id"],
            limit=200,
            after=after,
        )
        response = self.client.crm.objects.search_api.do_search(
            object_type="notes",
            public_object_search_request=search_request,
        )

        for note in response.results:
            all_notes.append({
                "note_id": note.id,
                # ``or`` rather than a ``.get`` default: the key can be
                # present with a None value, which the default never covers.
                "note": note.properties.get("hs_note_body") or "No content",
            })

        # Follow the paging cursor while the API reports another page.
        if not response.paging or not response.paging.next:
            break
        after = response.paging.next.after

    return all_notes
def get_all_deals_from_stage_id(self, stage_id):
    """Return the ids of all deals whose ``dealstage`` property equals *stage_id*.

    Walks every page of the deals search endpoint, then reduces the collected
    results to their ids.
    """
    matches = []
    cursor = None
    has_more = True
    while has_more:
        stage_filter = {
            "propertyName": "dealstage",
            "operator": "EQ",
            "value": stage_id,
        }
        request = PublicObjectSearchRequest(
            filter_groups=[{"filters": [stage_filter]}],
            properties=["dealname"],
            limit=200,
            after=cursor,
        )
        page = self.client.crm.deals.search_api.do_search(request)
        matches.extend(page.results)

        # Continue only while the response carries a next-page cursor.
        has_more = bool(page.paging and page.paging.next)
        if has_more:
            cursor = page.paging.next.after

    return [deal.id for deal in matches]
def get_deals_from_deal_stage(self, deal_stage: DealStage):
found_deals = []

37
poetry.lock generated
View file

@ -61,6 +61,29 @@ files = [
astroid = ["astroid (>=2,<4)"]
test = ["astroid (>=2,<4)", "pytest", "pytest-cov", "pytest-xdist"]
[[package]]
name = "beautifulsoup4"
version = "4.13.4"
description = "Screen-scraping library"
optional = false
python-versions = ">=3.7.0"
groups = ["main"]
files = [
{file = "beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b"},
{file = "beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195"},
]
[package.dependencies]
soupsieve = ">1.2"
typing-extensions = ">=4.0.0"
[package.extras]
cchardet = ["cchardet"]
chardet = ["chardet"]
charset-normalizer = ["charset-normalizer"]
html5lib = ["html5lib"]
lxml = ["lxml"]
[[package]]
name = "certifi"
version = "2025.1.31"
@ -1684,6 +1707,18 @@ files = [
{file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
]
[[package]]
name = "soupsieve"
version = "2.6"
description = "A modern CSS selector implementation for Beautiful Soup."
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"},
{file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"},
]
[[package]]
name = "sqlalchemy"
version = "2.0.40"
@ -1925,4 +1960,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = ">=3.12"
content-hash = "55a974b3a81d57c429f61ee6a12a84d38f5c703fdfdfdf2553bec6ba21c29bf5"
content-hash = "9b3e5a8f963d63fbb5fafd8595901358d10aba9f5261b398b9051504ce9320c2"

View file

@ -21,6 +21,7 @@ dependencies = [
"pytest (>=8.3.5,<9.0.0)",
"hubspot-api-client (>=11.1.0,<12.0.0)",
"monday (>=2.0.1,<3.0.0)",
"beautifulsoup4 (>=4.13.4,<5.0.0)",
]
[tool.poetry]