mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
Added a script that allows Domna notes to be extracted from deals
This commit is contained in:
parent
2ccee63a8c
commit
2fafc0d4fb
4 changed files with 226 additions and 1 deletions
109
etl/dimitra_hubspot_notes_gather.py
Normal file
109
etl/dimitra_hubspot_notes_gather.py
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
import os
import time

# The SharePoint settings are placed in the environment *before* the etl
# imports below, as the original script did -- presumably because those
# modules read them at import time (TODO confirm against etl.scraper).
#
# Non-secret identifiers keep their previous values as defaults, but an
# already-configured environment now takes precedence over them.
os.environ.setdefault("SHAREPOINT_CLIENT_ID", "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf")
os.environ.setdefault("SHAREPOINT_TENANT_ID", "c3f7519c-2719-4547-af04-6da6cbfd8f8f")
os.environ.setdefault(
    "SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID",
    "b5a51507-9427-4ee0-b03e-90ec7681e2d3",
)
os.environ.setdefault("JJC_SERVICE_SHAREPOINT_ID", "7fdd0485-bbf3-4b29-b30f-98c81c2a6284")

# SECURITY: the client secret must never be committed to source control. It
# has to be supplied by the runtime environment (secret store, CI variable,
# untracked .env file, ...). The value that was previously hard-coded here is
# in the repository history and should be rotated.
if not os.environ.get("SHAREPOINT_CLIENT_SECRET"):
    raise RuntimeError(
        "SHAREPOINT_CLIENT_SECRET is not set; export it before running this script."
    )

from etl.scraper.scraper import SharePointScraper, SharePointInstaller, previous_monday  # noqa: E402
from etl.hubSpotClient.hubspot import HubSpotClient, DealStage  # noqa: E402
import pandas as pd  # noqa: E402
from bs4 import BeautifulSoup  # noqa: E402
from openpyxl import Workbook  # noqa: E402
from openpyxl.styles import Font  # noqa: E402

# Shared HubSpot client used by the rest of the script.
hubspot = HubSpotClient()
|
||||
# Labels of the HubSpot deal pipelines whose notes are exported.
pipelines_to_include = [
    "SALES - SOCIAL HOUSING",
    "PVT PAY",
    "NRLA GENERAL ENQUIRIES",
    # "OSMOSIS - SALES",
]

# Stage labels that are skipped, listed per pipeline.
_SOCIAL_HOUSING_EXCLUDED_STAGES = [
    "HA TO REENGAGE",
    "APPOINTMENT SCHEDULED",
    "AWAITING ASSET LIST",
    "ASSET LIST RECEIVED",
    "ASSET LIST STANDARDISED",
    "ROUTE MARCH CREATED",
    "HA WEEKLY REPORTING",
]
_PVT_PAY_EXCLUDED_STAGES = [
    "LIVE OPPORTUNITY",
    "CLOSED LOST",
    "INVOICED",
    "COLD - KIT",
    "CLOSED WON",
]
_NRLA_EXCLUDED_STAGES = [
    "CUSTOMER CONTACTED",
    "LOST",
    "COLD",
]

# Maps a pipeline label to the stage labels excluded from the export.
exclude_stage = {
    "SALES - SOCIAL HOUSING": _SOCIAL_HOUSING_EXCLUDED_STAGES,
    "PVT PAY": _PVT_PAY_EXCLUDED_STAGES,
    "NRLA GENERAL ENQUIRIES": _NRLA_EXCLUDED_STAGES,
}
|
||||
|
||||
# Normalise the configuration once so that matching against HubSpot labels is
# insensitive to case and surrounding whitespace. Sets give O(1) membership.
include_pipeline_upper = {name.upper().strip() for name in pipelines_to_include}
exclude_stage_upper = {
    pipeline_label.upper().strip(): {
        stage_label.upper().strip() for stage_label in stage_labels
    }
    for pipeline_label, stage_labels in exclude_stage.items()
}

# One dict per note: plain-text body, owning deal's name, pipeline label.
notes_data = []

pipelines = hubspot.client.crm.pipelines.pipelines_api.get_all(object_type="deals")
for pipeline in pipelines.results:
    pipeline_name = pipeline.label.upper().strip()
    if pipeline_name not in include_pipeline_upper:
        continue

    # A pipeline without an exclusion entry simply has no skipped stages
    # (previously this raised KeyError).
    skipped_stages = exclude_stage_upper.get(pipeline_name, set())

    for stage in pipeline.stages:
        if stage.label.upper().strip() in skipped_stages:
            continue

        for deal_id in hubspot.get_all_deals_from_stage_id(stage.id):
            notes = hubspot.get_notes_from_deals_id(deal_id)
            if notes:
                # The name is the same for every note of the deal, so fetch
                # it once per deal rather than once per note.
                deal_name = hubspot.get_deal_name_by_id(deal_id)
                for note in notes:
                    # A note may have no body; treat it as empty text.
                    html_body = note["note"] or ""
                    soup = BeautifulSoup(html_body, "html.parser")
                    plain_text = soup.get_text(separator="\n")  # Keeps line breaks
                    notes_data.append({
                        "note_body": plain_text,
                        "deal_name": deal_name,  # Relates the note to its deal
                        "pipeline_name": pipeline.label,
                    })

            # Throttle between deals to stay clear of HubSpot's rate limits.
            # NOTE(review): the original nesting level of this pause could not
            # be recovered from the source; per-deal is assumed -- confirm.
            time.sleep(0.75)
            print("delay to not bombard the server")
|
||||
|
||||
# Explicit columns keep the frame well-formed even when no notes were
# collected; otherwise groupby("pipeline_name") fails on a column-less frame.
notes_df = pd.DataFrame(notes_data, columns=["note_body", "deal_name", "pipeline_name"])
notes_df.to_csv("output.csv")
df = notes_df

wb = Workbook()
wb.remove(wb.active)  # Remove default sheet

# Characters Excel does not allow in a worksheet title are replaced by "-".
_invalid_title_chars = str.maketrans({ch: "-" for ch in "\\/?*[]:"})

# One worksheet per pipeline. groupby already orders the groups by key and
# keeps each group's rows in their original order, so no extra sort is needed.
for pipeline_label, group_df in df.groupby("pipeline_name"):
    # Excel sheet name limit = 31 chars
    ws = wb.create_sheet(title=pipeline_label.translate(_invalid_title_chars)[:31])

    current_row = 1
    # NOTE(review): deals are grouped by name, so two deals sharing a name are
    # merged under one header -- confirm this is acceptable.
    for deal_name, deal_notes in group_df.groupby("deal_name"):
        # Bold header for each deal.
        # NOTE(review): the label reads "Deal Stage" but the value is the deal
        # name; the text is left unchanged in case consumers rely on it.
        header_cell = ws.cell(row=current_row, column=1, value=f"Deal Stage: {deal_name}")
        header_cell.font = Font(bold=True)
        current_row += 1

        # Notes for the deal, one per row in column B.
        for note in deal_notes["note_body"]:
            ws.cell(row=current_row, column=2, value=note)
            current_row += 1

        # Blank row between deals.
        current_row += 1

# openpyxl cannot save a workbook without sheets; add a placeholder so a run
# that found no notes still produces (and uploads) a valid report.
if not wb.sheetnames:
    wb.create_sheet(title="No notes")

# Save to Excel
file_name = "DEAL_NOTES_FROM_HUBSPOT.xlsx"
wb.save(file_name)
output_path = os.path.abspath(file_name)

sharepoint_client = SharePointScraper(SharePointInstaller.DOMNA)
# NOTE(review): previous_monday is interpolated exactly as imported; if it is
# a function rather than a value it must be called here -- confirm.
sharepoint_client.upload_file(
    output_path,
    f"/02. Sales and Marketing/02. DEAL Notes from Hubspot/{previous_monday}",
    file_name,
)
|
||||
|
|
@ -4,6 +4,8 @@ from hubspot.crm.deals import PublicObjectSearchRequest
|
|||
from hubspot.crm.deals.models import SimplePublicObjectInput
|
||||
from etl.hubSpotClient.types import SubmissionInfoFromDeal
|
||||
|
||||
|
||||
|
||||
class DealStage(Enum):
|
||||
SURVEYED_COMPLETE_NEEDS_SIGN_OFF = "1617223914"
|
||||
SURVEYED_NO_ACCESS_NEED_SIGN_OFF = "1617223915"
|
||||
|
|
@ -17,7 +19,85 @@ class HubSpotClient():
|
|||
|
||||
def get_all_deals(self):
    """Return whatever the HubSpot client's ``crm.deals.get_all()`` yields."""
    deals_api = self.client.crm.deals
    return deals_api.get_all()
|
||||
|
||||
|
||||
def get_deal_name_by_id(self, deal_id):
    """Return the ``dealname`` property of the deal with the given id.

    Args:
        deal_id: HubSpot id of the deal to look up.

    Returns:
        The deal's name; ``"No deal name"`` when the deal has no (or an
        empty/null) ``dealname`` property; ``"Unknown Deal"`` when the lookup
        itself fails.
    """
    import logging

    try:
        deal = self.client.crm.deals.basic_api.get_by_id(deal_id)
    except Exception:
        # Best-effort lookup: keep the fallback, but leave a trace of the
        # failure instead of swallowing it silently.
        logging.getLogger(__name__).exception(
            "Could not fetch deal %s from HubSpot", deal_id
        )
        return "Unknown Deal"  # Fallback if the deal cannot be fetched

    # `.get(key, default)` does not cover a property that is present but
    # null, so fall back explicitly on any empty value.
    return (deal.properties or {}).get("dealname") or "No deal name"
|
||||
|
||||
def get_notes_from_deals_id(self, deals_id):
    """Return every note associated with a deal.

    Pages through the CRM search endpoint for ``notes`` objects, filtering on
    the ``associations.deal`` property, until no further page is reported.

    Args:
        deals_id: HubSpot id of the deal whose notes are wanted.

    Returns:
        A list of ``{"note_id": ..., "note": ...}`` dicts, where ``note`` is
        the raw ``hs_note_body`` value, or ``"No content"`` when the note has
        no body.
    """
    # Generic-object request model; imported locally, as in the original.
    from hubspot.crm.objects import PublicObjectSearchRequest

    found_notes = []
    after = None
    while True:
        search_request = PublicObjectSearchRequest(
            filter_groups=[{
                "filters": [{
                    "propertyName": "associations.deal",  # Notes linked to the deal
                    "operator": "EQ",
                    "value": deals_id,
                }]
            }],
            # hubspot_owner_id is requested but not returned to the caller.
            properties=["hs_note_body", "hubspot_owner_id"],
            limit=200,
            after=after,
        )
        response = self.client.crm.objects.search_api.do_search(
            object_type="notes",
            public_object_search_request=search_request,
        )
        found_notes.extend(response.results)

        # Stop when the API reports no further page.
        if not response.paging or not response.paging.next:
            break
        after = response.paging.next.after

    # `.get(key, default)` does not cover a property that is present but
    # null, so fall back explicitly on any empty body.
    return [
        {
            "note_id": note.id,
            "note": (note.properties or {}).get("hs_note_body") or "No content",
        }
        for note in found_notes
    ]
|
||||
|
||||
|
||||
def get_all_deals_from_stage_id(self, stage_id):
    """Return the ids of all deals whose ``dealstage`` equals ``stage_id``.

    Pages through the deals search endpoint until no further page is
    reported, collecting the ``id`` of every result.
    """
    deal_ids = []
    cursor = None
    while True:
        page = self.client.crm.deals.search_api.do_search(
            PublicObjectSearchRequest(
                filter_groups=[{
                    "filters": [{
                        "propertyName": "dealstage",
                        "operator": "EQ",
                        "value": stage_id,
                    }]
                }],
                properties=["dealname"],
                limit=200,
                after=cursor,
            )
        )
        deal_ids.extend(deal.id for deal in page.results)

        # No paging info, or no "next" entry, means this was the last page.
        next_page = page.paging.next if page.paging else None
        if not next_page:
            break
        cursor = next_page.after

    return deal_ids
|
||||
|
||||
def get_deals_from_deal_stage(self, deal_stage: DealStage):
|
||||
found_deals = []
|
||||
|
|
|
|||
37
poetry.lock
generated
37
poetry.lock
generated
|
|
@ -61,6 +61,29 @@ files = [
|
|||
astroid = ["astroid (>=2,<4)"]
|
||||
test = ["astroid (>=2,<4)", "pytest", "pytest-cov", "pytest-xdist"]
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
version = "4.13.4"
|
||||
description = "Screen-scraping library"
|
||||
optional = false
|
||||
python-versions = ">=3.7.0"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b"},
|
||||
{file = "beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
soupsieve = ">1.2"
|
||||
typing-extensions = ">=4.0.0"
|
||||
|
||||
[package.extras]
|
||||
cchardet = ["cchardet"]
|
||||
chardet = ["chardet"]
|
||||
charset-normalizer = ["charset-normalizer"]
|
||||
html5lib = ["html5lib"]
|
||||
lxml = ["lxml"]
|
||||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
version = "2025.1.31"
|
||||
|
|
@ -1684,6 +1707,18 @@ files = [
|
|||
{file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "soupsieve"
|
||||
version = "2.6"
|
||||
description = "A modern CSS selector implementation for Beautiful Soup."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"},
|
||||
{file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlalchemy"
|
||||
version = "2.0.40"
|
||||
|
|
@ -1925,4 +1960,4 @@ files = [
|
|||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.12"
|
||||
content-hash = "55a974b3a81d57c429f61ee6a12a84d38f5c703fdfdfdf2553bec6ba21c29bf5"
|
||||
content-hash = "9b3e5a8f963d63fbb5fafd8595901358d10aba9f5261b398b9051504ce9320c2"
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ dependencies = [
|
|||
"pytest (>=8.3.5,<9.0.0)",
|
||||
"hubspot-api-client (>=11.1.0,<12.0.0)",
|
||||
"monday (>=2.0.1,<3.0.0)",
|
||||
"beautifulsoup4 (>=4.13.4,<5.0.0)",
|
||||
]
|
||||
|
||||
[tool.poetry]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue