mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-30 13:10:56 +00:00
script works?
This commit is contained in:
parent
04a27c4353
commit
c7dc758d28
3 changed files with 132 additions and 74 deletions
|
|
@ -1,116 +1,159 @@
|
|||
import os
import time
import datetime
from collections import defaultdict
from datetime import timedelta, timezone

import pandas as pd
from bs4 import BeautifulSoup
from openpyxl import Workbook
from openpyxl.styles import Font

from etl.scraper.scraper import SharePointScraper, SharePointInstaller, previous_monday
from etl.hubSpotClient.hubspot import HubSpotClient, DealStage

# Auth credentials
# SECURITY: the SharePoint client id / secret / tenant id used to be assigned
# here as string literals. Secrets must never live in source control: anything
# that was committed has to be treated as leaked and rotated. The values are
# now expected to be supplied by the process environment (shell export,
# scheduler/CI secret store, or an untracked .env loader).
_REQUIRED_ENV_VARS = (
    "SHAREPOINT_CLIENT_ID",
    "SHAREPOINT_CLIENT_SECRET",
    "SHAREPOINT_TENANT_ID",
)
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if not os.environ.get(name)]
if _missing_env_vars:
    # Fail fast with a clear message rather than part-way through the export
    # when the SharePoint upload is finally attempted.
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )
|
||||
|
||||
hubspot = HubSpotClient()
|
||||
import time
|
||||
pipelines_to_include =[
|
||||
|
||||
# Week calculation
# Rolling 7-day windows counted back from the moment the script starts.
# timezone.utc is used instead of datetime.UTC: it is the same tzinfo object,
# but datetime.UTC only exists on Python 3.11+, and get_week_label() below
# already relies on timezone.utc.
today = datetime.datetime.now(timezone.utc)
week1_start = today - timedelta(days=7)
week2_start = today - timedelta(days=14)
week3_start = today - timedelta(days=21)


def get_week_label(created_at_str):
    """Map a note timestamp to its reporting bucket.

    Args:
        created_at_str: Timestamp formatted as '%Y-%m-%d %H:%M:%S'. It carries
            no zone information and is interpreted as UTC.

    Returns:
        "Week 1" for the most recent 7 days (inclusive of ``today``),
        "Week 2" for the 7 days before that, "Week 3" for the 7 days before
        that, or ``None`` when the timestamp is older than 21 days or later
        than ``today``.

    Raises:
        ValueError: If ``created_at_str`` does not match the expected format.
    """
    created_at = datetime.datetime.strptime(
        created_at_str, '%Y-%m-%d %H:%M:%S'
    ).replace(tzinfo=timezone.utc)

    # Outside the three-week reporting range: the caller skips these notes.
    if created_at > today or created_at < week3_start:
        return None
    if created_at >= week1_start:
        return "Week 1"
    if created_at >= week2_start:
        return "Week 2"
    return "Week 3"
|
||||
|
||||
# Pipelines to include
|
||||
pipelines_to_include = [
|
||||
"SALES - SOCIAL HOUSING",
|
||||
"PVT PAY",
|
||||
"NRLA GENERAL ENQUIRIES",
|
||||
# "OSMOSIS - SALES",
|
||||
|
||||
]
|
||||
|
||||
exclude_stage = {
|
||||
"SALES - SOCIAL HOUSING" : [
|
||||
"HA TO REENGAGE",
|
||||
"APPOINTMENT SCHEDULED",
|
||||
"AWAITING ASSET LIST",
|
||||
"ASSET LIST RECEIVED",
|
||||
"ASSET LIST STANDARDISED",
|
||||
"ROUTE MARCH CREATED",
|
||||
"SALES - SOCIAL HOUSING": [
|
||||
"HA TO REENGAGE", "APPOINTMENT SCHEDULED", "AWAITING ASSET LIST",
|
||||
"ASSET LIST RECEIVED", "ASSET LIST STANDARDISED", "ROUTE MARCH CREATED",
|
||||
"HA WEEKLY REPORTING",
|
||||
],
|
||||
"PVT PAY": [
|
||||
"LIVE OPPORTUNITY",
|
||||
"CLOSED LOST",
|
||||
"INVOICED",
|
||||
"COLD - KIT",
|
||||
"CLOSED WON",
|
||||
|
||||
"LIVE OPPORTUNITY", "CLOSED LOST", "INVOICED", "COLD - KIT", "CLOSED WON",
|
||||
],
|
||||
"NRLA GENERAL ENQUIRIES": [
|
||||
"CUSTOMER CONTACTED",
|
||||
"LOST",
|
||||
"COLD",
|
||||
"CUSTOMER CONTACTED", "LOST", "COLD",
|
||||
]
|
||||
}
|
||||
|
||||
include_pipeline_upper = [s.upper().strip() for s in pipelines_to_include]
|
||||
exclude_stage_upper = [s.upper().strip() for s in exclude_stage]
|
||||
notes_data = []
|
||||
notes_data = defaultdict(list)
|
||||
pipelines = hubspot.client.crm.pipelines.pipelines_api.get_all(object_type="deals")
|
||||
|
||||
for pipeline in pipelines.results:
|
||||
pipeline_name = pipeline.label.upper().strip()
|
||||
if pipeline_name in pipelines_to_include:
|
||||
for stage in pipeline.stages:
|
||||
if stage.label.upper().strip() not in exclude_stage[pipeline_name]:
|
||||
for deal_id in hubspot.get_all_deals_from_stage_id(stage.id):
|
||||
notes = hubspot.get_notes_from_deals_id(deal_id)
|
||||
if stage.label.upper().strip() not in [s.upper() for s in exclude_stage.get(pipeline_name, [])]:
|
||||
for deals in hubspot.get_all_deals_from_stage_id(stage.id):
|
||||
time.sleep(1)
|
||||
deal_notes_by_week = {"Week 1": [], "Week 2": [], "Week 3": []}
|
||||
notes = hubspot.get_notes_from_deals_id(deals["deal_id"])
|
||||
|
||||
for note in notes:
|
||||
deal_name = hubspot.get_deal_name_by_id(deal_id)
|
||||
week_label = get_week_label(note["created_at"])
|
||||
if not week_label:
|
||||
continue
|
||||
html_body = note['note']
|
||||
soup = BeautifulSoup(html_body, "html.parser")
|
||||
plain_text = soup.get_text(separator="\n") # Keeps line breaks
|
||||
notes_data.append({
|
||||
"note_body": plain_text,
|
||||
"deal_name": deal_name, # Include deal_id to relate the note to the deal
|
||||
"pipeline_name": pipeline.label # Add the pipeline name
|
||||
plain_text = soup.get_text(separator="\n")
|
||||
deal_notes_by_week[week_label].append(plain_text)
|
||||
|
||||
if any(deal_notes_by_week.values()):
|
||||
deal_name = hubspot.get_deal_name_by_id(deals["deal_id"])
|
||||
owner_name = "not assigned"
|
||||
if deals["deal_owner"]:
|
||||
owner_name = hubspot.get_owner_name_from_id(deals['deal_owner'])
|
||||
|
||||
notes_data[pipeline_name].append({
|
||||
"Deal Name": deal_name.upper(),
|
||||
"Deal Owner": owner_name,
|
||||
"Deal Stage": stage.label.upper(),
|
||||
"Value": deals["value"],
|
||||
"Notes Week 1": "\n---\n".join(deal_notes_by_week["Week 1"]),
|
||||
"Notes Week 2": "\n---\n".join(deal_notes_by_week["Week 2"]),
|
||||
"Notes Week 3": "\n---\n".join(deal_notes_by_week["Week 3"]),
|
||||
})
|
||||
|
||||
time.sleep(2)
|
||||
print("delay to not bombard the server")
|
||||
time.sleep(2)
|
||||
|
||||
notes_df = pd.DataFrame(notes_data)
|
||||
notes_df.to_csv("output.csv")
|
||||
df = notes_df
|
||||
|
||||
# Create Excel Workbook
|
||||
wb = Workbook()
|
||||
wb.remove(wb.active) # Remove default sheet
|
||||
wb.remove(wb.active)
|
||||
|
||||
for pipeline, group_df in df.groupby("pipeline_name"):
|
||||
ws = wb.create_sheet(title=pipeline[:31]) # Excel sheet name limit = 31 chars
|
||||
for pipeline, deals in notes_data.items():
|
||||
ws = wb.create_sheet(title=pipeline[:31])
|
||||
|
||||
# Sort by deal name
|
||||
group_df = group_df.sort_values("deal_name")
|
||||
headers = ["Deal Name", "Deal Owner", "Deal Stage", "Value", "Notes Week 1", "Notes Week 2", "Notes Week 3"]
|
||||
ws.append(headers)
|
||||
for cell in ws[1]:
|
||||
cell.font = Font(bold=True)
|
||||
|
||||
current_row = 1
|
||||
for deal_name, deal_notes in group_df.groupby("deal_name"):
|
||||
# Bold header for each deal
|
||||
ws.cell(row=current_row, column=1, value=f"Deal Stage: {deal_name}")
|
||||
ws.cell(row=current_row, column=1).font = Font(bold=True)
|
||||
current_row += 1
|
||||
for row in deals:
|
||||
# Normalize notes to always be lists
|
||||
week_notes = {}
|
||||
for week in range(1, 4):
|
||||
key = f"Notes Week {week}"
|
||||
note_data = row.get(key, [])
|
||||
if isinstance(note_data, str):
|
||||
note_data = [note_data]
|
||||
week_notes[week] = note_data
|
||||
|
||||
# Notes for the deal
|
||||
for note in deal_notes["note_body"]:
|
||||
ws.cell(row=current_row, column=2, value=note)
|
||||
current_row += 1
|
||||
# Get first note per week (if any)
|
||||
first_notes = [week_notes[week][0] if len(week_notes[week]) > 0 else "" for week in range(1, 4)]
|
||||
|
||||
# Add a blank row between groups
|
||||
current_row += 1
|
||||
# Add main deal row + first notes
|
||||
ws.append([
|
||||
row["Deal Name"],
|
||||
row["Deal Owner"],
|
||||
row["Deal Stage"],
|
||||
row["Value"],
|
||||
*first_notes
|
||||
])
|
||||
|
||||
# Save to Excel
|
||||
from datetime import datetime, timedelta
|
||||
today = datetime.today()
|
||||
# Determine max number of remaining notes
|
||||
max_additional_notes = max(len(week_notes[week]) for week in range(1, 4)) - 1
|
||||
|
||||
# Add remaining notes
|
||||
for i in range(1, max_additional_notes + 1):
|
||||
note_row = ["", "", "", ""] # Empty deal columns
|
||||
for week in range(1, 4):
|
||||
notes = week_notes[week]
|
||||
note = notes[i] if i < len(notes) else ""
|
||||
note_row.append(note)
|
||||
ws.append(note_row)
|
||||
|
||||
|
||||
# Generate file name with next Monday’s date
|
||||
days_ahead = (7 - today.weekday()) % 7
|
||||
days_ahead = 7 if days_ahead == 0 else days_ahead # If today is Monday, get *next* Monday
|
||||
days_ahead = 7 if days_ahead == 0 else days_ahead
|
||||
next_monday = today + timedelta(days=days_ahead)
|
||||
|
||||
formatted = next_monday.strftime("%d-%m-%Y Monday")
|
||||
|
||||
|
||||
file_name = f"{formatted} DEAL_NOTES_FROM_HUBSPOT.xlsx"
|
||||
wb.save(file_name)
|
||||
output_path = os.path.abspath(file_name)
|
||||
wb.save(output_path)
|
||||
|
||||
# Upload to SharePoint
|
||||
sharepoint_client = SharePointScraper(SharePointInstaller.DOMNA)
|
||||
sharepoint_client.upload_file(output_path, f"02. Sales and Marketing/02. Deal Notes from Hubspot/{formatted}",file_name)
|
||||
sharepoint_client.upload_file(
|
||||
output_path,
|
||||
f"02. Sales and Marketing/02. Deal Notes from Hubspot/{formatted}",
|
||||
file_name
|
||||
)
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ from enum import Enum
|
|||
from hubspot.crm.deals import PublicObjectSearchRequest
|
||||
from hubspot.crm.deals.models import SimplePublicObjectInput
|
||||
from etl.hubSpotClient.types import SubmissionInfoFromDeal
|
||||
import time
|
||||
|
||||
|
||||
|
||||
|
|
@ -20,10 +21,17 @@ class HubSpotClient():
|
|||
def get_all_deals(self):
|
||||
return self.client.crm.deals.get_all()
|
||||
|
||||
def get_owner_name_from_id(self, owner_id):
    """Return the owner's display name for a HubSpot owner id.

    Looks the owner up through the CRM owners API, pauses for half a second
    after the request, and returns "<first> <last>" with surrounding
    whitespace removed. A missing first or last name is treated as an empty
    string, so the result may be a single name or "".
    """
    record = self.client.crm.owners.owners_api.get_by_id(owner_id)
    # Pause after each lookup; callers invoke this once per deal in a loop.
    time.sleep(0.5)
    name_parts = (record.first_name or "", record.last_name or "")
    return " ".join(name_parts).strip()
|
||||
|
||||
def get_deal_name_by_id(self, deal_id):
    """Return the name of a deal, always as a non-empty string.

    Args:
        deal_id: HubSpot id of the deal to look up.

    Returns:
        The deal's ``dealname`` property; "No deal name" when the property is
        absent, null or empty; "Unknown Deal" when the lookup itself fails.
    """
    try:
        deal = self.client.crm.deals.basic_api.get_by_id(deal_id)
        # Pause after each lookup; callers invoke this once per deal in a loop.
        time.sleep(0.5)
        # `or` instead of a .get() default: the key can be present with a
        # null value, and .get() would then return None, which breaks callers
        # that immediately call string methods on the result.
        return deal.properties.get("dealname") or "No deal name"
    except Exception:
        # Deliberate best-effort: one unreadable deal must not abort a whole
        # export run, so fall back to a placeholder name.
        return "Unknown Deal"
|
||||
|
|
@ -66,6 +74,7 @@ class HubSpotClient():
|
|||
all_notes.append({
|
||||
"note_id": note.id,
|
||||
"note": note_body,
|
||||
"created_at": note.created_at.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
})
|
||||
return all_notes
|
||||
|
||||
|
|
@ -84,6 +93,8 @@ class HubSpotClient():
|
|||
}],
|
||||
properties=[
|
||||
"dealname",
|
||||
"amount",
|
||||
"hubspot_owner_id",
|
||||
],
|
||||
limit=200,
|
||||
after=after,
|
||||
|
|
@ -96,7 +107,11 @@ class HubSpotClient():
|
|||
|
||||
all_deals = []
|
||||
for deal in found_deals:
|
||||
all_deals.append(deal.id)
|
||||
all_deals.append({
|
||||
"deal_id": deal.id,
|
||||
"value": deal.properties["amount"],
|
||||
"deal_owner": deal.properties.get("hubspot_owner_id"),
|
||||
})
|
||||
return all_deals
|
||||
|
||||
def get_deals_from_deal_stage(self, deal_stage: DealStage):
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ sp.move_deals_to_completed(deal_ids)
|
|||
P3) Write documentation for tech demos from Khalims demo
|
||||
|
||||
Tuesday
|
||||
P1) - Get ready for demo, 3 examples of solar ( JJC AND SCIS), 3 examples of cavity wall ( SCIS and JJC) 12 in total
|
||||
P1) - Get ready for demo, 3 examples of solar ( JJC AND SCIS), 3 examples of cavity wall ( SCIS and JJC) 12 in total
|
||||
P2) Review deem score with last week's deem score values to ensure accuracy
|
||||
P3) Figure out what to do if I see an address that isn't registered but surveyrod
|
||||
"""
|
||||
Loading…
Add table
Reference in a new issue