script works?

2026-06-30 13:10:56 +00:00 · 2025-04-17 14:04:19 +00:00 · 2025-04-17 14:04:19 +00:00 · c7dc758d28
commit c7dc758d28
parent 04a27c4353
3 changed files with 132 additions and 74 deletions
--- a/etl/dimitra_hubspot_notes_gather.py
+++ b/etl/dimitra_hubspot_notes_gather.py
@@ -1,116 +1,159 @@
 import os
-
-os.environ["SHAREPOINT_CLIENT_ID"] = "6832a4c5-fb8c-4082-a746-4f51e1020f0d"
-os.environ["SHAREPOINT_CLIENT_SECRET"] = "xpC8Q~Frww48SM1V-D8lGy5iOY7P_cJ7FF3jgarQ"
-os.environ["SHAREPOINT_TENANT_ID"] = "10d5af8b-2cfd-4882-9ccd-b96e4812dacf"
-
-from etl.scraper.scraper import SharePointScraper, SharePointInstaller, previous_monday
-from etl.hubSpotClient.hubspot import HubSpotClient, DealStage
+from datetime import timedelta, timezone
+import datetime
 import pandas as pd
 from bs4 import BeautifulSoup
 from openpyxl import Workbook
 from openpyxl.styles import Font
-
+from etl.scraper.scraper import SharePointScraper, SharePointInstaller, previous_monday
+from etl.hubSpotClient.hubspot import HubSpotClient, DealStage
+from collections import defaultdict
+import time
+# Auth credentials
+os.environ["SHAREPOINT_CLIENT_ID"] = "6832a4c5-fb8c-4082-a746-4f51e1020f0d"
+os.environ["SHAREPOINT_CLIENT_SECRET"] = "xpC8Q~Frww48SM1V-D8lGy5iOY7P_cJ7FF3jgarQ"
+os.environ["SHAREPOINT_TENANT_ID"] = "10d5af8b-2cfd-4882-9ccd-b96e4812dacf"

 hubspot = HubSpotClient()
-import time
-pipelines_to_include =[
+
+# Week calculation
+today = datetime.datetime.now(datetime.UTC)
+week1_start = today - timedelta(days=7)
+week2_start = today - timedelta(days=14)
+week3_start = today - timedelta(days=21)
+
+def get_week_label(created_at_str):
+    created_at = datetime.datetime.strptime(created_at_str, '%Y-%m-%d %H:%M:%S')
+    created_at = created_at.replace(tzinfo=timezone.utc)
+    if week1_start <= created_at <= today:
+        return "Week 1"
+    elif week2_start <= created_at < week1_start:
+        return "Week 2"
+    elif week3_start <= created_at < week2_start:
+        return "Week 3"
+    return None  # Ignore notes outside the 3-week range
+
+# Pipelines to include
+pipelines_to_include = [
    "SALES - SOCIAL HOUSING",
    "PVT PAY",
    "NRLA GENERAL ENQUIRIES",
-    # "OSMOSIS - SALES",
-
 ]
+
 exclude_stage = {
-    "SALES - SOCIAL HOUSING" : [
-        "HA TO REENGAGE",
-        "APPOINTMENT SCHEDULED",
-        "AWAITING ASSET LIST",
-        "ASSET LIST RECEIVED",
-        "ASSET LIST STANDARDISED",
-        "ROUTE MARCH CREATED",
+    "SALES - SOCIAL HOUSING": [
+        "HA TO REENGAGE", "APPOINTMENT SCHEDULED", "AWAITING ASSET LIST", 
+        "ASSET LIST RECEIVED", "ASSET LIST STANDARDISED", "ROUTE MARCH CREATED", 
        "HA WEEKLY REPORTING",
    ],
    "PVT PAY": [
-        "LIVE OPPORTUNITY",
-        "CLOSED LOST",
-        "INVOICED",
-        "COLD - KIT",
-        "CLOSED WON",
-
+        "LIVE OPPORTUNITY", "CLOSED LOST", "INVOICED", "COLD - KIT", "CLOSED WON",
    ],
    "NRLA GENERAL ENQUIRIES": [
-        "CUSTOMER CONTACTED",
-        "LOST",
-        "COLD",
+        "CUSTOMER CONTACTED", "LOST", "COLD",
    ]
 }

-include_pipeline_upper = [s.upper().strip() for s in pipelines_to_include]
-exclude_stage_upper = [s.upper().strip() for s in exclude_stage]
-notes_data = []
+notes_data = defaultdict(list)
 pipelines = hubspot.client.crm.pipelines.pipelines_api.get_all(object_type="deals")
+
 for pipeline in pipelines.results:
    pipeline_name = pipeline.label.upper().strip()
    if pipeline_name in pipelines_to_include:
        for stage in pipeline.stages:
-            if stage.label.upper().strip() not in exclude_stage[pipeline_name]:
-                for deal_id in hubspot.get_all_deals_from_stage_id(stage.id):
-                    notes = hubspot.get_notes_from_deals_id(deal_id)
+            if stage.label.upper().strip() not in [s.upper() for s in exclude_stage.get(pipeline_name, [])]:
+                for deals in hubspot.get_all_deals_from_stage_id(stage.id):
+                    time.sleep(1)
+                    deal_notes_by_week = {"Week 1": [], "Week 2": [], "Week 3": []}
+                    notes = hubspot.get_notes_from_deals_id(deals["deal_id"])
+                    
                    for note in notes:
-                        deal_name = hubspot.get_deal_name_by_id(deal_id)
+                        week_label = get_week_label(note["created_at"])
+                        if not week_label:
+                            continue
                        html_body = note['note']
                        soup = BeautifulSoup(html_body, "html.parser")
-                        plain_text = soup.get_text(separator="\n")  # Keeps line breaks
-                        notes_data.append({
-                            "note_body": plain_text,
-                            "deal_name": deal_name,  # Include deal_id to relate the note to the deal
-                            "pipeline_name": pipeline.label  # Add the pipeline name
+                        plain_text = soup.get_text(separator="\n")
+                        deal_notes_by_week[week_label].append(plain_text)
+
+                    if any(deal_notes_by_week.values()):
+                        deal_name = hubspot.get_deal_name_by_id(deals["deal_id"])
+                        owner_name = "not assigned"
+                        if deals["deal_owner"]:
+                            owner_name = hubspot.get_owner_name_from_id(deals['deal_owner'])
+
+                        notes_data[pipeline_name].append({
+                            "Deal Name": deal_name.upper(),
+                            "Deal Owner": owner_name,
+                            "Deal Stage": stage.label.upper(),
+                            "Value": deals["value"],
+                            "Notes Week 1": "\n---\n".join(deal_notes_by_week["Week 1"]),
+                            "Notes Week 2": "\n---\n".join(deal_notes_by_week["Week 2"]),
+                            "Notes Week 3": "\n---\n".join(deal_notes_by_week["Week 3"]),
                        })
-
-                        time.sleep(2)
                        print("delay to not bombard the server")
+                        time.sleep(2)

-notes_df = pd.DataFrame(notes_data)
-notes_df.to_csv("output.csv")
-df = notes_df
-
+# Create Excel Workbook
 wb = Workbook()
-wb.remove(wb.active)  # Remove default sheet
+wb.remove(wb.active)

-for pipeline, group_df in df.groupby("pipeline_name"):
-    ws = wb.create_sheet(title=pipeline[:31])  # Excel sheet name limit = 31 chars
+for pipeline, deals in notes_data.items():
+    ws = wb.create_sheet(title=pipeline[:31])

-    # Sort by deal name
-    group_df = group_df.sort_values("deal_name")
+    headers = ["Deal Name", "Deal Owner", "Deal Stage", "Value", "Notes Week 1", "Notes Week 2", "Notes Week 3"]
+    ws.append(headers)
+    for cell in ws[1]:
+        cell.font = Font(bold=True)

-    current_row = 1
-    for deal_name, deal_notes in group_df.groupby("deal_name"):
-        # Bold header for each deal
-        ws.cell(row=current_row, column=1, value=f"Deal Stage: {deal_name}")
-        ws.cell(row=current_row, column=1).font = Font(bold=True)
-        current_row += 1
+    for row in deals:
+        # Normalize notes to always be lists
+        week_notes = {}
+        for week in range(1, 4):
+            key = f"Notes Week {week}"
+            note_data = row.get(key, [])
+            if isinstance(note_data, str):
+                note_data = [note_data]
+            week_notes[week] = note_data

-        # Notes for the deal
-        for note in deal_notes["note_body"]:
-            ws.cell(row=current_row, column=2, value=note)
-            current_row += 1
+        # Get first note per week (if any)
+        first_notes = [week_notes[week][0] if len(week_notes[week]) > 0 else "" for week in range(1, 4)]

-        # Add a blank row between groups
-        current_row += 1
+        # Add main deal row + first notes
+        ws.append([
+            row["Deal Name"],
+            row["Deal Owner"],
+            row["Deal Stage"],
+            row["Value"],
+            *first_notes
+        ])

-# Save to Excel
-from datetime import datetime, timedelta
-today = datetime.today()
+        # Determine max number of remaining notes
+        max_additional_notes = max(len(week_notes[week]) for week in range(1, 4)) - 1
+
+        # Add remaining notes
+        for i in range(1, max_additional_notes + 1):
+            note_row = ["", "", "", ""]  # Empty deal columns
+            for week in range(1, 4):
+                notes = week_notes[week]
+                note = notes[i] if i < len(notes) else ""
+                note_row.append(note)
+            ws.append(note_row)
+
+
+# Generate file name with next Monday’s date
 days_ahead = (7 - today.weekday()) % 7
-days_ahead = 7 if days_ahead == 0 else days_ahead  # If today is Monday, get *next* Monday
+days_ahead = 7 if days_ahead == 0 else days_ahead
 next_monday = today + timedelta(days=days_ahead)
-
 formatted = next_monday.strftime("%d-%m-%Y Monday")
-
-
 file_name = f"{formatted} DEAL_NOTES_FROM_HUBSPOT.xlsx"
-wb.save(file_name)
 output_path = os.path.abspath(file_name)
+wb.save(output_path)
+
+# Upload to SharePoint
 sharepoint_client = SharePointScraper(SharePointInstaller.DOMNA)
-sharepoint_client.upload_file(output_path, f"02. Sales and Marketing/02. Deal Notes from Hubspot/{formatted}",file_name)
+sharepoint_client.upload_file(
+    output_path,
+    f"02. Sales and Marketing/02. Deal Notes from Hubspot/{formatted}",
+    file_name
+)
--- a/etl/hubSpotClient/hubspot.py
+++ b/etl/hubSpotClient/hubspot.py
@@ -3,6 +3,7 @@ from enum import Enum
 from hubspot.crm.deals import PublicObjectSearchRequest
 from hubspot.crm.deals.models import SimplePublicObjectInput
 from etl.hubSpotClient.types import SubmissionInfoFromDeal
+import time



@@ -20,10 +21,17 @@ class HubSpotClient():
    def get_all_deals(self):
        return self.client.crm.deals.get_all()

+    def get_owner_name_from_id(self, owner_id):
+        owner = self.client.crm.owners.owners_api.get_by_id(owner_id)
+        time.sleep(0.5)
+        first_name = owner.first_name or ""
+        last_name = owner.last_name or ""
+        return f"{first_name} {last_name}".strip()
        
    def get_deal_name_by_id(self, deal_id):
        try:
            deal = self.client.crm.deals.basic_api.get_by_id(deal_id)
+            time.sleep(0.5)
            return deal.properties.get("dealname", "No deal name")
        except Exception as e:
            return "Unknown Deal"  # Fallback if the deal name is not found
@@ -66,6 +74,7 @@ class HubSpotClient():
            all_notes.append({
                "note_id": note.id,
                "note": note_body,
+                "created_at": note.created_at.strftime("%Y-%m-%d %H:%M:%S"),
            })
        return all_notes
    
@@ -84,6 +93,8 @@ class HubSpotClient():
                }],
                properties=[
                    "dealname",
+                    "amount",
+                    "hubspot_owner_id",
                ],
                limit=200,
                after=after,
@@ -96,7 +107,11 @@ class HubSpotClient():

        all_deals = []
        for deal in found_deals:
-            all_deals.append(deal.id)
+            all_deals.append({
+                "deal_id": deal.id,
+                "value": deal.properties["amount"],
+                "deal_owner": deal.properties.get("hubspot_owner_id"),
+            })
        return all_deals

    def get_deals_from_deal_stage(self, deal_stage: DealStage):
--- a/etl/hubspot_to_invoice.py
+++ b/etl/hubspot_to_invoice.py
@@ -44,7 +44,7 @@ sp.move_deals_to_completed(deal_ids)
 P3) Write documentation for tech demos from Khalims demo

 Tuesday 
- P1) - Get ready for demo, 3 examples of solar ( JJC AND SCIS), 3 examples of cavity wall ( SCIS and JJC) 12 in total
+ P1) - Get read for demo, 3 examples of solar ( JJC AND SCIS), 3 examples of cavity wall ( SCIS and JJC) 12 in total
 P2) Review deem score with last weeks deem score values to ensure accuracy
 P3) Figure out what to do if I see an address that isn't registered but surveyrod
 """