This commit is contained in:
Jun-te Kim 2025-12-12 19:49:11 +00:00
parent c64b871b74
commit ff28c5654f
9 changed files with 359 additions and 166 deletions

21
.vscode/settings.json vendored
View file

@ -10,6 +10,27 @@
}
},
// Allow VSCode native keybindings to override Vim when needed
"vim.handleKeys": {
"<C-p>": false,
"<C-P>": false,
"<C-S-p>": false,
"<C-c>": false,
"<C-v>": false,
"<C-S-v>": false,
"<C-S-c>": false,
"<C-S-e>": false,
"<C-j>": false,
"<C-b>": false
},
// Terminal copy/paste via Ctrl+Shift+C / Ctrl+Shift+V
"terminal.integrated.copyOnSelection": false,
"terminal.integrated.commandsToSkipShell": [
"workbench.action.terminal.copySelection",
"workbench.action.terminal.paste"
],
// Hot reload setting that needs to be in user settings
// Ctrl + Shift + P, "Preferences: Open User Settings (JSON)"

View file

@ -1,139 +1,301 @@
# pages/sales_forecast.py
import dash
from dash import html, dcc, dash_table, Input, Output
from dash import html, dcc, dash_table, Input, Output, State, ctx
import dash_bootstrap_components as dbc
import pandas as pd
from datetime import datetime
from dashboard.services.file_manager import FileManager
from dashboard.services.json_reader import jsonReader
from dashboard.components.pivot_charts import week_start_monday
import os
dash.register_page(__name__, path="/sales-forecast", name="Sales Forecast")
import json
# -----------------------
# Load base dataframe
# -----------------------
dash.register_page(__name__, path="/sales-forecast", name="Sales Forecast")
SAFE_DELIM = "\\\\"
# -----------------------------------------------------
# Load & Build Master DF
# Helpers
# -----------------------------------------------------
def build_master_df(local=False):
if local is False:
def weeks_between(start, end):
    """Return the week-start timestamps covering ``start`` through ``end``.

    Both bounds are parsed with ``pd.to_datetime(..., errors="coerce")``.
    An empty list is returned when either bound is missing/unparseable or
    when ``start`` falls after ``end``. Otherwise the result begins at
    ``week_start_monday(start)`` and advances in 7-day steps up to and
    including ``week_start_monday(end)``.
    """
    first_day = pd.to_datetime(start, errors="coerce")
    last_day = pd.to_datetime(end, errors="coerce")

    # Guard clauses: nothing to enumerate without two ordered, valid dates.
    if pd.isna(first_day) or pd.isna(last_day):
        return []
    if first_day > last_day:
        return []

    cursor = pd.to_datetime(week_start_monday(first_day))
    final_week = pd.to_datetime(week_start_monday(last_day))
    one_week = pd.Timedelta(weeks=1)

    buckets = []
    while cursor <= final_week:
        buckets.append(cursor)
        cursor = cursor + one_week
    return buckets
# -----------------------------------------------------
# Build Forecast DF
# -----------------------------------------------------
def build_master_df(local=False) -> pd.DataFrame:
    """Build the weekly sales-forecast dataframe from HubSpot deal data.

    Every deal listed under a line-item name ("work type") contributes one
    row per planned week. When the deal has an end date on or after its start
    date, the matching line-item price is split evenly over every week from
    start to end; otherwise the whole price is booked in the start week.
    Deals whose company name is "Apple" and deals without a parseable start
    date are skipped.

    Args:
        local: When true, read ``data.json`` located next to this module
            instead of downloading the latest export through ``FileManager``.

    Returns:
        DataFrame with the columns ``hubspot_id``, ``deal_name``,
        ``company_name``, ``work_type``, ``Planned Week`` (``YYYY-MM-DD``
        string) and ``revenue``. The columns exist even when no deal
        qualifies, so callers can group on them unconditionally.
    """
    if not local:
        s3 = FileManager()
        _, _, data = s3.download_and_read_latest(
            bucket="retrofit-data-dev",
            prefix="hubspot_insight/sales_forecast/",
            path="sales_forecast",
        )
    else:
        local_file = os.path.join(os.path.dirname(__file__), "data.json")
        with open(local_file, encoding="utf-8") as f:
            data = json.load(f)

    hubspot_data = jsonReader(data)

    columns = [
        "hubspot_id",
        "deal_name",
        "company_name",
        "work_type",
        "Planned Week",
        "revenue",
    ]
    rows = []

    for work_type in hubspot_data.line_item_names:
        for deal in hubspot_data.deals_by_line_item.get(work_type, []):
            if deal["company_info"]["name"] == "Apple":
                continue

            props = deal["deal_properties"]

            # First line item whose name mentions this work type; a missing
            # or null name/price counts as "no match" / 0 instead of raising.
            price = next(
                (
                    float(item.get("price") or 0)
                    for item in deal.get("line_items", [])
                    if work_type.lower() in (item.get("name") or "").lower()
                ),
                0,
            )

            start = pd.to_datetime(
                hubspot_data.to_date_only(props.get("expected_project_start_date")),
                errors="coerce",
            )
            end = pd.to_datetime(
                hubspot_data.to_date_only(props.get("expected_project_end_date")),
                errors="coerce",
            )

            # Without a start date the deal cannot be placed on the timeline.
            if pd.isna(start):
                continue

            if pd.notna(end) and end >= start:
                # Spread the price evenly across the project's weeks.
                weeks = weeks_between(start, end)
                weekly_rev = price / len(weeks) if weeks else 0
            else:
                # No usable end date: book everything in the start week.
                weeks = [pd.to_datetime(week_start_monday(start))]
                weekly_rev = price

            for week in weeks:
                rows.append(
                    {
                        "hubspot_id": str(props["deal_id"]),
                        "deal_name": props["dealname"],
                        "company_name": deal["company_info"]["name"],
                        "work_type": work_type,
                        "Planned Week": week.strftime("%Y-%m-%d"),
                        "revenue": weekly_rev,
                    }
                )

    # Passing ``columns`` keeps the schema intact when ``rows`` is empty;
    # otherwise the "revenue" lookup below would raise KeyError.
    forecast = pd.DataFrame(rows, columns=columns)
    forecast["revenue"] = pd.to_numeric(forecast["revenue"], errors="coerce").fillna(0)
    return forecast
# Load data once (refresh button can rebuild)
df = build_master_df()
# -----------------------
# Page Layout
# -----------------------
# -----------------------------------------------------
# Layout
# -----------------------------------------------------
layout = html.Div(
[
html.H1("Sales Forecast", className="text-center"),
html.Hr(),
layout = html.Div([
# ---------------- SUMMARY TABLES ----------------
html.H3("Revenue by Work Type"),
dash_table.DataTable(
id="sf-worktype-table",
sort_action="native",
style_cell={"textAlign": "center"},
),
html.H1("Sales Forecast", className="text-center"),
html.Hr(),
html.P(
"This page projects expected revenue and job volume into future weeks "
"based on existing HubSpot data.",
className="text-center text-muted"
),
html.H3("Revenue by Company"),
dash_table.DataTable(
id="sf-company-table",
sort_action="native",
style_cell={"textAlign": "center"},
),
html.Hr(),
html.Hr(),
dcc.Dropdown(
id="forecast-product-filter",
options=[{"label": p, "value": p} for p in sorted(df["product_type"].unique())],
multi=True,
placeholder="Filter by product type…",
style={"width": "400px", "margin": "0 auto"},
),
# ---------------- WEEKLY FUTURE VIEW ----------------
html.H3("Weekly Planned Revenue (£)"),
dash_table.DataTable(
id="sf-weekly-table",
page_size=20,
style_table={"overflowX": "scroll"},
style_cell={"textAlign": "center"},
),
html.Br(),
html.Hr(),
dash_table.DataTable(
id="forecast-table",
page_size=20,
style_table={"overflowX": "auto"},
style_cell={"textAlign": "center"},
),
dcc.Graph(id="sf-weekly-revenue-graph"),
html.Hr(),
html.H2("Forecasted Revenue (£)", className="text-center"),
dcc.Graph(id="forecast-revenue-graph"),
html.H2("Forecasted Job Volume", className="text-center mt-4"),
dcc.Graph(id="forecast-volume-graph"),
])
# -----------------------
# Callbacks
# -----------------------
@dash.callback(
Output("forecast-table", "data"),
Output("forecast-table", "columns"),
Output("forecast-revenue-graph", "figure"),
Output("forecast-volume-graph", "figure"),
Input("forecast-product-filter", "value"),
# ---------------- MODAL (NAMESPACED) ----------------
dbc.Modal(
[
dbc.ModalHeader("HubSpot Deals"),
dbc.ModalBody(id="sf-modal-body"),
dbc.ModalFooter(
dbc.Button("Close", id="sf-close-modal", className="ms-auto")
),
],
id="sf-hubspot-modal",
size="lg",
is_open=False,
),
]
)
def build_forecast(products):
df_filtered = df.copy()
if products:
df_filtered = df_filtered[df_filtered["product_type"].isin(products)]
# -----------------------------------------------------
# Build Tables + Graph
# -----------------------------------------------------
@dash.callback(
    Output("sf-worktype-table", "data"),
    Output("sf-worktype-table", "columns"),
    Output("sf-company-table", "data"),
    Output("sf-company-table", "columns"),
    Output("sf-weekly-table", "data"),
    Output("sf-weekly-table", "columns"),
    Output("sf-weekly-revenue-graph", "figure"),
    Input("sf-weekly-table", "id"),  # run once
)
def build_outputs(_):
    """Fill the two summary tables, the weekly table and the weekly graph.

    All figures come from the module-level ``df``. The callback's only input
    is the weekly table's ``id`` property (marked "run once" in the
    decorator), and its value is ignored.

    Returns:
        A 7-tuple matching the declared outputs: records and column specs
        for the work-type table, the company table and the weekly table,
        followed by the weekly revenue line figure.
    """
    import plotly.express as px

    def titled_columns(frame):
        # "work_type" -> "Work Type" for the summary-table headers.
        return [
            {"name": c.replace("_", " ").title(), "id": c} for c in frame.columns
        ]

    # -------- Revenue by work type --------
    by_work = (
        df.groupby("work_type")
        .agg(
            revenue=("revenue", "sum"),
            jobs=("hubspot_id", "nunique"),
        )
        .reset_index()
    )

    # -------- Revenue by company --------
    by_company = (
        df.groupby("company_name")
        .agg(
            revenue=("revenue", "sum"),
            jobs=("hubspot_id", "nunique"),
        )
        .reset_index()
    )

    # -------- Weekly pivot --------
    # One row per (work type, week): summed revenue plus the distinct deal
    # ids joined with SAFE_DELIM.
    pivot = (
        df.groupby(["work_type", "Planned Week"])
        .agg(
            revenue=("revenue", "sum"),
            ids=("hubspot_id", lambda x: SAFE_DELIM.join(sorted(set(x)))),
        )
        .reset_index()
    )

    revenue_tbl = pivot.pivot(
        index="work_type", columns="Planned Week", values="revenue"
    ).fillna(0)
    ids_tbl = pivot.pivot(
        index="work_type", columns="Planned Week", values="ids"
    ).fillna("")

    revenue_tbl["Work Type"] = revenue_tbl.index
    ids_tbl["Work Type"] = ids_tbl.index

    # Week columns keep their name for revenue and gain an "_ids" suffix for
    # the deal-id strings.
    # NOTE(review): the "<week>_ids" columns are also listed as visible table
    # columns below - confirm whether they should be data-only.
    weekly = revenue_tbl.merge(ids_tbl, on="Work Type", suffixes=("", "_ids"))

    # -------- Weekly graph --------
    weekly_rev = (
        df.groupby("Planned Week")["revenue"]
        .sum()
        .reset_index()
        .sort_values("Planned Week")
    )

    fig = px.line(
        weekly_rev,
        x="Planned Week",
        y="revenue",
        markers=True,
        title="Weekly Planned Revenue (£)",
    )

    return (
        by_work.to_dict("records"),
        titled_columns(by_work),
        by_company.to_dict("records"),
        titled_columns(by_company),
        weekly.to_dict("records"),
        [{"name": c, "id": c} for c in weekly.columns],
        fig,
    )
# -----------------------------------------------------
# Modal: HubSpot debug (SINGLE CALLBACK, NAMESPACED)
# -----------------------------------------------------
def id_to_link(deal_id):
    """Return an ``html.Li`` containing a link to the deal's HubSpot record.

    The link text is the ``deal_name`` of the first row in the module-level
    ``df`` whose ``hubspot_id`` equals ``deal_id``; when no row matches, the
    id itself is used as the text. The link opens in a new tab.
    """
    rows_for_deal = df.loc[df["hubspot_id"] == deal_id]
    if rows_for_deal.empty:
        link_text = deal_id
    else:
        link_text = rows_for_deal.iloc[0]["deal_name"]

    record_url = f"https://app.hubspot.com/contacts/145275138/record/0-3/{deal_id}"
    anchor = html.A(link_text, href=record_url, target="_blank")
    return html.Li(anchor)
@dash.callback(
    Output("sf-hubspot-modal", "is_open"),
    Output("sf-modal-body", "children"),
    Output("sf-weekly-table", "active_cell"),
    Input("sf-weekly-table", "active_cell"),
    Input("sf-close-modal", "n_clicks"),
    State("sf-weekly-table", "data"),
    State("sf-hubspot-modal", "is_open"),
)
def open_modal(cell, close_click, table_data, is_open):
    """Open or close the HubSpot deals modal for the weekly table.

    Returns ``(is_open, modal body, active_cell)``. The third value is always
    ``None``, which clears the table's active cell after every invocation.

    * Triggered by the close button: close the modal with an empty body.
    * No active cell: keep the current open state, with an empty body.
    * Cell in the "Work Type" column: open with a hint to pick a week column.
    * Any other cell: open with one link per deal id stored under
      ``"<column>_ids"`` in the clicked row, or a "no deals" message when
      that value is empty or absent.
    """
    if ctx.triggered_id == "sf-close-modal":
        return False, "", None

    if not cell:
        return is_open, "", None

    clicked_row = table_data[cell["row"]]
    column_id = cell["column_id"]

    if column_id == "Work Type":
        body = html.P("Select a week column to view HubSpot deals.")
    else:
        joined_ids = clicked_row.get(f"{column_id}_ids", "")
        if joined_ids:
            body = html.Ul(
                [id_to_link(deal_id) for deal_id in joined_ids.split(SAFE_DELIM)]
            )
        else:
            body = html.P("No deals recorded for this cell.")

    return True, body, None
# TODO: Forgot to add quantity.

View file

@ -20,5 +20,4 @@ async def main():
if __name__ == "__main__":
deals = await main()
print(deals)
deals = await main()

View file

@ -42,6 +42,7 @@ async def main():
"2774202608",
"2337194212",
"2870263028",
"2992620766"
])
total = len(deals)
@ -83,6 +84,7 @@ async def main():
if __name__ == "__main__":
asyncio.run(main())
# await main()
fm = FileManager()
timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")

View file

@ -5,6 +5,7 @@ from datetime import datetime
from dashboard.services.hubspot_client import Pipeline
from dashboard.services.hubspot_client_async import HubSpotClientAsync
from dashboard.services.file_manager import FileManager
from pprint import pprint
OUTPUT_FILE = "hubspot_deals.json"
@ -12,8 +13,8 @@ OUTPUT_FILE = "hubspot_deals.json"
async def main():
hubspot = HubSpotClientAsync()
deal_id = "234993273058"
print(f"Fetching only deal: {deal_id}")
deal_id = "398392991967" # doesn't have compnay
# deal_id = "253867733217" # has company
try:
data = await hubspot.from_deal_get_info(deal_id)

View file

@ -62,7 +62,8 @@ class FileManager:
def download_and_read_latest(
self,
bucket: str = "retrofit-data-dev",
prefix: str = "hubspot_insight/"
prefix: str = "hubspot_insight/",
path = "",
):
os.makedirs(self.download_dir, exist_ok=True)
@ -91,7 +92,7 @@ class FileManager:
# ------------------------------------------------------
# Download
# ------------------------------------------------------
filename = latest_key.split("/")[-1]
filename = path + "_"+ latest_key.split("/")[-1]
local_path = os.path.join(self.download_dir, filename)
self.s3.download_file(bucket, latest_key, local_path)

View file

@ -127,6 +127,8 @@ class HubSpotClientAsync:
'item_id__monday_com_',
'funding_type',
'coordination_status__stage_1_',
'expected_project_start_date',
'expected_project_end_date'
]
)
@ -138,7 +140,7 @@ class HubSpotClientAsync:
line_items = await self.from_deal_get_line_items(deal_id)
company_id = await self.from_deal_get_associated_company_id(deal_id)
company_info = await self.get_company_information(company_id) if company_id else {}
company_info = await self.get_company_information(company_id)
appointments = await self.from_deal_get_appointments(deal_id)
return {
@ -152,12 +154,19 @@ class HubSpotClientAsync:
# Company Info
# -----------------------------------
async def get_company_information(self, company_id):
    """Fetch the ``name`` property of a HubSpot company.

    Args:
        company_id: HubSpot company id. Any falsy value (``None``, empty
            string) is treated as "the deal has no associated company",
            so such values never reach the HubSpot API.

    Returns:
        ``company.properties`` of the fetched company, or a dict with a
        placeholder ``name`` when there is no company id, so callers can
        always read ``["name"]``.
    """
    if not company_id:
        return {
            "name": "NO COMPANY ASSOCIATION IN HUBSPOT - FIX ME"
        }

    company = await self._run(
        self.client.crm.companies.basic_api.get_by_id,
        company_id,
        properties=['name']
    )
    return company.properties
# -----------------------------------
# Pipelines

View file

@ -60,64 +60,80 @@ class jsonReader:
def _return_df_from_deal_info(self, deal, product_type):
rows = []
print(deal)
if deal["company_info"]["name"] != "Apple":
if deal["attempts"]:
# Multiple attempts => multiple rows
for attempt in deal["attempts"]:
data = {
"submission_date": self.to_date_only(attempt["submission_date"]),
"hubspot_id": deal["deal_properties"]["deal_id"],
"expected_commencement_date": self.to_date_only(attempt["expected_commencement_date"]),
"work_type": product_type,
"price": next(
(item["price"] for item in deal["line_items"] if product_type in item["name"]),
None
),
"deal_name": deal["deal_properties"]["dealname"],
"company_name": deal["company_info"]["name"],
}
data = self._use_different_expected_commencement_data(data, deal)
rows.append(data)
else:
def historical_ecd_value_processes(timestamp):
if timestamp is None or timestamp == '':
return None
dt = datetime.strptime(timestamp, "%Y-%m-%d")
return dt.strftime("%Y-%m-%d")
history = deal["deal_properties"]["expected_commencement_history"]
if deal["attempts"]:
# Multiple attempts => multiple rows
for attempt in deal["attempts"]:
data = {
"submission_date": self.to_date_only(attempt["submission_date"]),
"hubspot_id": deal["deal_properties"]["deal_id"],
"expected_commencement_date": self.to_date_only(attempt["expected_commencement_date"]),
"work_type": product_type,
"price": next(
(item["price"] for item in deal["line_items"] if product_type in item["name"]),
None
),
"deal_name": deal["deal_properties"]["dealname"],
"company_name": deal["company_info"]["name"],
}
data = self._use_different_expected_commencement_data(data, deal)
rows.append(data)
else:
def historical_ecd_value_processes(timestamp):
if timestamp is None or timestamp == '':
return None
dt = datetime.strptime(timestamp, "%Y-%m-%d")
return dt.strftime("%Y-%m-%d")
history = deal["deal_properties"]["expected_commencement_history"]
# ---- SORT HISTORY: latest first ----
history_sorted = sorted(
history,
key=lambda h: datetime.strptime(h["timestamp"].split("T")[0], "%Y-%m-%d"),
reverse=True
)
# ---- SORT HISTORY: latest first ----
history_sorted = sorted(
history,
key=lambda h: datetime.strptime(h["timestamp"].split("T")[0], "%Y-%m-%d"),
reverse=True
)
# Extract latest expected commencement date
if history_sorted:
latest = history_sorted[0]
latest_ecd = historical_ecd_value_processes(latest["value"]) # returns YYYY-MM-DD or None
# Extract latest expected commencement date
if history_sorted:
latest = history_sorted[0]
latest_ecd = historical_ecd_value_processes(latest["value"]) # returns YYYY-MM-DD or None
# Convert submission date
raw_submission_date = deal["deal_properties"].get("last_submission_date")
submission_date = self.to_date_only(raw_submission_date) if raw_submission_date else None
# Convert submission date
raw_submission_date = deal["deal_properties"].get("last_submission_date")
submission_date = self.to_date_only(raw_submission_date) if raw_submission_date else None
# Convert both to datetime for comparison
if submission_date and latest_ecd:
dt_sub = datetime.strptime(submission_date, "%Y-%m-%d")
dt_ecd = datetime.strptime(latest_ecd, "%Y-%m-%d")
# Convert both to datetime for comparison
if submission_date and latest_ecd:
dt_sub = datetime.strptime(submission_date, "%Y-%m-%d")
dt_ecd = datetime.strptime(latest_ecd, "%Y-%m-%d")
# Only keep submission date if submission_date > latest ECD
if dt_sub <= dt_ecd:
submission_date = None
else:
# Only keep submission date if submission_date > latest ECD
if dt_sub <= dt_ecd:
submission_date = None
else:
submission_date = None
# 1⃣ Add latest expected commencement date WITH conditional submission date
# 1⃣ Add latest expected commencement date WITH conditional submission date
data = {
"submission_date": submission_date,
"expected_commencement_date": latest_ecd,
"hubspot_id": deal["deal_properties"]["deal_id"],
"work_type": product_type,
"price": next(
(item["price"] for item in deal["line_items"] if product_type in item["name"]),
None
),
"deal_name": deal["deal_properties"]["dealname"],
"company_name": deal["company_info"]["name"],
}
data = self._use_different_expected_commencement_data(data, deal)
rows.append(data)
# 2⃣ Add the remaining history WITHOUT submission date
for attempt in history_sorted[1:]:
data = {
"submission_date": submission_date,
"expected_commencement_date": latest_ecd,
"submission_date": None,
"expected_commencement_date": historical_ecd_value_processes(attempt["value"]),
"hubspot_id": deal["deal_properties"]["deal_id"],
"work_type": product_type,
"price": next(
@ -130,23 +146,6 @@ class jsonReader:
data = self._use_different_expected_commencement_data(data, deal)
rows.append(data)
# 2⃣ Add the remaining history WITHOUT submission date
for attempt in history_sorted[1:]:
data = {
"submission_date": None,
"expected_commencement_date": historical_ecd_value_processes(attempt["value"]),
"hubspot_id": deal["deal_properties"]["deal_id"],
"work_type": product_type,
"price": next(
(item["price"] for item in deal["line_items"] if product_type in item["name"]),
None
),
"deal_name": deal["deal_properties"]["dealname"],
"company_name": deal["company_info"]["name"],
}
data = self._use_different_expected_commencement_data(data, deal)
rows.append(data)
@ -170,8 +169,6 @@ class jsonReader:
# Debug helper: walks self.raw_data in order and stops at the first deal whose
# "line_items" list is non-empty, printing that deal and its index. Nothing is
# returned.
# NOTE(review): this listing is a rendered diff with indentation and +/- markers
# stripped; the two print() lines may be the ones this commit removed - confirm.
def find_all_job_with_line_item(self):
for i, deal in enumerate(self.raw_data):
if len(deal["line_items"])>0:
print(deal)
print(i)
break
def print_raw_data(self):

View file

@ -1 +1,2 @@
cd backend && poetry run python src/dashboard/app.py