added json reader for live production

This commit is contained in:
Jun-te Kim 2025-11-30 19:44:49 +00:00
parent 8bbf41cb7d
commit 59f4b1c2be
3 changed files with 391 additions and 211 deletions

View file

@ -0,0 +1,225 @@
import pandas as pd
import plotly.express as px
from datetime import timedelta, datetime
SAFE_DELIM = "\\\\"
# -----------------------------------------------------
# Helper Functions
# -----------------------------------------------------
def week_start_monday(date_str):
    """
    Return the Monday that starts the week containing date_str.

    Parameters:
    - date_str: a date string (or any other value pd.to_datetime accepts)

    Returns:
    - "YYYY-MM-DD" string for that Monday, or None when the value is
      missing (None, NaN, NaT, pd.NA), empty, or cannot be parsed as a date
    """
    # Check pd.isna() before truthiness: bool(pd.NA) raises TypeError.
    if pd.isna(date_str) or not date_str:
        return None
    # Coerce instead of raising so one malformed value cannot abort a
    # whole column-wide .apply(); it is treated like a missing date.
    date = pd.to_datetime(date_str, errors="coerce")
    if pd.isna(date):
        return None
    monday = date - timedelta(days=date.weekday())
    return monday.strftime("%Y-%m-%d")
def get_previous_weeks(selected_week, n=3):
    """
    Return selected_week followed by the n weeks before it.

    The result has n + 1 entries, newest first, each formatted as
    "YYYY-MM-DD" and spaced exactly seven days apart.
    """
    anchor = pd.to_datetime(selected_week)
    weeks = []
    for offset in range(n + 1):
        earlier = anchor - pd.Timedelta(weeks=offset)
        weeks.append(earlier.strftime("%Y-%m-%d"))
    return weeks
# -----------------------------------------------------
# MASTER PIVOT BUILDER
# -----------------------------------------------------
def _collect_week_stats(rows, df, week_col, prefix, week_list):
    """Aggregate jobs, revenue and deal ids per (week, product type) into rows."""
    in_window = df[df[week_col].isin(week_list)]
    if in_window.empty:
        return
    grouped = (
        in_window.groupby([week_col, "product_type"])
        .agg(
            jobs=("hubspot_id", "nunique"),
            total=("price", "sum"),
            # De-duplicate (and skip missing ids) so the id list agrees
            # with the nunique-based job count.
            ids=("hubspot_id",
                 lambda x: SAFE_DELIM.join(sorted(set(x.dropna().astype(str))))),
        )
        .reset_index()
    )
    # Column order after reset_index: week, product_type, jobs, total, ids.
    for wk, pt, jobs, total, ids in grouped.itertuples(index=False, name=None):
        cell = rows.setdefault(pt, {})
        cell[f"{wk}_{prefix}_jobs"] = jobs
        cell[f"{wk}_{prefix}_total"] = total
        cell[f"{wk}_{prefix}_ids"] = ids


def _total_row(records, value_keys):
    """Build the TOTAL row by summing each value key over the given records."""
    total = {"Product Type": "TOTAL"}
    for key in value_keys:
        total[key] = sum(r.get(key, 0) for r in records)
    return total


def build_pivot_tables(df, selected_week):
    """
    Build the Jobs and Revenue pivot tables (planned vs actual) per product type.

    Parameters:
    - df: master DataFrame; the columns read here are "Planned Week",
      "Completed Week", "product_type", "hubspot_id" and "price".
      A None, empty or column-less frame yields tables holding only the
      TOTAL row.
    - selected_week: "All Dates" to use every distinct planned week in df,
      otherwise a week string; the window is then whatever
      get_previous_weeks(selected_week) returns.

    Returns (5-tuple):
    - jobs_records
    - jobs_columns
    - revenue_records
    - revenue_columns
    - week_list_sorted

    Each non-TOTAL record also carries "<week>_planned_ids" and
    "<week>_actual_ids" (deal ids joined with SAFE_DELIM). They are not
    listed in the columns, so they travel with the row data without being
    displayed.
    """
    required = {"Planned Week", "Completed Week", "product_type",
                "hubspot_id", "price"}
    has_data = df is not None and required.issubset(df.columns)

    # Determine week window
    if selected_week == "All Dates":
        week_list = sorted(df["Planned Week"].dropna().unique()) if has_data else []
    else:
        week_list = get_previous_weeks(selected_week)
    week_list_sorted = sorted(week_list)

    # Merge planned and actual aggregates into one dict per product type
    rows = {}
    if has_data:
        _collect_week_stats(rows, df, "Planned Week", "planned", week_list)
        _collect_week_stats(rows, df, "Completed Week", "actual", week_list)

    # ---------------- RECORDS ----------------
    jobs_records = []
    revenue_records = []
    for pt, vals in rows.items():
        job_row = {"Product Type": pt}
        rev_row = {"Product Type": pt}
        for wk in week_list_sorted:
            job_row[f"{wk}_Planned Jobs"] = vals.get(f"{wk}_planned_jobs", 0)
            job_row[f"{wk}_Actual Jobs"] = vals.get(f"{wk}_actual_jobs", 0)
            rev_row[f"{wk} Planned £"] = vals.get(f"{wk}_planned_total", 0)
            rev_row[f"{wk} Actual £"] = vals.get(f"{wk}_actual_total", 0)
            # Ship the id lists with both tables; without them a lookup of
            # "<week>_planned_ids" / "<week>_actual_ids" on a row is empty.
            for row in (job_row, rev_row):
                row[f"{wk}_planned_ids"] = vals.get(f"{wk}_planned_ids", "")
                row[f"{wk}_actual_ids"] = vals.get(f"{wk}_actual_ids", "")
        jobs_records.append(job_row)
        revenue_records.append(rev_row)

    # ---------------- COLUMNS ----------------
    jobs_columns = [{"name": "Product Type", "id": "Product Type"}]
    revenue_columns = [{"name": "Product Type", "id": "Product Type"}]
    jobs_keys = []
    revenue_keys = []
    for wk in week_list_sorted:
        jobs_columns.append({"name": f"{wk} Planned Jobs", "id": f"{wk}_Planned Jobs"})
        jobs_columns.append({"name": f"{wk} Actual Jobs", "id": f"{wk}_Actual Jobs"})
        revenue_columns.append({"name": f"{wk} Planned £", "id": f"{wk} Planned £"})
        revenue_columns.append({"name": f"{wk} Actual £", "id": f"{wk} Actual £"})
        jobs_keys += [f"{wk}_Planned Jobs", f"{wk}_Actual Jobs"]
        revenue_keys += [f"{wk} Planned £", f"{wk} Actual £"]

    # ----- ADD TOTAL ROWS (always last; the chart builder relies on that) -----
    jobs_records.append(_total_row(jobs_records, jobs_keys))
    revenue_records.append(_total_row(revenue_records, revenue_keys))

    return jobs_records, jobs_columns, revenue_records, revenue_columns, week_list_sorted
# -----------------------------------------------------
# LINE GRAPH BUILDER
# -----------------------------------------------------
def build_line_charts(jobs_records, revenue_records, week_list_sorted):
    """
    Build the planned-vs-actual line charts from the pivot table records.

    For every week in week_list_sorted the per-product values are summed
    across all records except the last one of each list (the TOTAL row).

    Returns:
    - jobs_fig (px.line)
    - revenue_fig (px.line)
    """
    # Drop the trailing TOTAL row so it is not counted twice.
    job_rows = jobs_records[:-1]
    revenue_rows = revenue_records[:-1]

    planned_jobs, actual_jobs = [], []
    planned_revenue, actual_revenue = [], []
    for wk in week_list_sorted:
        planned_jobs.append(sum(rec.get(f"{wk}_Planned Jobs", 0) for rec in job_rows))
        actual_jobs.append(sum(rec.get(f"{wk}_Actual Jobs", 0) for rec in job_rows))
        planned_revenue.append(sum(rec.get(f"{wk} Planned £", 0) for rec in revenue_rows))
        actual_revenue.append(sum(rec.get(f"{wk} Actual £", 0) for rec in revenue_rows))

    # ---------------- JOBS CHART ----------------
    jobs_df = pd.DataFrame({
        "Week": week_list_sorted,
        "Planned Jobs": planned_jobs,
        "Actual Jobs": actual_jobs,
    })
    jobs_fig = px.line(
        jobs_df,
        x="Week",
        y=["Planned Jobs", "Actual Jobs"],
        markers=True,
        title="Jobs — Planned vs Actual",
    )
    jobs_fig.update_layout(xaxis_title="Week", yaxis_title="Jobs")

    # ---------------- REVENUE CHART ----------------
    revenue_df = pd.DataFrame({
        "Week": week_list_sorted,
        "Planned £": planned_revenue,
        "Actual £": actual_revenue,
    })
    revenue_fig = px.line(
        revenue_df,
        x="Week",
        y=["Planned £", "Actual £"],
        markers=True,
        title="Revenue — Planned vs Actual",
    )
    revenue_fig.update_layout(xaxis_title="Week", yaxis_title="£")

    return jobs_fig, revenue_fig
# -----------------------------------------------------
# MAIN COMBINED FUNCTION (import this)
# -----------------------------------------------------
def build_pivot_tables_and_charts(df, selected_week):
    """
    Single entry point: build both pivot tables and both line charts.

    Delegates to build_pivot_tables() for the table data and passes its
    records and sorted week list on to build_line_charts().

    Returns (in this order):
    - jobs_records
    - jobs_cols
    - revenue_records
    - revenue_cols
    - jobs_fig
    - revenue_fig
    """
    tables = build_pivot_tables(df, selected_week)
    jobs_records, jobs_cols, revenue_records, revenue_cols, week_list_sorted = tables

    figures = build_line_charts(jobs_records, revenue_records, week_list_sorted)
    jobs_fig, revenue_fig = figures

    return jobs_records, jobs_cols, revenue_records, revenue_cols, jobs_fig, revenue_fig

View file

@ -1,103 +1,77 @@
from dash import Dash, html, dcc, dash_table, Input, Output, State
import dash_bootstrap_components as dbc
import pandas as pd
from datetime import timedelta
from datetime import datetime, timedelta
from dashboard.services.file_manager import FileManager
from dashboard.services.json_reader import jsonReader
from datetime import datetime
from dashboard.components.pivot_charts import build_pivot_tables_and_charts, week_start_monday
SAFE_DELIM = "\\\\"
# -----------------------------------------------------
# Helper: Current Monday
# -----------------------------------------------------
def current_week_start():
    """Return the Monday of the current week as a "YYYY-MM-DD" string."""
    now = datetime.today()
    # weekday() is 0 on Monday, so subtracting it lands on this week's Monday.
    return (now - timedelta(days=now.weekday())).strftime("%Y-%m-%d")
# -----------------------------------------------------
# Product types
# Product Types
# -----------------------------------------------------
PRODUCT_TYPES = [
"Empty Cavity - ECO4",
"Solar PV - ECO4",
"Extract & Fill - ECO4",
"Solar PV + Heating Upgrade - ECO4",
"Solar PV + HHRSH - ECO4",
"ECO4 empty cavity survey",
"ECO4 Retrofit Coordination",
"ECO4 Solar with client contribution",
"EPC",
"Empty Cavity - ECO4", "Solar PV - ECO4", "Extract & Fill - ECO4",
"Solar PV + Heating Upgrade - ECO4", "Solar PV + HHRSH - ECO4",
"ECO4 empty cavity survey", "ECO4 Retrofit Coordination",
"ECO4 Solar with client contribution", "EPC",
]
# -----------------------------------------------------
# Helpers
# Load & Build Master DF
# -----------------------------------------------------
def week_start_monday(date_str):
if not date_str or pd.isna(date_str):
return None
date = pd.to_datetime(date_str)
monday = date - timedelta(days=date.weekday()) # Monday = week start
return monday.strftime("%Y-%m-%d")
# -----------------------------------------------------
# Build master DF (single S3 read)
# -----------------------------------------------------
def build_master_df(product_types):
def build_master_df():
s3 = FileManager()
key, path, data = s3.download_and_read_latest()
hubspot_data = jsonReader(data)
frames = []
for p in product_types:
for p in PRODUCT_TYPES:
df = hubspot_data.generate_df_via_product_type(p)
if df is None or not isinstance(df, pd.DataFrame) or df.empty:
if df is None or df.empty:
continue
df["price"] = pd.to_numeric(df["price"], errors="coerce").fillna(0)
df["product_type"] = p
df["price"] = pd.to_numeric(df["price"], errors="coerce").fillna(0)
# Planned week
df["Planned Week"] = df["expected_commencement_date"].apply(week_start_monday)
# Raw completed week
df["raw_completed_week"] = df.get("submission_date", None)
df["raw_completed_week"] = df["raw_completed_week"].apply(week_start_monday)
# Completed-week logic
def corrected_completed_week(row):
# corrected completed week logic
def corrected(row):
planned = row["Planned Week"]
submitted = row["raw_completed_week"]
if not submitted:
return None # no completion
return None
if not planned:
return submitted # fallback
return submitted
return planned if submitted > planned else submitted
# override if submitted > planned
if submitted > planned:
return planned
return submitted
df["Completed Week"] = df.apply(corrected_completed_week, axis=1)
df["Completed Week"] = df.apply(corrected, axis=1)
df.drop(columns=["raw_completed_week"], inplace=True)
frames.append(df)
if not frames:
return pd.DataFrame()
return pd.concat(frames, ignore_index=True)
return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
# -----------------------------------------------------
# Initial DF load
# -----------------------------------------------------
df = build_master_df(PRODUCT_TYPES)
df = build_master_df()
# -----------------------------------------------------
@ -112,68 +86,85 @@ server = app.server
# -----------------------------------------------------
app.layout = html.Div([
html.H1("Planned vs Completed", style={"textAlign": "center"}),
html.H1("Planned vs Completed — Pivot Tables + Charts",
style={"textAlign": "center"}),
# ---------------- FILTERS ----------------
dcc.Dropdown(
id="date-filter",
options=[{"label": "All Dates", "value": "All Dates"}] +
[{"label": d, "value": d}
for d in sorted(df["Planned Week"].dropna().unique())],
value=current_week_start() if current_week_start() in df["Planned Week"].unique() else "All Dates",
value=current_week_start()
if current_week_start() in df["Planned Week"].unique()
else "All Dates",
clearable=False,
style={"width": "300px", "margin": "20px auto"}
),
html.Button(
"Refresh Data",
id="refresh-btn",
n_clicks=0,
style={"margin": "10px", "padding": "10px 20px"}
),
html.Button("Refresh Data", id="refresh-btn", n_clicks=0),
html.H2("Planned", style={"marginTop": "30px"}),
html.Hr(),
# ---------------- JOBS TABLE ----------------
html.H2("Jobs Pivot Table", style={"textAlign": "center"}),
dash_table.DataTable(
id="planned-table",
columns=[
{"name": "Product Type", "id": "Product Type"},
{"name": "Total Deals", "id": "Total Deals"},
{"name": "Total Amount (£)", "id": "Total Amount (£)"},
],
page_size=20,
id="jobs-table",
page_size=40,
sort_action="native",
row_selectable="single",
cell_selectable=False,
cell_selectable=True,
style_table={"overflowX": "scroll", "maxWidth": "98%", "margin": "0 auto"},
style_cell={"textAlign": "center", "minWidth": "80px", "padding": "6px"},
style_cell_conditional=[
{"if": {"column_id": "Product Type"}, "textAlign": "left"},
]
{"if": {"column_id": "Product Type"},
"textAlign": "left",
"fontWeight": "bold",
"minWidth": "150px"},
],
style_header={"fontWeight": "bold", "backgroundColor": "#f5f5f5"},
),
html.H2("Actual", style={"marginTop": "40px"}),
html.Hr(),
# ---------------- REVENUE TABLE ----------------
html.H2("Revenue (£) Pivot Table", style={"textAlign": "center"}),
dash_table.DataTable(
id="completed-table",
columns=[
{"name": "Product Type", "id": "Product Type"},
{"name": "Total Deals", "id": "Total Deals"},
{"name": "Total Amount (£)", "id": "Total Amount (£)"},
],
page_size=20,
id="revenue-table",
page_size=40,
sort_action="native",
row_selectable="single",
cell_selectable=False,
cell_selectable=True,
style_table={"overflowX": "scroll", "maxWidth": "98%", "margin": "0 auto"},
style_cell={"textAlign": "center", "minWidth": "80px", "padding": "6px"},
style_cell_conditional=[
{"if": {"column_id": "Product Type"}, "textAlign": "left"},
]
{"if": {"column_id": "Product Type"},
"textAlign": "left",
"fontWeight": "bold",
"minWidth": "150px"},
],
style_header={"fontWeight": "bold", "backgroundColor": "#f5f5f5"},
),
html.Hr(),
# ---------------- CHARTS ----------------
html.H2("Jobs Line Chart", style={"textAlign": "center"}),
dcc.Graph(id="jobs-graph", style={"height": "400px"}),
html.Hr(),
html.H2("Revenue Line Chart (£)", style={"textAlign": "center"}),
dcc.Graph(id="revenue-graph", style={"height": "400px"}),
html.Hr(),
# ---------------- MODAL ----------------
dbc.Modal(
[
dbc.ModalHeader("HubSpot IDs"),
dbc.ModalBody(id="modal-body"),
dbc.ModalFooter(
dbc.Button("Close", id="close-modal", className="ms-auto")
),
dbc.ModalFooter(dbc.Button("Close", id="close-modal", className="ms-auto")),
],
id="hubspot-modal",
size="lg",
@ -183,121 +174,81 @@ app.layout = html.Div([
# -----------------------------------------------------
# Callback: Table Updates
# Callback: Update tables + charts
# -----------------------------------------------------
@app.callback(
Output("planned-table", "data"),
Output("completed-table", "data"),
Output("jobs-table", "data"),
Output("jobs-table", "columns"),
Output("revenue-table", "data"),
Output("revenue-table", "columns"),
Output("jobs-graph", "figure"),
Output("revenue-graph", "figure"),
Input("date-filter", "value"),
Input("refresh-btn", "n_clicks")
Input("refresh-btn", "n_clicks"),
)
def update_tables(selected_date, n_clicks):
def update_outputs(selected_week, n_clicks):
global df
if n_clicks > 0:
df = build_master_df(PRODUCT_TYPES)
df = build_master_df()
# Filter
if selected_date == "All Dates":
planned_df = df.copy()
completed_df = df.copy()
else:
planned_df = df[df["Planned Week"] == selected_date].copy()
completed_df = df[df["Completed Week"] == selected_date].copy()
# ------------- PLANNED GROUPING -------------
planned_grouped = (
planned_df.groupby(["Planned Week", "product_type"], dropna=False)
.agg(
Total_Deals=("hubspot_id", "nunique"),
Total_Amount=("price", "sum"),
HubSpot_IDs=("hubspot_id",
lambda x: ", ".join(sorted(set(x.astype(str)))))
)
.reset_index()
)
planned_grouped.rename(columns={
"product_type": "Product Type",
"Total_Deals": "Total Deals",
"Total_Amount": "Total Amount (£)",
"HubSpot_IDs": "HubSpot IDs",
}, inplace=True)
planned_records = [] if planned_grouped.empty else \
planned_grouped.sort_values("Total Amount (£)", ascending=False).to_dict("records")
# ------------- COMPLETED GROUPING -------------
completed_df = completed_df[completed_df["Completed Week"].notna()]
completed_grouped = (
completed_df.groupby(["Completed Week", "product_type"], dropna=False)
.agg(
Total_Deals=("hubspot_id", "nunique"),
Total_Amount=("price", "sum"),
HubSpot_IDs=("hubspot_id",
lambda x: ", ".join(sorted(set(x.astype(str)))))
)
.reset_index()
)
# ❗ You forgot these renames — THIS caused the crash.
completed_grouped.rename(columns={
"product_type": "Product Type",
"Total_Deals": "Total Deals",
"Total_Amount": "Total Amount (£)",
"HubSpot_IDs": "HubSpot IDs",
}, inplace=True)
completed_records = [] if completed_grouped.empty else \
completed_grouped.sort_values("Total Amount (£)", ascending=False).to_dict("records")
return planned_records, completed_records
return build_pivot_tables_and_charts(df, selected_week)
# -----------------------------------------------------
# Modal: Display HubSpot IDs when clicking a cell
# -----------------------------------------------------
def id_to_link(deal_id):
url = f"https://app.hubspot.com/contacts/145275138/record/0-3/{deal_id}"
return html.Li(
html.A(deal_id, href=url, target="_blank", style={"textDecoration": "none"})
)
return html.Li(html.A(deal_id, href=url, target="_blank"))
# -----------------------------------------------------
# Callback: Modal for Planned + Completed tables
# -----------------------------------------------------
@app.callback(
Output("hubspot-modal", "is_open"),
Output("modal-body", "children"),
Output("planned-table", "selected_rows"),
Output("completed-table", "selected_rows"),
Input("planned-table", "selected_rows"),
Input("completed-table", "selected_rows"),
Output("jobs-table", "active_cell"),
Output("revenue-table", "active_cell"),
Input("jobs-table", "active_cell"),
Input("revenue-table", "active_cell"),
Input("close-modal", "n_clicks"),
State("planned-table", "data"),
State("completed-table", "data"),
State("jobs-table", "data"),
State("revenue-table", "data"),
State("hubspot-modal", "is_open")
)
def open_modal(planned_rows, completed_rows, close_click,
planned_data, completed_data, is_open):
def open_modal(jobs_cell, revenue_cell, close_click, jobs_data, revenue_data, is_open):
if close_click:
return False, "", [], []
return False, "", None, None
# Planned table
if planned_rows:
row = planned_data[planned_rows[0]]
ids = row["HubSpot IDs"].split(", ")
links = [id_to_link(i) for i in ids]
return True, html.Ul(links), [], []
# ----- Jobs Table Click -----
if jobs_cell:
row = jobs_data[jobs_cell["row"]]
col = jobs_cell["column_id"]
# Completed table
if completed_rows:
row = completed_data[completed_rows[0]]
ids = row["HubSpot IDs"].split(", ")
links = [id_to_link(i) for i in ids]
return True, html.Ul(links), [], []
wk = col.replace(" Planned Jobs", "").replace(" Actual Jobs", "")
id_key = f"{wk}_planned_ids" if "Planned" in col else f"{wk}_actual_ids"
return False, "", [], []
ids = row.get(id_key, "")
links = [id_to_link(i) for i in ids.split(SAFE_DELIM)] if ids else []
return True, html.Ul(links), None, None
# ----- Revenue Table Click -----
if revenue_cell:
row = revenue_data[revenue_cell["row"]]
col = revenue_cell["column_id"]
wk = col.replace(" Planned £", "").replace(" Actual £", "")
id_key = f"{wk}_planned_ids" if "Planned" in col else f"{wk}_actual_ids"
ids = row.get(id_key, "")
links = [id_to_link(i) for i in ids.split(SAFE_DELIM)] if ids else []
return True, html.Ul(links), None, None
return False, "", None, None
# -----------------------------------------------------
if __name__ == "__main__":
app.run(debug=True)

View file

@ -48,7 +48,8 @@ class jsonReader:
rows = []
for deals in self.deals_by_line_item[product_type]:
row = self._return_df_from_deal_info(deals, product_type)
rows.append(row)
if row is not None:
rows.append(row)
if rows:
return pd.concat(rows, ignore_index=True)
@ -56,7 +57,6 @@ class jsonReader:
def _return_df_from_deal_info(self, deal, product_type):
rows = []
if "ECO" in product_type or "EPC" in product_type:
if deal["attempts"]:
# Multiple attempts => multiple rows
@ -69,7 +69,8 @@ class jsonReader:
"price": next(
(item["price"] for item in deal["line_items"] if product_type in item["name"]),
None
)
),
"deal_name": deal["deal_properties"]["dealname"],
})
else:
def historical_ecd_value_processes(timestamp):
@ -87,49 +88,52 @@ class jsonReader:
)
# Extract latest expected commencement date
latest = history_sorted[0]
latest_ecd = historical_ecd_value_processes(latest["value"]) # returns YYYY-MM-DD or None
if history_sorted:
latest = history_sorted[0]
latest_ecd = historical_ecd_value_processes(latest["value"]) # returns YYYY-MM-DD or None
# Convert submission date
raw_submission_date = deal["deal_properties"].get("last_submission_date")
submission_date = self.to_date_only(raw_submission_date) if raw_submission_date else None
# Convert submission date
raw_submission_date = deal["deal_properties"].get("last_submission_date")
submission_date = self.to_date_only(raw_submission_date) if raw_submission_date else None
# Convert both to datetime for comparison
if submission_date and latest_ecd:
dt_sub = datetime.strptime(submission_date, "%Y-%m-%d")
dt_ecd = datetime.strptime(latest_ecd, "%Y-%m-%d")
# Convert both to datetime for comparison
if submission_date and latest_ecd:
dt_sub = datetime.strptime(submission_date, "%Y-%m-%d")
dt_ecd = datetime.strptime(latest_ecd, "%Y-%m-%d")
# Only keep submission date if submission_date > latest ECD
if dt_sub <= dt_ecd:
# Only keep submission date if submission_date > latest ECD
if dt_sub <= dt_ecd:
submission_date = None
else:
submission_date = None
else:
submission_date = None
# 1⃣ Add latest expected commencement date WITH conditional submission date
rows.append({
"submission_date": submission_date,
"expected_commencement_date": latest_ecd,
"hubspot_id": deal["deal_properties"]["deal_id"],
"work_type": product_type,
"price": next(
(item["price"] for item in deal["line_items"] if product_type in item["name"]),
None
)
})
# 2⃣ Add the remaining history WITHOUT submission date
for attempt in history_sorted[1:]:
# 1⃣ Add latest expected commencement date WITH conditional submission date
rows.append({
"submission_date": None,
"expected_commencement_date": historical_ecd_value_processes(attempt["value"]),
"submission_date": submission_date,
"expected_commencement_date": latest_ecd,
"hubspot_id": deal["deal_properties"]["deal_id"],
"work_type": product_type,
"price": next(
(item["price"] for item in deal["line_items"] if product_type in item["name"]),
None
)
),
"deal_name": deal["deal_properties"]["dealname"],
})
# 2⃣ Add the remaining history WITHOUT submission date
for attempt in history_sorted[1:]:
rows.append({
"submission_date": None,
"expected_commencement_date": historical_ecd_value_processes(attempt["value"]),
"hubspot_id": deal["deal_properties"]["deal_id"],
"work_type": product_type,
"price": next(
(item["price"] for item in deal["line_items"] if product_type in item["name"]),
None
),
"deal_name": deal["deal_properties"]["dealname"],
})
# Return a DataFrame or None
return pd.DataFrame(rows) if rows else None