Merge pull request #22 from Hestia-Homes/feature/make_it_live_ready

Feature/make it live ready
This commit is contained in:
Jun-te Kim 2025-12-12 11:32:40 +00:00 committed by GitHub
commit e68ca49adb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 631 additions and 10 deletions

View file

@ -6,18 +6,19 @@ on:
workflow_dispatch:
jobs:
gather_hubspot_data_and_upload_to_s3:
hubspot_raw_to_s3:
name: HubSpot → S3 (raw data)
runs-on: [self-hosted, mist]
timeout-minutes: 720
steps:
- uses: actions/checkout@v4
# Build Docker image using .devcontainer/Dockerfile
- name: Build Docker image
run: |
docker build -f .devcontainer/Dockerfile -t latest-image .
# Install dependencies + run script inside container
- name: Poetry install & run script
- name: Run raw HubSpot export
run: |
docker run \
-u $(id -u):$(id -g) \
@ -28,3 +29,27 @@ jobs:
poetry install &&
poetry run python src/dashboard/scripts/hubspot_to_s3.py
"
hubspot_sales_forecast_to_s3:
name: HubSpot → S3 (sales forecast)
runs-on: [self-hosted, mist]
timeout-minutes: 720
steps:
- uses: actions/checkout@v4
- name: Build Docker image
run: |
docker build -f .devcontainer/Dockerfile -t latest-image .
- name: Run sales forecast export
run: |
docker run \
-u $(id -u):$(id -g) \
-v ${{ github.workspace }}:/workspaces/insight \
-w /workspaces/insight/backend \
latest-image \
bash -c "
poetry install &&
poetry run python src/dashboard/scripts/hubspot_to_s3_sales_forecast.py
"

0
backend/__init__.py Normal file
View file

0
backend/src/__init__.py Normal file
View file

View file

@ -0,0 +1,27 @@
# app.py
from dash import Dash, html, dcc
import dash
import dash_bootstrap_components as dbc
# Multi-page Dash application. Page modules register themselves through
# dash.register_page and are rendered into dash.page_container below.
app = Dash(
    __name__,
    use_pages=True,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

# Flask server instance behind the Dash app.
server = app.server

app.layout = dbc.Container(
    [
        html.H1("Welcome to DomnaInsights", className="text-center my-4"),
        # Navigation bar
        dbc.Nav(
            [
                dbc.NavLink("Planned vs Completed", href="/", active="exact"),
                dbc.NavLink("Sales Forecast", href="/sales-forecast", active="exact"),
            ],
            pills=True,
            justified=True,
            className="mb-4",
        ),
        dash.page_container,  # page content renders here
    ],
    fluid=True,
)

if __name__ == "__main__":
    import os

    # Debug mode turns on the in-browser dev tools and hot reloading, which
    # must not be on by default when this entry point is what gets deployed.
    # Opt in locally with DASH_DEBUG=1 (also accepts true/yes/on).
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(debug=debug_enabled)

View file

@ -0,0 +1,302 @@
# pages/planned_vs_completed.py
import dash
from dash import html, dcc, dash_table, Input, Output, State, ctx
import dash_bootstrap_components as dbc
import pandas as pd
from datetime import datetime, timedelta
import json
import os
# from backend.src.dashboard.services.file_manager import FileManager
# from backend.src.dashboard.services.json_reader import jsonReader
# from backend.src.dashboard.components.pivot_charts import (
# build_pivot_tables_and_charts,
# week_start_monday,
# )
from dashboard.services.file_manager import FileManager
from dashboard.services.json_reader import jsonReader
from dashboard.components.pivot_charts import (
build_pivot_tables_and_charts,
week_start_monday,
)
# -----------------------------------------------------
# Register Page
# -----------------------------------------------------
# Register this module as the Dash page served at "/".
dash.register_page(__name__, path="/", name="Planned vs Completed")
# Delimiter used to split the "<week>_planned_ids" / "<week>_actual_ids" row
# values back into individual HubSpot IDs (the literal is two backslashes).
SAFE_DELIM = "\\\\"
# -----------------------------------------------------
# Helper: Current Monday
# -----------------------------------------------------
def current_week_start():
    """Return the Monday of the current week formatted as ``YYYY-MM-DD``."""
    now = datetime.today()
    days_since_monday = now.weekday()  # Monday == 0
    return (now - timedelta(days=days_since_monday)).strftime("%Y-%m-%d")
# -----------------------------------------------------
# Load & Build Master DF
# -----------------------------------------------------
def _corrected_completed_week(row):
    """Return the completed week for one row, never later than its planned week."""
    planned_week = row["Planned Week"]
    submitted_week = row["raw_completed_week"]
    if not submitted_week:
        return None
    if not planned_week:
        return submitted_week
    if submitted_week > planned_week:
        return planned_week
    return submitted_week


def build_master_df(local=False):
    """Build the master DataFrame with one block of rows per product type.

    Args:
        local: When exactly ``False`` the latest export is downloaded through
            ``FileManager``; any other value reads ``data.json`` from the
            directory containing this module.

    Returns:
        All non-empty per-product frames concatenated, each carrying
        ``product_type``, a numeric ``price``, ``Planned Week`` and
        ``Completed Week``; an empty ``DataFrame`` when no product type
        produced any rows.
    """
    if local is not False:
        file_path = os.path.join(os.path.dirname(__file__), "data.json")
        with open(file_path, "r") as f:
            data = json.load(f)
    else:
        _key, _path, data = FileManager().download_and_read_latest()

    reader = jsonReader(data)
    collected = []

    for product in reader.line_item_names:
        frame = reader.generate_df_via_product_type(product)
        if frame is None or frame.empty:
            continue

        frame["product_type"] = product
        # Unparseable prices become 0 instead of NaN.
        frame["price"] = pd.to_numeric(frame["price"], errors="coerce").fillna(0)
        frame["Planned Week"] = frame["expected_commencement_date"].apply(
            week_start_monday
        )

        # Two steps on purpose: the column is created even when
        # "submission_date" is absent, and only then normalised.
        frame["raw_completed_week"] = frame.get("submission_date", None)
        frame["raw_completed_week"] = frame["raw_completed_week"].apply(
            week_start_monday
        )

        frame["Completed Week"] = frame.apply(_corrected_completed_week, axis=1)
        frame = frame.drop(columns=["raw_completed_week"])
        collected.append(frame)

    if not collected:
        return pd.DataFrame()
    return pd.concat(collected, ignore_index=True)
# Load data once (refresh button can rebuild)
df = build_master_df()


def _planned_week_choices(frame):
    """Return the sorted non-null planned weeks, or [] when the column is absent.

    build_master_df returns a column-less DataFrame when there is no data;
    indexing "Planned Week" on it would raise KeyError at import time.
    """
    if "Planned Week" not in frame.columns:
        return []
    return sorted(frame["Planned Week"].dropna().unique())


def _pivot_table(table_id):
    """Build one pivot DataTable; the jobs and revenue tables share this styling."""
    return dash_table.DataTable(
        id=table_id,
        page_size=40,
        sort_action="native",
        cell_selectable=True,
        style_table={"overflowX": "scroll", "maxWidth": "98%", "margin": "0 auto"},
        style_cell={"textAlign": "center", "minWidth": "80px", "padding": "6px"},
        style_cell_conditional=[
            {
                "if": {"column_id": "Product Type"},
                "textAlign": "left",
                "fontWeight": "bold",
                "minWidth": "150px",
            },
        ],
        style_header={"fontWeight": "bold", "backgroundColor": "#f5f5f5"},
        style_data_conditional=[
            {
                "if": {"filter_query": "{Product Type} = 'TOTAL'"},
                "fontWeight": "bold",
                "backgroundColor": "#f0f0f0",
            }
        ],
    )


_week_choices = _planned_week_choices(df)
_this_week = current_week_start()

# -----------------------------------------------------
# Page Layout
# -----------------------------------------------------
layout = html.Div([
    html.H1("Planned vs Completed — Pivot Tables + Charts",
            style={"textAlign": "center"}),

    # ---------------- FILTERS ----------------
    dcc.Dropdown(
        id="date-filter",
        options=[{"label": "All Dates", "value": "All Dates"}]
        + [{"label": d, "value": d} for d in _week_choices],
        # Default to the current week only when the data actually contains it.
        value=_this_week if _this_week in _week_choices else "All Dates",
        clearable=False,
        style={"width": "300px", "margin": "20px auto"},
    ),
    html.Button("Refresh Data", id="refresh-btn", n_clicks=0),
    html.Hr(),

    # ---------------- JOBS TABLE ----------------
    html.H2("Jobs Pivot Table", style={"textAlign": "center"}),
    _pivot_table("jobs-table"),
    html.Hr(),

    # ---------------- REVENUE TABLE ----------------
    html.H2("Revenue (£) Pivot Table", style={"textAlign": "center"}),
    _pivot_table("revenue-table"),
    html.Hr(),

    # ---------------- CHARTS ----------------
    html.H2("Jobs Line Chart", style={"textAlign": "center"}),
    dcc.Graph(id="jobs-graph", style={"height": "400px"}),
    html.Hr(),
    html.H2("Revenue Line Chart (£)", style={"textAlign": "center"}),
    dcc.Graph(id="revenue-graph", style={"height": "400px"}),
    html.Hr(),

    # ---------------- MODAL ----------------
    dbc.Modal(
        [
            dbc.ModalHeader("HubSpot IDs"),
            dbc.ModalBody(id="modal-body"),
            dbc.ModalFooter(
                dbc.Button("Close", id="close-modal", className="ms-auto")
            ),
        ],
        id="hubspot-modal",
        size="lg",
        is_open=False,
    ),
])
# -----------------------------------------------------
# Callback: Update tables + charts
# -----------------------------------------------------
@dash.callback(
    Output("jobs-table", "data"),
    Output("jobs-table", "columns"),
    Output("revenue-table", "data"),
    Output("revenue-table", "columns"),
    Output("jobs-graph", "figure"),
    Output("revenue-graph", "figure"),
    Input("date-filter", "value"),
    Input("refresh-btn", "n_clicks"),
)
def update_outputs(selected_week, n_clicks):
    """Recompute both pivot tables and both charts for the selected week.

    Args:
        selected_week: Value of the date dropdown (a week string or
            "All Dates").
        n_clicks: Click count of the refresh button.

    Returns:
        Whatever ``build_pivot_tables_and_charts`` returns for the current
        data and week; it feeds the six Outputs declared above.
    """
    global df
    # Reload only when the refresh button itself fired this callback. Testing
    # ``n_clicks > 0`` alone would re-download the data on every later
    # dropdown change, because the click count never returns to zero.
    if ctx.triggered_id == "refresh-btn" and n_clicks:
        df = build_master_df()
    return build_pivot_tables_and_charts(df, selected_week)
# -----------------------------------------------------
# Modal: Display HubSpot IDs when clicking a cell
# -----------------------------------------------------
def id_to_link(deal_id):
    """Render one HubSpot deal ID as a list item linking to its record page.

    The link text is the ``deal_name`` of the first row in the module-level
    ``df`` whose ``hubspot_id`` matches. When no row matches (or the name is
    missing) the raw ID is used as the text instead of raising ``IndexError``.

    Args:
        deal_id: HubSpot ID; compared as a string.

    Returns:
        An ``html.Li`` wrapping an ``html.A`` that opens in a new tab.
    """
    url = f"https://app.hubspot.com/contacts/145275138/record/0-3/{deal_id}"
    label = str(deal_id)
    if "hubspot_id" in df.columns:
        match = df.loc[df["hubspot_id"].astype(str) == str(deal_id)]
        if not match.empty:
            name = match.iloc[0].get("deal_name")
            if isinstance(name, str) and name:
                label = name
    return html.Li(html.A(label, href=url, target="_blank"))
@dash.callback(
    Output("hubspot-modal", "is_open"),
    Output("modal-body", "children"),
    Output("jobs-table", "active_cell"),
    Output("revenue-table", "active_cell"),
    Input("jobs-table", "active_cell"),
    Input("revenue-table", "active_cell"),
    Input("close-modal", "n_clicks"),
    # active_cell["row"] is relative to the rows currently displayed (after
    # native sorting and paging), so read the displayed rows rather than the
    # unsorted "data" prop.
    State("jobs-table", "derived_viewport_data"),
    State("revenue-table", "derived_viewport_data"),
    State("hubspot-modal", "is_open"),
)
def open_modal(jobs_cell, revenue_cell, close_click, jobs_data, revenue_data, is_open):
    """Open the modal with the HubSpot deals behind a clicked pivot cell.

    Args:
        jobs_cell: ``active_cell`` of the jobs table (or None).
        revenue_cell: ``active_cell`` of the revenue table (or None).
        close_click: Click count of the modal's close button.
        jobs_data: Rows currently displayed in the jobs table.
        revenue_data: Rows currently displayed in the revenue table.
        is_open: Current open state of the modal.

    Returns:
        ``(is_open, modal_body, None, None)``. Both ``active_cell`` values are
        always reset to None so the same cell can be clicked again.
    """
    triggered = ctx.triggered_id

    # -------------------------
    # CLOSE THE MODAL
    # -------------------------
    if triggered == "close-modal":
        return False, "", None, None

    # -------------------------
    # Helper: renderer for modal content
    # -------------------------
    def build_modal(row, col_id):
        if col_id == "Product Type":
            return html.P("This column has no IDs.")

        # Jobs columns look like "2025-02-05_Planned", revenue columns like
        # "2025-02-05 Planned £"; either way the week is the leading token.
        week = col_id.split(" ")[0].split("_")[0]
        is_planned = "planned" in col_id.lower()
        id_key = f"{week}_planned_ids" if is_planned else f"{week}_actual_ids"

        raw_ids = row.get(id_key, "")
        if not raw_ids:
            return html.P("No IDs recorded for this cell.")

        # De-duplicate while keeping first-seen order; skip empty tokens that a
        # leading or trailing delimiter would produce.
        unique_ids = dict.fromkeys(i for i in raw_ids.split(SAFE_DELIM) if i)
        return html.Ul([id_to_link(i) for i in unique_ids])

    # -------------------------
    # JOBS / REVENUE TABLE CLICK
    # -------------------------
    clicks = {
        "jobs-table": (jobs_cell, jobs_data),
        "revenue-table": (revenue_cell, revenue_data),
    }
    cell, rows = clicks.get(triggered, (None, None))
    if cell:
        index = cell.get("row")
        if rows and index is not None and 0 <= index < len(rows):
            return True, build_modal(rows[index], cell["column_id"]), None, None
        return True, html.P("No IDs recorded for this cell."), None, None

    # -------------------------
    # DEFAULT
    # -------------------------
    return is_open, "", None, None

View file

@ -0,0 +1,139 @@
# pages/sales_forecast.py
import dash
from dash import html, dcc, dash_table, Input, Output
import dash_bootstrap_components as dbc
import pandas as pd
from datetime import datetime
from dashboard.services.file_manager import FileManager
from dashboard.services.json_reader import jsonReader
from dashboard.components.pivot_charts import week_start_monday
import os
# Register this module as the Dash page served at "/sales-forecast".
dash.register_page(__name__, path="/sales-forecast", name="Sales Forecast")
import json
# -----------------------
# Load base dataframe
# -----------------------
# -----------------------------------------------------
# Load & Build Master DF
# -----------------------------------------------------
def build_master_df(local=False):
    """Load the latest sales-forecast export and flatten it into one DataFrame.

    The layout and the forecast callback in this module index the result by
    ``product_type``, ``Planned Week``, ``price`` and ``hubspot_id``, so a
    DataFrame is returned here (the previous version discarded every
    per-product frame and returned the ``jsonReader`` object).

    Args:
        local: When exactly ``False`` the latest file under
            ``hubspot_insight/sales_forecast/`` is downloaded through
            ``FileManager``; any other value reads ``data.json`` from the
            directory containing this module.

    Returns:
        All non-empty per-product frames concatenated. When there are none,
        an empty DataFrame that still has the columns the page reads.
    """
    if local is False:
        _key, _path, data = FileManager().download_and_read_latest(
            bucket="retrofit-data-dev",
            prefix="hubspot_insight/sales_forecast/",
        )
    else:
        file_path = os.path.join(os.path.dirname(__file__), "data.json")
        with open(file_path, "r") as f:
            data = json.load(f)

    hubspot_data = jsonReader(data)
    frames = []

    for product in hubspot_data.line_item_names:
        frame = hubspot_data.generate_df_via_product_type(product)
        if frame is None or frame.empty:
            continue

        frame["product_type"] = product
        # NOTE(review): assumes the per-product frames expose "price" and
        # "expected_commencement_date", as the Planned vs Completed page
        # relies on for the same reader - confirm for this export.
        frame["price"] = pd.to_numeric(frame["price"], errors="coerce").fillna(0)
        frame["Planned Week"] = frame["expected_commencement_date"].apply(
            week_start_monday
        )
        frames.append(frame)

    if not frames:
        # Keep the columns so the module-level layout and the callback do not
        # raise KeyError when there is no data yet.
        return pd.DataFrame(
            columns=["hubspot_id", "product_type", "price", "Planned Week"]
        )
    return pd.concat(frames, ignore_index=True)
# Load data once (refresh button can rebuild)
df = build_master_df()

# One dropdown entry per distinct product type, alphabetically.
_product_options = [
    {"label": name, "value": name}
    for name in sorted(df["product_type"].unique())
]

# -----------------------
# Page Layout
# -----------------------
layout = html.Div(
    children=[
        html.H1("Sales Forecast", className="text-center"),
        html.P(
            "This page projects expected revenue and job volume into future weeks "
            "based on existing HubSpot data.",
            className="text-center text-muted",
        ),
        html.Hr(),
        # Multi-select product filter; an empty selection means "all".
        dcc.Dropdown(
            id="forecast-product-filter",
            options=_product_options,
            multi=True,
            placeholder="Filter by product type…",
            style={"width": "400px", "margin": "0 auto"},
        ),
        html.Br(),
        # Rows and columns are filled in by the build_forecast callback.
        dash_table.DataTable(
            id="forecast-table",
            page_size=20,
            style_table={"overflowX": "auto"},
            style_cell={"textAlign": "center"},
        ),
        html.Hr(),
        html.H2("Forecasted Revenue (£)", className="text-center"),
        dcc.Graph(id="forecast-revenue-graph"),
        html.H2("Forecasted Job Volume", className="text-center mt-4"),
        dcc.Graph(id="forecast-volume-graph"),
    ]
)
# -----------------------
# Callbacks
# -----------------------
@dash.callback(
    Output("forecast-table", "data"),
    Output("forecast-table", "columns"),
    Output("forecast-revenue-graph", "figure"),
    Output("forecast-volume-graph", "figure"),
    Input("forecast-product-filter", "value"),
)
def build_forecast(products):
    """Aggregate jobs and revenue per planned week for the chosen products.

    Args:
        products: Product types selected in the dropdown; a falsy value
            (nothing selected) keeps every product.

    Returns:
        ``(table_rows, table_columns, revenue_figure, volume_figure)``.
    """
    import plotly.express as px

    selection = df.copy()
    if products:
        selection = selection[selection["product_type"].isin(products)]

    # Basic aggregation per week (extend later): deal count and summed price.
    per_week = (
        selection.groupby("Planned Week")
        .agg(
            jobs=("hubspot_id", "count"),
            revenue=("price", "sum"),
        )
        .reset_index()
        .sort_values("Planned Week")
    )

    # Table payload
    table_columns = [{"name": col, "id": col} for col in per_week.columns]
    table_rows = per_week.to_dict("records")

    # Line charts, one per metric
    revenue_fig = px.line(
        per_week,
        x="Planned Week",
        y="revenue",
        title="Expected Revenue per Week",
    )
    volume_fig = px.line(
        per_week,
        x="Planned Week",
        y="jobs",
        title="Expected Job Count per Week",
    )

    return table_rows, table_columns, revenue_fig, volume_fig

View file

@ -38,7 +38,7 @@ async def main():
# Fetch all deals in the pipeline
deals = await hubspot.get_deal_ids_by_pipeline(
Pipeline.OPERATIONS_SOCIAL_HOUSING.value
[Pipeline.OPERATIONS_SOCIAL_HOUSING.value]
)
total = len(deals)

View file

@ -0,0 +1,95 @@
import asyncio
import json
from tqdm import tqdm
from datetime import datetime
from dashboard.services.hubspot_client import Pipeline
from dashboard.services.hubspot_client_async import HubSpotClientAsync
from dashboard.services.file_manager import FileManager
OUTPUT_FILE = "hubspot_deals.json"
# -------------------------------------------------------
# WORKER — pulls deals from the queue and fetches info
# -------------------------------------------------------
async def worker(id, queue, hubspot, results, pbar):
    """Consume deal IDs from *queue* until a ``None`` sentinel arrives.

    Args:
        id: Worker number; used only in log messages.
        queue: ``asyncio.Queue`` of deal IDs, terminated by ``None``.
        hubspot: Client exposing the coroutine ``from_deal_get_info(deal_id)``.
        results: List that each successfully fetched deal is appended to.
        pbar: Progress bar; ``update(1)`` is called once per deal ID,
            whether or not the fetch succeeded.

    A failed fetch is logged with its traceback and skipped, so one bad deal
    does not stop the export but is no longer dropped silently.
    """
    import logging

    logger = logging.getLogger(__name__)

    while True:
        deal_id = await queue.get()
        try:
            if deal_id is None:  # poison pill = stop worker
                break
            try:
                results.append(await hubspot.from_deal_get_info(deal_id))
            except Exception:
                logger.exception(
                    "Worker %s: failed to fetch deal %s; skipping", id, deal_id
                )
            pbar.update(1)
        finally:
            # Always acknowledge the item, otherwise queue.join() in the
            # caller would wait forever.
            queue.task_done()
# -------------------------------------------------------
# MAIN EXECUTION
# -------------------------------------------------------
async def main():
    """Fetch info for every deal in the hard-coded pipelines and dump it to OUTPUT_FILE."""
    client = HubSpotClientAsync()

    # Pipelines whose deals are exported.
    pipeline_ids = [
        "2761590974",
        "2774202608",
        "2337194212",
        "2870263028",
    ]
    deal_ids = await client.get_deal_ids_by_pipeline(pipeline_ids)
    deal_count = len(deal_ids)
    print(f"Total deals: {deal_count}")

    pending = asyncio.Queue()
    fetched = []

    # Fill the queue before any worker starts.
    for deal_id in deal_ids:
        await pending.put(deal_id)

    # NOTE(review): the original comment says this matches the client's
    # semaphore limit - confirm against the HubSpot client.
    worker_count = 5
    progress = tqdm(
        total=deal_count, desc="Fetching Deals", unit="deal", dynamic_ncols=True
    )

    tasks = []
    for index in range(worker_count):
        tasks.append(
            asyncio.create_task(worker(index, pending, client, fetched, progress))
        )

    # Wait until every queued deal has been processed ...
    await pending.join()

    # ... then send one stop sentinel per worker and wait for them to exit.
    for _ in tasks:
        await pending.put(None)
    await asyncio.gather(*tasks)
    progress.close()

    # Save output
    with open(OUTPUT_FILE, "w") as out:
        json.dump(fetched, out, indent=2)

    print(f"Done! Saved {len(fetched)} deals.")
if __name__ == "__main__":
    from datetime import timezone

    # Export the deals to OUTPUT_FILE.
    asyncio.run(main())

    # Upload the export under a UTC-timestamped object name.
    fm = FileManager()
    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now"
    # formats to exactly the same string.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    s3_filename = f"hubspot_deals_{timestamp}.json"
    fm.upload_to_s3(
        OUTPUT_FILE,
        bucket="retrofit-data-dev",
        object_name=f"hubspot_insight/sales_forecast/{s3_filename}",
    )

View file

@ -59,14 +59,46 @@ class FileManager:
return sorted(files, key=lambda x: x[0], reverse=True)[0][1]
def download_and_read_latest(self, bucket: str="retrofit-data-dev", prefix: str = "hubspot_insight/"):
def download_and_read_latest(
self,
bucket: str = "retrofit-data-dev",
prefix: str = "hubspot_insight/"
):
os.makedirs(self.download_dir, exist_ok=True)
latest_key = self.get_latest_s3_file(bucket, prefix)
local_path = os.path.join(self.download_dir, latest_key.split("/")[-1])
# ------------------------------------------------------
# Find latest file only at the top level in prefix
# ------------------------------------------------------
response = self.s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
if "Contents" not in response:
raise FileNotFoundError(f"No files found in {bucket}/{prefix}")
# Filter ONLY files directly under hubspot_insight/
top_level_files = [
obj for obj in response["Contents"]
if obj["Key"].count("/") == prefix.count("/")
]
if not top_level_files:
raise FileNotFoundError(
f"No top-level files found in {bucket}/{prefix} (only subfolders exist)."
)
latest = max(top_level_files, key=lambda x: x["LastModified"])
latest_key = latest["Key"]
# ------------------------------------------------------
# Download
# ------------------------------------------------------
filename = latest_key.split("/")[-1]
local_path = os.path.join(self.download_dir, filename)
self.s3.download_file(bucket, latest_key, local_path)
# ------------------------------------------------------
# Read JSON
# ------------------------------------------------------
with open(local_path, "r") as f:
data = json.load(f)

View file

@ -51,7 +51,7 @@ class HubSpotClientAsync:
return [
deal.id
for deal in self.all_deals
if deal.properties.get("pipeline") == str(pipeline_id)
if deal.properties.get("pipeline") in pipeline_id
]
# -----------------------------------

View file

@ -60,6 +60,7 @@ class jsonReader:
def _return_df_from_deal_info(self, deal, product_type):
rows = []
print(deal)
if deal["company_info"]["name"] != "Apple":
if deal["attempts"]:
# Multiple attempts => multiple rows

View file

@ -1 +1 @@
cd backend && poetry run python src/dashboard/main.py
cd backend && poetry run python src/dashboard/app.py