diff --git a/.github/workflows/gather_hubspot_data.yml b/.github/workflows/gather_hubspot_data.yml index 9c24e63..570470d 100644 --- a/.github/workflows/gather_hubspot_data.yml +++ b/.github/workflows/gather_hubspot_data.yml @@ -6,18 +6,19 @@ on: workflow_dispatch: jobs: - gather_hubspot_data_and_upload_to_s3: + hubspot_raw_to_s3: + name: HubSpot → S3 (raw data) runs-on: [self-hosted, mist] + timeout-minutes: 720 + steps: - uses: actions/checkout@v4 - # Build Docker image using .devcontainer/Dockerfile - name: Build Docker image run: | docker build -f .devcontainer/Dockerfile -t latest-image . - # Install dependencies + run script inside container - - name: Poetry install & run script + - name: Run raw HubSpot export run: | docker run \ -u $(id -u):$(id -g) \ @@ -28,3 +29,27 @@ jobs: poetry install && poetry run python src/dashboard/scripts/hubspot_to_s3.py " + + hubspot_sales_forecast_to_s3: + name: HubSpot → S3 (sales forecast) + runs-on: [self-hosted, mist] + timeout-minutes: 720 + + steps: + - uses: actions/checkout@v4 + + - name: Build Docker image + run: | + docker build -f .devcontainer/Dockerfile -t latest-image . 
+ + - name: Run sales forecast export + run: | + docker run \ + -u $(id -u):$(id -g) \ + -v ${{ github.workspace }}:/workspaces/insight \ + -w /workspaces/insight/backend \ + latest-image \ + bash -c " + poetry install && + poetry run python src/dashboard/scripts/hubspot_to_s3_sales_forecast.py + " diff --git a/backend/__init__.py b/backend/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/src/__init__.py b/backend/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/src/dashboard/app.py b/backend/src/dashboard/app.py new file mode 100644 index 0000000..46770f8 --- /dev/null +++ b/backend/src/dashboard/app.py @@ -0,0 +1,27 @@ +# app.py +from dash import Dash, html, dcc +import dash +import dash_bootstrap_components as dbc + +app = Dash( + __name__, + use_pages=True, + external_stylesheets=[dbc.themes.BOOTSTRAP], +) + +server = app.server + +app.layout = dbc.Container([ + html.H1("Welcome to DomnaInsights", className="text-center my-4"), + + # Navigation bar + dbc.Nav([ + dbc.NavLink("Planned vs Completed", href="/", active="exact"), + dbc.NavLink("Sales Forecast", href="/sales-forecast", active="exact"), + ], pills=True, justified=True, className="mb-4"), + + dash.page_container # <-- Page content loads here +], fluid=True) + +if __name__ == "__main__": + app.run(debug=True) diff --git a/backend/src/dashboard/pages/planned_vs_complete.py b/backend/src/dashboard/pages/planned_vs_complete.py new file mode 100644 index 0000000..f08c412 --- /dev/null +++ b/backend/src/dashboard/pages/planned_vs_complete.py @@ -0,0 +1,302 @@ +# pages/planned_vs_completed.py + +import dash +from dash import html, dcc, dash_table, Input, Output, State, ctx +import dash_bootstrap_components as dbc +import pandas as pd +from datetime import datetime, timedelta +import json +import os + +# from backend.src.dashboard.services.file_manager import FileManager +# from backend.src.dashboard.services.json_reader import jsonReader +# from 
def build_master_df(local=False):
    """Build the master DataFrame behind the Planned vs Completed page.

    Args:
        local: When ``False`` (default) the latest export is fetched through
            ``FileManager.download_and_read_latest()``. Otherwise ``data.json``
            next to this module is read instead.

    Returns:
        One DataFrame with the rows of every product type concatenated. Each
        row carries ``product_type``, a numeric ``price``, ``Planned Week`` and
        ``Completed Week``. When no product type yields any rows, an empty
        frame that still has those four columns is returned, so that the
        module-level layout can index ``df["Planned Week"]`` without raising
        ``KeyError``.
    """
    if local is False:
        s3 = FileManager()
        # Only the parsed payload is needed; the S3 key and local path are unused.
        _key, _path, data = s3.download_and_read_latest()
    else:
        file_path = os.path.join(os.path.dirname(__file__), "data.json")
        with open(file_path, "r") as f:
            data = json.load(f)

    def corrected(row):
        """Return the completed week, capped so it never falls after the planned week."""
        planned = row["Planned Week"]
        submitted = row["raw_completed_week"]
        if not submitted:
            return None
        if not planned:
            return submitted
        return planned if submitted > planned else submitted

    hubspot_data = jsonReader(data)
    frames = []

    for p in hubspot_data.line_item_names:
        df = hubspot_data.generate_df_via_product_type(p)
        if df is None or df.empty:
            continue

        df["product_type"] = p
        # Unparseable prices become 0 rather than NaN.
        df["price"] = pd.to_numeric(df["price"], errors="coerce").fillna(0)

        df["Planned Week"] = df["expected_commencement_date"].apply(week_start_monday)
        # Temporary column: the submission week before it is capped by the planned week.
        df["raw_completed_week"] = df.get("submission_date", None)
        df["raw_completed_week"] = df["raw_completed_week"].apply(week_start_monday)

        df["Completed Week"] = df.apply(corrected, axis=1)
        df.drop(columns=["raw_completed_week"], inplace=True)

        frames.append(df)

    if frames:
        return pd.concat(frames, ignore_index=True)

    # NOTE(review): these are only the columns this function itself adds;
    # downstream helpers may need more columns on an empty export - confirm.
    return pd.DataFrame(
        columns=["product_type", "price", "Planned Week", "Completed Week"]
    )
@dash.callback(
    Output("jobs-table", "data"),
    Output("jobs-table", "columns"),
    Output("revenue-table", "data"),
    Output("revenue-table", "columns"),
    Output("jobs-graph", "figure"),
    Output("revenue-graph", "figure"),
    Input("date-filter", "value"),
    Input("refresh-btn", "n_clicks"),
)
def update_outputs(selected_week, n_clicks):
    """Recompute both pivot tables and both charts for the selected week.

    Args:
        selected_week: Current value of the ``date-filter`` dropdown.
        n_clicks: Click counter of the ``refresh-btn`` button.

    Returns:
        Whatever ``build_pivot_tables_and_charts(df, selected_week)`` returns,
        mapped onto the six outputs declared above.
    """
    global df

    # Rebuild only when the refresh button itself fired this callback.
    # ``n_clicks`` never resets, so testing ``n_clicks > 0`` alone would reload
    # the data on every later date-filter change as well.
    if ctx.triggered_id == "refresh-btn" and n_clicks:
        df = build_master_df()

    return build_pivot_tables_and_charts(df, selected_week)
id_to_link(deal_id): + url = f"https://app.hubspot.com/contacts/145275138/record/0-3/{deal_id}" + match = df.loc[df["hubspot_id"].astype(str) == str(deal_id)] + return html.Li(html.A(match.iloc[0].get("deal_name"), href=url, target="_blank")) + + +@dash.callback( + Output("hubspot-modal", "is_open"), + Output("modal-body", "children"), + Output("jobs-table", "active_cell"), + Output("revenue-table", "active_cell"), + + Input("jobs-table", "active_cell"), + Input("revenue-table", "active_cell"), + Input("close-modal", "n_clicks"), + + State("jobs-table", "data"), + State("revenue-table", "data"), + State("hubspot-modal", "is_open"), +) +def open_modal(jobs_cell, revenue_cell, close_click, jobs_data, revenue_data, is_open): + + triggered = ctx.triggered_id + + # ------------------------- + # CLOSE THE MODAL + # ------------------------- + if triggered == "close-modal": + return False, "", None, None + + # ------------------------- + # Helper: renderer for modal content + # ------------------------- + def build_modal(row, col_id): + + if col_id == "Product Type": + return html.P("This column has no IDs.") + + parts = col_id.split(" ") + + # Jobs table style → 2025-02-05_Planned + if "_" in parts[0]: + week = parts[0].split("_")[0] + else: + # Revenue table style → 2025-02-05 Planned £ + week = parts[0] + + label = col_id.lower() + is_planned = "planned" in label + + id_key = f"{week}_planned_ids" if is_planned else f"{week}_actual_ids" + raw_ids = row.get(id_key, "") + + if not raw_ids: + return html.P("No IDs recorded for this cell.") + + ids = raw_ids.split(SAFE_DELIM) + seen = set() + return html.Ul([id_to_link(d) for d in ids if not (d in seen or seen.add(d))]) + + # ------------------------- + # JOBS TABLE CLICK + # ------------------------- + if triggered == "jobs-table" and jobs_cell: + row = jobs_data[jobs_cell["row"]] + col_id = jobs_cell["column_id"] + return True, build_modal(row, col_id), None, None + + # ------------------------- + # REVENUE TABLE CLICK + 
def build_master_df(local=False):
    """Build the DataFrame that backs the Sales Forecast page.

    The previous version returned the ``jsonReader`` object and threw away the
    per-product frames, although the page layout and ``build_forecast`` index
    the module-level ``df`` like a DataFrame (``product_type``,
    ``Planned Week``, ``hubspot_id``, ``price``).

    Args:
        local: When ``False`` (default) the latest export under
            ``hubspot_insight/sales_forecast/`` in bucket ``retrofit-data-dev``
            is fetched through ``FileManager``. Otherwise ``data.json`` next to
            this module is read instead.

    Returns:
        One DataFrame with the rows of every product type concatenated, each
        row tagged with ``product_type``, a numeric ``price`` and
        ``Planned Week``. When no product type yields rows, an empty frame with
        the columns this page reads is returned.
    """
    if local is False:
        s3 = FileManager()
        # Only the parsed payload is needed; the S3 key and local path are unused.
        _key, _path, data = s3.download_and_read_latest(
            bucket="retrofit-data-dev",
            prefix="hubspot_insight/sales_forecast/",
        )
    else:
        file_path = os.path.join(os.path.dirname(__file__), "data.json")
        with open(file_path, "r") as f:
            data = json.load(f)

    hubspot_data = jsonReader(data)
    frames = []

    for p in hubspot_data.line_item_names:
        df = hubspot_data.generate_df_via_product_type(p)
        if df is None or df.empty:
            continue

        df["product_type"] = p
        # NOTE(review): assumes the sales-forecast export exposes "price" and
        # "expected_commencement_date" per row, as the Planned vs Completed
        # page relies on for its export - confirm against jsonReader.
        df["price"] = pd.to_numeric(df["price"], errors="coerce").fillna(0)
        df["Planned Week"] = df["expected_commencement_date"].apply(week_start_monday)

        frames.append(df)

    if frames:
        return pd.concat(frames, ignore_index=True)

    # Keep the columns the layout and callback read so an empty export
    # renders an empty page instead of raising KeyError at import time.
    return pd.DataFrame(
        columns=["hubspot_id", "product_type", "price", "Planned Week"]
    )
projects expected revenue and job volume into future weeks " + "based on existing HubSpot data.", + className="text-center text-muted" + ), + + html.Hr(), + + dcc.Dropdown( + id="forecast-product-filter", + options=[{"label": p, "value": p} for p in sorted(df["product_type"].unique())], + multi=True, + placeholder="Filter by product type…", + style={"width": "400px", "margin": "0 auto"}, + ), + + html.Br(), + + dash_table.DataTable( + id="forecast-table", + page_size=20, + style_table={"overflowX": "auto"}, + style_cell={"textAlign": "center"}, + ), + + html.Hr(), + + html.H2("Forecasted Revenue (£)", className="text-center"), + dcc.Graph(id="forecast-revenue-graph"), + + html.H2("Forecasted Job Volume", className="text-center mt-4"), + dcc.Graph(id="forecast-volume-graph"), +]) + + +# ----------------------- +# Callbacks +# ----------------------- +@dash.callback( + Output("forecast-table", "data"), + Output("forecast-table", "columns"), + Output("forecast-revenue-graph", "figure"), + Output("forecast-volume-graph", "figure"), + Input("forecast-product-filter", "value"), +) +def build_forecast(products): + + df_filtered = df.copy() + if products: + df_filtered = df_filtered[df_filtered["product_type"].isin(products)] + + # ---------------------------------------- + # Basic aggregation per week (extend later) + # ---------------------------------------- + weekly = df_filtered.groupby("Planned Week").agg( + jobs=("hubspot_id", "count"), + revenue=("price", "sum") + ).reset_index() + + weekly = weekly.sort_values("Planned Week") + + # ---------------------------------------- + # TABLE + # ---------------------------------------- + columns = [{"name": c, "id": c} for c in weekly.columns] + data = weekly.to_dict("records") + + # ---------------------------------------- + # GRAPHS + # ---------------------------------------- + import plotly.express as px + + revenue_fig = px.line( + weekly, + x="Planned Week", + y="revenue", + title="Expected Revenue per Week" + ) + + 
async def worker(id, queue, hubspot, results, pbar):
    """Consume deal IDs from ``queue`` and collect the fetched deal info.

    Args:
        id: Worker number; used only to label log messages.
        queue: ``asyncio.Queue`` of deal IDs. A ``None`` item is the poison
            pill that stops this worker.
        hubspot: Client whose awaitable ``from_deal_get_info(deal_id)`` returns
            the data for one deal.
        results: List that each successfully fetched deal is appended to.
        pbar: Progress bar; ``update(1)`` is called once per processed deal,
            whether the fetch succeeded or failed.

    A failed fetch is logged with its traceback and skipped, so one bad deal
    neither aborts the export nor disappears without a trace.
    """
    # Function-scope import: this block cannot edit the module's import list.
    import logging

    logger = logging.getLogger(__name__)

    while True:
        deal_id = await queue.get()
        if deal_id is None:  # poison pill = stop worker
            queue.task_done()
            break

        try:
            data = await hubspot.from_deal_get_info(deal_id)
        except Exception:
            # Best-effort export: record the failure and move on to the next deal.
            logger.exception("worker %s: failed to fetch deal %s", id, deal_id)
        else:
            results.append(data)

        pbar.update(1)
        queue.task_done()
if __name__ == "__main__":
    # Script-scope import: the module only imports ``datetime`` itself.
    from datetime import timezone

    # 1) Fetch every deal and write them to OUTPUT_FILE.
    asyncio.run(main())

    # 2) Upload that file to S3 under a UTC-timestamped name.
    fm = FileManager()
    # datetime.utcnow() is deprecated; an aware "now" in UTC formats to the
    # same "%Y%m%d_%H%M%S" string.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    s3_filename = f"hubspot_deals_{timestamp}.json"

    fm.upload_to_s3(
        OUTPUT_FILE,
        bucket="retrofit-data-dev",
        object_name=f"hubspot_insight/sales_forecast/{s3_filename}",
    )
level in prefix + # ------------------------------------------------------ + response = self.s3.list_objects_v2(Bucket=bucket, Prefix=prefix) + + if "Contents" not in response: + raise FileNotFoundError(f"No files found in {bucket}/{prefix}") + + # Filter ONLY files directly under hubspot_insight/ + top_level_files = [ + obj for obj in response["Contents"] + if obj["Key"].count("/") == prefix.count("/") + ] + + if not top_level_files: + raise FileNotFoundError( + f"No top-level files found in {bucket}/{prefix} (only subfolders exist)." + ) + + latest = max(top_level_files, key=lambda x: x["LastModified"]) + latest_key = latest["Key"] + + # ------------------------------------------------------ + # Download + # ------------------------------------------------------ + filename = latest_key.split("/")[-1] + local_path = os.path.join(self.download_dir, filename) self.s3.download_file(bucket, latest_key, local_path) + # ------------------------------------------------------ + # Read JSON + # ------------------------------------------------------ with open(local_path, "r") as f: data = json.load(f) diff --git a/backend/src/dashboard/services/hubspot_client_async.py b/backend/src/dashboard/services/hubspot_client_async.py index e5c972a..aae90ff 100644 --- a/backend/src/dashboard/services/hubspot_client_async.py +++ b/backend/src/dashboard/services/hubspot_client_async.py @@ -51,7 +51,7 @@ class HubSpotClientAsync: return [ deal.id for deal in self.all_deals - if deal.properties.get("pipeline") == str(pipeline_id) + if deal.properties.get("pipeline") in pipeline_id ] # ----------------------------------- diff --git a/backend/src/dashboard/services/json_reader.py b/backend/src/dashboard/services/json_reader.py index 94f83d3..4c53ef6 100644 --- a/backend/src/dashboard/services/json_reader.py +++ b/backend/src/dashboard/services/json_reader.py @@ -60,6 +60,7 @@ class jsonReader: def _return_df_from_deal_info(self, deal, product_type): rows = [] + print(deal) if 
deal["company_info"]["name"] != "Apple": if deal["attempts"]: # Multiple attempts => multiple rows diff --git a/run_backend.sh b/run_backend.sh index 32e6b12..d2724b9 100755 --- a/run_backend.sh +++ b/run_backend.sh @@ -1 +1 @@ -cd backend && poetry run python src/dashboard/main.py +cd backend && poetry run python src/dashboard/app.py