Merge pull request #22 from Hestia-Homes/feature/make_it_live_ready

Feature/make it live ready
2026-06-08 11:17:25 +00:00 · 2025-12-12 11:32:40 +00:00 · 2025-12-12 11:32:40 +00:00 · e68ca49adb
commit e68ca49adb
parent 85248b375d 9f70d22ef4
12 changed files with 631 additions and 10 deletions
--- a/.github/workflows/gather_hubspot_data.yml
+++ b/.github/workflows/gather_hubspot_data.yml
@ -6,18 +6,19 @@ on:
  workflow_dispatch:

 jobs:
-  gather_hubspot_data_and_upload_to_s3:
+  hubspot_raw_to_s3:
+    name: HubSpot → S3 (raw data)
    runs-on: [self-hosted, mist]
+    timeout-minutes: 720
+
    steps:
      - uses: actions/checkout@v4

-      # Build Docker image using .devcontainer/Dockerfile
      - name: Build Docker image
        run: |
          docker build -f .devcontainer/Dockerfile -t latest-image .

-      # Install dependencies + run script inside container
-      - name: Poetry install & run script
+      - name: Run raw HubSpot export
        run: |
          docker run \
            -u $(id -u):$(id -g) \
@ -28,3 +29,27 @@ jobs:
              poetry install &&
              poetry run python src/dashboard/scripts/hubspot_to_s3.py
            "
+
+  # Builds the devcontainer image and runs the sales-forecast HubSpot export
+  # script inside it.
+  hubspot_sales_forecast_to_s3:
+    name: HubSpot → S3 (sales forecast)
+    runs-on: [self-hosted, mist]
+    # 720 minutes = 12 hours.
+    timeout-minutes: 720
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Build Docker image
+        run: |
+          docker build -f .devcontainer/Dockerfile -t latest-image .
+
+      # The container runs with the invoking user's UID/GID, with the checkout
+      # mounted at /workspaces/insight; `poetry install` runs on every execution.
+      - name: Run sales forecast export
+        run: |
+          docker run \
+            -u $(id -u):$(id -g) \
+            -v ${{ github.workspace }}:/workspaces/insight \
+            -w /workspaces/insight/backend \
+            latest-image \
+            bash -c "
+              poetry install &&
+              poetry run python src/dashboard/scripts/hubspot_to_s3_sales_forecast.py
+            "
--- a/backend/init.py
+++ b/backend/init.py
--- a/backend/src/init.py
+++ b/backend/src/init.py
--- a/backend/src/dashboard/app.py
+++ b/backend/src/dashboard/app.py
@ -0,0 +1,27 @@
+# app.py
+from dash import Dash, html, dcc
+import dash
+import dash_bootstrap_components as dbc
+
+# Multi-page Dash application. use_pages=True enables Dash's page support;
+# the page modules in this project register themselves via dash.register_page.
+app = Dash(
+    __name__,
+    use_pages=True,
+    external_stylesheets=[dbc.themes.BOOTSTRAP],
+)
+
+# Expose the underlying server object at module level
+# (presumably for a WSGI runner to import — confirm).
+server = app.server
+
+# Shared shell: title, navigation pills and the container that the active
+# page is rendered into.
+app.layout = dbc.Container([
+    html.H1("Welcome to DomnaInsights", className="text-center my-4"),
+
+    # Navigation bar; hrefs match the paths passed to dash.register_page
+    dbc.Nav([
+        dbc.NavLink("Planned vs Completed", href="/", active="exact"),
+        dbc.NavLink("Sales Forecast", href="/sales-forecast", active="exact"),
+    ], pills=True, justified=True, className="mb-4"),
+
+    dash.page_container    # <-- Page content loads here
+], fluid=True)
+
+# NOTE(review): debug=True applies whenever this file is executed directly,
+# which is what run_backend.sh does — confirm this is intended outside
+# development.
+if __name__ == "__main__":
+    app.run(debug=True)
--- a/backend/src/dashboard/pages/planned_vs_complete.py
+++ b/backend/src/dashboard/pages/planned_vs_complete.py
@ -0,0 +1,302 @@
+# pages/planned_vs_complete.py
+
+import dash
+from dash import html, dcc, dash_table, Input, Output, State, ctx
+import dash_bootstrap_components as dbc
+import pandas as pd
+from datetime import datetime, timedelta
+import json
+import os
+
+# from backend.src.dashboard.services.file_manager import FileManager
+# from backend.src.dashboard.services.json_reader import jsonReader
+# from backend.src.dashboard.components.pivot_charts import (
+#     build_pivot_tables_and_charts,
+#     week_start_monday,
+# )
+
+from dashboard.services.file_manager import FileManager
+from dashboard.services.json_reader import jsonReader
+from dashboard.components.pivot_charts import (
+    build_pivot_tables_and_charts,
+    week_start_monday,
+)
+# -----------------------------------------------------
+# Register Page
+# -----------------------------------------------------
+dash.register_page(__name__, path="/", name="Planned vs Completed")
+
+SAFE_DELIM = "\\\\"
+
+
+# -----------------------------------------------------
+# Helper: Current Monday
+# -----------------------------------------------------
+def current_week_start():
+    """Return the Monday of the current week formatted as ``YYYY-MM-DD``."""
+    now = datetime.today()
+    # weekday() is 0 for Monday, so this offset lands on this week's Monday.
+    days_since_monday = timedelta(days=now.weekday())
+    return f"{now - days_since_monday:%Y-%m-%d}"
+
+
+# -----------------------------------------------------
+# Load & Build Master DF
+# -----------------------------------------------------
+def _corrected_completed_week(row):
+    """Return the completed week for *row*, never later than its planned week."""
+    planned = row["Planned Week"]
+    submitted = row["raw_completed_week"]
+    if not submitted:
+        return None
+    if not planned:
+        return submitted
+    # Work submitted after its planned week is attributed to the planned week.
+    return planned if submitted > planned else submitted
+
+
+def build_master_df(local=False):
+    """Build one DataFrame of HubSpot line items across all product types.
+
+    Args:
+        local: When ``False`` (the default) the data comes from
+            ``FileManager.download_and_read_latest()``; any other value reads
+            ``data.json`` located next to this module.
+
+    Returns:
+        The per-product frames concatenated, each extended with
+        ``product_type``, a numeric ``price`` (unparseable values become 0),
+        ``Planned Week`` and ``Completed Week``. When no product yields rows,
+        an empty frame that still carries the columns this module reads, so
+        callers can subscript it without a ``KeyError``.
+    """
+    if local is False:
+        s3 = FileManager()
+        _key, _path, data = s3.download_and_read_latest()
+    else:
+        file_path = os.path.join(os.path.dirname(__file__), "data.json")
+        # JSON is UTF-8; do not depend on the platform default encoding.
+        with open(file_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+
+    hubspot_data = jsonReader(data)
+    frames = []
+
+    for p in hubspot_data.line_item_names:
+        df = hubspot_data.generate_df_via_product_type(p)
+        if df is None or df.empty:
+            continue
+
+        df["product_type"] = p
+        df["price"] = pd.to_numeric(df["price"], errors="coerce").fillna(0)
+
+        df["Planned Week"] = df["expected_commencement_date"].apply(week_start_monday)
+        # Temporary column: the week of the raw submission date.
+        df["raw_completed_week"] = df.get("submission_date", None)
+        df["raw_completed_week"] = df["raw_completed_week"].apply(week_start_monday)
+
+        df["Completed Week"] = df.apply(_corrected_completed_week, axis=1)
+        df.drop(columns=["raw_completed_week"], inplace=True)
+
+        frames.append(df)
+
+    if not frames:
+        # Columns referenced elsewhere in this module; the full export schema
+        # is not visible from here.
+        return pd.DataFrame(
+            columns=[
+                "hubspot_id",
+                "deal_name",
+                "product_type",
+                "price",
+                "Planned Week",
+                "Completed Week",
+            ]
+        )
+
+    return pd.concat(frames, ignore_index=True)
+
+
+# Load data once (refresh button can rebuild)
+df = build_master_df()
+
+
+# -----------------------------------------------------
+# Page Layout
+# -----------------------------------------------------
+def _pivot_table(table_id):
+    """Return a DataTable carrying the styling shared by both pivot tables."""
+    return dash_table.DataTable(
+        id=table_id,
+        page_size=40,
+        sort_action="native",
+        cell_selectable=True,
+        style_table={"overflowX": "scroll", "maxWidth": "98%", "margin": "0 auto"},
+        style_cell={"textAlign": "center", "minWidth": "80px", "padding": "6px"},
+        style_cell_conditional=[
+            {"if": {"column_id": "Product Type"},
+             "textAlign": "left",
+             "fontWeight": "bold",
+             "minWidth": "150px"},
+        ],
+        style_header={"fontWeight": "bold", "backgroundColor": "#f5f5f5"},
+        # Highlight the TOTAL row.
+        style_data_conditional=[
+            {
+                "if": {"filter_query": "{Product Type} = 'TOTAL'"},
+                "fontWeight": "bold",
+                "backgroundColor": "#f0f0f0",
+            }
+        ],
+    )
+
+
+# A frame built from no rows may lack the "Planned Week" column; treat that as
+# "no weeks available" instead of failing with KeyError at import time.
+_planned_weeks = (
+    sorted(df["Planned Week"].dropna().unique())
+    if "Planned Week" in df.columns
+    else []
+)
+_this_week = current_week_start()
+
+layout = html.Div([
+
+    html.H1("Planned vs Completed — Pivot Tables + Charts",
+            style={"textAlign": "center"}),
+
+    # ---------------- FILTERS ----------------
+    dcc.Dropdown(
+        id="date-filter",
+        options=[{"label": "All Dates", "value": "All Dates"}] +
+                [{"label": d, "value": d} for d in _planned_weeks],
+        # Default to the current week when it has data, otherwise show all.
+        value=_this_week if _this_week in _planned_weeks else "All Dates",
+        clearable=False,
+        style={"width": "300px", "margin": "20px auto"},
+    ),
+
+    html.Button("Refresh Data", id="refresh-btn", n_clicks=0),
+
+    html.Hr(),
+
+    # ---------------- JOBS TABLE ----------------
+    html.H2("Jobs Pivot Table", style={"textAlign": "center"}),
+
+    _pivot_table("jobs-table"),
+
+    html.Hr(),
+
+    # ---------------- REVENUE TABLE ----------------
+    html.H2("Revenue (£) Pivot Table", style={"textAlign": "center"}),
+
+    _pivot_table("revenue-table"),
+
+    html.Hr(),
+
+    # ---------------- CHARTS ----------------
+    html.H2("Jobs Line Chart", style={"textAlign": "center"}),
+    dcc.Graph(id="jobs-graph", style={"height": "400px"}),
+
+    html.Hr(),
+
+    html.H2("Revenue Line Chart (£)", style={"textAlign": "center"}),
+    dcc.Graph(id="revenue-graph", style={"height": "400px"}),
+
+    html.Hr(),
+
+    # ---------------- MODAL ----------------
+    dbc.Modal(
+        [
+            dbc.ModalHeader("HubSpot IDs"),
+            dbc.ModalBody(id="modal-body"),
+            dbc.ModalFooter(
+                dbc.Button("Close", id="close-modal", className="ms-auto")
+            ),
+        ],
+        id="hubspot-modal",
+        size="lg",
+        is_open=False,
+    ),
+])
+
+
+# -----------------------------------------------------
+# Callback: Update tables + charts
+# -----------------------------------------------------
+@dash.callback(
+    Output("jobs-table", "data"),
+    Output("jobs-table", "columns"),
+    Output("revenue-table", "data"),
+    Output("revenue-table", "columns"),
+    Output("jobs-graph", "figure"),
+    Output("revenue-graph", "figure"),
+    Input("date-filter", "value"),
+    Input("refresh-btn", "n_clicks"),
+)
+def update_outputs(selected_week, n_clicks):
+    """Rebuild both pivot tables and both charts for *selected_week*.
+
+    The module-level ``df`` is reloaded only when the refresh button is what
+    fired this callback. ``n_clicks`` stays positive after the first click, so
+    testing it alone would reload the data on every later date-filter change.
+
+    Args:
+        selected_week: Current value of the ``date-filter`` dropdown.
+        n_clicks: Click count of the ``refresh-btn`` button.
+
+    Returns:
+        Whatever ``build_pivot_tables_and_charts`` returns; it is mapped onto
+        the six outputs declared above.
+    """
+    global df
+
+    if ctx.triggered_id == "refresh-btn" and n_clicks:
+        df = build_master_df()
+
+    return build_pivot_tables_and_charts(df, selected_week)
+
+
+# -----------------------------------------------------
+# Modal: Display HubSpot IDs when clicking a cell
+# -----------------------------------------------------
+def id_to_link(deal_id):
+    """Return a list item linking *deal_id* to its HubSpot record.
+
+    The link text is the ``deal_name`` of the first row in the module-level
+    ``df`` whose ``hubspot_id`` matches. When no row matches, or the name is
+    missing, the ID itself is used as the text instead of raising
+    ``IndexError``.
+    """
+    url = f"https://app.hubspot.com/contacts/145275138/record/0-3/{deal_id}"
+    match = df.loc[df["hubspot_id"].astype(str) == str(deal_id)]
+
+    label = None if match.empty else match.iloc[0].get("deal_name")
+    if label is None or pd.isna(label):
+        label = str(deal_id)
+
+    return html.Li(html.A(label, href=url, target="_blank"))
+
+
+@dash.callback(
+    Output("hubspot-modal", "is_open"),
+    Output("modal-body", "children"),
+    Output("jobs-table", "active_cell"),
+    Output("revenue-table", "active_cell"),
+
+    Input("jobs-table", "active_cell"),
+    Input("revenue-table", "active_cell"),
+    Input("close-modal", "n_clicks"),
+
+    State("jobs-table", "data"),
+    State("revenue-table", "data"),
+    State("hubspot-modal", "is_open"),
+    # Rows as currently displayed, i.e. after native sorting and paging.
+    State("jobs-table", "derived_viewport_data"),
+    State("revenue-table", "derived_viewport_data"),
+)
+def open_modal(jobs_cell, revenue_cell, close_click, jobs_data, revenue_data,
+               is_open, jobs_view=None, revenue_view=None):
+    """Open the HubSpot-ID modal for a clicked table cell, or close it.
+
+    ``active_cell["row"]`` is an index into the rows currently displayed, and
+    both tables use native sorting and paging. The row is therefore looked up
+    in ``derived_viewport_data`` when available, falling back to ``data``.
+
+    Returns:
+        ``(is_open, modal_children, None, None)``. The two ``None`` values
+        clear each table's ``active_cell``.
+    """
+    triggered = ctx.triggered_id
+
+    # -------------------------
+    # CLOSE THE MODAL
+    # -------------------------
+    if triggered == "close-modal":
+        return False, "", None, None
+
+    # -------------------------
+    # Helper: renderer for modal content
+    # -------------------------
+    def build_modal(row, col_id):
+        if col_id == "Product Type":
+            return html.P("This column has no IDs.")
+
+        # Jobs columns look like "2025-02-05_Planned", revenue columns like
+        # "2025-02-05 Planned £"; in both cases the week is the leading token.
+        week = col_id.split(" ")[0].split("_")[0]
+        is_planned = "planned" in col_id.lower()
+
+        id_key = f"{week}_planned_ids" if is_planned else f"{week}_actual_ids"
+        raw_ids = row.get(id_key, "")
+
+        # De-duplicate while keeping first-seen order; drop empty fragments
+        # left by a leading or trailing delimiter.
+        unique_ids = [
+            d for d in dict.fromkeys((raw_ids or "").split(SAFE_DELIM)) if d
+        ]
+        if not unique_ids:
+            return html.P("No IDs recorded for this cell.")
+
+        return html.Ul([id_to_link(d) for d in unique_ids])
+
+    # -------------------------
+    # TABLE CLICK (jobs or revenue)
+    # -------------------------
+    clicked = {
+        "jobs-table": (jobs_cell, jobs_view or jobs_data),
+        "revenue-table": (revenue_cell, revenue_view or revenue_data),
+    }
+    cell, rows = clicked.get(triggered, (None, None))
+
+    if cell and rows and 0 <= cell["row"] < len(rows):
+        return True, build_modal(rows[cell["row"]], cell["column_id"]), None, None
+
+    # -------------------------
+    # DEFAULT
+    # -------------------------
+    return is_open, "", None, None
--- a/backend/src/dashboard/pages/sales_forecast.py
+++ b/backend/src/dashboard/pages/sales_forecast.py
@ -0,0 +1,139 @@
+# pages/sales_forecast.py
+
+import dash
+from dash import html, dcc, dash_table, Input, Output
+import dash_bootstrap_components as dbc
+import pandas as pd
+from datetime import datetime
+from dashboard.services.file_manager import FileManager
+from dashboard.services.json_reader import jsonReader
+from dashboard.components.pivot_charts import week_start_monday
+import os
+dash.register_page(__name__, path="/sales-forecast", name="Sales Forecast")
+import json
+
+# -----------------------
+# Load base dataframe
+# -----------------------
+# -----------------------------------------------------
+# Load & Build Master DF
+# -----------------------------------------------------
+def build_master_df(local=False):
+    """Build the sales-forecast DataFrame across all product types.
+
+    The rest of this module treats the result as a DataFrame with
+    ``product_type``, ``Planned Week``, ``hubspot_id`` and ``price`` columns,
+    so a frame is returned rather than the ``jsonReader`` wrapper.
+
+    Args:
+        local: When ``False`` (the default) the data comes from
+            ``FileManager.download_and_read_latest`` using the
+            ``hubspot_insight/sales_forecast/`` prefix; any other value reads
+            ``data.json`` located next to this module.
+
+    Returns:
+        The per-product frames concatenated, each extended with
+        ``product_type``, a numeric ``price`` (unparseable values become 0)
+        and ``Planned Week``. When no product yields rows, an empty frame
+        that still has the columns this module reads.
+    """
+    if local is False:
+        s3 = FileManager()
+        _key, _path, data = s3.download_and_read_latest(
+            bucket="retrofit-data-dev",
+            prefix="hubspot_insight/sales_forecast/",
+        )
+    else:
+        file_path = os.path.join(os.path.dirname(__file__), "data.json")
+        # JSON is UTF-8; do not depend on the platform default encoding.
+        with open(file_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+
+    hubspot_data = jsonReader(data)
+    frames = []
+
+    for p in hubspot_data.line_item_names:
+        frame = hubspot_data.generate_df_via_product_type(p)
+        if frame is None or frame.empty:
+            continue
+
+        # NOTE(review): assumes the forecast export exposes "price" and
+        # "expected_commencement_date" like the planned-vs-completed export —
+        # confirm against jsonReader.
+        frame["product_type"] = p
+        frame["price"] = pd.to_numeric(frame["price"], errors="coerce").fillna(0)
+        frame["Planned Week"] = frame["expected_commencement_date"].apply(
+            week_start_monday
+        )
+        frames.append(frame)
+
+    if not frames:
+        return pd.DataFrame(
+            columns=["hubspot_id", "product_type", "price", "Planned Week"]
+        )
+
+    return pd.concat(frames, ignore_index=True)
+
+
+# Load the data once, at import time.
+# NOTE(review): the expressions below subscript df with "product_type", so
+# build_master_df() must return a DataFrame containing that column.
+df = build_master_df()
+
+# -----------------------
+# Page Layout
+# -----------------------
+
+# Static page structure; the table and both graphs are filled in by the
+# build_forecast callback.
+layout = html.Div([
+
+    html.H1("Sales Forecast", className="text-center"),
+
+    html.P(
+        "This page projects expected revenue and job volume into future weeks "
+        "based on existing HubSpot data.",
+        className="text-center text-muted"
+    ),
+
+    html.Hr(),
+
+    # Multi-select product filter; options are the distinct product types
+    # present in df when the module is imported.
+    dcc.Dropdown(
+        id="forecast-product-filter",
+        options=[{"label": p, "value": p} for p in sorted(df["product_type"].unique())],
+        multi=True,
+        placeholder="Filter by product type…",
+        style={"width": "400px", "margin": "0 auto"},
+    ),
+
+    html.Br(),
+
+    # Weekly aggregate table (data and columns are set by the callback).
+    dash_table.DataTable(
+        id="forecast-table",
+        page_size=20,
+        style_table={"overflowX": "auto"},
+        style_cell={"textAlign": "center"},
+    ),
+
+    html.Hr(),
+
+    html.H2("Forecasted Revenue (£)", className="text-center"),
+    dcc.Graph(id="forecast-revenue-graph"),
+
+    html.H2("Forecasted Job Volume", className="text-center mt-4"),
+    dcc.Graph(id="forecast-volume-graph"),
+])
+
+
+# -----------------------
+# Callbacks
+# -----------------------
+@dash.callback(
+    Output("forecast-table", "data"),
+    Output("forecast-table", "columns"),
+    Output("forecast-revenue-graph", "figure"),
+    Output("forecast-volume-graph", "figure"),
+    Input("forecast-product-filter", "value"),
+)
+def build_forecast(products):
+    """Aggregate jobs and revenue per planned week for the chosen products.
+
+    Args:
+        products: Product types selected in the dropdown; a falsy value means
+            no filtering.
+
+    Returns:
+        ``(table_rows, table_columns, revenue_figure, volume_figure)``.
+    """
+    import plotly.express as px
+
+    selection = df.copy()
+    if products:
+        wanted = selection["product_type"].isin(products)
+        selection = selection[wanted]
+
+    # One row per planned week: job count and summed price, ordered by week.
+    per_week = (
+        selection.groupby("Planned Week")
+        .agg(jobs=("hubspot_id", "count"), revenue=("price", "sum"))
+        .reset_index()
+        .sort_values("Planned Week")
+    )
+
+    table_columns = [{"name": name, "id": name} for name in per_week.columns]
+    table_rows = per_week.to_dict("records")
+
+    def weekly_line(metric, title):
+        return px.line(per_week, x="Planned Week", y=metric, title=title)
+
+    return (
+        table_rows,
+        table_columns,
+        weekly_line("revenue", "Expected Revenue per Week"),
+        weekly_line("jobs", "Expected Job Count per Week"),
+    )
--- a/backend/src/dashboard/scripts/hubspot_to_s3.py
+++ b/backend/src/dashboard/scripts/hubspot_to_s3.py
@ -38,7 +38,7 @@ async def main():

    # Fetch all deals in the pipeline
    deals = await hubspot.get_deal_ids_by_pipeline(
-        Pipeline.OPERATIONS_SOCIAL_HOUSING.value
+        [Pipeline.OPERATIONS_SOCIAL_HOUSING.value]
    )

    total = len(deals)
--- a/backend/src/dashboard/scripts/hubspot_to_s3_sales_forecast.py
+++ b/backend/src/dashboard/scripts/hubspot_to_s3_sales_forecast.py
@ -0,0 +1,95 @@
+import asyncio
+import json
+from tqdm import tqdm
+from datetime import datetime
+
+from dashboard.services.hubspot_client import Pipeline
+from dashboard.services.hubspot_client_async import HubSpotClientAsync
+from dashboard.services.file_manager import FileManager
+
+OUTPUT_FILE = "hubspot_deals.json"
+
+# -------------------------------------------------------
+# WORKER — pulls deals from the queue and fetches info
+# -------------------------------------------------------
+async def worker(id, queue, hubspot, results, pbar):
+    """Fetch deal info for IDs taken from *queue* until a ``None`` sentinel.
+
+    Args:
+        id: Worker number, used only to label failure messages. (The name
+            shadows the builtin but is kept for existing callers.)
+        queue: ``asyncio.Queue`` of deal IDs; ``None`` stops the worker.
+        hubspot: Client exposing ``from_deal_get_info(deal_id)``.
+        results: List that successful responses are appended to.
+        pbar: Progress bar; advanced once per processed deal, and used to
+            report failures via ``pbar.write``.
+
+    A failed fetch is reported and skipped rather than silently dropped, so
+    a short result file can be traced back to the deals that failed.
+    """
+    while True:
+        deal_id = await queue.get()
+        if deal_id is None:   # poison pill = stop worker
+            queue.task_done()
+            break
+
+        try:
+            data = await hubspot.from_deal_get_info(deal_id)
+        except Exception as exc:
+            # Best effort: keep going, but make the failure visible.
+            pbar.write(f"[worker {id}] failed to fetch deal {deal_id}: {exc!r}")
+        else:
+            results.append(data)
+
+        pbar.update(1)
+        queue.task_done()
+
+
+# -------------------------------------------------------
+# MAIN EXECUTION
+# -------------------------------------------------------
+async def main():
+    """Fetch info for every deal in the forecast pipelines and save it.
+
+    Deal IDs are queued, processed by a fixed pool of ``worker`` tasks, and
+    the collected results are written to ``OUTPUT_FILE`` as JSON.
+    """
+    client = HubSpotClientAsync()
+
+    # Pipelines whose deals are exported
+    pipeline_ids = [
+        "2761590974",
+        "2774202608",
+        "2337194212",
+        "2870263028",
+    ]
+    deal_ids = await client.get_deal_ids_by_pipeline(pipeline_ids)
+
+    deal_count = len(deal_ids)
+    print(f"Total deals: {deal_count}")
+
+    pending = asyncio.Queue()
+    fetched = []
+
+    # The queue is unbounded, so enqueueing never has to wait.
+    for deal_id in deal_ids:
+        pending.put_nowait(deal_id)
+
+    worker_count = 5
+
+    progress = tqdm(
+        total=deal_count,
+        desc="Fetching Deals",
+        unit="deal",
+        dynamic_ncols=True,
+    )
+
+    tasks = []
+    for number in range(worker_count):
+        tasks.append(
+            asyncio.create_task(worker(number, pending, client, fetched, progress))
+        )
+
+    # Wait until every queued deal has been processed...
+    await pending.join()
+
+    # ...then send one stop sentinel per worker and wait for them to exit.
+    for _ in tasks:
+        pending.put_nowait(None)
+    await asyncio.gather(*tasks)
+
+    progress.close()
+
+    with open(OUTPUT_FILE, "w") as out:
+        json.dump(fetched, out, indent=2)
+
+    print(f"Done! Saved {len(fetched)} deals.")
+
+
+if __name__ == "__main__":
+    # Local import: this module only imports ``datetime`` from ``datetime``.
+    from datetime import timezone
+
+    # Export the deals to OUTPUT_FILE, then upload that file under a
+    # timestamped key.
+    asyncio.run(main())
+
+    fm = FileManager()
+    # Timezone-aware replacement for the deprecated ``datetime.utcnow()``;
+    # the formatted UTC timestamp is identical.
+    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
+    s3_filename = f"hubspot_deals_{timestamp}.json"
+
+    fm.upload_to_s3(
+        OUTPUT_FILE,
+        bucket="retrofit-data-dev",
+        object_name=f"hubspot_insight/sales_forecast/{s3_filename}"
+    )
--- a/backend/src/dashboard/services/file_manager.py
+++ b/backend/src/dashboard/services/file_manager.py
@ -59,14 +59,46 @@ class FileManager:

        return sorted(files, key=lambda x: x[0], reverse=True)[0][1]

-    def download_and_read_latest(self, bucket: str="retrofit-data-dev", prefix: str = "hubspot_insight/"):
+    def download_and_read_latest(
+        self,
+        bucket: str = "retrofit-data-dev",
+        prefix: str = "hubspot_insight/"
+    ):
        os.makedirs(self.download_dir, exist_ok=True)

-        latest_key = self.get_latest_s3_file(bucket, prefix)
-        local_path = os.path.join(self.download_dir, latest_key.split("/")[-1])
+        # ------------------------------------------------------
+        # Find latest file only at the top level in prefix
+        # ------------------------------------------------------
+        response = self.s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
+
+        if "Contents" not in response:
+            raise FileNotFoundError(f"No files found in {bucket}/{prefix}")
+
+        # Filter ONLY files directly under hubspot_insight/
+        top_level_files = [
+            obj for obj in response["Contents"]
+            if obj["Key"].count("/") == prefix.count("/")
+        ]
+
+        if not top_level_files:
+            raise FileNotFoundError(
+                f"No top-level files found in {bucket}/{prefix} (only subfolders exist)."
+            )
+
+        latest = max(top_level_files, key=lambda x: x["LastModified"])
+        latest_key = latest["Key"]
+
+        # ------------------------------------------------------
+        # Download
+        # ------------------------------------------------------
+        filename = latest_key.split("/")[-1]
+        local_path = os.path.join(self.download_dir, filename)

        self.s3.download_file(bucket, latest_key, local_path)

+        # ------------------------------------------------------
+        # Read JSON
+        # ------------------------------------------------------
        with open(local_path, "r") as f:
            data = json.load(f)

--- a/backend/src/dashboard/services/hubspot_client_async.py
+++ b/backend/src/dashboard/services/hubspot_client_async.py
@ -51,7 +51,7 @@ class HubSpotClientAsync:
        return [
            deal.id
            for deal in self.all_deals
-            if deal.properties.get("pipeline") == str(pipeline_id)
+            if deal.properties.get("pipeline") in pipeline_id
        ]

    # -----------------------------------
--- a/backend/src/dashboard/services/json_reader.py
+++ b/backend/src/dashboard/services/json_reader.py
@ -60,6 +60,7 @@ class jsonReader:
    
    def _return_df_from_deal_info(self, deal, product_type):
        rows = []
+        print(deal)
        if deal["company_info"]["name"] != "Apple":
                if deal["attempts"]:
                    # Multiple attempts => multiple rows
--- a/run_backend.sh
+++ b/run_backend.sh
@ -1 +1 @@
-cd backend && poetry run python src/dashboard/main.py
+cd backend && poetry run python src/dashboard/app.py