From 6bb2128c6a74610c417399e48d81c228da60ac39 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <junte.kim@mealcraft.com>
Date: Tue, 25 Nov 2025 17:01:12 +0000
Subject: [PATCH] Add planned vs completed weekly tables with HubSpot ID modal

---
 .devcontainer/devcontainer.json               |   1 -
 .devcontainer/docker-compose.yml              |   2 +
 .vscode/settings.json                         |   2 +
 backend/poetry.lock                           |  20 +-
 backend/pyproject.toml                        |   3 +-
 backend/src/dashboard/main.py                 | 333 ++++++++++++------
 backend/src/dashboard/scripts/quick_one.py    |   4 +-
 backend/src/dashboard/services/json_reader.py |  59 +++-
 8 files changed, 295 insertions(+), 129 deletions(-)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 23f181d..2f2a89e 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -9,7 +9,6 @@
     // Optional convenience mount
     "source=${localEnv:HOME},target=/workspaces/home,type=bind"
   ],
-
   "customizations": {
     "vscode": {
       "settings": {
diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml
index 6ac624f..fc12511 100644
--- a/.devcontainer/docker-compose.yml
+++ b/.devcontainer/docker-compose.yml
@@ -1,7 +1,9 @@
 version: '3.8'
 
+
 services:
   insight:
+    init: true
     user: "${UID}:${GID}"
     build:
       context: ..
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 27782c1..56ad03a 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -10,7 +10,9 @@
         }
     },
 
+
     // Hot reload setting that needs to be in user settings
+    // Ctrl + Shift + P, then "Preferences: Open User Settings (JSON)"
     // "jupyter.runStartupCommands": [
     //     "%load_ext autoreload", "%autoreload 2"
     // ]
diff --git a/backend/poetry.lock b/backend/poetry.lock
index 612ee27..60e923d 100644
--- a/backend/poetry.lock
+++ b/backend/poetry.lock
@@ -403,6 +403,24 @@ dev = ["PyYAML (>=5.4.1)", "coloredlogs (>=15.0.1)", "fire (>=0.4.0)"]
 diskcache = ["diskcache (>=5.2.1)", "multiprocess (>=0.70.12)", "psutil (>=5.8.0)"]
 testing = ["beautifulsoup4 (>=4.8.2)", "cryptography", "dash_testing_stub (>=0.0.2)", "lxml (>=4.6.2)", "multiprocess (>=0.70.12)", "percy (>=2.0.2)", "psutil (>=5.8.0)", "pytest (>=6.0.2)", "requests[security] (>=2.21.0)", "selenium (>=3.141.0,<=4.2.0)", "waitress (>=1.4.4)"]
 
+[[package]]
+name = "dash-bootstrap-components"
+version = "2.0.4"
+description = "Bootstrap themed components for use in Plotly Dash"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "dash_bootstrap_components-2.0.4-py3-none-any.whl", hash = "sha256:767cf0084586c1b2b614ccf50f79fe4525fdbbf8e3a161ed60016e584a14f5d1"},
+    {file = "dash_bootstrap_components-2.0.4.tar.gz", hash = "sha256:c3206c0923774bbc6a6ddaa7822b8d9aa5326b0d3c1e7cd795cc975025fe2484"},
+]
+
+[package.dependencies]
+dash = ">=3.0.4"
+
+[package.extras]
+pandas = ["numpy (>=2.0.2)", "pandas (>=2.2.3)"]
+
 [[package]]
 name = "debugpy"
 version = "1.8.17"
@@ -1849,4 +1867,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.12"
-content-hash = "e5f24396e682e9ab0a06586c52e2bcc4b9c79c76352b8f752c5f1997808c6b7d"
+content-hash = "8fb73ebfc424ad06013ee6c53bef966265d0658fc6ea9a6e6489e285f90844e8"
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index 4779723..3943693 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -16,7 +16,8 @@ dependencies = [
     "tqdm (>=4.67.1,<5.0.0)",
     "boto3 (>=1.40.74,<2.0.0)",
     "dash (>=3.3.0,<4.0.0)",
-    "gunicorn (>=23.0.0,<24.0.0)"
+    "gunicorn (>=23.0.0,<24.0.0)",
+    "dash-bootstrap-components (>=2.0.4,<3.0.0)"
 ]
 
 [tool.poetry]
diff --git a/backend/src/dashboard/main.py b/backend/src/dashboard/main.py
index cf89b57..3c8f598 100644
--- a/backend/src/dashboard/main.py
+++ b/backend/src/dashboard/main.py
@@ -1,106 +1,125 @@
-from dash import Dash, html, dcc, dash_table, Input, Output
+from dash import Dash, html, dcc, dash_table, Input, Output, State
+import dash_bootstrap_components as dbc
 import pandas as pd
-import json
-import boto3
-import re
-import os
+from datetime import timedelta
 
-BUCKET = "retrofit-data-dev"
-PREFIX = "hubspot_insight/"
+from dashboard.services.file_manager import FileManager
+from dashboard.services.json_reader import jsonReader
 
-# -------------------------
-# S3 Helpers
-# -------------------------
-def get_latest_s3_file(bucket: str, prefix: str = "") -> str:
-    s3 = boto3.client("s3")
-    response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
+from datetime import datetime
 
-    if "Contents" not in response:
-        raise FileNotFoundError("No files found.")
+def current_week_start():
+    today = datetime.today()
+    monday = today - timedelta(days=today.weekday())
+    return monday.strftime("%Y-%m-%d")
 
-    timestamp_regex = re.compile(r".*_(\d{8}_\d{6})\.json$")
-    files = []
-
-    for obj in response["Contents"]:
-        key = obj["Key"]
-        match = timestamp_regex.match(key)
-        if match:
-            files.append((match.group(1), key))
-
-    if not files:
-        raise FileNotFoundError("No timestamped files found.")
-
-    return sorted(files, key=lambda x: x[0], reverse=True)[0][1]
+# -----------------------------------------------------
+# Product types
+# -----------------------------------------------------
+PRODUCT_TYPES = [
+    "Empty Cavity - ECO4",
+    "Solar PV - ECO4",
+    "Extract & Fill - ECO4",
+    "Solar PV + Heating Upgrade - ECO4",
+    "Solar PV + HHRSH - ECO4",
+    "ECO4 empty cavity survey",
+    "ECO4 Retrofit Coordination",
+    "ECO4 Solar with client contribution",
+    "EPC",
+]
 
 
-def download_and_read_latest(bucket: str, prefix: str = "", download_dir="downloads"):
-    s3 = boto3.client("s3")
-    os.makedirs(download_dir, exist_ok=True)
-
-    latest_key = get_latest_s3_file(bucket, prefix)
-    local_path = os.path.join(download_dir, latest_key.split("/")[-1])
-
-    s3.download_file(bucket, latest_key, local_path)
-
-    with open(local_path, "r") as f:
-        data = json.load(f)
-
-    return latest_key, local_path, data
+# -----------------------------------------------------
+# Helpers
+# -----------------------------------------------------
+def week_start_monday(date_str):
+    if not date_str or pd.isna(date_str):
+        return None
+    date = pd.to_datetime(date_str)
+    monday = date - timedelta(days=date.weekday())  # Monday = week start
+    return monday.strftime("%Y-%m-%d")
 
 
-# -------------------------
-# Build DataFrame from S3
-# -------------------------
-def load_dataframe():
-    latest_key, file_path, data = download_and_read_latest(BUCKET, PREFIX)
+# -----------------------------------------------------
+# Build master DF (single S3 read)
+# -----------------------------------------------------
+def build_master_df(product_types):
+    s3 = FileManager()
+    key, path, data = s3.download_and_read_latest()
+    hubspot_data = jsonReader(data)
 
-    records = []
-    for entry in data:
-        p = entry["deal_properties"]
-        line_items = entry.get("line_items", [])
+    frames = []
 
-        if not line_items:
-            records.append({
-                "Expected Commencement Date": p.get("expected_commencement_date"),
-                "Deal ID": p.get("deal_id"),
-                "Line Item Name": "Missing Line Item",
-                "Line Item Amount": 0
-            })
-        else:
-            for li in line_items:
-                records.append({
-                    "Expected Commencement Date": p.get("expected_commencement_date"),
-                    "Deal ID": p.get("deal_id"),
-                    "Line Item Name": li.get("name"),
-                    "Line Item Amount": li.get("amount", 0)
-                })
+    for p in product_types:
+        df = hubspot_data.generate_df_via_product_type(p)
 
-    df = pd.DataFrame(records)
-    df["Line Item Amount"] = pd.to_numeric(df["Line Item Amount"], errors="coerce").fillna(0)
+        if df is None or not isinstance(df, pd.DataFrame) or df.empty:
+            continue
 
-    return df
+        df["price"] = pd.to_numeric(df["price"], errors="coerce").fillna(0)
+        df["product_type"] = p
+
+        # Planned week
+        df["Planned Week"] = df["expected_commencement_date"].apply(week_start_monday)
+
+        # Raw completed week
+        df["raw_completed_week"] = df.get("submission_date", None)
+        df["raw_completed_week"] = df["raw_completed_week"].apply(week_start_monday)
+
+        # Completed-week logic
+        def corrected_completed_week(row):
+            planned = row["Planned Week"]
+            submitted = row["raw_completed_week"]
+
+            if not submitted:
+                return None  # no completion
+
+            if not planned:
+                return submitted  # fallback
+
+            # override if submitted > planned
+            if submitted > planned:
+                return planned
+
+            return submitted
+
+        df["Completed Week"] = df.apply(corrected_completed_week, axis=1)
+        df.drop(columns=["raw_completed_week"], inplace=True)
+
+        frames.append(df)
+
+    if not frames:
+        return pd.DataFrame()
+
+    return pd.concat(frames, ignore_index=True)
 
 
-# -------------------------
-# Initial Load
-# -------------------------
-df = load_dataframe()
+# -----------------------------------------------------
+# Initial DF load
+# -----------------------------------------------------
+df = build_master_df(PRODUCT_TYPES)
 
 
-# -------------------------
+# -----------------------------------------------------
 # Dash App
-# -------------------------
-app = Dash(__name__)
-server = app.server   # required for Render
+# -----------------------------------------------------
+app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
+server = app.server
 
+
+# -----------------------------------------------------
+# Layout
+# -----------------------------------------------------
 app.layout = html.Div([
-    html.H1("Line Items by Expected Commencement Date", style={"textAlign": "center"}),
+
+    html.H1("Planned vs Completed", style={"textAlign": "center"}),
 
     dcc.Dropdown(
         id="date-filter",
         options=[{"label": "All Dates", "value": "All Dates"}] +
-                [{"label": d, "value": d} for d in sorted(df["Expected Commencement Date"].dropna().unique())],
-        value="All Dates",
+                [{"label": d, "value": d}
+                 for d in sorted(df["Planned Week"].dropna().unique())],
+        value=current_week_start() if current_week_start() in df["Planned Week"].unique() else "All Dates",
         clearable=False,
         style={"width": "300px", "margin": "20px auto"}
     ),
@@ -112,59 +131,159 @@ app.layout = html.Div([
         style={"margin": "10px", "padding": "10px 20px"}
     ),
 
+    html.H2("Planned", style={"marginTop": "30px"}),
+
     dash_table.DataTable(
-        id="lineitem-table",
+        id="planned-table",
         columns=[
-            {"name": "Line Item Name", "id": "Line Item Name"},
+            {"name": "Product Type", "id": "Product Type"},
             {"name": "Total Deals", "id": "Total Deals"},
             {"name": "Total Amount (£)", "id": "Total Amount (£)"},
         ],
         page_size=20,
         sort_action="native",
-        style_table={"overflowX": "scroll"},
-        style_header={"backgroundColor": "#e4e4e4", "fontWeight": "bold"},
-        style_cell={"padding": "8px", "textAlign": "left"},
-    )
+        row_selectable="single",
+        cell_selectable=False,
+        style_cell_conditional=[
+            {"if": {"column_id": "Product Type"}, "textAlign": "left"},
+        ]
+    ),
+
+    html.H2("Actual", style={"marginTop": "40px"}),
+
+    dash_table.DataTable(
+        id="completed-table",
+        columns=[
+            {"name": "Product Type", "id": "Product Type"},
+            {"name": "Total Deals", "id": "Total Deals"},
+            {"name": "Total Amount (£)", "id": "Total Amount (£)"},
+        ],
+        page_size=20,
+        sort_action="native",
+        row_selectable="single",
+        cell_selectable=False,
+        style_cell_conditional=[
+            {"if": {"column_id": "Product Type"}, "textAlign": "left"},
+        ]
+    ),
+
+    dbc.Modal(
+        [
+            dbc.ModalHeader("HubSpot IDs"),
+            dbc.ModalBody(id="modal-body"),
+            dbc.ModalFooter(
+                dbc.Button("Close", id="close-modal", className="ms-auto")
+            ),
+        ],
+        id="hubspot-modal",
+        size="lg",
+        is_open=False,
+    ),
 ])
 
 
-# -------------------------
-# Callback (filter + refresh)
-# -------------------------
+# -----------------------------------------------------
+# Callback: Table Updates
+# -----------------------------------------------------
 @app.callback(
-    Output("lineitem-table", "data"),
+    Output("planned-table", "data"),
+    Output("completed-table", "data"),
     Input("date-filter", "value"),
     Input("refresh-btn", "n_clicks")
 )
-def update_table(selected_date, n_clicks):
+def update_tables(selected_date, n_clicks):
     global df
 
-    # Refresh DF from AWS when button clicked
     if n_clicks > 0:
-        df = load_dataframe()
+        df = build_master_df(PRODUCT_TYPES)
 
-    # Filter data
+    # Filter
     if selected_date == "All Dates":
-        dff = df.copy()
+        planned_df = df.copy()
+        completed_df = df.copy()
     else:
-        dff = df[df["Expected Commencement Date"] == selected_date]
+        planned_df = df[df["Planned Week"] == selected_date].copy()
+        completed_df = df[df["Completed Week"] == selected_date].copy()
 
-    grouped = (
-        dff.groupby("Line Item Name", dropna=False)
-           .agg(
-               Total_Deals=("Deal ID", "nunique"),
-               Total_Amount=("Line Item Amount", "sum")
-           )
-           .reset_index()
+    # ------------- PLANNED GROUPING -------------
+    planned_grouped = (
+        planned_df.groupby(["Planned Week", "product_type"], dropna=False)
+        .agg(
+            Total_Deals=("hubspot_id", "nunique"),
+            Total_Amount=("price", "sum"),
+            HubSpot_IDs=("hubspot_id",
+                         lambda x: ", ".join(sorted(set(x.astype(str)))))
+        )
+        .reset_index()
     )
 
-    grouped = grouped.rename(columns={
+    planned_grouped.rename(columns={
+        "product_type": "Product Type",
         "Total_Deals": "Total Deals",
-        "Total_Amount": "Total Amount (£)"
-    })
+        "Total_Amount": "Total Amount (£)",
+        "HubSpot_IDs": "HubSpot IDs",
+    }, inplace=True)
 
-    grouped = grouped.sort_values("Total Amount (£)", ascending=False)
-    return grouped.to_dict("records")
+    planned_records = [] if planned_grouped.empty else \
+        planned_grouped.sort_values("Total Amount (£)", ascending=False).to_dict("records")
+
+    # ------------- COMPLETED GROUPING -------------
+    completed_df = completed_df[completed_df["Completed Week"].notna()]
+
+    completed_grouped = (
+        completed_df.groupby(["Completed Week", "product_type"], dropna=False)
+        .agg(
+            Total_Deals=("hubspot_id", "nunique"),
+            Total_Amount=("price", "sum"),
+            HubSpot_IDs=("hubspot_id",
+                         lambda x: ", ".join(sorted(set(x.astype(str)))))
+        )
+        .reset_index()
+    )
+
+    # Rename to the display names used as the DataTable column ids and by the sort below.
+    completed_grouped.rename(columns={
+        "product_type": "Product Type",
+        "Total_Deals": "Total Deals",
+        "Total_Amount": "Total Amount (£)",
+        "HubSpot_IDs": "HubSpot IDs",
+    }, inplace=True)
+
+    completed_records = [] if completed_grouped.empty else \
+        completed_grouped.sort_values("Total Amount (£)", ascending=False).to_dict("records")
+
+    return planned_records, completed_records
+
+# -----------------------------------------------------
+# Callback: Modal for Planned + Completed tables
+# -----------------------------------------------------
+@app.callback(
+    Output("hubspot-modal", "is_open"),
+    Output("modal-body", "children"),
+    Input("planned-table", "selected_rows"),
+    Input("completed-table", "selected_rows"),
+    Input("close-modal", "n_clicks"),
+    State("planned-table", "data"),
+    State("completed-table", "data"),
+    State("hubspot-modal", "is_open"),
+)
+def open_modal(planned_rows, completed_rows, close_click,
+               planned_data, completed_data, is_open):
+
+    if close_click:
+        return False, ""
+
+    # Planned table click
+    if planned_rows:
+        row = planned_data[planned_rows[0]]
+        return True, html.Ul([html.Li(i) for i in row["HubSpot IDs"].split(", ")])
+
+    # Completed table click
+    if completed_rows:
+        row = completed_data[completed_rows[0]]
+        return True, html.Ul([html.Li(i) for i in row["HubSpot IDs"].split(", ")])
+
+    return False, ""
 
 
 if __name__ == "__main__":
diff --git a/backend/src/dashboard/scripts/quick_one.py b/backend/src/dashboard/scripts/quick_one.py
index d6d1342..1f70cd4 100644
--- a/backend/src/dashboard/scripts/quick_one.py
+++ b/backend/src/dashboard/scripts/quick_one.py
@@ -5,6 +5,4 @@ s3 = FileManager()
 key, path, data = s3.download_and_read_latest()
 hubspot_data = jsonReader(data)
 
-df  = hubspot_data.generate_df_via_product_type("Empty Cavity - ECO4")
-df
-
+df  = hubspot_data.generate_df_via_product_type("Empty Cavity - ECO4")
\ No newline at end of file
diff --git a/backend/src/dashboard/services/json_reader.py b/backend/src/dashboard/services/json_reader.py
index adb458e..1efe091 100644
--- a/backend/src/dashboard/services/json_reader.py
+++ b/backend/src/dashboard/services/json_reader.py
@@ -3,6 +3,7 @@ from collections import defaultdict
 import pandas as pd
 
 from enum import Enum
+from datetime import datetime
 
 class ProductType(Enum):
     EMPTY_CAVITY_ECO_4 = "Empty Cavity - ECO4"
@@ -16,6 +17,14 @@ class jsonReader:
         self.line_item_names = list
         self.initial_setup()
 
+    def to_date_only(self, timestamp: str) -> str:
+        if timestamp is None:
+            return None
+        if timestamp.endswith("Z"):
+            timestamp = timestamp[:-1]
+        dt = datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S")
+        return dt.strftime("%Y-%m-%d")
+    
     def initial_setup(self):
         """
         Build a dictionary mapping line item names -> list of deals
@@ -38,28 +47,46 @@ class jsonReader:
     def generate_df_via_product_type(self, product_type):
         rows = []
         for deals in self.deals_by_line_item[product_type]:
-                row = self._return_df_from_deal_info(deals, product_type)
-                rows.append(row)
-                break
+            row = self._return_df_from_deal_info(deals, product_type)
+            rows.append(row)
 
         if rows:
             return pd.concat(rows, ignore_index=True)
-        else:
-            return
 
     
     def _return_df_from_deal_info(self, deal, product_type):
-        data = {
-            "submission_date": deal["deal_properties"].get("submission_date", None),
-            "expected_commencement_date": deal["deal_properties"].get("expected_commencement_date", None),
-            "work_type": product_type,
-            "price": next(
-                    (item["price"] for item in deal["line_items"] if product_type in item["name"]),
-                    None
-                ) 
-        }
-            
-        return pd.DataFrame([data])
+        rows = []
+
+        if "ECO" in product_type or "EPC" in product_type:
+            if deal["attempts"]:
+                # Multiple attempts => multiple rows
+                for attempt in deal["attempts"]:
+                    rows.append({
+                        "submission_date": self.to_date_only(deal["deal_properties"].get("submission_date")),
+                        "hubspot_id": deal["deal_properties"]["deal_id"],
+                        "expected_commencement_date": deal["deal_properties"].get("expected_commencement_date"),
+                        "work_type": product_type,
+                        "price": next(
+                            (item["price"] for item in deal["line_items"] if product_type in item["name"]),
+                            None
+                        )
+                    })
+            else:
+                # Single row case
+                rows.append({
+                    "submission_date": self.to_date_only(deal["deal_properties"].get("last_submission_date")),
+                    "expected_commencement_date": deal["deal_properties"].get("expected_commencement_date"),
+                    "hubspot_id": deal["deal_properties"]["deal_id"],
+                    "work_type": product_type,
+                    "price": next(
+                        (item["price"] for item in deal["line_items"] if product_type in item["name"]),
+                        None
+                    )
+                })
+                
+
+        # Return a DataFrame or None
+        return pd.DataFrame(rows) if rows else None
     
     def find_all_job_with_line_item(self):
         for i, deal in enumerate(self.raw_data):