From 6bb2128c6a74610c417399e48d81c228da60ac39 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 25 Nov 2025 17:01:12 +0000 Subject: [PATCH] save local --- .devcontainer/devcontainer.json | 1 - .devcontainer/docker-compose.yml | 2 + .vscode/settings.json | 2 + backend/poetry.lock | 20 +- backend/pyproject.toml | 3 +- backend/src/dashboard/main.py | 333 ++++++++++++------ backend/src/dashboard/scripts/quick_one.py | 4 +- backend/src/dashboard/services/json_reader.py | 59 +++- 8 files changed, 295 insertions(+), 129 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 23f181d..2f2a89e 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -9,7 +9,6 @@ // Optional convenience mount "source=${localEnv:HOME},target=/workspaces/home,type=bind" ], - "customizations": { "vscode": { "settings": { diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 6ac624f..fc12511 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -1,7 +1,9 @@ version: '3.8' + services: insight: + init: true user: "${UID}:${GID}" build: context: .. 
diff --git a/.vscode/settings.json b/.vscode/settings.json index 27782c1..56ad03a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -10,7 +10,9 @@ } }, + // Hot reload setting that needs to be in user settings + // Ctrl + Shift + P, Preferences: Open User Settings (JSON) // "jupyter.runStartupCommands": [ // "%load_ext autoreload", "%autoreload 2" // ] diff --git a/backend/poetry.lock b/backend/poetry.lock index 612ee27..60e923d 100644 --- a/backend/poetry.lock +++ b/backend/poetry.lock @@ -403,6 +403,24 @@ dev = ["PyYAML (>=5.4.1)", "coloredlogs (>=15.0.1)", "fire (>=0.4.0)"] diskcache = ["diskcache (>=5.2.1)", "multiprocess (>=0.70.12)", "psutil (>=5.8.0)"] testing = ["beautifulsoup4 (>=4.8.2)", "cryptography", "dash_testing_stub (>=0.0.2)", "lxml (>=4.6.2)", "multiprocess (>=0.70.12)", "percy (>=2.0.2)", "psutil (>=5.8.0)", "pytest (>=6.0.2)", "requests[security] (>=2.21.0)", "selenium (>=3.141.0,<=4.2.0)", "waitress (>=1.4.4)"] +[[package]] +name = "dash-bootstrap-components" +version = "2.0.4" +description = "Bootstrap themed components for use in Plotly Dash" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "dash_bootstrap_components-2.0.4-py3-none-any.whl", hash = "sha256:767cf0084586c1b2b614ccf50f79fe4525fdbbf8e3a161ed60016e584a14f5d1"}, + {file = "dash_bootstrap_components-2.0.4.tar.gz", hash = "sha256:c3206c0923774bbc6a6ddaa7822b8d9aa5326b0d3c1e7cd795cc975025fe2484"}, +] + +[package.dependencies] +dash = ">=3.0.4" + +[package.extras] +pandas = ["numpy (>=2.0.2)", "pandas (>=2.2.3)"] + [[package]] name = "debugpy" version = "1.8.17" @@ -1849,4 +1867,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.12" -content-hash = "e5f24396e682e9ab0a06586c52e2bcc4b9c79c76352b8f752c5f1997808c6b7d" +content-hash = "8fb73ebfc424ad06013ee6c53bef966265d0658fc6ea9a6e6489e285f90844e8" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 4779723..3943693 100644 --- 
a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -16,7 +16,8 @@ dependencies = [ "tqdm (>=4.67.1,<5.0.0)", "boto3 (>=1.40.74,<2.0.0)", "dash (>=3.3.0,<4.0.0)", - "gunicorn (>=23.0.0,<24.0.0)" + "gunicorn (>=23.0.0,<24.0.0)", + "dash-bootstrap-components (>=2.0.4,<3.0.0)" ] [tool.poetry] diff --git a/backend/src/dashboard/main.py b/backend/src/dashboard/main.py index cf89b57..3c8f598 100644 --- a/backend/src/dashboard/main.py +++ b/backend/src/dashboard/main.py @@ -1,106 +1,125 @@ -from dash import Dash, html, dcc, dash_table, Input, Output +from dash import Dash, html, dcc, dash_table, Input, Output, State +import dash_bootstrap_components as dbc import pandas as pd -import json -import boto3 -import re -import os +from datetime import timedelta -BUCKET = "retrofit-data-dev" -PREFIX = "hubspot_insight/" +from dashboard.services.file_manager import FileManager +from dashboard.services.json_reader import jsonReader -# ------------------------- -# S3 Helpers -# ------------------------- -def get_latest_s3_file(bucket: str, prefix: str = "") -> str: - s3 = boto3.client("s3") - response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix) +from datetime import datetime - if "Contents" not in response: - raise FileNotFoundError("No files found.") +def current_week_start(): + today = datetime.today() + monday = today - timedelta(days=today.weekday()) + return monday.strftime("%Y-%m-%d") - timestamp_regex = re.compile(r".*_(\d{8}_\d{6})\.json$") - files = [] - - for obj in response["Contents"]: - key = obj["Key"] - match = timestamp_regex.match(key) - if match: - files.append((match.group(1), key)) - - if not files: - raise FileNotFoundError("No timestamped files found.") - - return sorted(files, key=lambda x: x[0], reverse=True)[0][1] +# ----------------------------------------------------- +# Product types +# ----------------------------------------------------- +PRODUCT_TYPES = [ + "Empty Cavity - ECO4", + "Solar PV - ECO4", + "Extract & Fill - ECO4", + "Solar PV 
+ Heating Upgrade - ECO4", + "Solar PV + HHRSH - ECO4", + "ECO4 empty cavity survey", + "ECO4 Retrofit Coordination", + "ECO4 Solar with client contribution", + "EPC", +] -def download_and_read_latest(bucket: str, prefix: str = "", download_dir="downloads"): - s3 = boto3.client("s3") - os.makedirs(download_dir, exist_ok=True) - - latest_key = get_latest_s3_file(bucket, prefix) - local_path = os.path.join(download_dir, latest_key.split("/")[-1]) - - s3.download_file(bucket, latest_key, local_path) - - with open(local_path, "r") as f: - data = json.load(f) - - return latest_key, local_path, data +# ----------------------------------------------------- +# Helpers +# ----------------------------------------------------- +def week_start_monday(date_str): + if not date_str or pd.isna(date_str): + return None + date = pd.to_datetime(date_str) + monday = date - timedelta(days=date.weekday()) # Monday = week start + return monday.strftime("%Y-%m-%d") -# ------------------------- -# Build DataFrame from S3 -# ------------------------- -def load_dataframe(): - latest_key, file_path, data = download_and_read_latest(BUCKET, PREFIX) +# ----------------------------------------------------- +# Build master DF (single S3 read) +# ----------------------------------------------------- +def build_master_df(product_types): + s3 = FileManager() + key, path, data = s3.download_and_read_latest() + hubspot_data = jsonReader(data) - records = [] - for entry in data: - p = entry["deal_properties"] - line_items = entry.get("line_items", []) + frames = [] - if not line_items: - records.append({ - "Expected Commencement Date": p.get("expected_commencement_date"), - "Deal ID": p.get("deal_id"), - "Line Item Name": "Missing Line Item", - "Line Item Amount": 0 - }) - else: - for li in line_items: - records.append({ - "Expected Commencement Date": p.get("expected_commencement_date"), - "Deal ID": p.get("deal_id"), - "Line Item Name": li.get("name"), - "Line Item Amount": li.get("amount", 0) - }) + 
for p in product_types: + df = hubspot_data.generate_df_via_product_type(p) - df = pd.DataFrame(records) - df["Line Item Amount"] = pd.to_numeric(df["Line Item Amount"], errors="coerce").fillna(0) + if df is None or not isinstance(df, pd.DataFrame) or df.empty: + continue - return df + df["price"] = pd.to_numeric(df["price"], errors="coerce").fillna(0) + df["product_type"] = p + + # Planned week + df["Planned Week"] = df["expected_commencement_date"].apply(week_start_monday) + + # Raw completed week + df["raw_completed_week"] = df.get("submission_date", None) + df["raw_completed_week"] = df["raw_completed_week"].apply(week_start_monday) + + # Completed-week logic + def corrected_completed_week(row): + planned = row["Planned Week"] + submitted = row["raw_completed_week"] + + if not submitted: + return None # no completion + + if not planned: + return submitted # fallback + + # override if submitted > planned + if submitted > planned: + return planned + + return submitted + + df["Completed Week"] = df.apply(corrected_completed_week, axis=1) + df.drop(columns=["raw_completed_week"], inplace=True) + + frames.append(df) + + if not frames: + return pd.DataFrame() + + return pd.concat(frames, ignore_index=True) -# ------------------------- -# Initial Load -# ------------------------- -df = load_dataframe() +# ----------------------------------------------------- +# Initial DF load +# ----------------------------------------------------- +df = build_master_df(PRODUCT_TYPES) -# ------------------------- +# ----------------------------------------------------- # Dash App -# ------------------------- -app = Dash(__name__) -server = app.server # required for Render +# ----------------------------------------------------- +app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]) +server = app.server + +# ----------------------------------------------------- +# Layout +# ----------------------------------------------------- app.layout = html.Div([ - html.H1("Line Items 
by Expected Commencement Date", style={"textAlign": "center"}), + + html.H1("Planned vs Completed", style={"textAlign": "center"}), dcc.Dropdown( id="date-filter", options=[{"label": "All Dates", "value": "All Dates"}] + - [{"label": d, "value": d} for d in sorted(df["Expected Commencement Date"].dropna().unique())], - value="All Dates", + [{"label": d, "value": d} + for d in sorted(df["Planned Week"].dropna().unique())], + value=current_week_start() if current_week_start() in df["Planned Week"].unique() else "All Dates", clearable=False, style={"width": "300px", "margin": "20px auto"} ), @@ -112,59 +131,159 @@ app.layout = html.Div([ style={"margin": "10px", "padding": "10px 20px"} ), + html.H2("Planned", style={"marginTop": "30px"}), + dash_table.DataTable( - id="lineitem-table", + id="planned-table", columns=[ - {"name": "Line Item Name", "id": "Line Item Name"}, + {"name": "Product Type", "id": "Product Type"}, {"name": "Total Deals", "id": "Total Deals"}, {"name": "Total Amount (£)", "id": "Total Amount (£)"}, ], page_size=20, sort_action="native", - style_table={"overflowX": "scroll"}, - style_header={"backgroundColor": "#e4e4e4", "fontWeight": "bold"}, - style_cell={"padding": "8px", "textAlign": "left"}, - ) + row_selectable="single", + cell_selectable=False, + style_cell_conditional=[ + {"if": {"column_id": "Product Type"}, "textAlign": "left"}, + ] + ), + + html.H2("Actual", style={"marginTop": "40px"}), + + dash_table.DataTable( + id="completed-table", + columns=[ + {"name": "Product Type", "id": "Product Type"}, + {"name": "Total Deals", "id": "Total Deals"}, + {"name": "Total Amount (£)", "id": "Total Amount (£)"}, + ], + page_size=20, + sort_action="native", + row_selectable="single", + cell_selectable=False, + style_cell_conditional=[ + {"if": {"column_id": "Product Type"}, "textAlign": "left"}, + ] + ), + + dbc.Modal( + [ + dbc.ModalHeader("HubSpot IDs"), + dbc.ModalBody(id="modal-body"), + dbc.ModalFooter( + dbc.Button("Close", id="close-modal", 
className="ms-auto") + ), + ], + id="hubspot-modal", + size="lg", + is_open=False, + ), ]) -# ------------------------- -# Callback (filter + refresh) -# ------------------------- +# ----------------------------------------------------- +# Callback: Table Updates +# ----------------------------------------------------- @app.callback( - Output("lineitem-table", "data"), + Output("planned-table", "data"), + Output("completed-table", "data"), Input("date-filter", "value"), Input("refresh-btn", "n_clicks") ) -def update_table(selected_date, n_clicks): +def update_tables(selected_date, n_clicks): global df - # Refresh DF from AWS when button clicked if n_clicks > 0: - df = load_dataframe() + df = build_master_df(PRODUCT_TYPES) - # Filter data + # Filter if selected_date == "All Dates": - dff = df.copy() + planned_df = df.copy() + completed_df = df.copy() else: - dff = df[df["Expected Commencement Date"] == selected_date] + planned_df = df[df["Planned Week"] == selected_date].copy() + completed_df = df[df["Completed Week"] == selected_date].copy() - grouped = ( - dff.groupby("Line Item Name", dropna=False) - .agg( - Total_Deals=("Deal ID", "nunique"), - Total_Amount=("Line Item Amount", "sum") - ) - .reset_index() + # ------------- PLANNED GROUPING ------------- + planned_grouped = ( + planned_df.groupby(["Planned Week", "product_type"], dropna=False) + .agg( + Total_Deals=("hubspot_id", "nunique"), + Total_Amount=("price", "sum"), + HubSpot_IDs=("hubspot_id", + lambda x: ", ".join(sorted(set(x.astype(str))))) + ) + .reset_index() ) - grouped = grouped.rename(columns={ + planned_grouped.rename(columns={ + "product_type": "Product Type", "Total_Deals": "Total Deals", - "Total_Amount": "Total Amount (£)" - }) + "Total_Amount": "Total Amount (£)", + "HubSpot_IDs": "HubSpot IDs", + }, inplace=True) - grouped = grouped.sort_values("Total Amount (£)", ascending=False) - return grouped.to_dict("records") + planned_records = [] if planned_grouped.empty else \ + 
planned_grouped.sort_values("Total Amount (£)", ascending=False).to_dict("records") + + # ------------- COMPLETED GROUPING ------------- + completed_df = completed_df[completed_df["Completed Week"].notna()] + + completed_grouped = ( + completed_df.groupby(["Completed Week", "product_type"], dropna=False) + .agg( + Total_Deals=("hubspot_id", "nunique"), + Total_Amount=("price", "sum"), + HubSpot_IDs=("hubspot_id", + lambda x: ", ".join(sorted(set(x.astype(str))))) + ) + .reset_index() + ) + + # Rename the aggregated columns to the display names used by the table. + completed_grouped.rename(columns={ + "product_type": "Product Type", + "Total_Deals": "Total Deals", + "Total_Amount": "Total Amount (£)", + "HubSpot_IDs": "HubSpot IDs", + }, inplace=True) + + completed_records = [] if completed_grouped.empty else \ + completed_grouped.sort_values("Total Amount (£)", ascending=False).to_dict("records") + + return planned_records, completed_records + +# ----------------------------------------------------- +# Callback: Modal for Planned + Completed tables +# ----------------------------------------------------- +@app.callback( + Output("hubspot-modal", "is_open"), + Output("modal-body", "children"), + Input("planned-table", "selected_rows"), + Input("completed-table", "selected_rows"), + Input("close-modal", "n_clicks"), + State("planned-table", "data"), + State("completed-table", "data"), + State("hubspot-modal", "is_open"), +) +def open_modal(planned_rows, completed_rows, close_click, + planned_data, completed_data, is_open): + + if close_click: + return False, "" + + # Planned table click + if planned_rows: + row = planned_data[planned_rows[0]] + return True, html.Ul([html.Li(i) for i in row["HubSpot IDs"].split(", ")]) + + # Completed table click + if completed_rows: + row = completed_data[completed_rows[0]] + return True, html.Ul([html.Li(i) for i in row["HubSpot IDs"].split(", ")]) + + return False, "" if __name__ == "__main__": diff --git a/backend/src/dashboard/scripts/quick_one.py 
b/backend/src/dashboard/scripts/quick_one.py index d6d1342..1f70cd4 100644 --- a/backend/src/dashboard/scripts/quick_one.py +++ b/backend/src/dashboard/scripts/quick_one.py @@ -5,6 +5,4 @@ s3 = FileManager() key, path, data = s3.download_and_read_latest() hubspot_data = jsonReader(data) -df = hubspot_data.generate_df_via_product_type("Empty Cavity - ECO4") -df - +df = hubspot_data.generate_df_via_product_type("Empty Cavity - ECO4") \ No newline at end of file diff --git a/backend/src/dashboard/services/json_reader.py b/backend/src/dashboard/services/json_reader.py index adb458e..1efe091 100644 --- a/backend/src/dashboard/services/json_reader.py +++ b/backend/src/dashboard/services/json_reader.py @@ -3,6 +3,7 @@ from collections import defaultdict import pandas as pd from enum import Enum +from datetime import datetime class ProductType(Enum): EMPTY_CAVITY_ECO_4 = "Empty Cavity - ECO4" @@ -16,6 +17,14 @@ class jsonReader: self.line_item_names = list self.initial_setup() + def to_date_only(self, timestamp: str) -> str: + if timestamp is None: + return None + if timestamp.endswith("Z"): + timestamp = timestamp[:-1] + dt = datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S") + return dt.strftime("%Y-%m-%d") + def initial_setup(self): """ Build a dictionary mapping line item names -> list of deals @@ -38,28 +47,46 @@ class jsonReader: def generate_df_via_product_type(self, product_type): rows = [] for deals in self.deals_by_line_item[product_type]: - row = self._return_df_from_deal_info(deals, product_type) - rows.append(row) - break + row = self._return_df_from_deal_info(deals, product_type) + rows.append(row) if rows: return pd.concat(rows, ignore_index=True) - else: - return def _return_df_from_deal_info(self, deal, product_type): - data = { - "submission_date": deal["deal_properties"].get("submission_date", None), - "expected_commencement_date": deal["deal_properties"].get("expected_commencement_date", None), - "work_type": product_type, - "price": next( - 
(item["price"] for item in deal["line_items"] if product_type in item["name"]), - None - ) - } - - return pd.DataFrame([data]) + rows = [] + + if "ECO" in product_type or "EPC" in product_type: + if deal["attempts"]: + # Multiple attempts => multiple rows + for attempt in deal["attempts"]: + rows.append({ + "submission_date": self.to_date_only(deal["deal_properties"].get("submission_date")), + "hubspot_id": deal["deal_properties"]["deal_id"], + "expected_commencement_date": deal["deal_properties"].get("expected_commencement_date"), + "work_type": product_type, + "price": next( + (item["price"] for item in deal["line_items"] if product_type in item["name"]), + None + ) + }) + else: + # Single row case + rows.append({ + "submission_date": self.to_date_only(deal["deal_properties"].get("last_submission_date")), + "expected_commencement_date": deal["deal_properties"].get("expected_commencement_date"), + "hubspot_id": deal["deal_properties"]["deal_id"], + "work_type": product_type, + "price": next( + (item["price"] for item in deal["line_items"] if product_type in item["name"]), + None + ) + }) + + + # Return a DataFrame or None + return pd.DataFrame(rows) if rows else None def find_all_job_with_line_item(self): for i, deal in enumerate(self.raw_data):