# Patch notes for backend/src/dashboard/main.py (5f6cb8c -> 4e3b5d8)
#
# This text sits before the `diff --git` header, where patch tools skip
# leading non-diff text. NOTE(review): as stored here the patch's line breaks
# are folded onto three long lines; it needs re-wrapping before it can be
# applied, and blank added/removed/context lines cannot be told apart.
#
# What the patch changes
# - Imports: the `dash` import drops `callback` and the duplicated `html`;
#   `plotly.express` and `datetime` imports are removed.
# - BUCKET / PREFIX: only spacing around `=` changes; the values are the same.
# - get_latest_s3_file: the docstring is removed, the "no Contents" error
#   message is shortened to "No files found.", and the sort + return of the
#   newest key is folded into a single return expression.
# - Record building: the one-row-per-deal table (deal name, company, planned
#   weeks, completion dates, status) and the `iso_week` helper are removed.
#   Each deal is now expanded to one row per entry in `line_items`, carrying
#   Expected Commencement Date, Deal ID, Line Item Name and Line Item Amount.
#   A deal with no line items gets one placeholder row named
#   "Missing Line Item" with amount 0. `company_info` is no longer read.
# - "Line Item Amount" is passed through pd.to_numeric(errors="coerce") and
#   NaN is replaced with 0, so unparseable amounts count as 0.
# - Layout: new page title; a `date-filter` dropdown is added, listing
#   "All Dates" plus the sorted non-null commencement dates; the table id
#   changes from `deal-table` to `lineitem-table` with three fixed columns
#   (Line Item Name, Total Deals, Total Amount (£)); `filter_action="native"`
#   and the static `data=` are removed; the header colour changes.
# - New callback `update_table`: filters rows to the selected date (or keeps
#   all rows for "All Dates"), groups by Line Item Name with dropna=False,
#   counts distinct Deal IDs and sums amounts per group, renames the columns
#   to the DataTable ids, and sorts by total amount descending.
# - The file now ends with a trailing newline.
#
# Review notes (to confirm against the full file)
# - NOTE(review): the JSON is downloaded and `df` is built at module import
#   (top-level call to download_and_read_latest); the callback only re-filters
#   that in-memory frame, so new S3 files presumably need a restart.
# - NOTE(review): if `data` is empty, `records` stays empty and the
#   `df["Line Item Amount"]` / `df["Expected Commencement Date"]` lookups
#   would raise KeyError on the column-less frame.
# - NOTE(review): `entry["deal_properties"]` is still indexed directly while
#   `line_items` uses `.get`; an entry without `deal_properties` would raise
#   KeyError. Confirm the export schema guarantees the key.
# - NOTE(review): get_latest_s3_file (context, not changed here) shows a single
#   list_objects_v2 call; that API returns at most 1,000 keys per response, so
#   a larger prefix may need pagination to find the newest file.
diff --git a/backend/src/dashboard/main.py b/backend/src/dashboard/main.py index 5f6cb8c..4e3b5d8 100644 --- a/backend/src/dashboard/main.py +++ b/backend/src/dashboard/main.py @@ -1,30 +1,23 @@ -from dash import Dash, html, dcc, callback, Output, Input, dash_table, html -import plotly.express as px +from dash import Dash, html, dcc, dash_table, Input, Output import pandas as pd import json import boto3 import re import os -from datetime import datetime -BUCKET="retrofit-data-dev" -PREFIX="hubspot_insight/" +BUCKET = "retrofit-data-dev" +PREFIX = "hubspot_insight/" def get_latest_s3_file(bucket: str, prefix: str = "") -> str: - """ - Returns the key of the latest timestamped file in S3. - Files must contain a timestamp like: *_YYYYMMDD_HHMMSS.json - """ s3 = boto3.client("s3") - response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix) if "Contents" not in response: - raise FileNotFoundError("No files found in bucket/prefix.") + raise FileNotFoundError("No files found.") timestamp_regex = re.compile(r".*_(\d{8}_\d{6})\.json$") - files = [] + for obj in response["Contents"]: key = obj["Key"] match = timestamp_regex.match(key) @@ -34,9 +27,8 @@ def get_latest_s3_file(bucket: str, prefix: str = "") -> str: if not files: raise FileNotFoundError("No timestamped files found.") - # Sort by timestamp descending - latest_key = sorted(files, key=lambda x: x[0], reverse=True)[0][1] - return latest_key + return sorted(files, key=lambda x: x[0], reverse=True)[0][1] + def download_and_read_latest(bucket: str, prefix: str = "", download_dir="downloads"): s3 = boto3.client("s3") @@ -52,57 +44,112 @@ def download_and_read_latest(bucket: str, prefix: str = "", download_dir="downlo return latest_key, local_path, data + +# ------------------------- +# Load S3 JSON +# ------------------------- latest_key, file_path, data = download_and_read_latest(BUCKET, PREFIX) -# --------------------------------------------------------- -# Extract relevant info into a table -# 
--------------------------------------------------------- +# ------------------------- +# Build expanded table +# ------------------------- records = [] -def iso_week(date_str): - if not date_str: - return None - return datetime.fromisoformat(date_str).isocalendar().week - for entry in data: p = entry["deal_properties"] - c = entry["company_info"] + line_items = entry.get("line_items", []) - records.append({ - "Deal Name": p.get("dealname"), - "Company": c.get("name"), - "Expected Start Date": p.get("expected_commencement_date"), - "Expected Week": iso_week(p.get("expected_commencement_date")), - "Design Planned Week": p.get("design_planned_week"), - "Design Completion": p.get("design_completion_date"), - "MTP Planned Week": p.get("mtp_planned_week"), - "MTP Completion": p.get("mtp_completion_date"), - "Retrofit Status": p.get("retrofit_design_status"), - "Deal ID": p.get("deal_id") - }) + if not line_items: + records.append({ + "Expected Commencement Date": p.get("expected_commencement_date"), + "Deal ID": p.get("deal_id"), + "Line Item Name": "Missing Line Item", + "Line Item Amount": 0 + }) + else: + for li in line_items: + records.append({ + "Expected Commencement Date": p.get("expected_commencement_date"), + "Deal ID": p.get("deal_id"), + "Line Item Name": li.get("name"), + "Line Item Amount": li.get("amount", 0) + }) df = pd.DataFrame(records) -# --------------------------------------------------------- +# Convert numeric fields +df["Line Item Amount"] = pd.to_numeric(df["Line Item Amount"], errors="coerce").fillna(0) + +# ------------------------- +# Unique date list +# ------------------------- +unique_dates = sorted(df["Expected Commencement Date"].dropna().unique()) +unique_dates = ["All Dates"] + list(unique_dates) + +# ------------------------- # Dash App -# --------------------------------------------------------- +# ------------------------- app = Dash(__name__) app.layout = html.Div([ - html.H1("Deal Scheduling Overview", style={"textAlign": 
"center"}), + html.H1("Line Items by Expected Commencement Date", style={"textAlign": "center"}), + + dcc.Dropdown( + id="date-filter", + options=[{"label": d, "value": d} for d in unique_dates], + value="All Dates", + clearable=False, + style={"width": "300px", "margin": "20px auto"} + ), dash_table.DataTable( - id="deal-table", - columns=[{"name": i, "id": i} for i in df.columns], - data=df.to_dict("records"), + id="lineitem-table", + columns=[ + {"name": "Line Item Name", "id": "Line Item Name"}, + {"name": "Total Deals", "id": "Total Deals"}, + {"name": "Total Amount (£)", "id": "Total Amount (£)"}, + ], page_size=20, - filter_action="native", sort_action="native", style_table={"overflowX": "scroll"}, - style_header={"backgroundColor": "#f0f0f0", "fontWeight": "bold"}, + style_header={"backgroundColor": "#e4e4e4", "fontWeight": "bold"}, style_cell={"padding": "8px", "textAlign": "left"}, ) ]) +# ------------------------- +# Callback for filtering +# ------------------------- +@app.callback( + Output("lineitem-table", "data"), + Input("date-filter", "value") +) +def update_table(selected_date): + + if selected_date == "All Dates": + dff = df.copy() + else: + dff = df[df["Expected Commencement Date"] == selected_date] + + grouped = ( + dff.groupby("Line Item Name", dropna=False) + .agg( + Total_Deals=("Deal ID", "nunique"), + Total_Amount=("Line Item Amount", "sum") + ) + .reset_index() + ) + + # Final rename to match DataTable column IDs + grouped = grouped.rename(columns={ + "Total_Deals": "Total Deals", + "Total_Amount": "Total Amount (£)" + }) + + grouped = grouped.sort_values("Total Amount (£)", ascending=False) + + return grouped.to_dict("records") + + if __name__ == "__main__": - app.run(debug=True) \ No newline at end of file + app.run(debug=True)