save to main in case khalim deploys

This commit is contained in:
Jun-te Kim 2025-11-17 20:35:42 +00:00
parent 02f9f60114
commit 004943faa5

View file

@ -1,30 +1,23 @@
from dash import Dash, html, dcc, callback, Output, Input, dash_table, html
import plotly.express as px
from dash import Dash, html, dcc, dash_table, Input, Output
import pandas as pd
import json
import boto3
import re
import os
from datetime import datetime
BUCKET="retrofit-data-dev"
PREFIX="hubspot_insight/"
BUCKET = "retrofit-data-dev"
PREFIX = "hubspot_insight/"
def get_latest_s3_file(bucket: str, prefix: str = "") -> str:
"""
Returns the key of the latest timestamped file in S3.
Files must contain a timestamp like: *_YYYYMMDD_HHMMSS.json
"""
s3 = boto3.client("s3")
response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
if "Contents" not in response:
raise FileNotFoundError("No files found in bucket/prefix.")
raise FileNotFoundError("No files found.")
timestamp_regex = re.compile(r".*_(\d{8}_\d{6})\.json$")
files = []
for obj in response["Contents"]:
key = obj["Key"]
match = timestamp_regex.match(key)
@ -34,9 +27,8 @@ def get_latest_s3_file(bucket: str, prefix: str = "") -> str:
if not files:
raise FileNotFoundError("No timestamped files found.")
# Sort by timestamp descending
latest_key = sorted(files, key=lambda x: x[0], reverse=True)[0][1]
return latest_key
return sorted(files, key=lambda x: x[0], reverse=True)[0][1]
def download_and_read_latest(bucket: str, prefix: str = "", download_dir="downloads"):
s3 = boto3.client("s3")
@ -52,57 +44,112 @@ def download_and_read_latest(bucket: str, prefix: str = "", download_dir="downlo
return latest_key, local_path, data
# -------------------------
# Load S3 JSON
# -------------------------
latest_key, file_path, data = download_and_read_latest(BUCKET, PREFIX)
# ---------------------------------------------------------
# Extract relevant info into a table
# ---------------------------------------------------------
# -------------------------
# Build expanded table
# -------------------------
records = []
def iso_week(date_str):
    """Return the ISO-8601 week number for an ISO-formatted date string.

    Args:
        date_str: An ISO-8601 date or datetime string such as
            ``"2025-11-17"`` or ``"2025-11-17T20:35:42Z"``. Falsy values
            (``None``, ``""``) are treated as "no date".

    Returns:
        The ISO week number (1-53), or ``None`` when ``date_str`` is falsy.

    Raises:
        ValueError: If ``date_str`` is not a valid ISO-8601 string.
    """
    if not date_str:
        return None
    # ``datetime.fromisoformat`` only accepts a trailing "Z" from Python 3.11
    # onwards; normalise it to an explicit UTC offset so older interpreters
    # parse the same input.
    if isinstance(date_str, str) and date_str.endswith("Z"):
        date_str = date_str[:-1] + "+00:00"
    # Positional access instead of ``.week``: the named attribute only exists
    # on Python 3.9+, while index 1 is the week number on every version.
    return datetime.fromisoformat(date_str).isocalendar()[1]
for entry in data:
p = entry["deal_properties"]
c = entry["company_info"]
line_items = entry.get("line_items", [])
records.append({
"Deal Name": p.get("dealname"),
"Company": c.get("name"),
"Expected Start Date": p.get("expected_commencement_date"),
"Expected Week": iso_week(p.get("expected_commencement_date")),
"Design Planned Week": p.get("design_planned_week"),
"Design Completion": p.get("design_completion_date"),
"MTP Planned Week": p.get("mtp_planned_week"),
"MTP Completion": p.get("mtp_completion_date"),
"Retrofit Status": p.get("retrofit_design_status"),
"Deal ID": p.get("deal_id")
})
if not line_items:
records.append({
"Expected Commencement Date": p.get("expected_commencement_date"),
"Deal ID": p.get("deal_id"),
"Line Item Name": "Missing Line Item",
"Line Item Amount": 0
})
else:
for li in line_items:
records.append({
"Expected Commencement Date": p.get("expected_commencement_date"),
"Deal ID": p.get("deal_id"),
"Line Item Name": li.get("name"),
"Line Item Amount": li.get("amount", 0)
})
df = pd.DataFrame(records)
# ---------------------------------------------------------
# Convert numeric fields
df["Line Item Amount"] = pd.to_numeric(df["Line Item Amount"], errors="coerce").fillna(0)
# -------------------------
# Unique date list
# -------------------------
unique_dates = sorted(df["Expected Commencement Date"].dropna().unique())
unique_dates = ["All Dates"] + list(unique_dates)
# -------------------------
# Dash App
# ---------------------------------------------------------
# -------------------------
app = Dash(__name__)
app.layout = html.Div([
html.H1("Deal Scheduling Overview", style={"textAlign": "center"}),
html.H1("Line Items by Expected Commencement Date", style={"textAlign": "center"}),
dcc.Dropdown(
id="date-filter",
options=[{"label": d, "value": d} for d in unique_dates],
value="All Dates",
clearable=False,
style={"width": "300px", "margin": "20px auto"}
),
dash_table.DataTable(
id="deal-table",
columns=[{"name": i, "id": i} for i in df.columns],
data=df.to_dict("records"),
id="lineitem-table",
columns=[
{"name": "Line Item Name", "id": "Line Item Name"},
{"name": "Total Deals", "id": "Total Deals"},
{"name": "Total Amount (£)", "id": "Total Amount (£)"},
],
page_size=20,
filter_action="native",
sort_action="native",
style_table={"overflowX": "scroll"},
style_header={"backgroundColor": "#f0f0f0", "fontWeight": "bold"},
style_header={"backgroundColor": "#e4e4e4", "fontWeight": "bold"},
style_cell={"padding": "8px", "textAlign": "left"},
)
])
# -------------------------
# Callback for filtering
# -------------------------
@app.callback(
    Output("lineitem-table", "data"),
    Input("date-filter", "value"),
)
def update_table(selected_date):
    """Build the rows for the line-item table from the dropdown selection.

    Args:
        selected_date: Value of the "date-filter" dropdown. The sentinel
            "All Dates" selects every row of the module-level ``df``; any
            other value keeps only rows whose "Expected Commencement Date"
            equals it.

    Returns:
        A list of dicts, one per "Line Item Name", carrying the number of
        distinct "Deal ID" values and the summed "Line Item Amount", ordered
        by total amount from largest to smallest.
    """
    show_everything = selected_date == "All Dates"
    subset = (
        df.copy()
        if show_everything
        else df[df["Expected Commencement Date"] == selected_date]
    )

    # Aggregate per line item; dropna=False keeps a group for missing names.
    per_item = subset.groupby("Line Item Name", dropna=False).agg(
        Total_Deals=("Deal ID", "nunique"),
        Total_Amount=("Line Item Amount", "sum"),
    )

    # Column labels must match the DataTable column ids declared in the layout.
    display_names = {
        "Total_Deals": "Total Deals",
        "Total_Amount": "Total Amount (£)",
    }
    summary = (
        per_item.reset_index()
        .rename(columns=display_names)
        .sort_values("Total Amount (£)", ascending=False)
    )
    return summary.to_dict("records")
if __name__ == "__main__":
app.run(debug=True)
app.run(debug=True)