Merge pull request #11 from Hestia-Homes/feature/forgot_to_branch_off

Feature/forgot to branch off
This commit is contained in:
Jun-te Kim 2025-11-26 08:05:31 +00:00 committed by GitHub
commit 091b2cab7a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 308 additions and 129 deletions

View file

@ -9,7 +9,6 @@
// Optional convenience mount
"source=${localEnv:HOME},target=/workspaces/home,type=bind"
],
"customizations": {
"vscode": {
"settings": {

View file

@ -1,7 +1,9 @@
version: '3.8'
services:
insight:
init: true
user: "${UID}:${GID}"
build:
context: ..

View file

@ -10,7 +10,9 @@
}
},
// Hot reload setting that needs to be in user settings
// Ctrl + Shift + P, then "Preferences: Open User Settings (JSON)"
// "jupyter.runStartupCommands": [
// "%load_ext autoreload", "%autoreload 2"
// ]

20
backend/poetry.lock generated
View file

@ -403,6 +403,24 @@ dev = ["PyYAML (>=5.4.1)", "coloredlogs (>=15.0.1)", "fire (>=0.4.0)"]
diskcache = ["diskcache (>=5.2.1)", "multiprocess (>=0.70.12)", "psutil (>=5.8.0)"]
testing = ["beautifulsoup4 (>=4.8.2)", "cryptography", "dash_testing_stub (>=0.0.2)", "lxml (>=4.6.2)", "multiprocess (>=0.70.12)", "percy (>=2.0.2)", "psutil (>=5.8.0)", "pytest (>=6.0.2)", "requests[security] (>=2.21.0)", "selenium (>=3.141.0,<=4.2.0)", "waitress (>=1.4.4)"]
[[package]]
name = "dash-bootstrap-components"
version = "2.0.4"
description = "Bootstrap themed components for use in Plotly Dash"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "dash_bootstrap_components-2.0.4-py3-none-any.whl", hash = "sha256:767cf0084586c1b2b614ccf50f79fe4525fdbbf8e3a161ed60016e584a14f5d1"},
{file = "dash_bootstrap_components-2.0.4.tar.gz", hash = "sha256:c3206c0923774bbc6a6ddaa7822b8d9aa5326b0d3c1e7cd795cc975025fe2484"},
]
[package.dependencies]
dash = ">=3.0.4"
[package.extras]
pandas = ["numpy (>=2.0.2)", "pandas (>=2.2.3)"]
[[package]]
name = "debugpy"
version = "1.8.17"
@ -1849,4 +1867,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.12"
content-hash = "e5f24396e682e9ab0a06586c52e2bcc4b9c79c76352b8f752c5f1997808c6b7d"
content-hash = "8fb73ebfc424ad06013ee6c53bef966265d0658fc6ea9a6e6489e285f90844e8"

View file

@ -16,7 +16,8 @@ dependencies = [
"tqdm (>=4.67.1,<5.0.0)",
"boto3 (>=1.40.74,<2.0.0)",
"dash (>=3.3.0,<4.0.0)",
"gunicorn (>=23.0.0,<24.0.0)"
"gunicorn (>=23.0.0,<24.0.0)",
"dash-bootstrap-components (>=2.0.4,<3.0.0)"
]
[tool.poetry]

View file

@ -1,106 +1,125 @@
from dash import Dash, html, dcc, dash_table, Input, Output
from dash import Dash, html, dcc, dash_table, Input, Output, State
import dash_bootstrap_components as dbc
import pandas as pd
import json
import boto3
import re
import os
from datetime import timedelta
BUCKET = "retrofit-data-dev"
PREFIX = "hubspot_insight/"
from dashboard.services.file_manager import FileManager
from dashboard.services.json_reader import jsonReader
# -------------------------
# S3 Helpers
# -------------------------
def get_latest_s3_file(bucket: str, prefix: str = "") -> str:
s3 = boto3.client("s3")
response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
from datetime import datetime
if "Contents" not in response:
raise FileNotFoundError("No files found.")
def current_week_start(reference=None):
    """Return the Monday of a week as a ``YYYY-MM-DD`` string.

    Args:
        reference: Optional ``datetime`` (or ``date``) whose week is wanted.
            Defaults to ``datetime.today()``, which keeps the original
            zero-argument behaviour.

    Returns:
        The Monday on or before ``reference``, formatted ``%Y-%m-%d``.
    """
    if reference is None:
        reference = datetime.today()
    # weekday() is 0 for Monday, so subtracting it lands on the week's Monday.
    monday = reference - timedelta(days=reference.weekday())
    return monday.strftime("%Y-%m-%d")
timestamp_regex = re.compile(r".*_(\d{8}_\d{6})\.json$")
files = []
for obj in response["Contents"]:
key = obj["Key"]
match = timestamp_regex.match(key)
if match:
files.append((match.group(1), key))
if not files:
raise FileNotFoundError("No timestamped files found.")
return sorted(files, key=lambda x: x[0], reverse=True)[0][1]
# -----------------------------------------------------
# Product types
# -----------------------------------------------------
# Product / line-item names handed to build_master_df(), which requests one
# DataFrame per name via jsonReader.generate_df_via_product_type() and tags the
# resulting rows with that name in the "product_type" column.
PRODUCT_TYPES = [
"Empty Cavity - ECO4",
"Solar PV - ECO4",
"Extract & Fill - ECO4",
"Solar PV + Heating Upgrade - ECO4",
"Solar PV + HHRSH - ECO4",
"ECO4 empty cavity survey",
"ECO4 Retrofit Coordination",
"ECO4 Solar with client contribution",
"EPC",
]
def download_and_read_latest(bucket: str, prefix: str = "", download_dir="downloads"):
s3 = boto3.client("s3")
os.makedirs(download_dir, exist_ok=True)
latest_key = get_latest_s3_file(bucket, prefix)
local_path = os.path.join(download_dir, latest_key.split("/")[-1])
s3.download_file(bucket, latest_key, local_path)
with open(local_path, "r") as f:
data = json.load(f)
return latest_key, local_path, data
# -----------------------------------------------------
# Helpers
# -----------------------------------------------------
def week_start_monday(date_str):
    """Return the Monday of the week containing ``date_str``.

    Args:
        date_str: Anything ``pd.to_datetime`` accepts (typically a
            ``YYYY-MM-DD`` string), or a missing value.

    Returns:
        The week's Monday formatted ``%Y-%m-%d``, or ``None`` when the input
        is missing (``None``, ``NaN``, ``NaT``, ``pd.NA``) or empty.

    Raises:
        Whatever ``pd.to_datetime`` raises for a value it cannot parse.
    """
    # pd.isna must run first: ``not pd.NA`` raises TypeError because the
    # truth value of NA is ambiguous.
    if pd.isna(date_str) or not date_str:
        return None
    date = pd.to_datetime(date_str)
    monday = date - timedelta(days=date.weekday())  # Monday = week start
    return monday.strftime("%Y-%m-%d")
# -------------------------
# Build DataFrame from S3
# -------------------------
def load_dataframe():
latest_key, file_path, data = download_and_read_latest(BUCKET, PREFIX)
# -----------------------------------------------------
# Build master DF (single S3 read)
# -----------------------------------------------------
def build_master_df(product_types):
s3 = FileManager()
key, path, data = s3.download_and_read_latest()
hubspot_data = jsonReader(data)
records = []
for entry in data:
p = entry["deal_properties"]
line_items = entry.get("line_items", [])
frames = []
if not line_items:
records.append({
"Expected Commencement Date": p.get("expected_commencement_date"),
"Deal ID": p.get("deal_id"),
"Line Item Name": "Missing Line Item",
"Line Item Amount": 0
})
else:
for li in line_items:
records.append({
"Expected Commencement Date": p.get("expected_commencement_date"),
"Deal ID": p.get("deal_id"),
"Line Item Name": li.get("name"),
"Line Item Amount": li.get("amount", 0)
})
for p in product_types:
df = hubspot_data.generate_df_via_product_type(p)
df = pd.DataFrame(records)
df["Line Item Amount"] = pd.to_numeric(df["Line Item Amount"], errors="coerce").fillna(0)
if df is None or not isinstance(df, pd.DataFrame) or df.empty:
continue
return df
df["price"] = pd.to_numeric(df["price"], errors="coerce").fillna(0)
df["product_type"] = p
# Planned week
df["Planned Week"] = df["expected_commencement_date"].apply(week_start_monday)
# Raw completed week
df["raw_completed_week"] = df.get("submission_date", None)
df["raw_completed_week"] = df["raw_completed_week"].apply(week_start_monday)
# Completed-week logic
def corrected_completed_week(row):
    """Pick the week a row counts as completed in.

    Reads ``row["Planned Week"]`` and ``row["raw_completed_week"]``:
    no submission week gives ``None``; a submission week later than the
    planned week is replaced by the planned week; otherwise the submission
    week is returned unchanged (including when no planned week exists).
    """
    planned_week = row["Planned Week"]
    completed_week = row["raw_completed_week"]

    if not completed_week:
        # Nothing was submitted, so there is no completion to report.
        return None

    # Cap late submissions at the planned week; with no planned week there is
    # nothing to cap against, so the submission week stands.
    if planned_week and completed_week > planned_week:
        return planned_week
    return completed_week
df["Completed Week"] = df.apply(corrected_completed_week, axis=1)
df.drop(columns=["raw_completed_week"], inplace=True)
frames.append(df)
if not frames:
return pd.DataFrame()
return pd.concat(frames, ignore_index=True)
# -------------------------
# Initial Load
# -------------------------
df = load_dataframe()
# -----------------------------------------------------
# Initial DF load
# -----------------------------------------------------
df = build_master_df(PRODUCT_TYPES)
# -------------------------
# -----------------------------------------------------
# Dash App
# -------------------------
app = Dash(__name__)
server = app.server # required for Render
# -----------------------------------------------------
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
server = app.server
# -----------------------------------------------------
# Layout
# -----------------------------------------------------
app.layout = html.Div([
html.H1("Line Items by Expected Commencement Date", style={"textAlign": "center"}),
html.H1("Planned vs Completed", style={"textAlign": "center"}),
dcc.Dropdown(
id="date-filter",
options=[{"label": "All Dates", "value": "All Dates"}] +
[{"label": d, "value": d} for d in sorted(df["Expected Commencement Date"].dropna().unique())],
value="All Dates",
[{"label": d, "value": d}
for d in sorted(df["Planned Week"].dropna().unique())],
value=current_week_start() if current_week_start() in df["Planned Week"].unique() else "All Dates",
clearable=False,
style={"width": "300px", "margin": "20px auto"}
),
@ -112,60 +131,173 @@ app.layout = html.Div([
style={"margin": "10px", "padding": "10px 20px"}
),
html.H2("Planned", style={"marginTop": "30px"}),
dash_table.DataTable(
id="lineitem-table",
id="planned-table",
columns=[
{"name": "Line Item Name", "id": "Line Item Name"},
{"name": "Product Type", "id": "Product Type"},
{"name": "Total Deals", "id": "Total Deals"},
{"name": "Total Amount (£)", "id": "Total Amount (£)"},
],
page_size=20,
sort_action="native",
style_table={"overflowX": "scroll"},
style_header={"backgroundColor": "#e4e4e4", "fontWeight": "bold"},
style_cell={"padding": "8px", "textAlign": "left"},
)
row_selectable="single",
cell_selectable=False,
style_cell_conditional=[
{"if": {"column_id": "Product Type"}, "textAlign": "left"},
]
),
html.H2("Actual", style={"marginTop": "40px"}),
dash_table.DataTable(
id="completed-table",
columns=[
{"name": "Product Type", "id": "Product Type"},
{"name": "Total Deals", "id": "Total Deals"},
{"name": "Total Amount (£)", "id": "Total Amount (£)"},
],
page_size=20,
sort_action="native",
row_selectable="single",
cell_selectable=False,
style_cell_conditional=[
{"if": {"column_id": "Product Type"}, "textAlign": "left"},
]
),
dbc.Modal(
[
dbc.ModalHeader("HubSpot IDs"),
dbc.ModalBody(id="modal-body"),
dbc.ModalFooter(
dbc.Button("Close", id="close-modal", className="ms-auto")
),
],
id="hubspot-modal",
size="lg",
is_open=False,
),
])
# -------------------------
# Callback (filter + refresh)
# -------------------------
# -----------------------------------------------------
# Callback: Table Updates
# -----------------------------------------------------
@app.callback(
Output("lineitem-table", "data"),
Output("planned-table", "data"),
Output("completed-table", "data"),
Input("date-filter", "value"),
Input("refresh-btn", "n_clicks")
)
def update_table(selected_date, n_clicks):
def update_tables(selected_date, n_clicks):
global df
# Refresh DF from AWS when button clicked
if n_clicks > 0:
df = load_dataframe()
df = build_master_df(PRODUCT_TYPES)
# Filter data
# Filter
if selected_date == "All Dates":
dff = df.copy()
planned_df = df.copy()
completed_df = df.copy()
else:
dff = df[df["Expected Commencement Date"] == selected_date]
planned_df = df[df["Planned Week"] == selected_date].copy()
completed_df = df[df["Completed Week"] == selected_date].copy()
grouped = (
dff.groupby("Line Item Name", dropna=False)
.agg(
Total_Deals=("Deal ID", "nunique"),
Total_Amount=("Line Item Amount", "sum")
)
.reset_index()
# ------------- PLANNED GROUPING -------------
planned_grouped = (
planned_df.groupby(["Planned Week", "product_type"], dropna=False)
.agg(
Total_Deals=("hubspot_id", "nunique"),
Total_Amount=("price", "sum"),
HubSpot_IDs=("hubspot_id",
lambda x: ", ".join(sorted(set(x.astype(str)))))
)
.reset_index()
)
grouped = grouped.rename(columns={
planned_grouped.rename(columns={
"product_type": "Product Type",
"Total_Deals": "Total Deals",
"Total_Amount": "Total Amount (£)"
})
"Total_Amount": "Total Amount (£)",
"HubSpot_IDs": "HubSpot IDs",
}, inplace=True)
grouped = grouped.sort_values("Total Amount (£)", ascending=False)
return grouped.to_dict("records")
planned_records = [] if planned_grouped.empty else \
planned_grouped.sort_values("Total Amount (£)", ascending=False).to_dict("records")
# ------------- COMPLETED GROUPING -------------
completed_df = completed_df[completed_df["Completed Week"].notna()]
completed_grouped = (
completed_df.groupby(["Completed Week", "product_type"], dropna=False)
.agg(
Total_Deals=("hubspot_id", "nunique"),
Total_Amount=("price", "sum"),
HubSpot_IDs=("hubspot_id",
lambda x: ", ".join(sorted(set(x.astype(str)))))
)
.reset_index()
)
# Rename the aggregate columns to the ids the completed DataTable expects.
completed_grouped.rename(columns={
"product_type": "Product Type",
"Total_Deals": "Total Deals",
"Total_Amount": "Total Amount (£)",
"HubSpot_IDs": "HubSpot IDs",
}, inplace=True)
completed_records = [] if completed_grouped.empty else \
completed_grouped.sort_values("Total Amount (£)", ascending=False).to_dict("records")
return planned_records, completed_records
def id_to_link(deal_id):
    """Build an ``html.Li`` containing a link to a HubSpot record.

    Args:
        deal_id: Identifier interpolated into the record URL and used as the
            link text.

    Returns:
        ``html.Li`` wrapping an ``html.A`` that opens in a new tab.
    """
    # NOTE(review): 145275138 looks like the HubSpot account (portal) id and
    # "0-3" the deal object type — confirm before reusing for other accounts.
    url = f"https://app.hubspot.com/contacts/145275138/record/0-3/{deal_id}"
    return html.Li(
        html.A(
            deal_id,
            href=url,
            target="_blank",
            # Prevent the opened tab from reaching back via window.opener.
            rel="noopener noreferrer",
            style={"textDecoration": "none"},
        )
    )
# -----------------------------------------------------
# Callback: Modal for Planned + Completed tables
# -----------------------------------------------------
@app.callback(
    Output("hubspot-modal", "is_open"),
    Output("modal-body", "children"),
    Output("planned-table", "selected_rows"),
    Output("completed-table", "selected_rows"),
    Input("planned-table", "selected_rows"),
    Input("completed-table", "selected_rows"),
    Input("close-modal", "n_clicks"),
    State("planned-table", "data"),
    State("completed-table", "data"),
    State("hubspot-modal", "is_open")
)
def open_modal(planned_rows, completed_rows, close_click,
               planned_data, completed_data, is_open):
    """Open the HubSpot-ID modal for a selected row, or close it.

    The decision is based on which input fired rather than on the value of
    ``close_click``: ``n_clicks`` is a running count, so testing its
    truthiness would make every row selection after the first "Close" click
    look like another close request and the modal could never reopen.

    Args:
        planned_rows: ``selected_rows`` of the planned table.
        completed_rows: ``selected_rows`` of the completed table.
        close_click: Click count of the Close button (unused beyond being a
            trigger).
        planned_data: Row dicts currently shown in the planned table.
        completed_data: Row dicts currently shown in the completed table.
        is_open: Current open state of the modal.

    Returns:
        Tuple ``(is_open, modal_body, planned_selected, completed_selected)``.
        Both selections are cleared whenever the modal opens or closes so the
        same row can be selected again.
    """
    # Local import keeps this block self-contained; both names ship with dash.
    from dash import ctx, no_update

    trigger = ctx.triggered_id

    if trigger == "close-modal":
        return False, "", [], []

    selections = {
        "planned-table": (planned_rows, planned_data),
        "completed-table": (completed_rows, completed_data),
    }
    selected_rows, table_data = selections.get(trigger, (None, None))

    if not selected_rows or not table_data:
        # Initial call or an empty selection: leave everything as it is.
        return bool(is_open), no_update, no_update, no_update

    row = table_data[selected_rows[0]]
    # "HubSpot IDs" is a ", "-joined string; skip blanks so an empty value
    # does not render a link with no id.
    raw_ids = str(row.get("HubSpot IDs") or "")
    links = [id_to_link(i) for i in raw_ids.split(", ") if i]
    return True, html.Ul(links), [], []
if __name__ == "__main__":
app.run(debug=True)

View file

@ -5,6 +5,4 @@ s3 = FileManager()
key, path, data = s3.download_and_read_latest()
hubspot_data = jsonReader(data)
df = hubspot_data.generate_df_via_product_type("Empty Cavity - ECO4")
df
df = hubspot_data.generate_df_via_product_type("Empty Cavity - ECO4")

View file

@ -3,6 +3,7 @@ from collections import defaultdict
import pandas as pd
from enum import Enum
from datetime import datetime
class ProductType(Enum):
EMPTY_CAVITY_ECO_4 = "Empty Cavity - ECO4"
@ -16,6 +17,14 @@ class jsonReader:
self.line_item_names = list
self.initial_setup()
def to_date_only(self, timestamp: str) -> str:
if timestamp is None:
return None
if timestamp.endswith("Z"):
timestamp = timestamp[:-1]
dt = datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S")
return dt.strftime("%Y-%m-%d")
def initial_setup(self):
"""
Build a dictionary mapping line item names -> list of deals
@ -38,28 +47,46 @@ class jsonReader:
def generate_df_via_product_type(self, product_type):
rows = []
for deals in self.deals_by_line_item[product_type]:
row = self._return_df_from_deal_info(deals, product_type)
rows.append(row)
break
row = self._return_df_from_deal_info(deals, product_type)
rows.append(row)
if rows:
return pd.concat(rows, ignore_index=True)
else:
return
def _return_df_from_deal_info(self, deal, product_type):
data = {
"submission_date": deal["deal_properties"].get("submission_date", None),
"expected_commencement_date": deal["deal_properties"].get("expected_commencement_date", None),
"work_type": product_type,
"price": next(
(item["price"] for item in deal["line_items"] if product_type in item["name"]),
None
)
}
return pd.DataFrame([data])
rows = []
if "ECO" in product_type or "EPC" in product_type:
if deal["attempts"]:
# Multiple attempts => multiple rows
for attempt in deal["attempts"]:
rows.append({
"submission_date": self.to_date_only(deal["deal_properties"].get("submission_date")),
"hubspot_id": deal["deal_properties"]["deal_id"],
"expected_commencement_date": deal["deal_properties"].get("expected_commencement_date"),
"work_type": product_type,
"price": next(
(item["price"] for item in deal["line_items"] if product_type in item["name"]),
None
)
})
else:
# Single row case
rows.append({
"submission_date": self.to_date_only(deal["deal_properties"].get("last_submission_date")),
"expected_commencement_date": deal["deal_properties"].get("expected_commencement_date"),
"hubspot_id": deal["deal_properties"]["deal_id"],
"work_type": product_type,
"price": next(
(item["price"] for item in deal["line_items"] if product_type in item["name"]),
None
)
})
# Return a DataFrame or None
return pd.DataFrame(rows) if rows else None
def find_all_job_with_line_item(self):
for i, deal in enumerate(self.raw_data):