Merge pull request #14 from Hestia-Homes/feature/learning

added scripts to help me in the future
This commit is contained in:
Jun-te Kim 2025-11-30 15:15:07 +00:00 committed by GitHub
commit 8bbf41cb7d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 127 additions and 23 deletions

View file

@ -0,0 +1,24 @@
import asyncio
import json
from tqdm import tqdm
from dashboard.services.hubspot_client import Pipeline, Companies
from dashboard.services.hubspot_client_async import HubSpotClientAsync
from dashboard.services.file_manager import FileManager
from datetime import datetime
# File name for a local JSON dump of HubSpot deals.
# NOTE(review): not referenced anywhere in the visible part of this script — confirm it is still needed.
OUTPUT_FILE = "hubspot_deals.json"
async def main():
    """Query HubSpot for the HOMEGROUP company and return the result.

    Returns:
        Whatever ``HubSpotClientAsync.get_dea`` resolves to when called with
        ``Companies.HOMEGROUP.value``.
    """
    # NOTE(review): `get_dea` looks like a truncated method name — confirm it
    # exists on HubSpotClientAsync before relying on this script.
    client = HubSpotClientAsync()
    # Fetch all deals in the pipeline
    return await client.get_dea(Companies.HOMEGROUP.value)
if __name__ == "__main__":
    # `await` is only valid inside a coroutine, so a plain script cannot
    # `await main()` at module level. asyncio.run() starts an event loop,
    # runs main() to completion and returns its result.
    deals = asyncio.run(main())
    print(deals)

View file

@ -12,18 +12,7 @@ OUTPUT_FILE = "hubspot_deals.json"
async def main():
hubspot = HubSpotClientAsync()
# Fetch all deal IDs (but we will take only one)
deals = await hubspot.get_deal_ids_by_pipeline(
Pipeline.OPERATIONS_SOCIAL_HOUSING.value
)
if not deals:
print("No deals found.")
return
# Only take ONE deal
# Just do one
deal_id = deals[0]
deal_id = "234993273058"
print(f"Fetching only deal: {deal_id}")
try:
@ -40,14 +29,14 @@ async def main():
if __name__ == "__main__":
asyncio.run(main())
await main()
fm = FileManager()
timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
s3_filename = f"hubspot_deals_{timestamp}.json"
# s3_filename = f"hubspot_deals_{timestamp}.json"
fm.upload_to_s3(
OUTPUT_FILE,
bucket="retrofit-data-dev",
object_name=f"hubspot_insight/{s3_filename}"
)
# fm.upload_to_s3(
# OUTPUT_FILE,
# bucket="retrofit-data-dev",
# object_name=f"hubspot_insight/{s3_filename}"
# )

View file

@ -0,0 +1,46 @@
from dash import Dash, html, dcc, dash_table, Input, Output, State
import dash_bootstrap_components as dbc
import pandas as pd
from datetime import timedelta
from dashboard.services.file_manager import FileManager
from dashboard.services.json_reader import jsonReader
from datetime import datetime
# -----------------------------------------------------
# Product types
# -----------------------------------------------------
PRODUCT_TYPES = [
    "ECO4 Retrofit Coordination",
]

# HubSpot deal ID of the single deal this script checks.
one_deal_to_test = "254427203793"

# Load the most recent stored snapshot (presumably from S3, going by the
# helper's name — confirm) and build a reader over it.
s3 = FileManager()
key, path, data = s3.download_and_read_latest()
json_reader = jsonReader(data)
df = json_reader.generate_df_via_product_type(PRODUCT_TYPES[0])

# Find the deal under test among the deals carrying the first product type.
deals_for_product = json_reader.deals_by_line_item[PRODUCT_TYPES[0]]
for candidate in deals_for_product:
    if candidate["deal_properties"]["deal_id"] == one_deal_to_test:
        deal = candidate
        break
else:
    # for/else: only reached when the loop finished without a match.
    # Fail loudly here instead of carrying on with the wrong object.
    raise RuntimeError("Failed to find one")

returned_value = json_reader._return_df_from_deal_info(
    deal, product_type=PRODUCT_TYPES[0]
)

# These assertions only hold while the "2 borden close test" deal's
# submission date and expected commencement dates stay unchanged in the
# source data.
assert returned_value.size == 10
assert returned_value.iloc[0]["submission_date"] == '2025-10-20'
assert returned_value.iloc[0]["expected_commencement_date"] == '2025-07-14'
assert returned_value.iloc[1]["submission_date"] is None
assert returned_value.iloc[1]["expected_commencement_date"] == '2025-07-07'

View file

@ -72,10 +72,43 @@ class jsonReader:
)
})
else:
# Single row case
def historical_ecd_value_processes(timestamp):
    """Normalise a historical expected-commencement-date value.

    Args:
        timestamp: A ``YYYY-MM-DD`` date string, ``None`` or ``''``.

    Returns:
        The date re-formatted as ``YYYY-MM-DD``, or ``None`` when the input
        is ``None`` or an empty string.

    Raises:
        ValueError: If a non-empty string does not parse as ``%Y-%m-%d``.
    """
    if timestamp in (None, ''):
        return None
    # Round-trip through datetime so malformed values fail here and
    # un-padded parts (e.g. "2025-7-4") come back zero-padded.
    parsed = datetime.strptime(timestamp, "%Y-%m-%d")
    return parsed.strftime("%Y-%m-%d")
history = deal["deal_properties"]["expected_commencement_history"]
# ---- SORT HISTORY: latest first ----
history_sorted = sorted(
history,
key=lambda h: datetime.strptime(h["timestamp"].split("T")[0], "%Y-%m-%d"),
reverse=True
)
# Extract latest expected commencement date
latest = history_sorted[0]
latest_ecd = historical_ecd_value_processes(latest["value"]) # returns YYYY-MM-DD or None
# Convert submission date
raw_submission_date = deal["deal_properties"].get("last_submission_date")
submission_date = self.to_date_only(raw_submission_date) if raw_submission_date else None
# Convert both to datetime for comparison
if submission_date and latest_ecd:
dt_sub = datetime.strptime(submission_date, "%Y-%m-%d")
dt_ecd = datetime.strptime(latest_ecd, "%Y-%m-%d")
# Only keep submission date if submission_date > latest ECD
if dt_sub <= dt_ecd:
submission_date = None
else:
submission_date = None
# 1⃣ Add latest expected commencement date WITH conditional submission date
rows.append({
"submission_date": self.to_date_only(deal["deal_properties"].get("last_submission_date")),
"expected_commencement_date": deal["deal_properties"].get("expected_commencement_date"),
"submission_date": submission_date,
"expected_commencement_date": latest_ecd,
"hubspot_id": deal["deal_properties"]["deal_id"],
"work_type": product_type,
"price": next(
@ -83,7 +116,19 @@ class jsonReader:
None
)
})
# 2⃣ Add the remaining history WITHOUT submission date
for attempt in history_sorted[1:]:
rows.append({
"submission_date": None,
"expected_commencement_date": historical_ecd_value_processes(attempt["value"]),
"hubspot_id": deal["deal_properties"]["deal_id"],
"work_type": product_type,
"price": next(
(item["price"] for item in deal["line_items"] if product_type in item["name"]),
None
)
})
# Return a DataFrame or None
return pd.DataFrame(rows) if rows else None