# backend/src/dashboard/scripts/quick_one.py
"""Local debugging helper -- never run this in a workflow.

Downloads the newest timestamped JSON that
``FileManager.download_and_read_latest()`` finds, indexes it with
``jsonReader`` and prints the result of
``jsonReader.generate_empty_cavity_numbers_df()``.
"""
from dashboard.services.file_manager import FileManager
from dashboard.services.json_reader import jsonReader


def main():
    """Fetch the latest file, index it and print the ``(counter, deals)`` pair.

    Returns:
        The ``(counter, deals)`` tuple produced by
        ``jsonReader.generate_empty_cavity_numbers_df()``.
    """
    s3 = FileManager()
    key, path, data = s3.download_and_read_latest()
    hubspot_data = jsonReader(data)

    # jsonReader has no generate_solar_numbers_df(); the generator it does
    # define is generate_empty_cavity_numbers_df(), so call that instead of
    # failing with AttributeError.
    counter, deals = hubspot_data.generate_empty_cavity_numbers_df()

    # A bare `counter` / `deals` expression only echoes inside a REPL or
    # notebook cell; print explicitly so a plain run shows the values too.
    print(f"{key} -> {path}")
    print(counter)
    print(deals)
    return counter, deals


if __name__ == "__main__":
    # Guarded so that importing this module never triggers an S3 download.
    # The names stay at module level for interactive inspection afterwards.
    counter, deals = main()
# backend/src/dashboard/services/file_manager.py -- methods of FileManager
import json
import os
import re

# A key qualifies when it ends in "_<8 digits>_<6 digits>.json"; group 1 is
# that digit stamp (presumably YYYYMMDD_HHMMSS -- confirm with the producer,
# since the comparison below is a plain string comparison).
_TIMESTAMP_KEY_RE = re.compile(r".*_(\d{8}_\d{6})\.json$")


def get_latest_s3_file(self, bucket: str, prefix: str = "") -> str:
    """Return the key under ``prefix`` whose timestamp suffix sorts highest.

    The listing is followed across every page: one ``list_objects_v2`` call
    returns a single page only, so stopping after the first response could
    miss the newest key.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix used to restrict the listing.

    Returns:
        The full key of the matching object with the greatest timestamp
        string. On a tie the key listed first wins.

    Raises:
        FileNotFoundError: If the listing contains no objects at all, or
            none whose key matches the timestamp pattern.
    """
    request = {"Bucket": bucket, "Prefix": prefix}
    saw_objects = False
    latest = None  # (timestamp, key) of the best candidate so far

    while True:
        response = self.s3.list_objects_v2(**request)

        for obj in response.get("Contents", []):
            saw_objects = True
            key = obj["Key"]
            match = _TIMESTAMP_KEY_RE.match(key)
            # Strict ">" keeps the first key seen when timestamps are equal.
            if match and (latest is None or match.group(1) > latest[0]):
                latest = (match.group(1), key)

        if not response.get("IsTruncated"):
            break
        # More pages remain: resume the listing where this page stopped.
        request["ContinuationToken"] = response["NextContinuationToken"]

    if not saw_objects:
        raise FileNotFoundError("No files found.")
    if latest is None:
        raise FileNotFoundError("No timestamped files found.")

    return latest[1]


def download_and_read_latest(self, bucket: str = "retrofit-data-dev", prefix: str = "hubspot_insight/"):
    """Download the latest timestamped JSON object and parse it.

    Args:
        bucket: Name of the S3 bucket to read from.
        prefix: Key prefix under which to look for timestamped files.

    Returns:
        ``(latest_key, local_path, data)``: the S3 key that was chosen, the
        path of the downloaded copy inside ``self.download_dir``, and the
        parsed JSON content.

    Raises:
        FileNotFoundError: Propagated from ``get_latest_s3_file`` when no
            suitable object exists.
    """
    os.makedirs(self.download_dir, exist_ok=True)

    latest_key = self.get_latest_s3_file(bucket, prefix)
    # Only the last path segment of the key is used as the local file name.
    local_path = os.path.join(self.download_dir, latest_key.split("/")[-1])

    self.s3.download_file(bucket, latest_key, local_path)

    # Decode explicitly as UTF-8 instead of relying on the locale default.
    with open(local_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    return latest_key, local_path, data
# backend/src/dashboard/services/json_reader.py
from collections import defaultdict
from pprint import pprint


class jsonReader:
    """Index a collection of deal dictionaries by their line item names.

    The constructor iterates ``json_data`` once and reads every element with
    ``.get``, so each element is expected to behave like a ``dict``.

    Attributes:
        raw_data: The object passed to the constructor, stored unchanged.
        deals_by_line_item: ``defaultdict(list)`` mapping a line item name to
            the deals that carry it. Deals without line items are collected
            under ``EMPTY_KEY``.
        line_item_names: The keys of ``deals_by_line_item`` in insertion order.
    """

    # Key under which deals with a missing or empty "line_items" are stored.
    EMPTY_KEY = "__empty__"
    # Line item scanned by generate_empty_cavity_numbers_df() by default.
    DEFAULT_LINE_ITEM = "Empty Cavity - ECO4"

    def __init__(self, json_data):
        self.raw_data = json_data
        self.deals_by_line_item = defaultdict(list)
        # An actual empty list, not the ``list`` type itself.
        self.line_item_names = []
        self.initial_setup()

    def initial_setup(self):
        """Build the mapping of line item name -> list of deals.

        The index is rebuilt from scratch on every call, so calling this a
        second time does not append each deal again.
        """
        index = defaultdict(list)
        for deal in self.raw_data:
            line_items = deal.get("line_items", [])

            if not line_items:
                # Deals without line items go under the special key.
                index[self.EMPTY_KEY].append(deal)
                continue

            # File the deal under each named line item; items whose "name"
            # is missing or empty are skipped.
            for item in line_items:
                name = item.get("name")
                if name:
                    index[name].append(deal)

        self.deals_by_line_item = index
        self.line_item_names = list(index)

    def generate_empty_cavity_numbers_df(self, line_item_name=DEFAULT_LINE_ITEM):
        """Find the first deal under a line item that has recorded attempts.

        Despite the ``_df`` suffix no DataFrame is built; a tuple is returned.

        Args:
            line_item_name: Line item whose deals are scanned. Defaults to
                "Empty Cavity - ECO4".

        Returns:
            ``(count, deal)`` where ``count`` is the number of deals examined.
            ``deal`` is the first deal with a non-empty "attempts" value; if
            no deal has one it is the last deal examined, and ``None`` when
            there are no deals for the line item at all.
        """
        # .get() keeps the defaultdict from inserting an empty entry for a
        # line item that was never seen.
        candidates = self.deals_by_line_item.get(line_item_name, [])

        count = 0
        deal = None
        for count, deal in enumerate(candidates, start=1):
            # A missing "attempts" key counts as "no attempts".
            if deal.get("attempts"):
                break
        return count, deal

    def _return_df_from_deal_info(self, deal):
        """Placeholder that is not implemented yet; always returns ``None``."""
        return None

    def find_all_job_with_line_item(self):
        """Print the first deal that has at least one line item, then its index.

        Nothing is printed when no deal has line items. Deals lacking the
        "line_items" key are treated as having none, as in ``initial_setup``.
        """
        for i, deal in enumerate(self.raw_data):
            if deal.get("line_items"):
                print(deal)
                print(i)
                break

    def print_raw_data(self):
        """Pretty-print the data that was passed to the constructor."""
        pprint(self.raw_data)