From 16d7fe3318d4cf12bafe0ba24540de969c3af62e Mon Sep 17 00:00:00 2001
From: Jun-te Kim <junte.kim@mealcraft.com>
Date: Fri, 21 Nov 2025 11:44:50 +0000
Subject: [PATCH] added code to do eco work

---
 backend/src/dashboard/scripts/quick_one.py    | 35 +++----------
 .../src/dashboard/services/file_manager.py    | 39 +++++++++++---
 .../services/hubspot_client_async.py          |  5 +-
 backend/src/dashboard/services/json_reader.py | 52 +++++++++++++++++++
 4 files changed, 95 insertions(+), 36 deletions(-)
 create mode 100644 backend/src/dashboard/services/json_reader.py

diff --git a/backend/src/dashboard/scripts/quick_one.py b/backend/src/dashboard/scripts/quick_one.py
index ff63bb1..0c8a18c 100644
--- a/backend/src/dashboard/scripts/quick_one.py
+++ b/backend/src/dashboard/scripts/quick_one.py
@@ -1,32 +1,13 @@
-# raise RuntimeError("this should never run in production")
-# Never run this in a workflow.
-# It is only for debugging/local development
-
-
-import asyncio
-import json
-from tqdm import tqdm
-from dashboard.services.hubspot_client import Pipeline
-from dashboard.services.hubspot_client_async import HubSpotClientAsync
 from dashboard.services.file_manager import FileManager
-from datetime import datetime
+from dashboard.services.json_reader import jsonReader
+s3 = FileManager()
+
+key, path, data = s3.download_and_read_latest()
+hubspot_data = jsonReader(data)
 
 
+counter, deals = hubspot_data.generate_solar_numbers_df()
 
-async def main():
-    hubspot = HubSpotClientAsync()
-    # https://app-eu1.hubspot.com/contacts/145275138/record/0-3/370193175794
-    deal_id = "263490768079"
-    tasks = [asyncio.create_task(hubspot.from_deal_get_info(deal_id))]
-    results = []
+counter
+deals
 
-
-    for task in asyncio.as_completed(tasks):
-        result = await task
-        results.append(result)
-    return results
-
-
-if __name__ == "__main__":
-    result = await main()
-    result[0]["attempts"]
diff --git a/backend/src/dashboard/services/file_manager.py b/backend/src/dashboard/services/file_manager.py
index 3c949aa..34884ec 100644
--- a/backend/src/dashboard/services/file_manager.py
+++ b/backend/src/dashboard/services/file_manager.py
@@ -1,6 +1,8 @@
 import os
 import requests
 import boto3
+import json
+import re
 
 class FileManager:
     def __init__(self, download_dir="downloads", aws_region="us-east-1"):
@@ -37,10 +39,35 @@ class FileManager:
         self.s3.upload_file(file_path, bucket, object_name)
         return f"s3://{bucket}/{object_name}"
 
+    def get_latest_s3_file(self, bucket: str, prefix: str = "") -> str:
+        response = self.s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
 
-# Example usage:
-# fm = FileManager()
-# local_path = fm.download_file("https://example.com/file.txt")
-# print("Saved to:", local_path)
-# s3_path = fm.upload_to_s3(local_path, "my-bucket")
-# print("Uploaded to:", s3_path)
+        if "Contents" not in response:
+            raise FileNotFoundError("No files found.")
+
+        timestamp_regex = re.compile(r".*_(\d{8}_\d{6})\.json$")
+        files = []
+
+        for obj in response["Contents"]:
+            key = obj["Key"]
+            match = timestamp_regex.match(key)
+            if match:
+                files.append((match.group(1), key))
+
+        if not files:
+            raise FileNotFoundError("No timestamped files found.")
+
+        return sorted(files, key=lambda x: x[0], reverse=True)[0][1]
+
+    def download_and_read_latest(self, bucket: str="retrofit-data-dev", prefix: str = "hubspot_insight/"):
+        os.makedirs(self.download_dir, exist_ok=True)
+
+        latest_key = self.get_latest_s3_file(bucket, prefix)
+        local_path = os.path.join(self.download_dir, latest_key.split("/")[-1])
+
+        self.s3.download_file(bucket, latest_key, local_path)
+
+        with open(local_path, "r") as f:
+            data = json.load(f)
+
+        return latest_key, local_path, data
\ No newline at end of file
diff --git a/backend/src/dashboard/services/hubspot_client_async.py b/backend/src/dashboard/services/hubspot_client_async.py
index cd7d9ba..19bf826 100644
--- a/backend/src/dashboard/services/hubspot_client_async.py
+++ b/backend/src/dashboard/services/hubspot_client_async.py
@@ -269,11 +269,10 @@ class HubSpotClientAsync:
             properties=[
                 "hs_appointment_name",
                 "assigned_surveyor",
-                "outcome_from_deal",
                 "outcome__cloned_",
                 "outcome_surveyor",
-                "ecd_from_deal",
-                "submission_date"
+                "submission_date",
+                "expected_commencement_date",
             ]
         )
         
diff --git a/backend/src/dashboard/services/json_reader.py b/backend/src/dashboard/services/json_reader.py
new file mode 100644
index 0000000..fa75404
--- /dev/null
+++ b/backend/src/dashboard/services/json_reader.py
@@ -0,0 +1,52 @@
+from pprint import pprint
+from collections import defaultdict
+
+
+
+class jsonReader:
+    def __init__(self, json_data):
+        self.raw_data = json_data
+        self.deals_by_line_item = defaultdict(list)
+        self.line_item_names = list
+        self.initial_setup()
+
+    def initial_setup(self):
+        """
+        Build a dictionary mapping line item names -> list of deals
+        """
+        for deal in self.raw_data:
+            line_items = deal.get("line_items", [])
+
+            if not line_items:
+                # Store empty deals under a special key
+                self.deals_by_line_item["__empty__"].append(deal)
+                continue
+
+            # Add this deal under each line item name
+            for item in line_items:
+                name = item.get("name")
+                if name:
+                    self.deals_by_line_item[name].append(deal)
+        self.line_item_names = list(self.deals_by_line_item.keys())
+
+    def generate_empty_cavity_numbers_df(self):
+        count=0
+        for deals in self.deals_by_line_item["Empty Cavity - ECO4"]:
+            count +=1 
+            if deals['attempts'] != []:
+                return count, deals
+        return count, deals
+    
+    def _return_df_from_deal_info(self, deal):
+
+        pass
+
+    def find_all_job_with_line_item(self):
+        for i, deal in enumerate(self.raw_data):
+            if len(deal["line_items"])>0:
+                print(deal)
+                print(i)
+                break
+
+    def print_raw_data(self):
+        pprint(self.raw_data)