added code to do eco work

This commit is contained in:
Jun-te Kim 2025-11-21 11:44:50 +00:00
parent 9529d4814d
commit 16d7fe3318
4 changed files with 95 additions and 36 deletions

View file

@ -1,32 +1,13 @@
# raise RuntimeError("this should never run in production")
# Never run this in a workflow.
# It is only for debugging/local development
import asyncio
import json
from tqdm import tqdm
from dashboard.services.hubspot_client import Pipeline
from dashboard.services.hubspot_client_async import HubSpotClientAsync
from dashboard.services.file_manager import FileManager
from datetime import datetime
from dashboard.services.json_reader import jsonReader
s3 = FileManager()
key, path, data = s3.download_and_read_latest()
hubspot_data = jsonReader(data)
counter, deals = hubspot_data.generate_solar_numbers_df()
async def main():
hubspot = HubSpotClientAsync()
# https://app-eu1.hubspot.com/contacts/145275138/record/0-3/370193175794
deal_id = "263490768079"
tasks = [asyncio.create_task(hubspot.from_deal_get_info(deal_id))]
results = []
counter
deals
for task in asyncio.as_completed(tasks):
result = await task
results.append(result)
return results
if __name__ == "__main__":
result = await main()
result[0]["attempts"]

View file

@ -1,6 +1,8 @@
import os
import requests
import boto3
import json
import re
class FileManager:
def __init__(self, download_dir="downloads", aws_region="us-east-1"):
@ -37,10 +39,35 @@ class FileManager:
self.s3.upload_file(file_path, bucket, object_name)
return f"s3://{bucket}/{object_name}"
def get_latest_s3_file(self, bucket: str, prefix: str = "") -> str:
response = self.s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
# Example usage:
# fm = FileManager()
# local_path = fm.download_file("https://example.com/file.txt")
# print("Saved to:", local_path)
# s3_path = fm.upload_to_s3(local_path, "my-bucket")
# print("Uploaded to:", s3_path)
if "Contents" not in response:
raise FileNotFoundError("No files found.")
timestamp_regex = re.compile(r".*_(\d{8}_\d{6})\.json$")
files = []
for obj in response["Contents"]:
key = obj["Key"]
match = timestamp_regex.match(key)
if match:
files.append((match.group(1), key))
if not files:
raise FileNotFoundError("No timestamped files found.")
return sorted(files, key=lambda x: x[0], reverse=True)[0][1]
def download_and_read_latest(self, bucket: str="retrofit-data-dev", prefix: str = "hubspot_insight/"):
os.makedirs(self.download_dir, exist_ok=True)
latest_key = self.get_latest_s3_file(bucket, prefix)
local_path = os.path.join(self.download_dir, latest_key.split("/")[-1])
self.s3.download_file(bucket, latest_key, local_path)
with open(local_path, "r") as f:
data = json.load(f)
return latest_key, local_path, data

View file

@ -269,11 +269,10 @@ class HubSpotClientAsync:
properties=[
"hs_appointment_name",
"assigned_surveyor",
"outcome_from_deal",
"outcome__cloned_",
"outcome_surveyor",
"ecd_from_deal",
"submission_date"
"submission_date",
"expected_commencement_date",
]
)

View file

@ -0,0 +1,52 @@
from pprint import pprint
from collections import defaultdict
class jsonReader:
def __init__(self, json_data):
self.raw_data = json_data
self.deals_by_line_item = defaultdict(list)
self.line_item_names = list
self.initial_setup()
def initial_setup(self):
"""
Build a dictionary mapping line item names -> list of deals
"""
for deal in self.raw_data:
line_items = deal.get("line_items", [])
if not line_items:
# Store empty deals under a special key
self.deals_by_line_item["__empty__"].append(deal)
continue
# Add this deal under each line item name
for item in line_items:
name = item.get("name")
if name:
self.deals_by_line_item[name].append(deal)
self.line_item_names = list(self.deals_by_line_item.keys())
def generate_empty_cavity_numbers_df(self):
count=0
for deals in self.deals_by_line_item["Empty Cavity - ECO4"]:
count +=1
if deals['attempts'] != []:
return count, deals
return count, deals
def _return_df_from_deal_info(self, deal):
pass
def find_all_job_with_line_item(self):
for i, deal in enumerate(self.raw_data):
if len(deal["line_items"])>0:
print(deal)
print(i)
break
def print_raw_data(self):
pprint(self.raw_data)