From d4d5531ddc9c7c1e29137742f5513112bc2cef46 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 22 Jul 2025 17:18:21 +0100 Subject: [PATCH] months end --- etl/month_end_automation.py | 65 +++++++++++++++++++++- etl/osmosis_complaince_address_to_files.py | 60 ++++++++++---------- 2 files changed, 94 insertions(+), 31 deletions(-) diff --git a/etl/month_end_automation.py b/etl/month_end_automation.py index e75154b..d396551 100644 --- a/etl/month_end_automation.py +++ b/etl/month_end_automation.py @@ -1 +1,64 @@ -print("hello world") \ No newline at end of file +from tqdm import tqdm +from monday import MondayClient +from etl.osmosis_complaince_address_to_files import get_all_items, extract_asset_ids +from pprint import pprint +import pandas as pd +import json + +monday_key = "eyJhbGciOiJIUzI1NiJ9.eyJ0aWQiOjQ5ODc2ODQxOCwiYWFpIjoxMSwidWlkIjozNjE3ODAzNCwiaWFkIjoiMjAyNS0wNC0xMVQxMToyMzoxNy40NjdaIiwicGVyIjoibWU6d3JpdGUiLCJhY3RpZCI6MTM5OTc4MjMsInJnbiI6InVzZTEifQ.-2Lit4s46ZF6AXuMW9t0TxIaFLkHqD4Yo-PyM9i2XZY" +monday = MondayClient(monday_key) +board_ids = ["3900434153"] + + +for board in tqdm(board_ids): + board_data = monday.boards.fetch_boards_by_id(board) + columns = board_data["data"]["boards"][0]["columns"] + col_id_map = {col["title"].lower(): col["id"] for col in columns} + reversed_col_id_map = {v: k for k, v in col_id_map.items()} + + + items = get_all_items(board, monday) + + all_records = [] + for row in tqdm(items): + data = {} + data.update({"address": row['name']}) + for col in row.get("column_values", []): + if col.get("id") in reversed_col_id_map: + if col.get("type") == "file": + value = col.get("value") + no_of_files = 0 + + if value: + value = json.loads(col["value"]) + no_of_files = len(value.get('files', [])) + data.update({reversed_col_id_map[col.get("id")]: no_of_files}) + else: + + data.update({ + reversed_col_id_map[col.get("id")]: col.get("text") + }) + all_records.append(data) + +# Convert to DataFrame +df = pd.DataFrame(all_records) + +# RA +filtered_df = df[ + df["ra"].str.lower().isin(["completed rdsap 10", "completed rdsap 9.9"]) +].copy() + +# ATT +filtered_df = df[ + df["att"].str.lower().isin(["completed"]) +].copy() + +# V1 Coordination +filtered_df = df[ + df["v1 coordination status"].str.lower().isin(["rc complete"]) +].copy() + +# V2 Coordination +_ = df[df["v2 invoiced"].fillna('').str.lower().isin(['to be invoiced'])] +filtered_df = _[_["v2 dc/ima/pas"] > 0] + diff --git a/etl/osmosis_complaince_address_to_files.py b/etl/osmosis_complaince_address_to_files.py index 0ab6ed9..c48e4eb 100644 --- a/etl/osmosis_complaince_address_to_files.py +++ b/etl/osmosis_complaince_address_to_files.py @@ -91,7 +91,7 @@ def get_all_items(board_id, monday): limit = 25 # Adjust the limit based on how many items you want per request all_items = [] # List to store all fetched items cursor = None # Start without a cursor for the first page - + print(f"Connecting to Monday API and retrieving data for board {board_id}") # Loop through pages while True: # Fetch items for the current page @@ -116,8 +116,7 @@ def get_all_items(board_id, monday): # If there's no cursor, we've reached the last page if not cursor: break - print(f"cursor {cursor}") - print(f"len all_itemms {len(all_items)}") + print("Loading...") return all_items def upload_to_sharepoint(to_upload, master_folder_name): @@ -128,36 +127,37 @@ def upload_to_sharepoint(to_upload, master_folder_name): print(f"Uploading {file_name} to sharepoint") osmosis.upload_file(file_path, parent_folder + f"/{master_folder_name}", file_name) -# Step 1: Fetch column IDs -board_data = monday.boards.fetch_boards_by_id(board_id) -columns = board_data["data"]["boards"][0]["columns"] -col_id_map = {col["title"].lower(): col["id"] for col in columns} +if __name__ == "__main__": + # Step 1: Fetch column IDs + board_data = monday.boards.fetch_boards_by_id(board_id) + columns = board_data["data"]["boards"][0]["columns"] + col_id_map = {col["title"].lower(): col["id"] for col in columns} -name_id = col_id_map.get("name") # Replace with actual title if different -files_id = col_id_map.get("file(s)") # Replace with actual title if different + name_id = col_id_map.get("name") # Replace with actual title if different + files_id = col_id_map.get("file(s)") # Replace with actual title if different -if not name_id or not files_id: - raise Exception("Could not find 'name' or 'file(s)' columns") + if not name_id or not files_id: + raise Exception("Could not find 'name' or 'file(s)' columns") -items = get_all_items(board_id, monday) -for i,item in enumerate(tqdm(items)): - if i>329: - item_name = item["name"] - item_name = sanitize_name(item_name, ignore_dot=True) - print(f"Item name is {item_name}") - asset_ids = extract_asset_ids(item, files_id) + items = get_all_items(board_id, monday) + for i,item in enumerate(tqdm(items)): + if i>329: + item_name = item["name"] + item_name = sanitize_name(item_name, ignore_dot=True) + print(f"Item name is {item_name}") + asset_ids = extract_asset_ids(item, files_id) - to_upload = [] - for asset_id in asset_ids: - try: - public_url, file_name = get_public_url(asset_id) - print(f"Downloading {file_name}") - file_path = download_file_from_public_url(public_url, file_name) - to_upload.append(file_path) - except Exception as e: - print(f"Failed to download/upload asset {asset_id}: {e}") + to_upload = [] + for asset_id in asset_ids: + try: + public_url, file_name = get_public_url(asset_id) + print(f"Downloading {file_name}") + file_path = download_file_from_public_url(public_url, file_name) + to_upload.append(file_path) + except Exception as e: + print(f"Failed to download/upload asset {asset_id}: {e}") - if to_upload: - upload_to_sharepoint(to_upload, item_name) + if to_upload: + upload_to_sharepoint(to_upload, item_name) -# Liv green # Cocuun # Wates \ No newline at end of file + # Liv green # Cocuun # Wates \ No newline at end of file