From d4d5531ddc9c7c1e29137742f5513112bc2cef46 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <junte@domna.homes>
Date: Tue, 22 Jul 2025 17:18:21 +0100
Subject: [PATCH] Add month-end automation script; guard Osmosis export behind __main__

---
 etl/month_end_automation.py                | 65 +++++++++++++++++++++-
 etl/osmosis_complaince_address_to_files.py | 60 ++++++++++----------
 2 files changed, 94 insertions(+), 31 deletions(-)

diff --git a/etl/month_end_automation.py b/etl/month_end_automation.py
index e75154b..d396551 100644
--- a/etl/month_end_automation.py
+++ b/etl/month_end_automation.py
@@ -1 +1,64 @@
-print("hello world")
\ No newline at end of file
+"""Month-end automation: load Monday.com board items and filter them per billing stage."""
+import json
+import os
+from pprint import pprint
+
+import pandas as pd
+from monday import MondayClient
+from tqdm import tqdm
+
+from etl.osmosis_complaince_address_to_files import get_all_items, extract_asset_ids
+
+# SECURITY: the API token was previously hard-coded on this line. Secrets must not be
+# committed; read it from the environment and rotate the token that was exposed.
+monday_key = os.environ.get("MONDAY_API_KEY")
+if not monday_key:
+    raise RuntimeError("MONDAY_API_KEY environment variable is not set")
+monday = MondayClient(monday_key)
+board_ids = ["3900434153"]
+
+
+def count_files(raw_value):
+    """Return how many entries the "files" list of a file column's JSON value holds."""
+    if not raw_value:
+        return 0
+    return len(json.loads(raw_value).get("files", []))
+
+
+def item_to_record(item, titles_by_id):
+    """Flatten an item: "address" plus one entry per known column (file columns as counts)."""
+    record = {"address": item["name"]}
+    for col in item.get("column_values", []):
+        title = titles_by_id.get(col.get("id"))
+        if title is None:
+            continue
+        is_file = col.get("type") == "file"
+        record[title] = count_files(col.get("value")) if is_file else col.get("text")
+    return record
+
+
+def status_in(frame, column, statuses):
+    """Return a copy of the rows whose `column` text, lower-cased, is one of `statuses`."""
+    return frame[frame[column].fillna("").str.lower().isin(statuses)].copy()
+
+
+# Collect rows from every board; creating the list inside the loop dropped earlier boards.
+all_records = []
+for board in tqdm(board_ids):
+    board_data = monday.boards.fetch_boards_by_id(board)
+    columns = board_data["data"]["boards"][0]["columns"]
+    col_id_map = {col["title"].lower(): col["id"] for col in columns}
+    reversed_col_id_map = {v: k for k, v in col_id_map.items()}
+    items = get_all_items(board, monday)
+    all_records.extend(item_to_record(row, reversed_col_id_map) for row in tqdm(items))
+
+df = pd.DataFrame(all_records)
+
+# One frame per stage; each filter used to overwrite `filtered_df`, discarding the rest.
+ra_df = status_in(df, "ra", ["completed rdsap 10", "completed rdsap 9.9"])
+att_df = status_in(df, "att", ["completed"])
+v1_df = status_in(df, "v1 coordination status", ["rc complete"])
+v2_to_invoice = status_in(df, "v2 invoiced", ["to be invoiced"])
+v2_df = v2_to_invoice[v2_to_invoice["v2 dc/ima/pas"] > 0].copy()
+filtered_df = v2_df  # the name the original script ended with, kept for compatibility
+
diff --git a/etl/osmosis_complaince_address_to_files.py b/etl/osmosis_complaince_address_to_files.py
index 0ab6ed9..c48e4eb 100644
--- a/etl/osmosis_complaince_address_to_files.py
+++ b/etl/osmosis_complaince_address_to_files.py
@@ -91,7 +91,7 @@ def get_all_items(board_id, monday):
     limit = 25  # Adjust the limit based on how many items you want per request
     all_items = []  # List to store all fetched items
     cursor = None  # Start without a cursor for the first page
-
+    print(f"Connecting to Monday API and retrieving data for board {board_id}")
     # Loop through pages
     while True:
         # Fetch items for the current page
@@ -116,8 +116,7 @@ def get_all_items(board_id, monday):
         # If there's no cursor, we've reached the last page
         if not cursor:
             break
-        print(f"cursor {cursor}")
-        print(f"len all_itemms {len(all_items)}")
+        print("Loading...")
     return all_items
 
 def upload_to_sharepoint(to_upload, master_folder_name):
@@ -128,36 +127,37 @@ def upload_to_sharepoint(to_upload, master_folder_name):
         print(f"Uploading {file_name} to sharepoint")
         osmosis.upload_file(file_path, parent_folder + f"/{master_folder_name}", file_name)
 
-# Step 1: Fetch column IDs
-board_data = monday.boards.fetch_boards_by_id(board_id)
-columns = board_data["data"]["boards"][0]["columns"]
-col_id_map = {col["title"].lower(): col["id"] for col in columns}
+if __name__ == "__main__":  # script entry point: nothing below runs when this module is imported
+    # Step 1: Fetch column IDs
+    board_data = monday.boards.fetch_boards_by_id(board_id)
+    columns = board_data["data"]["boards"][0]["columns"]
+    col_id_map = {col["title"].lower(): col["id"] for col in columns}  # lower-cased column title -> column id
 
-name_id = col_id_map.get("name")  # Replace with actual title if different
-files_id = col_id_map.get("file(s)")  # Replace with actual title if different
+    name_id = col_id_map.get("name")  # Replace with actual title if different
+    files_id = col_id_map.get("file(s)")  # Replace with actual title if different
 
-if not name_id or not files_id:
-    raise Exception("Could not find 'name' or 'file(s)' columns")
+    if not name_id or not files_id:
+        raise Exception("Could not find 'name' or 'file(s)' columns")
 
-items = get_all_items(board_id, monday)
-for i,item in enumerate(tqdm(items)):
-    if i>329:
-        item_name = item["name"]
-        item_name = sanitize_name(item_name, ignore_dot=True)
-        print(f"Item name is {item_name}")
-        asset_ids = extract_asset_ids(item, files_id)
+    items = get_all_items(board_id, monday)
+    for i,item in enumerate(tqdm(items)):
+        if i>329:  # NOTE(review): hard-coded offset, items 0-329 are skipped; presumably a manual resume point - confirm
+            item_name = item["name"]
+            item_name = sanitize_name(item_name, ignore_dot=True)
+            print(f"Item name is {item_name}")
+            asset_ids = extract_asset_ids(item, files_id)
 
-        to_upload = []
-        for asset_id in asset_ids:
-            try:
-                public_url, file_name = get_public_url(asset_id)
-                print(f"Downloading {file_name}")
-                file_path = download_file_from_public_url(public_url, file_name)
-                to_upload.append(file_path)
-            except Exception as e:
-                print(f"Failed to download/upload asset {asset_id}: {e}")
+            to_upload = []  # local paths of the files downloaded for this item
+            for asset_id in asset_ids:
+                try:
+                    public_url, file_name = get_public_url(asset_id)
+                    print(f"Downloading {file_name}")
+                    file_path = download_file_from_public_url(public_url, file_name)
+                    to_upload.append(file_path)
+                except Exception as e:  # best-effort: report the failure and move on to the next asset
+                    print(f"Failed to download/upload asset {asset_id}: {e}")
 
-        if to_upload:
-            upload_to_sharepoint(to_upload, item_name)
+            if to_upload:  # skip the upload when nothing was downloaded for this item
+                upload_to_sharepoint(to_upload, item_name)
 
-# Liv green # Cocuun # Wates
\ No newline at end of file
+    # Liv green # Cocuun # Wates
\ No newline at end of file