script to automate compliance done

2026-06-08 11:17:29 +00:00 · 2025-05-16 11:09:55 +00:00 · 2025-05-16 11:09:55 +00:00 · a75ab60e3a
commit a75ab60e3a
parent 50dea33a25
1 changed files with 119 additions and 0 deletions
--- a/etl/osmosis_complaince_address_to_files.py
+++ b/etl/osmosis_complaince_address_to_files.py
@ -0,0 +1,119 @@
+from monday import MondayClient
+import json
+import requests
+import time
+from tqdm import tqdm
+import os
+from etl.scraper.scraper import SharePointInstaller
+from etl.scraper.scraper import SharePointScraper
+
+# monday.com board whose items (and attached files) are exported.
+board_id = "6097548932"
+
+# Credentials are taken from the environment rather than being committed to
+# the repository. Any secret that was previously hard-coded in this file must
+# be treated as leaked and rotated.
+_REQUIRED_ENV_VARS = (
+    "MONDAY_API_KEY",
+    "SHAREPOINT_CLIENT_ID",
+    "SHAREPOINT_CLIENT_SECRET",
+    "SHAREPOINT_TENANT_ID",
+)
+_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if not os.environ.get(name)]
+if _missing_env_vars:
+    raise RuntimeError(
+        "Missing required environment variables: " + ", ".join(_missing_env_vars)
+    )
+
+monday_key = os.environ["MONDAY_API_KEY"]
+monday = MondayClient(monday_key)
+
+
+# Osmosis SharePoint client.
+# NOTE(review): SharePointScraper presumably reads the SHAREPOINT_* variables
+# from the environment (this script used to set them right before this call)
+# - confirm against etl.scraper.scraper.
+osmosis = SharePointScraper(SharePointInstaller.OSMOSIS_WAVE_2)
+parent_folder = "/Osmosis ACD/Osmosis ACD Projects/Installer Documentation/"
+
+# Change this per installer
+parent_folder += "Platform Housing Group/Broadoak"
+
+
+def download_file(url, timeout=60):
+    """Download ``url`` into ``/tmp`` and return the local file path.
+
+    The request sends the module-level ``monday_key`` as the ``Authorization``
+    header together with a set of browser-like headers, and streams the
+    response body to disk in 8 KiB chunks.
+
+    Args:
+        url: Address of the file; surrounding whitespace is ignored.
+        timeout: Seconds handed to ``requests.get`` as its ``timeout`` so a
+            stalled server cannot block the whole run forever.
+
+    Returns:
+        The path of the written file: ``/tmp/<last path segment of url>``.
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+    """
+    from urllib.parse import urlparse
+
+    # Strip once up front so the request and the local file name agree.
+    url = url.strip()
+    headers = {
+        "Authorization": monday_key,
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+                    "AppleWebKit/537.36 (KHTML, like Gecko) "
+                    "Chrome/125.0.0.0 Safari/537.36",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,"
+                "image/avif,image/webp,image/apng,*/*;q=0.8",
+        "Accept-Language": "en-US,en;q=0.9",
+        "Accept-Encoding": "gzip, deflate, br",
+        "Connection": "keep-alive",
+        "Referer": "https://osmosis-acd-team.monday.com/",  # Optional but helpful
+        "Upgrade-Insecure-Requests": "1",
+        "Sec-Fetch-Dest": "document",
+        "Sec-Fetch-Mode": "navigate",
+        "Sec-Fetch-Site": "same-origin",
+        "Sec-Fetch-User": "?1",
+    }
+    # Name the file after the last segment of the URL *path* only, so a query
+    # string or fragment never ends up in the local file name.
+    local_filename = os.path.join("/tmp", os.path.basename(urlparse(url).path))
+    with requests.get(url, headers=headers, stream=True, timeout=timeout) as r:
+        r.raise_for_status()
+        with open(local_filename, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+    return local_filename
+
+def get_all_items(board_id, monday):
+    """Collect every item of a board by following the pagination cursor.
+
+    Pages of up to 25 items are requested through
+    ``monday.boards.fetch_items_by_board_id`` until a page comes back empty
+    or without a cursor for a following page.
+
+    Args:
+        board_id: Identifier of the board to read.
+        monday: Client object exposing ``boards.fetch_items_by_board_id``.
+
+    Returns:
+        A list with the items of all fetched pages, in fetch order.
+    """
+    page_size = 25
+    collected = []
+    next_cursor = None  # the first request is made without a cursor
+
+    while True:
+        reply = monday.boards.fetch_items_by_board_id(
+            board_ids=board_id,
+            limit=page_size,
+            cursor=next_cursor,
+        )
+        page = reply['data']['boards'][0]['items_page']
+
+        batch = page['items']
+        if not batch:
+            # An empty page means there is nothing left to read.
+            return collected
+        collected += batch
+
+        next_cursor = page.get('cursor')
+        if not next_cursor:
+            # No cursor: this was the last page.
+            return collected
+
+        # Progress output while more pages remain.
+        print(f"cursor {next_cursor}")
+        print(f"len all_itemms {len(collected)}")
+
+def upload_to_sharepoint(to_upload, master_folder_name):
+    """Upload local files into a per-item folder below ``parent_folder``.
+
+    First asks ``osmosis.create_dir`` to create ``master_folder_name`` under
+    the module-level ``parent_folder``, then calls ``osmosis.upload_file``
+    once per local file with that folder as the destination.
+
+    Args:
+        to_upload: Iterable of local file paths.
+        master_folder_name: Name of the folder the files are uploaded into.
+    """
+    osmosis.create_dir(master_folder_name, parent_folder)
+    target_folder = f"{parent_folder}/{master_folder_name}"
+    for file_path in to_upload:
+        # Use the base name instead of slicing off a hard-coded "/tmp/"
+        # prefix: identical for files directly under /tmp, and still correct
+        # if the download directory ever changes.
+        # NOTE(review): the third argument is presumably the remote file
+        # name - confirm against SharePointScraper.upload_file.
+        osmosis.upload_file(file_path, target_folder, os.path.basename(file_path))
+
+# Step 1: Fetch column IDs
+board_data = monday.boards.fetch_boards_by_id(board_id)
+columns = board_data["data"]["boards"][0]["columns"]
+col_id_map = {col["title"].lower(): col["id"] for col in columns}
+
+name_id = col_id_map.get("name")  # Replace with actual title if different
+files_id = col_id_map.get("file(s)")  # Replace with actual title if different
+
+if not name_id or not files_id:
+    raise Exception("Could not find 'name' or 'file(s)' columns")
+
+# Step 2: For every item, download the files listed in its file column and
+# upload them to a SharePoint folder named after the item.
+items = get_all_items(board_id, monday)
+for item in tqdm(items):
+    item_name = item["name"]
+
+    print(f"Downloading '{item_name}'...")
+    for val in item["column_values"]:
+        if val["id"] != files_id:
+            continue
+
+        # The column text is treated as a comma-separated list of URLs. It
+        # can be empty or None when nothing is attached, so drop blank
+        # entries instead of handing them to download_file.
+        file_urls = [
+            url.strip() for url in (val["text"] or "").split(",") if url.strip()
+        ]
+        if not file_urls:
+            print(f"No files attached to '{item_name}', skipping")
+            continue
+
+        to_upload = []
+        for file_url in tqdm(file_urls):
+            print(f"Downloading {file_url}")
+            to_upload.append(download_file(file_url))
+        upload_to_sharepoint(to_upload, item_name)
+