script to automate compliance done

This commit is contained in:
Jun-te Kim 2025-05-16 11:09:55 +00:00
parent 50dea33a25
commit a75ab60e3a

View file

@ -0,0 +1,119 @@
from monday import MondayClient
import json
import requests
import time
from tqdm import tqdm
import os
from etl.scraper.scraper import SharePointInstaller
from etl.scraper.scraper import SharePointScraper
# Monday.com board whose file attachments are copied to SharePoint.
board_id = "6097548932"


def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or empty, so a missing
            credential is reported up front instead of surfacing later as an
            opaque authentication failure.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} must be set before running this script"
        )
    return value


# Credentials are read from the environment instead of being committed to the
# repository.
# NOTE(review): the API token and client secret that used to be hard-coded
# here remain in the git history and should be rotated.
monday_key = _require_env("MONDAY_API_KEY")
monday = MondayClient(monday_key)

# Osmosis SharePoint app registration. The client and tenant IDs are
# identifiers rather than secrets, so they are kept as defaults that the
# environment may override; the client secret must come from the environment.
# NOTE(review): SharePointScraper presumably reads these SHAREPOINT_*
# variables when constructed -- confirm against etl.scraper.scraper.
os.environ.setdefault("SHAREPOINT_CLIENT_ID", "6832a4c5-fb8c-4082-a746-4f51e1020f0d")
os.environ.setdefault("SHAREPOINT_TENANT_ID", "10d5af8b-2cfd-4882-9ccd-b96e4812dacf")
_require_env("SHAREPOINT_CLIENT_SECRET")
osmosis = SharePointScraper(SharePointInstaller.OSMOSIS_WAVE_2)

# Remote folder under which one sub-folder per board item is created.
parent_folder = "/Osmosis ACD/Osmosis ACD Projects/Installer Documentation/"
# Change this per installer
parent_folder += "Platform Housing Group/Broadoak"
def download_file(url):
    """Download *url* into ``/tmp`` and return the local file path.

    The request carries the Monday.com API key in the ``Authorization``
    header together with browser-like headers.

    Args:
        url: Asset URL; surrounding whitespace is ignored.

    Returns:
        Path of the downloaded file, ``/tmp/<last path segment of url>``.

    Raises:
        ValueError: if the URL path does not end in a file name.
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server stops responding.
    """
    from urllib.parse import urlsplit

    # Normalise once so the request and the local file name agree.
    url = url.strip()
    # Name the file after the URL *path* only, so a query string or fragment
    # never leaks into the file name.
    file_name = os.path.basename(urlsplit(url).path)
    if not file_name:
        raise ValueError(f"Cannot derive a file name from URL: {url!r}")
    local_filename = os.path.join("/tmp", file_name)

    headers = {
        "Authorization": monday_key,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/125.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://osmosis-acd-team.monday.com/",  # Optional but helpful
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
    }

    # Without a timeout a stalled server would hang the whole run.
    with requests.get(url, headers=headers, stream=True, timeout=60) as r:
        r.raise_for_status()
        # Stream to disk in chunks so large attachments are never held in
        # memory in full.
        with open(local_filename, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename
def get_all_items(board_id, monday):
    """Collect every item of a board by following the paging cursor.

    Args:
        board_id: Identifier passed as ``board_ids`` to the client.
        monday: Client object exposing ``boards.fetch_items_by_board_id``.

    Returns:
        A list with the items of all fetched pages, in fetch order.
    """
    page_size = 25  # items requested per call
    collected = []
    next_cursor = None  # the first request is made without a cursor

    while True:
        reply = monday.boards.fetch_items_by_board_id(
            board_ids=board_id,
            limit=page_size,
            cursor=next_cursor,
        )
        page = reply['data']['boards'][0]['items_page']
        batch = page['items']

        # An empty page ends the walk.
        if not batch:
            break
        collected += batch

        # A missing or empty cursor marks the last page.
        next_cursor = page.get('cursor')
        if not next_cursor:
            break

        print(f"cursor {next_cursor}")
        print(f"len all_itemms {len(collected)}")

    return collected
def upload_to_sharepoint(to_upload, master_folder_name):
    """Upload local files into a per-item folder under ``parent_folder``.

    Args:
        to_upload: Iterable of local file paths to upload.
        master_folder_name: Name of the folder created under
            ``parent_folder`` to receive the files.
    """
    osmosis.create_dir(master_folder_name, parent_folder)
    target_folder = parent_folder + f"/{master_folder_name}"
    for file_path in to_upload:
        # Use the real base name instead of slicing off a fixed 5-character
        # "/tmp/" prefix, so the upload name stays correct wherever the file
        # was downloaded to.
        # NOTE(review): the third argument is presumably the remote file
        # name -- confirm against SharePointScraper.upload_file.
        osmosis.upload_file(file_path, target_folder, os.path.basename(file_path))
# Step 1: Fetch column IDs, keyed by lower-cased column title.
board_data = monday.boards.fetch_boards_by_id(board_id)
columns = board_data["data"]["boards"][0]["columns"]
col_id_map = {col["title"].lower(): col["id"] for col in columns}
name_id = col_id_map.get("name")  # Replace with actual title if different
files_id = col_id_map.get("file(s)")  # Replace with actual title if different
if not name_id or not files_id:
    raise Exception("Could not find 'name' or 'file(s)' columns")

# Step 2: For every board item, download the attachments listed in its
# file(s) column and upload them to a SharePoint folder named after the item.
items = get_all_items(board_id, monday)
for item in tqdm(items):
    item_name = item["name"]
    print(f"Downloading '{item_name}'...")
    for val in item["column_values"]:
        if val["id"] != files_id:
            continue
        # The column text is a comma-separated list of URLs. It can be empty
        # or None when the item has no attachments, and entries may carry
        # surrounding whitespace, so normalise before downloading.
        file_urls = [
            url.strip() for url in (val["text"] or "").split(",") if url.strip()
        ]
        if not file_urls:
            # Nothing attached: skip instead of failing on an empty URL.
            continue
        to_upload = []
        for file_url in tqdm(file_urls):
            print(f"Downloading {file_url}")
            to_upload.append(download_file(file_url))
        upload_to_sharepoint(to_upload, item_name)