months end

This commit is contained in:
Jun-te Kim 2025-07-22 17:18:21 +01:00
parent 1d347098b7
commit d4d5531ddc
2 changed files with 94 additions and 31 deletions

View file

@ -1 +1,64 @@
print("hello world")
from tqdm import tqdm
from monday import MondayClient
from etl.osmosis_complaince_address_to_files import get_all_items, extract_asset_ids
from pprint import pprint
import pandas as pd
import json
import os

# The Monday.com API token is a credential, so it is read from the
# environment rather than committed to the repository. Any token that was
# previously hard-coded here should be treated as leaked and revoked.
monday_key = os.environ.get("MONDAY_API_KEY")
if not monday_key:
    raise RuntimeError(
        "Set the MONDAY_API_KEY environment variable to a Monday.com API token"
    )
monday = MondayClient(monday_key)

board_ids = ["3900434153"]

# One flat record (dict) per board item. The list lives outside the board
# loop so that adding a second board ID extends the report instead of
# discarding the rows collected from earlier boards.
all_records = []
for board in tqdm(board_ids):
    # Map lower-cased column titles to column ids, and back again, so that
    # each item's column values can be keyed by a readable title.
    board_data = monday.boards.fetch_boards_by_id(board)
    columns = board_data["data"]["boards"][0]["columns"]
    col_id_map = {col["title"].lower(): col["id"] for col in columns}
    reversed_col_id_map = {v: k for k, v in col_id_map.items()}

    items = get_all_items(board, monday)
    for row in tqdm(items):
        data = {"address": row["name"]}
        for col in row.get("column_values", []):
            col_id = col.get("id")
            if col_id not in reversed_col_id_map:
                continue
            title = reversed_col_id_map[col_id]
            if col.get("type") == "file":
                # File columns are reduced to the number of attached files;
                # an empty/missing value counts as zero files.
                value = col.get("value")
                no_of_files = 0
                if value:
                    no_of_files = len(json.loads(value).get("files", []))
                data[title] = no_of_files
            else:
                data[title] = col.get("text")
        all_records.append(data)

# Convert to DataFrame
df = pd.DataFrame(all_records)

# Each selection below gets its own name so that none of them is silently
# overwritten by the next one; `filtered_df` remains the V2 selection, as it
# was at the end of the original script.

# RA
ra_df = df[
    df["ra"].str.lower().isin(["completed rdsap 10", "completed rdsap 9.9"])
].copy()

# ATT
att_df = df[
    df["att"].str.lower().isin(["completed"])
].copy()

# V1 Coordination
v1_coordination_df = df[
    df["v1 coordination status"].str.lower().isin(["rc complete"])
].copy()

# V2 Coordination: rows flagged "to be invoiced" that have at least one file
# in the "v2 dc/ima/pas" column.
v2_to_invoice_df = df[
    df["v2 invoiced"].fillna('').str.lower().isin(['to be invoiced'])
]
filtered_df = v2_to_invoice_df[v2_to_invoice_df["v2 dc/ima/pas"] > 0]

View file

@ -91,7 +91,7 @@ def get_all_items(board_id, monday):
limit = 25 # Adjust the limit based on how many items you want per request
all_items = [] # List to store all fetched items
cursor = None # Start without a cursor for the first page
print(f"Connecting to Monday API and retrieving data for board {board_id}")
# Loop through pages
while True:
# Fetch items for the current page
@ -116,8 +116,7 @@ def get_all_items(board_id, monday):
# If there's no cursor, we've reached the last page
if not cursor:
break
print(f"cursor {cursor}")
print(f"len all_itemms {len(all_items)}")
print("Loading...")
return all_items
def upload_to_sharepoint(to_upload, master_folder_name):
@ -128,36 +127,37 @@ def upload_to_sharepoint(to_upload, master_folder_name):
print(f"Uploading {file_name} to sharepoint")
osmosis.upload_file(file_path, parent_folder + f"/{master_folder_name}", file_name)
if __name__ == "__main__":
    # The download/upload run is guarded so that importing this module for
    # its helpers (e.g. get_all_items) does not trigger it.

    # Items at positions 0..RESUME_AFTER_INDEX are skipped.
    # NOTE(review): presumably a manual resume point left over from an
    # earlier partial run -- confirm before running against another board.
    RESUME_AFTER_INDEX = 329

    # Step 1: Fetch column IDs
    board_data = monday.boards.fetch_boards_by_id(board_id)
    columns = board_data["data"]["boards"][0]["columns"]
    col_id_map = {col["title"].lower(): col["id"] for col in columns}

    name_id = col_id_map.get("name")  # Replace with actual title if different
    files_id = col_id_map.get("file(s)")  # Replace with actual title if different

    if not name_id or not files_id:
        raise Exception("Could not find 'name' or 'file(s)' columns")

    items = get_all_items(board_id, monday)
    for i, item in enumerate(tqdm(items)):
        if i <= RESUME_AFTER_INDEX:
            continue

        item_name = item["name"]
        item_name = sanitize_name(item_name, ignore_dot=True)
        print(f"Item name is {item_name}")
        asset_ids = extract_asset_ids(item, files_id)

        to_upload = []
        for asset_id in asset_ids:
            # Best effort: a failure on one asset is reported and the
            # remaining assets of the item are still processed.
            try:
                public_url, file_name = get_public_url(asset_id)
                print(f"Downloading {file_name}")
                file_path = download_file_from_public_url(public_url, file_name)
                to_upload.append(file_path)
            except Exception as e:
                print(f"Failed to download/upload asset {asset_id}: {e}")

        # Only items with at least one successfully downloaded file are
        # uploaded, into a folder named after the item.
        if to_upload:
            upload_to_sharepoint(to_upload, item_name)

    # Liv green # Cocuun # Wates