mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
Script to automate compliance done
This commit is contained in:
parent
50dea33a25
commit
a75ab60e3a
1 changed files with 119 additions and 0 deletions
119
etl/osmosis_complaince_address_to_files.py
Normal file
119
etl/osmosis_complaince_address_to_files.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
from monday import MondayClient
|
||||
import json
|
||||
import requests
|
||||
import time
|
||||
from tqdm import tqdm
|
||||
import os
|
||||
from etl.scraper.scraper import SharePointInstaller
|
||||
from etl.scraper.scraper import SharePointScraper
|
||||
|
||||
def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or empty, so a missing
            credential is reported up front instead of surfacing later as
            an opaque authentication failure.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(f"Environment variable {name} must be set")
    return value


# Monday.com board whose items (and attached files) are exported.
board_id = "6097548932"

# SECURITY: credentials must never be committed to the repository. The
# Monday API token is read from the environment; any token that was ever
# checked in should be treated as compromised and rotated.
monday_key = _require_env("MONDAY_API_KEY")
monday = MondayClient(monday_key)

# Osmosis SharePoint credentials.
# NOTE(review): SharePointScraper presumably reads these variables itself
# (this script previously assigned them to os.environ right before building
# the scraper) - confirm against etl.scraper.scraper.
for _required in (
    "SHAREPOINT_CLIENT_ID",
    "SHAREPOINT_CLIENT_SECRET",
    "SHAREPOINT_TENANT_ID",
):
    _require_env(_required)

osmosis = SharePointScraper(SharePointInstaller.OSMOSIS_WAVE_2)
parent_folder = "/Osmosis ACD/Osmosis ACD Projects/Installer Documentation/"

# Change this per installer
parent_folder += "Platform Housing Group/Broadoak"
|
||||
|
||||
|
||||
def download_file(url):
    """Download *url* into ``/tmp`` and return the local file path.

    The request is sent with the Monday API key in the ``Authorization``
    header plus a set of browser-like headers, and the body is streamed to
    disk in 8 KiB chunks.

    Args:
        url: Address of the file to fetch. Surrounding whitespace is
            ignored.

    Returns:
        Path of the downloaded file, ``/tmp/<last path segment of url>``.

    Raises:
        ValueError: if no file name can be derived from *url* (for example
            an empty string or a URL ending in ``/``).
        requests.HTTPError: if the server answers with an error status.
    """
    from urllib.parse import urlsplit

    # Normalise once so the request and the file name agree; entries split
    # out of a comma-separated list may carry stray whitespace.
    url = url.strip()

    # Use only the last *path* segment so a query string or fragment never
    # ends up in the local file name.
    filename = urlsplit(url).path.rsplit("/", 1)[-1]
    if not filename:
        raise ValueError(f"Cannot derive a file name from URL: {url!r}")

    headers = {
        "Authorization": monday_key,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/125.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,"
                  "image/avif,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://osmosis-acd-team.monday.com/",  # Optional but helpful
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
    }

    # Files stay directly under /tmp: callers derive the upload name from
    # the returned path.
    local_filename = os.path.join("/tmp", filename)

    # (connect, read) timeouts: without them a stalled server would hang
    # the whole batch indefinitely.
    with requests.get(url, headers=headers, stream=True, timeout=(10, 120)) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename
|
||||
|
||||
def get_all_items(board_id, monday):
    """Collect every item of a board by following the pagination cursor.

    Args:
        board_id: Identifier of the board, forwarded as ``board_ids``.
        monday: Client object exposing ``boards.fetch_items_by_board_id``.

    Returns:
        A list with the items of all pages, in the order they were returned.
    """
    page_size = 25  # items requested per call
    collected = []
    next_cursor = None  # the first request is made without a cursor

    while True:
        reply = monday.boards.fetch_items_by_board_id(
            board_ids=board_id,
            limit=page_size,
            cursor=next_cursor,
        )
        page = reply['data']['boards'][0]['items_page']

        batch = page['items']
        if not batch:
            # An empty page means there is nothing more to read.
            return collected
        collected += batch

        next_cursor = page.get('cursor')
        if not next_cursor:
            # No cursor: this was the last page.
            return collected

        # Progress output between pages.
        print(f"cursor {next_cursor}")
        print(f"len all_itemms {len(collected)}")
|
||||
|
||||
def upload_to_sharepoint(to_upload, master_folder_name):
    """Upload local files into a per-item folder on SharePoint.

    Creates ``master_folder_name`` under the module-level ``parent_folder``
    and uploads every file into it under the file's own base name.

    Args:
        to_upload: Iterable of local file paths.
        master_folder_name: Name of the folder to create for these files.
    """
    osmosis.create_dir(master_folder_name, parent_folder)
    target_folder = parent_folder + f"/{master_folder_name}"
    for file_path in to_upload:
        # basename() instead of a fixed-width slice: the remote name stays
        # correct wherever the local file lives, not only for "/tmp/<name>".
        osmosis.upload_file(file_path, target_folder, os.path.basename(file_path))
|
||||
|
||||
# Step 1: Fetch column IDs
board_data = monday.boards.fetch_boards_by_id(board_id)
columns = board_data["data"]["boards"][0]["columns"]
# Map lower-cased column titles to column ids.
col_id_map = {col["title"].lower(): col["id"] for col in columns}

name_id = col_id_map.get("name")  # Replace with actual title if different
files_id = col_id_map.get("file(s)")  # Replace with actual title if different

if not name_id or not files_id:
    raise Exception("Could not find 'name' or 'file(s)' columns")

# Step 2: Download every item's files and upload them to SharePoint,
# one folder per item (named after the item).
items = get_all_items(board_id, monday)
for item in tqdm(items):
    item_name = item["name"]
    item_id = item["id"]

    print(f"Downloading '{item_name}'...")
    for val in item["column_values"]:
        # Only the file column is of interest.
        if val["id"] != files_id:
            continue

        # The column text is a comma-separated list of file URLs. It may be
        # empty or None (presumably for items without attachments), so
        # normalise it and drop blank entries instead of attempting to
        # download "".
        all_files_csv = val["text"] or ""
        files = [url.strip() for url in all_files_csv.split(",") if url.strip()]
        if not files:
            continue

        to_upload = []
        for file in tqdm(files):
            print(f"Downloading {file}")
            to_upload.append(download_file(file))
        upload_to_sharepoint(to_upload, item_name)
|
||||
|
||||
Loading…
Add table
Reference in a new issue