From fea099afe34cbe5af7e8d613256a9707880ad46e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Mar 2026 14:42:48 +0000 Subject: [PATCH] delete unused methods and add typehinting to DomnaSharepointClient --- utils/sharepoint/domna_sharepoint_client.py | 91 ++++----------------- utils/sharepoint/sharepoint_client.py | 17 ++-- 2 files changed, 26 insertions(+), 82 deletions(-) diff --git a/utils/sharepoint/domna_sharepoint_client.py b/utils/sharepoint/domna_sharepoint_client.py index 55336f85..374ee140 100644 --- a/utils/sharepoint/domna_sharepoint_client.py +++ b/utils/sharepoint/domna_sharepoint_client.py @@ -1,11 +1,8 @@ -from pprint import pformat from enum import Enum import os +from typing import Any, Dict, Optional from utils.logger import setup_logger from utils.sharepoint.sharepoint_client import SharePointClient -from functools import wraps -import re -from datetime import datetime, timedelta from io import BytesIO @@ -23,7 +20,7 @@ class DomnaSharepointClient: A simple scraper to get the contents of a sharepoint and validatate inputs so I can manually change """ - def __init__(self, sharepoint_location, development=False): + def __init__(self, sharepoint_location: DomnaSites): self.logger = setup_logger() self.sharepoint_client_id = os.getenv("SHAREPOINT_CLIENT_ID", None) self.sharepoint_client_secret = os.getenv("SHAREPOINT_CLIENT_SECRET", None) @@ -39,11 +36,8 @@ class DomnaSharepointClient: assert ( self.sharepoint_tenant_id is not None ), "Please assign SHAREPOINT_TENANT_ID env variable" - assert ( - self.sharepoint_drive.value is not None - ), "Please set sharepoint driver id env variable. See SharePointInstaller for more information" - def get_folders_in_path(self, path): + def get_folders_in_path(self, path: str) -> Dict[str, Any]: sharepoint_client = SharePointClient( tenant_id=self.sharepoint_tenant_id, client_id=self.sharepoint_client_id, @@ -53,25 +47,15 @@ class DomnaSharepointClient: return sharepoint_client.list_folder_contents(path) - def get_file_content(self, url): - sharepoint_client = SharePointClient( - tenant_id=self.sharepoint_tenant_id, - client_id=self.sharepoint_client_id, - client_secret=self.sharepoint_client_secret, - site_id=self.sharepoint_drive.value, - ) - - return sharepoint_client.download_sharepoint_file(url) - - def does_folder_exists_at(self, file_name, file_path): - folders = self.get_folders_in_path(file_path) + def does_folder_exists_at(self, file_name: str, file_path: str): + folders: Dict[str, Any] = self.get_folders_in_path(file_path) if "value" in folders: for folder in folders["value"]: if file_name.upper() in folder["name"].upper(): return True return False - def create_dir(self, file_name, at_path="/"): + def create_dir(self, dir_name: str, at_path: str = "/") -> str: sharepoint_client = SharePointClient( tenant_id=self.sharepoint_tenant_id, client_id=self.sharepoint_client_id, @@ -79,25 +63,27 @@ class DomnaSharepointClient: site_id=self.sharepoint_drive.value, ) - folders = self.get_folders_in_path(at_path) + folders: Dict[str, Any] = self.get_folders_in_path(at_path) # Check if folder already exists (case-insensitive match) if "value" in folders: for folder in folders["value"]: - if "name" in folder and folder["name"].lower() == file_name.lower(): - self.logger.info(f"Folder already exists: {file_name} at {at_path}") + if "name" in folder and folder["name"].lower() == dir_name.lower(): + self.logger.info(f"Folder already exists: {dir_name} at {at_path}") return folder["webUrl"] # ✅ return existing folder # Folder does NOT exist → create it - self.logger.info(f"Creating folder: {file_name} at {at_path}") - created = sharepoint_client.create_folder(file_name, at_path) + self.logger.info(f"Creating folder: {dir_name} at {at_path}") + created: Dict[str, Any] = sharepoint_client.create_folder(dir_name, at_path) return created["webUrl"] - def makedir(self, dir_name, at_path="/"): + def makedir(self, dir_name: str, at_path: str = "/") -> str: return self.create_dir(dir_name, at_path) - def upload_file(self, file_path, sharepoint_path, file_name): + def upload_file( + self, file_path: str, sharepoint_path: str, file_name: str + ) -> Optional[Dict[str, Any]]: sharepoint_client = SharePointClient( tenant_id=self.sharepoint_tenant_id, client_id=self.sharepoint_client_id, @@ -105,58 +91,13 @@ class DomnaSharepointClient: site_id=self.sharepoint_drive.value, ) - def get_file_stream(file_path): + def get_file_stream(file_path: str): return open(file_path, "rb") sharepoint_client.upload_file( file_name, get_file_stream(file_path), sharepoint_path ) - def download_files_from_path(self, path, avoid=None): - """ - Download all non-media files from a list of root paths. - - Args: - root_paths (List[str]): List of full folder paths to start from. - - Returns: - List[Dict[str, List[str]]]: A list of dictionaries mapping address folder names to downloaded file paths. - """ - if avoid is None: - avoid = [ - ".jpg", - ".mov", - ".JPG", - ".heic", - ".HEIC", - ".png", - ".PNG", - ".jpeg", - ".JPEG", - ".mp4", - ".MP4", - ] - - files_info = self.get_folders_in_path(path) - - if "value" not in files_info: - raise RuntimeError(f"Failed to get files from {path}") - - file_names_to_download = { - file["name"]: file["@microsoft.graph.downloadUrl"] - for file in files_info["value"] - if "file" in file and not any(file["name"].endswith(ext) for ext in avoid) - } - - downloaded_files = [] - for file_name, url in file_names_to_download.items(): - self.logger.info(f"Downloading {file_name} from {url}") - content = self.get_file_content(url) - file_path = self.create_temp_file(content, f"{path}/{file_name}") - downloaded_files.append(file_path) - - return downloaded_files - def create_temp_file(self, content: BytesIO, path: str): # Ensure the path is under /tmp/ new_path = os.path.join("/tmp/sharepoint", path) diff --git a/utils/sharepoint/sharepoint_client.py b/utils/sharepoint/sharepoint_client.py index 421b1535..67c4315c 100644 --- a/utils/sharepoint/sharepoint_client.py +++ b/utils/sharepoint/sharepoint_client.py @@ -5,15 +5,14 @@ Documentation to get api_id: https://answers.microsoft.com/en-us/msoffice/forum/all/what-is-the-best-way-to-findout-the-share-point/7b2d4183-4188-4cd5-8441-dd93207c5a01 """ +from typing import Any, BinaryIO, Dict, Optional + from msal import ConfidentialClientApplication from datetime import datetime, timedelta import requests from functools import wraps import time -import logging from io import BytesIO -import tempfile -import os # Api Documentation: https://learn.microsoft.com/en-us/graph/api/drive-get?view=graph-rest-1.0&tabs=http @@ -259,7 +258,9 @@ class SharePointClient: return "GET", url, None @api_call_decorator - def list_folder_contents(self, folder_path: str, page_size: int = 100): + def list_folder_contents( + self, folder_path: str, page_size: int = 100 + ) -> Dict[str, Any]: """ GET drive/root/children @@ -274,7 +275,7 @@ class SharePointClient: return "GET", url, None @api_call_decorator - def create_folder(self, file_name, folder_path): + def create_folder(self, file_name: str, folder_path: str) -> Dict[str, Any]: """ POST https://graph.microsoft.com/v1.0/me/drive/root/children Content-Type: application/json @@ -285,7 +286,7 @@ class SharePointClient: } """ - data = { + data: Dict[str, Any] = { "name": file_name, "folder": {}, "@microsoft.graph.conflictBehavior": "rename", @@ -294,7 +295,9 @@ class SharePointClient: return "POST", url, data - def upload_file(self, file_name, file_stream, sharepoint_parent_id): + def upload_file( + self, file_name: str, file_stream: BinaryIO, sharepoint_parent_id: str + ) -> Optional[Dict[str, Any]]: """ Uploads a file to SharePoint using the Graph API. PUT /drives/{drive-id}/root:/{path-to-file}:/content