added sharepoint functionality for mr roth - enjoy

2026-07-27 23:35:01 +00:00 · 2026-03-24 13:36:47 +00:00 · 2026-03-24 13:36:47 +00:00 · a13ab09ab9
commit a13ab09ab9
parent 21f5cd40da
6 changed files with 537 additions and 0 deletions
--- a/.devcontainer/backend/requirements.txt
+++ b/.devcontainer/backend/requirements.txt
@ -13,6 +13,7 @@ boto3==1.35.44
 openpyxl==3.1.5
 # Basic
 pytz
+msal
 uvicorn[standard]
 sqlmodel
 # Testing
--- a/utils/sharepoint/__init__.py
+++ b/utils/sharepoint/__init__.py
--- a/utils/sharepoint/domna_sharepoint_client.py
+++ b/utils/sharepoint/domna_sharepoint_client.py
@ -0,0 +1,172 @@
+from pprint import pformat
+from enum import Enum
+import os
+from utils.logger import setup_logger
+from utils.sharepoint.sharepoint_client import SharePointClient
+from functools import wraps
+import re
+from datetime import datetime, timedelta
+from io import BytesIO
+
+
+class DomnaSites(Enum):
+    """
+    SharePoint site identifiers for the Domna sites, read from the environment.
+
+    Each member's value is whatever its environment variable holds when this
+    module is imported, or ``None`` when the variable is not set.
+    """
+
+    # A site's ID can be looked up at:
+    # https://{tenant}.sharepoint.com/sites/{site}/_api/site/id
+    # TODO: Add these to github secrets!!!
+    # NOTE: Enum members that share a value are aliases of the first one, so
+    # when several of these variables are unset (all ``None``) the later names
+    # resolve to ``DOMNA``.
+    DOMNA = os.environ.get("DOMNA_SHAREPOINT_ID")
+    OSMOSIS_ACD = os.environ.get("OSMOSIS_ACD_SHAREPOINT_ID")
+    PRIVATE_PAY = os.environ.get("PRIVATE_PAY_SHAREPOINT_ID")
+    SOCIAL_HOUSING_WAVE_3 = os.environ.get("SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID")
+
+
+class DomnaSharepointClient:
+    """
+    Convenience wrapper around SharePointClient for a single Domna SharePoint site.
+
+    Credentials come from the SHAREPOINT_CLIENT_ID, SHAREPOINT_CLIENT_SECRET and
+    SHAREPOINT_TENANT_ID environment variables; the site is selected by the
+    ``sharepoint_location`` member handed to the constructor.
+    """
+
+    # Local directory under which create_temp_file() writes.
+    TEMP_ROOT = "/tmp/sharepoint"
+
+    # Media extensions download_files_from_path() skips by default
+    # (matched case-insensitively).
+    DEFAULT_AVOIDED_EXTENSIONS = (".jpg", ".jpeg", ".png", ".heic", ".mov", ".mp4")
+
+    def __init__(self, sharepoint_location, development=False):
+        """
+        Read the credentials from the environment and validate the configuration.
+
+        :param sharepoint_location: Object whose ``.value`` is the SharePoint site ID
+            (e.g. a DomnaSites member).
+        :param development: Currently unused; kept so existing callers keep working.
+        :raises AssertionError: If a credential or the site ID is missing.
+        """
+        self.logger = setup_logger()
+        self.sharepoint_client_id = os.getenv("SHAREPOINT_CLIENT_ID", None)
+        self.sharepoint_client_secret = os.getenv("SHAREPOINT_CLIENT_SECRET", None)
+        self.sharepoint_tenant_id = os.getenv("SHAREPOINT_TENANT_ID", None)
+        self.sharepoint_drive = sharepoint_location
+
+        self._require(
+            self.sharepoint_client_id,
+            "Please assign SHAREPOINT_CLIENT_ID env variable",
+        )
+        self._require(
+            self.sharepoint_client_secret,
+            "Please assign SHAREPOINT_CLIENT_SECRET env variable",
+        )
+        self._require(
+            self.sharepoint_tenant_id,
+            "Please assign SHAREPOINT_TENANT_ID env variable",
+        )
+        self._require(
+            self.sharepoint_drive.value,
+            "Please set sharepoint driver id env variable. See SharePointInstaller for more information",
+        )
+
+    @staticmethod
+    def _require(value, message):
+        """Raise AssertionError(message) when a required setting is None."""
+        # Raised explicitly instead of via `assert` so the check still runs under
+        # `python -O`, while callers keep seeing the same exception type.
+        if value is None:
+            raise AssertionError(message)
+
+    def _new_client(self):
+        """Build a SharePointClient for the configured site (authenticates on creation)."""
+        return SharePointClient(
+            tenant_id=self.sharepoint_tenant_id,
+            client_id=self.sharepoint_client_id,
+            client_secret=self.sharepoint_client_secret,
+            site_id=self.sharepoint_drive.value,
+        )
+
+    def get_folders_in_path(self, path):
+        """
+        List the contents of a folder.
+
+        :param path: Folder path inside the site's document drive.
+        :return: Whatever SharePointClient.list_folder_contents returns for ``path``.
+        """
+        return self._new_client().list_folder_contents(path)
+
+    def get_file_content(self, url):
+        """
+        Download a file.
+
+        :param url: Download URL of the file.
+        :return: The result of SharePointClient.download_sharepoint_file (file content).
+        """
+        # download_sharepoint_file is a static method, so no authenticated client
+        # has to be built just to fetch the URL.
+        return SharePointClient.download_sharepoint_file(url)
+
+    def does_folder_exists_at(self, file_name, file_path):
+        """
+        Tell whether an entry whose name contains ``file_name`` exists in ``file_path``.
+
+        The comparison is a case-insensitive substring match against every entry
+        returned for the folder.
+
+        :param file_name: Name (or name fragment) to look for.
+        :param file_path: Folder to search in.
+        :return: True if a matching entry exists, False otherwise.
+        """
+        folders = self.get_folders_in_path(file_path)
+        wanted = file_name.upper()
+        return any(wanted in folder["name"].upper() for folder in folders.get("value", []))
+
+    def create_dir(self, file_name, at_path="/"):
+        """
+        Create a folder unless one with the same name (ignoring case) already exists.
+
+        :param file_name: Name of the folder to create.
+        :param at_path: Parent folder path; defaults to the drive root.
+        :return: The ``webUrl`` of the existing or newly created folder.
+        """
+        folders = self.get_folders_in_path(at_path)
+
+        # Reuse an existing folder (case-insensitive match) instead of creating a duplicate.
+        wanted = file_name.lower()
+        for folder in folders.get("value", []):
+            if "name" in folder and folder["name"].lower() == wanted:
+                self.logger.info(f"Folder already exists: {file_name} at {at_path}")
+                return folder["webUrl"]
+
+        # Only authenticate a client for the create call once we know it is needed.
+        self.logger.info(f"Creating folder: {file_name} at {at_path}")
+        created = self._new_client().create_folder(file_name, at_path)
+
+        return created["webUrl"]
+
+    def makedir(self, dir_name, at_path="/"):
+        """Alias of create_dir()."""
+        return self.create_dir(dir_name, at_path)
+
+    def upload_file(self, file_path, sharepoint_path, file_name):
+        """
+        Upload a local file to SharePoint.
+
+        :param file_path: Path of the local file to read.
+        :param sharepoint_path: Destination folder path in SharePoint.
+        :param file_name: Name the file gets in SharePoint.
+        """
+        sharepoint_client = self._new_client()
+
+        # The context manager closes the handle even when the upload raises.
+        with open(file_path, "rb") as file_stream:
+            sharepoint_client.upload_file(file_name, file_stream, sharepoint_path)
+
+    def download_files_from_path(self, path, avoid=None):
+        """
+        Download every file directly inside ``path``, skipping unwanted extensions.
+
+        Sub-folders are not descended into; only entries carrying a ``file`` facet
+        are downloaded.
+
+        :param path: SharePoint folder path to download from.
+        :param avoid: Iterable of file extensions to skip, compared
+            case-insensitively. Defaults to common image/video extensions.
+        :return: List of local paths of the downloaded files.
+        :raises RuntimeError: If the folder listing contains no ``value`` entry.
+        """
+        if avoid is None:
+            avoid = self.DEFAULT_AVOIDED_EXTENSIONS
+        # Lower-case both sides so ".MOV" or ".Jpg" are skipped as well.
+        skipped_suffixes = tuple(ext.lower() for ext in avoid)
+
+        files_info = self.get_folders_in_path(path)
+
+        if "value" not in files_info:
+            raise RuntimeError(f"Failed to get files from {path}")
+
+        file_names_to_download = {
+            item["name"]: item["@microsoft.graph.downloadUrl"]
+            for item in files_info["value"]
+            if "file" in item and not item["name"].lower().endswith(skipped_suffixes)
+        }
+
+        downloaded_files = []
+        for file_name, url in file_names_to_download.items():
+            # The download URL is deliberately kept out of the log: per the Graph
+            # documentation it is pre-authenticated, so logging it would leak access.
+            self.logger.info(f"Downloading {file_name}")
+            content = self.get_file_content(url)
+            downloaded_files.append(
+                self.create_temp_file(content, f"{path}/{file_name}")
+            )
+
+        return downloaded_files
+
+    def create_temp_file(self, content: BytesIO, path: str):
+        """
+        Write ``content`` to ``path`` underneath the temp root (/tmp/sharepoint).
+
+        :param content: In-memory buffer whose full value is written.
+        :param path: Path relative to the temp root; leading slashes are ignored.
+        :return: The path of the file that was written.
+        """
+        # os.path.join discards everything before an absolute component, so a
+        # leading "/" would let the file land outside the temp root.
+        relative_path = path.lstrip("/" + os.sep)
+        new_path = os.path.join(self.TEMP_ROOT, relative_path)
+
+        # Ensure the parent directory exists
+        os.makedirs(os.path.dirname(new_path), exist_ok=True)
+
+        with open(new_path, "wb+") as temp_file:
+            temp_file.write(content.getvalue())
+
+        self.logger.debug(f"Temporary file created at: {new_path}")
+        return new_path
--- a/utils/sharepoint/main.py
+++ b/utils/sharepoint/main.py
@ -0,0 +1,25 @@
+# Small smoke-test script to check that the Domna SharePoint client works
+# for basic functionality. Every client call below goes through
+# DomnaSharepointClient, so running it needs the SharePoint credentials and
+# site ID in the environment.
+
+# Can we import it?
+from io import BytesIO
+
+from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient, DomnaSites
+
+
+# Can we initialise it?
+client = DomnaSharepointClient(sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3)
+
+# Can we list the root path and a known test folder?
+
+client.get_folders_in_path("/")
+client.get_folders_in_path("/JTK Test Folder")
+
+# Can we make a folder appear in JTK Test Folder?
+client.makedir("Dan is the best", "/JTK Test Folder")
+
+# Can we stage a local temp file and upload it into the folder created above?
+content = BytesIO(b"Hello, this is some file content!")
+path = client.create_temp_file(content, "some/place/over/the/rainbow")
+client.upload_file(
+    path, "/JTK Test Folder/Dan is the best", "junte_is_the_worst_at_python.txt"
+)
--- a/utils/sharepoint/sharepoint_client.py
+++ b/utils/sharepoint/sharepoint_client.py
@ -0,0 +1,339 @@
+"""
+This file contains the functions which enable interaction with SharePoint via the API.
+
+Documentation to get api_id:
+https://answers.microsoft.com/en-us/msoffice/forum/all/what-is-the-best-way-to-findout-the-share-point/7b2d4183-4188-4cd5-8441-dd93207c5a01
+"""
+
+from msal import ConfidentialClientApplication
+from datetime import datetime, timedelta
+import requests
+from functools import wraps
+import time
+import logging
+from io import BytesIO
+import tempfile
+import os
+
+# Api Documentation: https://learn.microsoft.com/en-us/graph/api/drive-get?view=graph-rest-1.0&tabs=http
+
+
+def handle_error(response):
+    """
+    Log the details of a failed API response and decide whether to retry.
+
+    For 429, 500 and 503 responses the function sleeps for the number of seconds
+    given in the ``Retry-After`` header (5 if absent or not a plain number) and
+    then returns ``"retry"``. Every other status code raises.
+
+    :param response: The failed response; needs ``status_code``, ``headers`` and ``json()``.
+    :return: The string ``"retry"`` when the caller should repeat the request.
+    :raises ValueError: When the status code is not one that is retried.
+    """
+    # The module never defines a global logger, so obtain one here.
+    log = logging.getLogger(__name__)
+
+    try:
+        payload = response.json()
+    except ValueError:
+        payload = None
+
+    # Error bodies are not guaranteed to be a JSON object with a dict under
+    # "error"; fall back to an empty dict rather than crashing while reporting.
+    error_json = payload.get("error") if isinstance(payload, dict) else None
+    if not isinstance(error_json, dict):
+        error_json = {}
+
+    error_code = error_json.get("code", "unknownError")
+    error_message = error_json.get("message", "No detailed error message provided.")
+    inner_error = error_json.get("innererror", {})
+    details = error_json.get("details", [])
+
+    log.error(f"Error Code: {error_code}")
+    log.error(f"Error Message: {error_message}")
+    if inner_error:
+        log.error(f"Inner Error: {inner_error}")
+    if details:
+        log.error(f"Error Details: {details}")
+
+    status = response.status_code
+
+    if status in (429, 500, 503):
+        try:
+            retry_after = max(0, int(response.headers.get("Retry-After", 5)))
+        except (TypeError, ValueError):
+            # Retry-After may also be an HTTP date; use the default delay then.
+            retry_after = 5
+
+        if status == 429:
+            log.warning(f"Too Many Requests. Retrying after {retry_after} seconds...")
+        else:
+            log.error(f"Server error. Retrying after {retry_after} seconds...")
+        time.sleep(retry_after)
+        return "retry"
+
+    status_hints = {
+        401: "Unauthorized. Token might be invalid.",
+        403: "Forbidden. Access denied to the requested resource.",
+        404: "Not Found. The requested resource doesn’t exist.",
+    }
+    if status in status_hints:
+        log.error(status_hints[status])
+
+    raise ValueError(
+        f"API request failed with status code {status} - {error_message}"
+    )
+
+
+def api_call_decorator(func):
+    """
+    Turn a method that describes a request into one that performs it.
+
+    The decorated method must return ``(http_method, url, data)``. The wrapper
+    refreshes the access token when it has expired, sends the request, retries
+    when ``handle_error`` asks for it (a bounded number of times) and follows
+    ``@odata.nextLink`` pages when the method has a truthy ``page_size``.
+
+    ``page_size`` is resolved from the call's bound arguments, so a declared
+    default (or a positional value) enables pagination just like an explicit
+    keyword does.
+
+    :param func: The method to decorate.
+    :return: The wrapped method. It returns the response JSON, or
+        ``{"value": [...]}`` with the items of all pages when paginating.
+    """
+    import inspect  # local import: the module's import block does not provide it
+
+    log = logging.getLogger(__name__)  # the module defines no global logger
+    signature = inspect.signature(func)
+    max_retries = 5  # consecutive retryable failures tolerated per request
+
+    @wraps(func)
+    def wrapper(self, *args, **kwargs):
+        try:
+            # Check and refresh the access token if needed
+            if self.is_access_token_expired():
+                self.retrieve_access_token()
+                log.debug("Access token refreshed.")
+
+            # Get the HTTP method, URL, and optionally data from the function
+            http_method, url, data = func(self, *args, **kwargs)
+
+            # Binding cannot fail here: the call above accepted the same arguments.
+            bound = signature.bind(self, *args, **kwargs)
+            bound.apply_defaults()
+            page_size = bound.arguments.get("page_size", kwargs.get("page_size"))
+
+            results = []
+            response_data = {}
+            failed_attempts = 0
+
+            while url:
+                response = requests.request(
+                    http_method, url, headers=self.headers, json=data
+                )
+
+                if response.status_code in (200, 201):
+                    failed_attempts = 0
+                    response_json = response.json()
+                    if not page_size:
+                        # Single response: hand back the full JSON document.
+                        response_data = response_json
+                        break
+                    results.extend(response_json.get("value", []))
+                    url = response_json.get("@odata.nextLink", None)
+                    continue
+
+                failed_attempts += 1
+                if failed_attempts > max_retries:
+                    # Without a cap a persistently throttled or failing endpoint
+                    # would keep this loop spinning forever.
+                    raise ValueError(
+                        f"API request failed with status code {response.status_code} "
+                        f"after {max_retries} retries"
+                    )
+                # Raises for non-retryable errors; otherwise sleeps and we loop again.
+                handle_error(response)
+
+            if page_size:
+                response_data = {"value": results}
+
+            return response_data
+
+        except Exception:
+            log.exception("An error occurred during the API call.")
+            raise
+
+    return wrapper
+
+
+class SharePointClient:
+    """
+    Small Microsoft Graph client for the document drive of one SharePoint site.
+
+    On construction it obtains an access token (unless one is supplied) and looks
+    up the site's document drive; the other methods then address items in that
+    drive by path.
+    """
+
+    access_token = None
+    access_token_request_timestamp = None
+    access_token_expiry = None
+    headers = None
+
+    # NOTE(review): the trailing "Z" suggests UTC, but the timestamps come from
+    # the naive datetime.now() - confirm before exchanging them across hosts.
+    TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
+
+    GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"
+
+    # Retryable failures tolerated by upload_file() before it gives up.
+    MAX_UPLOAD_RETRIES = 5
+
+    # The module defines no global logger, so the class carries its own.
+    _logger = logging.getLogger(__name__)
+
+    def __init__(
+        self,
+        tenant_id,
+        client_id,
+        client_secret,
+        site_id,
+        access_token=None,
+        access_token_expiration_details=None,
+    ):
+        """
+        Initializes the SharePointClient with necessary credentials and site information.
+        :param tenant_id: The tenant ID.
+        :param client_id: The client ID.
+        :param client_secret: The client secret.
+        :param site_id: The site ID.
+        :param access_token: A previously obtained token dict (optional).
+        :param access_token_expiration_details: Serialized expiration details as
+            produced by get_token_expiration_details(); required with access_token.
+        :raises ValueError: If access_token is given without its expiration details,
+            or if authentication fails.
+        """
+        self.tenant_id = tenant_id
+        self.client_id = client_id
+        self.client_secret = client_secret
+
+        if access_token:
+            if not access_token_expiration_details:
+                raise ValueError("Access token expiration details must be provided.")
+            self.access_token = access_token
+            self.set_access_token_expiration_details(access_token_expiration_details)
+            self.headers = {
+                "Authorization": f"Bearer {self.access_token['access_token']}"
+            }
+        else:
+            self.retrieve_access_token()
+
+        # Retrieve static identifiers
+        self.site_id = site_id
+        self.document_drive = self.get_documents_drive()
+        self.document_drive_id = self.document_drive["id"]
+
+    def _drive_item_url(self, item_path, suffix):
+        """
+        Build the Graph URL for ``suffix`` on the drive item at ``item_path``.
+
+        Leading and trailing slashes of ``item_path`` are ignored, so "/Folder"
+        and "Folder" address the same item. An empty path (or "/") addresses the
+        drive root through ``root/<suffix>``, because the ``root:/<path>:`` form
+        needs a non-empty path.
+        """
+        drive_url = f"{self.GRAPH_BASE_URL}/drives/{self.document_drive_id}"
+        relative_path = item_path.strip("/")
+        if not relative_path:
+            return f"{drive_url}/root/{suffix}"
+        return f"{drive_url}/root:/{relative_path}:/{suffix}"
+
+    def get_token_expiration_details(self):
+        """
+        Returns the access token expiration details. Converts the datetime objects to strings for serialization.
+        :return: Dict with "access_token_request_timestamp" and "access_token_expiry"
+            formatted with TIMESTAMP_FORMAT.
+        """
+        return {
+            "access_token_request_timestamp": datetime.strftime(
+                self.access_token_request_timestamp, self.TIMESTAMP_FORMAT
+            ),
+            "access_token_expiry": datetime.strftime(
+                self.access_token_expiry, self.TIMESTAMP_FORMAT
+            ),
+        }
+
+    def set_access_token_expiration_details(self, access_token_expiration_details):
+        """
+        Sets the access token expiration details from a serialized dictionary.
+        :param access_token_expiration_details: The serialized access token expiration
+            details (the format returned by get_token_expiration_details()).
+        :return: None
+        """
+        self.access_token_request_timestamp = datetime.strptime(
+            access_token_expiration_details["access_token_request_timestamp"],
+            self.TIMESTAMP_FORMAT,
+        )
+        self.access_token_expiry = datetime.strptime(
+            access_token_expiration_details["access_token_expiry"],
+            self.TIMESTAMP_FORMAT,
+        )
+
+    def is_access_token_expired(self):
+        """
+        Checks if the access token has expired. It does not refresh the token itself.
+        :return: True if expired, False otherwise.
+        """
+        return datetime.now() >= self.access_token_expiry
+
+    def retrieve_access_token(self, refresh=False):
+        """
+        Implements authentication using MSAL and stores the token, its expiry and
+        the Authorization header on the instance.
+        :param refresh: If True, force a refresh of the access token.
+        :return: None
+        :raises ValueError: If MSAL does not return an access token.
+        """
+        app = ConfidentialClientApplication(
+            self.client_id,
+            authority=f"https://login.microsoftonline.com/{self.tenant_id}",
+            client_credential=self.client_secret,
+        )
+
+        scope = ["https://graph.microsoft.com/.default"]
+
+        access_token_request_timestamp = datetime.now()
+
+        if refresh:
+            self._logger.debug("Forcing refresh of access token.")
+            token = app.acquire_token_for_client(scopes=scope)
+        else:
+            # Check if a token is already cached
+            token = app.acquire_token_silent(scope, account=None)
+
+            if not token:
+                token = app.acquire_token_for_client(scopes=scope)
+
+        if "access_token" not in token:
+            self._logger.error("Authentication failed.")
+            raise ValueError("Authentication failed")
+
+        # Treat the token as expired 20 seconds early so a request never starts
+        # with a token that lapses in flight.
+        access_token_expiry = access_token_request_timestamp + timedelta(
+            seconds=token["expires_in"] - 20
+        )
+
+        self.access_token = token
+        self.access_token_request_timestamp = access_token_request_timestamp
+        self.access_token_expiry = access_token_expiry
+        self.headers = {"Authorization": f"Bearer {self.access_token['access_token']}"}
+
+    @api_call_decorator
+    def get_documents_drive(self):
+        """
+        Get the document drive of the SharePoint site.
+        :return: Tuple containing HTTP method, URL, and None for data.
+        """
+        url = f"{self.GRAPH_BASE_URL}/sites/{self.site_id}/drive"
+        return "GET", url, None
+
+    @api_call_decorator
+    def list_folder_contents(self, folder_path: str, page_size: int = 100):
+        """
+        GET drive/root/children
+
+        This function will list the contents of a folder in SharePoint.
+        :param folder_path: The path of the folder ("/" or "" for the drive root).
+        :param page_size: The number of items per page (default is 100).
+        :return: Tuple containing HTTP method, URL, and None for data.
+        """
+        url = self._drive_item_url(folder_path, f"children?$top={page_size}")
+        return "GET", url, None
+
+    @api_call_decorator
+    def create_folder(self, file_name, folder_path):
+        """
+        Create a folder named ``file_name`` inside ``folder_path``.
+
+        POST .../children with
+        {
+            "name": "New Folder",
+            "folder": { },
+            "@microsoft.graph.conflictBehavior": "rename"
+        }
+
+        :param file_name: Name of the new folder.
+        :param folder_path: Path of the parent folder ("/" or "" for the drive root).
+        :return: Tuple containing HTTP method, URL, and the request body.
+        """
+        data = {
+            "name": file_name,
+            "folder": {},
+            "@microsoft.graph.conflictBehavior": "rename",
+        }
+        url = self._drive_item_url(folder_path, "children")
+
+        return "POST", url, data
+
+    def upload_file(self, file_name, file_stream, sharepoint_parent_id):
+        """
+        Uploads a file to SharePoint using the Graph API.
+        PUT /drives/{drive-id}/root:/{path-to-file}:/content
+
+        :param file_name: Name of the file to upload
+        :param file_stream: File content as a binary stream (e.g., BytesIO or open(file, 'rb'))
+        :param sharepoint_parent_id: Path of the destination folder within the drive
+            (despite the name it is used as a path, not an item ID)
+        :return: Response JSON from the API
+        :raises ValueError: On a non-retryable error or when the retries are used up.
+        """
+        url = self._drive_item_url(f"{sharepoint_parent_id}/{file_name}", "content")
+
+        # Remember where the stream starts so a retry re-sends the whole content
+        # instead of whatever is left after the failed attempt consumed it.
+        can_rewind = hasattr(file_stream, "seek") and hasattr(file_stream, "tell")
+        start_position = file_stream.tell() if can_rewind else None
+
+        for attempt in range(self.MAX_UPLOAD_RETRIES + 1):
+            # This method is not wrapped by api_call_decorator, so refresh here.
+            if self.is_access_token_expired():
+                self.retrieve_access_token()
+            if attempt and can_rewind:
+                file_stream.seek(start_position)
+
+            response = requests.put(url, headers=self.headers, data=file_stream)
+
+            if response.status_code in (200, 201):
+                return response.json()
+
+            # Raises for non-retryable errors; otherwise it has already slept.
+            handle_error(response)
+
+        raise ValueError(
+            f"Upload of '{file_name}' failed after {self.MAX_UPLOAD_RETRIES} retries"
+        )
+
+    @staticmethod
+    def download_sharepoint_file(download_url):
+        """
+        Downloads a file from the given URL and returns its content.
+
+        :param download_url: The URL to download the file from.
+        :return: BytesIO holding the downloaded content, positioned at the start.
+        :raises requests.HTTPError: If the server answers with an error status.
+        """
+        file_content = BytesIO()
+
+        # The context manager releases the streamed connection even on errors.
+        with requests.get(download_url, stream=True) as response:
+            response.raise_for_status()  # Check if the request was successful
+
+            # Read the file content into memory
+            for chunk in response.iter_content(chunk_size=8192):
+                file_content.write(chunk)
+
+        file_content.seek(0)  # Reset the file pointer to the beginning
+
+        return file_content
--- a/utils/sharepoint/temp
+++ b/utils/sharepoint/temp