survey-extraction/etl/utils/utils.py
2025-07-09 10:25:42 +00:00

40 lines
No EOL
1.2 KiB
Python

from urllib.parse import unquote
class SharePointURLError(ValueError):
    """Signals that a SharePoint URL could not be parsed.

    Subclasses ``ValueError``, so handlers written for ``ValueError`` also
    catch this error.
    """
def get_sharepoint_path(url):
    """Extract the document path carried by a SharePoint "Forms" view URL.

    The URL must contain a ``Forms`` path segment; the query string that
    follows it must carry an ``id`` parameter, for example
    ``.../Forms/AllItems.aspx?id=%2Fsites%2Fx%2FShared%20Documents%2Fa.xlsx``.

    Args:
        url: The SharePoint URL as a string.

    Returns:
        The percent-decoded ``id`` value with everything up to and including
        the first ``Shared Documents`` removed and leading/trailing slashes
        stripped. When ``Shared Documents`` does not occur in the value, the
        whole decoded value is returned with surrounding slashes stripped.

    Raises:
        SharePointURLError: If the URL has no ``Forms`` segment, or if no
            ``id`` query parameter follows it.
    """
    url_parts = url.split('/')
    try:
        forms_index = url_parts.index('Forms')
    except ValueError as e:
        raise SharePointURLError(
            f"Error with SharePoint URL, please check {url}. Reason: {e}"
        ) from e

    # Re-join everything after 'Forms' so an ``id`` value written with
    # unencoded slashes is not cut off at its first '/'.
    after_forms = '/'.join(url_parts[forms_index + 1:])
    # Text after the first '?'; the whole remainder if there is no '?'.
    query = after_forms.split('?', 1)[-1]

    # Match the parameter *name* exactly: a substring test for 'id=' would
    # also hit 'viewid=' and return the wrong value.
    id_path = None
    for param in query.split('&'):
        key, sep, value = param.partition('=')
        if sep and key == 'id':
            id_path = value
            break

    # Raised outside any try block; SharePointURLError is a ValueError and
    # would otherwise be caught and re-wrapped by this function itself.
    if id_path is None:
        raise SharePointURLError(f"The URL does not contain 'id=' parameter. URL: {url}")

    decoded_path = unquote(id_path)
    # partition() keeps the full remainder even if the marker text appears
    # again further down the path.
    _, marker, remainder = decoded_path.partition('Shared Documents')
    return (remainder if marker else decoded_path).strip('/')