survey-extraction/etl/utils/utils.py
2025-05-23 15:58:04 +00:00

23 lines
No EOL
824 B
Python

from urllib.parse import unquote
def get_sharepoint_path(url):
    """Extract the library-relative folder path from a SharePoint view URL.

    The URL is expected to contain a ``Forms`` path segment followed by a
    view page whose query string carries the folder in an ``id`` parameter,
    e.g. ``.../Forms/AllItems.aspx?id=%2Fsites%2Fteam%2FShared%20Documents%2FA``.

    Args:
        url: The SharePoint URL as a string.

    Returns:
        The percent-decoded ``id`` value with everything up to and including
        the first ``Shared Documents`` removed and surrounding slashes
        stripped. If the decoded value does not contain ``Shared Documents``,
        the whole decoded value is returned with surrounding slashes
        stripped. ``None`` when no ``id`` query parameter is present.

    Raises:
        ValueError: If the URL has no ``Forms`` path segment.
    """
    segments = url.split('/')
    # list.index raises ValueError when there is no 'Forms' segment.
    forms_index = segments.index('Forms')

    # Rejoin everything after 'Forms' so an ``id`` value whose slashes were
    # left unencoded is not cut off at its first '/'.
    after_forms = '/'.join(segments[forms_index + 1:])

    # The query string follows '?'; if there is no '?', fall back to scanning
    # the whole remainder for parameters.
    _, has_query, query = after_forms.partition('?')
    if not has_query:
        query = after_forms

    for param in query.split('&'):
        name, has_value, value = param.partition('=')
        # Compare the full parameter name: a substring test for 'id=' would
        # also match parameters such as 'viewid=' or 'cid='.
        if name != 'id' or not has_value:
            continue

        decoded_path = unquote(value)
        # Split on the FIRST 'Shared Documents' only, so a sub-folder whose
        # name contains the same text does not truncate the result.
        _, has_library, inside_library = decoded_path.partition('Shared Documents')
        if has_library:
            return inside_library.strip('/')
        return decoded_path.strip('/')

    return None