From 4e21dda328dc4a06ab1eb69e5f44857c1a6cf03f Mon Sep 17 00:00:00 2001
From: Daniel Roth
Date: Wed, 20 May 2026 16:26:07 +0000
Subject: [PATCH] rename files in sharepoint to desired structure

---
 scripts/rename_sharepoint_files.py          | 144 ++++++++++++++++++++
 utils/sharepoint/domna_sharepoint_client.py |  10 ++
 utils/sharepoint/sharepoint_client.py       |  11 ++
 3 files changed, 165 insertions(+)
 create mode 100644 scripts/rename_sharepoint_files.py

diff --git a/scripts/rename_sharepoint_files.py b/scripts/rename_sharepoint_files.py
new file mode 100644
index 00000000..881b96ef
--- /dev/null
+++ b/scripts/rename_sharepoint_files.py
@@ -0,0 +1,144 @@
+"""
+Rename files in SharePoint property folders to the canonical format:
+    {UPRN}_{Street} {Postcode}_{Document Name}.ext
+
+Set DRY_RUN = False when ready to commit. Run from repo root.
+Required env vars: SHAREPOINT_CLIENT_ID, SHAREPOINT_CLIENT_SECRET,
+                   SHAREPOINT_TENANT_ID, SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID
+"""
+
+import csv
+import os
+from typing import Optional
+
+from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
+from utils.logger import setup_logger
+from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
+from utils.sharepoint.domna_sites import DomnaSites
+
+DRY_RUN: bool = True
+CSV_PATH: str = "scripts/sero_address_list.csv"
+
+BASE_PATH = (
+    "Osmosis-ACD Projects/Sero-Clarion Housing/"
+    "Sero Project Documents/Property Folders"
+)
+ASSESSMENT_SUBFOLDER = "A. Assessment"
+
+logger = setup_logger()
+
+
+def build_canonical_filename(
+    uprn: str, address: str, postcode: str, original_name: str
+) -> Optional[str]:
+    """
+    Returns the canonical filename, or None if the file is already renamed.
+
+    Already-renamed: name starts with "{uprn}_".
+    Strips any existing address prefix (address+postcode first, then address alone,
+    then street+postcode, then street alone; case-insensitive) and any separator
+    (spaces, "-", "_") left in front of the document name before inserting the
+    canonical prefix. The street is the address text before its first comma.
+    """
+    if original_name.startswith(f"{uprn}_"):
+        return None
+
+    stem, ext = os.path.splitext(original_name)
+    stem_lower = stem.lower()
+
+    street = address.split(",")[0].strip()
+    # Tried in this order; the first prefix that matches is removed.
+    prefixes = [
+        f"{address} {postcode}",
+        address,
+        f"{street} {postcode}",
+        street,
+    ]
+
+    doc_name = stem
+    for prefix in prefixes:
+        if stem_lower.startswith(prefix.lower()):
+            doc_name = stem[len(prefix) :]
+            break
+
+    # Drop whatever joined the old prefix to the document name (" - ", " _ ",
+    # "_", "-", ...) so the canonical name never ends up with a doubled "__".
+    doc_name = doc_name.strip().lstrip(" -_")
+
+    street_post = f"{street} {postcode}"
+    if doc_name:
+        return f"{uprn}_{street_post}_{doc_name}{ext}"
+    return f"{uprn}_{street_post}{ext}"
+
+
+def main() -> None:
+    """
+    Rename (or, while DRY_RUN is set, only log) the files of every CSV property.
+
+    Reads CSV_PATH (columns UPRN, Address, Postcode), lists each property's
+    assessment folder and renames the files not yet in canonical form. Rows
+    with a blank value, and folders whose lookup raises ValueError, are skipped
+    with a warning; a failed rename is logged and does not stop the run.
+
+    Raises ValueError if the CSV lacks one of the required columns.
+    """
+    sp_client = DomnaSharepointClient(DomnaSites.SOCIAL_HOUSING_WAVE_3)
+
+    with open(CSV_PATH, newline="", encoding="utf-8-sig") as f:
+        reader = csv.DictReader(f)
+        required = {"UPRN", "Address", "Postcode"}
+        if not reader.fieldnames or not required.issubset(set(reader.fieldnames)):
+            raise ValueError(
+                f"CSV missing required columns. Expected {required}, got {reader.fieldnames}"
+            )
+
+        for row in reader:
+            # DictReader fills the missing columns of a short row with None.
+            uprn = (row["UPRN"] or "").strip()
+            address = (row["Address"] or "").strip()
+            postcode = (row["Postcode"] or "").strip()
+            if not (uprn and address and postcode):
+                logger.warning(f"Skipping incomplete CSV row: {row}")
+                continue
+
+            folder_path = (
+                f"{BASE_PATH}/{address}, {postcode}"
+                f"/{SharepointSubfolders.ASSESSMENT.value}/{ASSESSMENT_SUBFOLDER}"
+            )
+
+            try:
+                contents = sp_client.get_folders_in_path(folder_path)
+            except ValueError:
+                logger.warning(f"Missing folder for UPRN {uprn}: {folder_path}")
+                continue
+
+            for item in contents.get("value", []):
+                if "file" not in item:
+                    continue
+
+                original_name: str = item["name"]
+                new_name = build_canonical_filename(
+                    uprn, address, postcode, original_name
+                )
+
+                if new_name is None:
+                    continue
+
+                if DRY_RUN:
+                    logger.info(
+                        f'[DRY RUN] Renaming: "{original_name}" → "{new_name}" (UPRN: {uprn})'
+                    )
+                else:
+                    try:
+                        sp_client.rename_file(item["id"], new_name)
+                        logger.info(
+                            f'Renamed: "{original_name}" → "{new_name}" (UPRN: {uprn})'
+                        )
+                    except Exception as e:
+                        logger.error(
+                            f'Failed to rename "{original_name}" → "{new_name}" (UPRN: {uprn}): {e}'
+                        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/utils/sharepoint/domna_sharepoint_client.py b/utils/sharepoint/domna_sharepoint_client.py
index 5e0255ac..3e9168ba 100644
--- a/utils/sharepoint/domna_sharepoint_client.py
+++ b/utils/sharepoint/domna_sharepoint_client.py
@@ -125,6 +125,16 @@ class DomnaSharepointClient:
         self.logger.debug(f"Downloaded SharePoint file to: {local_path}")
         return True
 
+    def rename_file(self, item_id: str, new_name: str) -> None:
+        """Rename the drive item `item_id` to `new_name`."""
+        sharepoint_client = SharePointClient(
+            tenant_id=self.sharepoint_tenant_id,
+            client_id=self.sharepoint_client_id,
+            client_secret=self.sharepoint_client_secret,
+            site_id=self.sharepoint_drive.value,
+        )
+        sharepoint_client.rename_file(item_id, new_name)
+
     def create_temp_file(self, content: BytesIO, path: str):
         # Ensure the path is under /tmp/
         new_path = os.path.join("/tmp/sharepoint", path)
diff --git a/utils/sharepoint/sharepoint_client.py b/utils/sharepoint/sharepoint_client.py
index 5807c3bd..38107dbf 100644
--- a/utils/sharepoint/sharepoint_client.py
+++ b/utils/sharepoint/sharepoint_client.py
@@ -335,6 +335,17 @@ class SharePointClient:
         if retry == "retry":
             return self.upload_file(file_name, sharepoint_parent_id, file_stream)
 
+    @api_call_decorator
+    def rename_file(self, item_id: str, new_name: str) -> None:
+        """
+        PATCH /drives/{drive_id}/items/{item_id}
+
+        Renames a file in-place. Caller should discard the return value.
+        """
+        url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/items/{item_id}"
+        data: Dict[str, Any] = {"name": new_name}
+        return "PATCH", url, data  # type: ignore[return-value]
+
     @staticmethod
     def download_sharepoint_file(download_url: str) -> BytesIO:
         """