From 4e21dda328dc4a06ab1eb69e5f44857c1a6cf03f Mon Sep 17 00:00:00 2001
From: Daniel Roth
Date: Wed, 20 May 2026 16:26:07 +0000
Subject: [PATCH 1/2] rename files in sharepoint to desired structure

---
 scripts/rename_sharepoint_files.py          | 175 ++++++++++++++++++++
 utils/sharepoint/domna_sharepoint_client.py |  10 ++
 utils/sharepoint/sharepoint_client.py       |  11 ++
 3 files changed, 196 insertions(+)
 create mode 100644 scripts/rename_sharepoint_files.py

diff --git a/scripts/rename_sharepoint_files.py b/scripts/rename_sharepoint_files.py
new file mode 100644
index 00000000..881b96ef
--- /dev/null
+++ b/scripts/rename_sharepoint_files.py
@@ -0,0 +1,175 @@
+"""
+Rename files in SharePoint property folders to the canonical format:
+    {UPRN}_{Street} {Postcode}_{Document Name}.ext
+
+Set DRY_RUN = False when ready to commit. Run from repo root.
+Required env vars: SHAREPOINT_CLIENT_ID, SHAREPOINT_CLIENT_SECRET,
+                   SHAREPOINT_TENANT_ID, SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID
+"""
+
+import csv
+import os
+from typing import Iterator, Optional, Set, Tuple
+
+from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
+from utils.logger import setup_logger
+from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
+from utils.sharepoint.domna_sites import DomnaSites
+
+DRY_RUN: bool = True
+CSV_PATH: str = "scripts/sero_address_list.csv"
+
+BASE_PATH = (
+    "Osmosis-ACD Projects/Sero-Clarion Housing/"
+    "Sero Project Documents/Property Folders"
+)
+ASSESSMENT_SUBFOLDER = "A. Assessment"
+REQUIRED_COLUMNS = ("UPRN", "Address", "Postcode")
+# Characters that may separate an old address prefix from the document name.
+SEPARATOR_CHARS = " -_,"
+
+logger = setup_logger()
+
+
+def build_canonical_filename(
+    uprn: str, address: str, postcode: str, original_name: str
+) -> Optional[str]:
+    """
+    Returns the canonical filename, or None if the file is already renamed.
+
+    Already-renamed: name starts with "{uprn}_".
+    Strips any existing address prefix (most specific variant first, matched
+    case-insensitively, with or without a comma before the postcode) and any
+    separator characters left in front of the document name, then inserts the
+    canonical prefix.
+    """
+    if original_name.startswith(f"{uprn}_"):
+        return None
+
+    stem, ext = os.path.splitext(original_name)
+    stem_lower = stem.lower()
+
+    street = address.split(",")[0].strip()
+    # Most specific first, so the longest matching prefix is the one removed.
+    prefixes = [
+        f"{address}, {postcode}",
+        f"{address} {postcode}",
+        address,
+        f"{street}, {postcode}",
+        f"{street} {postcode}",
+        street,
+    ]
+
+    doc_name = stem
+    for prefix in prefixes:
+        if prefix and stem_lower.startswith(prefix.lower()):
+            doc_name = stem[len(prefix) :]
+            break
+
+    # Drop whatever separated the old prefix from the document name (" - ",
+    # "_", ", " ...) so the result never contains doubled separators.
+    doc_name = doc_name.lstrip(SEPARATOR_CHARS).strip()
+
+    street_post = f"{street} {postcode}"
+    if doc_name:
+        return f"{uprn}_{street_post}_{doc_name}{ext}"
+    return f"{uprn}_{street_post}{ext}"
+
+
+def _read_properties(csv_path: str) -> Iterator[Tuple[str, str, str]]:
+    """
+    Yields (uprn, address, postcode) for every complete row of the CSV.
+
+    Raises ValueError if a required column is missing. Rows with a blank or
+    missing value are logged and skipped.
+    """
+    with open(csv_path, newline="", encoding="utf-8-sig") as f:
+        reader = csv.DictReader(f)
+        required = set(REQUIRED_COLUMNS)
+        if not reader.fieldnames or not required.issubset(set(reader.fieldnames)):
+            raise ValueError(
+                f"CSV missing required columns. Expected {required}, got {reader.fieldnames}"
+            )
+
+        for row in reader:
+            # DictReader fills short rows with None, so guard before .strip().
+            uprn, address, postcode = (
+                (row.get(column) or "").strip() for column in REQUIRED_COLUMNS
+            )
+            if not (uprn and address and postcode):
+                logger.warning(
+                    f"Skipping incomplete CSV row (line {reader.line_num}): {row}"
+                )
+                continue
+            yield uprn, address, postcode
+
+
+def _rename_files_for_property(
+    sp_client: DomnaSharepointClient, uprn: str, address: str, postcode: str
+) -> None:
+    """
+    Renames (or, when DRY_RUN is set, only logs) the files in one property's
+    assessment folder. A missing folder is logged and skipped.
+    """
+    folder_path = (
+        f"{BASE_PATH}/{address}, {postcode}"
+        f"/{SharepointSubfolders.ASSESSMENT.value}/{ASSESSMENT_SUBFOLDER}"
+    )
+
+    try:
+        contents = sp_client.get_folders_in_path(folder_path)
+    except ValueError:
+        logger.warning(f"Missing folder for UPRN {uprn}: {folder_path}")
+        return
+
+    items = contents.get("value", [])
+    # Track the names already used in the folder so two files are never mapped
+    # to the same target; compared in lower case on the assumption that
+    # SharePoint treats item names case-insensitively.
+    taken: Set[str] = {item["name"].lower() for item in items}
+
+    for item in items:
+        if "file" not in item:
+            continue
+
+        original_name: str = item["name"]
+        new_name = build_canonical_filename(uprn, address, postcode, original_name)
+
+        if new_name is None:
+            continue
+
+        if new_name.lower() in taken:
+            logger.warning(
+                f'Skipping "{original_name}": "{new_name}" already exists (UPRN: {uprn})'
+            )
+            continue
+
+        if DRY_RUN:
+            logger.info(
+                f'[DRY RUN] Renaming: "{original_name}" → "{new_name}" (UPRN: {uprn})'
+            )
+            taken.add(new_name.lower())
+            continue
+
+        try:
+            sp_client.rename_file(item["id"], new_name)
+        except Exception as e:
+            # Deliberately broad: one failed rename must not abort the batch.
+            logger.error(
+                f'Failed to rename "{original_name}" → "{new_name}" (UPRN: {uprn}): {e}'
+            )
+        else:
+            taken.add(new_name.lower())
+            logger.info(f'Renamed: "{original_name}" → "{new_name}" (UPRN: {uprn})')
+
+
+def main() -> None:
+    """Renames the files of every property listed in CSV_PATH."""
+    sp_client = DomnaSharepointClient(DomnaSites.SOCIAL_HOUSING_WAVE_3)
+
+    for uprn, address, postcode in _read_properties(CSV_PATH):
+        _rename_files_for_property(sp_client, uprn, address, postcode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/utils/sharepoint/domna_sharepoint_client.py b/utils/sharepoint/domna_sharepoint_client.py
index 5e0255ac..3e9168ba 100644
--- a/utils/sharepoint/domna_sharepoint_client.py
+++ b/utils/sharepoint/domna_sharepoint_client.py
@@ -125,6 +125,16 @@ class DomnaSharepointClient:
         self.logger.debug(f"Downloaded SharePoint file to: {local_path}")
         return True
 
+    def rename_file(self, item_id: str, new_name: str) -> None:
+        """Renames the item `item_id` to `new_name` via a SharePointClient for this site."""
+        sharepoint_client = SharePointClient(
+            tenant_id=self.sharepoint_tenant_id,
+            client_id=self.sharepoint_client_id,
+            client_secret=self.sharepoint_client_secret,
+            site_id=self.sharepoint_drive.value,
+        )
+        sharepoint_client.rename_file(item_id, new_name)
+
     def create_temp_file(self, content: BytesIO, path: str):
         # Ensure the path is under /tmp/
         new_path = os.path.join("/tmp/sharepoint", path)
diff --git a/utils/sharepoint/sharepoint_client.py b/utils/sharepoint/sharepoint_client.py
index 5807c3bd..38107dbf 100644
--- a/utils/sharepoint/sharepoint_client.py
+++ b/utils/sharepoint/sharepoint_client.py
@@ -335,6 +335,17 @@ class SharePointClient:
         if retry == "retry":
             return self.upload_file(file_name, sharepoint_parent_id, file_stream)
 
+    @api_call_decorator
+    def rename_file(self, item_id: str, new_name: str) -> None:
+        """
+        PATCH /drives/{drive_id}/items/{item_id}
+
+        Renames a file in-place. Caller should discard the return value.
+        """
+        url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/items/{item_id}"
+        data: Dict[str, Any] = {"name": new_name}
+        return "PATCH", url, data  # type: ignore[return-value]
+
     @staticmethod
     def download_sharepoint_file(download_url: str) -> BytesIO:
         """
From 9f7c16ccbd35e00d081701d5b46393ba3736278d Mon Sep 17 00:00:00 2001
From: Daniel Roth
Date: Thu, 21 May 2026 15:30:03 +0000
Subject: [PATCH 2/2] add address list

---
 scripts/sero_address_list.csv | 51 +++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 scripts/sero_address_list.csv

diff --git a/scripts/sero_address_list.csv b/scripts/sero_address_list.csv
new file mode 100644
index 00000000..8c9401c9
--- /dev/null
+++ b/scripts/sero_address_list.csv
@@ -0,0 +1,51 @@
+UPRN,Address,Postcode
+U1035052,"1 Sudbury Crescent, Bromley",BR1 4PY
+U1027449,"11 Station Road, Bromley",BR1 3LP
+U1021310,"126 Faringdon Avenue, Bromley",BR2 8BU
+U1010811,"13 Gilbert Road, Bromley",BR1 3QP
+U1024017,"13 Manor Way, Bromley",BR2 8ES
+U1042232,"154 Southover, Bromley",BR1 4RZ
+U1009369,"17 Minster Road, Bromley",BR1 4DY
+U1022305,"18a Lansdowne Road, Bromley",BR1 3LZ
+U1033165,"2 Laburnum Way, Bromley",BR2 8BZ
+U1035326,"2 Whitebeam Avenue, Bromley",BR2 8DL
+U1037872,"20 Sudbury Crescent, Bromley",BR1 4PZ
+U1007432,"21 Detling Road, Bromley",BR1 4SH
+U1005123,"24 Bonville Road, Bromley",BR1 4QA
+U1034810,"24 Newbury Road, Bromley",BR2 0QW
+U1020351,"27 Laburnum Way, Bromley",BR2 8BY
+U1009511,"27 Newbury Road, Bromley",BR2 0QN
+U1034985,"272 Southborough Lane, Bromley",BR2 8AS
+U1037954,"28 Treewall Gardens, Bromley",BR1 5BT
+U1038103,"29 Whitebeam Avenue, Bromley",BR2 8DJ
+U1013358,"3 Bird In Hand Lane, Bromley",BR1 2NA
+U1024709,"3 Parkfield Way, Bromley",BR2 8AE
+U1031058,"303 Keedonwood Road, Bromley",BR1 4QR
+U1014077,"32 Aylesbury Road, Bromley",BR2 0QP
+U1019564,"32 Brook Lane, Bromley",BR1 4PU
+U1020237,"33 Hornbeam Way, Bromley",BR2 8DB
+U1027493,"35 Sudbury Crescent, Bromley",BR1 4PY
+U1042298,"39 Sudbury Crescent, Bromley",BR1 4PY
+U1024698,"4 Palace View, Bromley",BR1 3EL
+U1052186,"4 Ravensleigh Gardens, Bromley",BR1 5SN
+U1042153,"4 Scotts Road, Bromley",BR1 3QD
+U1037814,"42 Stanley Road, Bromley",BR2 9JH
+U1014078,"43 Aylesbury Road, Bromley",BR2 0QR
+U1007701,"46 Harwood Avenue, Bromley",BR1 3DU
+U1036758,"46 Newbury Road, Bromley",BR2 0QW
+U1025820,"46 Princes Plain, Bromley",BR2 8LE
+U1022991,"5 Link Way, Bromley",BR2 8JH
+U1024484,"55 Mounthurst Road, Bromley",BR2 7PG
+U1014793,"59 Headcorn Road, Bromley",BR1 4SQ
+U1037465,"6 Princes Plain, Bromley",BR2 8LE
+U1009202,"63 Mead Way, Bromley",BR2 9ER
+U1021353,"66 George Lane, Bromley",BR2 7LQ
+U1042733,"68 Whitebeam Avenue, Bromley",BR2 8DL
+U1030962,"7 Ravensleigh Gardens, Bromley",BR1 5SN
+U1031294,"70 London Lane, Bromley",BR1 4HE
+U1037450,"70 Pontefract Road, Bromley",BR1 4RB
+U1014589,"71 Empress Drive, Chislehurst",BR7 5BQ
+U1052429,"76 Southover, Bromley",BR1 4RY
+U1020199,"78 Hillside Road, Bromley",BR2 0ST
+U1024511,"81 Nightingale Lane, Bromley",BR1 2SA
+U1009194,"84 Mays Hill Road, Bromley",BR2 0HT