""" Rename files in SharePoint property folders to the canonical format: {UPRN}_{Street} {Postcode}_{Document Name}.ext Set DRY_RUN = False when ready to commit. Run from repo root. Required env vars: SHAREPOINT_CLIENT_ID, SHAREPOINT_CLIENT_SECRET, SHAREPOINT_TENANT_ID, SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID """ import csv import os from typing import Optional from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders from utils.logger import setup_logger from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient from utils.sharepoint.domna_sites import DomnaSites DRY_RUN: bool = True CSV_PATH: str = "scripts/sero_address_list.csv" BASE_PATH = ( "Osmosis-ACD Projects/Sero-Clarion Housing/" "Sero Project Documents/Property Folders" ) ASSESSMENT_SUBFOLDER = "A. Assessment" logger = setup_logger() def build_canonical_filename( uprn: str, address: str, postcode: str, original_name: str ) -> Optional[str]: """ Returns the canonical filename, or None if the file is already renamed. Already-renamed: name starts with "{uprn}_". Strips any existing address prefix (address+postcode first, then address alone) before inserting the canonical prefix. """ if original_name.startswith(f"{uprn}_"): return None stem, ext = os.path.splitext(original_name) stem_lower = stem.lower() street = address.split(",")[0].strip() prefixes = [ f"{address} {postcode}", address, f"{street} {postcode}", street, ] doc_name = stem for prefix in prefixes: if stem_lower.startswith(prefix.lower()): doc_name = stem[len(prefix) :] break if doc_name.startswith(" - "): doc_name = doc_name[3:] elif doc_name.startswith(" _ "): doc_name = doc_name[3:] doc_name = doc_name.strip() street_post = f"{street} {postcode}" if doc_name: return f"{uprn}_{street_post}_{doc_name}{ext}" return f"{uprn}_{street_post}{ext}" def main() -> None: sp_client = DomnaSharepointClient(DomnaSites.SOCIAL_HOUSING_WAVE_3) with open(CSV_PATH, newline="", encoding="utf-8-sig") as f: reader = csv.DictReader(f) required = {"UPRN", "Address", "Postcode"} if not reader.fieldnames or not required.issubset(set(reader.fieldnames)): raise ValueError( f"CSV missing required columns. Expected {required}, got {reader.fieldnames}" ) for row in reader: uprn = row["UPRN"].strip() address = row["Address"].strip() postcode = row["Postcode"].strip() folder_path = ( f"{BASE_PATH}/{address}, {postcode}" f"/{SharepointSubfolders.ASSESSMENT.value}/{ASSESSMENT_SUBFOLDER}" ) try: contents = sp_client.get_folders_in_path(folder_path) except ValueError: logger.warning(f"Missing folder for UPRN {uprn}: {folder_path}") continue for item in contents.get("value", []): if "file" not in item: continue original_name: str = item["name"] new_name = build_canonical_filename( uprn, address, postcode, original_name ) if new_name is None: continue if DRY_RUN: logger.info( f'[DRY RUN] Renaming: "{original_name}" → "{new_name}" (UPRN: {uprn})' ) else: try: sp_client.rename_file(item["id"], new_name) logger.info( f'Renamed: "{original_name}" → "{new_name}" (UPRN: {uprn})' ) except Exception as e: logger.error( f'Failed to rename "{original_name}" → "{new_name}" (UPRN: {uprn}): {e}' ) if __name__ == "__main__": main()