Model/scripts/rename_sharepoint_files.py
2026-05-20 16:26:07 +00:00

128 lines
4 KiB
Python

"""
Rename files in SharePoint property folders to the canonical format:
{UPRN}_{Street} {Postcode}_{Document Name}.ext
Set DRY_RUN = False to actually perform the renames (the default, True, only
logs what would be renamed). Run from repo root.
Required env vars: SHAREPOINT_CLIENT_ID, SHAREPOINT_CLIENT_SECRET,
SHAREPOINT_TENANT_ID, SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID
"""
import csv
import os
from typing import Optional
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites
# When True, main() only logs the renames it would perform; set to False to
# actually call the SharePoint rename.
DRY_RUN: bool = True
# CSV listing the properties to process. main() requires the columns
# "UPRN", "Address" and "Postcode". The path is relative, so it is resolved
# against the current working directory.
CSV_PATH: str = "scripts/sero_address_list.csv"
# SharePoint folder under which main() expects one "{Address}, {Postcode}"
# folder per CSV row.
BASE_PATH = (
"Osmosis-ACD Projects/Sero-Clarion Housing/"
"Sero Project Documents/Property Folders"
)
# Last path segment of the per-property folder whose files are renamed.
ASSESSMENT_SUBFOLDER = "A. Assessment"
# Module-level logger used for all progress / warning / error output.
logger = setup_logger()
def build_canonical_filename(
    uprn: str, address: str, postcode: str, original_name: str
) -> Optional[str]:
    """
    Return the canonical filename ``{uprn}_{street} {postcode}_{doc name}{ext}``.

    ``street`` is the part of ``address`` before the first comma.

    Args:
        uprn: Property reference used as the filename prefix.
        address: Full address as listed for the property.
        postcode: Postcode of the property.
        original_name: Current filename, including its extension.

    Returns:
        The new filename, or None if ``original_name`` already starts with
        ``"{uprn}_"`` (treated as already renamed). If nothing is left of the
        name once the address prefix is removed, the result is
        ``{uprn}_{street} {postcode}{ext}``.

    Any existing address prefix is removed (case-insensitively) before the
    canonical prefix is inserted. Candidates are tried most-specific first:
    address + postcode, address, street + postcode, street. A candidate only
    counts when it ends on a word boundary, so street "1 Acacia Ave" does not
    chop "1 Acacia Avenue Survey" into "nue Survey". Leading separators
    (spaces, "-", "_") left over after the prefix are dropped so the result
    never contains a doubled "_".
    """
    if original_name.startswith(f"{uprn}_"):
        return None

    stem, ext = os.path.splitext(original_name)
    street = address.split(",")[0].strip()

    # Ordered most-specific first so the longest matching prefix wins.
    prefixes = (
        f"{address} {postcode}",
        address,
        f"{street} {postcode}",
        street,
    )

    doc_name = stem
    for prefix in prefixes:
        if not prefix.strip():
            # A blank prefix would "match" everything without removing anything.
            continue
        # Compare against the same-length slice of the original stem so the
        # cut position is always correct, even if lowercasing changes length.
        if stem[: len(prefix)].lower() != prefix.lower():
            continue
        remainder = stem[len(prefix):]
        if remainder[:1].isalnum():
            # Prefix ended mid-word (e.g. "Ave" inside "Avenue"): not a match.
            continue
        doc_name = remainder
        break

    # Drop whatever separator followed the prefix (" - ", " _ ", "_", "-", ...).
    doc_name = doc_name.strip().lstrip(" _-")

    street_post = f"{street} {postcode}"
    if doc_name:
        return f"{uprn}_{street_post}_{doc_name}{ext}"
    return f"{uprn}_{street_post}{ext}"
def main() -> None:
    """
    Rename (or, when DRY_RUN is True, only log) the files of every property in the CSV.

    For each row of CSV_PATH the files directly inside
    ``{BASE_PATH}/{Address}, {Postcode}/<assessment subfolder>/A. Assessment``
    are listed, and each file for which build_canonical_filename() returns a
    new name is renamed to it. Rows with a blank UPRN, Address or Postcode
    are skipped with a warning, as are properties whose folder lookup raises
    ValueError. A failed rename is logged and does not stop the run.

    Raises:
        ValueError: if the CSV has no header or lacks one of the required
            columns "UPRN", "Address", "Postcode".
    """
    sp_client = DomnaSharepointClient(DomnaSites.SOCIAL_HOUSING_WAVE_3)
    required = {"UPRN", "Address", "Postcode"}

    with open(CSV_PATH, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        if not reader.fieldnames or not required.issubset(set(reader.fieldnames)):
            raise ValueError(
                f"CSV missing required columns. Expected {required}, got {reader.fieldnames}"
            )

        for row in reader:
            # DictReader fills the missing cells of a short row with None.
            uprn = (row["UPRN"] or "").strip()
            address = (row["Address"] or "").strip()
            postcode = (row["Postcode"] or "").strip()

            # A blank value would yield "_..." filenames or a bogus folder
            # path, so skip the row rather than rename files incorrectly.
            if not (uprn and address and postcode):
                logger.warning(
                    f"Skipping CSV line {reader.line_num}: "
                    "blank UPRN, Address or Postcode"
                )
                continue

            folder_path = (
                f"{BASE_PATH}/{address}, {postcode}"
                f"/{SharepointSubfolders.ASSESSMENT.value}/{ASSESSMENT_SUBFOLDER}"
            )

            try:
                contents = sp_client.get_folders_in_path(folder_path)
            except ValueError:
                # NOTE(review): assumes the client raises ValueError when the
                # path does not exist - confirm against DomnaSharepointClient.
                logger.warning(f"Missing folder for UPRN {uprn}: {folder_path}")
                continue

            for item in contents.get("value", []):
                # Only entries carrying a "file" key are renamed; anything
                # else (presumably sub-folders) is left untouched.
                if "file" not in item:
                    continue

                original_name: str = item["name"]
                new_name = build_canonical_filename(
                    uprn, address, postcode, original_name
                )
                if new_name is None:
                    # Already in canonical form.
                    continue

                if DRY_RUN:
                    logger.info(
                        f'[DRY RUN] Renaming: "{original_name}" -> "{new_name}" (UPRN: {uprn})'
                    )
                    continue

                try:
                    sp_client.rename_file(item["id"], new_name)
                except Exception as e:
                    # Best effort: one failed rename must not abort the batch.
                    logger.error(
                        f'Failed to rename "{original_name}" -> "{new_name}" (UPRN: {uprn}): {e}'
                    )
                else:
                    logger.info(
                        f'Renamed: "{original_name}" -> "{new_name}" (UPRN: {uprn})'
                    )


if __name__ == "__main__":
    main()