# Mirror of https://github.com/Hestia-Homes/Model.git
# Synced 2026-06-08 11:17:27 +00:00
# 128 lines, 4 KiB, Python
"""
Rename files in SharePoint property folders to the canonical format:
    {UPRN}_{Street} {Postcode}_{Document Name}.ext

Set DRY_RUN = False when ready to commit. Run from repo root.
Required env vars: SHAREPOINT_CLIENT_ID, SHAREPOINT_CLIENT_SECRET,
    SHAREPOINT_TENANT_ID, SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID
"""
|
|
|
|
import csv
|
|
import os
|
|
from typing import Optional
|
|
|
|
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
|
|
from utils.logger import setup_logger
|
|
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
|
from utils.sharepoint.domna_sites import DomnaSites
|
|
|
|
# While True, planned renames are only logged; set to False to apply them.
DRY_RUN: bool = True

# CSV providing the UPRN, Address and Postcode columns (path is relative to
# the repo root, which is where the script is expected to be run from).
CSV_PATH: str = "scripts/sero_address_list.csv"

# SharePoint folder containing one "{Address}, {Postcode}" folder per property.
BASE_PATH: str = (
    "Osmosis-ACD Projects/Sero-Clarion Housing/"
    "Sero Project Documents/Property Folders"
)

# Sub-folder, inside each property's assessment folder, whose files are renamed.
ASSESSMENT_SUBFOLDER: str = "A. Assessment"

logger = setup_logger()
|
|
|
|
|
|
def build_canonical_filename(
    uprn: str, address: str, postcode: str, original_name: str
) -> Optional[str]:
    """
    Build the canonical "{uprn}_{street} {postcode}_{document name}{ext}" name.

    The street is the part of ``address`` before its first comma. Any address
    prefix already present in the file name is removed (case-insensitively)
    before the canonical prefix is inserted, so the address is not duplicated.

    Args:
        uprn: Property reference used as the leading component of the name.
        address: Full address; only the text before the first comma is kept
            in the canonical name.
        postcode: Postcode appended to the street in the canonical name.
        original_name: Current file name, including its extension.

    Returns:
        The canonical file name, or None when ``original_name`` already
        starts with "{uprn}_" and is therefore treated as already renamed.
        When nothing is left after removing the address prefix, the document
        part is omitted: "{uprn}_{street} {postcode}{ext}".
    """
    if original_name.startswith(f"{uprn}_"):
        return None

    stem, ext = os.path.splitext(original_name)
    street = address.split(",")[0].strip()

    # Most specific candidates first, so the longest matching prefix wins.
    # The ", " variants cover names written like "{address}, {postcode}".
    prefixes = (
        f"{address}, {postcode}",
        f"{address} {postcode}",
        address,
        f"{street}, {postcode}",
        f"{street} {postcode}",
        street,
    )

    doc_name = stem
    for prefix in prefixes:
        if not prefix:
            continue
        # Compare an equal-length slice so the cut position is always valid.
        if stem[: len(prefix)].lower() != prefix.lower():
            continue
        remainder = stem[len(prefix):]
        # Only accept a match that ends on a word boundary; otherwise
        # "1 Main St" would cut "1 Main Street" down to "reet".
        if remainder and remainder[0].isalnum():
            continue
        doc_name = remainder
        break

    # Drop whatever separator joined the address to the document name
    # (" - ", "_", ", ", ...) so the result never contains "__" or ", ".
    doc_name = doc_name.lstrip(" \t-_,").strip()

    street_post = f"{street} {postcode}"
    if doc_name:
        return f"{uprn}_{street_post}_{doc_name}{ext}"
    return f"{uprn}_{street_post}{ext}"
|
|
|
|
|
|
def main() -> None:
    """
    Rename (or, in dry-run mode, preview) the assessment files of every
    property listed in CSV_PATH.

    For each CSV row, the files directly inside
    "{BASE_PATH}/{Address}, {Postcode}/<assessment folder>/{ASSESSMENT_SUBFOLDER}"
    are passed to build_canonical_filename. Files that are not yet canonical
    are renamed through the SharePoint client, or only logged while DRY_RUN
    is True.

    Rows lacking a UPRN, Address or Postcode value are skipped with a warning,
    as are properties whose folder lookup raises ValueError and files whose
    target name is already taken in the same folder. A failed rename is logged
    and does not stop the run.

    Raises:
        ValueError: If the CSV does not have UPRN, Address and Postcode columns.
    """
    sp_client = DomnaSharepointClient(DomnaSites.SOCIAL_HOUSING_WAVE_3)

    with open(CSV_PATH, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        required = {"UPRN", "Address", "Postcode"}
        if not reader.fieldnames or not required.issubset(set(reader.fieldnames)):
            raise ValueError(
                f"CSV missing required columns. Expected {required}, got {reader.fieldnames}"
            )

        for row in reader:
            # DictReader fills the missing fields of a short row with None,
            # so normalise to "" before stripping.
            uprn = (row["UPRN"] or "").strip()
            address = (row["Address"] or "").strip()
            postcode = (row["Postcode"] or "").strip()
            if not (uprn and address and postcode):
                logger.warning(
                    f"Skipping CSV line {reader.line_num}: "
                    "UPRN, Address and Postcode are all required"
                )
                continue

            folder_path = (
                f"{BASE_PATH}/{address}, {postcode}"
                f"/{SharepointSubfolders.ASSESSMENT.value}/{ASSESSMENT_SUBFOLDER}"
            )

            try:
                contents = sp_client.get_folders_in_path(folder_path)
            except ValueError:
                logger.warning(f"Missing folder for UPRN {uprn}: {folder_path}")
                continue

            entries = contents.get("value", [])
            # Names currently present in the folder, compared case-insensitively.
            # NOTE(review): assumes SharePoint treats names case-insensitively,
            # and rename_file's behaviour on a name conflict is not visible
            # here, so colliding targets are skipped rather than attempted.
            taken = {entry["name"].lower() for entry in entries if entry.get("name")}

            for item in entries:
                if "file" not in item:
                    continue

                original_name: str = item["name"]
                new_name = build_canonical_filename(
                    uprn, address, postcode, original_name
                )
                if new_name is None:
                    continue

                if new_name.lower() in taken:
                    logger.warning(
                        f'Skipping "{original_name}": target name "{new_name}" '
                        f"is already in use (UPRN: {uprn})"
                    )
                    continue

                if DRY_RUN:
                    logger.info(
                        f'[DRY RUN] Renaming: "{original_name}" → "{new_name}" (UPRN: {uprn})'
                    )
                else:
                    try:
                        sp_client.rename_file(item["id"], new_name)
                    except Exception as e:
                        # Best effort: one failed rename must not abort the batch.
                        logger.error(
                            f'Failed to rename "{original_name}" → "{new_name}" (UPRN: {uprn}): {e}'
                        )
                        continue
                    logger.info(
                        f'Renamed: "{original_name}" → "{new_name}" (UPRN: {uprn})'
                    )

                # Track the (planned) rename so later files in this folder are
                # checked against the folder's resulting contents.
                taken.discard(original_name.lower())
                taken.add(new_name.lower())


if __name__ == "__main__":
    main()
|