diff --git a/applications/sharepoint_renamer/handler.py b/applications/sharepoint_renamer/handler.py
new file mode 100644
index 00000000..5a290878
--- /dev/null
+++ b/applications/sharepoint_renamer/handler.py
@@ -0,0 +1,13 @@
+from typing import Any
+
+from orchestration.sharepoint_renamer_orchestrator import SharepointRenamerOrchestrator
+from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
+from utils.sharepoint.domna_sites import DomnaSites
+
+CSV_PATH = "scripts/sero_address_list.csv"
+
+
+def handler(event: dict[str, Any], context: Any) -> None:
+    sp_client = DomnaSharepointClient(DomnaSites.SOCIAL_HOUSING_WAVE_3)
+    orchestrator = SharepointRenamerOrchestrator(sp_client, CSV_PATH)
+    orchestrator.run()
diff --git a/applications/sharepoint_renamer/handler/Dockerfile b/applications/sharepoint_renamer/handler/Dockerfile
index 10c40e89..bb946cc2 100644
--- a/applications/sharepoint_renamer/handler/Dockerfile
+++ b/applications/sharepoint_renamer/handler/Dockerfile
@@ -8,9 +8,8 @@ RUN pip install --no-cache-dir -r requirements.txt
 COPY utils/ utils/
 COPY backend/__init__.py backend/__init__.py
 COPY backend/pashub_fetcher/ backend/pashub_fetcher/
+COPY orchestration/ orchestration/
 COPY applications/sharepoint_renamer/ applications/sharepoint_renamer/
-COPY scripts/__init__.py scripts/__init__.py
-COPY scripts/rename_sharepoint_files.py scripts/rename_sharepoint_files.py
 COPY scripts/sero_address_list.csv scripts/sero_address_list.csv
 
-CMD ["applications.sharepoint_renamer.handler.handler.handler"]
+CMD ["applications.sharepoint_renamer.handler.handler"]
diff --git a/applications/sharepoint_renamer/handler/handler.py b/applications/sharepoint_renamer/handler/handler.py
deleted file mode 100644
index 850d1ae6..00000000
--- a/applications/sharepoint_renamer/handler/handler.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from typing import Any
-
-from scripts.rename_sharepoint_files import main
-
-
-def handler(event: dict[str, Any], context: Any) -> None:
-    main()
diff --git a/orchestration/sharepoint_renamer_orchestrator.py b/orchestration/sharepoint_renamer_orchestrator.py
new file mode 100644
index 00000000..764776ae
--- /dev/null
+++ b/orchestration/sharepoint_renamer_orchestrator.py
@@ -0,0 +1,113 @@
+import csv
+import logging
+import os
+from typing import Optional
+
+from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
+from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
+
+BASE_PATH = (
+    "Osmosis-ACD Projects/Sero-Clarion Housing/"
+    "Sero Project Documents/Property Folders"
+)
+ASSESSMENT_SUBFOLDER = "A. Assessment"
+
+logger = logging.getLogger(__name__)
+
+
+def build_canonical_filename(
+    uprn: str, address: str, postcode: str, original_name: str
+) -> Optional[str]:
+    """
+    Returns the canonical filename, or None if the file is already renamed.
+
+    Already-renamed: name starts with "{uprn}_".
+    Strips any existing address prefix (address+postcode first, then address alone)
+    before inserting the canonical prefix.
+    """
+    if original_name.startswith(f"{uprn}_"):
+        return None
+
+    stem, ext = os.path.splitext(original_name)
+    stem_lower = stem.lower()
+
+    street = address.split(",")[0].strip()
+    prefixes = [
+        f"{address} {postcode}",
+        address,
+        f"{street} {postcode}",
+        street,
+    ]
+
+    doc_name = stem
+    for prefix in prefixes:
+        if stem_lower.startswith(prefix.lower()):
+            doc_name = stem[len(prefix) :]
+            break
+
+    if doc_name.startswith(" - "):
+        doc_name = doc_name[3:]
+    elif doc_name.startswith(" _ "):
+        doc_name = doc_name[3:]
+    doc_name = doc_name.strip()
+
+    street_post = f"{street} {postcode}"
+    if doc_name:
+        return f"{uprn}_{street_post}_{doc_name}{ext}"
+    return f"{uprn}_{street_post}{ext}"
+
+
+class SharepointRenamerOrchestrator:
+    def __init__(self, sp_client: DomnaSharepointClient, csv_path: str) -> None:
+        self._sp_client = sp_client
+        self._csv_path = csv_path
+
+    def run(self) -> None:
+        with open(self._csv_path, newline="", encoding="utf-8-sig") as f:
+            reader = csv.DictReader(f)
+            required = {"UPRN", "Address", "Postcode"}
+            if not reader.fieldnames or not required.issubset(set(reader.fieldnames)):
+                raise ValueError(
+                    f"CSV missing required columns. Expected {required}, got {reader.fieldnames}"
+                )
+
+            for row in reader:
+                uprn = row["UPRN"].strip()
+                address = row["Address"].strip()
+                postcode = row["Postcode"].strip()
+                folder_path = (
+                    f"{BASE_PATH}/{address}, {postcode}"
+                    f"/{SharepointSubfolders.ASSESSMENT.value}/{ASSESSMENT_SUBFOLDER}"
+                )
+                self._process_folder(folder_path, uprn, address, postcode)
+
+    def _process_folder(
+        self, folder_path: str, uprn: str, address: str, postcode: str
+    ) -> None:
+        try:
+            contents = self._sp_client.get_folders_in_path(folder_path)
+        except ValueError:
+            logger.warning(f"Missing folder for UPRN {uprn}: {folder_path}")
+            return
+
+        for item in contents.get("value", []):
+            if "folder" in item:
+                self._process_folder(
+                    f"{folder_path}/{item['name']}", uprn, address, postcode
+                )
+            elif "file" in item:
+                original_name: str = item["name"]
+                new_name = build_canonical_filename(uprn, address, postcode, original_name)
+
+                if new_name is None:
+                    continue
+
+                try:
+                    self._sp_client.rename_file(item["id"], new_name)
+                    logger.info(
+                        f'Renamed: "{original_name}" → "{new_name}" (UPRN: {uprn})'
+                    )
+                except Exception as e:
+                    logger.error(
+                        f'Failed to rename "{original_name}" → "{new_name}" (UPRN: {uprn}): {e}'
+                    )
diff --git a/scripts/__init__.py b/scripts/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/scripts/rename_sharepoint_files.py b/scripts/rename_sharepoint_files.py
deleted file mode 100644
index 7ed126e3..00000000
--- a/scripts/rename_sharepoint_files.py
+++ /dev/null
@@ -1,137 +0,0 @@
-"""
-Rename files in SharePoint property folders to the canonical format:
-    {UPRN}_{Street} {Postcode}_{Document Name}.ext
-
-Set DRY_RUN = False when ready to commit. Run from repo root.
-Required env vars: SHAREPOINT_CLIENT_ID, SHAREPOINT_CLIENT_SECRET,
-                   SHAREPOINT_TENANT_ID, SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID
-"""
-
-import csv
-import os
-from typing import Optional
-
-from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
-from utils.logger import setup_logger
-from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
-from utils.sharepoint.domna_sites import DomnaSites
-
-DRY_RUN: bool = False
-CSV_PATH: str = "scripts/sero_address_list.csv"
-
-BASE_PATH = (
-    "Osmosis-ACD Projects/Sero-Clarion Housing/"
-    "Sero Project Documents/Property Folders"
-)
-ASSESSMENT_SUBFOLDER = "A. Assessment"
-
-logger = setup_logger()
-
-
-def build_canonical_filename(
-    uprn: str, address: str, postcode: str, original_name: str
-) -> Optional[str]:
-    """
-    Returns the canonical filename, or None if the file is already renamed.
-
-    Already-renamed: name starts with "{uprn}_".
-    Strips any existing address prefix (address+postcode first, then address alone)
-    before inserting the canonical prefix.
-    """
-    if original_name.startswith(f"{uprn}_"):
-        return None
-
-    stem, ext = os.path.splitext(original_name)
-    stem_lower = stem.lower()
-
-    street = address.split(",")[0].strip()
-    prefixes = [
-        f"{address} {postcode}",
-        address,
-        f"{street} {postcode}",
-        street,
-    ]
-
-    doc_name = stem
-    for prefix in prefixes:
-        if stem_lower.startswith(prefix.lower()):
-            doc_name = stem[len(prefix) :]
-            break
-
-    if doc_name.startswith(" - "):
-        doc_name = doc_name[3:]
-    elif doc_name.startswith(" _ "):
-        doc_name = doc_name[3:]
-    doc_name = doc_name.strip()
-
-    street_post = f"{street} {postcode}"
-    if doc_name:
-        return f"{uprn}_{street_post}_{doc_name}{ext}"
-    return f"{uprn}_{street_post}{ext}"
-
-
-def process_folder(
-    sp_client: DomnaSharepointClient,
-    folder_path: str,
-    uprn: str,
-    address: str,
-    postcode: str,
-) -> None:
-    try:
-        contents = sp_client.get_folders_in_path(folder_path)
-    except ValueError:
-        logger.warning(f"Missing folder for UPRN {uprn}: {folder_path}")
-        return
-
-    for item in contents.get("value", []):
-        if "folder" in item:
-            process_folder(
-                sp_client, f"{folder_path}/{item['name']}", uprn, address, postcode
-            )
-        elif "file" in item:
-            original_name: str = item["name"]
-            new_name = build_canonical_filename(uprn, address, postcode, original_name)
-
-            if new_name is None:
-                continue
-
-            if DRY_RUN:
-                logger.info(
-                    f'[DRY RUN] Renaming: "{original_name}" → "{new_name}" (UPRN: {uprn})'
-                )
-            else:
-                try:
-                    sp_client.rename_file(item["id"], new_name)
-                    logger.info(
-                        f'Renamed: "{original_name}" → "{new_name}" (UPRN: {uprn})'
-                    )
-                except Exception as e:
-                    logger.error(
-                        f'Failed to rename "{original_name}" → "{new_name}" (UPRN: {uprn}): {e}'
-                    )
-
-
-def main() -> None:
-    sp_client = DomnaSharepointClient(DomnaSites.SOCIAL_HOUSING_WAVE_3)
-
-    with open(CSV_PATH, newline="", encoding="utf-8-sig") as f:
-        reader = csv.DictReader(f)
-        required = {"UPRN", "Address", "Postcode"}
-        if not reader.fieldnames or not required.issubset(set(reader.fieldnames)):
-            raise ValueError(
-                f"CSV missing required columns. Expected {required}, got {reader.fieldnames}"
-            )
-
-        for row in reader:
-            uprn = row["UPRN"].strip()
-            address = row["Address"].strip()
-            postcode = row["Postcode"].strip()
-            folder_path = (
-                f"{BASE_PATH}/{address}, {postcode}"
-                f"/{SharepointSubfolders.ASSESSMENT.value}/{ASSESSMENT_SUBFOLDER}"
-            )
-            process_folder(sp_client, folder_path, uprn, address, postcode)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/tests/test_build_canonical_filename.py b/scripts/tests/test_build_canonical_filename.py
index 3890477c..67d4fcae 100644
--- a/scripts/tests/test_build_canonical_filename.py
+++ b/scripts/tests/test_build_canonical_filename.py
@@ -1,5 +1,5 @@
 # scripts/tests/test_build_canonical_filename.py
-from scripts.rename_sharepoint_files import build_canonical_filename
+from orchestration.sharepoint_renamer_orchestrator import build_canonical_filename
 
 UPRN = "10093456789"
 ADDRESS = "1 High Street, Anytown"