mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
Move SharePoint renamer logic from scripts/ into orchestrator and app-root handler
This commit is contained in:
parent
8cb0e986e6
commit
beb4e5d0d9
7 changed files with 129 additions and 148 deletions
13
applications/sharepoint_renamer/handler.py
Normal file
13
applications/sharepoint_renamer/handler.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
from typing import Any
|
||||
|
||||
from orchestration.sharepoint_renamer_orchestrator import SharepointRenamerOrchestrator
|
||||
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
||||
from utils.sharepoint.domna_sites import DomnaSites
|
||||
|
||||
CSV_PATH = "scripts/sero_address_list.csv"
|
||||
|
||||
|
||||
def handler(event: dict[str, Any], context: Any) -> None:
    """Run the SharePoint renamer once over the bundled address CSV.

    Builds a SharePoint client for the SOCIAL_HOUSING_WAVE_3 site, hands it
    to a SharepointRenamerOrchestrator together with CSV_PATH, and runs it.

    Args:
        event: Invocation payload; not read by this handler.
        context: Invocation context; not read by this handler.
    """
    SharepointRenamerOrchestrator(
        DomnaSharepointClient(DomnaSites.SOCIAL_HOUSING_WAVE_3),
        CSV_PATH,
    ).run()
|
||||
|
|
@ -8,9 +8,8 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|||
COPY utils/ utils/
|
||||
COPY backend/__init__.py backend/__init__.py
|
||||
COPY backend/pashub_fetcher/ backend/pashub_fetcher/
|
||||
COPY orchestration/ orchestration/
|
||||
COPY applications/sharepoint_renamer/ applications/sharepoint_renamer/
|
||||
COPY scripts/__init__.py scripts/__init__.py
|
||||
COPY scripts/rename_sharepoint_files.py scripts/rename_sharepoint_files.py
|
||||
COPY scripts/sero_address_list.csv scripts/sero_address_list.csv
|
||||
|
||||
CMD ["applications.sharepoint_renamer.handler.handler.handler"]
|
||||
CMD ["applications.sharepoint_renamer.handler.handler"]
|
||||
|
|
|
|||
|
|
@ -1,7 +0,0 @@
|
|||
from typing import Any
|
||||
|
||||
from scripts.rename_sharepoint_files import main
|
||||
|
||||
|
||||
def handler(event: dict[str, Any], context: Any) -> None:
    """Delegate to the rename script's ``main``.

    Args:
        event: Invocation payload; not read by this handler.
        context: Invocation context; not read by this handler.
    """
    main()
|
||||
113
orchestration/sharepoint_renamer_orchestrator.py
Normal file
113
orchestration/sharepoint_renamer_orchestrator.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
import csv
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
|
||||
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
||||
|
||||
BASE_PATH = (
|
||||
"Osmosis-ACD Projects/Sero-Clarion Housing/"
|
||||
"Sero Project Documents/Property Folders"
|
||||
)
|
||||
ASSESSMENT_SUBFOLDER = "A. Assessment"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def build_canonical_filename(
    uprn: str, address: str, postcode: str, original_name: str
) -> Optional[str]:
    """
    Returns the canonical filename, or None if the file is already renamed.

    Canonical format: "{uprn}_{street} {postcode}_{document name}{ext}", where
    street is the part of ``address`` before its first comma. When nothing is
    left of the name after stripping, the "_{document name}" part is omitted.

    Already-renamed: name starts with "{uprn}_".
    Strips any existing address prefix before inserting the canonical prefix.
    Prefixes are tried longest-first and matched case-insensitively: the full
    address followed by the postcode (space- or comma-separated), the full
    address alone, then the same three forms using only the street.

    Args:
        uprn: Property reference placed at the front of the new name.
        address: Property address; may contain commas.
        postcode: Property postcode.
        original_name: Current filename including its extension.

    Returns:
        The new filename, or None when ``original_name`` already starts with
        "{uprn}_".
    """
    if original_name.startswith(f"{uprn}_"):
        return None

    stem, ext = os.path.splitext(original_name)
    stem_lower = stem.lower()

    street = address.split(",")[0].strip()
    # The ", " variants cover files named like their property folder
    # ("{address}, {postcode}"); without them the bare address/street prefix
    # would match first and leave ", {postcode}" stuck to the document name.
    prefixes = (
        f"{address} {postcode}",
        f"{address}, {postcode}",
        address,
        f"{street} {postcode}",
        f"{street}, {postcode}",
        street,
    )

    doc_name = stem
    for prefix in prefixes:
        if stem_lower.startswith(prefix.lower()):
            doc_name = stem[len(prefix):]
            break

    # Drop a single leading " - " or " _ " separator, then surrounding spaces.
    if doc_name.startswith((" - ", " _ ")):
        doc_name = doc_name[3:]
    doc_name = doc_name.strip()

    street_post = f"{street} {postcode}"
    if doc_name:
        return f"{uprn}_{street_post}_{doc_name}{ext}"
    return f"{uprn}_{street_post}{ext}"
|
||||
|
||||
|
||||
class SharepointRenamerOrchestrator:
    """Renames files in each CSV-listed property's assessment folder.

    For every row of the CSV, the folder
    "{BASE_PATH}/{address}, {postcode}/<assessment subfolder>/{ASSESSMENT_SUBFOLDER}"
    is walked recursively and each file is renamed to the name produced by
    ``build_canonical_filename``.
    """

    def __init__(self, sp_client: DomnaSharepointClient, csv_path: str) -> None:
        """Store the SharePoint client and the path of the address CSV.

        Args:
            sp_client: Client used to list folders and rename files.
            csv_path: Path to a CSV with UPRN, Address and Postcode columns.
        """
        self._sp_client = sp_client
        self._csv_path = csv_path

    def run(self) -> None:
        """Process every property listed in the CSV.

        Rows where UPRN, Address or Postcode is missing or blank are skipped
        with a warning instead of aborting the run.

        Raises:
            ValueError: If the CSV header lacks any of the required columns.
        """
        with open(self._csv_path, newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)
            required = {"UPRN", "Address", "Postcode"}
            if not reader.fieldnames or not required.issubset(set(reader.fieldnames)):
                raise ValueError(
                    f"CSV missing required columns. Expected {required}, got {reader.fieldnames}"
                )

            for row in reader:
                # DictReader fills cells missing from a short row with None,
                # so coalesce before stripping.
                uprn = (row["UPRN"] or "").strip()
                address = (row["Address"] or "").strip()
                postcode = (row["Postcode"] or "").strip()
                if not (uprn and address and postcode):
                    # A blank UPRN would otherwise produce "_"-prefixed names
                    # and a blank address points at a non-existent folder.
                    logger.warning(
                        f"Skipping CSV line {reader.line_num}: "
                        "UPRN, Address and Postcode are all required"
                    )
                    continue

                folder_path = (
                    f"{BASE_PATH}/{address}, {postcode}"
                    f"/{SharepointSubfolders.ASSESSMENT.value}/{ASSESSMENT_SUBFOLDER}"
                )
                self._process_folder(folder_path, uprn, address, postcode)

    def _process_folder(
        self, folder_path: str, uprn: str, address: str, postcode: str
    ) -> None:
        """Rename the files in ``folder_path`` and, recursively, its subfolders.

        A ValueError from the folder listing is logged as a missing folder and
        the folder is skipped. A failure to rename one file is logged and does
        not stop the remaining files from being processed.
        """
        try:
            contents = self._sp_client.get_folders_in_path(folder_path)
        except ValueError:
            logger.warning(f"Missing folder for UPRN {uprn}: {folder_path}")
            return

        # NOTE(review): the listing is read as a dict whose "value" list holds
        # items tagged with a "folder" or "file" key — confirm against the client.
        for item in contents.get("value", []):
            if "folder" in item:
                self._process_folder(
                    f"{folder_path}/{item['name']}", uprn, address, postcode
                )
            elif "file" in item:
                original_name: str = item["name"]
                new_name = build_canonical_filename(uprn, address, postcode, original_name)

                # None means the file already carries the "{uprn}_" prefix.
                if new_name is None:
                    continue

                try:
                    self._sp_client.rename_file(item["id"], new_name)
                    logger.info(
                        f'Renamed: "{original_name}" → "{new_name}" (UPRN: {uprn})'
                    )
                except Exception as e:
                    # Deliberately broad: one failed rename must not abort the batch.
                    logger.error(
                        f'Failed to rename "{original_name}" → "{new_name}" (UPRN: {uprn}): {e}'
                    )
|
||||
|
|
@ -1,137 +0,0 @@
|
|||
"""
|
||||
Rename files in SharePoint property folders to the canonical format:
|
||||
{UPRN}_{Street} {Postcode}_{Document Name}.ext
|
||||
|
||||
Set DRY_RUN = False when ready to commit. Run from repo root.
|
||||
Required env vars: SHAREPOINT_CLIENT_ID, SHAREPOINT_CLIENT_SECRET,
|
||||
SHAREPOINT_TENANT_ID, SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID
|
||||
"""
|
||||
|
||||
import csv
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
|
||||
from utils.logger import setup_logger
|
||||
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
||||
from utils.sharepoint.domna_sites import DomnaSites
|
||||
|
||||
DRY_RUN: bool = False
|
||||
CSV_PATH: str = "scripts/sero_address_list.csv"
|
||||
|
||||
BASE_PATH = (
|
||||
"Osmosis-ACD Projects/Sero-Clarion Housing/"
|
||||
"Sero Project Documents/Property Folders"
|
||||
)
|
||||
ASSESSMENT_SUBFOLDER = "A. Assessment"
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def build_canonical_filename(
    uprn: str, address: str, postcode: str, original_name: str
) -> Optional[str]:
    """
    Returns the canonical filename, or None if the file is already renamed.

    Canonical format: "{uprn}_{street} {postcode}_{document name}{ext}", where
    street is the part of ``address`` before its first comma. When nothing is
    left of the name after stripping, the "_{document name}" part is omitted.

    Already-renamed: name starts with "{uprn}_".
    Strips any existing address prefix before inserting the canonical prefix.
    Prefixes are tried longest-first and matched case-insensitively: the full
    address followed by the postcode (space- or comma-separated), the full
    address alone, then the same three forms using only the street.

    Args:
        uprn: Property reference placed at the front of the new name.
        address: Property address; may contain commas.
        postcode: Property postcode.
        original_name: Current filename including its extension.

    Returns:
        The new filename, or None when ``original_name`` already starts with
        "{uprn}_".
    """
    if original_name.startswith(f"{uprn}_"):
        return None

    stem, ext = os.path.splitext(original_name)
    stem_lower = stem.lower()

    street = address.split(",")[0].strip()
    # The ", " variants cover files named like their property folder
    # ("{address}, {postcode}"); without them the bare address/street prefix
    # would match first and leave ", {postcode}" stuck to the document name.
    prefixes = (
        f"{address} {postcode}",
        f"{address}, {postcode}",
        address,
        f"{street} {postcode}",
        f"{street}, {postcode}",
        street,
    )

    doc_name = stem
    for prefix in prefixes:
        if stem_lower.startswith(prefix.lower()):
            doc_name = stem[len(prefix):]
            break

    # Drop a single leading " - " or " _ " separator, then surrounding spaces.
    if doc_name.startswith((" - ", " _ ")):
        doc_name = doc_name[3:]
    doc_name = doc_name.strip()

    street_post = f"{street} {postcode}"
    if doc_name:
        return f"{uprn}_{street_post}_{doc_name}{ext}"
    return f"{uprn}_{street_post}{ext}"
|
||||
|
||||
|
||||
def process_folder(
    sp_client: DomnaSharepointClient,
    folder_path: str,
    uprn: str,
    address: str,
    postcode: str,
) -> None:
    """Rename the files in ``folder_path`` and, recursively, its subfolders.

    A ValueError from the folder listing is logged as a missing folder and the
    folder is skipped. When DRY_RUN is set the intended rename is only logged.
    A failure to rename one file is logged and does not stop the rest.
    """
    try:
        listing = sp_client.get_folders_in_path(folder_path)
    except ValueError:
        logger.warning(f"Missing folder for UPRN {uprn}: {folder_path}")
        return

    for entry in listing.get("value", []):
        # Subfolders are walked depth-first before moving on to later entries.
        if "folder" in entry:
            child_path = f"{folder_path}/{entry['name']}"
            process_folder(sp_client, child_path, uprn, address, postcode)
            continue
        if "file" not in entry:
            continue

        original_name: str = entry["name"]
        new_name = build_canonical_filename(uprn, address, postcode, original_name)
        # None means the file already carries the "{uprn}_" prefix.
        if new_name is None:
            continue

        if DRY_RUN:
            logger.info(
                f'[DRY RUN] Renaming: "{original_name}" → "{new_name}" (UPRN: {uprn})'
            )
            continue

        try:
            sp_client.rename_file(entry["id"], new_name)
            logger.info(f'Renamed: "{original_name}" → "{new_name}" (UPRN: {uprn})')
        except Exception as e:
            # Deliberately broad: one failed rename must not abort the batch.
            logger.error(
                f'Failed to rename "{original_name}" → "{new_name}" (UPRN: {uprn}): {e}'
            )
|
||||
|
||||
|
||||
def main() -> None:
    """Rename files for every property listed in the CSV at CSV_PATH.

    Rows where UPRN, Address or Postcode is missing or blank are skipped with
    a warning instead of aborting the run.

    Raises:
        ValueError: If the CSV header lacks any of the required columns.
    """
    sp_client = DomnaSharepointClient(DomnaSites.SOCIAL_HOUSING_WAVE_3)

    with open(CSV_PATH, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        required = {"UPRN", "Address", "Postcode"}
        if not reader.fieldnames or not required.issubset(set(reader.fieldnames)):
            raise ValueError(
                f"CSV missing required columns. Expected {required}, got {reader.fieldnames}"
            )

        for row in reader:
            # DictReader fills cells missing from a short row with None,
            # so coalesce before stripping.
            uprn = (row["UPRN"] or "").strip()
            address = (row["Address"] or "").strip()
            postcode = (row["Postcode"] or "").strip()
            if not (uprn and address and postcode):
                # A blank UPRN would otherwise produce "_"-prefixed names
                # and a blank address points at a non-existent folder.
                logger.warning(
                    f"Skipping CSV line {reader.line_num}: "
                    "UPRN, Address and Postcode are all required"
                )
                continue

            folder_path = (
                f"{BASE_PATH}/{address}, {postcode}"
                f"/{SharepointSubfolders.ASSESSMENT.value}/{ASSESSMENT_SUBFOLDER}"
            )
            process_folder(sp_client, folder_path, uprn, address, postcode)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
# scripts/tests/test_build_canonical_filename.py
|
||||
from scripts.rename_sharepoint_files import build_canonical_filename
|
||||
from orchestration.sharepoint_renamer_orchestrator import build_canonical_filename
|
||||
|
||||
UPRN = "10093456789"
|
||||
ADDRESS = "1 High Street, Anytown"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue