mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
Merge pull request #1230 from Hestia-Homes/feature/deploy-sharepoint-renamer
Deploy sharepoint renamer
This commit is contained in:
commit
17420408e4
17 changed files with 430 additions and 174 deletions
39
.github/workflows/deploy_terraform.yml
vendored
39
.github/workflows/deploy_terraform.yml
vendored
|
|
@ -495,6 +495,45 @@ jobs:
|
|||
TF_VAR_pashub_coordination_password: ${{ secrets.PASHUB_COORDINATION_PASSWORD }}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Build SharePoint Renamer image and Push
|
||||
# ============================================================
|
||||
sharepoint_renamer_image:
|
||||
needs: [determine_stage, shared_terraform]
|
||||
uses: ./.github/workflows/_build_image.yml
|
||||
with:
|
||||
ecr_repo: sharepoint-renamer-${{ needs.determine_stage.outputs.stage }}
|
||||
dockerfile_path: applications/sharepoint_renamer/handler/Dockerfile
|
||||
build_context: .
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Deploy SharePoint Renamer Lambda
|
||||
# ============================================================
|
||||
sharepoint_renamer_lambda:
|
||||
needs: [sharepoint_renamer_image, determine_stage]
|
||||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: sharepoint_renamer
|
||||
lambda_path: deployment/terraform/lambda/sharepoint_renamer
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: sharepoint-renamer-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.sharepoint_renamer_image.outputs.image_digest }}
|
||||
terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
TF_VAR_sharepoint_client_id: ${{ secrets.SHAREPOINT_CLIENT_ID }}
|
||||
TF_VAR_sharepoint_client_secret: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
|
||||
TF_VAR_sharepoint_tenant_id: ${{ secrets.SHAREPOINT_TENANT_ID }}
|
||||
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID }}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Deploy FastAPI Lambda
|
||||
# ============================================================
|
||||
|
|
|
|||
0
applications/sharepoint_renamer/__init__.py
Normal file
0
applications/sharepoint_renamer/__init__.py
Normal file
13
applications/sharepoint_renamer/handler.py
Normal file
13
applications/sharepoint_renamer/handler.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
from typing import Any
|
||||
|
||||
from orchestration.sharepoint_renamer_orchestrator import SharepointRenamerOrchestrator
|
||||
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
||||
from utils.sharepoint.domna_sites import DomnaSites
|
||||
|
||||
CSV_PATH = "applications/sharepoint_renamer/sero_address_list.csv"
|
||||
|
||||
|
||||
def handler(event: dict[str, Any], context: Any) -> None:
    """
    AWS Lambda entry point for the SharePoint renamer.

    Builds a SharePoint client for the Social Housing Wave 3 site and runs the
    renamer orchestrator over the CSV bundled at ``CSV_PATH``.

    Args:
        event: Invocation payload; not inspected.
        context: Lambda context object; not inspected.
    """
    # Local import: this module has no file-level ``import logging``.
    import logging

    # The orchestrator reports every rename at INFO via
    # logging.getLogger(__name__). Loggers inherit the root level, which is
    # WARNING unless something configures it, so without this the per-file
    # "Renamed: ..." audit trail would be dropped. Only this one logger is
    # raised, to avoid INFO noise from third-party libraries.
    logging.getLogger("orchestration.sharepoint_renamer_orchestrator").setLevel(
        logging.INFO
    )

    sp_client = DomnaSharepointClient(DomnaSites.SOCIAL_HOUSING_WAVE_3)
    SharepointRenamerOrchestrator(sp_client, CSV_PATH).run()
|
||||
13
applications/sharepoint_renamer/handler/Dockerfile
Normal file
13
applications/sharepoint_renamer/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
FROM public.ecr.aws/lambda/python:3.11
|
||||
|
||||
WORKDIR /var/task
|
||||
|
||||
COPY applications/sharepoint_renamer/handler/requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY utils/ utils/
|
||||
COPY backend/__init__.py backend/__init__.py
|
||||
COPY backend/pashub_fetcher/ backend/pashub_fetcher/
|
||||
COPY orchestration/ orchestration/
|
||||
COPY applications/sharepoint_renamer/ applications/sharepoint_renamer/
|
||||
CMD ["applications.sharepoint_renamer.handler.handler"]
|
||||
0
applications/sharepoint_renamer/handler/__init__.py
Normal file
0
applications/sharepoint_renamer/handler/__init__.py
Normal file
2
applications/sharepoint_renamer/handler/requirements.txt
Normal file
2
applications/sharepoint_renamer/handler/requirements.txt
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
msal
|
||||
requests
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
UPRN,Address,Postcode
|
||||
U1014630,"118 Faringdon Avenue, Bromley",BR2 8BU
|
||||
|
22
deployment/terraform/lambda/sharepoint_renamer/main.tf
Normal file
22
deployment/terraform/lambda/sharepoint_renamer/main.tf
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
module "lambda" {
|
||||
source = "../../modules/lambda_with_sqs"
|
||||
|
||||
name = "sharepoint_renamer"
|
||||
stage = var.stage
|
||||
|
||||
image_uri = local.image_uri
|
||||
timeout = var.timeout
|
||||
|
||||
reserved_concurrent_executions = var.reserved_concurrent_executions
|
||||
|
||||
batch_size = var.batch_size
|
||||
|
||||
environment = {
|
||||
STAGE = var.stage
|
||||
|
||||
SHAREPOINT_CLIENT_ID = var.sharepoint_client_id
|
||||
SHAREPOINT_CLIENT_SECRET = var.sharepoint_client_secret
|
||||
SHAREPOINT_TENANT_ID = var.sharepoint_tenant_id
|
||||
SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID = var.social_housing_wave_3_sharepoint_id
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
output "sharepoint_renamer_queue_url" {
|
||||
value = module.lambda.queue_url
|
||||
description = "URL of the SharePoint Renamer SQS queue"
|
||||
}
|
||||
|
||||
output "sharepoint_renamer_queue_arn" {
|
||||
value = module.lambda.queue_arn
|
||||
description = "ARN of the SharePoint Renamer SQS queue"
|
||||
}
|
||||
20
deployment/terraform/lambda/sharepoint_renamer/provider.tf
Normal file
20
deployment/terraform/lambda/sharepoint_renamer/provider.tf
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = ">= 5.0"
|
||||
}
|
||||
}
|
||||
|
||||
backend "s3" {
|
||||
bucket = "sharepoint-renamer-terraform-state"
|
||||
key = "terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
|
||||
required_version = ">= 1.2.0"
|
||||
}
|
||||
|
||||
provider "aws" {
|
||||
region = "eu-west-2"
|
||||
}
|
||||
55
deployment/terraform/lambda/sharepoint_renamer/variables.tf
Normal file
55
deployment/terraform/lambda/sharepoint_renamer/variables.tf
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
variable "stage" {
|
||||
description = "Deployment stage (e.g. dev, prod)"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "ecr_repo_url" {
|
||||
type = string
|
||||
description = "ECR repository URL (no tag, no digest)"
|
||||
}
|
||||
|
||||
variable "image_digest" {
|
||||
type = string
|
||||
description = "Image digest (sha256:...)"
|
||||
}
|
||||
|
||||
variable "timeout" {
|
||||
type = number
|
||||
default = 900
|
||||
description = "Lambda timeout in seconds."
|
||||
}
|
||||
|
||||
variable "reserved_concurrent_executions" {
|
||||
type = number
|
||||
default = 1
|
||||
description = "Prevent parallel renames causing race conditions on SharePoint."
|
||||
}
|
||||
|
||||
variable "batch_size" {
|
||||
type = number
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "sharepoint_client_id" {
|
||||
type = string
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
variable "sharepoint_client_secret" {
|
||||
type = string
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
variable "sharepoint_tenant_id" {
|
||||
type = string
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
variable "social_housing_wave_3_sharepoint_id" {
|
||||
type = string
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
locals {
|
||||
image_uri = "${var.ecr_repo_url}@${var.image_digest}"
|
||||
}
|
||||
|
|
@ -844,3 +844,17 @@ module "audit_generator_registry" {
|
|||
stage = var.stage
|
||||
}
|
||||
|
||||
################################################
|
||||
# SharePoint Renamer – Lambda
|
||||
################################################
|
||||
module "sharepoint_renamer_state_bucket" {
|
||||
source = "../modules/tf_state_bucket"
|
||||
bucket_name = "sharepoint-renamer-terraform-state"
|
||||
}
|
||||
|
||||
module "sharepoint_renamer_registry" {
|
||||
source = "../modules/container_registry"
|
||||
name = "sharepoint-renamer"
|
||||
stage = var.stage
|
||||
}
|
||||
|
||||
|
|
|
|||
113
orchestration/sharepoint_renamer_orchestrator.py
Normal file
113
orchestration/sharepoint_renamer_orchestrator.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
import csv
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
|
||||
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
||||
|
||||
BASE_PATH = (
|
||||
"Osmosis-ACD Projects/Sero-Clarion Housing/"
|
||||
"Sero Project Documents/Property Folders"
|
||||
)
|
||||
ASSESSMENT_SUBFOLDER = "A. Assessment"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def build_canonical_filename(
    uprn: str, address: str, postcode: str, original_name: str
) -> Optional[str]:
    """
    Return the canonical filename, or None if the file is already renamed.

    Canonical form is ``{uprn}_{street} {postcode}_{document name}{ext}``, where
    ``street`` is the part of ``address`` before its first comma. When nothing
    is left of the document name, the trailing ``_{document name}`` is omitted.

    A file counts as already renamed when its name starts with ``"{uprn}_"``.

    Any existing address prefix is removed first (case-insensitively), trying
    the most specific form before the least specific: address + postcode,
    address, street + postcode, street. Whatever separator joined that prefix
    to the document name (any run of ``-``, ``_`` and spaces) is dropped too.

    Args:
        uprn: Property reference used as the filename's leading component.
        address: Full address; only the part before the first comma is kept.
        postcode: Postcode appended after the street.
        original_name: Current filename, including extension.

    Returns:
        The new filename, or None when no rename is needed.
    """
    if original_name.startswith(f"{uprn}_"):
        return None

    stem, ext = os.path.splitext(original_name)
    stem_lower = stem.lower()

    street = address.split(",")[0].strip()
    # Order matters: a shorter prefix would also match names that carry a
    # longer one, leaving the postcode/town behind in the document name.
    prefixes = (
        f"{address} {postcode}",
        address,
        f"{street} {postcode}",
        street,
    )

    doc_name = stem
    for prefix in prefixes:
        if stem_lower.startswith(prefix.lower()):
            doc_name = stem[len(prefix) :]
            break

    # Remove the old separator in every spelling (" - ", " _ ", "_", "-", ...).
    # Handling only the spaced forms turned "<prefix>_EPC" into
    # "<uprn>_<street> <postcode>__EPC" with a doubled underscore.
    doc_name = doc_name.strip().lstrip("-_ ").strip()

    street_post = f"{street} {postcode}"
    if doc_name:
        return f"{uprn}_{street_post}_{doc_name}{ext}"
    return f"{uprn}_{street_post}{ext}"
|
||||
|
||||
|
||||
class SharepointRenamerOrchestrator:
    """
    Renames files in SharePoint property folders to the canonical format.

    For every row of a CSV with ``UPRN``, ``Address`` and ``Postcode`` columns,
    the property's assessment folder is walked recursively and each file whose
    name is not yet canonical is renamed through the SharePoint client.
    """

    def __init__(self, sp_client: DomnaSharepointClient, csv_path: str) -> None:
        """
        Args:
            sp_client: Client used to list folders and rename files.
            csv_path: Path of the CSV listing the properties to process.
        """
        self._sp_client = sp_client
        self._csv_path = csv_path

    def run(self) -> None:
        """
        Process every property listed in the CSV.

        Rows with a missing or blank UPRN, address or postcode are skipped
        with a warning rather than aborting the whole run.

        Raises:
            ValueError: If the CSV lacks any of the required columns.
        """
        # utf-8-sig drops a leading BOM so the first header is read as "UPRN".
        with open(self._csv_path, newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)
            required = {"UPRN", "Address", "Postcode"}
            if not reader.fieldnames or not required.issubset(set(reader.fieldnames)):
                raise ValueError(
                    f"CSV missing required columns. Expected {required}, got {reader.fieldnames}"
                )

            for row in reader:
                # DictReader fills the missing cells of a short row with None,
                # so guard before calling .strip().
                uprn = (row["UPRN"] or "").strip()
                address = (row["Address"] or "").strip()
                postcode = (row["Postcode"] or "").strip()

                if not (uprn and address and postcode):
                    # A blank value would build a wrong folder path or a
                    # malformed canonical name; skip and keep going.
                    logger.warning(
                        f"Skipping incomplete CSV row (line {reader.line_num}): {row}"
                    )
                    continue

                folder_path = (
                    f"{BASE_PATH}/{address}, {postcode}"
                    f"/{SharepointSubfolders.ASSESSMENT.value}/{ASSESSMENT_SUBFOLDER}"
                )
                self._process_folder(folder_path, uprn, address, postcode)

    def _process_folder(
        self, folder_path: str, uprn: str, address: str, postcode: str
    ) -> None:
        """
        Rename the files in ``folder_path`` and, recursively, in its subfolders.

        A folder the client cannot list (``ValueError``) is logged and skipped.
        Items that are neither a folder nor a file are ignored. A failed rename
        is logged and does not stop the remaining files from being processed.
        """
        try:
            contents = self._sp_client.get_folders_in_path(folder_path)
        except ValueError:
            logger.warning(f"Missing folder for UPRN {uprn}: {folder_path}")
            return

        for item in contents.get("value", []):
            if "folder" in item:
                self._process_folder(
                    f"{folder_path}/{item['name']}", uprn, address, postcode
                )
            elif "file" in item:
                original_name: str = item["name"]
                new_name = build_canonical_filename(uprn, address, postcode, original_name)

                # None means the file already carries the canonical prefix.
                if new_name is None:
                    continue

                try:
                    self._sp_client.rename_file(item["id"], new_name)
                    logger.info(
                        f'Renamed: "{original_name}" → "{new_name}" (UPRN: {uprn})'
                    )
                except Exception as e:
                    # Deliberately broad: one failing file must not abort the
                    # batch; the failure is recorded and the loop continues.
                    logger.error(
                        f'Failed to rename "{original_name}" → "{new_name}" (UPRN: {uprn}): {e}'
                    )
|
||||
|
|
@ -1,137 +0,0 @@
|
|||
"""
|
||||
Rename files in SharePoint property folders to the canonical format:
|
||||
{UPRN}_{Street} {Postcode}_{Document Name}.ext
|
||||
|
||||
With DRY_RUN = True the planned renames are only logged; set DRY_RUN = False to apply them. Run from repo root.
|
||||
Required env vars: SHAREPOINT_CLIENT_ID, SHAREPOINT_CLIENT_SECRET,
|
||||
SHAREPOINT_TENANT_ID, SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID
|
||||
"""
|
||||
|
||||
import csv
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
|
||||
from utils.logger import setup_logger
|
||||
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
||||
from utils.sharepoint.domna_sites import DomnaSites
|
||||
|
||||
DRY_RUN: bool = False
|
||||
CSV_PATH: str = "scripts/sero_address_list_test.csv"
|
||||
|
||||
BASE_PATH = (
|
||||
"Osmosis-ACD Projects/Sero-Clarion Housing/"
|
||||
"Sero Project Documents/Property Folders"
|
||||
)
|
||||
ASSESSMENT_SUBFOLDER = "A. Assessment"
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def build_canonical_filename(
    uprn: str, address: str, postcode: str, original_name: str
) -> Optional[str]:
    """
    Return the canonical filename, or None if the file is already renamed.

    Canonical form is ``{uprn}_{street} {postcode}_{document name}{ext}``, where
    ``street`` is the part of ``address`` before its first comma. When nothing
    is left of the document name, the trailing ``_{document name}`` is omitted.

    Already-renamed: name starts with ``"{uprn}_"``.

    Strips any existing address prefix case-insensitively (address+postcode,
    address, street+postcode, street, in that order) together with the
    separator that followed it (any run of ``-``, ``_`` and spaces) before
    inserting the canonical prefix.
    """
    if original_name.startswith(f"{uprn}_"):
        return None

    stem, ext = os.path.splitext(original_name)
    stem_lower = stem.lower()

    street = address.split(",")[0].strip()
    # Most specific first, so a short prefix never shadows a longer one.
    prefixes = (
        f"{address} {postcode}",
        address,
        f"{street} {postcode}",
        street,
    )

    doc_name = stem
    for prefix in prefixes:
        if stem_lower.startswith(prefix.lower()):
            doc_name = stem[len(prefix) :]
            break

    # Accept tight separators ("_", "-") as well as the spaced " - " / " _ ".
    # Handling only the spaced forms left a doubled "__" in the result.
    doc_name = doc_name.strip().lstrip("-_ ").strip()

    street_post = f"{street} {postcode}"
    if doc_name:
        return f"{uprn}_{street_post}_{doc_name}{ext}"
    return f"{uprn}_{street_post}{ext}"
|
||||
|
||||
|
||||
def process_folder(
    sp_client: DomnaSharepointClient,
    folder_path: str,
    uprn: str,
    address: str,
    postcode: str,
) -> None:
    """
    Walk ``folder_path`` recursively and rename every non-canonical file.

    A folder the client cannot list (``ValueError``) is logged and skipped.
    Items that are neither folders nor files are ignored. When ``DRY_RUN`` is
    set, the intended rename is only logged. A failed rename is logged and the
    walk continues with the next item.
    """
    try:
        listing = sp_client.get_folders_in_path(folder_path)
    except ValueError:
        logger.warning(f"Missing folder for UPRN {uprn}: {folder_path}")
        return

    for entry in listing.get("value", []):
        if "folder" in entry:
            child_path = f"{folder_path}/{entry['name']}"
            process_folder(sp_client, child_path, uprn, address, postcode)
            continue

        if "file" not in entry:
            continue

        current_name: str = entry["name"]
        target_name = build_canonical_filename(uprn, address, postcode, current_name)
        if target_name is None:
            # Already carries the canonical prefix.
            continue

        if DRY_RUN:
            logger.info(
                f'[DRY RUN] Renaming: "{current_name}" → "{target_name}" (UPRN: {uprn})'
            )
            continue

        try:
            sp_client.rename_file(entry["id"], target_name)
            logger.info(
                f'Renamed: "{current_name}" → "{target_name}" (UPRN: {uprn})'
            )
        except Exception as e:
            logger.error(
                f'Failed to rename "{current_name}" → "{target_name}" (UPRN: {uprn}): {e}'
            )
|
||||
|
||||
|
||||
def main() -> None:
    """
    Rename files for every property listed in ``CSV_PATH``.

    Rows with a missing or blank UPRN, address or postcode are skipped with a
    warning instead of aborting the run.

    Raises:
        ValueError: If the CSV lacks the UPRN, Address or Postcode column.
    """
    sp_client = DomnaSharepointClient(DomnaSites.SOCIAL_HOUSING_WAVE_3)

    # utf-8-sig drops a leading BOM so the first header is read as "UPRN".
    with open(CSV_PATH, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        required = {"UPRN", "Address", "Postcode"}
        if not reader.fieldnames or not required.issubset(set(reader.fieldnames)):
            raise ValueError(
                f"CSV missing required columns. Expected {required}, got {reader.fieldnames}"
            )

        for row in reader:
            # DictReader fills the missing cells of a short row with None,
            # so guard before calling .strip().
            uprn = (row["UPRN"] or "").strip()
            address = (row["Address"] or "").strip()
            postcode = (row["Postcode"] or "").strip()

            if not (uprn and address and postcode):
                logger.warning(
                    f"Skipping incomplete CSV row (line {reader.line_num}): {row}"
                )
                continue

            folder_path = (
                f"{BASE_PATH}/{address}, {postcode}"
                f"/{SharepointSubfolders.ASSESSMENT.value}/{ASSESSMENT_SUBFOLDER}"
            )
            process_folder(sp_client, folder_path, uprn, address, postcode)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
106
tests/orchestration/test_build_canonical_filename.py
Normal file
106
tests/orchestration/test_build_canonical_filename.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
# tests/orchestration/test_build_canonical_filename.py
|
||||
from orchestration.sharepoint_renamer_orchestrator import build_canonical_filename
|
||||
|
||||
UPRN = "10093456789"
|
||||
ADDRESS = "1 High Street, Anytown"
|
||||
POSTCODE = "SW1A 1AA"
|
||||
STREET = "1 High Street"
|
||||
|
||||
|
||||
def test_already_renamed_returns_none() -> None:
    """A name that already begins with "<UPRN>_" needs no rename."""
    # Arrange
    filename = f"{UPRN}_High Street SW1A 1AA_EPC Report.pdf"

    # Act
    outcome = build_canonical_filename(UPRN, ADDRESS, POSTCODE, filename)

    # Assert
    assert outcome is None
|
||||
|
||||
|
||||
def test_address_postcode_prefix_stripped() -> None:
    """A leading "<address> <postcode>" is replaced by the canonical prefix."""
    # Arrange
    filename = f"{ADDRESS} {POSTCODE} - EPC Report.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_EPC Report.pdf"

    # Act
    outcome = build_canonical_filename(UPRN, ADDRESS, POSTCODE, filename)

    # Assert
    assert outcome == expected
|
||||
|
||||
|
||||
def test_address_only_prefix_stripped() -> None:
    """A leading "<address>" without postcode is replaced as well."""
    # Arrange
    filename = f"{ADDRESS} - EPC Report.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_EPC Report.pdf"

    # Act
    outcome = build_canonical_filename(UPRN, ADDRESS, POSTCODE, filename)

    # Assert
    assert outcome == expected
|
||||
|
||||
|
||||
def test_street_postcode_prefix_stripped() -> None:
    """A leading "<street> <postcode>" is replaced by the canonical prefix."""
    # Arrange
    filename = f"{STREET} {POSTCODE} - EPC Report.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_EPC Report.pdf"

    # Act
    outcome = build_canonical_filename(UPRN, ADDRESS, POSTCODE, filename)

    # Assert
    assert outcome == expected
|
||||
|
||||
|
||||
def test_street_only_prefix_stripped() -> None:
    """A leading bare "<street>" is replaced by the canonical prefix."""
    # Arrange
    filename = f"{STREET} - EPC Report.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_EPC Report.pdf"

    # Act
    outcome = build_canonical_filename(UPRN, ADDRESS, POSTCODE, filename)

    # Assert
    assert outcome == expected
|
||||
|
||||
|
||||
def test_dash_separator_removed_after_prefix_strip() -> None:
    """The " - " that joined the old prefix to the document name is dropped."""
    # Arrange – " - " separator between prefix and doc name
    filename = f"{STREET} {POSTCODE} - Floor Plan.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_Floor Plan.pdf"

    # Act
    outcome = build_canonical_filename(UPRN, ADDRESS, POSTCODE, filename)

    # Assert
    assert outcome == expected
|
||||
|
||||
|
||||
def test_underscore_separator_removed_after_prefix_strip() -> None:
    """The " _ " that joined the old prefix to the document name is dropped."""
    # Arrange – " _ " separator between prefix and doc name
    filename = f"{STREET} {POSTCODE} _ Floor Plan.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_Floor Plan.pdf"

    # Act
    outcome = build_canonical_filename(UPRN, ADDRESS, POSTCODE, filename)

    # Assert
    assert outcome == expected
|
||||
|
||||
|
||||
def test_no_recognised_prefix_preserves_stem() -> None:
    """Without an address prefix the whole stem becomes the document name."""
    # Arrange
    filename = "Completely Different Name.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_Completely Different Name.pdf"

    # Act
    outcome = build_canonical_filename(UPRN, ADDRESS, POSTCODE, filename)

    # Assert
    assert outcome == expected
|
||||
|
||||
|
||||
def test_no_doc_name_after_strip_omits_trailing_separator() -> None:
    """An empty document name yields no trailing "_" before the extension."""
    # Arrange – stem is exactly the address prefix with no trailing doc name
    filename = f"{STREET} {POSTCODE}.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}.pdf"

    # Act
    outcome = build_canonical_filename(UPRN, ADDRESS, POSTCODE, filename)

    # Assert
    assert outcome == expected
|
||||
|
|
@ -1,10 +1,12 @@
|
|||
from typing import Any
|
||||
from unittest.mock import MagicMock, call, patch
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
import scripts.rename_sharepoint_files as module
|
||||
from scripts.rename_sharepoint_files import build_canonical_filename, process_folder
|
||||
from orchestration.sharepoint_renamer_orchestrator import (
|
||||
SharepointRenamerOrchestrator,
|
||||
build_canonical_filename,
|
||||
)
|
||||
|
||||
|
||||
def _make_file(name: str, item_id: str = "id-1") -> dict[str, Any]:
|
||||
|
|
@ -19,6 +21,12 @@ def _make_package(name: str) -> dict[str, Any]:
|
|||
return {"name": name, "package": {}}
|
||||
|
||||
|
||||
def _make_orchestrator(sp: MagicMock) -> SharepointRenamerOrchestrator:
    """
    Return an orchestrator wired to the mocked SharePoint client ``sp``.

    The constructor only stores its arguments, so it is called normally rather
    than bypassed with ``__new__``; this keeps the instance fully initialised.
    The CSV path is a placeholder and is never opened by ``_process_folder``.
    """
    return SharepointRenamerOrchestrator(sp, csv_path="unused.csv")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# build_canonical_filename
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -39,7 +47,7 @@ def test_no_prefix_still_canonical() -> None:
|
|||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# process_folder — files only at root level
|
||||
# _process_folder — files only at root level
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
|
@ -52,8 +60,7 @@ def test_renames_top_level_files(caplog: pytest.LogCaptureFixture) -> None:
|
|||
]
|
||||
}
|
||||
|
||||
with patch.object(module, "DRY_RUN", False):
|
||||
process_folder(sp, "some/path", "100", "1 High St", "AB1 2CD")
|
||||
_make_orchestrator(sp)._process_folder("some/path", "100", "1 High St", "AB1 2CD")
|
||||
|
||||
assert sp.rename_file.call_count == 2
|
||||
sp.rename_file.assert_any_call("id-1", "100_1 High St AB1 2CD_Survey.pdf")
|
||||
|
|
@ -61,7 +68,7 @@ def test_renames_top_level_files(caplog: pytest.LogCaptureFixture) -> None:
|
|||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# process_folder — recursive two-level hierarchy
|
||||
# _process_folder — recursive two-level hierarchy
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
|
@ -84,8 +91,7 @@ def test_recurses_into_subfolders_and_renames_all_files() -> None:
|
|||
root_contents if path == "base/path" else suba_contents
|
||||
)
|
||||
|
||||
with patch.object(module, "DRY_RUN", False):
|
||||
process_folder(sp, "base/path", "200", "2 Main Rd", "XY9 8ZW")
|
||||
_make_orchestrator(sp)._process_folder("base/path", "200", "2 Main Rd", "XY9 8ZW")
|
||||
|
||||
assert sp.rename_file.call_count == 2
|
||||
sp.rename_file.assert_any_call("root-file", "200_2 Main Rd XY9 8ZW_Root.pdf")
|
||||
|
|
@ -95,25 +101,22 @@ def test_recurses_into_subfolders_and_renames_all_files() -> None:
|
|||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# process_folder — non-file, non-folder items are skipped
|
||||
# _process_folder — non-file, non-folder items are skipped
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_ignores_package_items() -> None:
|
||||
sp = MagicMock()
|
||||
sp.get_folders_in_path.return_value = {
|
||||
"value": [_make_package("Notebook")]
|
||||
}
|
||||
sp.get_folders_in_path.return_value = {"value": [_make_package("Notebook")]}
|
||||
|
||||
with patch.object(module, "DRY_RUN", False):
|
||||
process_folder(sp, "some/path", "300", "3 Oak Ave", "ZZ1 1ZZ")
|
||||
_make_orchestrator(sp)._process_folder("some/path", "300", "3 Oak Ave", "ZZ1 1ZZ")
|
||||
|
||||
sp.rename_file.assert_not_called()
|
||||
assert sp.get_folders_in_path.call_count == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# process_folder — missing folder
|
||||
# _process_folder — missing folder
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
|
@ -121,31 +124,14 @@ def test_missing_folder_logs_warning_and_returns(caplog: pytest.LogCaptureFixtur
|
|||
sp = MagicMock()
|
||||
sp.get_folders_in_path.side_effect = ValueError("not found")
|
||||
|
||||
with patch.object(module, "DRY_RUN", False):
|
||||
process_folder(sp, "missing/path", "400", "4 Elm St", "AA2 2BB")
|
||||
_make_orchestrator(sp)._process_folder("missing/path", "400", "4 Elm St", "AA2 2BB")
|
||||
|
||||
sp.rename_file.assert_not_called()
|
||||
assert any("Missing folder" in r.message and "400" in r.message for r in caplog.records)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# process_folder — dry run
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_dry_run_logs_without_renaming(caplog: pytest.LogCaptureFixture) -> None:
|
||||
sp = MagicMock()
|
||||
sp.get_folders_in_path.return_value = {"value": [_make_file("Doc.pdf", "id-x")]}
|
||||
|
||||
with patch.object(module, "DRY_RUN", True):
|
||||
process_folder(sp, "some/path", "500", "5 Pine Ln", "BB3 3CC")
|
||||
|
||||
sp.rename_file.assert_not_called()
|
||||
assert any("[DRY RUN]" in r.message for r in caplog.records)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# process_folder — already-canonical files are skipped
|
||||
# _process_folder — already-canonical files are skipped
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
|
@ -155,7 +141,6 @@ def test_skips_already_canonical_files() -> None:
|
|||
"value": [_make_file("500_Pine Ln BB3 3CC_Doc.pdf", "id-y")]
|
||||
}
|
||||
|
||||
with patch.object(module, "DRY_RUN", False):
|
||||
process_folder(sp, "some/path", "500", "5 Pine Ln", "BB3 3CC")
|
||||
_make_orchestrator(sp)._process_folder("some/path", "500", "5 Pine Ln", "BB3 3CC")
|
||||
|
||||
sp.rename_file.assert_not_called()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue