SharePoint renamer build_canonical_filename behaviour verified by tests 🟩

This commit is contained in:
Daniel Roth 2026-06-15 10:48:17 +00:00
parent 9daf6a8668
commit 383b8b0c37
3 changed files with 108 additions and 0 deletions

View file

@@ -25,5 +25,7 @@ testpaths =
etl/epc_clean/tests
etl/hubspot/tests
etl/spatial/tests
scripts/tests
; tests/
markers =
integration: mark a test as an integration test

View file

View file

@@ -0,0 +1,106 @@
# scripts/tests/test_build_canonical_filename.py
from scripts.rename_sharepoint_files import build_canonical_filename
# Shared inputs: every test in this module calls
# build_canonical_filename(UPRN, ADDRESS, POSTCODE, <original filename>).
UPRN = "10093456789"  # first argument; expected results start with "<UPRN>_"
ADDRESS = "1 High Street, Anytown"  # second argument: the full address string
POSTCODE = "SW1A 1AA"  # third argument; follows the street in expected results
STREET = "1 High Street"  # text of ADDRESS before the comma, as used in expected results
def test_already_renamed_returns_none() -> None:
    """A filename that already starts with ``<UPRN>_`` is expected to yield ``None``.

    NOTE(review): the street segment in the input is "High Street", not
    STREET ("1 High Street"), and the postcode is spelled out literally.
    Presumably the "already renamed" check keys off the UPRN prefix only --
    confirm against the implementation.
    """
    # Arrange: a name that already begins with the UPRN and an underscore
    already_prefixed = f"{UPRN}_High Street SW1A 1AA_EPC Report.pdf"

    # Act
    outcome = build_canonical_filename(UPRN, ADDRESS, POSTCODE, already_prefixed)

    # Assert: no new name is proposed
    assert outcome is None
def test_address_postcode_prefix_stripped() -> None:
    """A leading "<full address> <postcode> - " is replaced by the canonical prefix."""
    # Arrange: full address and postcode in front of the document name
    source_name = f"{ADDRESS} {POSTCODE} - EPC Report.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_EPC Report.pdf"

    # Act
    renamed = build_canonical_filename(UPRN, ADDRESS, POSTCODE, source_name)

    # Assert
    assert renamed == expected
def test_address_only_prefix_stripped() -> None:
    """A leading "<full address> - " (no postcode) is replaced by the canonical prefix."""
    # Arrange: full address without the postcode in front of the document name
    source_name = f"{ADDRESS} - EPC Report.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_EPC Report.pdf"

    # Act
    renamed = build_canonical_filename(UPRN, ADDRESS, POSTCODE, source_name)

    # Assert
    assert renamed == expected
def test_street_postcode_prefix_stripped() -> None:
    """A leading "<street> <postcode> - " is replaced by the canonical prefix."""
    # Arrange: street and postcode in front of the document name
    source_name = f"{STREET} {POSTCODE} - EPC Report.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_EPC Report.pdf"

    # Act
    renamed = build_canonical_filename(UPRN, ADDRESS, POSTCODE, source_name)

    # Assert
    assert renamed == expected
def test_street_only_prefix_stripped() -> None:
    """A leading "<street> - " (no postcode) is replaced by the canonical prefix."""
    # Arrange: street alone in front of the document name
    source_name = f"{STREET} - EPC Report.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_EPC Report.pdf"

    # Act
    renamed = build_canonical_filename(UPRN, ADDRESS, POSTCODE, source_name)

    # Assert
    assert renamed == expected
def test_dash_separator_removed_after_prefix_strip() -> None:
    """The " - " between the address prefix and the document name does not survive."""
    # Arrange: " - " separator between prefix and doc name
    source_name = f"{STREET} {POSTCODE} - Floor Plan.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_Floor Plan.pdf"

    # Act
    renamed = build_canonical_filename(UPRN, ADDRESS, POSTCODE, source_name)

    # Assert: the document name follows the canonical prefix after a single "_"
    assert renamed == expected
def test_underscore_separator_removed_after_prefix_strip() -> None:
    """The " _ " between the address prefix and the document name does not survive."""
    # Arrange: " _ " separator between prefix and doc name
    source_name = f"{STREET} {POSTCODE} _ Floor Plan.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_Floor Plan.pdf"

    # Act
    renamed = build_canonical_filename(UPRN, ADDRESS, POSTCODE, source_name)

    # Assert: the document name follows the canonical prefix after a single "_"
    assert renamed == expected
def test_no_recognised_prefix_preserves_stem() -> None:
    """A name with no address or street prefix keeps its whole stem as the document name."""
    # Arrange: nothing in this name matches ADDRESS, STREET or POSTCODE
    source_name = "Completely Different Name.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}_Completely Different Name.pdf"

    # Act
    renamed = build_canonical_filename(UPRN, ADDRESS, POSTCODE, source_name)

    # Assert: the original stem is appended unchanged after the canonical prefix
    assert renamed == expected
def test_no_doc_name_after_strip_omits_trailing_separator() -> None:
    """When only the address prefix is present, the result has no trailing "_"."""
    # Arrange: stem is exactly the address prefix with no trailing doc name
    source_name = f"{STREET} {POSTCODE}.pdf"
    expected = f"{UPRN}_{STREET} {POSTCODE}.pdf"

    # Act
    renamed = build_canonical_filename(UPRN, ADDRESS, POSTCODE, source_name)

    # Assert: the extension follows the postcode directly
    assert renamed == expected