From faf698eb7162af4a2f08da1379d5ce3f1be41444 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 10:57:37 +0000 Subject: [PATCH] rename functions and include typehints --- backend/pashub_fetcher/core_files.py | 6 +- backend/pashub_fetcher/pashub_client.py | 7 ++- backend/pashub_fetcher/pashub_service.py | 4 +- .../pashub_fetcher/tests/test_core_files.py | 61 +++++++++++-------- 4 files changed, 46 insertions(+), 32 deletions(-) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index e668ba7f..30aa2ba8 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -35,7 +35,7 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { } -def core_file_for( +def get_core_file_type( filename: str, evidence_category: Optional[str] = None ) -> Optional[CoreFiles]: # Identify retrofit design doc using evidence category as the name is possibly unreliable. @@ -68,8 +68,8 @@ def core_file_for( return None -def file_type_for(filename: str) -> Optional[str]: - core_file = core_file_for(filename) +def get_file_type_string(filename: str) -> Optional[str]: + core_file = get_core_file_type(filename) if core_file is None: return None return _CORE_FILE_TO_FILE_TYPE[core_file] diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index f851c410..7896664d 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -5,12 +5,11 @@ from datetime import datetime import requests -from backend.pashub_fetcher.core_files import CoreFiles, core_file_for +from backend.pashub_fetcher.core_files import CoreFiles, get_core_file_type from backend.pashub_fetcher.evidence_file_data import EvidenceFileData from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata from utils.logger import setup_logger - logger = setup_logger() @@ -93,7 +92,9 @@ class PashubClient: grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list) for file in files: - core_type = core_file_for(file.file_name, file.evidence_category) + core_type: Optional[CoreFiles] = get_core_file_type( + file.file_name, file.evidence_category + ) if not core_type: continue grouped[core_type].append(file) diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index ec623f7a..b3302fd9 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -10,7 +10,7 @@ from backend.app.db.models.uploaded_file import ( ) from backend.documents_parser.db_writer import save_epc_property_data from backend.documents_parser.parser import parse_site_notes_pdf -from backend.pashub_fetcher.core_files import file_type_for +from backend.pashub_fetcher.core_files import get_file_type_string from backend.pashub_fetcher.pashub_client import PashubClient from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, @@ -109,7 +109,7 @@ class PashubService: uprn=int(uprn) if uprn else None, hubspot_deal_id=hubspot_deal_id, file_source=FileSourceEnum.PAS_HUB.value, - file_type=file_type_for(filename), + file_type=get_file_type_string(filename), ) file_paths.append(file_path) uploaded_files.append(uploaded_file) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index ee91298e..3c1d11b8 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -1,53 +1,60 @@ -from backend.pashub_fetcher.core_files import CoreFiles, core_file_for, file_type_for +from backend.pashub_fetcher.core_files import ( + CoreFiles, + get_core_file_type, + get_file_type_string, +) def test_file_type_for_photopack(): - assert file_type_for("Photopack_123456_V1.pdf") == "photo_pack" + assert get_file_type_string("Photopack_123456_V1.pdf") == "photo_pack" def test_file_type_for_sitenote(): - assert file_type_for("SiteNote_123456_V1.pdf") == "site_note" + assert get_file_type_string("SiteNote_123456_V1.pdf") == "site_note" def test_file_type_for_rdsap_sitenote(): assert ( - file_type_for("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note" + get_file_type_string("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") + == "rd_sap_site_note" ) def test_file_type_for_pas2023_ventilation(): assert ( - file_type_for("PAS 2023 Ventilation Assessment Report_123456.pdf") + get_file_type_string("PAS 2023 Ventilation Assessment Report_123456.pdf") == "pas_2023_ventilation" ) def test_file_type_for_pas2023_condition(): assert ( - file_type_for("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition" + get_file_type_string("PAS 2023 Condition Report_123456.pdf") + == "pas_2023_condition" ) def test_file_type_for_pas_significance(): - assert file_type_for("PAS Significance_123456.pdf") == "pas_significance" + assert get_file_type_string("PAS Significance_123456.pdf") == "pas_significance" def test_file_type_for_par_photopack(): assert ( - file_type_for("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack" + get_file_type_string("PAR Photo Pack_95101890_V2_Assessment.pdf") + == "par_photo_pack" ) def test_file_type_for_pas2023_property(): assert ( - file_type_for("PAS 2023 Property Assessment Report_123456.pdf") + get_file_type_string("PAS 2023 Property Assessment Report_123456.pdf") == "pas_2023_property" ) def test_file_type_for_pas2023_occupancy(): assert ( - file_type_for("PAS 2023 Occupancy Assessment Report_123456.pdf") + get_file_type_string("PAS 2023 Occupancy Assessment Report_123456.pdf") == "pas_2023_occupancy" ) @@ -55,7 +62,7 @@ def test_file_type_for_pas2023_occupancy(): def test_file_type_for_improvement_option_evaluation(): # filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf" assert ( - file_type_for("6000802 - NG4 4HD - Improvement Option Evaluation.pdf") + get_file_type_string("6000802 - NG4 4HD - Improvement Option Evaluation.pdf") == "improvement_option_evaluation" ) @@ -63,18 +70,20 @@ def test_file_type_for_improvement_option_evaluation(): def test_file_type_for_medium_term_improvement_plan(): # filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf" assert ( - file_type_for("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") + get_file_type_string( + "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf" + ) == "medium_term_improvement_plan" ) def test_file_type_for_retrofit_design_doc(): assert ( - file_type_for("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf") + get_file_type_string("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf") == "retrofit_design_doc" ) assert ( - file_type_for("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf") + get_file_type_string("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf") == "retrofit_design_doc" ) @@ -89,7 +98,7 @@ def test_core_file_for_evidence_category_match_is_case_insensitive() -> None: filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" # Act - result = core_file_for(filename, evidence_category="Retrofit Design") + result = get_core_file_type(filename, evidence_category="Retrofit Design") # Assert assert result == CoreFiles.RETROFIT_DESIGN_DOC @@ -100,7 +109,7 @@ def test_core_file_for_evidence_category_returns_retrofit_design_doc() -> None: filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" # Act - result = core_file_for(filename, evidence_category="retrofit design") + result = get_core_file_type(filename, evidence_category="retrofit design") # Assert assert result == CoreFiles.RETROFIT_DESIGN_DOC @@ -111,7 +120,7 @@ def test_core_file_for_ioe_substring_returns_improvement_option_evaluation() -> filename = "6000802 - NG4 4HD - Improvement Option Evaluation.pdf" # Act - result = core_file_for(filename) + result = get_core_file_type(filename) # Assert assert result == CoreFiles.IMPROVEMENT_OPTION_EVALUATION @@ -122,18 +131,20 @@ def test_core_file_for_mtip_substring_returns_medium_term_improvement_plan() -> filename = "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf" # Act - result = core_file_for(filename) + result = get_core_file_type(filename) # Assert assert result == CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN -def test_core_file_for_osm_pattern_returns_retrofit_design_doc_without_evidence_category() -> None: +def test_core_file_for_osm_pattern_returns_retrofit_design_doc_without_evidence_category() -> ( + None +): # Arrange filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" # Act - result = core_file_for(filename) + result = get_core_file_type(filename) # Assert assert result == CoreFiles.RETROFIT_DESIGN_DOC @@ -144,7 +155,7 @@ def test_core_file_for_prefix_returns_photopack() -> None: filename = "Photopack_123456_V1.pdf" # Act - result = core_file_for(filename) + result = get_core_file_type(filename) # Assert assert result == CoreFiles.PHOTOPACK @@ -155,18 +166,20 @@ def test_core_file_for_unknown_filename_returns_none() -> None: filename = "unknown_document_123.pdf" # Act - result = core_file_for(filename) + result = get_core_file_type(filename) # Assert assert result is None -def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present() -> None: +def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present() -> ( + None +): # Arrange — OSM+DR-N-A filename but evidence_category is something other than retrofit design filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" # Act - result = core_file_for(filename, evidence_category="some other category") + result = get_core_file_type(filename, evidence_category="some other category") # Assert assert result is None