From 5e31c0f3dadd4d4da36dc023612388ef66f5b4c9 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:51:28 +0000 Subject: [PATCH] =?UTF-8?q?file=5Ftype=5Ffor=20delegates=20to=20core=5Ffil?= =?UTF-8?q?e=5Ffor;=20=5FMATCHERS=20removed=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/core_files.py | 67 ++++++------------- backend/pashub_fetcher/pashub_service.py | 4 +- .../pashub_fetcher/tests/test_core_files.py | 30 ++++----- 3 files changed, 37 insertions(+), 64 deletions(-) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 87a4044a..01ae189f 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Callable, Optional +from typing import Optional from backend.app.db.models.uploaded_file import FileTypeEnum @@ -19,47 +19,20 @@ class CoreFiles(Enum): RETROFIT_DESIGN_DOC = "Retrofit Design Doc" -_MATCHERS: list[tuple[Callable[[str], bool], str]] = [ - (lambda f: f.startswith(CoreFiles.PHOTOPACK.value), FileTypeEnum.PHOTO_PACK.value), - (lambda f: f.startswith(CoreFiles.SITENOTE.value), FileTypeEnum.SITE_NOTE.value), - ( - lambda f: f.startswith(CoreFiles.RDSAP_SITENOTE.value), - FileTypeEnum.RD_SAP_SITE_NOTE.value, - ), - ( - lambda f: f.startswith(CoreFiles.PAS2023_VENTILATION.value), - FileTypeEnum.PAS_2023_VENTILATION.value, - ), - ( - lambda f: f.startswith(CoreFiles.PAS2023_CONDITION.value), - FileTypeEnum.PAS_2023_CONDITION.value, - ), - ( - lambda f: f.startswith(CoreFiles.PAS_SIGNIFICANCE.value), - FileTypeEnum.PAS_SIGNIFICANCE.value, - ), - ( - lambda f: f.startswith(CoreFiles.PAR_PHOTOPACK.value), - FileTypeEnum.PAR_PHOTO_PACK.value, - ), - ( - lambda f: f.startswith(CoreFiles.PAS2023_PROPERTY.value), - FileTypeEnum.PAS_2023_PROPERTY.value, - ), - ( - lambda f: f.startswith(CoreFiles.PAS2023_OCCUPANCY.value), - FileTypeEnum.PAS_2023_OCCUPANCY.value, - ), - ( - lambda f: CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in f, - FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value, - ), - ( - lambda f: CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in f, - FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value, - ), - (lambda f: "-OSM-" in f and "DR-N-A" in f, FileTypeEnum.RETROFIT_DESIGN_DOC.value), -] +_CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { + CoreFiles.PHOTOPACK: FileTypeEnum.PHOTO_PACK.value, + CoreFiles.SITENOTE: FileTypeEnum.SITE_NOTE.value, + CoreFiles.RDSAP_SITENOTE: FileTypeEnum.RD_SAP_SITE_NOTE.value, + CoreFiles.PAS2023_VENTILATION: FileTypeEnum.PAS_2023_VENTILATION.value, + CoreFiles.PAS2023_CONDITION: FileTypeEnum.PAS_2023_CONDITION.value, + CoreFiles.PAS_SIGNIFICANCE: FileTypeEnum.PAS_SIGNIFICANCE.value, + CoreFiles.PAR_PHOTOPACK: FileTypeEnum.PAR_PHOTO_PACK.value, + CoreFiles.PAS2023_PROPERTY: FileTypeEnum.PAS_2023_PROPERTY.value, + CoreFiles.PAS2023_OCCUPANCY: FileTypeEnum.PAS_2023_OCCUPANCY.value, + CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value, + CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value, + CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value, +} def core_file_for( @@ -86,8 +59,8 @@ def core_file_for( return None -def infer_file_type(filename: str) -> Optional[str]: - for matcher, file_type in _MATCHERS: - if matcher(filename): - return file_type - return None +def file_type_for(filename: str) -> Optional[str]: + core_file = core_file_for(filename) + if core_file is None: + return None + return _CORE_FILE_TO_FILE_TYPE[core_file] diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index 316902f4..ec623f7a 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -10,7 +10,7 @@ from backend.app.db.models.uploaded_file import ( ) from backend.documents_parser.db_writer import save_epc_property_data from backend.documents_parser.parser import parse_site_notes_pdf -from backend.pashub_fetcher.core_files import infer_file_type +from backend.pashub_fetcher.core_files import file_type_for from backend.pashub_fetcher.pashub_client import PashubClient from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, @@ -109,7 +109,7 @@ class PashubService: uprn=int(uprn) if uprn else None, hubspot_deal_id=hubspot_deal_id, file_source=FileSourceEnum.PAS_HUB.value, - file_type=infer_file_type(filename), + file_type=file_type_for(filename), ) file_paths.append(file_path) uploaded_files.append(uploaded_file) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index e97df476..09fcdcb2 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -1,65 +1,65 @@ -from backend.pashub_fetcher.core_files import CoreFiles, core_file_for, infer_file_type +from backend.pashub_fetcher.core_files import CoreFiles, core_file_for, file_type_for def test_infer_photopack(): - assert infer_file_type("Photopack_123456_V1.pdf") == "photo_pack" + assert file_type_for("Photopack_123456_V1.pdf") == "photo_pack" def test_infer_sitenote(): - assert infer_file_type("SiteNote_123456_V1.pdf") == "site_note" + assert file_type_for("SiteNote_123456_V1.pdf") == "site_note" def test_infer_rdsap_sitenote(): assert ( - infer_file_type("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note" + file_type_for("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note" ) def test_infer_pas2023_ventilation(): assert ( - infer_file_type("PAS 2023 Ventilation Assessment Report_123456.pdf") + file_type_for("PAS 2023 Ventilation Assessment Report_123456.pdf") == "pas_2023_ventilation" ) def test_infer_pas2023_condition(): assert ( - infer_file_type("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition" + file_type_for("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition" ) def test_infer_pas_significance(): - assert infer_file_type("PAS Significance_123456.pdf") == "pas_significance" + assert file_type_for("PAS Significance_123456.pdf") == "pas_significance" def test_infer_par_photopack(): assert ( - infer_file_type("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack" + file_type_for("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack" ) def test_infer_pas2023_property(): assert ( - infer_file_type("PAS 2023 Property Assessment Report_123456.pdf") + file_type_for("PAS 2023 Property Assessment Report_123456.pdf") == "pas_2023_property" ) def test_infer_pas2023_occupancy(): assert ( - infer_file_type("PAS 2023 Occupancy Assessment Report_123456.pdf") + file_type_for("PAS 2023 Occupancy Assessment Report_123456.pdf") == "pas_2023_occupancy" ) def test_infer_unknown_returns_none(): - assert infer_file_type("unknown_document_123.pdf") is None + assert file_type_for("unknown_document_123.pdf") is None def test_infer_improvement_option_evaluation(): # filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf" assert ( - infer_file_type("6000802 - NG4 4HD - Improvement Option Evaluation.pdf") + file_type_for("6000802 - NG4 4HD - Improvement Option Evaluation.pdf") == "improvement_option_evaluation" ) @@ -67,18 +67,18 @@ def test_infer_improvement_option_evaluation(): def test_infer_medium_term_improvement_plan(): # filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf" assert ( - infer_file_type("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") + file_type_for("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") == "medium_term_improvement_plan" ) def test_infer_retrofit_design_doc(): assert ( - infer_file_type("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf") + file_type_for("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf") == "retrofit_design_doc" ) assert ( - infer_file_type("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf") + file_type_for("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf") == "retrofit_design_doc" )