diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 01ae189f..e668ba7f 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -38,24 +38,33 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { def core_file_for( filename: str, evidence_category: Optional[str] = None ) -> Optional[CoreFiles]: + # Identify retrofit design doc using evidence category as the name is possibly unreliable. + # We might change to always use evidence category, but needs more investigation if evidence_category is not None and evidence_category.lower() == "retrofit design": return CoreFiles.RETROFIT_DESIGN_DOC + if CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in filename: return CoreFiles.IMPROVEMENT_OPTION_EVALUATION + if CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in filename: return CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN + if evidence_category is None and "-OSM-" in filename and "DR-N-A" in filename: return CoreFiles.RETROFIT_DESIGN_DOC + _prefix_skip = { CoreFiles.RETROFIT_DESIGN_DOC, CoreFiles.IMPROVEMENT_OPTION_EVALUATION, CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN, } + for core_file in CoreFiles: if core_file in _prefix_skip: continue + if filename.startswith(core_file.value): return core_file + return None diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index 09fcdcb2..ee91298e 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -1,62 +1,58 @@ from backend.pashub_fetcher.core_files import CoreFiles, core_file_for, file_type_for -def test_infer_photopack(): +def test_file_type_for_photopack(): assert file_type_for("Photopack_123456_V1.pdf") == "photo_pack" -def test_infer_sitenote(): +def test_file_type_for_sitenote(): assert file_type_for("SiteNote_123456_V1.pdf") == "site_note" -def test_infer_rdsap_sitenote(): +def test_file_type_for_rdsap_sitenote(): assert ( file_type_for("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note" ) -def test_infer_pas2023_ventilation(): +def test_file_type_for_pas2023_ventilation(): assert ( file_type_for("PAS 2023 Ventilation Assessment Report_123456.pdf") == "pas_2023_ventilation" ) -def test_infer_pas2023_condition(): +def test_file_type_for_pas2023_condition(): assert ( file_type_for("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition" ) -def test_infer_pas_significance(): +def test_file_type_for_pas_significance(): assert file_type_for("PAS Significance_123456.pdf") == "pas_significance" -def test_infer_par_photopack(): +def test_file_type_for_par_photopack(): assert ( file_type_for("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack" ) -def test_infer_pas2023_property(): +def test_file_type_for_pas2023_property(): assert ( file_type_for("PAS 2023 Property Assessment Report_123456.pdf") == "pas_2023_property" ) -def test_infer_pas2023_occupancy(): +def test_file_type_for_pas2023_occupancy(): assert ( file_type_for("PAS 2023 Occupancy Assessment Report_123456.pdf") == "pas_2023_occupancy" ) -def test_infer_unknown_returns_none(): - assert file_type_for("unknown_document_123.pdf") is None - - -def test_infer_improvement_option_evaluation(): +def test_file_type_for_improvement_option_evaluation(): # filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf" assert ( file_type_for("6000802 - NG4 4HD - Improvement Option Evaluation.pdf") @@ -64,7 +60,7 @@ def test_infer_improvement_option_evaluation(): ) -def test_infer_medium_term_improvement_plan(): +def test_file_type_for_medium_term_improvement_plan(): # filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf" assert ( file_type_for("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") @@ -72,7 +68,7 @@ def test_infer_medium_term_improvement_plan(): ) -def test_infer_retrofit_design_doc(): +def test_file_type_for_retrofit_design_doc(): assert ( file_type_for("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf") == "retrofit_design_doc"