From e3159665653557584edbe681a371c42b4a044a2f Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 12:29:25 +0000 Subject: [PATCH 01/39] add coordination and design document types to enums --- backend/app/db/models/uploaded_file.py | 3 +++ backend/pashub_fetcher/core_files.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py index c629f574..f3cfee79 100644 --- a/backend/app/db/models/uploaded_file.py +++ b/backend/app/db/models/uploaded_file.py @@ -18,6 +18,9 @@ class FileTypeEnum(enum.Enum): ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note" ECMK_SURVEY_XML = "ecmk_survey_xml" MAGIC_PLAN_JSON = "magic_plan_json" + IMPROVEMENT_OPTION_EVALUATION = "improvement_option_evaluation" + MEDIUM_TERM_IMPROVEMENT_PLAN = "medium_term_improvement_plan" + RETROFIT_DESIGN_DOC = "retrofit_design_doc" class FileSourceEnum(enum.Enum): diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 4da10661..aa426475 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -14,6 +14,9 @@ class CoreFiles(Enum): PAR_PHOTOPACK = "PAR Photo Pack" PAS2023_PROPERTY = "PAS 2023 Property Assessment Report" PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report" + IMPROVEMENT_OPTION_EVALUATION = "Improvement Option Evaluation" + MEDIUM_TERM_IMPROVEMENT_PLAN = "Medium Term Improvement Plan" + RETROFIT_DESIGN_DOC = "Retrofit Design Doc" CORE_TO_FILETYPE_MAP = { @@ -26,6 +29,9 @@ CORE_TO_FILETYPE_MAP = { CoreFiles.PAR_PHOTOPACK: FileTypeEnum.PAR_PHOTO_PACK.value, CoreFiles.PAS2023_PROPERTY: FileTypeEnum.PAS_2023_PROPERTY.value, CoreFiles.PAS2023_OCCUPANCY: FileTypeEnum.PAS_2023_OCCUPANCY.value, + CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value, + CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value, + CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value, } From e3646162de686884b17da231a26eeeaa3c4cdc41 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 13:09:40 +0000 Subject: [PATCH 02/39] =?UTF-8?q?new=20files=20types=20inferred=20from=20f?= =?UTF-8?q?ile=20names=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pashub_fetcher/tests/test_core_files.py | 64 +++++++++++++++++++ pytest.ini | 23 ++++++- 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 backend/pashub_fetcher/tests/test_core_files.py diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py new file mode 100644 index 00000000..fca29b7e --- /dev/null +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -0,0 +1,64 @@ +import pytest + +from backend.pashub_fetcher.core_files import infer_file_type + + +# --- GREEN: pre-existing file types (startswith match) --- + + +def test_infer_photopack(): + assert infer_file_type("Photopack_123456_V1.pdf") == "photo_pack" + + +def test_infer_sitenote(): + assert infer_file_type("SiteNote_123456_V1.pdf") == "site_note" + + +def test_infer_rdsap_sitenote(): + assert infer_file_type("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note" + + +def test_infer_pas2023_ventilation(): + assert infer_file_type("PAS 2023 Ventilation Assessment Report_123456.pdf") == "pas_2023_ventilation" + + +def test_infer_pas2023_condition(): + assert infer_file_type("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition" + + +def test_infer_pas_significance(): + assert infer_file_type("PAS Significance_123456.pdf") == "pas_significance" + + +def test_infer_par_photopack(): + assert infer_file_type("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack" + + +def test_infer_pas2023_property(): + assert infer_file_type("PAS 2023 Property Assessment Report_123456.pdf") == "pas_2023_property" + + +def test_infer_pas2023_occupancy(): + assert infer_file_type("PAS 2023 Occupancy Assessment Report_123456.pdf") == "pas_2023_occupancy" + + +def test_infer_unknown_returns_none(): + assert infer_file_type("unknown_document_123.pdf") is None + + +# --- RED: new file types (suffix match not yet implemented) --- + + +def test_infer_improvement_option_evaluation(): + # filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf" + assert infer_file_type("6000802 - NG4 4HD - Improvement Option Evaluation.pdf") == "improvement_option_evaluation" + + +def test_infer_medium_term_improvement_plan(): + # filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf" + assert infer_file_type("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") == "medium_term_improvement_plan" + + +@pytest.mark.skip(reason="Retrofit Design Doc filename pattern not yet known") +def test_infer_retrofit_design_doc(): + assert infer_file_type("2512-OSM-H56M900-XX-DR-N-A_Radford Road 408.pdf") == "retrofit_design_doc" diff --git a/pytest.ini b/pytest.ini index e2a4a25d..99cc8e1b 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,6 +3,27 @@ pythonpath = . log_cli = true log_cli_level = INFO addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ backend/pashub_fetcher/tests backend/documents_parser/tests backend/magic_plan/tests datatypes/magicplan/api/tests datatypes/magicplan/domain/tests backend/app/db/functions/tests +testpaths = + recommendations/tests + backend/tests + backend/address2UPRN/tests + backend/app/db/functions/tests + backend/categorisation/tests + backend/condition/tests + backend/documents_parser/tests + backend/ecmk_fetcher/tests + backend/export/tests + backend/magic_plan/tests + backend/onboarders/tests + backend/pashub_fetcher/tests + datatypes/epc/domain/tests + datatypes/epc/schema/tests + datatypes/epc/surveys/tests + datatypes/magicplan/api/tests + datatypes/magicplan/domain/tests + etl/epc/tests + etl/epc_clean/tests + etl/hubspot/tests + etl/spatial/tests markers = integration: mark a test as an integration test From b3a68a264a08af77fc047f97f9adb7453b77f037 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 13:32:54 +0000 Subject: [PATCH 03/39] =?UTF-8?q?new=20files=20types=20inferred=20from=20f?= =?UTF-8?q?ile=20names=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/core_files.py | 10 ++++++++++ backend/pashub_fetcher/tests/test_core_files.py | 6 ++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index aa426475..b5ce1073 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -39,4 +39,14 @@ def infer_file_type(filename: str) -> Optional[str]: for core_file, file_type in CORE_TO_FILETYPE_MAP.items(): if filename.startswith(core_file.value): return file_type + + if CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in filename: + return CORE_TO_FILETYPE_MAP[CoreFiles.IMPROVEMENT_OPTION_EVALUATION] + + if CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in filename: + return CORE_TO_FILETYPE_MAP[CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN] + + if "-OSM-" in filename and "DR-N-A" in filename: + return CORE_TO_FILETYPE_MAP[CoreFiles.RETROFIT_DESIGN_DOC] + return None diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index fca29b7e..f8e8b431 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -1,5 +1,3 @@ -import pytest - from backend.pashub_fetcher.core_files import infer_file_type @@ -59,6 +57,6 @@ def test_infer_medium_term_improvement_plan(): assert infer_file_type("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") == "medium_term_improvement_plan" -@pytest.mark.skip(reason="Retrofit Design Doc filename pattern not yet known") def test_infer_retrofit_design_doc(): - assert infer_file_type("2512-OSM-H56M900-XX-DR-N-A_Radford Road 408.pdf") == "retrofit_design_doc" + assert infer_file_type("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf") == "retrofit_design_doc" + assert infer_file_type("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf") == "retrofit_design_doc" From 39c5fd57693e6ceb5af2ce0bac7d1e53e7aca7e1 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 13:41:41 +0000 Subject: [PATCH 04/39] =?UTF-8?q?new=20files=20types=20inferred=20from=20f?= =?UTF-8?q?ile=20names=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/core_files.py | 71 +++++++++++++++++----------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index b5ce1073..3e69bf9a 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Optional +from typing import Callable, Optional from backend.app.db.models.uploaded_file import FileTypeEnum @@ -19,34 +19,51 @@ class CoreFiles(Enum): RETROFIT_DESIGN_DOC = "Retrofit Design Doc" -CORE_TO_FILETYPE_MAP = { - CoreFiles.PHOTOPACK: FileTypeEnum.PHOTO_PACK.value, - CoreFiles.SITENOTE: FileTypeEnum.SITE_NOTE.value, - CoreFiles.RDSAP_SITENOTE: FileTypeEnum.RD_SAP_SITE_NOTE.value, - CoreFiles.PAS2023_VENTILATION: FileTypeEnum.PAS_2023_VENTILATION.value, - CoreFiles.PAS2023_CONDITION: FileTypeEnum.PAS_2023_CONDITION.value, - CoreFiles.PAS_SIGNIFICANCE: FileTypeEnum.PAS_SIGNIFICANCE.value, - CoreFiles.PAR_PHOTOPACK: FileTypeEnum.PAR_PHOTO_PACK.value, - CoreFiles.PAS2023_PROPERTY: FileTypeEnum.PAS_2023_PROPERTY.value, - CoreFiles.PAS2023_OCCUPANCY: FileTypeEnum.PAS_2023_OCCUPANCY.value, - CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value, - CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value, - CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value, -} +_MATCHERS: list[tuple[Callable[[str], bool], str]] = [ + (lambda f: f.startswith(CoreFiles.PHOTOPACK.value), FileTypeEnum.PHOTO_PACK.value), + (lambda f: f.startswith(CoreFiles.SITENOTE.value), FileTypeEnum.SITE_NOTE.value), + ( + lambda f: f.startswith(CoreFiles.RDSAP_SITENOTE.value), + FileTypeEnum.RD_SAP_SITE_NOTE.value, + ), + ( + lambda f: f.startswith(CoreFiles.PAS2023_VENTILATION.value), + FileTypeEnum.PAS_2023_VENTILATION.value, + ), + ( + lambda f: f.startswith(CoreFiles.PAS2023_CONDITION.value), + FileTypeEnum.PAS_2023_CONDITION.value, + ), + ( + lambda f: f.startswith(CoreFiles.PAS_SIGNIFICANCE.value), + FileTypeEnum.PAS_SIGNIFICANCE.value, + ), + ( + lambda f: f.startswith(CoreFiles.PAR_PHOTOPACK.value), + FileTypeEnum.PAR_PHOTO_PACK.value, + ), + ( + lambda f: f.startswith(CoreFiles.PAS2023_PROPERTY.value), + FileTypeEnum.PAS_2023_PROPERTY.value, + ), + ( + lambda f: f.startswith(CoreFiles.PAS2023_OCCUPANCY.value), + FileTypeEnum.PAS_2023_OCCUPANCY.value, + ), + ( + lambda f: CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in f, + FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value, + ), + ( + lambda f: CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in f, + FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value, + ), + (lambda f: "-OSM-" in f and "DR-N-A" in f, FileTypeEnum.RETROFIT_DESIGN_DOC.value), +] def infer_file_type(filename: str) -> Optional[str]: - for core_file, file_type in CORE_TO_FILETYPE_MAP.items(): - if filename.startswith(core_file.value): + for matcher, file_type in _MATCHERS: + if matcher(filename): return file_type - - if CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in filename: - return CORE_TO_FILETYPE_MAP[CoreFiles.IMPROVEMENT_OPTION_EVALUATION] - - if CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in filename: - return CORE_TO_FILETYPE_MAP[CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN] - - if "-OSM-" in filename and "DR-N-A" in filename: - return CORE_TO_FILETYPE_MAP[CoreFiles.RETROFIT_DESIGN_DOC] - return None From df0f089d4f65d1107d69195820706205380d7e66 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:05:20 +0000 Subject: [PATCH 05/39] =?UTF-8?q?Retrofit=20design=20doc=20selected=20by?= =?UTF-8?q?=20evidence=5Fcategory=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_pashub_client.py | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 backend/pashub_fetcher/tests/test_pashub_client.py diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py new file mode 100644 index 00000000..4f5aef98 --- /dev/null +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -0,0 +1,44 @@ +from typing import Optional + +from backend.pashub_fetcher.core_files import CoreFiles +from backend.pashub_fetcher.evidence_file_data import EvidenceFileData +from backend.pashub_fetcher.pashub_client import PashubClient + + +def make_client() -> PashubClient: + return PashubClient(token="test-token") + + +def make_file( + file_name: str = "unknown.pdf", + evidence_category: Optional[str] = None, + created_utc: str = "2024-01-01T00:00:00", +) -> EvidenceFileData: + return EvidenceFileData( + file_id="id-1", + file_name=file_name, + created_utc=created_utc, + file_size=1024, + file_extension="pdf", + evidence_category=evidence_category, + ) + + +# --------------------------------------------------------------------------- +# _get_core_file_type +# --------------------------------------------------------------------------- + + +def test_get_core_file_type_returns_retrofit_design_doc_for_evidence_category() -> None: + # Arrange + client = make_client() + file = make_file( + file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf", + evidence_category="retrofit design", + ) + + # Act + result = client._get_core_file_type(file) + + # Assert + assert result == CoreFiles.RETROFIT_DESIGN_DOC From f2bbb44207cc9971e8a04436dd8591d16846c2ef Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:10:56 +0000 Subject: [PATCH 06/39] =?UTF-8?q?Retrofit=20design=20doc=20selected=20by?= =?UTF-8?q?=20evidence=5Fcategory=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 20b8590d..11195960 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -87,6 +87,9 @@ class PashubClient: return None def _get_core_file_type(self, file: EvidenceFileData) -> Optional[CoreFiles]: + if file.evidence_category == "retrofit design": + return CoreFiles.RETROFIT_DESIGN_DOC + for core_file in CoreFiles: if file.file_name.startswith(core_file.value): return core_file From 157a36f0cd5801799d2df54cd836b12894b56284 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:14:07 +0000 Subject: [PATCH 07/39] =?UTF-8?q?Evidence=20category=20matching=20is=20cas?= =?UTF-8?q?e-insensitive=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pashub_fetcher/tests/test_pashub_client.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index 4f5aef98..ccf32fa6 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -42,3 +42,18 @@ def test_get_core_file_type_returns_retrofit_design_doc_for_evidence_category() # Assert assert result == CoreFiles.RETROFIT_DESIGN_DOC + + +def test_get_core_file_type_evidence_category_match_is_case_insensitive() -> None: + # Arrange + client = make_client() + file = make_file( + file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf", + evidence_category="Retrofit Design", + ) + + # Act + result = client._get_core_file_type(file) + + # Assert + assert result == CoreFiles.RETROFIT_DESIGN_DOC From 6922ff3e06be9dd1f12b4914aeaa960e25ee08d9 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:16:14 +0000 Subject: [PATCH 08/39] =?UTF-8?q?Evidence=20category=20matching=20is=20cas?= =?UTF-8?q?e-insensitive=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 11195960..d7200a1f 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -87,7 +87,7 @@ class PashubClient: return None def _get_core_file_type(self, file: EvidenceFileData) -> Optional[CoreFiles]: - if file.evidence_category == "retrofit design": + if file.evidence_category is not None and file.evidence_category.lower() == "retrofit design": return CoreFiles.RETROFIT_DESIGN_DOC for core_file in CoreFiles: From 5c652d94852476d469436064dbc940ae7c62f46a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:24:14 +0000 Subject: [PATCH 09/39] =?UTF-8?q?Retrofit=20Design=20Doc=20startswith=20ch?= =?UTF-8?q?eck=20removed=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/tests/test_pashub_client.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index ccf32fa6..8654a137 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -44,6 +44,18 @@ def test_get_core_file_type_returns_retrofit_design_doc_for_evidence_category() assert result == CoreFiles.RETROFIT_DESIGN_DOC +def test_get_core_file_type_returns_improvement_option_evaluation_via_substring() -> None: + # Arrange + client = make_client() + file = make_file(file_name="6000802 - NG4 4HD - Improvement Option Evaluation.pdf") + + # Act + result = client._get_core_file_type(file) + + # Assert + assert result == CoreFiles.IMPROVEMENT_OPTION_EVALUATION + + def test_get_core_file_type_evidence_category_match_is_case_insensitive() -> None: # Arrange client = make_client() From a1f6ffd6b39f9b1b077cf98cf2346d2414c1c0c0 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:24:34 +0000 Subject: [PATCH 10/39] =?UTF-8?q?Improvement=20Option=20Evaluation=20selec?= =?UTF-8?q?ted=20via=20substring=20match=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index d7200a1f..ba0f0221 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -90,6 +90,9 @@ class PashubClient: if file.evidence_category is not None and file.evidence_category.lower() == "retrofit design": return CoreFiles.RETROFIT_DESIGN_DOC + if CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in file.file_name: + return CoreFiles.IMPROVEMENT_OPTION_EVALUATION + for core_file in CoreFiles: if file.file_name.startswith(core_file.value): return core_file From d99d8a33479470156c671dd440e0e5e61269380f Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:25:02 +0000 Subject: [PATCH 11/39] =?UTF-8?q?Medium=20Term=20Improvement=20Plan=20sele?= =?UTF-8?q?cted=20via=20substring=20match=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/tests/test_pashub_client.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index 8654a137..9b99cf5c 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -56,6 +56,18 @@ def test_get_core_file_type_returns_improvement_option_evaluation_via_substring( assert result == CoreFiles.IMPROVEMENT_OPTION_EVALUATION +def test_get_core_file_type_returns_medium_term_improvement_plan_via_substring() -> None: + # Arrange + client = make_client() + file = make_file(file_name="60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") + + # Act + result = client._get_core_file_type(file) + + # Assert + assert result == CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN + + def test_get_core_file_type_evidence_category_match_is_case_insensitive() -> None: # Arrange client = make_client() From 084c8218a6c5acbc532d8d41ced6cd2eb364e402 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:25:57 +0000 Subject: [PATCH 12/39] =?UTF-8?q?Medium=20Term=20Improvement=20Plan=20sele?= =?UTF-8?q?cted=20via=20substring=20match=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index ba0f0221..556884fe 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -93,6 +93,9 @@ class PashubClient: if CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in file.file_name: return CoreFiles.IMPROVEMENT_OPTION_EVALUATION + if CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in file.file_name: + return CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN + for core_file in CoreFiles: if file.file_name.startswith(core_file.value): return core_file From a8e876d83d1e5b0bf7f204a8401e76b7fafe3170 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:26:34 +0000 Subject: [PATCH 13/39] =?UTF-8?q?Prefix=20and=20unknown=20file=20matching?= =?UTF-8?q?=20behaviour=20documented=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_pashub_client.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index 9b99cf5c..036e50bc 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -68,6 +68,30 @@ def test_get_core_file_type_returns_medium_term_improvement_plan_via_substring() assert result == CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN +def test_get_core_file_type_returns_photopack_via_prefix() -> None: + # Arrange + client = make_client() + file = make_file(file_name="Photopack_123456_V1.pdf") + + # Act + result = client._get_core_file_type(file) + + # Assert + assert result == CoreFiles.PHOTOPACK + + +def test_get_core_file_type_returns_none_for_unknown_file() -> None: + # Arrange + client = make_client() + file = make_file(file_name="unknown_document_123.pdf") + + # Act + result = client._get_core_file_type(file) + + # Assert + assert result is None + + def test_get_core_file_type_evidence_category_match_is_case_insensitive() -> None: # Arrange client = make_client() From 506dc92aa3ccc9ef4b3b7f6ab0351e0c76dd7ec8 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:27:42 +0000 Subject: [PATCH 14/39] =?UTF-8?q?=5Fselect=5Flatest=5Fcore=5Ffiles=20retur?= =?UTF-8?q?ns=20single=20retrofit=20design=20doc=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_pashub_client.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index 036e50bc..334f2de0 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -1,3 +1,4 @@ +# pyright: reportPrivateUsage=false from typing import Optional from backend.pashub_fetcher.core_files import CoreFiles @@ -92,6 +93,29 @@ def test_get_core_file_type_returns_none_for_unknown_file() -> None: assert result is None +# --------------------------------------------------------------------------- +# _select_latest_core_files +# --------------------------------------------------------------------------- + + +def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None: + # Arrange + client = make_client() + files = [ + make_file( + file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf", + evidence_category="retrofit design", + created_utc="2024-06-01T00:00:00", + ) + ] + + # Act + result = client._select_latest_core_files(files) + + # Assert + assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + + def test_get_core_file_type_evidence_category_match_is_case_insensitive() -> None: # Arrange client = make_client() From b685008e5ee1816588dedc096866a63764fc9c2a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:28:19 +0000 Subject: [PATCH 15/39] =?UTF-8?q?OSM=20candidate=20wins=20over=20non-OSM?= =?UTF-8?q?=20retrofit=20design=20doc=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_pashub_client.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index 334f2de0..646ff3bc 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -116,6 +116,29 @@ def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None: assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" +def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None: + # Arrange - the non-OSM file is newer but should lose to the OSM file + client = make_client() + files = [ + make_file( + file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf", + evidence_category="retrofit design", + created_utc="2024-01-01T00:00:00", + ), + make_file( + file_name="Retrofit Design Doc non-osm variant.pdf", + evidence_category="retrofit design", + created_utc="2024-06-01T00:00:00", + ), + ] + + # Act + result = client._select_latest_core_files(files) + + # Assert + assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + + def test_get_core_file_type_evidence_category_match_is_case_insensitive() -> None: # Arrange client = make_client() From aff79d4151da0b8b0958a34b8090abac7a27260b Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:28:50 +0000 Subject: [PATCH 16/39] =?UTF-8?q?OSM=20candidate=20wins=20over=20non-OSM?= =?UTF-8?q?=20retrofit=20design=20doc=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 556884fe..4435c278 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -116,6 +116,9 @@ class PashubClient: latest_files: Dict[CoreFiles, EvidenceFileData] = {} for core_type, group in grouped.items(): + if core_type == CoreFiles.RETROFIT_DESIGN_DOC and len(group) > 1: + osm_candidates = [f for f in group if "-OSM-" in f.file_name] + group = osm_candidates if osm_candidates else group latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc)) latest_files[core_type] = latest From 3fe85a635ca94aca2af08ee71b7ee59e9495b106 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:29:24 +0000 Subject: [PATCH 17/39] =?UTF-8?q?Latest=20wins=20when=20both=20retrofit=20?= =?UTF-8?q?design=20doc=20candidates=20have=20OSM=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_pashub_client.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index 646ff3bc..7f0663db 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -139,6 +139,29 @@ def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None: assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" +def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -> None: + # Arrange + client = make_client() + files = [ + make_file( + file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf", + evidence_category="retrofit design", + created_utc="2024-01-01T00:00:00", + ), + make_file( + file_name="2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf", + evidence_category="retrofit design", + created_utc="2024-06-01T00:00:00", + ), + ] + + # Act + result = client._select_latest_core_files(files) + + # Assert + assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf" + + def test_get_core_file_type_evidence_category_match_is_case_insensitive() -> None: # Arrange client = make_client() From 9a04d89cae07671fbe182334df59e079a22f5e78 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:29:54 +0000 Subject: [PATCH 18/39] =?UTF-8?q?Latest=20wins=20as=20fallback=20when=20no?= =?UTF-8?q?=20OSM=20retrofit=20design=20doc=20candidates=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_pashub_client.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index 7f0663db..9ee8948a 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -162,6 +162,29 @@ def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf" +def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() -> None: + # Arrange + client = make_client() + files = [ + make_file( + file_name="retrofit_design_v1.pdf", + evidence_category="retrofit design", + created_utc="2024-01-01T00:00:00", + ), + make_file( + file_name="retrofit_design_v2.pdf", + evidence_category="retrofit design", + created_utc="2024-06-01T00:00:00", + ), + ] + + # Act + result = client._select_latest_core_files(files) + + # Assert + assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf" + + def test_get_core_file_type_evidence_category_match_is_case_insensitive() -> None: # Arrange client = make_client() From 16af543560f559c005f649a47b05c60cce2b2c94 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 13 May 2026 16:32:44 +0000 Subject: [PATCH 19/39] =?UTF-8?q?Consolidate=20three-tier=20matching=20and?= =?UTF-8?q?=20tidy=20test=20ordering=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 13 ++++---- .../tests/test_pashub_client.py | 30 +++++++++---------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 4435c278..25bf7b72 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -90,13 +90,16 @@ class PashubClient: if file.evidence_category is not None and file.evidence_category.lower() == "retrofit design": return CoreFiles.RETROFIT_DESIGN_DOC - if CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in file.file_name: - return CoreFiles.IMPROVEMENT_OPTION_EVALUATION - - if CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in file.file_name: - return CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN + for core_file in ( + CoreFiles.IMPROVEMENT_OPTION_EVALUATION, + CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN, + ): + if core_file.value in file.file_name: + return core_file for core_file in CoreFiles: + if core_file is CoreFiles.RETROFIT_DESIGN_DOC: + continue if file.file_name.startswith(core_file.value): return core_file return None diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index 9ee8948a..7fd10381 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -45,6 +45,21 @@ def test_get_core_file_type_returns_retrofit_design_doc_for_evidence_category() assert result == CoreFiles.RETROFIT_DESIGN_DOC +def test_get_core_file_type_evidence_category_match_is_case_insensitive() -> None: + # Arrange + client = make_client() + file = make_file( + file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf", + evidence_category="Retrofit Design", + ) + + # Act + result = client._get_core_file_type(file) + + # Assert + assert result == CoreFiles.RETROFIT_DESIGN_DOC + + def test_get_core_file_type_returns_improvement_option_evaluation_via_substring() -> None: # Arrange client = make_client() @@ -183,18 +198,3 @@ def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() # Assert assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf" - - -def test_get_core_file_type_evidence_category_match_is_case_insensitive() -> None: - # Arrange - client = make_client() - file = make_file( - file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf", - evidence_category="Retrofit Design", - ) - - # Act - result = client._get_core_file_type(file) - - # Assert - assert result == CoreFiles.RETROFIT_DESIGN_DOC From 664c9b91fa9e280766dbadda11a065b6c044d0a9 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 07:38:43 +0000 Subject: [PATCH 20/39] delete incorrect comment in test --- .../pashub_fetcher/tests/test_core_files.py | 51 ++++++++++++++----- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index f8e8b431..8715f6ca 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -1,6 +1,5 @@ from backend.pashub_fetcher.core_files import infer_file_type - # --- GREEN: pre-existing file types (startswith match) --- @@ -13,15 +12,22 @@ def test_infer_sitenote(): def test_infer_rdsap_sitenote(): - assert infer_file_type("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note" + assert ( + infer_file_type("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note" + ) def test_infer_pas2023_ventilation(): - assert infer_file_type("PAS 2023 Ventilation Assessment Report_123456.pdf") == "pas_2023_ventilation" + assert ( + infer_file_type("PAS 2023 Ventilation Assessment Report_123456.pdf") + == "pas_2023_ventilation" + ) def test_infer_pas2023_condition(): - assert infer_file_type("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition" + assert ( + infer_file_type("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition" + ) def test_infer_pas_significance(): @@ -29,34 +35,51 @@ def test_infer_pas_significance(): def test_infer_par_photopack(): - assert infer_file_type("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack" + assert ( + infer_file_type("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack" + ) def test_infer_pas2023_property(): - assert infer_file_type("PAS 2023 Property Assessment Report_123456.pdf") == "pas_2023_property" + assert ( + infer_file_type("PAS 2023 Property Assessment Report_123456.pdf") + == "pas_2023_property" + ) def test_infer_pas2023_occupancy(): - assert infer_file_type("PAS 2023 Occupancy Assessment Report_123456.pdf") == "pas_2023_occupancy" + assert ( + infer_file_type("PAS 2023 Occupancy Assessment Report_123456.pdf") + == "pas_2023_occupancy" + ) def test_infer_unknown_returns_none(): assert infer_file_type("unknown_document_123.pdf") is None -# --- RED: new file types (suffix match not yet implemented) --- - - def test_infer_improvement_option_evaluation(): # filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf" - assert infer_file_type("6000802 - NG4 4HD - Improvement Option Evaluation.pdf") == "improvement_option_evaluation" + assert ( + infer_file_type("6000802 - NG4 4HD - Improvement Option Evaluation.pdf") + == "improvement_option_evaluation" + ) def test_infer_medium_term_improvement_plan(): # filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf" - assert infer_file_type("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") == "medium_term_improvement_plan" + assert ( + infer_file_type("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") + == "medium_term_improvement_plan" + ) def test_infer_retrofit_design_doc(): - assert infer_file_type("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf") == "retrofit_design_doc" - assert infer_file_type("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf") == "retrofit_design_doc" + assert ( + infer_file_type("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf") + == "retrofit_design_doc" + ) + assert ( + infer_file_type("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf") + == "retrofit_design_doc" + ) From 75093fc8333b1cb2ff80cca61e4588e73a448f6a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 07:38:58 +0000 Subject: [PATCH 21/39] delete incorrect comment in test --- backend/pashub_fetcher/tests/test_core_files.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index 8715f6ca..8bd31f15 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -1,7 +1,5 @@ from backend.pashub_fetcher.core_files import infer_file_type -# --- GREEN: pre-existing file types (startswith match) --- - def test_infer_photopack(): assert infer_file_type("Photopack_123456_V1.pdf") == "photo_pack" From 1a789ec609c4b6ca6afe3ea83e9a753687f8a0a4 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:37:32 +0000 Subject: [PATCH 22/39] =?UTF-8?q?new=20core=5Ffile=5Ffor=20function=20iden?= =?UTF-8?q?tifies=20CoreFiles=20type=20from=20filename=20and=20evidence=20?= =?UTF-8?q?category=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/core_files.py | 6 ++++++ .../pashub_fetcher/tests/test_core_files.py | 18 +++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 3e69bf9a..050dde27 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -62,6 +62,12 @@ _MATCHERS: list[tuple[Callable[[str], bool], str]] = [ ] +def core_file_for( + filename: str, evidence_category: Optional[str] = None +) -> Optional[CoreFiles]: + raise NotImplementedError + + def infer_file_type(filename: str) -> Optional[str]: for matcher, file_type in _MATCHERS: if matcher(filename): diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index 8bd31f15..5ac6b4f7 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -1,4 +1,4 @@ -from backend.pashub_fetcher.core_files import infer_file_type +from backend.pashub_fetcher.core_files import CoreFiles, core_file_for, infer_file_type def test_infer_photopack(): @@ -81,3 +81,19 @@ def test_infer_retrofit_design_doc(): infer_file_type("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf") == "retrofit_design_doc" ) + + +# --------------------------------------------------------------------------- +# core_file_for +# --------------------------------------------------------------------------- + + +def test_core_file_for_evidence_category_returns_retrofit_design_doc() -> None: + # Arrange + filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + + # Act + result = core_file_for(filename, evidence_category="retrofit design") + + # Assert + assert result == CoreFiles.RETROFIT_DESIGN_DOC From 9adb467a02e42d1d0a82285f1acafa4c344deb1d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:38:36 +0000 Subject: [PATCH 23/39] =?UTF-8?q?new=20core=5Ffile=5Ffor=20function=20iden?= =?UTF-8?q?tifies=20CoreFiles=20type=20from=20filename=20and=20evidence=20?= =?UTF-8?q?category=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/core_files.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 050dde27..07297653 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -65,6 +65,8 @@ _MATCHERS: list[tuple[Callable[[str], bool], str]] = [ def core_file_for( filename: str, evidence_category: Optional[str] = None ) -> Optional[CoreFiles]: + if evidence_category is not None and evidence_category.lower() == "retrofit design": + return CoreFiles.RETROFIT_DESIGN_DOC raise NotImplementedError From e312dd26146115b467437ee60f93de4cb76125ee Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:39:11 +0000 Subject: [PATCH 24/39] =?UTF-8?q?core=5Ffile=5Ffor=20evidence=5Fcategory?= =?UTF-8?q?=20match=20is=20case-insensitive=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/tests/test_core_files.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index 5ac6b4f7..f968a976 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -88,6 +88,17 @@ def test_infer_retrofit_design_doc(): # --------------------------------------------------------------------------- +def test_core_file_for_evidence_category_match_is_case_insensitive() -> None: + # Arrange + filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + + # Act + result = core_file_for(filename, evidence_category="Retrofit Design") + + # Assert + assert result == CoreFiles.RETROFIT_DESIGN_DOC + + def test_core_file_for_evidence_category_returns_retrofit_design_doc() -> None: # Arrange filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" From 9bbd5f1ff9fc0810383c73a2d7bc8863c4f2c258 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:39:58 +0000 Subject: [PATCH 25/39] =?UTF-8?q?core=5Ffile=5Ffor=20identifies=20IOE=20fi?= =?UTF-8?q?les=20via=20filename=20substring=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/tests/test_core_files.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index f968a976..c6970def 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -108,3 +108,14 @@ def test_core_file_for_evidence_category_returns_retrofit_design_doc() -> None: # Assert assert result == CoreFiles.RETROFIT_DESIGN_DOC + + +def test_core_file_for_ioe_substring_returns_improvement_option_evaluation() -> None: + # Arrange + filename = "6000802 - NG4 4HD - Improvement Option Evaluation.pdf" + + # Act + result = core_file_for(filename) + + # Assert + assert result == CoreFiles.IMPROVEMENT_OPTION_EVALUATION From 46355be3f1e24d10662583afa3b8b55f3a1d8cc6 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:40:21 +0000 Subject: [PATCH 26/39] =?UTF-8?q?core=5Ffile=5Ffor=20identifies=20IOE=20fi?= =?UTF-8?q?les=20via=20filename=20substring=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/core_files.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 07297653..72ef15f8 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -67,6 +67,8 @@ def core_file_for( ) -> Optional[CoreFiles]: if evidence_category is not None and evidence_category.lower() == "retrofit design": return CoreFiles.RETROFIT_DESIGN_DOC + if CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in filename: + return CoreFiles.IMPROVEMENT_OPTION_EVALUATION raise NotImplementedError From 176239475a977943bf81e6bddb9d042bbbb5d014 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:40:49 +0000 Subject: [PATCH 27/39] =?UTF-8?q?core=5Ffile=5Ffor=20identifies=20MTIP=20f?= =?UTF-8?q?iles=20via=20filename=20substring=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/tests/test_core_files.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index c6970def..85e7607e 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -119,3 +119,14 @@ def test_core_file_for_ioe_substring_returns_improvement_option_evaluation() -> # Assert assert result == CoreFiles.IMPROVEMENT_OPTION_EVALUATION + + +def test_core_file_for_mtip_substring_returns_medium_term_improvement_plan() -> None: + # Arrange + filename = "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf" + + # Act + result = core_file_for(filename) + + # Assert + assert result == CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN From 4d3d6dba05477bef466f64dde09f4d88956efad0 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:41:26 +0000 Subject: [PATCH 28/39] =?UTF-8?q?core=5Ffile=5Ffor=20identifies=20MTIP=20f?= =?UTF-8?q?iles=20via=20filename=20substring=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/core_files.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 72ef15f8..4b1023d2 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -69,6 +69,8 @@ def core_file_for( return CoreFiles.RETROFIT_DESIGN_DOC if CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in filename: return CoreFiles.IMPROVEMENT_OPTION_EVALUATION + if CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in filename: + return CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN raise NotImplementedError From e940e75a43f1a3aebe8a78dc7bd06d4c648997fb Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:41:52 +0000 Subject: [PATCH 29/39] =?UTF-8?q?core=5Ffile=5Ffor=20falls=20back=20to=20O?= =?UTF-8?q?SM=20filename=20pattern=20for=20Retrofit=20Design=20Doc=20?= =?UTF-8?q?=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/tests/test_core_files.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index 85e7607e..7b991c23 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -130,3 +130,14 @@ def test_core_file_for_mtip_substring_returns_medium_term_improvement_plan() -> # Assert assert result == CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN + + +def test_core_file_for_osm_pattern_returns_retrofit_design_doc_without_evidence_category() -> None: + # Arrange + filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + + # Act + result = core_file_for(filename) + + # Assert + assert result == CoreFiles.RETROFIT_DESIGN_DOC From 3ef8a591223ea50ade12b36545dda1f92542abee Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:43:04 +0000 Subject: [PATCH 30/39] =?UTF-8?q?core=5Ffile=5Ffor=20falls=20back=20to=20O?= =?UTF-8?q?SM=20filename=20pattern=20for=20Retrofit=20Design=20Doc=20?= =?UTF-8?q?=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/core_files.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 4b1023d2..75981cb1 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -71,6 +71,8 @@ def core_file_for( return CoreFiles.IMPROVEMENT_OPTION_EVALUATION if CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in filename: return CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN + if evidence_category is None and "-OSM-" in filename and "DR-N-A" in filename: + return CoreFiles.RETROFIT_DESIGN_DOC raise NotImplementedError From a2dc945bf38005826a6bc713e3d93ca30b5a79e0 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:43:41 +0000 Subject: [PATCH 31/39] =?UTF-8?q?core=5Ffile=5Ffor=20matches=20remaining?= =?UTF-8?q?=20core=20file=20types=20via=20filename=20prefix=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/tests/test_core_files.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index 7b991c23..f87d8679 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -141,3 +141,14 @@ def test_core_file_for_osm_pattern_returns_retrofit_design_doc_without_evidence_ # Assert assert result == CoreFiles.RETROFIT_DESIGN_DOC + + +def test_core_file_for_prefix_returns_photopack() -> None: + # Arrange + filename = "Photopack_123456_V1.pdf" + + # Act + result = core_file_for(filename) + + # Assert + assert result == CoreFiles.PHOTOPACK From 605f2e3d1e1f5bdc3ceaa953fd7150f938ade72f Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:45:18 +0000 Subject: [PATCH 32/39] =?UTF-8?q?core=5Ffile=5Ffor=20matches=20remaining?= =?UTF-8?q?=20core=20file=20types=20via=20filename=20prefix=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/core_files.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 75981cb1..87a4044a 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -73,7 +73,17 @@ def core_file_for( return CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN if evidence_category is None and "-OSM-" in filename and "DR-N-A" in filename: return CoreFiles.RETROFIT_DESIGN_DOC - raise NotImplementedError + _prefix_skip = { + CoreFiles.RETROFIT_DESIGN_DOC, + CoreFiles.IMPROVEMENT_OPTION_EVALUATION, + CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN, + } + for core_file in CoreFiles: + if core_file in _prefix_skip: + continue + if filename.startswith(core_file.value): + return core_file + return None def infer_file_type(filename: str) -> Optional[str]: From d4cc00b5e31d7b6653ccb0a7f1307b2638dc2a12 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:46:10 +0000 Subject: [PATCH 33/39] =?UTF-8?q?core=5Ffile=5Ffor=20returns=20None=20for?= =?UTF-8?q?=20unrecognised=20filenames=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/tests/test_core_files.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index f87d8679..2b20803c 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -152,3 +152,14 @@ def test_core_file_for_prefix_returns_photopack() -> None: # Assert assert result == CoreFiles.PHOTOPACK + + +def test_core_file_for_unknown_filename_returns_none() -> None: + # Arrange + filename = "unknown_document_123.pdf" + + # Act + result = core_file_for(filename) + + # Assert + assert result is None From 541d5965b7619090b9d1a564761e424cba37d86e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:46:48 +0000 Subject: [PATCH 34/39] =?UTF-8?q?core=5Ffile=5Ffor=20OSM=20fallback=20is?= =?UTF-8?q?=20suppressed=20when=20evidence=5Fcategory=20is=20present=20?= =?UTF-8?q?=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/tests/test_core_files.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index 2b20803c..e97df476 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -163,3 +163,14 @@ def test_core_file_for_unknown_filename_returns_none() -> None: # Assert assert result is None + + +def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present() -> None: + # Arrange — OSM+DR-N-A filename but evidence_category is something other than retrofit design + filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + + # Act + result = core_file_for(filename, evidence_category="some other category") + + # Assert + assert result is None From 5e31c0f3dadd4d4da36dc023612388ef66f5b4c9 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:51:28 +0000 Subject: [PATCH 35/39] =?UTF-8?q?file=5Ftype=5Ffor=20delegates=20to=20core?= =?UTF-8?q?=5Ffile=5Ffor;=20=5FMATCHERS=20removed=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/core_files.py | 67 ++++++------------- backend/pashub_fetcher/pashub_service.py | 4 +- .../pashub_fetcher/tests/test_core_files.py | 30 ++++----- 3 files changed, 37 insertions(+), 64 deletions(-) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 87a4044a..01ae189f 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Callable, Optional +from typing import Optional from backend.app.db.models.uploaded_file import FileTypeEnum @@ -19,47 +19,20 @@ class CoreFiles(Enum): RETROFIT_DESIGN_DOC = "Retrofit Design Doc" -_MATCHERS: list[tuple[Callable[[str], bool], str]] = [ - (lambda f: f.startswith(CoreFiles.PHOTOPACK.value), FileTypeEnum.PHOTO_PACK.value), - (lambda f: f.startswith(CoreFiles.SITENOTE.value), FileTypeEnum.SITE_NOTE.value), - ( - lambda f: f.startswith(CoreFiles.RDSAP_SITENOTE.value), - FileTypeEnum.RD_SAP_SITE_NOTE.value, - ), - ( - lambda f: f.startswith(CoreFiles.PAS2023_VENTILATION.value), - FileTypeEnum.PAS_2023_VENTILATION.value, - ), - ( - lambda f: f.startswith(CoreFiles.PAS2023_CONDITION.value), - FileTypeEnum.PAS_2023_CONDITION.value, - ), - ( - lambda f: f.startswith(CoreFiles.PAS_SIGNIFICANCE.value), - FileTypeEnum.PAS_SIGNIFICANCE.value, - ), - ( - lambda f: f.startswith(CoreFiles.PAR_PHOTOPACK.value), - FileTypeEnum.PAR_PHOTO_PACK.value, - ), - ( - lambda f: f.startswith(CoreFiles.PAS2023_PROPERTY.value), - FileTypeEnum.PAS_2023_PROPERTY.value, - ), - ( - lambda f: f.startswith(CoreFiles.PAS2023_OCCUPANCY.value), - FileTypeEnum.PAS_2023_OCCUPANCY.value, - ), - ( - lambda f: CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in f, - FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value, - ), - ( - lambda f: CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in f, - FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value, - ), - (lambda f: "-OSM-" in f and "DR-N-A" in f, FileTypeEnum.RETROFIT_DESIGN_DOC.value), -] +_CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { + CoreFiles.PHOTOPACK: FileTypeEnum.PHOTO_PACK.value, + CoreFiles.SITENOTE: FileTypeEnum.SITE_NOTE.value, + CoreFiles.RDSAP_SITENOTE: FileTypeEnum.RD_SAP_SITE_NOTE.value, + CoreFiles.PAS2023_VENTILATION: FileTypeEnum.PAS_2023_VENTILATION.value, + CoreFiles.PAS2023_CONDITION: FileTypeEnum.PAS_2023_CONDITION.value, + CoreFiles.PAS_SIGNIFICANCE: FileTypeEnum.PAS_SIGNIFICANCE.value, + CoreFiles.PAR_PHOTOPACK: FileTypeEnum.PAR_PHOTO_PACK.value, + CoreFiles.PAS2023_PROPERTY: FileTypeEnum.PAS_2023_PROPERTY.value, + CoreFiles.PAS2023_OCCUPANCY: FileTypeEnum.PAS_2023_OCCUPANCY.value, + CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value, + CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value, + CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value, +} def core_file_for( @@ -86,8 +59,8 @@ def core_file_for( return None -def infer_file_type(filename: str) -> Optional[str]: - for matcher, file_type in _MATCHERS: - if matcher(filename): - return file_type - return None +def file_type_for(filename: str) -> Optional[str]: + core_file = core_file_for(filename) + if core_file is None: + return None + return _CORE_FILE_TO_FILE_TYPE[core_file] diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index 316902f4..ec623f7a 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -10,7 +10,7 @@ from backend.app.db.models.uploaded_file import ( ) from backend.documents_parser.db_writer import save_epc_property_data from backend.documents_parser.parser import parse_site_notes_pdf -from backend.pashub_fetcher.core_files import infer_file_type +from backend.pashub_fetcher.core_files import file_type_for from backend.pashub_fetcher.pashub_client import PashubClient from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, @@ -109,7 +109,7 @@ class PashubService: uprn=int(uprn) if uprn else None, hubspot_deal_id=hubspot_deal_id, file_source=FileSourceEnum.PAS_HUB.value, - file_type=infer_file_type(filename), + file_type=file_type_for(filename), ) file_paths.append(file_path) uploaded_files.append(uploaded_file) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index e97df476..09fcdcb2 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -1,65 +1,65 @@ -from backend.pashub_fetcher.core_files import CoreFiles, core_file_for, infer_file_type +from backend.pashub_fetcher.core_files import CoreFiles, core_file_for, file_type_for def test_infer_photopack(): - assert infer_file_type("Photopack_123456_V1.pdf") == "photo_pack" + assert file_type_for("Photopack_123456_V1.pdf") == "photo_pack" def test_infer_sitenote(): - assert infer_file_type("SiteNote_123456_V1.pdf") == "site_note" + assert file_type_for("SiteNote_123456_V1.pdf") == "site_note" def test_infer_rdsap_sitenote(): assert ( - infer_file_type("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note" + file_type_for("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note" ) def test_infer_pas2023_ventilation(): assert ( - infer_file_type("PAS 2023 Ventilation Assessment Report_123456.pdf") + file_type_for("PAS 2023 Ventilation Assessment Report_123456.pdf") == "pas_2023_ventilation" ) def test_infer_pas2023_condition(): assert ( - infer_file_type("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition" + file_type_for("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition" ) def test_infer_pas_significance(): - assert infer_file_type("PAS Significance_123456.pdf") == "pas_significance" + assert file_type_for("PAS Significance_123456.pdf") == "pas_significance" def test_infer_par_photopack(): assert ( - infer_file_type("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack" + file_type_for("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack" ) def test_infer_pas2023_property(): assert ( - infer_file_type("PAS 2023 Property Assessment Report_123456.pdf") + file_type_for("PAS 2023 Property Assessment Report_123456.pdf") == "pas_2023_property" ) def test_infer_pas2023_occupancy(): assert ( - infer_file_type("PAS 2023 Occupancy Assessment Report_123456.pdf") + file_type_for("PAS 2023 Occupancy Assessment Report_123456.pdf") == "pas_2023_occupancy" ) def test_infer_unknown_returns_none(): - assert infer_file_type("unknown_document_123.pdf") is None + assert file_type_for("unknown_document_123.pdf") is None def test_infer_improvement_option_evaluation(): # filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf" assert ( - infer_file_type("6000802 - NG4 4HD - Improvement Option Evaluation.pdf") + file_type_for("6000802 - NG4 4HD - Improvement Option Evaluation.pdf") == "improvement_option_evaluation" ) @@ -67,18 +67,18 @@ def test_infer_improvement_option_evaluation(): def test_infer_medium_term_improvement_plan(): # filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf" assert ( - infer_file_type("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") + file_type_for("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") == "medium_term_improvement_plan" ) def test_infer_retrofit_design_doc(): assert ( - infer_file_type("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf") + file_type_for("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf") == "retrofit_design_doc" ) assert ( - infer_file_type("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf") + file_type_for("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf") == "retrofit_design_doc" ) From fb9bdbc585940e4afe714c152cfc52b48559336d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 08:53:56 +0000 Subject: [PATCH 36/39] =?UTF-8?q?=5Fselect=5Flatest=5Fcore=5Ffiles=20deleg?= =?UTF-8?q?ates=20to=20core=5Ffile=5Ffor;=20=5Fget=5Fcore=5Ffile=5Ftype=20?= =?UTF-8?q?removed=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 22 +---- .../tests/test_pashub_client.py | 83 ------------------- 2 files changed, 2 insertions(+), 103 deletions(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 25bf7b72..f851c410 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -5,7 +5,7 @@ from datetime import datetime import requests -from backend.pashub_fetcher.core_files import CoreFiles +from backend.pashub_fetcher.core_files import CoreFiles, core_file_for from backend.pashub_fetcher.evidence_file_data import EvidenceFileData from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata from utils.logger import setup_logger @@ -86,24 +86,6 @@ class PashubClient: except Exception: return None - def _get_core_file_type(self, file: EvidenceFileData) -> Optional[CoreFiles]: - if file.evidence_category is not None and file.evidence_category.lower() == "retrofit design": - return CoreFiles.RETROFIT_DESIGN_DOC - - for core_file in ( - CoreFiles.IMPROVEMENT_OPTION_EVALUATION, - CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN, - ): - if core_file.value in file.file_name: - return core_file - - for core_file in CoreFiles: - if core_file is CoreFiles.RETROFIT_DESIGN_DOC: - continue - if file.file_name.startswith(core_file.value): - return core_file - return None - def _select_latest_core_files( self, files: List[EvidenceFileData], @@ -111,7 +93,7 @@ class PashubClient: grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list) for file in files: - core_type = self._get_core_file_type(file) + core_type = core_file_for(file.file_name, file.evidence_category) if not core_type: continue grouped[core_type].append(file) diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index 7fd10381..34260c73 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -25,89 +25,6 @@ def make_file( ) -# --------------------------------------------------------------------------- -# _get_core_file_type -# --------------------------------------------------------------------------- - - -def test_get_core_file_type_returns_retrofit_design_doc_for_evidence_category() -> None: - # Arrange - client = make_client() - file = make_file( - file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf", - evidence_category="retrofit design", - ) - - # Act - result = client._get_core_file_type(file) - - # Assert - assert result == CoreFiles.RETROFIT_DESIGN_DOC - - -def test_get_core_file_type_evidence_category_match_is_case_insensitive() -> None: - # Arrange - client = make_client() - file = make_file( - file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf", - evidence_category="Retrofit Design", - ) - - # Act - result = client._get_core_file_type(file) - - # Assert - assert result == CoreFiles.RETROFIT_DESIGN_DOC - - -def test_get_core_file_type_returns_improvement_option_evaluation_via_substring() -> None: - # Arrange - client = make_client() - file = make_file(file_name="6000802 - NG4 4HD - Improvement Option Evaluation.pdf") - - # Act - result = client._get_core_file_type(file) - - # Assert - assert result == CoreFiles.IMPROVEMENT_OPTION_EVALUATION - - -def test_get_core_file_type_returns_medium_term_improvement_plan_via_substring() -> None: - # Arrange - client = make_client() - file = make_file(file_name="60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") - - # Act - result = client._get_core_file_type(file) - - # Assert - assert result == CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN - - -def test_get_core_file_type_returns_photopack_via_prefix() -> None: - # Arrange - client = make_client() - file = make_file(file_name="Photopack_123456_V1.pdf") - - # Act - result = client._get_core_file_type(file) - - # Assert - assert result == CoreFiles.PHOTOPACK - - -def test_get_core_file_type_returns_none_for_unknown_file() -> None: - # Arrange - client = make_client() - file = make_file(file_name="unknown_document_123.pdf") - - # Act - result = client._get_core_file_type(file) - - # Assert - assert result is None - - # --------------------------------------------------------------------------- # _select_latest_core_files # --------------------------------------------------------------------------- From e8b7cfdcec0c62389759ba4d7ce8642994df062e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 09:01:56 +0000 Subject: [PATCH 37/39] =?UTF-8?q?remove=20redundant=20unknown-file=20test;?= =?UTF-8?q?=20rename=20test=5Finfer=5F*=20to=20test=5Ffile=5Ftype=5Ffor=5F?= =?UTF-8?q?*=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/core_files.py | 9 ++++++ .../pashub_fetcher/tests/test_core_files.py | 28 ++++++++----------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 01ae189f..e668ba7f 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -38,24 +38,33 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { def core_file_for( filename: str, evidence_category: Optional[str] = None ) -> Optional[CoreFiles]: + # Identify retrofit design doc using evidence category as the name is possibly unreliable. + # We might change to always use evidence category, but needs more investigation if evidence_category is not None and evidence_category.lower() == "retrofit design": return CoreFiles.RETROFIT_DESIGN_DOC + if CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in filename: return CoreFiles.IMPROVEMENT_OPTION_EVALUATION + if CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in filename: return CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN + if evidence_category is None and "-OSM-" in filename and "DR-N-A" in filename: return CoreFiles.RETROFIT_DESIGN_DOC + _prefix_skip = { CoreFiles.RETROFIT_DESIGN_DOC, CoreFiles.IMPROVEMENT_OPTION_EVALUATION, CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN, } + for core_file in CoreFiles: if core_file in _prefix_skip: continue + if filename.startswith(core_file.value): return core_file + return None diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index 09fcdcb2..ee91298e 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -1,62 +1,58 @@ from backend.pashub_fetcher.core_files import CoreFiles, core_file_for, file_type_for -def test_infer_photopack(): +def test_file_type_for_photopack(): assert file_type_for("Photopack_123456_V1.pdf") == "photo_pack" -def test_infer_sitenote(): +def test_file_type_for_sitenote(): assert file_type_for("SiteNote_123456_V1.pdf") == "site_note" -def test_infer_rdsap_sitenote(): +def test_file_type_for_rdsap_sitenote(): assert ( file_type_for("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note" ) -def test_infer_pas2023_ventilation(): +def test_file_type_for_pas2023_ventilation(): assert ( file_type_for("PAS 2023 Ventilation Assessment Report_123456.pdf") == "pas_2023_ventilation" ) -def test_infer_pas2023_condition(): +def test_file_type_for_pas2023_condition(): assert ( file_type_for("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition" ) -def test_infer_pas_significance(): +def test_file_type_for_pas_significance(): assert file_type_for("PAS Significance_123456.pdf") == "pas_significance" -def test_infer_par_photopack(): +def test_file_type_for_par_photopack(): assert ( file_type_for("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack" ) -def test_infer_pas2023_property(): +def test_file_type_for_pas2023_property(): assert ( file_type_for("PAS 2023 Property Assessment Report_123456.pdf") == "pas_2023_property" ) -def test_infer_pas2023_occupancy(): +def test_file_type_for_pas2023_occupancy(): assert ( file_type_for("PAS 2023 Occupancy Assessment Report_123456.pdf") == "pas_2023_occupancy" ) -def test_infer_unknown_returns_none(): - assert file_type_for("unknown_document_123.pdf") is None - - -def test_infer_improvement_option_evaluation(): +def test_file_type_for_improvement_option_evaluation(): # filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf" assert ( file_type_for("6000802 - NG4 4HD - Improvement Option Evaluation.pdf") @@ -64,7 +60,7 @@ def test_infer_improvement_option_evaluation(): ) -def test_infer_medium_term_improvement_plan(): +def test_file_type_for_medium_term_improvement_plan(): # filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf" assert ( file_type_for("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") @@ -72,7 +68,7 @@ def test_infer_medium_term_improvement_plan(): ) -def test_infer_retrofit_design_doc(): +def test_file_type_for_retrofit_design_doc(): assert ( file_type_for("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf") == "retrofit_design_doc" From faf698eb7162af4a2f08da1379d5ce3f1be41444 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 10:57:37 +0000 Subject: [PATCH 38/39] rename functions and include typehints --- backend/pashub_fetcher/core_files.py | 6 +- backend/pashub_fetcher/pashub_client.py | 7 ++- backend/pashub_fetcher/pashub_service.py | 4 +- .../pashub_fetcher/tests/test_core_files.py | 61 +++++++++++-------- 4 files changed, 46 insertions(+), 32 deletions(-) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index e668ba7f..30aa2ba8 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -35,7 +35,7 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { } -def core_file_for( +def get_core_file_type( filename: str, evidence_category: Optional[str] = None ) -> Optional[CoreFiles]: # Identify retrofit design doc using evidence category as the name is possibly unreliable. @@ -68,8 +68,8 @@ def core_file_for( return None -def file_type_for(filename: str) -> Optional[str]: - core_file = core_file_for(filename) +def get_file_type_string(filename: str) -> Optional[str]: + core_file = get_core_file_type(filename) if core_file is None: return None return _CORE_FILE_TO_FILE_TYPE[core_file] diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index f851c410..7896664d 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -5,12 +5,11 @@ from datetime import datetime import requests -from backend.pashub_fetcher.core_files import CoreFiles, core_file_for +from backend.pashub_fetcher.core_files import CoreFiles, get_core_file_type from backend.pashub_fetcher.evidence_file_data import EvidenceFileData from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata from utils.logger import setup_logger - logger = setup_logger() @@ -93,7 +92,9 @@ class PashubClient: grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list) for file in files: - core_type = core_file_for(file.file_name, file.evidence_category) + core_type: Optional[CoreFiles] = get_core_file_type( + file.file_name, file.evidence_category + ) if not core_type: continue grouped[core_type].append(file) diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index ec623f7a..b3302fd9 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -10,7 +10,7 @@ from backend.app.db.models.uploaded_file import ( ) from backend.documents_parser.db_writer import save_epc_property_data from backend.documents_parser.parser import parse_site_notes_pdf -from backend.pashub_fetcher.core_files import file_type_for +from backend.pashub_fetcher.core_files import get_file_type_string from backend.pashub_fetcher.pashub_client import PashubClient from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, @@ -109,7 +109,7 @@ class PashubService: uprn=int(uprn) if uprn else None, hubspot_deal_id=hubspot_deal_id, file_source=FileSourceEnum.PAS_HUB.value, - file_type=file_type_for(filename), + file_type=get_file_type_string(filename), ) file_paths.append(file_path) uploaded_files.append(uploaded_file) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index ee91298e..3c1d11b8 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -1,53 +1,60 @@ -from backend.pashub_fetcher.core_files import CoreFiles, core_file_for, file_type_for +from backend.pashub_fetcher.core_files import ( + CoreFiles, + get_core_file_type, + get_file_type_string, +) def test_file_type_for_photopack(): - assert file_type_for("Photopack_123456_V1.pdf") == "photo_pack" + assert get_file_type_string("Photopack_123456_V1.pdf") == "photo_pack" def test_file_type_for_sitenote(): - assert file_type_for("SiteNote_123456_V1.pdf") == "site_note" + assert get_file_type_string("SiteNote_123456_V1.pdf") == "site_note" def test_file_type_for_rdsap_sitenote(): assert ( - file_type_for("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note" + get_file_type_string("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") + == "rd_sap_site_note" ) def test_file_type_for_pas2023_ventilation(): assert ( - file_type_for("PAS 2023 Ventilation Assessment Report_123456.pdf") + get_file_type_string("PAS 2023 Ventilation Assessment Report_123456.pdf") == "pas_2023_ventilation" ) def test_file_type_for_pas2023_condition(): assert ( - file_type_for("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition" + get_file_type_string("PAS 2023 Condition Report_123456.pdf") + == "pas_2023_condition" ) def test_file_type_for_pas_significance(): - assert file_type_for("PAS Significance_123456.pdf") == "pas_significance" + assert get_file_type_string("PAS Significance_123456.pdf") == "pas_significance" def test_file_type_for_par_photopack(): assert ( - file_type_for("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack" + get_file_type_string("PAR Photo Pack_95101890_V2_Assessment.pdf") + == "par_photo_pack" ) def test_file_type_for_pas2023_property(): assert ( - file_type_for("PAS 2023 Property Assessment Report_123456.pdf") + get_file_type_string("PAS 2023 Property Assessment Report_123456.pdf") == "pas_2023_property" ) def test_file_type_for_pas2023_occupancy(): assert ( - file_type_for("PAS 2023 Occupancy Assessment Report_123456.pdf") + get_file_type_string("PAS 2023 Occupancy Assessment Report_123456.pdf") == "pas_2023_occupancy" ) @@ -55,7 +62,7 @@ def test_file_type_for_pas2023_occupancy(): def test_file_type_for_improvement_option_evaluation(): # filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf" assert ( - file_type_for("6000802 - NG4 4HD - Improvement Option Evaluation.pdf") + get_file_type_string("6000802 - NG4 4HD - Improvement Option Evaluation.pdf") == "improvement_option_evaluation" ) @@ -63,18 +70,20 @@ def test_file_type_for_improvement_option_evaluation(): def test_file_type_for_medium_term_improvement_plan(): # filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf" assert ( - file_type_for("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf") + get_file_type_string( + "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf" + ) == "medium_term_improvement_plan" ) def test_file_type_for_retrofit_design_doc(): assert ( - file_type_for("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf") + get_file_type_string("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf") == "retrofit_design_doc" ) assert ( - file_type_for("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf") + get_file_type_string("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf") == "retrofit_design_doc" ) @@ -89,7 +98,7 @@ def test_core_file_for_evidence_category_match_is_case_insensitive() -> None: filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" # Act - result = core_file_for(filename, evidence_category="Retrofit Design") + result = get_core_file_type(filename, evidence_category="Retrofit Design") # Assert assert result == CoreFiles.RETROFIT_DESIGN_DOC @@ -100,7 +109,7 @@ def test_core_file_for_evidence_category_returns_retrofit_design_doc() -> None: filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" # Act - result = core_file_for(filename, evidence_category="retrofit design") + result = get_core_file_type(filename, evidence_category="retrofit design") # Assert assert result == CoreFiles.RETROFIT_DESIGN_DOC @@ -111,7 +120,7 @@ def test_core_file_for_ioe_substring_returns_improvement_option_evaluation() -> filename = "6000802 - NG4 4HD - Improvement Option Evaluation.pdf" # Act - result = core_file_for(filename) + result = get_core_file_type(filename) # Assert assert result == CoreFiles.IMPROVEMENT_OPTION_EVALUATION @@ -122,18 +131,20 @@ def test_core_file_for_mtip_substring_returns_medium_term_improvement_plan() -> filename = "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf" # Act - result = core_file_for(filename) + result = get_core_file_type(filename) # Assert assert result == CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN -def test_core_file_for_osm_pattern_returns_retrofit_design_doc_without_evidence_category() -> None: +def test_core_file_for_osm_pattern_returns_retrofit_design_doc_without_evidence_category() -> ( + None +): # Arrange filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" # Act - result = core_file_for(filename) + result = get_core_file_type(filename) # Assert assert result == CoreFiles.RETROFIT_DESIGN_DOC @@ -144,7 +155,7 @@ def test_core_file_for_prefix_returns_photopack() -> None: filename = "Photopack_123456_V1.pdf" # Act - result = core_file_for(filename) + result = get_core_file_type(filename) # Assert assert result == CoreFiles.PHOTOPACK @@ -155,18 +166,20 @@ def test_core_file_for_unknown_filename_returns_none() -> None: filename = "unknown_document_123.pdf" # Act - result = core_file_for(filename) + result = get_core_file_type(filename) # Assert assert result is None -def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present() -> None: +def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present() -> ( + None +): # Arrange — OSM+DR-N-A filename but evidence_category is something other than retrofit design filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" # Act - result = core_file_for(filename, evidence_category="some other category") + result = get_core_file_type(filename, evidence_category="some other category") # Assert assert result is None From 955db1c3eb8167bfbd1aa277624e2966eb16f6f8 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 14 May 2026 10:58:38 +0000 Subject: [PATCH 39/39] additional typehint --- backend/pashub_fetcher/core_files.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 30aa2ba8..e63511eb 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -69,7 +69,9 @@ def get_core_file_type( def get_file_type_string(filename: str) -> Optional[str]: - core_file = get_core_file_type(filename) + core_file: Optional[CoreFiles] = get_core_file_type(filename) + if core_file is None: return None + return _CORE_FILE_TO_FILE_TYPE[core_file]