file_type_for delegates to core_file_for; _MATCHERS removed 🟪

This commit is contained in:
Daniel Roth 2026-05-14 08:51:28 +00:00
parent 541d5965b7
commit 5e31c0f3da
3 changed files with 37 additions and 64 deletions

View file

@ -1,5 +1,5 @@
from enum import Enum
from typing import Callable, Optional
from typing import Optional
from backend.app.db.models.uploaded_file import FileTypeEnum
@ -19,47 +19,20 @@ class CoreFiles(Enum):
RETROFIT_DESIGN_DOC = "Retrofit Design Doc"
def _starts_with(prefix: str) -> Callable[[str], bool]:
    """Build a predicate that accepts filenames beginning with *prefix*."""
    return lambda name: name.startswith(prefix)


def _contains(fragment: str) -> Callable[[str], bool]:
    """Build a predicate that accepts filenames containing *fragment*."""
    return lambda name: fragment in name


# Ordered (predicate, file type value) pairs. Most document kinds are
# recognised by a fixed filename prefix; the two improvement documents are
# recognised by a fragment anywhere in the name, and retrofit design docs by
# the presence of both drawing-code markers.
_MATCHERS: list[tuple[Callable[[str], bool], str]] = [
    (_starts_with(CoreFiles.PHOTOPACK.value), FileTypeEnum.PHOTO_PACK.value),
    (_starts_with(CoreFiles.SITENOTE.value), FileTypeEnum.SITE_NOTE.value),
    (
        _starts_with(CoreFiles.RDSAP_SITENOTE.value),
        FileTypeEnum.RD_SAP_SITE_NOTE.value,
    ),
    (
        _starts_with(CoreFiles.PAS2023_VENTILATION.value),
        FileTypeEnum.PAS_2023_VENTILATION.value,
    ),
    (
        _starts_with(CoreFiles.PAS2023_CONDITION.value),
        FileTypeEnum.PAS_2023_CONDITION.value,
    ),
    (
        _starts_with(CoreFiles.PAS_SIGNIFICANCE.value),
        FileTypeEnum.PAS_SIGNIFICANCE.value,
    ),
    (
        _starts_with(CoreFiles.PAR_PHOTOPACK.value),
        FileTypeEnum.PAR_PHOTO_PACK.value,
    ),
    (
        _starts_with(CoreFiles.PAS2023_PROPERTY.value),
        FileTypeEnum.PAS_2023_PROPERTY.value,
    ),
    (
        _starts_with(CoreFiles.PAS2023_OCCUPANCY.value),
        FileTypeEnum.PAS_2023_OCCUPANCY.value,
    ),
    (
        _contains(CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value),
        FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value,
    ),
    (
        _contains(CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value),
        FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value,
    ),
    (
        lambda name: "-OSM-" in name and "DR-N-A" in name,
        FileTypeEnum.RETROFIT_DESIGN_DOC.value,
    ),
]
# Lookup from each recognised core document kind to the string value of the
# file type recorded for it. Pairs are listed as enum members and the
# ``.value`` is taken once, in the comprehension.
_CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = {
    core_file: file_type.value
    for core_file, file_type in (
        (CoreFiles.PHOTOPACK, FileTypeEnum.PHOTO_PACK),
        (CoreFiles.SITENOTE, FileTypeEnum.SITE_NOTE),
        (CoreFiles.RDSAP_SITENOTE, FileTypeEnum.RD_SAP_SITE_NOTE),
        (CoreFiles.PAS2023_VENTILATION, FileTypeEnum.PAS_2023_VENTILATION),
        (CoreFiles.PAS2023_CONDITION, FileTypeEnum.PAS_2023_CONDITION),
        (CoreFiles.PAS_SIGNIFICANCE, FileTypeEnum.PAS_SIGNIFICANCE),
        (CoreFiles.PAR_PHOTOPACK, FileTypeEnum.PAR_PHOTO_PACK),
        (CoreFiles.PAS2023_PROPERTY, FileTypeEnum.PAS_2023_PROPERTY),
        (CoreFiles.PAS2023_OCCUPANCY, FileTypeEnum.PAS_2023_OCCUPANCY),
        (
            CoreFiles.IMPROVEMENT_OPTION_EVALUATION,
            FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION,
        ),
        (
            CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN,
            FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN,
        ),
        (CoreFiles.RETROFIT_DESIGN_DOC, FileTypeEnum.RETROFIT_DESIGN_DOC),
    )
}
def core_file_for(
@ -86,8 +59,8 @@ def core_file_for(
return None
def infer_file_type(filename: str) -> Optional[str]:
    """Return the file type of the first matcher that accepts *filename*.

    The entries of ``_MATCHERS`` are tried in list order and the search
    stops at the first predicate that returns a truthy result.

    Args:
        filename: Name of the file to classify.

    Returns:
        The file type string paired with the first accepting predicate, or
        ``None`` when no predicate accepts the filename.
    """
    return next(
        (kind for accepts, kind in _MATCHERS if accepts(filename)),
        None,
    )
def file_type_for(filename: str) -> Optional[str]:
    """Translate *filename* into its file type string via ``core_file_for``.

    Args:
        filename: Name of the file to classify.

    Returns:
        The ``_CORE_FILE_TO_FILE_TYPE`` entry for the core file that
        ``core_file_for`` reports, or ``None`` when it reports no match.

    Raises:
        KeyError: If ``core_file_for`` returns a member that has no entry
            in ``_CORE_FILE_TO_FILE_TYPE``.
    """
    matched = core_file_for(filename)
    # Subscript rather than .get(): a missing table entry should surface
    # as an error instead of being reported as "no file type".
    return None if matched is None else _CORE_FILE_TO_FILE_TYPE[matched]

View file

@ -10,7 +10,7 @@ from backend.app.db.models.uploaded_file import (
)
from backend.documents_parser.db_writer import save_epc_property_data
from backend.documents_parser.parser import parse_site_notes_pdf
from backend.pashub_fetcher.core_files import infer_file_type
from backend.pashub_fetcher.core_files import file_type_for
from backend.pashub_fetcher.pashub_client import PashubClient
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
@ -109,7 +109,7 @@ class PashubService:
uprn=int(uprn) if uprn else None,
hubspot_deal_id=hubspot_deal_id,
file_source=FileSourceEnum.PAS_HUB.value,
file_type=infer_file_type(filename),
file_type=file_type_for(filename),
)
file_paths.append(file_path)
uploaded_files.append(uploaded_file)

View file

@ -1,65 +1,65 @@
from backend.pashub_fetcher.core_files import CoreFiles, core_file_for, infer_file_type
from backend.pashub_fetcher.core_files import CoreFiles, core_file_for, file_type_for
def test_infer_photopack():
# A "Photopack_..." filename is expected to resolve to "photo_pack".
assert infer_file_type("Photopack_123456_V1.pdf") == "photo_pack"
assert file_type_for("Photopack_123456_V1.pdf") == "photo_pack"
def test_infer_sitenote():
# A "SiteNote_..." filename is expected to resolve to "site_note".
assert infer_file_type("SiteNote_123456_V1.pdf") == "site_note"
assert file_type_for("SiteNote_123456_V1.pdf") == "site_note"
def test_infer_rdsap_sitenote():
# An "RdSAP_SiteNote_..." filename is expected to resolve to
# "rd_sap_site_note", not to the plain "site_note" type.
assert (
infer_file_type("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note"
file_type_for("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note"
)
def test_infer_pas2023_ventilation():
# A "PAS 2023 Ventilation Assessment Report_..." filename is expected to
# resolve to "pas_2023_ventilation".
assert (
infer_file_type("PAS 2023 Ventilation Assessment Report_123456.pdf")
file_type_for("PAS 2023 Ventilation Assessment Report_123456.pdf")
== "pas_2023_ventilation"
)
def test_infer_pas2023_condition():
# A "PAS 2023 Condition Report_..." filename is expected to resolve to
# "pas_2023_condition".
assert (
infer_file_type("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition"
file_type_for("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition"
)
def test_infer_pas_significance():
# A "PAS Significance_..." filename is expected to resolve to
# "pas_significance".
assert infer_file_type("PAS Significance_123456.pdf") == "pas_significance"
assert file_type_for("PAS Significance_123456.pdf") == "pas_significance"
def test_infer_par_photopack():
# A "PAR Photo Pack_..." filename is expected to resolve to
# "par_photo_pack".
assert (
infer_file_type("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack"
file_type_for("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack"
)
def test_infer_pas2023_property():
# A "PAS 2023 Property Assessment Report_..." filename is expected to
# resolve to "pas_2023_property".
assert (
infer_file_type("PAS 2023 Property Assessment Report_123456.pdf")
file_type_for("PAS 2023 Property Assessment Report_123456.pdf")
== "pas_2023_property"
)
def test_infer_pas2023_occupancy():
# A "PAS 2023 Occupancy Assessment Report_..." filename is expected to
# resolve to "pas_2023_occupancy".
assert (
infer_file_type("PAS 2023 Occupancy Assessment Report_123456.pdf")
file_type_for("PAS 2023 Occupancy Assessment Report_123456.pdf")
== "pas_2023_occupancy"
)
def test_infer_unknown_returns_none():
# An unrecognised filename is expected to yield None rather than a file
# type string.
assert infer_file_type("unknown_document_123.pdf") is None
assert file_type_for("unknown_document_123.pdf") is None
def test_infer_improvement_option_evaluation():
# filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf"
# The document title sits at the end of the name, after the job id and
# postcode, and the name is still expected to resolve to
# "improvement_option_evaluation".
assert (
infer_file_type("6000802 - NG4 4HD - Improvement Option Evaluation.pdf")
file_type_for("6000802 - NG4 4HD - Improvement Option Evaluation.pdf")
== "improvement_option_evaluation"
)
@ -67,18 +67,18 @@ def test_infer_improvement_option_evaluation():
def test_infer_medium_term_improvement_plan():
# filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf"
# The document title is followed by an "IOE" suffix, and the name is still
# expected to resolve to "medium_term_improvement_plan".
assert (
infer_file_type("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf")
file_type_for("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf")
== "medium_term_improvement_plan"
)
def test_infer_retrofit_design_doc():
# Two sample drawing filenames, each containing both "-OSM-" and "DR-N-A",
# are expected to resolve to "retrofit_design_doc".
assert (
infer_file_type("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf")
file_type_for("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf")
== "retrofit_design_doc"
)
assert (
infer_file_type("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf")
file_type_for("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf")
== "retrofit_design_doc"
)