remove redundant unknown-file test; rename test_infer_* to test_file_type_for_* 🟪

This commit is contained in:
Daniel Roth 2026-05-14 09:01:56 +00:00
parent fb9bdbc585
commit e8b7cfdcec
2 changed files with 21 additions and 16 deletions

View file

@ -38,24 +38,33 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = {
def core_file_for(
    filename: str, evidence_category: Optional[str] = None
) -> Optional[CoreFiles]:
    """Return the ``CoreFiles`` member that *filename* represents, if any.

    Rules are tried in order and the first match wins:

    1. ``evidence_category`` equal to ``"retrofit design"`` (compared
       case-insensitively) yields ``CoreFiles.RETROFIT_DESIGN_DOC``.
    2. The value of ``IMPROVEMENT_OPTION_EVALUATION`` and then of
       ``MEDIUM_TERM_IMPROVEMENT_PLAN`` is searched for anywhere in
       *filename*.
    3. When no evidence category was supplied, a filename containing both
       ``"-OSM-"`` and ``"DR-N-A"`` yields ``RETROFIT_DESIGN_DOC``.
    4. Every remaining member is matched when *filename* starts with its
       value.

    Args:
        filename: Name of the file to classify.
        evidence_category: Optional evidence category accompanying the file.

    Returns:
        The matching ``CoreFiles`` member, or ``None`` when no rule applies.
    """
    # The evidence category is checked first for retrofit design docs because
    # the file name is possibly unreliable. We might switch to always using
    # the evidence category, but that needs more investigation.
    if evidence_category is not None and evidence_category.lower() == "retrofit design":
        return CoreFiles.RETROFIT_DESIGN_DOC

    # These members are recognised by their value appearing anywhere in the
    # name rather than at the start.
    matched_by_substring = (
        CoreFiles.IMPROVEMENT_OPTION_EVALUATION,
        CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN,
    )
    for candidate in matched_by_substring:
        if candidate.value in filename:
            return candidate

    # Name-based fallback for retrofit design docs, used only when the caller
    # supplied no evidence category at all.
    has_design_markers = "-OSM-" in filename and "DR-N-A" in filename
    if evidence_category is None and has_design_markers:
        return CoreFiles.RETROFIT_DESIGN_DOC

    # Everything already handled above is excluded from prefix matching.
    already_handled = {CoreFiles.RETROFIT_DESIGN_DOC, *matched_by_substring}
    return next(
        (
            member
            for member in CoreFiles
            if member not in already_handled and filename.startswith(member.value)
        ),
        None,
    )

View file

@ -1,62 +1,58 @@
from backend.pashub_fetcher.core_files import CoreFiles, core_file_for, file_type_for
def test_file_type_for_photopack():
    """The sample Photopack filename resolves to the ``photo_pack`` file type."""
    assert file_type_for("Photopack_123456_V1.pdf") == "photo_pack"
def test_file_type_for_sitenote():
    """The sample SiteNote filename resolves to the ``site_note`` file type."""
    assert file_type_for("SiteNote_123456_V1.pdf") == "site_note"
def test_file_type_for_rdsap_sitenote():
    """The sample RdSAP SiteNote filename resolves to ``rd_sap_site_note``."""
    assert (
        file_type_for("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note"
    )
def test_file_type_for_pas2023_ventilation():
    """The sample ventilation assessment report resolves to ``pas_2023_ventilation``."""
    assert (
        file_type_for("PAS 2023 Ventilation Assessment Report_123456.pdf")
        == "pas_2023_ventilation"
    )
def test_file_type_for_pas2023_condition():
    """The sample condition report filename resolves to ``pas_2023_condition``."""
    assert (
        file_type_for("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition"
    )
def test_file_type_for_pas_significance():
    """The sample PAS Significance filename resolves to ``pas_significance``."""
    assert file_type_for("PAS Significance_123456.pdf") == "pas_significance"
def test_file_type_for_par_photopack():
    """The sample PAR Photo Pack filename resolves to ``par_photo_pack``."""
    assert (
        file_type_for("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack"
    )
def test_file_type_for_pas2023_property():
    """The sample property assessment report resolves to ``pas_2023_property``."""
    assert (
        file_type_for("PAS 2023 Property Assessment Report_123456.pdf")
        == "pas_2023_property"
    )
def test_file_type_for_pas2023_occupancy():
    """The sample occupancy assessment report resolves to ``pas_2023_occupancy``."""
    assert (
        file_type_for("PAS 2023 Occupancy Assessment Report_123456.pdf")
        == "pas_2023_occupancy"
    )
def test_infer_unknown_returns_none():
    """A filename that matches no known pattern has no file type."""
    resolved = file_type_for("unknown_document_123.pdf")
    assert resolved is None
def test_infer_improvement_option_evaluation():
def test_file_type_for_improvement_option_evaluation():
# filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf"
assert (
file_type_for("6000802 - NG4 4HD - Improvement Option Evaluation.pdf")
@ -64,7 +60,7 @@ def test_infer_improvement_option_evaluation():
)
def test_infer_medium_term_improvement_plan():
def test_file_type_for_medium_term_improvement_plan():
# filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf"
assert (
file_type_for("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf")
@ -72,7 +68,7 @@ def test_infer_medium_term_improvement_plan():
)
def test_infer_retrofit_design_doc():
def test_file_type_for_retrofit_design_doc():
assert (
file_type_for("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf")
== "retrofit_design_doc"