new file types inferred from file names 🟥

This commit is contained in:
Daniel Roth 2026-05-13 13:09:40 +00:00
parent e315966565
commit e3646162de
2 changed files with 86 additions and 1 deletions

View file

@ -0,0 +1,64 @@
import pytest
from backend.pashub_fetcher.core_files import infer_file_type
# --- GREEN: pre-existing file types (startswith match) ---
def test_infer_photopack():
    """A ``Photopack_...`` filename is classified as ``photo_pack``."""
    filename = "Photopack_123456_V1.pdf"
    inferred = infer_file_type(filename)
    assert inferred == "photo_pack"
def test_infer_sitenote():
    """A ``SiteNote_...`` filename is classified as ``site_note``."""
    filename = "SiteNote_123456_V1.pdf"
    inferred = infer_file_type(filename)
    assert inferred == "site_note"
def test_infer_rdsap_sitenote():
    """A ``RdSAP_SiteNote_...`` filename is classified as ``rd_sap_site_note``."""
    # NOTE(review): "Assessmet" looks like a typo for "Assessment"; confirm
    # whether it deliberately mirrors a real-world filename before changing it.
    filename = "RdSAP_SiteNote_9510890_V1_Assessmet.pdf"
    inferred = infer_file_type(filename)
    assert inferred == "rd_sap_site_note"
def test_infer_pas2023_ventilation():
    """A PAS 2023 ventilation assessment report filename maps to ``pas_2023_ventilation``."""
    filename = "PAS 2023 Ventilation Assessment Report_123456.pdf"
    inferred = infer_file_type(filename)
    assert inferred == "pas_2023_ventilation"
def test_infer_pas2023_condition():
    """A PAS 2023 condition report filename maps to ``pas_2023_condition``."""
    filename = "PAS 2023 Condition Report_123456.pdf"
    inferred = infer_file_type(filename)
    assert inferred == "pas_2023_condition"
def test_infer_pas_significance():
    """A ``PAS Significance_...`` filename maps to ``pas_significance``."""
    filename = "PAS Significance_123456.pdf"
    inferred = infer_file_type(filename)
    assert inferred == "pas_significance"
def test_infer_par_photopack():
    """A ``PAR Photo Pack_...`` filename maps to ``par_photo_pack``."""
    filename = "PAR Photo Pack_95101890_V2_Assessment.pdf"
    inferred = infer_file_type(filename)
    assert inferred == "par_photo_pack"
def test_infer_pas2023_property():
    """A PAS 2023 property assessment report filename maps to ``pas_2023_property``."""
    filename = "PAS 2023 Property Assessment Report_123456.pdf"
    inferred = infer_file_type(filename)
    assert inferred == "pas_2023_property"
def test_infer_pas2023_occupancy():
    """A PAS 2023 occupancy assessment report filename maps to ``pas_2023_occupancy``."""
    filename = "PAS 2023 Occupancy Assessment Report_123456.pdf"
    inferred = infer_file_type(filename)
    assert inferred == "pas_2023_occupancy"
def test_infer_unknown_returns_none():
    """An unrecognised filename yields ``None`` rather than a file type."""
    filename = "unknown_document_123.pdf"
    inferred = infer_file_type(filename)
    assert inferred is None
# --- RED: new file types (suffix match not yet implemented) ---
def test_infer_improvement_option_evaluation():
    """An Improvement Option Evaluation filename maps to ``improvement_option_evaluation``.

    Filename pattern: "{job_id} - {postcode} - Improvement Option Evaluation.pdf"
    """
    filename = "6000802 - NG4 4HD - Improvement Option Evaluation.pdf"
    inferred = infer_file_type(filename)
    assert inferred == "improvement_option_evaluation"
def test_infer_medium_term_improvement_plan():
    """A Medium Term Improvement Plan filename maps to ``medium_term_improvement_plan``.

    Filename pattern: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf"
    """
    filename = "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf"
    inferred = infer_file_type(filename)
    assert inferred == "medium_term_improvement_plan"
@pytest.mark.skip(reason="Retrofit Design Doc filename pattern not yet known")
def test_infer_retrofit_design_doc():
    """Placeholder check that a retrofit design document maps to ``retrofit_design_doc``.

    Skipped by the marker above because the filename pattern is not yet known;
    the sample filename below is a single example, not a confirmed pattern.
    """
    filename = "2512-OSM-H56M900-XX-DR-N-A_Radford Road 408.pdf"
    inferred = infer_file_type(filename)
    assert inferred == "retrofit_design_doc"

View file

@ -3,6 +3,27 @@ pythonpath = .
log_cli = true
log_cli_level = INFO
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ backend/pashub_fetcher/tests backend/documents_parser/tests backend/magic_plan/tests datatypes/magicplan/api/tests datatypes/magicplan/domain/tests backend/app/db/functions/tests
testpaths =
recommendations/tests
backend/tests
backend/address2UPRN/tests
backend/app/db/functions/tests
backend/categorisation/tests
backend/condition/tests
backend/documents_parser/tests
backend/ecmk_fetcher/tests
backend/export/tests
backend/magic_plan/tests
backend/onboarders/tests
backend/pashub_fetcher/tests
datatypes/epc/domain/tests
datatypes/epc/schema/tests
datatypes/epc/surveys/tests
datatypes/magicplan/api/tests
datatypes/magicplan/domain/tests
etl/epc/tests
etl/epc_clean/tests
etl/hubspot/tests
etl/spatial/tests
markers =
integration: mark a test as an integration test