Rename functions and add type hints

This commit is contained in:
Daniel Roth 2026-05-14 10:57:37 +00:00
parent e8b7cfdcec
commit faf698eb71
4 changed files with 46 additions and 32 deletions

View file

@ -35,7 +35,7 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = {
}
def core_file_for(
def get_core_file_type(
filename: str, evidence_category: Optional[str] = None
) -> Optional[CoreFiles]:
# Identify the retrofit design doc by its evidence category, because the filename may be unreliable.
@ -68,8 +68,8 @@ def core_file_for(
return None
def file_type_for(filename: str) -> Optional[str]:
core_file = core_file_for(filename)
def get_file_type_string(filename: str) -> Optional[str]:
core_file = get_core_file_type(filename)
if core_file is None:
return None
return _CORE_FILE_TO_FILE_TYPE[core_file]

View file

@ -5,12 +5,11 @@ from datetime import datetime
import requests
from backend.pashub_fetcher.core_files import CoreFiles, core_file_for
from backend.pashub_fetcher.core_files import CoreFiles, get_core_file_type
from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata
from utils.logger import setup_logger
logger = setup_logger()
@ -93,7 +92,9 @@ class PashubClient:
grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list)
for file in files:
core_type = core_file_for(file.file_name, file.evidence_category)
core_type: Optional[CoreFiles] = get_core_file_type(
file.file_name, file.evidence_category
)
if not core_type:
continue
grouped[core_type].append(file)

View file

@ -10,7 +10,7 @@ from backend.app.db.models.uploaded_file import (
)
from backend.documents_parser.db_writer import save_epc_property_data
from backend.documents_parser.parser import parse_site_notes_pdf
from backend.pashub_fetcher.core_files import file_type_for
from backend.pashub_fetcher.core_files import get_file_type_string
from backend.pashub_fetcher.pashub_client import PashubClient
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
@ -109,7 +109,7 @@ class PashubService:
uprn=int(uprn) if uprn else None,
hubspot_deal_id=hubspot_deal_id,
file_source=FileSourceEnum.PAS_HUB.value,
file_type=file_type_for(filename),
file_type=get_file_type_string(filename),
)
file_paths.append(file_path)
uploaded_files.append(uploaded_file)

View file

@ -1,53 +1,60 @@
from backend.pashub_fetcher.core_files import CoreFiles, core_file_for, file_type_for
from backend.pashub_fetcher.core_files import (
CoreFiles,
get_core_file_type,
get_file_type_string,
)
def test_file_type_for_photopack():
assert file_type_for("Photopack_123456_V1.pdf") == "photo_pack"
assert get_file_type_string("Photopack_123456_V1.pdf") == "photo_pack"
def test_file_type_for_sitenote():
assert file_type_for("SiteNote_123456_V1.pdf") == "site_note"
assert get_file_type_string("SiteNote_123456_V1.pdf") == "site_note"
def test_file_type_for_rdsap_sitenote():
assert (
file_type_for("RdSAP_SiteNote_9510890_V1_Assessmet.pdf") == "rd_sap_site_note"
get_file_type_string("RdSAP_SiteNote_9510890_V1_Assessmet.pdf")
== "rd_sap_site_note"
)
def test_file_type_for_pas2023_ventilation():
assert (
file_type_for("PAS 2023 Ventilation Assessment Report_123456.pdf")
get_file_type_string("PAS 2023 Ventilation Assessment Report_123456.pdf")
== "pas_2023_ventilation"
)
def test_file_type_for_pas2023_condition():
assert (
file_type_for("PAS 2023 Condition Report_123456.pdf") == "pas_2023_condition"
get_file_type_string("PAS 2023 Condition Report_123456.pdf")
== "pas_2023_condition"
)
def test_file_type_for_pas_significance():
assert file_type_for("PAS Significance_123456.pdf") == "pas_significance"
assert get_file_type_string("PAS Significance_123456.pdf") == "pas_significance"
def test_file_type_for_par_photopack():
assert (
file_type_for("PAR Photo Pack_95101890_V2_Assessment.pdf") == "par_photo_pack"
get_file_type_string("PAR Photo Pack_95101890_V2_Assessment.pdf")
== "par_photo_pack"
)
def test_file_type_for_pas2023_property():
assert (
file_type_for("PAS 2023 Property Assessment Report_123456.pdf")
get_file_type_string("PAS 2023 Property Assessment Report_123456.pdf")
== "pas_2023_property"
)
def test_file_type_for_pas2023_occupancy():
assert (
file_type_for("PAS 2023 Occupancy Assessment Report_123456.pdf")
get_file_type_string("PAS 2023 Occupancy Assessment Report_123456.pdf")
== "pas_2023_occupancy"
)
@ -55,7 +62,7 @@ def test_file_type_for_pas2023_occupancy():
def test_file_type_for_improvement_option_evaluation():
# filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf"
assert (
file_type_for("6000802 - NG4 4HD - Improvement Option Evaluation.pdf")
get_file_type_string("6000802 - NG4 4HD - Improvement Option Evaluation.pdf")
== "improvement_option_evaluation"
)
@ -63,18 +70,20 @@ def test_file_type_for_improvement_option_evaluation():
def test_file_type_for_medium_term_improvement_plan():
# filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf"
assert (
file_type_for("60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf")
get_file_type_string(
"60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf"
)
== "medium_term_improvement_plan"
)
def test_file_type_for_retrofit_design_doc():
assert (
file_type_for("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf")
get_file_type_string("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf")
== "retrofit_design_doc"
)
assert (
file_type_for("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf")
get_file_type_string("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf")
== "retrofit_design_doc"
)
@ -89,7 +98,7 @@ def test_core_file_for_evidence_category_match_is_case_insensitive() -> None:
filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
# Act
result = core_file_for(filename, evidence_category="Retrofit Design")
result = get_core_file_type(filename, evidence_category="Retrofit Design")
# Assert
assert result == CoreFiles.RETROFIT_DESIGN_DOC
@ -100,7 +109,7 @@ def test_core_file_for_evidence_category_returns_retrofit_design_doc() -> None:
filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
# Act
result = core_file_for(filename, evidence_category="retrofit design")
result = get_core_file_type(filename, evidence_category="retrofit design")
# Assert
assert result == CoreFiles.RETROFIT_DESIGN_DOC
@ -111,7 +120,7 @@ def test_core_file_for_ioe_substring_returns_improvement_option_evaluation() ->
filename = "6000802 - NG4 4HD - Improvement Option Evaluation.pdf"
# Act
result = core_file_for(filename)
result = get_core_file_type(filename)
# Assert
assert result == CoreFiles.IMPROVEMENT_OPTION_EVALUATION
@ -122,18 +131,20 @@ def test_core_file_for_mtip_substring_returns_medium_term_improvement_plan() ->
filename = "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf"
# Act
result = core_file_for(filename)
result = get_core_file_type(filename)
# Assert
assert result == CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN
def test_core_file_for_osm_pattern_returns_retrofit_design_doc_without_evidence_category() -> None:
def test_core_file_for_osm_pattern_returns_retrofit_design_doc_without_evidence_category() -> (
None
):
# Arrange
filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
# Act
result = core_file_for(filename)
result = get_core_file_type(filename)
# Assert
assert result == CoreFiles.RETROFIT_DESIGN_DOC
@ -144,7 +155,7 @@ def test_core_file_for_prefix_returns_photopack() -> None:
filename = "Photopack_123456_V1.pdf"
# Act
result = core_file_for(filename)
result = get_core_file_type(filename)
# Assert
assert result == CoreFiles.PHOTOPACK
@ -155,18 +166,20 @@ def test_core_file_for_unknown_filename_returns_none() -> None:
filename = "unknown_document_123.pdf"
# Act
result = core_file_for(filename)
result = get_core_file_type(filename)
# Assert
assert result is None
def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present() -> None:
def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present() -> (
None
):
# Arrange — OSM+DR-N-A filename but evidence_category is something other than retrofit design
filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
# Act
result = core_file_for(filename, evidence_category="some other category")
result = get_core_file_type(filename, evidence_category="some other category")
# Assert
assert result is None