diff --git a/backend/documents_parser/extractor.py b/backend/documents_parser/extractor.py index 341664a4..7ea3ccd1 100644 --- a/backend/documents_parser/extractor.py +++ b/backend/documents_parser/extractor.py @@ -101,11 +101,9 @@ class PasHubRdSapSiteNotesExtractor: def extract_general(self) -> General: inspection_date_raw = self._get("Inspection Date:") - inspection_date = ( - datetime.strptime(inspection_date_raw, "%d/%m/%Y").strftime("%Y-%m-%d") - if inspection_date_raw - else "" - ) + if not inspection_date_raw: + raise ValueError("Inspection Date not found in document") + inspection_date = datetime.strptime(inspection_date_raw, "%d/%m/%Y").date() storeys_raw = self._get("Number of storeys:") or "0" extensions_raw = self._get("Number of Extensions:") or "0" diff --git a/backend/documents_parser/tests/test_extractor.py b/backend/documents_parser/tests/test_extractor.py index 78e21d2f..222a30e6 100644 --- a/backend/documents_parser/tests/test_extractor.py +++ b/backend/documents_parser/tests/test_extractor.py @@ -1,5 +1,6 @@ import json import os +from datetime import date import pytest @@ -51,7 +52,7 @@ class TestGeneral: assert general.epc_exists_at_point_of_assessment is False def test_inspection_date(self, general: General) -> None: - assert general.inspection_date == "2025-09-25" + assert general.inspection_date == date(2025, 9, 25) def test_transaction_type(self, general: General) -> None: assert general.transaction_type == "Grant-Scheme (ECO, RHI, etc.)" @@ -84,7 +85,7 @@ class TestGeneral: assert general == General( epc_checked_before_assessment=True, epc_exists_at_point_of_assessment=False, - inspection_date="2025-09-25", + inspection_date=date(2025, 9, 25), transaction_type="Grant-Scheme (ECO, RHI, etc.)", tenure="Rented Social", property_type="House", @@ -530,7 +531,7 @@ class TestExtract: def test_full_extract(self) -> None: result = PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract() assert result.inspection_metadata is None - assert result.general.inspection_date == "2025-09-25" + assert result.general.inspection_date == date(2025, 9, 25) assert result.building_construction.main_building.wall_thickness_mm == 310 assert result.building_measurements.main_building.floors[0].area_m2 == 35.68 assert result.roof_space.main_building.insulation_thickness_mm == 100 diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index ccc4dd82..61793c7d 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -105,7 +105,7 @@ class EpcPropertyDataMapper: return EpcPropertyData( dwelling_type=f"{general.detachment_type} {general.property_type.lower()}", - inspection_date=date.fromisoformat(general.inspection_date), + inspection_date=general.inspection_date, tenure=general.tenure, transaction_type=general.transaction_type, roofs=[], diff --git a/datatypes/epc/schema/tests/helpers.py b/datatypes/epc/schema/tests/helpers.py index 677bd8b7..22f132d2 100644 --- a/datatypes/epc/schema/tests/helpers.py +++ b/datatypes/epc/schema/tests/helpers.py @@ -1,5 +1,6 @@ import dataclasses import typing +from datetime import date from typing import Any, Dict, Type, TypeVar T = TypeVar("T") @@ -70,4 +71,7 @@ def _coerce(value: Any, hint: Any) -> Any: if dataclasses.is_dataclass(hint) and isinstance(value, dict): return _from_dict_impl(hint, value) + if hint is date and isinstance(value, str): + return date.fromisoformat(value) + return value diff --git a/datatypes/epc/surveys/pashub_rdsap_site_notes.py b/datatypes/epc/surveys/pashub_rdsap_site_notes.py index aee4655b..54c84a17 100644 --- a/datatypes/epc/surveys/pashub_rdsap_site_notes.py +++ b/datatypes/epc/surveys/pashub_rdsap_site_notes.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from datetime import date from typing import List, Optional @@ -8,7 +9,7 @@ class InspectionMetadata: email_address: str report_reference: str created_on: str - date_of_inspection: str + date_of_inspection: date property_address: str property_photo: Optional[bool] = None @@ -17,7 +18,7 @@ class InspectionMetadata: class General: epc_checked_before_assessment: bool epc_exists_at_point_of_assessment: bool - inspection_date: str + inspection_date: date transaction_type: str tenure: str property_type: str diff --git a/datatypes/epc/surveys/tests/test_pashub_rdsap_site_notes_loading.py b/datatypes/epc/surveys/tests/test_pashub_rdsap_site_notes_loading.py index d89f989d..a1a126a2 100644 --- a/datatypes/epc/surveys/tests/test_pashub_rdsap_site_notes_loading.py +++ b/datatypes/epc/surveys/tests/test_pashub_rdsap_site_notes_loading.py @@ -1,5 +1,6 @@ import json import os +from datetime import date from typing import Any, Dict import pytest @@ -224,7 +225,7 @@ class TestExample2: self, survey: PasHubRdSapSiteNotes ) -> None: assert survey.inspection_metadata.created_on == "2025-11-10" - assert survey.inspection_metadata.date_of_inspection == "2025-09-25" + assert survey.inspection_metadata.date_of_inspection == date(2025, 9, 25) # --- general ---