From bb648b8223357d3c4a2af323bc09a006f6f4e04d Mon Sep 17 00:00:00 2001
From: Daniel Roth
Date: Thu, 16 Apr 2026 13:56:27 +0000
Subject: [PATCH] =?UTF-8?q?Map=20to=20RdSapSiteNotes=20from=20site=20notes?=
 =?UTF-8?q?=20JSON=20=F0=9F=9F=A9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section sits after the "---" separator, so it is not
part of the commit message):
- Adds the _get/_bool lookup helpers and extract_general() to
  PasHubRdSapSiteNotesExtractor; extract() still raises NotImplementedError.
- _get() returns the stripped entry `offset` positions after the first
  exact match of `key` in text_list, or None when the key is absent, the
  position is past the end of the list, or the entry is blank.
- extract_general() propagates ValueError when the inspection date is not
  in DD/MM/YYYY form or when the first token of a storey/extension count
  is not an integer.
- The test fixture now calls extract_general() directly instead of
  extract().general.

 backend/documents_parser/extractor.py         | 53 ++++++++++++++++++-
 .../documents_parser/tests/test_extractor.py  |  2 +-
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/backend/documents_parser/extractor.py b/backend/documents_parser/extractor.py
index 268c86de..32572261 100644
--- a/backend/documents_parser/extractor.py
+++ b/backend/documents_parser/extractor.py
@@ -1,9 +1,60 @@
-from datatypes.epc.surveys.pashub_rdsap_site_notes import PasHubRdSapSiteNotes
+from datetime import datetime
+
+from datatypes.epc.surveys.pashub_rdsap_site_notes import (
+    General,
+    PasHubRdSapSiteNotes,
+)
 
 
 class PasHubRdSapSiteNotesExtractor:
     def __init__(self, text_list: list[str]) -> None:
         self.text_list = text_list
 
+    def _get(self, key: str, offset: int = 1) -> str | None:
+        try:
+            idx = self.text_list.index(key)
+            return self.text_list[idx + offset].strip() or None
+        except (ValueError, IndexError):
+            return None
+
+    def _bool(self, key: str, offset: int = 1) -> bool:
+        val = self._get(key, offset)
+        return val is not None and val.lower() == "yes"
+
     def extract(self) -> PasHubRdSapSiteNotes:
         raise NotImplementedError
+
+    def extract_general(self) -> General:
+        inspection_date_raw = self._get("Inspection Date:")
+        inspection_date = (
+            datetime.strptime(inspection_date_raw, "%d/%m/%Y").strftime("%Y-%m-%d")
+            if inspection_date_raw
+            else ""
+        )
+
+        storeys_raw = self._get("Number of storeys:") or "0"
+        extensions_raw = self._get("Number of Extensions:") or "0"
+
+        return General(
+            epc_checked_before_assessment=self._bool(
+                "Confirm you have checked for the existence of an", offset=2
+            ),
+            epc_exists_at_point_of_assessment=self._bool(
+                "Does an EPC exist at the point of carrying out this", offset=2
+            ),
+            inspection_date=inspection_date,
+            transaction_type=self._get("Transaction Type:") or "",
+            tenure=self._get("Tenure:") or "",
+            property_type=self._get("Type of Property:") or "",
+            detachment_type=self._get("Detachment Type:") or "",
+            number_of_storeys=int(storeys_raw.split()[0]),
+            terrain_type=self._get("Terrain Type:") or "",
+            number_of_extensions=int(extensions_raw.split()[0]),
+            electricity_smart_meter=self._bool("Is an electricity smart meter present?"),
+            electric_meter_type=self._get("Electric meter type:") or "",
+            dwelling_export_capable=self._bool("Is the dwelling export-capable?"),
+            mains_gas_available=self._bool("Is mains gas available?"),
+            gas_smart_meter=self._bool("Is there a gas smart meter?"),
+            gas_meter_accessible=self._bool("Is the gas meter accessible?"),
+            measurements_location=self._get("Select Measurements Location:") or "",
+        )
diff --git a/backend/documents_parser/tests/test_extractor.py b/backend/documents_parser/tests/test_extractor.py
index dea3ab92..38c7f328 100644
--- a/backend/documents_parser/tests/test_extractor.py
+++ b/backend/documents_parser/tests/test_extractor.py
@@ -17,7 +17,7 @@ def load_text_fixture() -> list[str]:
 class TestGeneral:
     @pytest.fixture
     def general(self) -> General:
-        return PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract().general
+        return PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract_general()
 
     def test_epc_checked_before_assessment(self, general: General) -> None:
         assert general.epc_checked_before_assessment is True