From 01ebb2e0e1049782b40d71601640b29eb7285045 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 27 Apr 2026 16:04:02 +0000 Subject: [PATCH] =?UTF-8?q?extract=20window=20frame=20details=20from=20elm?= =?UTF-8?q?hurst=20site=20notes=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../documents_parser/elmhurst_extractor.py | 22 +++++++++++++++++-- datatypes/epc/surveys/elmhurst_site_notes.py | 1 + 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index 3063e358..e78d98de 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -230,7 +230,7 @@ class ElmhurstSiteNotesExtractor: i += 1 continue i += 3 - # Collect glazing type until frame_factor (0 < v ≤ 1.0) + # Collect glazing type tokens until frame_factor (0 < v ≤ 1.0) glazing_parts: List[str] = [] while i < len(tokens): try: @@ -241,10 +241,21 @@ class ElmhurstSiteNotesExtractor: except ValueError: glazing_parts.append(tokens[i]) i += 1 + # If last glazing token is a single word (no spaces, not numeric) it's the frame_type + frame_type: Optional[str] = None + if glazing_parts and " " not in glazing_parts[-1] and not glazing_parts[-1].replace(".", "").isdigit(): + frame_type = glazing_parts.pop() glazing_type = " ".join(glazing_parts).strip() if i >= len(tokens): break frame_factor = float(tokens[i]); i += 1 + # Consume glazing_gap if present ("mm" token, possibly multi-token e.g. "16 mm or more") + glazing_gap: Optional[str] = None + if i < len(tokens) and "mm" in tokens[i]: + gap_parts = [tokens[i]]; i += 1 + while i < len(tokens) and tokens[i].lower() in {"or", "more"}: + gap_parts.append(tokens[i]); i += 1 + glazing_gap = " ".join(gap_parts) building_part = tokens[i]; i += 1 location = tokens[i]; i += 1 orientation = tokens[i]; i += 1 @@ -268,6 +279,8 @@ class ElmhurstSiteNotesExtractor: g_value=g_value, draught_proofed=draught_proofed, permanent_shutters=permanent_shutters, + frame_type=frame_type, + glazing_gap=glazing_gap, ) ) return windows @@ -296,12 +309,17 @@ class ElmhurstSiteNotesExtractor: ) def _extract_lighting(self) -> Lighting: + led_cfl_count_known = self._bool_val("Number of LED and CFL Known") return Lighting( total_bulbs=self._int_val("Total number of bulbs"), - led_cfl_count_known=self._bool_val("Number of LED and CFL Known"), + led_cfl_count_known=led_cfl_count_known, led_count=self._int_val("Number of LED lights"), cfl_count=self._int_val("Number of CFL lights"), incandescent_count=self._int_val("Total number of incandescents"), + low_energy_count=( + 0 if led_cfl_count_known + else self._int_val("Total number of Low Energy") + ), ) def _extract_main_heating(self) -> MainHeating: diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index 3b2c279f..eec22a27 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -121,6 +121,7 @@ class Lighting: led_count: int cfl_count: int incandescent_count: int + low_energy_count: int = 0 @dataclass