From d0e5aa9e3f7ccb8c63b1799671b7fb54f2af6862 Mon Sep 17 00:00:00 2001
From: Jun-te Kim
Date: Fri, 22 May 2026 14:53:31 +0000
Subject: [PATCH] =?UTF-8?q?Classify=20a=20landlord=20description=20into=20?=
 =?UTF-8?q?a=20SAL=20property=20type=20=F0=9F=9F=A9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 domain/sal/__init__.py                        |  0
 domain/sal/property_type.py                   | 25 ++++++++++++
 domain/sal/property_type_classifier.py        | 27 +++++++++++++
 .../chatgpt_property_type_classifier.py       | 38 +++++++++++++++++++
 tests/infrastructure/chatgpt/__init__.py      |  0
 .../test_chatgpt_property_type_classifier.py  | 33 ++++++++++++++++
 6 files changed, 123 insertions(+)
 create mode 100644 domain/sal/__init__.py
 create mode 100644 domain/sal/property_type.py
 create mode 100644 domain/sal/property_type_classifier.py
 create mode 100644 infrastructure/chatgpt/chatgpt_property_type_classifier.py
 create mode 100644 tests/infrastructure/chatgpt/__init__.py
 create mode 100644 tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py

diff --git a/domain/sal/__init__.py b/domain/sal/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/domain/sal/property_type.py b/domain/sal/property_type.py
new file mode 100644
index 00000000..9659639a
--- /dev/null
+++ b/domain/sal/property_type.py
@@ -0,0 +1,25 @@
+from enum import Enum
+
+
+class PropertyType(Enum):
+    """A landlord-supplied property type, as resolved by the SAL context.
+
+    Distinct from the EPC context's ``PropertyType``: a landlord CSV value
+    may be unresolvable, so this enum carries an explicit ``UNKNOWN`` member.
+    """
+
+    HOUSE = "House"
+    BUNGALOW = "Bungalow"
+    FLAT = "Flat"
+    MAISONETTE = "Maisonette"
+    PARK_HOME = "Park home"
+    UNKNOWN = "Unknown"
+
+
+class PropertyTypeClassificationError(Exception):
+    """Raised when property-type classification fails wholesale.
+
+    A whole-batch failure (the AI backend is unreachable, or returns a reply
+    that cannot be parsed) raises this. A single description that merely
+    cannot be resolved is not an error -- it maps to ``PropertyType.UNKNOWN``.
+    """
diff --git a/domain/sal/property_type_classifier.py b/domain/sal/property_type_classifier.py
new file mode 100644
index 00000000..af941e83
--- /dev/null
+++ b/domain/sal/property_type_classifier.py
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+
+from domain.sal.property_type import PropertyType
+
+
+class PropertyTypeClassifier(ABC):
+    """Port: resolves free-text descriptions into SAL ``PropertyType`` values.
+
+    Implementations decide *how* (an LLM, a lookup table, a rules engine);
+    ``SALOrchestrator`` depends only on this interface.
+    """
+
+    @abstractmethod
+    def classify(self, descriptions: set[str]) -> dict[str, PropertyType]:
+        """Classify each description into a ``PropertyType``.
+
+        Every input description appears as a key in the result. A description
+        that cannot be resolved maps to ``PropertyType.UNKNOWN``.
+
+        Raises:
+            PropertyTypeClassificationError: If the classification call fails
+                wholesale (e.g. the backend is unreachable or returns an
+                unparseable response).
+        """
+        ...
diff --git a/infrastructure/chatgpt/chatgpt_property_type_classifier.py b/infrastructure/chatgpt/chatgpt_property_type_classifier.py
new file mode 100644
index 00000000..d4f0c060
--- /dev/null
+++ b/infrastructure/chatgpt/chatgpt_property_type_classifier.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+import json
+from typing import Any
+
+from domain.sal.property_type import (
+    PropertyType,
+    PropertyTypeClassificationError,
+)
+from domain.sal.property_type_classifier import PropertyTypeClassifier
+from infrastructure.chatgpt.chatgpt import ChatGPT
+
+
+class ChatGptPropertyTypeClassifier(PropertyTypeClassifier):
+    """PropertyTypeClassifier backed by the ChatGPT client."""
+
+    _CATEGORIES = ", ".join(
+        member.value
+        for member in PropertyType
+        if member is not PropertyType.UNKNOWN
+    )
+    _SYSTEM_PROMPT = (
+        "Classify each UK property description into exactly one category. "
+        f"Categories: {_CATEGORIES}. "
+        "Reply with only a JSON object mapping each original description "
+        "to its category, and nothing else."
+    )
+
+    def __init__(self, chat_gpt: ChatGPT) -> None:
+        self._chat_gpt = chat_gpt
+
+    def classify(self, descriptions: set[str]) -> dict[str, PropertyType]:
+        """Classify every description with a single ChatGPT call.
+
+        A description that the reply omits, or maps to a value that is not
+        a ``PropertyType``, resolves to ``PropertyType.UNKNOWN``.
+
+        Raises:
+            PropertyTypeClassificationError: If the reply is not a JSON
+                object.
+        """
+        reply = self._chat_gpt.generate(
+            prompt=json.dumps(sorted(descriptions)),
+            system_prompt=self._SYSTEM_PROMPT,
+        )
+        try:
+            raw: Any = json.loads(reply)
+        except json.JSONDecodeError as error:
+            raise PropertyTypeClassificationError(
+                "ChatGPT reply is not valid JSON"
+            ) from error
+        if not isinstance(raw, dict):
+            raise PropertyTypeClassificationError(
+                "ChatGPT reply is not a JSON object"
+            )
+        return {
+            description: self._to_property_type(raw.get(description))
+            for description in descriptions
+        }
+
+    @staticmethod
+    def _to_property_type(category: Any) -> PropertyType:
+        """Return the member whose value is ``category``, else ``UNKNOWN``."""
+        try:
+            return PropertyType(category)
+        except ValueError:
+            return PropertyType.UNKNOWN
diff --git a/tests/infrastructure/chatgpt/__init__.py b/tests/infrastructure/chatgpt/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py b/tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py
new file mode 100644
index 00000000..8c697eb2
--- /dev/null
+++ b/tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from domain.sal.property_type import PropertyType
+from infrastructure.chatgpt.chatgpt import ChatGPT
+from infrastructure.chatgpt.chatgpt_property_type_classifier import (
+    ChatGptPropertyTypeClassifier,
+)
+
+
+class _FakeChatGPT(ChatGPT):
+    """Hand-written ChatGPT stand-in: returns a canned reply, records prompts."""
+
+    def __init__(self, reply: str = "{}") -> None:
+        self.prompts: list[str] = []
+        self._reply = reply
+
+    def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
+        self.prompts.append(prompt)
+        return self._reply
+
+
+def test_classifies_description_into_property_type() -> None:
+    # Arrange
+    chat_gpt = _FakeChatGPT(reply='{"semi-detached": "House"}')
+    classifier = ChatGptPropertyTypeClassifier(chat_gpt)
+
+    # Act
+    result = classifier.classify({"semi-detached"})
+
+    # Assert
+    assert result == {"semi-detached": PropertyType.HOUSE}