Classify a landlord description into a SAL property type 🟩

This commit is contained in:
Jun-te Kim 2026-05-22 14:53:31 +00:00
parent e23bcd7e13
commit d0e5aa9e3f
6 changed files with 123 additions and 0 deletions

0
domain/sal/__init__.py Normal file
View file

View file

@ -0,0 +1,25 @@
from enum import Enum
class PropertyType(Enum):
    """Property type resolved by the SAL context from landlord-supplied data.

    This is deliberately a separate enum from the EPC context's
    ``PropertyType``. A value taken from a landlord CSV is not guaranteed to
    be resolvable, so an explicit ``UNKNOWN`` member is provided for that
    case.
    """

    HOUSE = "House"
    BUNGALOW = "Bungalow"
    FLAT = "Flat"
    MAISONETTE = "Maisonette"
    PARK_HOME = "Park home"
    # Fallback for a description that could not be resolved to a real type.
    UNKNOWN = "Unknown"
class PropertyTypeClassificationError(Exception):
    """Signals that a whole property-type classification batch failed.

    Raised for wholesale failures only: the AI backend could not be reached,
    or its reply could not be parsed. One description that simply cannot be
    resolved is not an error; it is reported as ``PropertyType.UNKNOWN``.
    """

View file

@ -0,0 +1,27 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from domain.sal.property_type import PropertyType
class PropertyTypeClassifier(ABC):
    """Port that turns free-text descriptions into SAL ``PropertyType`` values.

    The mechanism is left to the implementation (an LLM, a lookup table, a
    rules engine). ``SALOrchestrator`` depends only on this interface.
    """

    @abstractmethod
    def classify(self, descriptions: set[str]) -> dict[str, PropertyType]:
        """Resolve every description to a ``PropertyType``.

        Args:
            descriptions: The free-text descriptions to classify.

        Returns:
            A mapping with one key per input description. A description that
            cannot be resolved is mapped to ``PropertyType.UNKNOWN``.

        Raises:
            PropertyTypeClassificationError: If the classification call fails
                wholesale (e.g. the backend is unreachable or returns an
                unparseable response).
        """

View file

@ -0,0 +1,38 @@
from __future__ import annotations

import json
from typing import Any

from domain.sal.property_type import (
    PropertyType,
    PropertyTypeClassificationError,
)
from domain.sal.property_type_classifier import PropertyTypeClassifier
from infrastructure.chatgpt.chatgpt import ChatGPT
class ChatGptPropertyTypeClassifier(PropertyTypeClassifier):
    """PropertyTypeClassifier backed by the ChatGPT client.

    All descriptions are sent in one request as a JSON array; the model is
    asked to answer with a JSON object mapping each description to one of
    the known categories.
    """

    # Categories offered to the model. ``UNKNOWN`` is left out: it is the
    # fallback assigned by this adapter, not a label the model may choose.
    _CATEGORIES = ", ".join(
        member.value
        for member in PropertyType
        if member is not PropertyType.UNKNOWN
    )
    _SYSTEM_PROMPT = (
        "Classify each UK property description into exactly one category. "
        f"Categories: {_CATEGORIES}. "
        "Reply with only a JSON object mapping each original description "
        "to its category, and nothing else."
    )

    def __init__(self, chat_gpt: ChatGPT) -> None:
        """Store the ChatGPT client used to perform classification.

        Args:
            chat_gpt: Client whose ``generate`` method is called once per
                ``classify`` invocation.
        """
        self._chat_gpt = chat_gpt

    def classify(self, descriptions: set[str]) -> dict[str, PropertyType]:
        """Classify each description into a ``PropertyType``.

        Args:
            descriptions: The free-text descriptions to classify.

        Returns:
            A mapping with one key per input description. A description the
            reply omits, or labels with a value that is not a
            ``PropertyType``, maps to ``PropertyType.UNKNOWN``.

        Raises:
            PropertyTypeClassificationError: If the ChatGPT call raises, or
                the reply is not a JSON object.
        """
        try:
            reply = self._chat_gpt.generate(
                # Sorted so the same set always yields the same prompt.
                prompt=json.dumps(sorted(descriptions)),
                system_prompt=self._SYSTEM_PROMPT,
            )
        except Exception as error:
            # The client's exception types are not visible from here, so any
            # failure is translated at this adapter boundary into the
            # domain error the port promises; the cause stays chained.
            raise PropertyTypeClassificationError(
                "ChatGPT property-type classification request failed"
            ) from error

        try:
            raw: Any = json.loads(reply)
        except (TypeError, ValueError) as error:
            # JSONDecodeError is a ValueError; TypeError covers a non-text
            # reply such as None.
            raise PropertyTypeClassificationError(
                "ChatGPT reply is not valid JSON"
            ) from error

        if not isinstance(raw, dict):
            raise PropertyTypeClassificationError(
                "ChatGPT reply is not a JSON object"
            )

        return {
            description: self._resolve(raw.get(description))
            for description in descriptions
        }

    @staticmethod
    def _resolve(category: Any) -> PropertyType:
        """Map one reply value to a ``PropertyType``, else ``UNKNOWN``."""
        try:
            return PropertyType(category)
        except ValueError:
            # Missing (None), unrecognised, or non-string category.
            return PropertyType.UNKNOWN

View file

View file

@ -0,0 +1,33 @@
from __future__ import annotations
from typing import Optional
from domain.sal.property_type import PropertyType
from infrastructure.chatgpt.chatgpt import ChatGPT
from infrastructure.chatgpt.chatgpt_property_type_classifier import (
ChatGptPropertyTypeClassifier,
)
class _FakeChatGPT(ChatGPT):
    """Test double for ``ChatGPT``.

    Answers every ``generate`` call with the same canned reply and keeps the
    prompts it was given, in call order, in ``prompts``.
    """

    def __init__(self, reply: str = "{}") -> None:
        self._reply = reply
        # Prompts received so far; the system prompt is not recorded.
        self.prompts: list[str] = []

    def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
        """Record ``prompt`` and return the canned reply."""
        self.prompts.append(prompt)
        return self._reply
def test_classifies_description_into_property_type() -> None:
    """A description the backend labels "House" resolves to ``HOUSE``."""
    # Arrange
    description = "semi-detached"
    fake_backend = _FakeChatGPT(reply='{"semi-detached": "House"}')
    # Act
    classified = ChatGptPropertyTypeClassifier(fake_backend).classify(
        {description}
    )
    # Assert
    assert classified == {description: PropertyType.HOUSE}