mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Classify a landlord description into a SAL property type 🟩
This commit is contained in:
parent
e23bcd7e13
commit
d0e5aa9e3f
6 changed files with 123 additions and 0 deletions
0
domain/sal/__init__.py
Normal file
0
domain/sal/__init__.py
Normal file
25
domain/sal/property_type.py
Normal file
25
domain/sal/property_type.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
from enum import Enum
|
||||
|
||||
|
||||
class PropertyType(Enum):
    """Property type of a landlord-supplied record, as the SAL context sees it.

    This is not the EPC context's ``PropertyType``. Values arriving in a
    landlord CSV are not guaranteed to be resolvable, which is why an
    explicit ``UNKNOWN`` member exists here.
    """

    HOUSE = "House"
    BUNGALOW = "Bungalow"
    FLAT = "Flat"
    MAISONETTE = "Maisonette"
    PARK_HOME = "Park home"
    # Fallback for a landlord value that could not be resolved.
    UNKNOWN = "Unknown"
|
||||
|
||||
|
||||
class PropertyTypeClassificationError(Exception):
    """Signals that a whole property-type classification batch failed.

    Raised for wholesale failures only: the AI backend could not be reached,
    or its reply could not be parsed. One description that simply cannot be
    resolved is *not* an error; it is reported as ``PropertyType.UNKNOWN``.
    """
|
||||
27
domain/sal/property_type_classifier.py
Normal file
27
domain/sal/property_type_classifier.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from domain.sal.property_type import PropertyType
|
||||
|
||||
|
||||
class PropertyTypeClassifier(ABC):
    """Port that turns free-text descriptions into SAL ``PropertyType`` values.

    The mechanism is left to each implementation (an LLM, a lookup table, a
    rules engine). ``SALOrchestrator`` is coupled to this interface alone.
    """

    @abstractmethod
    def classify(self, descriptions: set[str]) -> dict[str, PropertyType]:
        """Map every given description to a ``PropertyType``.

        The result has one key per input description. When a description
        cannot be resolved its value is ``PropertyType.UNKNOWN``.

        Raises:
            PropertyTypeClassificationError: If classification fails as a
                whole, for example because the backend is unreachable or
                its response cannot be parsed.
        """
|
||||
38
infrastructure/chatgpt/chatgpt_property_type_classifier.py
Normal file
38
infrastructure/chatgpt/chatgpt_property_type_classifier.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
from __future__ import annotations

import json
from typing import Any

from domain.sal.property_type import PropertyType, PropertyTypeClassificationError
from domain.sal.property_type_classifier import PropertyTypeClassifier
from infrastructure.chatgpt.chatgpt import ChatGPT
|
||||
|
||||
|
||||
class ChatGptPropertyTypeClassifier(PropertyTypeClassifier):
    """PropertyTypeClassifier backed by the ChatGPT client.

    The whole batch of descriptions is sent in a single prompt, and the
    reply is expected to be a JSON object mapping each description to one
    category name.
    """

    # Categories offered to the model. ``UNKNOWN`` is deliberately left out:
    # it is assigned locally when an entry of the reply cannot be resolved.
    _CATEGORIES = ", ".join(
        member.value
        for member in PropertyType
        if member is not PropertyType.UNKNOWN
    )
    _SYSTEM_PROMPT = (
        "Classify each UK property description into exactly one category. "
        f"Categories: {_CATEGORIES}. "
        "Reply with only a JSON object mapping each original description "
        "to its category, and nothing else."
    )

    def __init__(self, chat_gpt: ChatGPT) -> None:
        """Store the ChatGPT client used to perform classification.

        Args:
            chat_gpt: Client whose ``generate`` method is called with the
                prompt and system prompt.
        """
        self._chat_gpt = chat_gpt

    def classify(self, descriptions: set[str]) -> dict[str, PropertyType]:
        """Classify each description into a ``PropertyType``.

        Args:
            descriptions: Free-text descriptions to classify.

        Returns:
            A dict with exactly one key per input description. A description
            that is missing from the reply, or whose category is not a known
            ``PropertyType`` value, maps to ``PropertyType.UNKNOWN``.

        Raises:
            PropertyTypeClassificationError: If the ChatGPT call fails, or
                its reply is not a JSON object.
        """
        if not descriptions:
            # Nothing to classify: skip the backend round trip entirely.
            return {}

        try:
            reply = self._chat_gpt.generate(
                # Sorted so the prompt is deterministic for a given set.
                prompt=json.dumps(sorted(descriptions)),
                system_prompt=self._SYSTEM_PROMPT,
            )
        except Exception as error:
            # Adapter boundary: whatever the client raised is translated
            # into the single error type the port promises to callers.
            raise PropertyTypeClassificationError(
                "ChatGPT property-type classification request failed"
            ) from error

        try:
            raw: Any = json.loads(reply)
        except (TypeError, ValueError) as error:
            # json.JSONDecodeError is a ValueError; TypeError covers a reply
            # that is not text at all.
            raise PropertyTypeClassificationError(
                "ChatGPT reply is not valid JSON"
            ) from error

        if not isinstance(raw, dict):
            raise PropertyTypeClassificationError(
                "ChatGPT reply is not a JSON object"
            )

        return {
            description: self._to_property_type(raw.get(description))
            for description in descriptions
        }

    @staticmethod
    def _to_property_type(category: object) -> PropertyType:
        """Return the member whose value is ``category``, else ``UNKNOWN``."""
        try:
            return PropertyType(category)
        except ValueError:
            # Missing entry (None) or a category outside the enum: a single
            # unresolved description is not an error.
            return PropertyType.UNKNOWN
|
||||
0
tests/infrastructure/chatgpt/__init__.py
Normal file
0
tests/infrastructure/chatgpt/__init__.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from domain.sal.property_type import PropertyType
|
||||
from infrastructure.chatgpt.chatgpt import ChatGPT
|
||||
from infrastructure.chatgpt.chatgpt_property_type_classifier import (
|
||||
ChatGptPropertyTypeClassifier,
|
||||
)
|
||||
|
||||
|
||||
class _FakeChatGPT(ChatGPT):
    """Test double for ``ChatGPT`` that answers from a fixed string.

    Each ``generate`` call returns the same pre-set reply, and every prompt
    received is kept in ``prompts`` in call order.
    """

    def __init__(self, reply: str = "{}") -> None:
        # The base-class initialiser is intentionally not invoked here.
        self._canned_reply = reply
        self.prompts: list[str] = []

    def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
        """Record ``prompt`` and return the canned reply.

        ``system_prompt`` is accepted but ignored.
        """
        self.prompts.append(prompt)
        return self._canned_reply
|
||||
|
||||
|
||||
def test_classifies_description_into_property_type() -> None:
    # Arrange: the fake backend answers with a one-entry JSON mapping.
    description = "semi-detached"
    backend = _FakeChatGPT(reply='{"semi-detached": "House"}')
    sut = ChatGptPropertyTypeClassifier(backend)

    # Act
    classified = sut.classify({description})

    # Assert: the category name was turned into the matching enum member.
    assert classified == {description: PropertyType.HOUSE}
|
||||
Loading…
Add table
Reference in a new issue