Model/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py

185 lines
6.1 KiB
Python

from __future__ import annotations
from typing import Optional
import pytest
from domain.data_transformation.column_classifier import ClassificationError
from domain.epc.property_type import PropertyType
from domain.epc.wall_type import WallType
from infrastructure.chatgpt.chatgpt import ChatGPT
from infrastructure.chatgpt.chatgpt_column_classifier import (
ChatGptColumnClassifier,
)
from infrastructure.chatgpt.exceptions import ChatGPTClientError
class _FakeChatGPT(ChatGPT):
    """Scripted test double for ``ChatGPT``.

    Each ``generate`` call is logged in ``prompts`` / ``system_prompts`` and
    then either returns the preset reply or raises the preset error.
    The base-class initialiser is not invoked.
    """

    def __init__(
        self,
        reply: str = "{}",
        error: Optional[Exception] = None,
    ) -> None:
        """Store the scripted outcome and start with empty call logs.

        Args:
            reply: Text returned by ``generate`` when no error is configured.
            error: Exception raised by ``generate`` instead of replying, if set.
        """
        self._reply = reply
        self._error = error
        # Call logs, one entry per ``generate`` invocation, in call order.
        self.prompts: list[str] = []
        self.system_prompts: list[Optional[str]] = []

    def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
        """Log the call, then return the canned reply or raise the canned error."""
        # Log first so a failing call is still visible to assertions.
        self.prompts.append(prompt)
        self.system_prompts.append(system_prompt)
        if self._error is None:
            return self._reply
        raise self._error
def _property_type_classifier(
    chat_gpt: ChatGPT,
) -> ChatGptColumnClassifier[PropertyType]:
    """Build a ``PropertyType`` classifier that uses ``PropertyType.UNKNOWN``.

    Args:
        chat_gpt: The ChatGPT client (or fake) handed to the classifier.
    """
    property_type_classifier = ChatGptColumnClassifier(
        chat_gpt,
        PropertyType,
        PropertyType.UNKNOWN,
    )
    return property_type_classifier
def test_classifies_description_into_its_category() -> None:
    """A description the reply labels "House" maps to ``PropertyType.HOUSE``."""
    # Arrange
    fake = _FakeChatGPT(reply='{"semi-detached": "House"}')
    sut = _property_type_classifier(fake)
    expected = {"semi-detached": PropertyType.HOUSE}

    # Act
    classified = sut.classify({"semi-detached"})

    # Assert
    assert classified == expected
def test_classifies_when_reply_is_wrapped_in_a_markdown_fence() -> None:
    """A reply enclosed in a Markdown ``json`` code fence is still classified."""
    # Arrange: the canned reply puts the JSON inside a ```json ... ``` fence.
    fenced_reply = '```json\n{"semi-detached": "House"}\n```'
    fake = _FakeChatGPT(reply=fenced_reply)
    sut = _property_type_classifier(fake)
    expected = {"semi-detached": PropertyType.HOUSE}

    # Act
    classified = sut.classify({"semi-detached"})

    # Assert
    assert classified == expected
def test_unrecognised_category_maps_to_unknown() -> None:
    """A reply label this test expects to be unrecognised ("Shed") gives UNKNOWN."""
    # Arrange
    fake = _FakeChatGPT(reply='{"garden shed": "Shed"}')
    sut = _property_type_classifier(fake)
    expected = {"garden shed": PropertyType.UNKNOWN}

    # Act
    classified = sut.classify({"garden shed"})

    # Assert
    assert classified == expected
def test_description_omitted_from_reply_maps_to_unknown() -> None:
    """A requested description missing from the reply is reported as UNKNOWN."""
    # Arrange: two descriptions go in, but the canned reply only covers one.
    fake = _FakeChatGPT(reply='{"semi-detached": "House"}')
    sut = _property_type_classifier(fake)
    expected = {
        "semi-detached": PropertyType.HOUSE,
        "TBC": PropertyType.UNKNOWN,
    }

    # Act
    classified = sut.classify({"semi-detached", "TBC"})

    # Assert
    assert classified == expected
def test_chatgpt_failure_raises_classification_error() -> None:
    """A ``ChatGPTClientError`` from the client surfaces as ``ClassificationError``."""
    # Arrange: the fake raises instead of replying.
    client_failure = ChatGPTClientError("backend unreachable")
    fake = _FakeChatGPT(error=client_failure)
    sut = _property_type_classifier(fake)

    # Act / Assert
    with pytest.raises(ClassificationError):
        sut.classify({"semi-detached"})
def test_non_json_reply_raises_classification_error_with_the_raw_reply() -> None:
    """A non-JSON reply raises ``ClassificationError`` quoting the reply text."""
    # Arrange
    fake = _FakeChatGPT(reply="sorry, I can't do that")
    sut = _property_type_classifier(fake)

    # Act / Assert: the raw reply must appear in the error message so the
    # failure can be diagnosed from the exception alone.
    with pytest.raises(ClassificationError, match="sorry, I can't do that"):
        sut.classify({"semi-detached"})
def test_empty_description_set_returns_empty_without_calling_chatgpt() -> None:
    """Classifying an empty set yields ``{}`` and never calls the client."""
    # Arrange: the canned reply is non-empty so a stray call would be visible.
    fake = _FakeChatGPT(reply='{"unused": "House"}')
    sut = _property_type_classifier(fake)

    # Act
    classified = sut.classify(set())

    # Assert: nothing classified, and no prompt was ever recorded.
    assert classified == {}
    assert fake.prompts == []
def test_classifies_with_a_different_category_enum() -> None:
    """The classifier also works when parameterised with ``WallType``."""
    # Arrange: same adapter class, this time built for a WallType column.
    wall_reply = (
        '{"solid brick wall": "Solid brick, as built, no insulation (assumed)"}'
    )
    fake = _FakeChatGPT(reply=wall_reply)
    sut = ChatGptColumnClassifier(fake, WallType, WallType.UNKNOWN)

    # Act
    classified = sut.classify({"solid brick wall"})

    # Assert
    expected = {
        "solid brick wall": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED
    }
    assert classified == expected
def test_extra_instructions_are_appended_to_the_system_prompt() -> None:
    """``extra_instructions`` appear verbatim in the system prompt.

    The hint must also come before the JSON-output instruction.
    """
    # Arrange: column-specific guidance (e.g. wall-type build-era hints) is
    # supplied through ``extra_instructions``.
    hint = "If the description carries a build decade, prefer X."
    fake = _FakeChatGPT(reply='{"1970s semi": "House"}')
    sut = ChatGptColumnClassifier(
        fake,
        PropertyType,
        PropertyType.UNKNOWN,
        extra_instructions=hint,
    )

    # Act
    sut.classify({"1970s semi"})

    # Assert: the hint is in the first recorded system prompt, positioned
    # ahead of the JSON instruction.
    sent_system_prompt = fake.system_prompts[0]
    assert sent_system_prompt is not None
    assert hint in sent_system_prompt
    hint_position = sent_system_prompt.index(
        "If the description carries a build decade"
    )
    json_position = sent_system_prompt.index("Reply with only a JSON object")
    assert hint_position < json_position
def test_omitting_extra_instructions_leaves_the_system_prompt_unchanged() -> None:
    """Without ``extra_instructions`` the system prompt matches the base text."""
    # Arrange: no per-column guidance, so the prompt must be exactly the
    # original wording -- no trailing whitespace, no orphan hint.
    expected_prompt = (
        "Classify each free-text description into exactly one category. "
        "Categories: House, Bungalow, Flat, Maisonette, Park home. "
        "Reply with only a JSON object mapping each original description "
        "to its category, and nothing else."
    )
    fake = _FakeChatGPT(reply='{"semi-detached": "House"}')
    sut = ChatGptColumnClassifier(fake, PropertyType, PropertyType.UNKNOWN)

    # Act
    sut.classify({"semi-detached"})

    # Assert
    sent_system_prompt = fake.system_prompts[0]
    assert sent_system_prompt is not None
    assert sent_system_prompt == expected_prompt