classifier

This commit is contained in:
Jun-te Kim 2026-06-01 14:30:09 +00:00
parent c9a9620527
commit 0febf0e6d5
4 changed files with 30 additions and 10 deletions

View file

@ -5,8 +5,8 @@ from enum import Enum
from typing import Any, Optional, TypeVar
from infrastructure.column_classifier import (
BaseColumnClassifier,
ClassificationError,
ColumnClassifier,
)
from infrastructure.chatgpt.chatgpt import ChatGPT
from infrastructure.chatgpt.exceptions import ChatGPTClientError
@ -14,7 +14,7 @@ from infrastructure.chatgpt.exceptions import ChatGPTClientError
E = TypeVar("E", bound=Enum)
class ChatGptColumnClassifier(ColumnClassifier[E]):
class ChatGptColumnClassifier(BaseColumnClassifier[E]):
"""ColumnClassifier backed by ChatGPT, parametrised by a category enum.
The same classification path -- prompt, JSON parsing, UNKNOWN fallback --

View file

@ -16,12 +16,14 @@ class ClassificationError(Exception):
"""
class ColumnClassifier(ABC, Generic[E]):
"""Port: resolves free-text descriptions into a category enum ``E``.
class BaseColumnClassifier(ABC, Generic[E]):
"""Adapter base: shared scaffolding for concrete column classifiers.
One classifier handles one landlord-CSV column. Implementations decide
*how* the mapping is performed (an LLM, a lookup table, a rules engine);
``LandlordDescriptionOverridesOrchestrator`` depends only on this interface.
*how* the mapping is performed (an LLM, a lookup table, a rules engine).
Consumers do not depend on this class -- they depend on the structural
``ColumnClassifier`` Protocol declared in the orchestration layer; this
ABC merely gives adapters a common base for the ``classify`` contract.
"""
@abstractmethod

View file

@ -2,9 +2,8 @@ from __future__ import annotations
from dataclasses import dataclass
from enum import Enum
from typing import Generic, TypeVar
from typing import Generic, Protocol, TypeVar, runtime_checkable
from infrastructure.column_classifier import ColumnClassifier
from repositories.landlord_overrides.landlord_override_repository import (
LandlordOverrideRepository,
)
@ -12,6 +11,26 @@ from repositories.landlord_overrides.landlord_override_repository import (
E = TypeVar("E", bound=Enum)
@runtime_checkable
class ColumnClassifier(Protocol[E]):
    """Port for turning free-text descriptions into members of enum ``E``.

    This contract lives in the orchestration layer because orchestration is
    the consumer. Conformance is structural: any object with a compatible
    ``classify`` method satisfies it (for example
    ``infrastructure.chatgpt.ChatGptColumnClassifier``), which means
    orchestration never has to import an adapter.
    """

    def classify(self, descriptions: set[str]) -> dict[str, E]:
        """Map every description to a category enum member.

        The result contains one key per input description. Descriptions
        that cannot be resolved are mapped to the enum's UNKNOWN member.
        If classification fails wholesale, the adapter's
        ``ClassificationError`` is raised.
        """
        ...
@dataclass(frozen=True)
class ClassifiableColumn(Generic[E]):
"""Pairs a column's classifier with the repository that persists its results.

View file

@ -5,11 +5,10 @@ from typing import Any, Optional
from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
from domain.epc.built_form_type import BuiltFormType
from infrastructure.column_classifier import ColumnClassifier
from domain.epc.property_type import PropertyType
from domain.epc.wall_type import WallType
from domain.postcode import Postcode
from orchestration.classifiable_column import ClassifiableColumn
from orchestration.classifiable_column import ClassifiableColumn, ColumnClassifier
from orchestration.landlord_description_overrides_orchestrator import (
LandlordDescriptionOverridesOrchestrator,
)