# Model/domain/data_transformation/column_classifier.py
#
# 39 lines
# 1.4 KiB
# Python

from __future__ import annotations
from abc import ABC, abstractmethod
from enum import Enum
from typing import Generic, TypeVar
# Type variable for the category enum a classifier resolves descriptions into;
# bound to ``Enum`` so only enum subclasses can parameterize ``ColumnClassifier``.
E = TypeVar("E", bound=Enum)
class ClassificationError(Exception):
    """Raised when classifying a column's descriptions fails wholesale.

    A whole-batch failure (the AI backend is unreachable, or returns a reply
    that cannot be parsed) raises this. A single description that merely
    cannot be resolved is not an error -- it maps to the enum's UNKNOWN member.
    """
class ColumnClassifier(ABC, Generic[E]):
    """Port: resolves free-text descriptions into a category enum ``E``.

    One classifier handles one landlord-CSV column. Implementations decide
    *how* the mapping is performed (an LLM, a lookup table, a rules engine);
    ``LandlordDescriptionOverridesOrchestrator`` depends only on this interface.
    """

    @abstractmethod
    def classify(self, descriptions: set[str]) -> dict[str, E]:
        """Classify each description into a category enum member.

        Args:
            descriptions: The free-text descriptions to classify.

        Returns:
            A mapping in which every input description appears as a key. A
            description that cannot be resolved maps to the enum's UNKNOWN
            member.

        Raises:
            ClassificationError: If the classification call fails wholesale
                (e.g. the backend is unreachable or returns an unparseable
                response).
        """
        ...