Model/orchestration/classifiable_column.py
2026-06-01 14:30:09 +00:00

56 lines
2.1 KiB
Python

from __future__ import annotations
from dataclasses import dataclass
from enum import Enum
from typing import Generic, Protocol, TypeVar, runtime_checkable
from repositories.landlord_overrides.landlord_override_repository import (
LandlordOverrideRepository,
)
# Category-enum type parameter used by the generic definitions in this module;
# bound to ``Enum`` so only enum types can be substituted for it.
E = TypeVar("E", bound=Enum)


@runtime_checkable
class ColumnClassifier(Protocol[E]):
    """Port: resolves free-text descriptions into a category enum ``E``.

    The orchestration layer owns this contract because it is the consumer.
    Any object exposing a matching ``classify`` satisfies it structurally --
    e.g. ``infrastructure.chatgpt.ChatGptColumnClassifier`` -- so orchestration
    never imports an adapter.

    Because the protocol is ``@runtime_checkable``, ``isinstance(obj,
    ColumnClassifier)`` works at runtime, but that check only verifies that a
    ``classify`` attribute exists -- it does not validate the signature or
    the enum type parameter.
    """

    def classify(self, descriptions: set[str]) -> dict[str, E]:
        """Classify each description into a category enum member.

        Every input description appears as a key in the result. A description
        that cannot be resolved maps to the enum's UNKNOWN member. A wholesale
        failure raises the adapter's ``ClassificationError``.

        Args:
            descriptions: The distinct free-text descriptions to classify.

        Returns:
            A mapping from each input description to its member of ``E``.
        """
        ...
@dataclass(frozen=True)
class ClassifiableColumn(Generic[E]):
    """Pairs a column's classifier with the repository that persists its results.

    The orchestrator registers one ``ClassifiableColumn`` per
    (source column, target enum) pair. Bundling the classifier and the
    repository together makes the "this enum lands in this table" invariant
    structural -- the handler can no longer wire ``PropertyType``
    classifications to a ``WallType`` repo by keying two dicts with the same
    string.

    ``source_column`` is the landlord-CSV header to read from; ``name`` is the
    unique key the orchestrator uses to report this classification's results
    (and the key the handler logs). Two ``ClassifiableColumn``s may share a
    ``source_column`` -- e.g. the ``"Property Type"`` CSV column feeds both
    ``PropertyType`` and ``BuiltFormType`` classifiers off the same free-text
    description -- but each must have a unique ``name``.

    Instances are frozen: fields cannot be reassigned after construction.

    Attributes:
        name: Unique key under which this classification's results are
            reported and logged.
        source_column: Landlord-CSV header whose free-text values are
            classified. May be shared by several instances.
        classifier: Resolves the column's descriptions into members of ``E``.
        repo: Persists the classification results for the same enum ``E``.
    """

    name: str
    source_column: str
    classifier: ColumnClassifier[E]
    repo: LandlordOverrideRepository[E]