# Model/orchestration/classifiable_column.py
#
# 37 lines
# 1.4 KiB
# Python

from __future__ import annotations
from dataclasses import dataclass
from enum import Enum
from typing import Generic, TypeVar
from domain.data_transformation.column_classifier import ColumnClassifier
from repositories.landlord_overrides.landlord_override_repository import (
LandlordOverrideRepository,
)
# Enum type that a column's values are classified into. The classifier and the
# repository of a ``ClassifiableColumn`` are both parameterised by it.
E = TypeVar("E", bound=Enum)


@dataclass(frozen=True)
class ClassifiableColumn(Generic[E]):
    """Pair a column's classifier with the repository that persists its results.

    The orchestrator registers one ``ClassifiableColumn`` per
    (source column, target enum) pair. Bundling the classifier and the
    repository together makes the "this enum lands in this table" invariant
    structural -- the handler can no longer wire ``PropertyType``
    classifications to a ``WallType`` repo by keying two dicts with the same
    string.

    Two ``ClassifiableColumn``s may share a ``source_column`` -- e.g. the
    ``"Property Type"`` CSV column feeds both ``PropertyType`` and
    ``BuiltFormType`` classifiers off the same free-text description -- but
    each must have a unique ``name``.

    Instances are immutable (``frozen=True``).

    Attributes:
        name: Unique key the orchestrator uses to report this
            classification's results (and the key the handler logs).
        source_column: Landlord-CSV header to read from.
        classifier: Classifier for this column, parameterised by ``E``.
        repo: Repository that persists the classification results,
            parameterised by the same ``E``.
    """

    name: str
    source_column: str
    classifier: ColumnClassifier[E]
    repo: LandlordOverrideRepository[E]