diff --git a/domain/data_transformation/__init__.py b/domain/data_transformation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/infrastructure/column_classifier.py b/domain/data_transformation/column_classifier.py similarity index 76% rename from infrastructure/column_classifier.py rename to domain/data_transformation/column_classifier.py index b16fd8c0..adc88c6a 100644 --- a/infrastructure/column_classifier.py +++ b/domain/data_transformation/column_classifier.py @@ -16,14 +16,12 @@ class ClassificationError(Exception): """ -class BaseColumnClassifier(ABC, Generic[E]): - """Adapter base: shared scaffolding for concrete column classifiers. +class ColumnClassifier(ABC, Generic[E]): + """Port: resolves free-text descriptions into a category enum ``E``. One classifier handles one landlord-CSV column. Implementations decide - *how* the mapping is performed (an LLM, a lookup table, a rules engine). - Consumers do not depend on this class -- they depend on the structural - ``ColumnClassifier`` Protocol declared in the orchestration layer; this - ABC merely gives adapters a common base for the ``classify`` contract. + *how* the mapping is performed (an LLM, a lookup table, a rules engine); + ``LandlordDescriptionOverridesOrchestrator`` depends only on this interface. """ @abstractmethod diff --git a/infrastructure/chatgpt/chatgpt_column_classifier.py b/infrastructure/chatgpt/chatgpt_column_classifier.py index a42b9b4b..b782cf33 100644 --- a/infrastructure/chatgpt/chatgpt_column_classifier.py +++ b/infrastructure/chatgpt/chatgpt_column_classifier.py @@ -4,9 +4,9 @@ import json from enum import Enum from typing import Any, Optional, TypeVar -from infrastructure.column_classifier import ( - BaseColumnClassifier, +from domain.data_transformation.column_classifier import ( ClassificationError, + ColumnClassifier, ) from infrastructure.chatgpt.chatgpt import ChatGPT from infrastructure.chatgpt.exceptions import ChatGPTClientError @@ -14,7 +14,7 @@ from infrastructure.chatgpt.exceptions import ChatGPTClientError E = TypeVar("E", bound=Enum) -class ChatGptColumnClassifier(BaseColumnClassifier[E]): +class ChatGptColumnClassifier(ColumnClassifier[E]): """ColumnClassifier backed by ChatGPT, parametrised by a category enum. The same classification path -- prompt, JSON parsing, UNKNOWN fallback -- diff --git a/orchestration/classifiable_column.py b/orchestration/classifiable_column.py index b8aeb08a..92334a38 100644 --- a/orchestration/classifiable_column.py +++ b/orchestration/classifiable_column.py @@ -2,8 +2,9 @@ from __future__ import annotations from dataclasses import dataclass from enum import Enum -from typing import Generic, Protocol, TypeVar, runtime_checkable +from typing import Generic, TypeVar +from domain.data_transformation.column_classifier import ColumnClassifier from repositories.landlord_overrides.landlord_override_repository import ( LandlordOverrideRepository, ) @@ -11,26 +12,6 @@ from repositories.landlord_overrides.landlord_override_repository import ( E = TypeVar("E", bound=Enum) -@runtime_checkable -class ColumnClassifier(Protocol[E]): - """Port: resolves free-text descriptions into a category enum ``E``. - - The orchestration layer owns this contract because it is the consumer. - Any object exposing a matching ``classify`` satisfies it structurally -- - e.g. ``infrastructure.chatgpt.ChatGptColumnClassifier`` -- so orchestration - never imports an adapter. - """ - - def classify(self, descriptions: set[str]) -> dict[str, E]: - """Classify each description into a category enum member. - - Every input description appears as a key in the result. A description - that cannot be resolved maps to the enum's UNKNOWN member. A wholesale - failure raises the adapter's ``ClassificationError``. - """ - ... - - @dataclass(frozen=True) class ClassifiableColumn(Generic[E]): """Pairs a column's classifier with the repository that persists its results. diff --git a/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py b/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py index 0462f3ce..425d2625 100644 --- a/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py +++ b/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py @@ -4,7 +4,7 @@ from typing import Optional import pytest -from infrastructure.column_classifier import ClassificationError +from domain.data_transformation.column_classifier import ClassificationError from domain.epc.property_type import PropertyType from domain.epc.wall_type import WallType from infrastructure.chatgpt.chatgpt import ChatGPT diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py index 1b9785c9..bf5b13ce 100644 --- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py +++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py @@ -8,7 +8,8 @@ from domain.epc.built_form_type import BuiltFormType from domain.epc.property_type import PropertyType from domain.epc.wall_type import WallType from domain.postcode import Postcode -from orchestration.classifiable_column import ClassifiableColumn, ColumnClassifier +from domain.data_transformation.column_classifier import ColumnClassifier +from orchestration.classifiable_column import ClassifiableColumn from orchestration.landlord_description_overrides_orchestrator import ( LandlordDescriptionOverridesOrchestrator, )