diff --git a/applications/landlord_description_overrides/handler.py b/applications/landlord_description_overrides/handler.py index e2afb4bd..b5d22620 100644 --- a/applications/landlord_description_overrides/handler.py +++ b/applications/landlord_description_overrides/handler.py @@ -18,17 +18,20 @@ from infrastructure.chatgpt.chatgpt import ChatGPT from infrastructure.chatgpt.chatgpt_column_classifier import ChatGptColumnClassifier from infrastructure.postgres.config import PostgresConfig from infrastructure.postgres.engine import commit_scope, make_engine, make_session -from infrastructure.postgres.landlord_built_form_type_override_postgres_repository import ( - LandlordBuiltFormTypeOverridePostgresRepository, +from infrastructure.postgres.landlord_built_form_type_override_table import ( + LandlordBuiltFormTypeOverrideRow, ) -from infrastructure.postgres.landlord_property_type_override_postgres_repository import ( - LandlordPropertyTypeOverridePostgresRepository, +from infrastructure.postgres.landlord_overrides_postgres_repository import ( + LandlordOverridesRepository, ) -from infrastructure.postgres.landlord_roof_type_override_postgres_repository import ( - LandlordRoofTypeOverridePostgresRepository, +from infrastructure.postgres.landlord_property_type_override_table import ( + LandlordPropertyTypeOverrideRow, ) -from infrastructure.postgres.landlord_wall_type_override_postgres_repository import ( - LandlordWallTypeOverridePostgresRepository, +from infrastructure.postgres.landlord_roof_type_override_table import ( + LandlordRoofTypeOverrideRow, +) +from infrastructure.postgres.landlord_wall_type_override_table import ( + LandlordWallTypeOverrideRow, ) from infrastructure.s3.csv_s3_client import CsvS3Client from infrastructure.s3.s3_uri import parse_s3_uri @@ -62,7 +65,9 @@ def _build_columns( classifier=ChatGptColumnClassifier( chat_gpt, PropertyType, PropertyType.UNKNOWN ), - repo=LandlordPropertyTypeOverridePostgresRepository(session), + repo=LandlordOverridesRepository[PropertyType]( + session, LandlordPropertyTypeOverrideRow + ), ), "built_form_type": lambda src: ClassifiableColumn( name="built_form_type", @@ -70,7 +75,9 @@ def _build_columns( classifier=ChatGptColumnClassifier( chat_gpt, BuiltFormType, BuiltFormType.UNKNOWN ), - repo=LandlordBuiltFormTypeOverridePostgresRepository(session), + repo=LandlordOverridesRepository[BuiltFormType]( + session, LandlordBuiltFormTypeOverrideRow + ), ), "wall_type": lambda src: ClassifiableColumn( name="wall_type", @@ -81,7 +88,9 @@ def _build_columns( WallType.UNKNOWN, extra_instructions=wall_type_construction_date_prompt_hint(), ), - repo=LandlordWallTypeOverridePostgresRepository(session), + repo=LandlordOverridesRepository[WallType]( + session, LandlordWallTypeOverrideRow + ), ), "roof_type": lambda src: ClassifiableColumn( name="roof_type", @@ -89,7 +98,9 @@ def _build_columns( classifier=ChatGptColumnClassifier( chat_gpt, RoofType, RoofType.UNKNOWN ), - repo=LandlordRoofTypeOverridePostgresRepository(session), + repo=LandlordOverridesRepository[RoofType]( + session, LandlordRoofTypeOverrideRow + ), ), } diff --git a/docs/adr/0003-python-writes-landlord-overrides-directly.md b/docs/adr/0003-python-writes-landlord-overrides-directly.md index ea0fda9b..84f9065a 100644 --- a/docs/adr/0003-python-writes-landlord-overrides-directly.md +++ b/docs/adr/0003-python-writes-landlord-overrides-directly.md @@ -20,7 +20,7 @@ The Model service (specifically `applications/landlord_description_overrides/han Transaction boundaries live in `infrastructure/postgres/engine.transactional_session` — a context manager that commits on clean exit and rolls back on exception. The application layer (`handler.py`) never calls `.commit()` or `.rollback()` itself; it only opens the context. Orchestration and repository code likewise never commits — keeping transaction semantics confined to one infrastructure helper. -The conflict policy lives in SQL and is identical for every adapter implementation: +The conflict policy lives in SQL and is identical for every override category. A single generic adapter, `LandlordOverridesRepository[E]`, implements it once; the target table is selected by the SQLModel `…Row` class passed at construction. Each category (property / built-form / wall / roof type) is that same adapter parameterised by its row class: ```sql INSERT INTO landlord_property_type_overrides (portfolio_id, description, value, source) @@ -59,7 +59,7 @@ The convention going forward is: - **Postgres adapter (concrete):** `infrastructure/postgres/_postgres_repository.py` - **SQLModel row class:** `infrastructure/postgres/_table.py` -The new `LandlordOverrideRepository` family follows this convention. +The `LandlordOverridesRepository` adapter follows this convention: the concrete class lives at `infrastructure/postgres/landlord_overrides_postgres_repository.py`, with one `…_table.py` per category alongside it. The `…Row` classes stay one-per-table — each mirrors a genuinely distinct Drizzle table and `value` pgEnum, so they are schema mirrors, not duplicated logic. **Existing outliers to relocate in a follow-up:** @@ -71,7 +71,7 @@ Both moves are mechanical (import-path updates only). They are intentionally out ## Out of scope (deferred to follow-up work) - Relocating `task_postgres_repository.py` and `subtask_postgres_repository.py` into `infrastructure/postgres/` per the convention above. -- Extracting a shared upsert helper / base class once a third `landlord_*_overrides` column lands — until then the two adapters' 95%-identical bodies are kept side-by-side for direct comparison. +- ~~Extracting a shared upsert helper / base class once a third `landlord_*_overrides` column lands — until then the per-category adapters' 95%-identical bodies are kept side-by-side for direct comparison.~~ **Done.** The per-category adapter bodies were byte-identical (varying only in their row class), so they were consolidated into one generic `LandlordOverridesRepository[E]` parameterised by row class rather than waiting for a third column. - Switching `applications/landlord_description_overrides/handler.py` to acquire its `Session` via a `@subtask_handler()`-style decorator instead of building its own engine. - A cross-repo PR amending ADR-0002 to point at this ADR. - A CI check (or codegen) that diffs the Drizzle pgEnum literals against the Python `Enum.value` strings. diff --git a/infrastructure/postgres/landlord_built_form_type_override_postgres_repository.py b/infrastructure/postgres/landlord_overrides_postgres_repository.py similarity index 52% rename from infrastructure/postgres/landlord_built_form_type_override_postgres_repository.py rename to infrastructure/postgres/landlord_overrides_postgres_repository.py index aec4ea4d..d561a492 100644 --- a/infrastructure/postgres/landlord_built_form_type_override_postgres_repository.py +++ b/infrastructure/postgres/landlord_overrides_postgres_repository.py @@ -1,42 +1,60 @@ -"""Postgres adapter for ``LandlordOverrideRepository[BuiltFormType]``. +"""One Postgres adapter for every ``landlord__overrides`` table. -Writes to ``landlord_built_form_type_overrides`` (Drizzle-managed; mirrored by -``LandlordBuiltFormTypeOverrideRow``). The conflict policy lives in the SQL -- -see ADR-0003 §Decision. Shape mirrors -``LandlordPropertyTypeOverridePostgresRepository``; the duplication is -deliberate while there are only three columns -- if a fourth lands and the -duplication becomes painful, extract a shared upsert helper then. +The four override categories (property / built-form / wall / roof type) share +an identical write path: the same ``(portfolio_id, description) -> value`` row +shape and the same source-aware conflict policy (ADR-0003 §Decision fixes one +policy for all of them). The only thing that varies per category is the target +table, which is selected by the SQLModel row class handed to the constructor. + +So a single generic adapter serves all four -- callers parameterise it by the +category enum ``E`` and pass the matching ``…Row`` class: + + LandlordOverridesRepository[PropertyType](session, LandlordPropertyTypeOverrideRow) + +Per ADR-0003 §File layout, Postgres adapters live in ``infrastructure/postgres/``. """ from __future__ import annotations from datetime import datetime, timezone -from typing import cast +from enum import Enum +from typing import TypeVar, cast from sqlalchemy import Table from sqlalchemy.dialects.postgresql import insert as pg_insert -from sqlmodel import Session +from sqlmodel import Session, SQLModel -from domain.epc.built_form_type import BuiltFormType -from infrastructure.postgres.landlord_built_form_type_override_table import ( - LandlordBuiltFormTypeOverrideRow, -) from infrastructure.postgres.landlord_override_enums import OverrideSource from repositories.landlord_overrides.landlord_override_repository import ( LandlordOverrideRepository, ) +E = TypeVar("E", bound=Enum) -class LandlordBuiltFormTypeOverridePostgresRepository( - LandlordOverrideRepository[BuiltFormType] -): - def __init__(self, session: Session) -> None: + +class LandlordOverridesRepository(LandlordOverrideRepository[E]): + """Writes classifier overrides to the ``landlord__overrides`` table + backing ``row_type``. + + ``row_type`` is the SQLModel mirror of the target table (e.g. + ``LandlordPropertyTypeOverrideRow``); its category enum must match the type + argument ``E`` the caller binds. The pairing is a convention, not a type + constraint -- the row classes are not generic over their value enum -- so + keep the two in step at the call site. + """ + + def __init__(self, session: Session, row_type: type[SQLModel]) -> None: self._session = session + # SQLModel's class-level ``__table__`` is injected at runtime on + # ``table=True`` classes but isn't exposed by the stubs; pin it to + # ``Table`` via ``getattr`` so the dialect insert helper carries + # through with strict types. + self._table: Table = cast(Table, getattr(row_type, "__table__")) def upsert_all( self, portfolio_id: int, - descriptions_to_values: dict[str, BuiltFormType], + descriptions_to_values: dict[str, E], ) -> None: if not descriptions_to_values: return @@ -54,14 +72,7 @@ class LandlordBuiltFormTypeOverridePostgresRepository( for description, value in descriptions_to_values.items() ] - # SQLModel's class-level ``__table__`` is injected at runtime on - # ``table=True`` classes but isn't exposed by the stubs; pin it to - # ``Table`` via ``getattr`` so the dialect insert helper below - # carries through with strict types. - table: Table = cast( - Table, getattr(LandlordBuiltFormTypeOverrideRow, "__table__") - ) - stmt = pg_insert(table).values(rows) + stmt = pg_insert(self._table).values(rows) # The classifier may refresh its own past output, but must never # overwrite a user correction -- the ``WHERE existing.source = @@ -73,7 +84,7 @@ class LandlordBuiltFormTypeOverridePostgresRepository( "source": stmt.excluded.source, "updated_at": stmt.excluded.updated_at, }, - where=table.c.source == OverrideSource.CLASSIFIER, + where=self._table.c.source == OverrideSource.CLASSIFIER, ) # SQLModel re-exports SQLAlchemy's ``Session.execute``; one of the diff --git a/infrastructure/postgres/landlord_property_type_override_postgres_repository.py b/infrastructure/postgres/landlord_property_type_override_postgres_repository.py deleted file mode 100644 index 3cd7dbb2..00000000 --- a/infrastructure/postgres/landlord_property_type_override_postgres_repository.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Postgres adapter for ``LandlordOverrideRepository[PropertyType]``. - -Writes to ``landlord_property_type_overrides`` (Drizzle-managed; mirrored by -``LandlordPropertyTypeOverrideRow``). The conflict policy lives in the SQL -- -see ADR-0003 §Decision. - -Per the convention this ADR fixes, Postgres adapters live in -``infrastructure/postgres/``. The existing ``task_postgres_repository.py`` / -``subtask_postgres_repository.py`` are outliers still under ``repositories/``; -relocating them is tracked as a follow-up in ADR-0003 §"File layout". -""" - -from __future__ import annotations - -from datetime import datetime, timezone -from typing import cast - -from sqlalchemy import Table -from sqlalchemy.dialects.postgresql import insert as pg_insert -from sqlmodel import Session - -from domain.epc.property_type import PropertyType -from infrastructure.postgres.landlord_override_enums import OverrideSource -from infrastructure.postgres.landlord_property_type_override_table import ( - LandlordPropertyTypeOverrideRow, -) -from repositories.landlord_overrides.landlord_override_repository import ( - LandlordOverrideRepository, -) - - -class LandlordPropertyTypeOverridePostgresRepository( - LandlordOverrideRepository[PropertyType] -): - def __init__(self, session: Session) -> None: - self._session = session - - def upsert_all( - self, - portfolio_id: int, - descriptions_to_values: dict[str, PropertyType], - ) -> None: - if not descriptions_to_values: - return - - now = datetime.now(timezone.utc) - rows = [ - { - "portfolio_id": portfolio_id, - "description": description, - "value": value.value, - "source": OverrideSource.CLASSIFIER, - "created_at": now, - "updated_at": now, - } - for description, value in descriptions_to_values.items() - ] - - # SQLModel's class-level ``__table__`` is injected at runtime on - # ``table=True`` classes but isn't exposed by the stubs; pin it to - # ``Table`` via ``getattr`` so the dialect insert helper below - # carries through with strict types. - table: Table = cast(Table, getattr(LandlordPropertyTypeOverrideRow, "__table__")) - stmt = pg_insert(table).values(rows) - - # The classifier may refresh its own past output, but must never - # overwrite a user correction -- the ``WHERE existing.source = - # 'classifier'`` guard enforces that. See ADR-0003 §Decision. - stmt = stmt.on_conflict_do_update( - index_elements=["portfolio_id", "description"], - set_={ - "value": stmt.excluded.value, - "source": stmt.excluded.source, - "updated_at": stmt.excluded.updated_at, - }, - where=table.c.source == OverrideSource.CLASSIFIER, - ) - - # SQLModel re-exports SQLAlchemy's ``Session.execute``; one of the - # overload signatures is marked deprecated in stubs, which fires - # here even though our INSERT path is the supported one. - self._session.execute(stmt) # pyright: ignore[reportDeprecated] diff --git a/infrastructure/postgres/landlord_roof_type_override_postgres_repository.py b/infrastructure/postgres/landlord_roof_type_override_postgres_repository.py deleted file mode 100644 index c3f263a9..00000000 --- a/infrastructure/postgres/landlord_roof_type_override_postgres_repository.py +++ /dev/null @@ -1,80 +0,0 @@ -"""Postgres adapter for ``LandlordOverrideRepository[RoofType]``. - -Writes to ``landlord_roof_type_overrides`` (Drizzle-managed; mirrored by -``LandlordRoofTypeOverrideRow``). The conflict policy lives in the SQL -- -see ADR-0003 §Decision. Shape mirrors -``LandlordPropertyTypeOverridePostgresRepository``; the duplication is -deliberate while there are only a handful of override columns -- if the -duplication becomes painful, extract a shared upsert helper then. -""" - -from __future__ import annotations - -from datetime import datetime, timezone -from typing import cast - -from sqlalchemy import Table -from sqlalchemy.dialects.postgresql import insert as pg_insert -from sqlmodel import Session - -from domain.epc.roof_type import RoofType -from infrastructure.postgres.landlord_override_enums import OverrideSource -from infrastructure.postgres.landlord_roof_type_override_table import ( - LandlordRoofTypeOverrideRow, -) -from repositories.landlord_overrides.landlord_override_repository import ( - LandlordOverrideRepository, -) - - -class LandlordRoofTypeOverridePostgresRepository( - LandlordOverrideRepository[RoofType] -): - def __init__(self, session: Session) -> None: - self._session = session - - def upsert_all( - self, - portfolio_id: int, - descriptions_to_values: dict[str, RoofType], - ) -> None: - if not descriptions_to_values: - return - - now = datetime.now(timezone.utc) - rows = [ - { - "portfolio_id": portfolio_id, - "description": description, - "value": value.value, - "source": OverrideSource.CLASSIFIER, - "created_at": now, - "updated_at": now, - } - for description, value in descriptions_to_values.items() - ] - - # SQLModel's class-level ``__table__`` is injected at runtime on - # ``table=True`` classes but isn't exposed by the stubs; pin it to - # ``Table`` via ``getattr`` so the dialect insert helper below - # carries through with strict types. - table: Table = cast(Table, getattr(LandlordRoofTypeOverrideRow, "__table__")) - stmt = pg_insert(table).values(rows) - - # The classifier may refresh its own past output, but must never - # overwrite a user correction -- the ``WHERE existing.source = - # 'classifier'`` guard enforces that. See ADR-0003 §Decision. - stmt = stmt.on_conflict_do_update( - index_elements=["portfolio_id", "description"], - set_={ - "value": stmt.excluded.value, - "source": stmt.excluded.source, - "updated_at": stmt.excluded.updated_at, - }, - where=table.c.source == OverrideSource.CLASSIFIER, - ) - - # SQLModel re-exports SQLAlchemy's ``Session.execute``; one of the - # overload signatures is marked deprecated in stubs, which fires - # here even though our INSERT path is the supported one. - self._session.execute(stmt) # pyright: ignore[reportDeprecated] diff --git a/infrastructure/postgres/landlord_wall_type_override_postgres_repository.py b/infrastructure/postgres/landlord_wall_type_override_postgres_repository.py deleted file mode 100644 index 711e5c30..00000000 --- a/infrastructure/postgres/landlord_wall_type_override_postgres_repository.py +++ /dev/null @@ -1,80 +0,0 @@ -"""Postgres adapter for ``LandlordOverrideRepository[WallType]``. - -Writes to ``landlord_wall_type_overrides`` (Drizzle-managed; mirrored by -``LandlordWallTypeOverrideRow``). The conflict policy lives in the SQL -- -see ADR-0003 §Decision. Shape mirrors -``LandlordPropertyTypeOverridePostgresRepository``; the duplication is -deliberate while there are only two columns -- if a third lands and the -duplication becomes painful, extract a shared upsert helper then. -""" - -from __future__ import annotations - -from datetime import datetime, timezone -from typing import cast - -from sqlalchemy import Table -from sqlalchemy.dialects.postgresql import insert as pg_insert -from sqlmodel import Session - -from domain.epc.wall_type import WallType -from infrastructure.postgres.landlord_override_enums import OverrideSource -from infrastructure.postgres.landlord_wall_type_override_table import ( - LandlordWallTypeOverrideRow, -) -from repositories.landlord_overrides.landlord_override_repository import ( - LandlordOverrideRepository, -) - - -class LandlordWallTypeOverridePostgresRepository( - LandlordOverrideRepository[WallType] -): - def __init__(self, session: Session) -> None: - self._session = session - - def upsert_all( - self, - portfolio_id: int, - descriptions_to_values: dict[str, WallType], - ) -> None: - if not descriptions_to_values: - return - - now = datetime.now(timezone.utc) - rows = [ - { - "portfolio_id": portfolio_id, - "description": description, - "value": value.value, - "source": OverrideSource.CLASSIFIER, - "created_at": now, - "updated_at": now, - } - for description, value in descriptions_to_values.items() - ] - - # SQLModel's class-level ``__table__`` is injected at runtime on - # ``table=True`` classes but isn't exposed by the stubs; pin it to - # ``Table`` via ``getattr`` so the dialect insert helper below - # carries through with strict types. - table: Table = cast(Table, getattr(LandlordWallTypeOverrideRow, "__table__")) - stmt = pg_insert(table).values(rows) - - # The classifier may refresh its own past output, but must never - # overwrite a user correction -- the ``WHERE existing.source = - # 'classifier'`` guard enforces that. See ADR-0003 §Decision. - stmt = stmt.on_conflict_do_update( - index_elements=["portfolio_id", "description"], - set_={ - "value": stmt.excluded.value, - "source": stmt.excluded.source, - "updated_at": stmt.excluded.updated_at, - }, - where=table.c.source == OverrideSource.CLASSIFIER, - ) - - # SQLModel re-exports SQLAlchemy's ``Session.execute``; one of the - # overload signatures is marked deprecated in stubs, which fires - # here even though our INSERT path is the supported one. - self._session.execute(stmt) # pyright: ignore[reportDeprecated] diff --git a/repositories/landlord_overrides/landlord_override_repository.py b/repositories/landlord_overrides/landlord_override_repository.py index 47e873fe..df1c55a7 100644 --- a/repositories/landlord_overrides/landlord_override_repository.py +++ b/repositories/landlord_overrides/landlord_override_repository.py @@ -15,8 +15,10 @@ class LandlordOverrideRepository(ABC, Generic[E]): which table the adapter writes to; the orchestrator depends only on this interface and never names a concrete table. - Concrete adapters live in ``infrastructure/`` (see ADR-0003): for example - ``infrastructure/postgres/landlord_property_type_override_postgres_repository.py``. + A single concrete adapter, + ``infrastructure/postgres/landlord_overrides_postgres_repository.LandlordOverridesRepository``, + serves every category (see ADR-0003) -- it is parameterised by the + SQLModel row class for the target table. """ @abstractmethod diff --git a/tests/repositories/landlord_overrides/postgres/test_landlord_overrides_postgres_repository.py b/tests/repositories/landlord_overrides/postgres/test_landlord_overrides_postgres_repository.py new file mode 100644 index 00000000..31b9df33 --- /dev/null +++ b/tests/repositories/landlord_overrides/postgres/test_landlord_overrides_postgres_repository.py @@ -0,0 +1,220 @@ +"""Integration tests for the consolidated landlord-overrides upsert adapter. + +``LandlordOverridesRepository`` serves every ``landlord__overrides`` +table; the table is selected by the ``…Row`` class passed at construction. The +source-aware conflict policy lives entirely in SQL (``INSERT ... ON CONFLICT +... DO UPDATE ... WHERE existing.source = 'classifier'``), so the only way to +verify it correctly distinguishes ``EXCLUDED.source`` from the qualified +``.source`` is against a real Postgres -- the ``db_engine`` fixture in +``tests/conftest.py`` spins one up per test. + +Each scenario is parametrised across two distinct categories (property type and +wall type). Running the shared body against two different ``…Row`` classes +proves the adapter writes to whichever table it was handed -- not just that one +table happens to work. +""" + +from __future__ import annotations + +from collections.abc import Iterator +from dataclasses import dataclass +from enum import Enum +from typing import cast + +import pytest +from sqlalchemy import Engine, Table +from sqlmodel import Session, SQLModel, select + +from domain.epc.property_type import PropertyType +from domain.epc.wall_type import WallType +from infrastructure.postgres.landlord_override_enums import OverrideSource +from infrastructure.postgres.landlord_overrides_postgres_repository import ( + LandlordOverridesRepository, +) +from infrastructure.postgres.landlord_property_type_override_table import ( + LandlordPropertyTypeOverrideRow, +) +from infrastructure.postgres.landlord_wall_type_override_table import ( + LandlordWallTypeOverrideRow, +) + + +@dataclass(frozen=True) +class Category: + """One ``landlord__overrides`` table plus a few of its enum values. + + ``first``/``second`` are two distinct classifier values for re-upsert + tests; ``user_correction`` stands in for a value only a user would pick; + ``unknown`` is the category's ``UNKNOWN`` member. + """ + + id: str + row_type: type[SQLModel] + first: Enum + second: Enum + user_correction: Enum + unknown: Enum + + +CATEGORIES = [ + Category( + id="property_type", + row_type=LandlordPropertyTypeOverrideRow, + first=PropertyType.FLAT, + second=PropertyType.HOUSE, + user_correction=PropertyType.BUNGALOW, + unknown=PropertyType.UNKNOWN, + ), + Category( + id="wall_type", + row_type=LandlordWallTypeOverrideRow, + first=WallType.CAVITY_FILLED, + second=WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED, + user_correction=WallType.SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED, + unknown=WallType.UNKNOWN, + ), +] + + +@pytest.fixture +def session(db_engine: Engine) -> Iterator[Session]: + with Session(db_engine) as s: + yield s + + +@pytest.fixture(params=CATEGORIES, ids=lambda c: c.id) +def category(request: pytest.FixtureRequest) -> Category: + return request.param + + +def _select_row( + session: Session, row_type: type[SQLModel], portfolio_id: int, description: str +) -> SQLModel: + # The row class is only known as ``type[SQLModel]`` here, so its column + # descriptors aren't typed; filter via the Core ``Table`` columns instead. + table = cast(Table, getattr(row_type, "__table__")) + rows = session.exec( + select(row_type).where( + table.c.portfolio_id == portfolio_id, + table.c.description == description, + ) + ).all() + assert len(rows) == 1, f"expected exactly one row, got {len(rows)}" + return rows[0] + + +def test_inserts_a_fresh_row_with_source_classifier( + session: Session, category: Category +) -> None: + # arrange + repo = LandlordOverridesRepository[Enum](session, category.row_type) + + # act + repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.first}) + session.commit() + + # assert + row = _select_row(session, category.row_type, portfolio_id=1, description="cosy") + assert getattr(row, "value") is category.first + assert getattr(row, "source") == OverrideSource.CLASSIFIER + + +def test_reupsert_overwrites_a_classifier_row( + session: Session, category: Category +) -> None: + # arrange: a stale classifier row exists. + repo = LandlordOverridesRepository[Enum](session, category.row_type) + repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.first}) + session.commit() + + # act: re-classify with a different value. + repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.second}) + session.commit() + + # assert: the new classification wins. + row = _select_row(session, category.row_type, portfolio_id=1, description="cosy") + assert getattr(row, "value") is category.second + assert getattr(row, "source") == OverrideSource.CLASSIFIER + + +def test_reupsert_does_not_overwrite_a_user_row( + session: Session, category: Category +) -> None: + # arrange: a user has corrected the row. The classifier path never produces + # ``source = 'user'``; we install the row directly to mimic the override + # frontend. + user_row = category.row_type( + portfolio_id=1, + description="cosy", + value=category.user_correction, + source=OverrideSource.USER, + ) + session.add(user_row) + session.commit() + + # act: the classifier re-runs and tries to reclassify the same description. + # Under the source-aware conflict policy, this must be silently skipped -- + # user edits beat classifier reruns. + repo = LandlordOverridesRepository[Enum](session, category.row_type) + repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.second}) + session.commit() + + # assert: the user row is unchanged. + row = _select_row(session, category.row_type, portfolio_id=1, description="cosy") + assert getattr(row, "value") is category.user_correction + assert getattr(row, "source") == OverrideSource.USER + + +def test_upsert_keeps_other_portfolios_descriptions_independent( + session: Session, category: Category +) -> None: + # arrange / act: the unique key is ``(portfolio_id, description)``, so the + # same description for two different portfolios must coexist as two rows. + repo = LandlordOverridesRepository[Enum](session, category.row_type) + repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.first}) + repo.upsert_all(portfolio_id=2, descriptions_to_values={"cosy": category.second}) + session.commit() + + # assert: both rows survive with their own values. + assert ( + getattr(_select_row(session, category.row_type, 1, "cosy"), "value") + is category.first + ) + assert ( + getattr(_select_row(session, category.row_type, 2, "cosy"), "value") + is category.second + ) + + +def test_upsert_persists_unknown_so_a_user_can_resolve_it_later( + session: Session, category: Category +) -> None: + # arrange / act: a description the classifier couldn't resolve still lands + # -- per ADR-0002 §5 / ADR-0003 §Decision, so a future user override can + # upgrade it to a real value. + repo = LandlordOverridesRepository[Enum](session, category.row_type) + repo.upsert_all( + portfolio_id=1, + descriptions_to_values={"unparseable nonsense": category.unknown}, + ) + session.commit() + + # assert: the row exists with value=UNKNOWN, source=classifier. + row = _select_row( + session, category.row_type, portfolio_id=1, description="unparseable nonsense" + ) + assert getattr(row, "value") is category.unknown + assert getattr(row, "source") == OverrideSource.CLASSIFIER + + +def test_upsert_all_with_empty_mapping_is_a_no_op( + session: Session, category: Category +) -> None: + # arrange / act + repo = LandlordOverridesRepository[Enum](session, category.row_type) + repo.upsert_all(portfolio_id=1, descriptions_to_values={}) + session.commit() + + # assert: nothing was inserted. + rows = session.exec(select(category.row_type)).all() + assert rows == [] diff --git a/tests/repositories/landlord_overrides/postgres/test_landlord_property_type_override_postgres_repository.py b/tests/repositories/landlord_overrides/postgres/test_landlord_property_type_override_postgres_repository.py deleted file mode 100644 index c2b81293..00000000 --- a/tests/repositories/landlord_overrides/postgres/test_landlord_property_type_override_postgres_repository.py +++ /dev/null @@ -1,147 +0,0 @@ -"""Integration tests for the source-aware upsert policy. - -The conflict policy lives entirely in SQL (``INSERT ... ON CONFLICT -... DO UPDATE ... WHERE existing.source = 'classifier'``). The only way to -verify it correctly distinguishes ``EXCLUDED.source`` from the qualified -``landlord_property_type_overrides.source`` is against a real Postgres -- -the ``db_engine`` fixture in ``tests/conftest.py`` spins one up per test. -""" - -from __future__ import annotations - -from collections.abc import Iterator - -import pytest -from sqlalchemy import Engine -from sqlmodel import Session, select - -from domain.epc.property_type import PropertyType -from infrastructure.postgres.landlord_override_enums import OverrideSource -from infrastructure.postgres.landlord_property_type_override_postgres_repository import ( - LandlordPropertyTypeOverridePostgresRepository, -) -from infrastructure.postgres.landlord_property_type_override_table import ( - LandlordPropertyTypeOverrideRow, -) - - -@pytest.fixture -def session(db_engine: Engine) -> Iterator[Session]: - with Session(db_engine) as s: - yield s - - -def _select_row( - session: Session, portfolio_id: int, description: str -) -> LandlordPropertyTypeOverrideRow: - rows = session.exec( - select(LandlordPropertyTypeOverrideRow).where( - LandlordPropertyTypeOverrideRow.portfolio_id == portfolio_id, - LandlordPropertyTypeOverrideRow.description == description, - ) - ).all() - assert len(rows) == 1, f"expected exactly one row, got {len(rows)}" - return rows[0] - - -def test_inserts_a_fresh_row_with_source_classifier(session: Session) -> None: - # arrange - repo = LandlordPropertyTypeOverridePostgresRepository(session) - - # act - repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.HOUSE}) - session.commit() - - # assert - row = _select_row(session, portfolio_id=1, description="cosy") - assert row.value is PropertyType.HOUSE - assert row.source == OverrideSource.CLASSIFIER - - -def test_reupsert_overwrites_a_classifier_row(session: Session) -> None: - # arrange: a stale classifier row exists. - repo = LandlordPropertyTypeOverridePostgresRepository(session) - repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.FLAT}) - session.commit() - - # act: re-classify with a different category. - repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.HOUSE}) - session.commit() - - # assert: the new classification wins. - row = _select_row(session, portfolio_id=1, description="cosy") - assert row.value is PropertyType.HOUSE - assert row.source == OverrideSource.CLASSIFIER - - -def test_reupsert_does_not_overwrite_a_user_row(session: Session) -> None: - # arrange: a user has corrected the row to ``BUNGALOW``. The classifier - # path never produces ``source = 'user'``; we install the row directly - # to mimic the override frontend. - user_row = LandlordPropertyTypeOverrideRow( - portfolio_id=1, - description="cosy", - value=PropertyType.BUNGALOW, - source=OverrideSource.USER, - ) - session.add(user_row) - session.commit() - - # act: the classifier re-runs and tries to classify the same description - # as a ``HOUSE``. Under the source-aware conflict policy, this must be - # silently skipped -- user edits beat classifier reruns. - repo = LandlordPropertyTypeOverridePostgresRepository(session) - repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.HOUSE}) - session.commit() - - # assert: the user row is unchanged. - row = _select_row(session, portfolio_id=1, description="cosy") - assert row.value is PropertyType.BUNGALOW - assert row.source == OverrideSource.USER - - -def test_upsert_keeps_other_portfolios_descriptions_independent( - session: Session, -) -> None: - # arrange: the unique key is ``(portfolio_id, description)``, so the same - # description for two different portfolios must coexist as two rows. - repo = LandlordPropertyTypeOverridePostgresRepository(session) - - # act - repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.HOUSE}) - repo.upsert_all(portfolio_id=2, descriptions_to_values={"cosy": PropertyType.FLAT}) - session.commit() - - # assert: both rows survive with their own values. - assert _select_row(session, 1, "cosy").value is PropertyType.HOUSE - assert _select_row(session, 2, "cosy").value is PropertyType.FLAT - - -def test_upsert_persists_unknown_so_a_user_can_resolve_it_later( - session: Session, -) -> None: - # arrange / act: a description the classifier couldn't resolve still - # lands -- per ADR-0002 §5 / ADR-0003 §Decision, so a future user - # override can upgrade it to a real value. - repo = LandlordPropertyTypeOverridePostgresRepository(session) - repo.upsert_all( - portfolio_id=1, - descriptions_to_values={"unparseable nonsense": PropertyType.UNKNOWN}, - ) - session.commit() - - # assert: the row exists with value=UNKNOWN, source=classifier. - row = _select_row(session, portfolio_id=1, description="unparseable nonsense") - assert row.value is PropertyType.UNKNOWN - assert row.source == OverrideSource.CLASSIFIER - - -def test_upsert_all_with_empty_mapping_is_a_no_op(session: Session) -> None: - # arrange / act - repo = LandlordPropertyTypeOverridePostgresRepository(session) - repo.upsert_all(portfolio_id=1, descriptions_to_values={}) - session.commit() - - # assert: nothing was inserted. - rows = session.exec(select(LandlordPropertyTypeOverrideRow)).all() - assert rows == [] diff --git a/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py b/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py deleted file mode 100644 index 9504a520..00000000 --- a/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py +++ /dev/null @@ -1,158 +0,0 @@ -"""Integration tests for the source-aware upsert policy on the WallType table. - -Mirror of ``test_landlord_property_type_override_postgres_repository.py`` -- -the SQL is structurally identical, but the conflict policy lives in two -separate concrete adapters and so warrants two parallel test suites until -(if) the adapters are factored through a shared upsert helper. -""" - -from __future__ import annotations - -from collections.abc import Iterator - -import pytest -from sqlalchemy import Engine -from sqlmodel import Session, select - -from domain.epc.wall_type import WallType -from infrastructure.postgres.landlord_override_enums import OverrideSource -from infrastructure.postgres.landlord_wall_type_override_postgres_repository import ( - LandlordWallTypeOverridePostgresRepository, -) -from infrastructure.postgres.landlord_wall_type_override_table import ( - LandlordWallTypeOverrideRow, -) - - -@pytest.fixture -def session(db_engine: Engine) -> Iterator[Session]: - with Session(db_engine) as s: - yield s - - -def _select_row( - session: Session, portfolio_id: int, description: str -) -> LandlordWallTypeOverrideRow: - rows = session.exec( - select(LandlordWallTypeOverrideRow).where( - LandlordWallTypeOverrideRow.portfolio_id == portfolio_id, - LandlordWallTypeOverrideRow.description == description, - ) - ).all() - assert len(rows) == 1, f"expected exactly one row, got {len(rows)}" - return rows[0] - - -def test_inserts_a_fresh_row_with_source_classifier(session: Session) -> None: - # arrange - repo = LandlordWallTypeOverridePostgresRepository(session) - - # act - repo.upsert_all( - portfolio_id=1, descriptions_to_values={"cavity insulated": WallType.CAVITY_FILLED} - ) - session.commit() - - # assert - row = _select_row(session, portfolio_id=1, description="cavity insulated") - assert row.value is WallType.CAVITY_FILLED - assert row.source == OverrideSource.CLASSIFIER - - -def test_reupsert_overwrites_a_classifier_row(session: Session) -> None: - # arrange: a stale classifier row exists. - repo = LandlordWallTypeOverridePostgresRepository(session) - repo.upsert_all( - portfolio_id=1, descriptions_to_values={"old red brick": WallType.CAVITY_FILLED} - ) - session.commit() - - # act: re-classify with a different category. - repo.upsert_all( - portfolio_id=1, descriptions_to_values={"old red brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED} - ) - session.commit() - - # assert: the new classification wins. - row = _select_row(session, portfolio_id=1, description="old red brick") - assert row.value is WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED - assert row.source == OverrideSource.CLASSIFIER - - -def test_reupsert_does_not_overwrite_a_user_row(session: Session) -> None: - # arrange: a user has corrected the row to ``SANDSTONE``. The classifier - # path never produces ``source = 'user'``; we install the row directly - # to mimic the override frontend. - user_row = LandlordWallTypeOverrideRow( - portfolio_id=1, - description="old red brick", - value=WallType.SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED, - source=OverrideSource.USER, - ) - session.add(user_row) - session.commit() - - # act: the classifier re-runs and tries to classify the same description - # as ``SOLID_BRICK``. Under the source-aware conflict policy, this must - # be silently skipped -- user edits beat classifier reruns. - repo = LandlordWallTypeOverridePostgresRepository(session) - repo.upsert_all( - portfolio_id=1, descriptions_to_values={"old red brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED} - ) - session.commit() - - # assert: the user row is unchanged. - row = _select_row(session, portfolio_id=1, description="old red brick") - assert row.value is WallType.SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED - assert row.source == OverrideSource.USER - - -def test_upsert_keeps_other_portfolios_descriptions_independent( - session: Session, -) -> None: - # arrange / act: the unique key is ``(portfolio_id, description)``, so the - # same description for two different portfolios must coexist as two rows. - repo = LandlordWallTypeOverridePostgresRepository(session) - repo.upsert_all( - portfolio_id=1, descriptions_to_values={"old red brick": WallType.CAVITY_FILLED} - ) - repo.upsert_all( - portfolio_id=2, descriptions_to_values={"old red brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED} - ) - session.commit() - - # assert: both rows survive with their own values. - assert _select_row(session, 1, "old red brick").value is WallType.CAVITY_FILLED - assert _select_row(session, 2, "old red brick").value is WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED - - -def test_upsert_persists_unknown_so_a_user_can_resolve_it_later( - session: Session, -) -> None: - # arrange / act: a description the classifier couldn't resolve still - # lands -- per ADR-0002 §5 / ADR-0003 §Decision, so a future user - # override can upgrade it to a real value. - repo = LandlordWallTypeOverridePostgresRepository(session) - repo.upsert_all( - portfolio_id=1, - descriptions_to_values={"unparseable wall description": WallType.UNKNOWN}, - ) - session.commit() - - # assert: the row exists with value=UNKNOWN, source=classifier. - row = _select_row( - session, portfolio_id=1, description="unparseable wall description" - ) - assert row.value is WallType.UNKNOWN - assert row.source == OverrideSource.CLASSIFIER - - -def test_upsert_all_with_empty_mapping_is_a_no_op(session: Session) -> None: - # arrange / act - repo = LandlordWallTypeOverridePostgresRepository(session) - repo.upsert_all(portfolio_id=1, descriptions_to_values={}) - session.commit() - - # assert: nothing was inserted. - rows = session.exec(select(LandlordWallTypeOverrideRow)).all() - assert rows == []