landlord overrids moved into one repo

This commit is contained in:
Jun-te Kim 2026-06-01 15:16:23 +00:00
parent 3845ac10b0
commit 8a9d14a45c
10 changed files with 288 additions and 591 deletions

View file

@ -18,17 +18,20 @@ from infrastructure.chatgpt.chatgpt import ChatGPT
from infrastructure.chatgpt.chatgpt_column_classifier import ChatGptColumnClassifier
from infrastructure.postgres.config import PostgresConfig
from infrastructure.postgres.engine import commit_scope, make_engine, make_session
from infrastructure.postgres.landlord_built_form_type_override_postgres_repository import (
LandlordBuiltFormTypeOverridePostgresRepository,
from infrastructure.postgres.landlord_built_form_type_override_table import (
LandlordBuiltFormTypeOverrideRow,
)
from infrastructure.postgres.landlord_property_type_override_postgres_repository import (
LandlordPropertyTypeOverridePostgresRepository,
from infrastructure.postgres.landlord_overrides_postgres_repository import (
LandlordOverridesRepository,
)
from infrastructure.postgres.landlord_roof_type_override_postgres_repository import (
LandlordRoofTypeOverridePostgresRepository,
from infrastructure.postgres.landlord_property_type_override_table import (
LandlordPropertyTypeOverrideRow,
)
from infrastructure.postgres.landlord_wall_type_override_postgres_repository import (
LandlordWallTypeOverridePostgresRepository,
from infrastructure.postgres.landlord_roof_type_override_table import (
LandlordRoofTypeOverrideRow,
)
from infrastructure.postgres.landlord_wall_type_override_table import (
LandlordWallTypeOverrideRow,
)
from infrastructure.s3.csv_s3_client import CsvS3Client
from infrastructure.s3.s3_uri import parse_s3_uri
@ -62,7 +65,9 @@ def _build_columns(
classifier=ChatGptColumnClassifier(
chat_gpt, PropertyType, PropertyType.UNKNOWN
),
repo=LandlordPropertyTypeOverridePostgresRepository(session),
repo=LandlordOverridesRepository[PropertyType](
session, LandlordPropertyTypeOverrideRow
),
),
"built_form_type": lambda src: ClassifiableColumn(
name="built_form_type",
@ -70,7 +75,9 @@ def _build_columns(
classifier=ChatGptColumnClassifier(
chat_gpt, BuiltFormType, BuiltFormType.UNKNOWN
),
repo=LandlordBuiltFormTypeOverridePostgresRepository(session),
repo=LandlordOverridesRepository[BuiltFormType](
session, LandlordBuiltFormTypeOverrideRow
),
),
"wall_type": lambda src: ClassifiableColumn(
name="wall_type",
@ -81,7 +88,9 @@ def _build_columns(
WallType.UNKNOWN,
extra_instructions=wall_type_construction_date_prompt_hint(),
),
repo=LandlordWallTypeOverridePostgresRepository(session),
repo=LandlordOverridesRepository[WallType](
session, LandlordWallTypeOverrideRow
),
),
"roof_type": lambda src: ClassifiableColumn(
name="roof_type",
@ -89,7 +98,9 @@ def _build_columns(
classifier=ChatGptColumnClassifier(
chat_gpt, RoofType, RoofType.UNKNOWN
),
repo=LandlordRoofTypeOverridePostgresRepository(session),
repo=LandlordOverridesRepository[RoofType](
session, LandlordRoofTypeOverrideRow
),
),
}

View file

@ -20,7 +20,7 @@ The Model service (specifically `applications/landlord_description_overrides/han
Transaction boundaries live in `infrastructure/postgres/engine.transactional_session` — a context manager that commits on clean exit and rolls back on exception. The application layer (`handler.py`) never calls `.commit()` or `.rollback()` itself; it only opens the context. Orchestration and repository code likewise never commits — keeping transaction semantics confined to one infrastructure helper.
The conflict policy lives in SQL and is identical for every adapter implementation:
The conflict policy lives in SQL and is identical for every override category. A single generic adapter, `LandlordOverridesRepository[E]`, implements it once; the target table is selected by the SQLModel `…Row` class passed at construction. Each category (property / built-form / wall / roof type) is that same adapter parameterised by its row class:
```sql
INSERT INTO landlord_property_type_overrides (portfolio_id, description, value, source)
@ -59,7 +59,7 @@ The convention going forward is:
- **Postgres adapter (concrete):** `infrastructure/postgres/<thing>_postgres_repository.py`
- **SQLModel row class:** `infrastructure/postgres/<thing>_table.py`
The new `LandlordOverrideRepository` family follows this convention.
The `LandlordOverridesRepository` adapter follows this convention: the concrete class lives at `infrastructure/postgres/landlord_overrides_postgres_repository.py`, with one `…_table.py` per category alongside it. The `…Row` classes stay one-per-table — each mirrors a genuinely distinct Drizzle table and `value` pgEnum, so they are schema mirrors, not duplicated logic.
**Existing outliers to relocate in a follow-up:**
@ -71,7 +71,7 @@ Both moves are mechanical (import-path updates only). They are intentionally out
## Out of scope (deferred to follow-up work)
- Relocating `task_postgres_repository.py` and `subtask_postgres_repository.py` into `infrastructure/postgres/` per the convention above.
- Extracting a shared upsert helper / base class once a third `landlord_*_overrides` column lands — until then the two adapters' 95%-identical bodies are kept side-by-side for direct comparison.
- ~~Extracting a shared upsert helper / base class once a third `landlord_*_overrides` column lands — until then the per-category adapters' 95%-identical bodies are kept side-by-side for direct comparison.~~ **Done.** The per-category adapter bodies were byte-identical (varying only in their row class), so they were consolidated into one generic `LandlordOverridesRepository[E]` parameterised by row class rather than waiting for a third column.
- Switching `applications/landlord_description_overrides/handler.py` to acquire its `Session` via a `@subtask_handler()`-style decorator instead of building its own engine.
- A cross-repo PR amending ADR-0002 to point at this ADR.
- A CI check (or codegen) that diffs the Drizzle pgEnum literals against the Python `Enum.value` strings.

View file

@ -1,42 +1,60 @@
"""Postgres adapter for ``LandlordOverrideRepository[BuiltFormType]``.
"""One Postgres adapter for every ``landlord_<category>_overrides`` table.
Writes to ``landlord_built_form_type_overrides`` (Drizzle-managed; mirrored by
``LandlordBuiltFormTypeOverrideRow``). The conflict policy lives in the SQL --
see ADR-0003 §Decision. Shape mirrors
``LandlordPropertyTypeOverridePostgresRepository``; the duplication is
deliberate while there are only three columns -- if a fourth lands and the
duplication becomes painful, extract a shared upsert helper then.
The four override categories (property / built-form / wall / roof type) share
an identical write path: the same ``(portfolio_id, description) -> value`` row
shape and the same source-aware conflict policy (ADR-0003 §Decision fixes one
policy for all of them). The only thing that varies per category is the target
table, which is selected by the SQLModel row class handed to the constructor.
So a single generic adapter serves all four -- callers parameterise it by the
category enum ``E`` and pass the matching ``Row`` class:
LandlordOverridesRepository[PropertyType](session, LandlordPropertyTypeOverrideRow)
Per ADR-0003 §File layout, Postgres adapters live in ``infrastructure/postgres/``.
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import cast
from enum import Enum
from typing import TypeVar, cast
from sqlalchemy import Table
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlmodel import Session
from sqlmodel import Session, SQLModel
from domain.epc.built_form_type import BuiltFormType
from infrastructure.postgres.landlord_built_form_type_override_table import (
LandlordBuiltFormTypeOverrideRow,
)
from infrastructure.postgres.landlord_override_enums import OverrideSource
from repositories.landlord_overrides.landlord_override_repository import (
LandlordOverrideRepository,
)
E = TypeVar("E", bound=Enum)
class LandlordBuiltFormTypeOverridePostgresRepository(
LandlordOverrideRepository[BuiltFormType]
):
def __init__(self, session: Session) -> None:
class LandlordOverridesRepository(LandlordOverrideRepository[E]):
"""Writes classifier overrides to the ``landlord_<category>_overrides`` table
backing ``row_type``.
``row_type`` is the SQLModel mirror of the target table (e.g.
``LandlordPropertyTypeOverrideRow``); its category enum must match the type
argument ``E`` the caller binds. The pairing is a convention, not a type
constraint -- the row classes are not generic over their value enum -- so
keep the two in step at the call site.
"""
def __init__(self, session: Session, row_type: type[SQLModel]) -> None:
self._session = session
# SQLModel's class-level ``__table__`` is injected at runtime on
# ``table=True`` classes but isn't exposed by the stubs; pin it to
# ``Table`` via ``getattr`` so the dialect insert helper carries
# through with strict types.
self._table: Table = cast(Table, getattr(row_type, "__table__"))
def upsert_all(
self,
portfolio_id: int,
descriptions_to_values: dict[str, BuiltFormType],
descriptions_to_values: dict[str, E],
) -> None:
if not descriptions_to_values:
return
@ -54,14 +72,7 @@ class LandlordBuiltFormTypeOverridePostgresRepository(
for description, value in descriptions_to_values.items()
]
# SQLModel's class-level ``__table__`` is injected at runtime on
# ``table=True`` classes but isn't exposed by the stubs; pin it to
# ``Table`` via ``getattr`` so the dialect insert helper below
# carries through with strict types.
table: Table = cast(
Table, getattr(LandlordBuiltFormTypeOverrideRow, "__table__")
)
stmt = pg_insert(table).values(rows)
stmt = pg_insert(self._table).values(rows)
# The classifier may refresh its own past output, but must never
# overwrite a user correction -- the ``WHERE existing.source =
@ -73,7 +84,7 @@ class LandlordBuiltFormTypeOverridePostgresRepository(
"source": stmt.excluded.source,
"updated_at": stmt.excluded.updated_at,
},
where=table.c.source == OverrideSource.CLASSIFIER,
where=self._table.c.source == OverrideSource.CLASSIFIER,
)
# SQLModel re-exports SQLAlchemy's ``Session.execute``; one of the

View file

@ -1,82 +0,0 @@
"""Postgres adapter for ``LandlordOverrideRepository[PropertyType]``.
Writes to ``landlord_property_type_overrides`` (Drizzle-managed; mirrored by
``LandlordPropertyTypeOverrideRow``). The conflict policy lives in the SQL --
see ADR-0003 §Decision.
Per the convention this ADR fixes, Postgres adapters live in
``infrastructure/postgres/``. The existing ``task_postgres_repository.py`` /
``subtask_postgres_repository.py`` are outliers still under ``repositories/``;
relocating them is tracked as a follow-up in ADR-0003 §"File layout".
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import cast
from sqlalchemy import Table
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlmodel import Session
from domain.epc.property_type import PropertyType
from infrastructure.postgres.landlord_override_enums import OverrideSource
from infrastructure.postgres.landlord_property_type_override_table import (
LandlordPropertyTypeOverrideRow,
)
from repositories.landlord_overrides.landlord_override_repository import (
LandlordOverrideRepository,
)
class LandlordPropertyTypeOverridePostgresRepository(
LandlordOverrideRepository[PropertyType]
):
def __init__(self, session: Session) -> None:
self._session = session
def upsert_all(
self,
portfolio_id: int,
descriptions_to_values: dict[str, PropertyType],
) -> None:
if not descriptions_to_values:
return
now = datetime.now(timezone.utc)
rows = [
{
"portfolio_id": portfolio_id,
"description": description,
"value": value.value,
"source": OverrideSource.CLASSIFIER,
"created_at": now,
"updated_at": now,
}
for description, value in descriptions_to_values.items()
]
# SQLModel's class-level ``__table__`` is injected at runtime on
# ``table=True`` classes but isn't exposed by the stubs; pin it to
# ``Table`` via ``getattr`` so the dialect insert helper below
# carries through with strict types.
table: Table = cast(Table, getattr(LandlordPropertyTypeOverrideRow, "__table__"))
stmt = pg_insert(table).values(rows)
# The classifier may refresh its own past output, but must never
# overwrite a user correction -- the ``WHERE existing.source =
# 'classifier'`` guard enforces that. See ADR-0003 §Decision.
stmt = stmt.on_conflict_do_update(
index_elements=["portfolio_id", "description"],
set_={
"value": stmt.excluded.value,
"source": stmt.excluded.source,
"updated_at": stmt.excluded.updated_at,
},
where=table.c.source == OverrideSource.CLASSIFIER,
)
# SQLModel re-exports SQLAlchemy's ``Session.execute``; one of the
# overload signatures is marked deprecated in stubs, which fires
# here even though our INSERT path is the supported one.
self._session.execute(stmt) # pyright: ignore[reportDeprecated]

View file

@ -1,80 +0,0 @@
"""Postgres adapter for ``LandlordOverrideRepository[RoofType]``.
Writes to ``landlord_roof_type_overrides`` (Drizzle-managed; mirrored by
``LandlordRoofTypeOverrideRow``). The conflict policy lives in the SQL --
see ADR-0003 §Decision. Shape mirrors
``LandlordPropertyTypeOverridePostgresRepository``; the duplication is
deliberate while there are only a handful of override columns -- if the
duplication becomes painful, extract a shared upsert helper then.
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import cast
from sqlalchemy import Table
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlmodel import Session
from domain.epc.roof_type import RoofType
from infrastructure.postgres.landlord_override_enums import OverrideSource
from infrastructure.postgres.landlord_roof_type_override_table import (
LandlordRoofTypeOverrideRow,
)
from repositories.landlord_overrides.landlord_override_repository import (
LandlordOverrideRepository,
)
class LandlordRoofTypeOverridePostgresRepository(
LandlordOverrideRepository[RoofType]
):
def __init__(self, session: Session) -> None:
self._session = session
def upsert_all(
self,
portfolio_id: int,
descriptions_to_values: dict[str, RoofType],
) -> None:
if not descriptions_to_values:
return
now = datetime.now(timezone.utc)
rows = [
{
"portfolio_id": portfolio_id,
"description": description,
"value": value.value,
"source": OverrideSource.CLASSIFIER,
"created_at": now,
"updated_at": now,
}
for description, value in descriptions_to_values.items()
]
# SQLModel's class-level ``__table__`` is injected at runtime on
# ``table=True`` classes but isn't exposed by the stubs; pin it to
# ``Table`` via ``getattr`` so the dialect insert helper below
# carries through with strict types.
table: Table = cast(Table, getattr(LandlordRoofTypeOverrideRow, "__table__"))
stmt = pg_insert(table).values(rows)
# The classifier may refresh its own past output, but must never
# overwrite a user correction -- the ``WHERE existing.source =
# 'classifier'`` guard enforces that. See ADR-0003 §Decision.
stmt = stmt.on_conflict_do_update(
index_elements=["portfolio_id", "description"],
set_={
"value": stmt.excluded.value,
"source": stmt.excluded.source,
"updated_at": stmt.excluded.updated_at,
},
where=table.c.source == OverrideSource.CLASSIFIER,
)
# SQLModel re-exports SQLAlchemy's ``Session.execute``; one of the
# overload signatures is marked deprecated in stubs, which fires
# here even though our INSERT path is the supported one.
self._session.execute(stmt) # pyright: ignore[reportDeprecated]

View file

@ -1,80 +0,0 @@
"""Postgres adapter for ``LandlordOverrideRepository[WallType]``.
Writes to ``landlord_wall_type_overrides`` (Drizzle-managed; mirrored by
``LandlordWallTypeOverrideRow``). The conflict policy lives in the SQL --
see ADR-0003 §Decision. Shape mirrors
``LandlordPropertyTypeOverridePostgresRepository``; the duplication is
deliberate while there are only two columns -- if a third lands and the
duplication becomes painful, extract a shared upsert helper then.
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import cast
from sqlalchemy import Table
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlmodel import Session
from domain.epc.wall_type import WallType
from infrastructure.postgres.landlord_override_enums import OverrideSource
from infrastructure.postgres.landlord_wall_type_override_table import (
LandlordWallTypeOverrideRow,
)
from repositories.landlord_overrides.landlord_override_repository import (
LandlordOverrideRepository,
)
class LandlordWallTypeOverridePostgresRepository(
LandlordOverrideRepository[WallType]
):
def __init__(self, session: Session) -> None:
self._session = session
def upsert_all(
self,
portfolio_id: int,
descriptions_to_values: dict[str, WallType],
) -> None:
if not descriptions_to_values:
return
now = datetime.now(timezone.utc)
rows = [
{
"portfolio_id": portfolio_id,
"description": description,
"value": value.value,
"source": OverrideSource.CLASSIFIER,
"created_at": now,
"updated_at": now,
}
for description, value in descriptions_to_values.items()
]
# SQLModel's class-level ``__table__`` is injected at runtime on
# ``table=True`` classes but isn't exposed by the stubs; pin it to
# ``Table`` via ``getattr`` so the dialect insert helper below
# carries through with strict types.
table: Table = cast(Table, getattr(LandlordWallTypeOverrideRow, "__table__"))
stmt = pg_insert(table).values(rows)
# The classifier may refresh its own past output, but must never
# overwrite a user correction -- the ``WHERE existing.source =
# 'classifier'`` guard enforces that. See ADR-0003 §Decision.
stmt = stmt.on_conflict_do_update(
index_elements=["portfolio_id", "description"],
set_={
"value": stmt.excluded.value,
"source": stmt.excluded.source,
"updated_at": stmt.excluded.updated_at,
},
where=table.c.source == OverrideSource.CLASSIFIER,
)
# SQLModel re-exports SQLAlchemy's ``Session.execute``; one of the
# overload signatures is marked deprecated in stubs, which fires
# here even though our INSERT path is the supported one.
self._session.execute(stmt) # pyright: ignore[reportDeprecated]

View file

@ -15,8 +15,10 @@ class LandlordOverrideRepository(ABC, Generic[E]):
which table the adapter writes to; the orchestrator depends only on this
interface and never names a concrete table.
Concrete adapters live in ``infrastructure/`` (see ADR-0003): for example
``infrastructure/postgres/landlord_property_type_override_postgres_repository.py``.
A single concrete adapter,
``infrastructure/postgres/landlord_overrides_postgres_repository.LandlordOverridesRepository``,
serves every category (see ADR-0003) -- it is parameterised by the
SQLModel row class for the target table.
"""
@abstractmethod

View file

@ -0,0 +1,220 @@
"""Integration tests for the consolidated landlord-overrides upsert adapter.
``LandlordOverridesRepository`` serves every ``landlord_<category>_overrides``
table; the table is selected by the ``Row`` class passed at construction. The
source-aware conflict policy lives entirely in SQL (``INSERT ... ON CONFLICT
... DO UPDATE ... WHERE existing.source = 'classifier'``), so the only way to
verify it correctly distinguishes ``EXCLUDED.source`` from the qualified
``<table>.source`` is against a real Postgres -- the ``db_engine`` fixture in
``tests/conftest.py`` spins one up per test.
Each scenario is parametrised across two distinct categories (property type and
wall type). Running the shared body against two different ``Row`` classes
proves the adapter writes to whichever table it was handed -- not just that one
table happens to work.
"""
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
from enum import Enum
from typing import cast
import pytest
from sqlalchemy import Engine, Table
from sqlmodel import Session, SQLModel, select
from domain.epc.property_type import PropertyType
from domain.epc.wall_type import WallType
from infrastructure.postgres.landlord_override_enums import OverrideSource
from infrastructure.postgres.landlord_overrides_postgres_repository import (
LandlordOverridesRepository,
)
from infrastructure.postgres.landlord_property_type_override_table import (
LandlordPropertyTypeOverrideRow,
)
from infrastructure.postgres.landlord_wall_type_override_table import (
LandlordWallTypeOverrideRow,
)
@dataclass(frozen=True)
class Category:
"""One ``landlord_<category>_overrides`` table plus a few of its enum values.
``first``/``second`` are two distinct classifier values for re-upsert
tests; ``user_correction`` stands in for a value only a user would pick;
``unknown`` is the category's ``UNKNOWN`` member.
"""
id: str
row_type: type[SQLModel]
first: Enum
second: Enum
user_correction: Enum
unknown: Enum
CATEGORIES = [
Category(
id="property_type",
row_type=LandlordPropertyTypeOverrideRow,
first=PropertyType.FLAT,
second=PropertyType.HOUSE,
user_correction=PropertyType.BUNGALOW,
unknown=PropertyType.UNKNOWN,
),
Category(
id="wall_type",
row_type=LandlordWallTypeOverrideRow,
first=WallType.CAVITY_FILLED,
second=WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED,
user_correction=WallType.SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED,
unknown=WallType.UNKNOWN,
),
]
@pytest.fixture
def session(db_engine: Engine) -> Iterator[Session]:
with Session(db_engine) as s:
yield s
@pytest.fixture(params=CATEGORIES, ids=lambda c: c.id)
def category(request: pytest.FixtureRequest) -> Category:
return request.param
def _select_row(
session: Session, row_type: type[SQLModel], portfolio_id: int, description: str
) -> SQLModel:
# The row class is only known as ``type[SQLModel]`` here, so its column
# descriptors aren't typed; filter via the Core ``Table`` columns instead.
table = cast(Table, getattr(row_type, "__table__"))
rows = session.exec(
select(row_type).where(
table.c.portfolio_id == portfolio_id,
table.c.description == description,
)
).all()
assert len(rows) == 1, f"expected exactly one row, got {len(rows)}"
return rows[0]
def test_inserts_a_fresh_row_with_source_classifier(
session: Session, category: Category
) -> None:
# arrange
repo = LandlordOverridesRepository[Enum](session, category.row_type)
# act
repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.first})
session.commit()
# assert
row = _select_row(session, category.row_type, portfolio_id=1, description="cosy")
assert getattr(row, "value") is category.first
assert getattr(row, "source") == OverrideSource.CLASSIFIER
def test_reupsert_overwrites_a_classifier_row(
session: Session, category: Category
) -> None:
# arrange: a stale classifier row exists.
repo = LandlordOverridesRepository[Enum](session, category.row_type)
repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.first})
session.commit()
# act: re-classify with a different value.
repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.second})
session.commit()
# assert: the new classification wins.
row = _select_row(session, category.row_type, portfolio_id=1, description="cosy")
assert getattr(row, "value") is category.second
assert getattr(row, "source") == OverrideSource.CLASSIFIER
def test_reupsert_does_not_overwrite_a_user_row(
session: Session, category: Category
) -> None:
# arrange: a user has corrected the row. The classifier path never produces
# ``source = 'user'``; we install the row directly to mimic the override
# frontend.
user_row = category.row_type(
portfolio_id=1,
description="cosy",
value=category.user_correction,
source=OverrideSource.USER,
)
session.add(user_row)
session.commit()
# act: the classifier re-runs and tries to reclassify the same description.
# Under the source-aware conflict policy, this must be silently skipped --
# user edits beat classifier reruns.
repo = LandlordOverridesRepository[Enum](session, category.row_type)
repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.second})
session.commit()
# assert: the user row is unchanged.
row = _select_row(session, category.row_type, portfolio_id=1, description="cosy")
assert getattr(row, "value") is category.user_correction
assert getattr(row, "source") == OverrideSource.USER
def test_upsert_keeps_other_portfolios_descriptions_independent(
session: Session, category: Category
) -> None:
# arrange / act: the unique key is ``(portfolio_id, description)``, so the
# same description for two different portfolios must coexist as two rows.
repo = LandlordOverridesRepository[Enum](session, category.row_type)
repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.first})
repo.upsert_all(portfolio_id=2, descriptions_to_values={"cosy": category.second})
session.commit()
# assert: both rows survive with their own values.
assert (
getattr(_select_row(session, category.row_type, 1, "cosy"), "value")
is category.first
)
assert (
getattr(_select_row(session, category.row_type, 2, "cosy"), "value")
is category.second
)
def test_upsert_persists_unknown_so_a_user_can_resolve_it_later(
session: Session, category: Category
) -> None:
# arrange / act: a description the classifier couldn't resolve still lands
# -- per ADR-0002 §5 / ADR-0003 §Decision, so a future user override can
# upgrade it to a real value.
repo = LandlordOverridesRepository[Enum](session, category.row_type)
repo.upsert_all(
portfolio_id=1,
descriptions_to_values={"unparseable nonsense": category.unknown},
)
session.commit()
# assert: the row exists with value=UNKNOWN, source=classifier.
row = _select_row(
session, category.row_type, portfolio_id=1, description="unparseable nonsense"
)
assert getattr(row, "value") is category.unknown
assert getattr(row, "source") == OverrideSource.CLASSIFIER
def test_upsert_all_with_empty_mapping_is_a_no_op(
session: Session, category: Category
) -> None:
# arrange / act
repo = LandlordOverridesRepository[Enum](session, category.row_type)
repo.upsert_all(portfolio_id=1, descriptions_to_values={})
session.commit()
# assert: nothing was inserted.
rows = session.exec(select(category.row_type)).all()
assert rows == []

View file

@ -1,147 +0,0 @@
"""Integration tests for the source-aware upsert policy.
The conflict policy lives entirely in SQL (``INSERT ... ON CONFLICT
... DO UPDATE ... WHERE existing.source = 'classifier'``). The only way to
verify it correctly distinguishes ``EXCLUDED.source`` from the qualified
``landlord_property_type_overrides.source`` is against a real Postgres --
the ``db_engine`` fixture in ``tests/conftest.py`` spins one up per test.
"""
from __future__ import annotations
from collections.abc import Iterator
import pytest
from sqlalchemy import Engine
from sqlmodel import Session, select
from domain.epc.property_type import PropertyType
from infrastructure.postgres.landlord_override_enums import OverrideSource
from infrastructure.postgres.landlord_property_type_override_postgres_repository import (
LandlordPropertyTypeOverridePostgresRepository,
)
from infrastructure.postgres.landlord_property_type_override_table import (
LandlordPropertyTypeOverrideRow,
)
@pytest.fixture
def session(db_engine: Engine) -> Iterator[Session]:
with Session(db_engine) as s:
yield s
def _select_row(
session: Session, portfolio_id: int, description: str
) -> LandlordPropertyTypeOverrideRow:
rows = session.exec(
select(LandlordPropertyTypeOverrideRow).where(
LandlordPropertyTypeOverrideRow.portfolio_id == portfolio_id,
LandlordPropertyTypeOverrideRow.description == description,
)
).all()
assert len(rows) == 1, f"expected exactly one row, got {len(rows)}"
return rows[0]
def test_inserts_a_fresh_row_with_source_classifier(session: Session) -> None:
# arrange
repo = LandlordPropertyTypeOverridePostgresRepository(session)
# act
repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.HOUSE})
session.commit()
# assert
row = _select_row(session, portfolio_id=1, description="cosy")
assert row.value is PropertyType.HOUSE
assert row.source == OverrideSource.CLASSIFIER
def test_reupsert_overwrites_a_classifier_row(session: Session) -> None:
# arrange: a stale classifier row exists.
repo = LandlordPropertyTypeOverridePostgresRepository(session)
repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.FLAT})
session.commit()
# act: re-classify with a different category.
repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.HOUSE})
session.commit()
# assert: the new classification wins.
row = _select_row(session, portfolio_id=1, description="cosy")
assert row.value is PropertyType.HOUSE
assert row.source == OverrideSource.CLASSIFIER
def test_reupsert_does_not_overwrite_a_user_row(session: Session) -> None:
# arrange: a user has corrected the row to ``BUNGALOW``. The classifier
# path never produces ``source = 'user'``; we install the row directly
# to mimic the override frontend.
user_row = LandlordPropertyTypeOverrideRow(
portfolio_id=1,
description="cosy",
value=PropertyType.BUNGALOW,
source=OverrideSource.USER,
)
session.add(user_row)
session.commit()
# act: the classifier re-runs and tries to classify the same description
# as a ``HOUSE``. Under the source-aware conflict policy, this must be
# silently skipped -- user edits beat classifier reruns.
repo = LandlordPropertyTypeOverridePostgresRepository(session)
repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.HOUSE})
session.commit()
# assert: the user row is unchanged.
row = _select_row(session, portfolio_id=1, description="cosy")
assert row.value is PropertyType.BUNGALOW
assert row.source == OverrideSource.USER
def test_upsert_keeps_other_portfolios_descriptions_independent(
session: Session,
) -> None:
# arrange: the unique key is ``(portfolio_id, description)``, so the same
# description for two different portfolios must coexist as two rows.
repo = LandlordPropertyTypeOverridePostgresRepository(session)
# act
repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.HOUSE})
repo.upsert_all(portfolio_id=2, descriptions_to_values={"cosy": PropertyType.FLAT})
session.commit()
# assert: both rows survive with their own values.
assert _select_row(session, 1, "cosy").value is PropertyType.HOUSE
assert _select_row(session, 2, "cosy").value is PropertyType.FLAT
def test_upsert_persists_unknown_so_a_user_can_resolve_it_later(
session: Session,
) -> None:
# arrange / act: a description the classifier couldn't resolve still
# lands -- per ADR-0002 §5 / ADR-0003 §Decision, so a future user
# override can upgrade it to a real value.
repo = LandlordPropertyTypeOverridePostgresRepository(session)
repo.upsert_all(
portfolio_id=1,
descriptions_to_values={"unparseable nonsense": PropertyType.UNKNOWN},
)
session.commit()
# assert: the row exists with value=UNKNOWN, source=classifier.
row = _select_row(session, portfolio_id=1, description="unparseable nonsense")
assert row.value is PropertyType.UNKNOWN
assert row.source == OverrideSource.CLASSIFIER
def test_upsert_all_with_empty_mapping_is_a_no_op(session: Session) -> None:
# arrange / act
repo = LandlordPropertyTypeOverridePostgresRepository(session)
repo.upsert_all(portfolio_id=1, descriptions_to_values={})
session.commit()
# assert: nothing was inserted.
rows = session.exec(select(LandlordPropertyTypeOverrideRow)).all()
assert rows == []

View file

@ -1,158 +0,0 @@
"""Integration tests for the source-aware upsert policy on the WallType table.
Mirror of ``test_landlord_property_type_override_postgres_repository.py`` --
the SQL is structurally identical, but the conflict policy lives in two
separate concrete adapters and so warrants two parallel test suites until
(if) the adapters are factored through a shared upsert helper.
"""
from __future__ import annotations
from collections.abc import Iterator
import pytest
from sqlalchemy import Engine
from sqlmodel import Session, select
from domain.epc.wall_type import WallType
from infrastructure.postgres.landlord_override_enums import OverrideSource
from infrastructure.postgres.landlord_wall_type_override_postgres_repository import (
LandlordWallTypeOverridePostgresRepository,
)
from infrastructure.postgres.landlord_wall_type_override_table import (
LandlordWallTypeOverrideRow,
)
@pytest.fixture
def session(db_engine: Engine) -> Iterator[Session]:
with Session(db_engine) as s:
yield s
def _select_row(
session: Session, portfolio_id: int, description: str
) -> LandlordWallTypeOverrideRow:
rows = session.exec(
select(LandlordWallTypeOverrideRow).where(
LandlordWallTypeOverrideRow.portfolio_id == portfolio_id,
LandlordWallTypeOverrideRow.description == description,
)
).all()
assert len(rows) == 1, f"expected exactly one row, got {len(rows)}"
return rows[0]
def test_inserts_a_fresh_row_with_source_classifier(session: Session) -> None:
# arrange
repo = LandlordWallTypeOverridePostgresRepository(session)
# act
repo.upsert_all(
portfolio_id=1, descriptions_to_values={"cavity insulated": WallType.CAVITY_FILLED}
)
session.commit()
# assert
row = _select_row(session, portfolio_id=1, description="cavity insulated")
assert row.value is WallType.CAVITY_FILLED
assert row.source == OverrideSource.CLASSIFIER
def test_reupsert_overwrites_a_classifier_row(session: Session) -> None:
# arrange: a stale classifier row exists.
repo = LandlordWallTypeOverridePostgresRepository(session)
repo.upsert_all(
portfolio_id=1, descriptions_to_values={"old red brick": WallType.CAVITY_FILLED}
)
session.commit()
# act: re-classify with a different category.
repo.upsert_all(
portfolio_id=1, descriptions_to_values={"old red brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED}
)
session.commit()
# assert: the new classification wins.
row = _select_row(session, portfolio_id=1, description="old red brick")
assert row.value is WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED
assert row.source == OverrideSource.CLASSIFIER
def test_reupsert_does_not_overwrite_a_user_row(session: Session) -> None:
# arrange: a user has corrected the row to ``SANDSTONE``. The classifier
# path never produces ``source = 'user'``; we install the row directly
# to mimic the override frontend.
user_row = LandlordWallTypeOverrideRow(
portfolio_id=1,
description="old red brick",
value=WallType.SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED,
source=OverrideSource.USER,
)
session.add(user_row)
session.commit()
# act: the classifier re-runs and tries to classify the same description
# as ``SOLID_BRICK``. Under the source-aware conflict policy, this must
# be silently skipped -- user edits beat classifier reruns.
repo = LandlordWallTypeOverridePostgresRepository(session)
repo.upsert_all(
portfolio_id=1, descriptions_to_values={"old red brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED}
)
session.commit()
# assert: the user row is unchanged.
row = _select_row(session, portfolio_id=1, description="old red brick")
assert row.value is WallType.SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED
assert row.source == OverrideSource.USER
def test_upsert_keeps_other_portfolios_descriptions_independent(
session: Session,
) -> None:
# arrange / act: the unique key is ``(portfolio_id, description)``, so the
# same description for two different portfolios must coexist as two rows.
repo = LandlordWallTypeOverridePostgresRepository(session)
repo.upsert_all(
portfolio_id=1, descriptions_to_values={"old red brick": WallType.CAVITY_FILLED}
)
repo.upsert_all(
portfolio_id=2, descriptions_to_values={"old red brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED}
)
session.commit()
# assert: both rows survive with their own values.
assert _select_row(session, 1, "old red brick").value is WallType.CAVITY_FILLED
assert _select_row(session, 2, "old red brick").value is WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED
def test_upsert_persists_unknown_so_a_user_can_resolve_it_later(
session: Session,
) -> None:
# arrange / act: a description the classifier couldn't resolve still
# lands -- per ADR-0002 §5 / ADR-0003 §Decision, so a future user
# override can upgrade it to a real value.
repo = LandlordWallTypeOverridePostgresRepository(session)
repo.upsert_all(
portfolio_id=1,
descriptions_to_values={"unparseable wall description": WallType.UNKNOWN},
)
session.commit()
# assert: the row exists with value=UNKNOWN, source=classifier.
row = _select_row(
session, portfolio_id=1, description="unparseable wall description"
)
assert row.value is WallType.UNKNOWN
assert row.source == OverrideSource.CLASSIFIER
def test_upsert_all_with_empty_mapping_is_a_no_op(session: Session) -> None:
# arrange / act
repo = LandlordWallTypeOverridePostgresRepository(session)
repo.upsert_all(portfolio_id=1, descriptions_to_values={})
session.commit()
# assert: nothing was inserted.
rows = session.exec(select(LandlordWallTypeOverrideRow)).all()
assert rows == []