mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
added roofs
This commit is contained in:
parent
8422041215
commit
36f4c32904
8 changed files with 378 additions and 31 deletions
|
|
@ -11,7 +11,11 @@ from applications.landlord_description_overrides.landlord_description_overrides_
|
|||
from domain.addresses.unstandardised_address import AddressList
|
||||
from domain.landlord_description_overrides.built_form_type import BuiltFormType
|
||||
from domain.landlord_description_overrides.property_type import PropertyType
|
||||
from domain.landlord_description_overrides.roof_type import RoofType
|
||||
from domain.landlord_description_overrides.wall_type import WallType
|
||||
from domain.landlord_description_overrides.wall_type_construction_dates import (
|
||||
wall_type_construction_date_prompt_hint,
|
||||
)
|
||||
from infrastructure.chatgpt.chatgpt import ChatGPT
|
||||
from infrastructure.chatgpt.chatgpt_column_classifier import ChatGptColumnClassifier
|
||||
from infrastructure.postgres.config import PostgresConfig
|
||||
|
|
@ -22,6 +26,9 @@ from infrastructure.postgres.landlord_built_form_type_override_postgres_reposito
|
|||
from infrastructure.postgres.landlord_property_type_override_postgres_repository import (
|
||||
LandlordPropertyTypeOverridePostgresRepository,
|
||||
)
|
||||
from infrastructure.postgres.landlord_roof_type_override_postgres_repository import (
|
||||
LandlordRoofTypeOverridePostgresRepository,
|
||||
)
|
||||
from infrastructure.postgres.landlord_wall_type_override_postgres_repository import (
|
||||
LandlordWallTypeOverridePostgresRepository,
|
||||
)
|
||||
|
|
@ -98,10 +105,21 @@ def handler(
|
|||
name="wall_type",
|
||||
source_column="Walls",
|
||||
classifier=ChatGptColumnClassifier(
|
||||
chat_gpt, WallType, WallType.UNKNOWN
|
||||
chat_gpt,
|
||||
WallType,
|
||||
WallType.UNKNOWN,
|
||||
extra_instructions=wall_type_construction_date_prompt_hint(),
|
||||
),
|
||||
repo=LandlordWallTypeOverridePostgresRepository(session),
|
||||
),
|
||||
ClassifiableColumn(
|
||||
name="roof_type",
|
||||
source_column="Roofs",
|
||||
classifier=ChatGptColumnClassifier(
|
||||
chat_gpt, RoofType, RoofType.UNKNOWN
|
||||
),
|
||||
repo=LandlordRoofTypeOverridePostgresRepository(session),
|
||||
),
|
||||
]
|
||||
|
||||
orchestrator = LandlordDescriptionOverridesOrchestrator(
|
||||
|
|
|
|||
|
|
@ -13,40 +13,83 @@ class WallType(Enum):
|
|||
"""
|
||||
|
||||
CAVITY_FILLED = "Cavity wall, filled cavity"
|
||||
CAVITY_AS_BUILT_INSULATED_ASSUMED = "Cavity wall, as built, insulated (assumed)"
|
||||
CAVITY_AS_BUILT_NO_INSULATION_ASSUMED = "Cavity wall, as built, no insulation (assumed)"
|
||||
CAVITY_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Cavity wall, as built, partial insulation (assumed)"
|
||||
CAVITY_AS_BUILT_INSULATED_ASSUMED = (
|
||||
"Cavity wall, as built, insulated (assumed)" # 1983 - 1990
|
||||
)
|
||||
CAVITY_AS_BUILT_NO_INSULATION_ASSUMED = (
|
||||
"Cavity wall, as built, no insulation (assumed)" # Pre-1975
|
||||
)
|
||||
|
||||
CAVITY_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
|
||||
"Cavity wall, as built, partial insulation (assumed)" # 1976 - 1982
|
||||
)
|
||||
CAVITY_WITH_INTERNAL_INSULATION = "Cavity wall, with internal insulation"
|
||||
CAVITY_WITH_EXTERNAL_INSULATION = "Cavity wall, with external insulation"
|
||||
CAVITY_FILLED_AND_INTERNAL_INSULATION = "Cavity wall, filled cavity and internal insulation"
|
||||
CAVITY_FILLED_AND_EXTERNAL_INSULATION = "Cavity wall, filled cavity and external insulation"
|
||||
CAVITY_FILLED_AND_INTERNAL_INSULATION = (
|
||||
"Cavity wall, filled cavity and internal insulation"
|
||||
)
|
||||
CAVITY_FILLED_AND_EXTERNAL_INSULATION = (
|
||||
"Cavity wall, filled cavity and external insulation"
|
||||
)
|
||||
|
||||
SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED = "Solid brick, as built, no insulation (assumed)"
|
||||
SOLID_BRICK_AS_BUILT_INSULATED_ASSUMED = "Solid brick, as built, insulated (assumed)"
|
||||
SOLID_BRICK_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Solid brick, as built, partial insulation (assumed)"
|
||||
SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED = (
|
||||
"Solid brick, as built, no insulation (assumed)"
|
||||
)
|
||||
SOLID_BRICK_AS_BUILT_INSULATED_ASSUMED = (
|
||||
"Solid brick, as built, insulated (assumed)"
|
||||
)
|
||||
SOLID_BRICK_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
|
||||
"Solid brick, as built, partial insulation (assumed)"
|
||||
)
|
||||
SOLID_BRICK_WITH_INTERNAL_INSULATION = "Solid brick, with internal insulation"
|
||||
SOLID_BRICK_WITH_EXTERNAL_INSULATION = "Solid brick, with external insulation"
|
||||
|
||||
TIMBER_FRAME_AS_BUILT_NO_INSULATION_ASSUMED = "Timber frame, as built, no insulation (assumed)"
|
||||
TIMBER_FRAME_AS_BUILT_INSULATED_ASSUMED = "Timber frame, as built, insulated (assumed)"
|
||||
TIMBER_FRAME_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Timber frame, as built, partial insulation (assumed)"
|
||||
TIMBER_FRAME_AS_BUILT_NO_INSULATION_ASSUMED = (
|
||||
"Timber frame, as built, no insulation (assumed)"
|
||||
)
|
||||
TIMBER_FRAME_AS_BUILT_INSULATED_ASSUMED = (
|
||||
"Timber frame, as built, insulated (assumed)"
|
||||
)
|
||||
TIMBER_FRAME_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
|
||||
"Timber frame, as built, partial insulation (assumed)"
|
||||
)
|
||||
TIMBER_FRAME_WITH_ADDITIONAL_INSULATION = "Timber frame, with additional insulation"
|
||||
|
||||
SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED = "Sandstone, as built, no insulation (assumed)"
|
||||
SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED = (
|
||||
"Sandstone, as built, no insulation (assumed)"
|
||||
)
|
||||
SANDSTONE_AS_BUILT_INSULATED_ASSUMED = "Sandstone, as built, insulated (assumed)"
|
||||
SANDSTONE_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Sandstone, as built, partial insulation (assumed)"
|
||||
SANDSTONE_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
|
||||
"Sandstone, as built, partial insulation (assumed)"
|
||||
)
|
||||
SANDSTONE_WITH_INTERNAL_INSULATION = "Sandstone, with internal insulation"
|
||||
SANDSTONE_WITH_EXTERNAL_INSULATION = "Sandstone, with external insulation"
|
||||
|
||||
GRANITE_OR_WHIN_AS_BUILT_NO_INSULATION_ASSUMED = "Granite or whin, as built, no insulation (assumed)"
|
||||
GRANITE_OR_WHIN_AS_BUILT_INSULATED_ASSUMED = "Granite or whin, as built, insulated (assumed)"
|
||||
GRANITE_OR_WHIN_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Granite or whin, as built, partial insulation (assumed)"
|
||||
GRANITE_OR_WHIN_WITH_INTERNAL_INSULATION = "Granite or whin, with internal insulation"
|
||||
GRANITE_OR_WHIN_WITH_EXTERNAL_INSULATION = "Granite or whin, with external insulation"
|
||||
GRANITE_OR_WHIN_AS_BUILT_NO_INSULATION_ASSUMED = (
|
||||
"Granite or whin, as built, no insulation (assumed)"
|
||||
)
|
||||
GRANITE_OR_WHIN_AS_BUILT_INSULATED_ASSUMED = (
|
||||
"Granite or whin, as built, insulated (assumed)"
|
||||
)
|
||||
GRANITE_OR_WHIN_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
|
||||
"Granite or whin, as built, partial insulation (assumed)"
|
||||
)
|
||||
GRANITE_OR_WHIN_WITH_INTERNAL_INSULATION = (
|
||||
"Granite or whin, with internal insulation"
|
||||
)
|
||||
GRANITE_OR_WHIN_WITH_EXTERNAL_INSULATION = (
|
||||
"Granite or whin, with external insulation"
|
||||
)
|
||||
|
||||
SYSTEM_BUILT_AS_BUILT_NO_INSULATION_ASSUMED = "System built, as built, no insulation (assumed)"
|
||||
SYSTEM_BUILT_AS_BUILT_INSULATED_ASSUMED = "System built, as built, insulated (assumed)"
|
||||
SYSTEM_BUILT_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "System built, as built, partial insulation (assumed)"
|
||||
SYSTEM_BUILT_AS_BUILT_NO_INSULATION_ASSUMED = (
|
||||
"System built, as built, no insulation (assumed)"
|
||||
)
|
||||
SYSTEM_BUILT_AS_BUILT_INSULATED_ASSUMED = (
|
||||
"System built, as built, insulated (assumed)"
|
||||
)
|
||||
SYSTEM_BUILT_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
|
||||
"System built, as built, partial insulation (assumed)"
|
||||
)
|
||||
SYSTEM_BUILT_WITH_INTERNAL_INSULATION = "System built, with internal insulation"
|
||||
SYSTEM_BUILT_WITH_EXTERNAL_INSULATION = "System built, with external insulation"
|
||||
|
||||
|
|
@ -59,8 +102,12 @@ class WallType(Enum):
|
|||
COB_WITH_EXTERNAL_INSULATION = "Cob, with external insulation"
|
||||
|
||||
CURTAIN_WALL = "Curtain wall"
|
||||
CURTAIN_WALL_AS_BUILT_NO_INSULATION_ASSUMED = "Curtain Wall, as built, no insulation (assumed)"
|
||||
CURTAIN_WALL_AS_BUILT_INSULATED_ASSUMED = "Curtain Wall, as built, insulated (assumed)"
|
||||
CURTAIN_WALL_AS_BUILT_NO_INSULATION_ASSUMED = (
|
||||
"Curtain Wall, as built, no insulation (assumed)"
|
||||
)
|
||||
CURTAIN_WALL_AS_BUILT_INSULATED_ASSUMED = (
|
||||
"Curtain Wall, as built, insulated (assumed)"
|
||||
)
|
||||
CURTAIN_WALL_FILLED = "Curtain Wall, filled cavity"
|
||||
CURTAIN_WALL_WITH_INTERNAL_INSULATION = "Curtain Wall, with internal insulation"
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,72 @@
|
|||
"""Construction-date metadata for the "assumed" ``WallType`` variants.
|
||||
|
||||
The ``(assumed)`` variants of ``WallType`` are what RdSAP picks when a
|
||||
surveyor has no direct observation and falls back to the typical wall make-up
|
||||
for a property's build era. The era boundaries reflect UK Building
|
||||
Regulations milestones for cavity-wall insulation:
|
||||
|
||||
* up to 1975 -- no cavity insulation requirement
|
||||
* 1976-1982 -- partial-fill cavity (early insulation requirement)
|
||||
* 1983-1990 -- full-fill cavity (insulation required)
|
||||
|
||||
Captured here as a structured lookup so:
|
||||
|
||||
* the LLM prompt builder can render the ranges as a hint, helping the
|
||||
classifier resolve era-implying landlord descriptions to the right
|
||||
``(assumed)`` variant;
|
||||
* future date-aware paths (a deterministic year-to-variant shortcut, a
|
||||
date-keyed repo) can read from the same source instead of duplicating
|
||||
the knowledge.
|
||||
|
||||
Only the variants where we have a defensible era boundary appear here; the
|
||||
remaining ``(assumed)`` members are left out rather than guessed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Mapping, Optional
|
||||
|
||||
from domain.landlord_description_overrides.wall_type import WallType
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class YearRange:
    """An inclusive range of construction years.

    ``None`` on either end means that side of the range is unbounded.
    ``str()`` gives a short label suitable for embedding in prose:

    * both bounds set -> ``"1976-1982"``
    * only ``end`` set -> ``"pre-1976"`` (``end`` is inclusive, so the
      label names the first year *after* the range)
    * only ``start`` set -> ``"1991+"``
    * neither set -> ``"any year"``
    """

    # First year covered (inclusive); ``None`` means no lower bound.
    start: Optional[int] = None
    # Last year covered (inclusive); ``None`` means no upper bound.
    end: Optional[int] = None

    def __str__(self) -> str:
        """Return the human-readable label described on the class."""
        start, end = self.start, self.end
        if start is None:
            # A fully unbounded range used to fall through to the generic
            # branch and render as the literal text "None-None".
            return "any year" if end is None else f"pre-{end + 1}"
        if end is None:
            return f"{start}+"
        return f"{start}-{end}"
|
||||
|
||||
|
||||
# Build-era lookup for the cavity-wall "(assumed)" variants. Entries are
# declared oldest era first; iteration over the mapping follows that order.
WALL_TYPE_CONSTRUCTION_YEARS: Mapping[WallType, YearRange] = {
    # Open-ended on the early side: anything built up to and including 1975.
    WallType.CAVITY_AS_BUILT_NO_INSULATION_ASSUMED: YearRange(None, 1975),
    WallType.CAVITY_AS_BUILT_PARTIAL_INSULATION_ASSUMED: YearRange(1976, 1982),
    WallType.CAVITY_AS_BUILT_INSULATED_ASSUMED: YearRange(1983, 1990),
}
|
||||
|
||||
|
||||
def wall_type_construction_date_prompt_hint() -> str:
    """Build the construction-era guidance fragment for the LLM classifier.

    The result is one introductory sentence followed by a bullet per entry
    of ``WALL_TYPE_CONSTRUCTION_YEARS``, each pairing the variant's category
    text with its typical build years. It lets the model favour the
    era-matching ``(assumed)`` variant when a landlord description hints at
    a build era (e.g. "1970s semi", "built before the war").
    """
    header = (
        "When the description carries construction-era information, prefer "
        "the category whose typical build year matches:\n"
    )

    bullets: list[str] = []
    for variant, era in WALL_TYPE_CONSTRUCTION_YEARS.items():
        # ``!r`` quotes the category text so its embedded commas cannot be
        # mistaken for separators between bullets.
        bullets.append(f"- {variant.value!r}: typically built {era}")

    return header + "\n".join(bullets)
|
||||
|
|
@ -2,7 +2,7 @@ from __future__ import annotations
|
|||
|
||||
import json
|
||||
from enum import Enum
|
||||
from typing import Any, TypeVar
|
||||
from typing import Any, Optional, TypeVar
|
||||
|
||||
from domain.landlord_description_overrides.column_classifier import (
|
||||
ClassificationError,
|
||||
|
|
@ -27,10 +27,16 @@ class ChatGptColumnClassifier(ColumnClassifier[E]):
|
|||
chat_gpt: ChatGPT,
|
||||
category_enum: type[E],
|
||||
unknown: E,
|
||||
extra_instructions: Optional[str] = None,
|
||||
) -> None:
|
||||
self._chat_gpt = chat_gpt
|
||||
self._category_enum = category_enum
|
||||
self._unknown = unknown
|
||||
# Free-form column-specific guidance appended to the system prompt
|
||||
# ahead of the JSON-output instruction. Lets each column ship its
|
||||
# own hints (e.g. wall-type construction-era ranges) without the
|
||||
# generic classifier knowing what they are.
|
||||
self._extra_instructions = extra_instructions
|
||||
|
||||
def classify(self, descriptions: set[str]) -> dict[str, E]:
|
||||
if not descriptions:
|
||||
|
|
@ -62,12 +68,17 @@ class ChatGptColumnClassifier(ColumnClassifier[E]):
|
|||
for member in self._category_enum
|
||||
if member is not self._unknown
|
||||
)
|
||||
return (
|
||||
"Classify each free-text description into exactly one category. "
|
||||
f"Categories: {categories}. "
|
||||
parts = [
|
||||
"Classify each free-text description into exactly one category. ",
|
||||
f"Categories: {categories}. ",
|
||||
]
|
||||
if self._extra_instructions:
|
||||
parts.append(self._extra_instructions + " ")
|
||||
parts.append(
|
||||
"Reply with only a JSON object mapping each original description "
|
||||
"to its category, and nothing else."
|
||||
)
|
||||
return "".join(parts)
|
||||
|
||||
def _to_category(self, value: Any) -> E:
|
||||
"""Map a reply value to a category member, defaulting to UNKNOWN."""
|
||||
|
|
|
|||
|
|
@ -0,0 +1,80 @@
|
|||
"""Postgres adapter for ``LandlordOverrideRepository[RoofType]``.
|
||||
|
||||
Writes to ``landlord_roof_type_overrides`` (Drizzle-managed; mirrored by
|
||||
``LandlordRoofTypeOverrideRow``). The conflict policy lives in the SQL --
|
||||
see ADR-0003 §Decision. Shape mirrors
|
||||
``LandlordPropertyTypeOverridePostgresRepository``; the duplication is
|
||||
deliberate while there are only a handful of override columns -- if the
|
||||
duplication becomes painful, extract a shared upsert helper then.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import cast
|
||||
|
||||
from sqlalchemy import Table
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
from sqlmodel import Session
|
||||
|
||||
from domain.landlord_description_overrides.roof_type import RoofType
|
||||
from infrastructure.postgres.landlord_override_enums import OverrideSource
|
||||
from infrastructure.postgres.landlord_roof_type_override_table import (
|
||||
LandlordRoofTypeOverrideRow,
|
||||
)
|
||||
from repositories.landlord_overrides.landlord_override_repository import (
|
||||
LandlordOverrideRepository,
|
||||
)
|
||||
|
||||
|
||||
class LandlordRoofTypeOverridePostgresRepository(
    LandlordOverrideRepository[RoofType]
):
    """Store classifier-produced roof-type overrides with a Postgres upsert."""

    def __init__(self, session: Session) -> None:
        """Remember the session that the upsert statement is executed on."""
        self._session = session

    def upsert_all(
        self,
        portfolio_id: int,
        descriptions_to_values: dict[str, RoofType],
    ) -> None:
        """Insert or refresh one override row per landlord description.

        Every row is written with ``source`` set to
        ``OverrideSource.CLASSIFIER`` and a single shared UTC timestamp. On a
        ``(portfolio_id, description)`` conflict the existing row is updated
        only when its own ``source`` is still the classifier. An empty
        mapping is a no-op. No commit is issued here.
        """
        if not descriptions_to_values:
            return

        # One timestamp for the whole batch so every row agrees.
        stamped_at = datetime.now(timezone.utc)
        payload: list[dict[str, object]] = []
        for description, roof_type in descriptions_to_values.items():
            payload.append(
                {
                    "portfolio_id": portfolio_id,
                    "description": description,
                    "value": roof_type.value,
                    "source": OverrideSource.CLASSIFIER,
                    "created_at": stamped_at,
                    "updated_at": stamped_at,
                }
            )

        # ``__table__`` is attached to ``table=True`` SQLModel classes at
        # runtime and is missing from the stubs, hence ``getattr`` + ``cast``
        # to hand the dialect insert helper a properly typed ``Table``.
        table: Table = cast(Table, getattr(LandlordRoofTypeOverrideRow, "__table__"))
        insert_stmt = pg_insert(table).values(payload)
        incoming = insert_stmt.excluded

        # Classifier output may replace earlier classifier output, but never
        # a user correction: the WHERE clause restricts the update to rows
        # whose existing source is the classifier. See ADR-0003 §Decision.
        still_classifier_owned = table.c.source == OverrideSource.CLASSIFIER
        upsert_stmt = insert_stmt.on_conflict_do_update(
            index_elements=["portfolio_id", "description"],
            set_={
                "value": incoming.value,
                "source": incoming.source,
                "updated_at": incoming.updated_at,
            },
            where=still_classifier_owned,
        )

        # The stubs flag one ``Session.execute`` overload as deprecated; the
        # INSERT path used here is the supported one.
        self._session.execute(upsert_stmt)  # pyright: ignore[reportDeprecated]
|
||||
69
infrastructure/postgres/landlord_roof_type_override_table.py
Normal file
69
infrastructure/postgres/landlord_roof_type_override_table.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
"""SQLModel mirror of the ``landlord_roof_type_overrides`` Drizzle table.
|
||||
|
||||
The schema source of truth lives in the ``assessment-model`` TS repo
|
||||
(`src/app/db/schema/landlord_overrides.ts`). The migrations are owned there;
|
||||
this row class only mirrors the columns so the Python lambda can read/write.
|
||||
See ADR-0003. Shape mirrors ``LandlordPropertyTypeOverrideRow`` -- the only
|
||||
differences are the table name, the ``roof_type`` pgEnum on ``value``, and
|
||||
the unique-constraint name.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import ClassVar
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from sqlalchemy import BigInteger, Column, UniqueConstraint
|
||||
from sqlalchemy import Enum as SAEnum
|
||||
from sqlmodel import Field, SQLModel
|
||||
|
||||
from domain.landlord_description_overrides.roof_type import RoofType
|
||||
from infrastructure.postgres.landlord_override_enums import override_source_sa_enum
|
||||
|
||||
|
||||
def _utc_now() -> datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


def _roof_type_labels(enum_cls: type[RoofType]) -> list[str]:
    """Return every member's ``.value`` so the column stores values, not names."""
    return [member.value for member in enum_cls]


class LandlordRoofTypeOverrideRow(SQLModel, table=True):
    """One row of ``landlord_roof_type_overrides``.

    A ``(portfolio_id, description)`` pair is unique and maps a landlord's
    free-text roof description to a ``RoofType`` value.
    """

    __tablename__: ClassVar[str] = "landlord_roof_type_overrides"  # pyright: ignore[reportIncompatibleVariableOverride]
    __table_args__: ClassVar[tuple[UniqueConstraint, ...]] = (  # pyright: ignore[reportIncompatibleVariableOverride]
        UniqueConstraint(
            "portfolio_id",
            "description",
            name="landlord_roof_type_overrides_portfolio_description_unique",
        ),
    )

    id: UUID = Field(default_factory=uuid4, primary_key=True)

    # BigInteger mirrors the Drizzle ``portfolio_id`` FK; SQLModel would
    # otherwise map ``int`` to a 32-bit Integer, which overflows past 2^31.
    # The FK to ``portfolio.id`` itself lives in the Drizzle migration
    # because the ``portfolio`` table has no Python model.
    portfolio_id: int = Field(
        sa_column=Column(BigInteger, nullable=False, index=True),
    )

    description: str = Field(nullable=False)

    # Stored through the ``roof_type`` enum using each member's ``.value``.
    value: RoofType = Field(
        sa_column=Column(
            SAEnum(
                RoofType,
                name="roof_type",
                values_callable=_roof_type_labels,
            ),
            nullable=False,
        ),
    )

    # Shared SAEnum instance -- ``landlord_override_enums`` explains why every
    # ``landlord_*_overrides`` row class reuses the same one.
    source: str = Field(
        sa_column=Column(override_source_sa_enum, nullable=False),
    )

    created_at: datetime = Field(default_factory=_utc_now, nullable=False)
    updated_at: datetime = Field(default_factory=_utc_now, nullable=False)
|
||||
|
|
@ -46,7 +46,7 @@ def main() -> int:
|
|||
print(f" - {c}")
|
||||
return 0
|
||||
|
||||
column = "roof_description"
|
||||
column = "wall "
|
||||
series = df[column] if args.keep_na else df[column].dropna()
|
||||
for value in series.unique():
|
||||
print(value)
|
||||
|
|
|
|||
|
|
@ -23,11 +23,13 @@ class _FakeChatGPT(ChatGPT):
|
|||
error: Optional[Exception] = None,
|
||||
) -> None:
|
||||
self.prompts: list[str] = []
|
||||
self.system_prompts: list[Optional[str]] = []
|
||||
self._reply = reply
|
||||
self._error = error
|
||||
|
||||
def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
|
||||
self.prompts.append(prompt)
|
||||
self.system_prompts.append(system_prompt)
|
||||
if self._error is not None:
|
||||
raise self._error
|
||||
return self._reply
|
||||
|
|
@ -125,11 +127,59 @@ def test_empty_description_set_returns_empty_without_calling_chatgpt() -> None:
|
|||
|
||||
def test_classifies_with_a_different_category_enum() -> None:
|
||||
# Arrange: the same adapter classifies a WallType column.
|
||||
chat_gpt = _FakeChatGPT(reply='{"solid brick wall": "Solid Brick"}')
|
||||
chat_gpt = _FakeChatGPT(
|
||||
reply='{"solid brick wall": "Solid brick, as built, no insulation (assumed)"}'
|
||||
)
|
||||
classifier = ChatGptColumnClassifier(chat_gpt, WallType, WallType.UNKNOWN)
|
||||
|
||||
# Act
|
||||
result = classifier.classify({"solid brick wall"})
|
||||
|
||||
# Assert
|
||||
assert result == {"solid brick wall": WallType.SOLID_BRICK}
|
||||
assert result == {
|
||||
"solid brick wall": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED
|
||||
}
|
||||
|
||||
|
||||
def test_extra_instructions_are_appended_to_the_system_prompt() -> None:
    # Arrange: per-column guidance (such as wall-type build-era hints) has to
    # reach the model word for word, inside the system prompt and ahead of
    # the JSON-output instruction.
    hint = "If the description carries a build decade, prefer X."
    chat_gpt = _FakeChatGPT(reply='{"1970s semi": "House"}')
    classifier = ChatGptColumnClassifier(
        chat_gpt,
        PropertyType,
        PropertyType.UNKNOWN,
        extra_instructions=hint,
    )

    # Act
    classifier.classify({"1970s semi"})

    # Assert: the hint is present and precedes the JSON instruction.
    system_prompt = chat_gpt.system_prompts[0]
    assert system_prompt is not None
    assert hint in system_prompt
    assert system_prompt.index(
        "If the description carries a build decade"
    ) < system_prompt.index("Reply with only a JSON object")
|
||||
|
||||
|
||||
def test_omitting_extra_instructions_leaves_the_system_prompt_unchanged() -> None:
    # Arrange: with no per-column guidance the classifier must emit exactly
    # the original system prompt -- no stray whitespace, no leftover hint.
    expected_prompt = (
        "Classify each free-text description into exactly one category. "
        "Categories: House, Bungalow, Flat, Maisonette, Park home. "
        "Reply with only a JSON object mapping each original description "
        "to its category, and nothing else."
    )
    chat_gpt = _FakeChatGPT(reply='{"semi-detached": "House"}')
    classifier = ChatGptColumnClassifier(chat_gpt, PropertyType, PropertyType.UNKNOWN)

    # Act
    classifier.classify({"semi-detached"})

    # Assert
    system_prompt = chat_gpt.system_prompts[0]
    assert system_prompt is not None
    assert system_prompt == expected_prompt
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue