mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
refactor(orchestration): wire stages onto the UnitOfWork; per-stage commit (#1138)
Replaces the handler's whole-pipeline Session (one transaction across all three stages, connection pinned during Ingestion's external IO) with a Unit-of-Work per stage (ADR-0012, added here). Each stage runs its batch in one unit and commits once; any property raising aborts the batch and the subtask fails noisily. - BaselineOrchestrator(unit_of_work, rebaseliner): one unit for the batch, commit once. Raise on a pre-SAP10 property leaves the unit uncommitted. - IngestionOrchestrator(unit_of_work, epc_fetcher, geospatial_repo, solar_fetcher): fetch/write split — phase 1 fetches the whole batch (EPC / coords / solar) with NO unit open; phase 2 writes in one unit and commits. The connection is never held during external IO. Geospatial S3 repo stays injected (reference data, not transactional). - Handler: module-scoped engine (pool reused across warm invocations) + a UoW factory; whole-pipeline `with Session` gone. `build_first_run_pipeline` composes on the factory. Source clients still behind the raising seam. - ADR-0012 records the decision (per-stage boundary, all-or-nothing batch, idempotent re-run, fetch/write split, module-scoped engine). Modelling stub left untouched (no-op, no DB) per the ADR. Tests: orchestrators on a shared FakeUnitOfWork (assert persisted batch + exactly-once commit + no-commit-on-raise). New real-DB E2E integration test: real PostgresUnitOfWork, Ingestion writes the EPC → Baseline reads it back through the repo → re-run replaces, not duplicates (1 EPC row, 1 baseline row after two runs). 121 pass in tests/; pyright strict clean; AAA. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
559ae1b4ec
commit
48a488d1e9
8 changed files with 423 additions and 346 deletions
|
|
@ -1,8 +1,10 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from typing import Any, Protocol
|
from collections.abc import Callable
|
||||||
|
from typing import Any, Optional, Protocol
|
||||||
|
|
||||||
|
from sqlalchemy import Engine
|
||||||
from sqlmodel import Session
|
from sqlmodel import Session
|
||||||
|
|
||||||
from applications.ara_first_run.ara_first_run_trigger_body import (
|
from applications.ara_first_run.ara_first_run_trigger_body import (
|
||||||
|
|
@ -20,19 +22,24 @@ from orchestration.ingestion_orchestrator import (
|
||||||
)
|
)
|
||||||
from orchestration.modelling_orchestrator import ModellingOrchestrator
|
from orchestration.modelling_orchestrator import ModellingOrchestrator
|
||||||
from orchestration.task_orchestrator import TaskOrchestrator
|
from orchestration.task_orchestrator import TaskOrchestrator
|
||||||
from repositories.baseline.baseline_postgres_repository import (
|
|
||||||
BaselinePostgresRepository,
|
|
||||||
)
|
|
||||||
from repositories.epc.epc_postgres_repository import EpcPostgresRepository
|
|
||||||
from repositories.geospatial.geospatial_repository import GeospatialRepository
|
from repositories.geospatial.geospatial_repository import GeospatialRepository
|
||||||
from repositories.materials.materials_repository import MaterialsRepository
|
from repositories.materials.materials_repository import MaterialsRepository
|
||||||
from repositories.property.property_postgres_repository import (
|
from repositories.postgres_unit_of_work import PostgresUnitOfWork
|
||||||
PropertyPostgresRepository,
|
|
||||||
)
|
|
||||||
from repositories.scenario.scenario_repository import ScenarioRepository
|
from repositories.scenario.scenario_repository import ScenarioRepository
|
||||||
from repositories.solar.solar_postgres_repository import SolarPostgresRepository
|
from repositories.unit_of_work import UnitOfWork
|
||||||
from utilities.aws_lambda.subtask_handler import subtask_handler
|
from utilities.aws_lambda.subtask_handler import subtask_handler
|
||||||
|
|
||||||
|
# Module-scoped so the connection pool is reused across warm Lambda invocations
|
||||||
|
# rather than rebuilt per invocation (ADR-0012).
|
||||||
|
_engine: Optional[Engine] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_engine() -> Engine:
|
||||||
|
global _engine
|
||||||
|
if _engine is None:
|
||||||
|
_engine = make_engine(PostgresConfig.from_env(dict(os.environ)))
|
||||||
|
return _engine
|
||||||
|
|
||||||
|
|
||||||
class _RunsFirstRun(Protocol):
|
class _RunsFirstRun(Protocol):
|
||||||
"""The slice of FirstRunPipeline the handler delegates to."""
|
"""The slice of FirstRunPipeline the handler delegates to."""
|
||||||
|
|
@ -44,8 +51,7 @@ def dispatch_first_run(body: dict[str, Any], *, pipeline: _RunsFirstRun) -> None
|
||||||
"""Validate the raw event body and hand the command to the pipeline.
|
"""Validate the raw event body and hand the command to the pipeline.
|
||||||
|
|
||||||
The handler's entire decision logic — kept as a named seam so it is
|
The handler's entire decision logic — kept as a named seam so it is
|
||||||
exercised without the Lambda runtime. No business logic lives here: validate,
|
exercised without the Lambda runtime. No business logic: validate, delegate.
|
||||||
then delegate (issue #1130/#1136).
|
|
||||||
"""
|
"""
|
||||||
trigger = AraFirstRunTriggerBody.model_validate(body)
|
trigger = AraFirstRunTriggerBody.model_validate(body)
|
||||||
pipeline.run(trigger)
|
pipeline.run(trigger)
|
||||||
|
|
@ -53,35 +59,28 @@ def dispatch_first_run(body: dict[str, Any], *, pipeline: _RunsFirstRun) -> None
|
||||||
|
|
||||||
def build_first_run_pipeline(
|
def build_first_run_pipeline(
|
||||||
*,
|
*,
|
||||||
session: Session,
|
unit_of_work: Callable[[], UnitOfWork],
|
||||||
epc_fetcher: EpcFetcher,
|
epc_fetcher: EpcFetcher,
|
||||||
geospatial_repo: GeospatialRepository,
|
geospatial_repo: GeospatialRepository,
|
||||||
solar_fetcher: SolarFetcher,
|
solar_fetcher: SolarFetcher,
|
||||||
) -> FirstRunPipeline:
|
) -> FirstRunPipeline:
|
||||||
"""Compose the real three-stage pipeline over Postgres-backed repos.
|
"""Compose the real three-stage pipeline on a Unit-of-Work factory.
|
||||||
|
|
||||||
The stages share the session's repos and hand off only ``property_ids``
|
Each stage opens its own unit(s) and commits per batch (ADR-0012); the
|
||||||
through them (ADR-0011). The source clients are passed in rather than built
|
handler no longer holds a session. The source clients are passed in because
|
||||||
here because their config is not settled — see ``_source_clients_from_env``.
|
their config is not settled — see ``_source_clients_from_env``. Modelling is
|
||||||
Modelling is stubbed (#1136); its Scenario / Materials ports are seams.
|
stubbed (#1136); its Scenario / Materials ports are seams.
|
||||||
"""
|
"""
|
||||||
epc_repo = EpcPostgresRepository(session)
|
|
||||||
property_repo = PropertyPostgresRepository(session, epc_repo)
|
|
||||||
solar_repo = SolarPostgresRepository(session)
|
|
||||||
baseline_repo = BaselinePostgresRepository(session)
|
|
||||||
return FirstRunPipeline(
|
return FirstRunPipeline(
|
||||||
ingestion=IngestionOrchestrator(
|
ingestion=IngestionOrchestrator(
|
||||||
property_repo=property_repo,
|
unit_of_work=unit_of_work,
|
||||||
epc_fetcher=epc_fetcher,
|
epc_fetcher=epc_fetcher,
|
||||||
geospatial_repo=geospatial_repo,
|
geospatial_repo=geospatial_repo,
|
||||||
solar_fetcher=solar_fetcher,
|
solar_fetcher=solar_fetcher,
|
||||||
epc_repo=epc_repo,
|
|
||||||
solar_repo=solar_repo,
|
|
||||||
),
|
),
|
||||||
baseline=BaselineOrchestrator(
|
baseline=BaselineOrchestrator(
|
||||||
property_repo=property_repo,
|
unit_of_work=unit_of_work,
|
||||||
rebaseliner=StubRebaseliner(),
|
rebaseliner=StubRebaseliner(),
|
||||||
baseline_repo=baseline_repo,
|
|
||||||
),
|
),
|
||||||
modelling=ModellingOrchestrator(
|
modelling=ModellingOrchestrator(
|
||||||
scenario_repo=ScenarioRepository(),
|
scenario_repo=ScenarioRepository(),
|
||||||
|
|
@ -108,14 +107,15 @@ def _source_clients_from_env() -> tuple[EpcFetcher, GeospatialRepository, SolarF
|
||||||
def handler(
|
def handler(
|
||||||
body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator
|
body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator
|
||||||
) -> None:
|
) -> None:
|
||||||
engine = make_engine(PostgresConfig.from_env(dict(os.environ)))
|
engine = _get_engine()
|
||||||
|
unit_of_work: Callable[[], UnitOfWork] = lambda: PostgresUnitOfWork(
|
||||||
|
lambda: Session(engine)
|
||||||
|
)
|
||||||
epc_fetcher, geospatial_repo, solar_fetcher = _source_clients_from_env()
|
epc_fetcher, geospatial_repo, solar_fetcher = _source_clients_from_env()
|
||||||
with Session(engine) as session:
|
pipeline = build_first_run_pipeline(
|
||||||
pipeline = build_first_run_pipeline(
|
unit_of_work=unit_of_work,
|
||||||
session=session,
|
epc_fetcher=epc_fetcher,
|
||||||
epc_fetcher=epc_fetcher,
|
geospatial_repo=geospatial_repo,
|
||||||
geospatial_repo=geospatial_repo,
|
solar_fetcher=solar_fetcher,
|
||||||
solar_fetcher=solar_fetcher,
|
)
|
||||||
)
|
dispatch_first_run(body, pipeline=pipeline)
|
||||||
dispatch_first_run(body, pipeline=pipeline)
|
|
||||||
session.commit()
|
|
||||||
|
|
|
||||||
31
docs/adr/0012-unit-of-work-per-stage-batch-transaction.md
Normal file
31
docs/adr/0012-unit-of-work-per-stage-batch-transaction.md
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
# Each stage commits its batch once, through a Unit of Work
|
||||||
|
|
||||||
|
**Status: Accepted.** Refines [ADR-0011](0011-composable-stage-orchestrators.md) (composable stage orchestrators, stages communicate through repos) with the persistence/transaction mechanics for batch processing. Decided in a `/grill-with-docs` session (2026-05-31) after the First Run spine (#1136) landed, prompted by reviewing the handler's session lifecycle.
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
A First Run trigger carries a **batch** of ~30 `property_ids`. The pipeline runs that batch through Ingestion → Baseline → Modelling. The first cut (#1136) wrapped **all three stages in one `Session` and one final `commit()`** in the handler. That has three problems:
|
||||||
|
|
||||||
|
1. **A connection is pinned for the whole long-running pipeline.** SQLAlchemy checks out a pooled connection on the first statement and holds it until commit. Ingestion is the only IO-heavy stage (per property: EPC HTTP, Google-Solar HTTP, geospatial S3), so the connection sits checked-out-but-idle across all that external IO — the RDS-Proxy/pgbouncer "transaction-pinned connection" anti-pattern.
|
||||||
|
2. **One giant transaction** for the batch: long-held locks, identity-map growth, all-or-nothing across stages.
|
||||||
|
3. **Cross-stage hand-off through an *uncommitted* transaction.** Baseline reads Ingestion's writes only because they share one open transaction — which contradicts ADR-0011/0003's "stages hand off through *persisted* state." If a stage ever moves to its own lambda, this breaks.
|
||||||
|
|
||||||
|
A tempting fix — commit per property — is **rejected**: per-property commits are a commit storm that has overloaded the database before. The unit of commit must be the **batch**, not the property.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
- **Transaction boundary = one stage = one Unit of Work = one commit.** A batch yields ~3 commits (Ingestion, Baseline, Modelling), never N. No per-property commits.
|
||||||
|
- **All-or-nothing per batch, fail noisily.** Any property failing aborts that stage's unit (rollback); the exception propagates so `@subtask_handler` marks the subtask FAILED on the task table. Operators debug and re-run the batch. There is no per-property partial success.
|
||||||
|
- **Re-runs are idempotent.** Because stages commit independently, a re-run after a mid-pipeline failure re-executes already-committed earlier stages. So each stage's batch write **replaces** the rows for the batch's `property_ids` (delete-for-these-ids then bulk insert, or upsert) inside its unit. This is also what the future re-score-on-override path needs (re-baselining overwrites, never duplicates).
|
||||||
|
- **Bulk reads, load-whole (ADR-0002).** Repos expose `get_many(property_ids) -> Properties` returning fully-hydrated aggregates, implemented as one IN-filtered query per table composed in memory — a handful of round-trips per batch, not 30 × tables. No lean stage-specific read path.
|
||||||
|
- **Ingestion splits fetch from write.** Phase 1 fetches the whole batch (EPC / coordinates / solar) over HTTP/S3 with **no DB unit open**; phase 2 opens a unit and writes the batch, committing once. The connection is therefore held only for the short batch write, never across external IO. This sharpens the Fetcher-vs-Repo taxonomy of ADR-0011: Fetchers do IO outside any unit; Repos do DB inside the committed unit.
|
||||||
|
- **Mechanism: a `UnitOfWork`.** A `UnitOfWork` port + a `PostgresUnitOfWork` adapter (built on a module-scoped engine + sessionmaker) owns the session and constructs the DB-backed repos on it (`uow.property`, `uow.epc`, `uow.solar`, `uow.baseline`). It commits on explicit `commit()` and rolls back on any exception. Orchestrators take a `unit_of_work` factory plus their **non-DB** dependencies, injected separately: the EPC/Solar fetchers, the geospatial **S3** repo (reference data — read outside the transaction), and the Rebaseliner. Baseline uses one unit for the batch; Ingestion uses two (read uprns → fetch outside any unit → write batch).
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
- The orchestrators' dependency shape changes from "individual session-bound repos" to "a `unit_of_work` factory + non-DB deps". The #1134 Ingestion and #1135 Baseline orchestrators are refactored accordingly; `FirstRunPipeline` is unchanged (it still composes the three stages and threads only `property_ids`).
|
||||||
|
- Hard to reverse once every stage depends on the UoW — hence this ADR.
|
||||||
|
- Atomicity is **stage-level**, not per-property; correctness of the re-run workflow depends on the idempotent batch writes above.
|
||||||
|
- The engine + sessionmaker move to module scope so the pool is reused across warm Lambda invocations, rather than rebuilt per invocation (the existing `default_orchestrator` has the same per-invocation smell and should follow).
|
||||||
|
- EPC writes span child tables, so the idempotent "replace for these `property_ids`" must delete child rows too (cascade) before re-insert.
|
||||||
|
- The Modelling stub is left untouched this slice — its `run` is a no-op that touches no DB, so giving it a `unit_of_work` now would be an unused dependency. It takes a unit when its scoring body is built (the per-service Modelling grills).
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Callable
|
||||||
|
|
||||||
from datatypes.epc.domain.epc_property_data import (
|
from datatypes.epc.domain.epc_property_data import (
|
||||||
EpcPropertyData,
|
EpcPropertyData,
|
||||||
RenewableHeatIncentive,
|
RenewableHeatIncentive,
|
||||||
|
|
@ -7,50 +9,51 @@ from datatypes.epc.domain.epc_property_data import (
|
||||||
from domain.baseline.baseline_performance import BaselinePerformance
|
from domain.baseline.baseline_performance import BaselinePerformance
|
||||||
from domain.baseline.performance import lodged_performance
|
from domain.baseline.performance import lodged_performance
|
||||||
from domain.baseline.rebaseliner import Rebaseliner
|
from domain.baseline.rebaseliner import Rebaseliner
|
||||||
from repositories.baseline.baseline_repository import BaselineRepository
|
from repositories.unit_of_work import UnitOfWork
|
||||||
from repositories.property.property_repository import PropertyRepository
|
|
||||||
|
|
||||||
|
|
||||||
class BaselineOrchestrator:
|
class BaselineOrchestrator:
|
||||||
"""Stage 2: establish each Property's Baseline Performance and persist it.
|
"""Stage 2: establish each Property's Baseline Performance and persist it.
|
||||||
|
|
||||||
For each property: hydrate the Property aggregate via PropertyRepo, resolve
|
Runs the whole batch in **one** Unit of Work and commits once (ADR-0012):
|
||||||
its Effective EPC, read Lodged Performance off it, run the Rebaseliner to
|
for each property it hydrates the Property via the unit's PropertyRepo,
|
||||||
produce Effective Performance (equal to Lodged unless a trigger fires), and
|
resolves the Effective EPC, reads Lodged Performance off it, runs the
|
||||||
persist the pair plus the deterministic kWh.
|
Rebaseliner to produce Effective Performance, and persists the pair plus the
|
||||||
|
deterministic kWh. Any property raising aborts the batch — the unit is left
|
||||||
|
uncommitted, so nothing persists and the subtask fails noisily.
|
||||||
|
|
||||||
Reads only from repos — never a Fetcher or HTTP (ADR-0003). That is what
|
Reads only from repos — never a Fetcher or HTTP (ADR-0003) — so it is
|
||||||
makes it byte-identical whether Ingestion ran milliseconds ago (First Run)
|
byte-identical whether Ingestion ran milliseconds ago (First Run) or last
|
||||||
or last week (single-property review). The injected Rebaseliner is the
|
week. The injected Rebaseliner is the re-score-on-override seam (ADR-0011).
|
||||||
re-score-on-override seam: the future single-property flow re-runs the same
|
|
||||||
step after a Landlord Override changes the Effective EPC (ADR-0011).
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
property_repo: PropertyRepository,
|
unit_of_work: Callable[[], UnitOfWork],
|
||||||
rebaseliner: Rebaseliner,
|
rebaseliner: Rebaseliner,
|
||||||
baseline_repo: BaselineRepository,
|
|
||||||
) -> None:
|
) -> None:
|
||||||
self._property_repo = property_repo
|
self._unit_of_work = unit_of_work
|
||||||
self._rebaseliner = rebaseliner
|
self._rebaseliner = rebaseliner
|
||||||
self._baseline_repo = baseline_repo
|
|
||||||
|
|
||||||
def run(self, property_ids: list[int]) -> None:
|
def run(self, property_ids: list[int]) -> None:
|
||||||
for property_id in property_ids:
|
with self._unit_of_work() as uow:
|
||||||
effective_epc = self._property_repo.get(property_id).effective_epc
|
for property_id in property_ids:
|
||||||
lodged = lodged_performance(effective_epc)
|
effective_epc = uow.property.get(property_id).effective_epc
|
||||||
effective, reason = self._rebaseliner.rebaseline(effective_epc, lodged)
|
lodged = lodged_performance(effective_epc)
|
||||||
rhi = _require_rhi(effective_epc)
|
effective, reason = self._rebaseliner.rebaseline(
|
||||||
baseline = BaselinePerformance(
|
effective_epc, lodged
|
||||||
lodged=lodged,
|
)
|
||||||
effective=effective,
|
rhi = _require_rhi(effective_epc)
|
||||||
rebaseline_reason=reason,
|
baseline = BaselinePerformance(
|
||||||
space_heating_kwh=rhi.space_heating_kwh,
|
lodged=lodged,
|
||||||
water_heating_kwh=rhi.water_heating_kwh,
|
effective=effective,
|
||||||
)
|
rebaseline_reason=reason,
|
||||||
self._baseline_repo.save(baseline, property_id)
|
space_heating_kwh=rhi.space_heating_kwh,
|
||||||
|
water_heating_kwh=rhi.water_heating_kwh,
|
||||||
|
)
|
||||||
|
uow.baseline.save(baseline, property_id)
|
||||||
|
uow.commit()
|
||||||
|
|
||||||
|
|
||||||
def _require_rhi(epc: EpcPropertyData) -> RenewableHeatIncentive:
|
def _require_rhi(epc: EpcPropertyData) -> RenewableHeatIncentive:
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Callable
|
||||||
|
from dataclasses import dataclass
|
||||||
from typing import Any, Optional, Protocol
|
from typing import Any, Optional, Protocol
|
||||||
|
|
||||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||||
from repositories.epc.epc_repository import EpcRepository
|
|
||||||
from repositories.geospatial.geospatial_repository import GeospatialRepository
|
from repositories.geospatial.geospatial_repository import GeospatialRepository
|
||||||
from repositories.property.property_repository import PropertyRepository
|
from repositories.unit_of_work import UnitOfWork
|
||||||
from repositories.solar.solar_repository import SolarRepository
|
|
||||||
|
|
||||||
|
|
||||||
class EpcFetcher(Protocol):
|
class EpcFetcher(Protocol):
|
||||||
|
|
@ -23,50 +23,75 @@ class SolarFetcher(Protocol):
|
||||||
) -> dict[str, Any]: ...
|
) -> dict[str, Any]: ...
|
||||||
|
|
||||||
|
|
||||||
class IngestionOrchestrator:
|
@dataclass
|
||||||
"""Stage 1: acquire a Property's external source data and persist it.
|
class _Fetched:
|
||||||
|
"""One property's externally-fetched source data, awaiting the write phase."""
|
||||||
|
|
||||||
For each property: read its UPRN from the property row, fetch its EPC, resolve
|
property_id: int
|
||||||
its coordinates from the Geospatial reference Repo, thread those into the Solar
|
epc: Optional[EpcPropertyData]
|
||||||
fetcher, and persist EPC + solar via repos. The orchestrator is the only place
|
solar_insights: Optional[dict[str, Any]]
|
||||||
a Fetcher and a Repo meet, and it threads the coordinate from the Repo into the
|
|
||||||
Solar Fetcher — Fetchers never call each other (ADR-0011). Coordinates are
|
|
||||||
reference data (deterministic from UPRN), so they are resolved transiently to
|
class IngestionOrchestrator:
|
||||||
drive the Solar fetch rather than persisted per-property.
|
"""Stage 1: acquire a batch's external source data and persist it.
|
||||||
|
|
||||||
|
Runs in two phases so a DB connection is never held during external IO
|
||||||
|
(ADR-0012): **fetch** the whole batch — read each UPRN, fetch its EPC, resolve
|
||||||
|
coordinates from the Geospatial reference Repo, thread those into the Solar
|
||||||
|
fetcher — with *no unit open*; then **write** the batch in one Unit of Work
|
||||||
|
and commit once. Fetchers never call each other (ADR-0011); the orchestrator
|
||||||
|
threads the coordinate. Coordinates are reference data (deterministic from
|
||||||
|
UPRN), resolved transiently to drive the Solar fetch, never persisted.
|
||||||
|
|
||||||
|
The geospatial repo reads S3 reference data, not the transactional store, so
|
||||||
|
it is injected separately rather than taken from the unit.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
property_repo: PropertyRepository,
|
unit_of_work: Callable[[], UnitOfWork],
|
||||||
epc_fetcher: EpcFetcher,
|
epc_fetcher: EpcFetcher,
|
||||||
geospatial_repo: GeospatialRepository,
|
geospatial_repo: GeospatialRepository,
|
||||||
solar_fetcher: SolarFetcher,
|
solar_fetcher: SolarFetcher,
|
||||||
epc_repo: EpcRepository,
|
|
||||||
solar_repo: SolarRepository,
|
|
||||||
) -> None:
|
) -> None:
|
||||||
self._property_repo = property_repo
|
self._unit_of_work = unit_of_work
|
||||||
self._epc_fetcher = epc_fetcher
|
self._epc_fetcher = epc_fetcher
|
||||||
self._geospatial_repo = geospatial_repo
|
self._geospatial_repo = geospatial_repo
|
||||||
self._solar_fetcher = solar_fetcher
|
self._solar_fetcher = solar_fetcher
|
||||||
self._epc_repo = epc_repo
|
|
||||||
self._solar_repo = solar_repo
|
|
||||||
|
|
||||||
def run(self, property_ids: list[int]) -> None:
|
def run(self, property_ids: list[int]) -> None:
|
||||||
for property_id in property_ids:
|
uprns = self._uprns_for(property_ids)
|
||||||
uprn = self._property_repo.get(property_id).identity.uprn
|
fetched = [self._fetch(property_id, uprn) for property_id, uprn in uprns]
|
||||||
if uprn is None:
|
self._persist(fetched)
|
||||||
# No UPRN to fetch against (e.g. landlord_property_id-only); a
|
|
||||||
# later Site-Notes path covers these.
|
|
||||||
continue
|
|
||||||
|
|
||||||
epc = self._epc_fetcher.get_by_uprn(uprn)
|
def _uprns_for(self, property_ids: list[int]) -> list[tuple[int, int]]:
|
||||||
if epc is not None:
|
# A short read unit; properties with no UPRN (e.g. landlord_property_id
|
||||||
self._epc_repo.save(epc, property_id=property_id)
|
# only) are skipped — a later Site-Notes path covers them.
|
||||||
|
with self._unit_of_work() as uow:
|
||||||
|
pairs: list[tuple[int, int]] = []
|
||||||
|
for property_id in property_ids:
|
||||||
|
uprn = uow.property.get(property_id).identity.uprn
|
||||||
|
if uprn is not None:
|
||||||
|
pairs.append((property_id, uprn))
|
||||||
|
return pairs
|
||||||
|
|
||||||
coordinates = self._geospatial_repo.coordinates_for(uprn)
|
def _fetch(self, property_id: int, uprn: int) -> _Fetched:
|
||||||
if coordinates is not None:
|
# No unit open here — this is the external-IO phase.
|
||||||
insights = self._solar_fetcher.get_building_insights(
|
epc = self._epc_fetcher.get_by_uprn(uprn)
|
||||||
coordinates.longitude, coordinates.latitude
|
solar_insights: Optional[dict[str, Any]] = None
|
||||||
)
|
coordinates = self._geospatial_repo.coordinates_for(uprn)
|
||||||
self._solar_repo.save(property_id, insights)
|
if coordinates is not None:
|
||||||
|
solar_insights = self._solar_fetcher.get_building_insights(
|
||||||
|
coordinates.longitude, coordinates.latitude
|
||||||
|
)
|
||||||
|
return _Fetched(property_id, epc, solar_insights)
|
||||||
|
|
||||||
|
def _persist(self, fetched: list[_Fetched]) -> None:
|
||||||
|
with self._unit_of_work() as uow:
|
||||||
|
for item in fetched:
|
||||||
|
if item.epc is not None:
|
||||||
|
uow.epc.save(item.epc, property_id=item.property_id)
|
||||||
|
if item.solar_insights is not None:
|
||||||
|
uow.solar.save(item.property_id, item.solar_insights)
|
||||||
|
uow.commit()
|
||||||
|
|
|
||||||
110
tests/orchestration/fakes.py
Normal file
110
tests/orchestration/fakes.py
Normal file
|
|
@ -0,0 +1,110 @@
|
||||||
|
"""In-memory fakes for orchestrator unit tests (no DB, no network).
|
||||||
|
|
||||||
|
A `FakeUnitOfWork` exposes dict-backed fake repos and records commits, so a
|
||||||
|
test can drive an orchestrator and then assert what was persisted and that the
|
||||||
|
batch committed exactly once (ADR-0012)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from types import TracebackType
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||||
|
from domain.baseline.baseline_performance import BaselinePerformance
|
||||||
|
from domain.property.property import Property
|
||||||
|
from repositories.baseline.baseline_repository import BaselineRepository
|
||||||
|
from repositories.epc.epc_repository import EpcRepository
|
||||||
|
from repositories.property.property_repository import PropertyRepository
|
||||||
|
from repositories.solar.solar_repository import SolarRepository
|
||||||
|
from repositories.unit_of_work import UnitOfWork
|
||||||
|
|
||||||
|
|
||||||
|
class FakePropertyRepo(PropertyRepository):
|
||||||
|
def __init__(self, by_id: dict[int, Property]) -> None:
|
||||||
|
self._by_id = by_id
|
||||||
|
|
||||||
|
def get(self, property_id: int) -> Property:
|
||||||
|
return self._by_id[property_id]
|
||||||
|
|
||||||
|
|
||||||
|
class FakeEpcRepo(EpcRepository):
|
||||||
|
def __init__(self, by_property: Optional[dict[int, EpcPropertyData]] = None) -> None:
|
||||||
|
self.saved: list[tuple[EpcPropertyData, Optional[int]]] = []
|
||||||
|
self._by_property = by_property or {}
|
||||||
|
|
||||||
|
def save(
|
||||||
|
self,
|
||||||
|
data: EpcPropertyData,
|
||||||
|
property_id: Optional[int] = None,
|
||||||
|
portfolio_id: Optional[int] = None,
|
||||||
|
) -> int:
|
||||||
|
self.saved.append((data, property_id))
|
||||||
|
if property_id is not None:
|
||||||
|
self._by_property[property_id] = data
|
||||||
|
return len(self.saved)
|
||||||
|
|
||||||
|
def get(self, epc_property_id: int) -> EpcPropertyData: # pragma: no cover
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def get_for_property(self, property_id: int) -> Optional[EpcPropertyData]:
|
||||||
|
return self._by_property.get(property_id)
|
||||||
|
|
||||||
|
|
||||||
|
class FakeSolarRepo(SolarRepository):
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.saved: list[tuple[int, dict[str, Any]]] = []
|
||||||
|
|
||||||
|
def save(self, property_id: int, insights: dict[str, Any]) -> None:
|
||||||
|
self.saved.append((property_id, insights))
|
||||||
|
|
||||||
|
def get(self, property_id: int) -> Optional[dict[str, Any]]: # pragma: no cover
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
class FakeBaselineRepo(BaselineRepository):
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.saved: list[tuple[BaselinePerformance, int]] = []
|
||||||
|
|
||||||
|
def save(self, baseline: BaselinePerformance, property_id: int) -> int:
|
||||||
|
self.saved.append((baseline, property_id))
|
||||||
|
return len(self.saved)
|
||||||
|
|
||||||
|
def get_for_property(
|
||||||
|
self, property_id: int
|
||||||
|
) -> Optional[BaselinePerformance]: # pragma: no cover
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
class FakeUnitOfWork(UnitOfWork):
|
||||||
|
"""A unit that holds in-memory repos and counts commits."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
property: FakePropertyRepo,
|
||||||
|
epc: Optional[FakeEpcRepo] = None,
|
||||||
|
solar: Optional[FakeSolarRepo] = None,
|
||||||
|
baseline: Optional[FakeBaselineRepo] = None,
|
||||||
|
) -> None:
|
||||||
|
self.property = property
|
||||||
|
self.epc = epc or FakeEpcRepo()
|
||||||
|
self.solar = solar or FakeSolarRepo()
|
||||||
|
self.baseline = baseline or FakeBaselineRepo()
|
||||||
|
self.commits = 0
|
||||||
|
|
||||||
|
def __enter__(self) -> "FakeUnitOfWork":
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(
|
||||||
|
self,
|
||||||
|
exc_type: Optional[type[BaseException]],
|
||||||
|
exc: Optional[BaseException],
|
||||||
|
tb: Optional[TracebackType],
|
||||||
|
) -> None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def commit(self) -> None:
|
||||||
|
self.commits += 1
|
||||||
|
|
||||||
|
def rollback(self) -> None:
|
||||||
|
return None
|
||||||
|
|
@ -1,7 +1,5 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from datatypes.epc.domain.epc import Epc
|
from datatypes.epc.domain.epc import Epc
|
||||||
|
|
@ -14,30 +12,11 @@ from domain.baseline.performance import Performance
|
||||||
from domain.baseline.rebaseliner import RebaselineNotImplemented, StubRebaseliner
|
from domain.baseline.rebaseliner import RebaselineNotImplemented, StubRebaseliner
|
||||||
from domain.property.property import Property, PropertyIdentity
|
from domain.property.property import Property, PropertyIdentity
|
||||||
from orchestration.baseline_orchestrator import BaselineOrchestrator
|
from orchestration.baseline_orchestrator import BaselineOrchestrator
|
||||||
from repositories.baseline.baseline_repository import BaselineRepository
|
from tests.orchestration.fakes import (
|
||||||
from repositories.property.property_repository import PropertyRepository
|
FakeBaselineRepo,
|
||||||
|
FakePropertyRepo,
|
||||||
|
FakeUnitOfWork,
|
||||||
class _FakePropertyRepo(PropertyRepository):
|
)
|
||||||
def __init__(self, by_id: dict[int, Property]) -> None:
|
|
||||||
self._by_id = by_id
|
|
||||||
|
|
||||||
def get(self, property_id: int) -> Property:
|
|
||||||
return self._by_id[property_id]
|
|
||||||
|
|
||||||
|
|
||||||
class _FakeBaselineRepo(BaselineRepository):
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self.saved: list[tuple[BaselinePerformance, int]] = []
|
|
||||||
|
|
||||||
def save(self, baseline: BaselinePerformance, property_id: int) -> int:
|
|
||||||
self.saved.append((baseline, property_id))
|
|
||||||
return len(self.saved)
|
|
||||||
|
|
||||||
def get_for_property(
|
|
||||||
self, property_id: int
|
|
||||||
) -> Optional[BaselinePerformance]: # pragma: no cover
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
def _property(*, sap_version: float) -> Property:
|
def _property(*, sap_version: float) -> Property:
|
||||||
|
|
@ -58,25 +37,22 @@ def _property(*, sap_version: float) -> Property:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _sap10_property() -> Property:
|
def test_run_establishes_persists_and_commits_the_batch_once() -> None:
|
||||||
return _property(sap_version=10.2)
|
|
||||||
|
|
||||||
|
|
||||||
def test_run_establishes_and_persists_baseline_performance() -> None:
|
|
||||||
# Arrange
|
# Arrange
|
||||||
property_repo = _FakePropertyRepo({10: _sap10_property()})
|
baseline_repo = FakeBaselineRepo()
|
||||||
baseline_repo = _FakeBaselineRepo()
|
uow = FakeUnitOfWork(
|
||||||
|
property=FakePropertyRepo({10: _property(sap_version=10.2)}),
|
||||||
|
baseline=baseline_repo,
|
||||||
|
)
|
||||||
orchestrator = BaselineOrchestrator(
|
orchestrator = BaselineOrchestrator(
|
||||||
property_repo=property_repo,
|
unit_of_work=lambda: uow, rebaseliner=StubRebaseliner()
|
||||||
rebaseliner=StubRebaseliner(),
|
|
||||||
baseline_repo=baseline_repo,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
orchestrator.run([10])
|
orchestrator.run([10])
|
||||||
|
|
||||||
# Assert — one Baseline Performance persisted for property 10, both halves
|
# Assert — one Baseline Performance persisted (both halves equal, kWh off the
|
||||||
# equal (no rebaselining), kWh read off the RHI.
|
# RHI), and the batch committed exactly once.
|
||||||
lodged = Performance(
|
lodged = Performance(
|
||||||
sap_score=72, epc_band=Epc.C, co2_emissions=1.8, primary_energy_intensity=180
|
sap_score=72, epc_band=Epc.C, co2_emissions=1.8, primary_energy_intensity=180
|
||||||
)
|
)
|
||||||
|
|
@ -92,19 +68,23 @@ def test_run_establishes_and_persists_baseline_performance() -> None:
|
||||||
10,
|
10,
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
assert uow.commits == 1
|
||||||
|
|
||||||
|
|
||||||
def test_run_raises_on_a_pre_sap10_property_and_persists_nothing() -> None:
|
def test_run_raises_on_a_pre_sap10_property_and_does_not_commit() -> None:
|
||||||
# Arrange — a pre-SAP10 cert needs ML rebaselining, which is not wired yet.
|
# Arrange — a pre-SAP10 cert needs ML rebaselining, which is not wired yet.
|
||||||
property_repo = _FakePropertyRepo({10: _property(sap_version=9.94)})
|
baseline_repo = FakeBaselineRepo()
|
||||||
baseline_repo = _FakeBaselineRepo()
|
uow = FakeUnitOfWork(
|
||||||
|
property=FakePropertyRepo({10: _property(sap_version=9.94)}),
|
||||||
|
baseline=baseline_repo,
|
||||||
|
)
|
||||||
orchestrator = BaselineOrchestrator(
|
orchestrator = BaselineOrchestrator(
|
||||||
property_repo=property_repo,
|
unit_of_work=lambda: uow, rebaseliner=StubRebaseliner()
|
||||||
rebaseliner=StubRebaseliner(),
|
|
||||||
baseline_repo=baseline_repo,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Act / Assert — the raise propagates; no half-baked baseline is written.
|
# Act / Assert — the raise propagates; the batch is neither persisted nor
|
||||||
|
# committed (all-or-nothing).
|
||||||
with pytest.raises(RebaselineNotImplemented):
|
with pytest.raises(RebaselineNotImplemented):
|
||||||
orchestrator.run([10])
|
orchestrator.run([10])
|
||||||
assert baseline_repo.saved == []
|
assert baseline_repo.saved == []
|
||||||
|
assert uow.commits == 0
|
||||||
|
|
|
||||||
|
|
@ -1,28 +1,43 @@
|
||||||
|
"""End-to-end through-repos integration for First Run (ADR-0012, #1138).
|
||||||
|
|
||||||
|
Real PostgresUnitOfWork over an ephemeral DB: Ingestion writes the EPC, Baseline
|
||||||
|
reads it back *through the repo* (not in memory), and a re-run replaces rather
|
||||||
|
than duplicates. Stub Modelling. The source clients are faked (no IO)."""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import dataclasses
|
||||||
|
import json
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from sqlalchemy import Engine
|
||||||
|
from sqlmodel import Session, select
|
||||||
|
|
||||||
from datatypes.epc.domain.epc import Epc
|
from datatypes.epc.domain.epc import Epc
|
||||||
from datatypes.epc.domain.epc_property_data import (
|
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||||
EpcPropertyData,
|
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||||
RenewableHeatIncentive,
|
|
||||||
)
|
|
||||||
from domain.baseline.rebaseliner import StubRebaseliner
|
from domain.baseline.rebaseliner import StubRebaseliner
|
||||||
from domain.geospatial.coordinates import Coordinates
|
from domain.geospatial.coordinates import Coordinates
|
||||||
from domain.property.property import Property, PropertyIdentity
|
from infrastructure.postgres.baseline_performance_table import (
|
||||||
|
BaselinePerformanceModel,
|
||||||
|
)
|
||||||
|
from infrastructure.postgres.epc_property_table import EpcPropertyModel
|
||||||
|
from infrastructure.postgres.property_table import PropertyRow
|
||||||
from orchestration.baseline_orchestrator import BaselineOrchestrator
|
from orchestration.baseline_orchestrator import BaselineOrchestrator
|
||||||
from orchestration.first_run_pipeline import FirstRunPipeline
|
from orchestration.first_run_pipeline import FirstRunPipeline
|
||||||
from orchestration.ingestion_orchestrator import IngestionOrchestrator
|
from orchestration.ingestion_orchestrator import IngestionOrchestrator
|
||||||
from orchestration.modelling_orchestrator import ModellingOrchestrator
|
from orchestration.modelling_orchestrator import ModellingOrchestrator
|
||||||
from repositories.baseline.baseline_repository import BaselineRepository
|
from repositories.baseline.baseline_postgres_repository import (
|
||||||
from repositories.epc.epc_repository import EpcRepository
|
BaselinePostgresRepository,
|
||||||
|
)
|
||||||
from repositories.geospatial.geospatial_repository import GeospatialRepository
|
from repositories.geospatial.geospatial_repository import GeospatialRepository
|
||||||
from repositories.materials.materials_repository import MaterialsRepository
|
from repositories.materials.materials_repository import MaterialsRepository
|
||||||
from repositories.property.property_repository import PropertyRepository
|
from repositories.postgres_unit_of_work import PostgresUnitOfWork
|
||||||
from repositories.scenario.scenario_repository import ScenarioRepository
|
from repositories.scenario.scenario_repository import ScenarioRepository
|
||||||
from repositories.solar.solar_repository import SolarRepository
|
|
||||||
from domain.baseline.baseline_performance import BaselinePerformance
|
_JSON_SAMPLES = Path(__file__).resolve().parents[2] / "backend/epc_api/json_samples"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -32,48 +47,7 @@ class _FakeCommand:
|
||||||
scenario_ids: list[int]
|
scenario_ids: list[int]
|
||||||
|
|
||||||
|
|
||||||
class _SharedEpcRepo(EpcRepository):
|
class _FetcherReturning:
|
||||||
"""Stands in for the persisted EPC slice both stages talk through."""
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self._by_property: dict[int, EpcPropertyData] = {}
|
|
||||||
|
|
||||||
def save(
|
|
||||||
self,
|
|
||||||
data: EpcPropertyData,
|
|
||||||
property_id: Optional[int] = None,
|
|
||||||
portfolio_id: Optional[int] = None,
|
|
||||||
) -> int:
|
|
||||||
assert property_id is not None
|
|
||||||
self._by_property[property_id] = data
|
|
||||||
return property_id
|
|
||||||
|
|
||||||
def get(self, epc_property_id: int) -> EpcPropertyData: # pragma: no cover
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
def get_for_property(self, property_id: int) -> Optional[EpcPropertyData]:
|
|
||||||
return self._by_property.get(property_id)
|
|
||||||
|
|
||||||
|
|
||||||
class _RepoBackedPropertyRepo(PropertyRepository):
|
|
||||||
"""Composes the Property from its identity row + the EPC slice in the shared
|
|
||||||
EPC repo — mirroring PropertyPostgresRepository, so the stages genuinely
|
|
||||||
hand off through repo state, not in memory."""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self, identities: dict[int, PropertyIdentity], epc_repo: _SharedEpcRepo
|
|
||||||
) -> None:
|
|
||||||
self._identities = identities
|
|
||||||
self._epc_repo = epc_repo
|
|
||||||
|
|
||||||
def get(self, property_id: int) -> Property:
|
|
||||||
return Property(
|
|
||||||
identity=self._identities[property_id],
|
|
||||||
epc=self._epc_repo.get_for_property(property_id),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class _FakeEpcFetcher:
|
|
||||||
def __init__(self, epc: EpcPropertyData) -> None:
|
def __init__(self, epc: EpcPropertyData) -> None:
|
||||||
self._epc = epc
|
self._epc = epc
|
||||||
|
|
||||||
|
|
@ -81,103 +55,91 @@ class _FakeEpcFetcher:
|
||||||
return self._epc
|
return self._epc
|
||||||
|
|
||||||
|
|
||||||
class _NoCoordinatesGeospatialRepo(GeospatialRepository):
|
class _NoCoordinates(GeospatialRepository):
|
||||||
def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
|
def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
|
||||||
return None # skip the solar leg — not under test here
|
return None # skip the solar leg — not under test here
|
||||||
|
|
||||||
|
|
||||||
class _FakeSolarFetcher:
|
class _UnusedSolarFetcher:
|
||||||
def get_building_insights(
|
def get_building_insights(
|
||||||
self, longitude: float, latitude: float
|
self, longitude: float, latitude: float
|
||||||
) -> dict[str, Any]: # pragma: no cover
|
) -> dict[str, Any]: # pragma: no cover
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
class _FakeSolarRepo(SolarRepository):
|
def _lodged_epc() -> EpcPropertyData:
|
||||||
def save(self, property_id: int, insights: dict[str, Any]) -> None: # pragma: no cover
|
# A real, persistable EPC (so it round-trips through the EPC repo), with the
|
||||||
return None
|
# recorded-performance fields the sample leaves blank filled in so Baseline
|
||||||
|
# can read its Lodged Performance.
|
||||||
def get(self, property_id: int) -> Optional[dict[str, Any]]: # pragma: no cover
|
raw: dict[str, Any] = json.loads(
|
||||||
raise NotImplementedError
|
(_JSON_SAMPLES / "RdSAP-Schema-21.0.0" / "epc.json").read_text()
|
||||||
|
)
|
||||||
|
epc = EpcPropertyDataMapper.from_api_response(raw)
|
||||||
class _CollectingBaselineRepo(BaselineRepository):
|
return dataclasses.replace(
|
||||||
def __init__(self) -> None:
|
epc,
|
||||||
self.saved: list[tuple[BaselinePerformance, int]] = []
|
energy_rating_current=72,
|
||||||
|
current_energy_efficiency_band=Epc.C,
|
||||||
def save(self, baseline: BaselinePerformance, property_id: int) -> int:
|
co2_emissions_current=1.8,
|
||||||
self.saved.append((baseline, property_id))
|
energy_consumption_current=180,
|
||||||
return len(self.saved)
|
|
||||||
|
|
||||||
def get_for_property(
|
|
||||||
self, property_id: int
|
|
||||||
) -> Optional[BaselinePerformance]: # pragma: no cover
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
class _FakeScenarioRepo(ScenarioRepository):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class _FakeMaterialsRepo(MaterialsRepository):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def _ingestible_epc() -> EpcPropertyData:
|
|
||||||
epc = object.__new__(EpcPropertyData)
|
|
||||||
epc.energy_rating_current = 72
|
|
||||||
epc.current_energy_efficiency_band = Epc.C
|
|
||||||
epc.co2_emissions_current = 1.8
|
|
||||||
epc.energy_consumption_current = 180
|
|
||||||
epc.sap_version = 10.2
|
|
||||||
epc.renewable_heat_incentive = RenewableHeatIncentive(
|
|
||||||
space_heating_kwh=5000.0, water_heating_kwh=2000.0
|
|
||||||
)
|
)
|
||||||
return epc
|
|
||||||
|
|
||||||
|
|
||||||
def test_baseline_reads_the_epc_ingestion_persisted_through_repos() -> None:
|
def test_first_run_baselines_through_repos_and_is_idempotent_on_rerun(
|
||||||
# Arrange — one property; the EPC the fetcher returns is what Ingestion
|
db_engine: Engine,
|
||||||
# persists and Baseline must then read back through the shared repo.
|
) -> None:
|
||||||
epc = _ingestible_epc()
|
# Arrange — a property row to ingest against, and the EPC its fetcher returns.
|
||||||
epc_repo = _SharedEpcRepo()
|
with Session(db_engine) as session:
|
||||||
identities = {
|
session.add(
|
||||||
10: PropertyIdentity(
|
PropertyRow(
|
||||||
portfolio_id=1, postcode="A0 0AA", address="1 Some Street", uprn=123
|
id=10,
|
||||||
|
portfolio_id=1,
|
||||||
|
postcode="A0 0AA",
|
||||||
|
address="1 Some Street",
|
||||||
|
uprn=12345,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
}
|
session.commit()
|
||||||
property_repo = _RepoBackedPropertyRepo(identities, epc_repo)
|
|
||||||
baseline_repo = _CollectingBaselineRepo()
|
def unit_of_work() -> PostgresUnitOfWork:
|
||||||
|
return PostgresUnitOfWork(lambda: Session(db_engine))
|
||||||
|
|
||||||
pipeline = FirstRunPipeline(
|
pipeline = FirstRunPipeline(
|
||||||
ingestion=IngestionOrchestrator(
|
ingestion=IngestionOrchestrator(
|
||||||
property_repo=property_repo,
|
unit_of_work=unit_of_work,
|
||||||
epc_fetcher=_FakeEpcFetcher(epc),
|
epc_fetcher=_FetcherReturning(_lodged_epc()),
|
||||||
geospatial_repo=_NoCoordinatesGeospatialRepo(),
|
geospatial_repo=_NoCoordinates(),
|
||||||
solar_fetcher=_FakeSolarFetcher(),
|
solar_fetcher=_UnusedSolarFetcher(),
|
||||||
epc_repo=epc_repo,
|
|
||||||
solar_repo=_FakeSolarRepo(),
|
|
||||||
),
|
),
|
||||||
baseline=BaselineOrchestrator(
|
baseline=BaselineOrchestrator(
|
||||||
property_repo=property_repo,
|
unit_of_work=unit_of_work, rebaseliner=StubRebaseliner()
|
||||||
rebaseliner=StubRebaseliner(),
|
|
||||||
baseline_repo=baseline_repo,
|
|
||||||
),
|
),
|
||||||
modelling=ModellingOrchestrator(
|
modelling=ModellingOrchestrator(
|
||||||
scenario_repo=_FakeScenarioRepo(),
|
scenario_repo=ScenarioRepository(),
|
||||||
materials_repo=_FakeMaterialsRepo(),
|
materials_repo=MaterialsRepository(),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
command = _FakeCommand(portfolio_id=1, property_ids=[10], scenario_ids=[7])
|
||||||
|
|
||||||
# Act
|
# Act — First Run, then a re-run over the same batch.
|
||||||
pipeline.run(_FakeCommand(portfolio_id=1, property_ids=[10], scenario_ids=[7]))
|
pipeline.run(command)
|
||||||
|
pipeline.run(command)
|
||||||
|
|
||||||
# Assert — a Baseline Performance landed for property 10, its Lodged half
|
# Assert — Baseline read the EPC Ingestion persisted (through the repo, only
|
||||||
# read off the very EPC Ingestion persisted. Only property_ids crossed the
|
# property_ids crossed the stage boundary), and the re-run replaced rather
|
||||||
# stage boundary; the EPC itself travelled through the repo.
|
# than duplicated either row.
|
||||||
assert len(baseline_repo.saved) == 1
|
with Session(db_engine) as session:
|
||||||
baseline, property_id = baseline_repo.saved[0]
|
baseline = BaselinePostgresRepository(session).get_for_property(10)
|
||||||
assert property_id == 10
|
epc_rows = session.exec(
|
||||||
|
select(EpcPropertyModel).where(EpcPropertyModel.property_id == 10)
|
||||||
|
).all()
|
||||||
|
baseline_rows = session.exec(
|
||||||
|
select(BaselinePerformanceModel).where(
|
||||||
|
BaselinePerformanceModel.property_id == 10
|
||||||
|
)
|
||||||
|
).all()
|
||||||
|
|
||||||
|
assert baseline is not None
|
||||||
assert baseline.lodged.sap_score == 72
|
assert baseline.lodged.sap_score == 72
|
||||||
assert baseline.lodged.epc_band == Epc.C
|
assert baseline.space_heating_kwh == 13120.0
|
||||||
assert baseline.space_heating_kwh == 5000.0
|
assert len(epc_rows) == 1
|
||||||
|
assert len(baseline_rows) == 1
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,5 @@
|
||||||
"""IngestionOrchestrator wires fetchers + repos with no real IO (ADR-0011).
|
"""IngestionOrchestrator fetches the batch (no DB unit open), then writes it in
|
||||||
|
one Unit of Work and commits once (ADR-0012). Tested against fakes — no IO."""
|
||||||
Tested entirely against fakes: it must fetch EPC + solar, thread the
|
|
||||||
Geospatial-resolved coordinates into the solar fetcher, and persist via repos.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
@ -12,18 +9,13 @@ from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||||
from domain.geospatial.coordinates import Coordinates
|
from domain.geospatial.coordinates import Coordinates
|
||||||
from domain.property.property import Property, PropertyIdentity
|
from domain.property.property import Property, PropertyIdentity
|
||||||
from orchestration.ingestion_orchestrator import IngestionOrchestrator
|
from orchestration.ingestion_orchestrator import IngestionOrchestrator
|
||||||
from repositories.epc.epc_repository import EpcRepository
|
|
||||||
from repositories.geospatial.geospatial_repository import GeospatialRepository
|
from repositories.geospatial.geospatial_repository import GeospatialRepository
|
||||||
from repositories.property.property_repository import PropertyRepository
|
from tests.orchestration.fakes import (
|
||||||
from repositories.solar.solar_repository import SolarRepository
|
FakeEpcRepo,
|
||||||
|
FakePropertyRepo,
|
||||||
|
FakeSolarRepo,
|
||||||
class _FakePropertyRepo(PropertyRepository):
|
FakeUnitOfWork,
|
||||||
def __init__(self, by_id: dict[int, Property]) -> None:
|
)
|
||||||
self._by_id = by_id
|
|
||||||
|
|
||||||
def get(self, property_id: int) -> Property:
|
|
||||||
return self._by_id[property_id]
|
|
||||||
|
|
||||||
|
|
||||||
class _FakeEpcFetcher:
|
class _FakeEpcFetcher:
|
||||||
|
|
@ -56,39 +48,6 @@ class _FakeSolarFetcher:
|
||||||
return self.insights
|
return self.insights
|
||||||
|
|
||||||
|
|
||||||
class _FakeEpcRepo(EpcRepository):
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self.saved: list[tuple[EpcPropertyData, Optional[int]]] = []
|
|
||||||
|
|
||||||
def save(
|
|
||||||
self,
|
|
||||||
data: EpcPropertyData,
|
|
||||||
property_id: Optional[int] = None,
|
|
||||||
portfolio_id: Optional[int] = None,
|
|
||||||
) -> int:
|
|
||||||
self.saved.append((data, property_id))
|
|
||||||
return 1
|
|
||||||
|
|
||||||
def get(self, epc_property_id: int) -> EpcPropertyData: # pragma: no cover
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
def get_for_property(
|
|
||||||
self, property_id: int
|
|
||||||
) -> Optional[EpcPropertyData]: # pragma: no cover
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
class _FakeSolarRepo(SolarRepository):
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self.saved: list[tuple[int, dict[str, Any]]] = []
|
|
||||||
|
|
||||||
def save(self, property_id: int, insights: dict[str, Any]) -> None:
|
|
||||||
self.saved.append((property_id, insights))
|
|
||||||
|
|
||||||
def get(self, property_id: int) -> Optional[dict[str, Any]]: # pragma: no cover
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
def _property(uprn: Optional[int]) -> Property:
|
def _property(uprn: Optional[int]) -> Property:
|
||||||
return Property(
|
return Property(
|
||||||
identity=PropertyIdentity(
|
identity=PropertyIdentity(
|
||||||
|
|
@ -97,55 +56,59 @@ def _property(uprn: Optional[int]) -> Property:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _epc() -> EpcPropertyData:
|
|
||||||
# A bare placeholder is enough — the orchestrator treats the EPC opaquely.
|
|
||||||
return object.__new__(EpcPropertyData)
|
|
||||||
|
|
||||||
|
|
||||||
def test_ingestion_persists_epc_and_threads_coords_into_solar() -> None:
|
def test_ingestion_persists_epc_and_threads_coords_into_solar() -> None:
|
||||||
# Arrange
|
# Arrange
|
||||||
epc = _epc()
|
epc = object.__new__(EpcPropertyData)
|
||||||
insights = {"name": "buildings/X"}
|
insights = {"name": "buildings/X"}
|
||||||
coords = Coordinates(longitude=-0.1278, latitude=51.5074)
|
epc_repo = FakeEpcRepo()
|
||||||
epc_repo = _FakeEpcRepo()
|
solar_repo = FakeSolarRepo()
|
||||||
solar_repo = _FakeSolarRepo()
|
|
||||||
solar_fetcher = _FakeSolarFetcher(insights)
|
solar_fetcher = _FakeSolarFetcher(insights)
|
||||||
|
uow = FakeUnitOfWork(
|
||||||
|
property=FakePropertyRepo({10: _property(uprn=12345)}),
|
||||||
|
epc=epc_repo,
|
||||||
|
solar=solar_repo,
|
||||||
|
)
|
||||||
orchestrator = IngestionOrchestrator(
|
orchestrator = IngestionOrchestrator(
|
||||||
property_repo=_FakePropertyRepo({10: _property(uprn=12345)}),
|
unit_of_work=lambda: uow,
|
||||||
epc_fetcher=_FakeEpcFetcher(epc),
|
epc_fetcher=_FakeEpcFetcher(epc),
|
||||||
geospatial_repo=_FakeGeospatialRepo(coords),
|
geospatial_repo=_FakeGeospatialRepo(
|
||||||
|
Coordinates(longitude=-0.1278, latitude=51.5074)
|
||||||
|
),
|
||||||
solar_fetcher=solar_fetcher,
|
solar_fetcher=solar_fetcher,
|
||||||
epc_repo=epc_repo,
|
|
||||||
solar_repo=solar_repo,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
orchestrator.run([10])
|
orchestrator.run([10])
|
||||||
|
|
||||||
# Assert
|
# Assert — EPC persisted, coords threaded from the repo into the solar
|
||||||
|
# fetcher, solar persisted, batch committed once.
|
||||||
assert epc_repo.saved == [(epc, 10)]
|
assert epc_repo.saved == [(epc, 10)]
|
||||||
assert solar_fetcher.calls == [(-0.1278, 51.5074)] # coords threaded from repo
|
assert solar_fetcher.calls == [(-0.1278, 51.5074)]
|
||||||
assert solar_repo.saved == [(10, insights)]
|
assert solar_repo.saved == [(10, insights)]
|
||||||
|
assert uow.commits == 1
|
||||||
|
|
||||||
|
|
||||||
def test_ingestion_skips_property_without_uprn() -> None:
|
def test_ingestion_skips_property_without_uprn() -> None:
|
||||||
# Arrange
|
# Arrange
|
||||||
epc_repo = _FakeEpcRepo()
|
epc_repo = FakeEpcRepo()
|
||||||
solar_repo = _FakeSolarRepo()
|
solar_repo = FakeSolarRepo()
|
||||||
solar_fetcher = _FakeSolarFetcher({})
|
solar_fetcher = _FakeSolarFetcher({})
|
||||||
|
uow = FakeUnitOfWork(
|
||||||
|
property=FakePropertyRepo({10: _property(uprn=None)}),
|
||||||
|
epc=epc_repo,
|
||||||
|
solar=solar_repo,
|
||||||
|
)
|
||||||
orchestrator = IngestionOrchestrator(
|
orchestrator = IngestionOrchestrator(
|
||||||
property_repo=_FakePropertyRepo({10: _property(uprn=None)}),
|
unit_of_work=lambda: uow,
|
||||||
epc_fetcher=_FakeEpcFetcher(_epc()),
|
epc_fetcher=_FakeEpcFetcher(object.__new__(EpcPropertyData)),
|
||||||
geospatial_repo=_FakeGeospatialRepo(None),
|
geospatial_repo=_FakeGeospatialRepo(None),
|
||||||
solar_fetcher=solar_fetcher,
|
solar_fetcher=solar_fetcher,
|
||||||
epc_repo=epc_repo,
|
|
||||||
solar_repo=solar_repo,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
orchestrator.run([10])
|
orchestrator.run([10])
|
||||||
|
|
||||||
# Assert — nothing fetched or persisted for a UPRN-less property
|
# Assert — nothing fetched or persisted for a UPRN-less property.
|
||||||
assert epc_repo.saved == []
|
assert epc_repo.saved == []
|
||||||
assert solar_repo.saved == []
|
assert solar_repo.saved == []
|
||||||
assert solar_fetcher.calls == []
|
assert solar_fetcher.calls == []
|
||||||
|
|
@ -153,17 +116,20 @@ def test_ingestion_skips_property_without_uprn() -> None:
|
||||||
|
|
||||||
def test_ingestion_persists_epc_but_skips_solar_when_no_coordinates() -> None:
|
def test_ingestion_persists_epc_but_skips_solar_when_no_coordinates() -> None:
|
||||||
# Arrange
|
# Arrange
|
||||||
epc = _epc()
|
epc = object.__new__(EpcPropertyData)
|
||||||
epc_repo = _FakeEpcRepo()
|
epc_repo = FakeEpcRepo()
|
||||||
solar_repo = _FakeSolarRepo()
|
solar_repo = FakeSolarRepo()
|
||||||
solar_fetcher = _FakeSolarFetcher({})
|
solar_fetcher = _FakeSolarFetcher({})
|
||||||
|
uow = FakeUnitOfWork(
|
||||||
|
property=FakePropertyRepo({10: _property(uprn=12345)}),
|
||||||
|
epc=epc_repo,
|
||||||
|
solar=solar_repo,
|
||||||
|
)
|
||||||
orchestrator = IngestionOrchestrator(
|
orchestrator = IngestionOrchestrator(
|
||||||
property_repo=_FakePropertyRepo({10: _property(uprn=12345)}),
|
unit_of_work=lambda: uow,
|
||||||
epc_fetcher=_FakeEpcFetcher(epc),
|
epc_fetcher=_FakeEpcFetcher(epc),
|
||||||
geospatial_repo=_FakeGeospatialRepo(None),
|
geospatial_repo=_FakeGeospatialRepo(None),
|
||||||
solar_fetcher=solar_fetcher,
|
solar_fetcher=solar_fetcher,
|
||||||
epc_repo=epc_repo,
|
|
||||||
solar_repo=solar_repo,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
|
|
@ -171,5 +137,5 @@ def test_ingestion_persists_epc_but_skips_solar_when_no_coordinates() -> None:
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert epc_repo.saved == [(epc, 10)]
|
assert epc_repo.saved == [(epc, 10)]
|
||||||
assert solar_fetcher.calls == []
|
|
||||||
assert solar_repo.saved == []
|
assert solar_repo.saved == []
|
||||||
|
assert solar_fetcher.calls == []
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue