From f66e2cb0205719e058bd832867b6def729b11c7a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 16 Jun 2026 04:13:30 +0000 Subject: [PATCH] docs(epc-prediction): module README + end-to-end showcase test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit README at domain/epc_prediction/README.md — the flow diagram, where each piece lives, links to the ADRs/CONTEXT/handover/migration note, and a runnable test command. The team's entry point. tests/e2e/test_epc_prediction_e2e.py — the whole gap-fill flow against the REAL Postgres Unit of Work + EPC/Property repositories + EpcComparablePropertiesRepository + EpcPrediction, with only the three external HTTP clients faked (EPC API, geospatial S3, Solar). Proves: EPC-less Property → Ingestion predicts from its postcode cohort → persists to the predicted slot → reloaded Property resolves effective_epc via source_path == "predicted". The canonical "see it in action". Co-Authored-By: Claude Opus 4.8 --- domain/epc_prediction/README.md | 69 +++++++++++ tests/e2e/__init__.py | 0 tests/e2e/test_epc_prediction_e2e.py | 177 +++++++++++++++++++++++++++ 3 files changed, 246 insertions(+) create mode 100644 domain/epc_prediction/README.md create mode 100644 tests/e2e/__init__.py create mode 100644 tests/e2e/test_epc_prediction_e2e.py diff --git a/domain/epc_prediction/README.md b/domain/epc_prediction/README.md new file mode 100644 index 00000000..794785be --- /dev/null +++ b/domain/epc_prediction/README.md @@ -0,0 +1,69 @@ +# EPC Prediction + +Predict a structured `EpcPropertyData` for an **EPC-less** UK home from its +postcode neighbours, so it flows through the rest of the pipeline (Baseline, Bill +Derivation, Modelling) exactly like a home that has an EPC. It is **deterministic +neighbour synthesis** — cohort modes + a coherent template + per-component +weighting — **not ML**. ~30% of UK homes (typically long-tenure) have no EPC. 
+ +- **Design**: [ADR-0029](../../docs/adr/0029-epc-prediction-from-comparable-properties.md) (algorithm), + [ADR-0030](../../docs/adr/0030-epc-prediction-validation-is-sap-version-aware-and-component-first.md) (validation), + [ADR-0031](../../docs/adr/0031-epc-prediction-production-wiring.md) (production wiring). +- **Glossary**: see *EPC Prediction*, *Comparable Properties*, *Component + Accuracy*, *EPC Anomaly Flag* in [CONTEXT.md](../../CONTEXT.md). + +## The flow (gap-fill) + +``` +Ingestion: a Property has no lodged EPC (epc_fetcher.get_by_uprn → None) + │ + ├─ resolve its attributes (property_type/built_form/wall) from Landlord Overrides + │ └─ property_type unknown? → GATED OUT, not predicted (no national defaults) + ├─ build a PredictionTarget (postcode + coordinates + attributes) + ├─ ComparableProperties repo: fetch the postcode cohort (search → per-cert → coords) + ├─ select_comparables(): filter to the reference cohort (type-hard, built-form-soft) + ├─ EpcPrediction.predict(): synthesise the picture (modes + template + donor + weights) + └─ persist to the Property's PREDICTED slot (source = "predicted") + │ +Modelling/Baseline: Property.effective_epc returns the predicted picture + (source_path == "predicted"), scored like any other Effective EPC. +``` + +A lodged EPC always wins — prediction is last-resort gap-fill. 
+ +## Where the pieces live + +| Concern | File | +|---|---| +| Synthesis (modes, template, heating donor, geo/recency/similarity weights) | `epc_prediction.py` | +| Cohort selection (filter-then-relax ladder) | `comparable_properties.py` | +| Target assembly + eligibility gate | `prediction_target.py` | +| Cohort IO port + EPC-API/geospatial adapter | `repositories/comparable_properties/` | +| Predicted-EPC persistence (`source` discriminator) | `repositories/epc/` | +| `predicted` source path on the aggregate | `domain/property/property.py` | +| Ingestion wiring (gate → predict → persist) | `orchestration/ingestion_orchestrator.py` | +| Validation (leave-one-out, component-first) + ratcheting gate | `validation.py`, `tests/domain/epc_prediction/test_component_accuracy_gate.py` | + +## See it run + +`tests/e2e/test_epc_prediction_e2e.py` — the whole flow against the real DB + +repos; only the external HTTP clients and the overrides read adapter are faked. Start there. + +## Status + +Algorithm + validation: **built**. Production gap-fill wiring: **built behind +seams** (slices 5a–5e). Two things finish it — a DB migration and the +`property_overrides` read adapter — see +[the wiring handover](../../docs/HANDOVER_EPC_PREDICTION_WIRING.md) and +[the migration note](../../docs/MIGRATION_NOTE_predicted_epc_source.md). +**EPC Anomaly Flags** (predict for *every* home, compare to lodged) is the +designed next step the storage already supports. + +## Run the tests + +```bash +PYTHONPATH=. 
python -m pytest tests/e2e/test_epc_prediction_e2e.py \ + tests/domain/epc_prediction tests/orchestration/test_ingestion_prediction.py \ + tests/repositories/comparable_properties tests/repositories/epc/test_epc_predicted_slot.py \ + -o addopts="" -q +``` diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/e2e/test_epc_prediction_e2e.py b/tests/e2e/test_epc_prediction_e2e.py new file mode 100644 index 00000000..671ad117 --- /dev/null +++ b/tests/e2e/test_epc_prediction_e2e.py @@ -0,0 +1,177 @@ +"""END-TO-END showcase: an EPC-less Property flows through Ingestion, gets a +predicted EPC synthesised from its postcode cohort, is persisted to the predicted +slot, and comes back out of the Property repository resolving as the Effective +EPC (ADR-0031). + +This is the full production path with ONLY the external HTTP clients (the EPC API, +geospatial S3 reader, Solar API) and the overrides reader faked — the rest is the real +thing: the real Postgres Unit of Work, the real EPC + Property repositories +against the test database, the real `EpcComparablePropertiesRepository`, and the +real `EpcPrediction`. 
It is the canonical "see the whole flow" reference; the +narrower unit tests live in: + - tests/orchestration/test_ingestion_prediction.py (orchestrator: gate / persist) + - tests/repositories/epc/test_epc_predicted_slot.py (the lodged|predicted slot) + - tests/domain/property/test_property.py (the "predicted" source path) + - tests/domain/epc_prediction/test_prediction_target.py (the eligibility gate) +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Optional + +from sqlalchemy import Engine +from sqlmodel import Session + +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.domain.mapper import EpcPropertyDataMapper +from datatypes.epc.search.epc_search_result import EpcSearchResult +from domain.epc_prediction.epc_prediction import EpcPrediction +from domain.epc_prediction.prediction_target import PredictionTargetAttributes +from domain.geospatial.coordinates import Coordinates +from domain.geospatial.planning_restrictions import PlanningRestrictions +from domain.geospatial.spatial_reference import SpatialReference +from domain.property.property import Property +from infrastructure.postgres.property_table import PropertyRow +from orchestration.ingestion_orchestrator import IngestionOrchestrator +from repositories.comparable_properties.epc_comparable_properties_repository import ( + EpcComparablePropertiesRepository, +) +from repositories.epc.epc_postgres_repository import EpcPostgresRepository +from repositories.geospatial.geospatial_repository import GeospatialRepository +from repositories.postgres_unit_of_work import PostgresUnitOfWork +from repositories.property.property_postgres_repository import ( + PropertyPostgresRepository, +) +from repositories.spatial.spatial_postgres_repository import SpatialPostgresRepository + +_JSON_SAMPLES = Path(__file__).resolve().parents[2] / "backend/epc_api/json_samples" +_POSTCODE = "LS6 1AA" + + +def _epc() -> EpcPropertyData: + 
raw: dict[str, Any] = json.loads( + (_JSON_SAMPLES / "RdSAP-Schema-21.0.0" / "epc.json").read_text() + ) + return EpcPropertyDataMapper.from_api_response(raw) + + +# --- fakes: the THREE external HTTP boundaries and the overrides-reader seam --- + + +class _FakeCohortEpcClient: + """Stands in for the live EPC API: the postcode's lodged certs + their data.""" + + def __init__(self, results: list[EpcSearchResult]) -> None: + self._results = results + + def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]: + return self._results + + def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData: + return _epc() + + +class _FakeGeospatialRepo(GeospatialRepository): + """Stands in for the S3 Open-UPRN reader: UPRN → coordinates.""" + + def __init__(self, coords: dict[int, Coordinates]) -> None: + self._coords = coords + + def coordinates_for(self, uprn: int) -> Optional[Coordinates]: + return self._coords.get(uprn) + + def spatial_for(self, uprn: int) -> Optional[SpatialReference]: + coordinates = self._coords.get(uprn) + if coordinates is None: + return None + return SpatialReference( + coordinates=coordinates, restrictions=PlanningRestrictions() + ) + + +class _NoEpcFetcher: + """The target Property is EPC-less — the EPC API finds nothing for its UPRN.""" + + def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]: + return None + + +class _NoSolarFetcher: + def get_building_insights( + self, longitude: float, latitude: float + ) -> dict[str, Any]: + return {} + + +class _FakeAttributesReader: + """Stands in for Jun-te's property_overrides read adapter: the landlord-known + property type (here a House, code "0", matching the cohort) and built form "2".""" + + def attributes_for(self, property_id: int) -> PredictionTargetAttributes: + return PredictionTargetAttributes(property_type="0", built_form="2") + + +def _cohort_results() -> list[EpcSearchResult]: + return [ + EpcSearchResult( + certificate_number=f"CERT-{i}", + address_line_1=f"{i} Neighbour Road", + 
address_line_2=None, + address_line_3=None, + address_line_4=None, + postcode=_POSTCODE, + post_town="LEEDS", + uprn=20000 + i, + current_energy_efficiency_band="D", + registration_date=f"2023-0{i + 1}-01", + ) + for i in range(3) + ] + + +def test_epc_less_property_is_predicted_persisted_and_resolved_end_to_end( + db_engine: Engine, +) -> None: + # Arrange — an EPC-less Property exists in the database (postcode + UPRN known, + # no EPC lodged), plus its postcode cohort behind the faked EPC API. + with Session(db_engine) as session: + row = PropertyRow( + portfolio_id=1, postcode=_POSTCODE, address="1 Target Street", uprn=10000 + ) + session.add(row) + session.commit() + property_id = row.id + assert property_id is not None + + cohort_coords = {20000 + i: Coordinates(longitude=-1.55, latitude=53.81) for i in range(3)} + comparables_repo = EpcComparablePropertiesRepository( + _FakeCohortEpcClient(_cohort_results()), _FakeGeospatialRepo(cohort_coords) + ) + orchestrator = IngestionOrchestrator( + unit_of_work=lambda: PostgresUnitOfWork(lambda: Session(db_engine)), + epc_fetcher=_NoEpcFetcher(), + geospatial_repo=_FakeGeospatialRepo({10000: Coordinates(longitude=-1.55, latitude=53.81)}), + solar_fetcher=_NoSolarFetcher(), + comparables_repo=comparables_repo, + prediction_attributes_reader=_FakeAttributesReader(), + epc_prediction=EpcPrediction(), + ) + + # Act — run Ingestion: no lodged EPC found → predict from the cohort → persist. + orchestrator.run([property_id]) + + # Assert — reloading the Property through the real repository, its Effective + # EPC is the predicted picture, flagged by the "predicted" source path. 
+ with Session(db_engine) as session: + epc_repo = EpcPostgresRepository(session) + prop: Property = PropertyPostgresRepository( + session, epc_repo, SpatialPostgresRepository(session) + ).get(property_id) + + assert prop.epc is None # no lodged EPC + assert prop.predicted_epc is not None # a predicted one was persisted + assert prop.source_path == "predicted" + assert prop.effective_epc is prop.predicted_epc + assert prop.effective_epc.property_type == "0"