diff --git a/packages/README.md b/packages/README.md new file mode 100644 index 00000000..0911a1d3 --- /dev/null +++ b/packages/README.md @@ -0,0 +1,16 @@ +# Shared packages + +Workspace packages consumed by `services/*`. Each package is its own Python distribution with its own `pyproject.toml`; services import via the workspace dependency mechanism (`{ workspace = true }`). + +| Package | Purpose | +|---------|---------| +| [`domain/`](./domain/) | Shared domain types — `Property`, `BaselinePerformance`, `Plan`, `Scenario`, `EpcPropertyData`, etc. No persistence, no IO, no business logic. | +| [`repos/`](./repos/) | Persistence layer — one repo per aggregate. Owns the SQL. Depends on `domain`. | +| [`fetchers/`](./fetchers/) | External API clients (gov EPC, Ofgem, Google Solar, etc.). Depend on `domain` for response shapes. | +| [`utils/`](./utils/) | Cross-cutting infra — logging, S3, CloudWatch URL builders, SQS task helpers. | + +## Adding a new shared package + +Only when a real second consumer materialises. Don't pre-shatter (`repos-epc`, `repos-property`, ...) — split when a deployment needs to drop a dep, not before. + +See [`../ara_backend_design.md`](../ara_backend_design.md) §11 for the broader monorepo layout and [`../CONTEXT.md`](../CONTEXT.md) for the domain glossary that names the types living in `domain/`. diff --git a/packages/domain/README.md b/packages/domain/README.md new file mode 100644 index 00000000..6dc69d41 --- /dev/null +++ b/packages/domain/README.md @@ -0,0 +1,30 @@ +# domna-domain + +Shared domain types — `Property`, `Properties`, `BaselinePerformance`, `Plan`, `PlanPhase`, `Scenario`, `ScenarioPhase`, `ScenarioSnapshot`, `Recommendation`, `OptimisedPackage`, `EpcPropertyData`, etc. + +**Boundary**: types only. No persistence, no IO, no business logic. Other packages and services depend on `domna-domain`; this package depends on nothing internal. + +Domain definitions live in [`../../CONTEXT.md`](../../CONTEXT.md). 
New types added here must match the glossary terms. + +## Layout + +``` +src/domain/ +├── __init__.py +├── property.py # Property, Properties, PropertyIdentity +├── site_notes.py +├── landlord_overrides.py +├── baseline_performance.py # lodged + effective pair (ADR-0004) +├── plan.py # Plan, PlanPhase, OptimisedPackage +├── scenario.py # Scenario, ScenarioPhase, ScenarioSnapshot (ADR-0005) +├── recommendation.py +├── geospatial.py +├── solar.py +├── anomaly_flags.py +└── ml/ + ├── __init__.py + ├── transform.py # EpcMlTransform (versioned per §8.3) + └── schema.py +``` + +When `datatypes/epc/domain/` folds in, the EPC schema types move under `src/domain/epc/`. diff --git a/packages/domain/pyproject.toml b/packages/domain/pyproject.toml new file mode 100644 index 00000000..5e820371 --- /dev/null +++ b/packages/domain/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "domna-domain" +version = "0.1.0" +description = "Shared domain types for the Ara modelling pipeline and sibling Domna services." +requires-python = ">=3.11" +dependencies = [] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/domain"] diff --git a/packages/domain/src/domain/__init__.py b/packages/domain/src/domain/__init__.py new file mode 100644 index 00000000..1d52198c --- /dev/null +++ b/packages/domain/src/domain/__init__.py @@ -0,0 +1,4 @@ +"""Shared domain types for the Ara modelling pipeline and sibling Domna services. + +No persistence, no IO, no business logic. See README.md for layout. +""" diff --git a/packages/fetchers/README.md b/packages/fetchers/README.md new file mode 100644 index 00000000..ebe47f74 --- /dev/null +++ b/packages/fetchers/README.md @@ -0,0 +1,19 @@ +# domna-fetchers + +External API clients. 
Each fetcher is responsible for one external system — `EpcClientService` for the gov EPC API, `GeospatialFetcher` for Ordnance Survey, `SolarFetcher` for Google Solar, `FuelRatesFetcher` for Ofgem, `CarbonFactorsFetcher` for Defra. + +**Boundary**: makes HTTP calls + returns raw or lightly-mapped responses. No DB, no business logic. Modelling services never depend on fetchers — only orchestrators do (per [ADR-0003](../../docs/adr/0003-strict-ingestion-modelling-separation.md)). + +## Layout + +``` +src/fetchers/ +├── __init__.py +├── epc_client.py # wraps backend/epc_client/ +├── geospatial.py +├── solar.py +├── fuel_rates_fetcher.py +└── carbon_factors_fetcher.py +``` + +`backend/epc_client/` will fold into `epc_client.py` during the migration; until then this module re-exports from the legacy location. diff --git a/packages/fetchers/pyproject.toml b/packages/fetchers/pyproject.toml new file mode 100644 index 00000000..69404681 --- /dev/null +++ b/packages/fetchers/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "domna-fetchers" +version = "0.1.0" +description = "External API clients — gov EPC, Ofgem, Google Solar, Defra, etc." +requires-python = ">=3.11" +dependencies = [ + "domna-domain", + "httpx>=0.27", +] + +[tool.uv.sources] +domna-domain = { workspace = true } + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/fetchers"] diff --git a/packages/fetchers/src/fetchers/__init__.py b/packages/fetchers/src/fetchers/__init__.py new file mode 100644 index 00000000..74646907 --- /dev/null +++ b/packages/fetchers/src/fetchers/__init__.py @@ -0,0 +1,4 @@ +"""External API clients for Ara and sibling services. + +One fetcher per external system. No DB, no business logic. See README.md. 
+""" diff --git a/packages/repos/README.md b/packages/repos/README.md new file mode 100644 index 00000000..990b66db --- /dev/null +++ b/packages/repos/README.md @@ -0,0 +1,27 @@ +# domna-repos + +Persistence layer. One repo per aggregate; owns the SQL for its tables. Callers see only domain objects from `domna-domain`. + +**Boundary**: depends on `domna-domain` for types. No external IO except the DB. No business logic — services do that. + +## Repos (per [PRD §7.3](../../ara_backend_design.md)) + +``` +src/repos/ +├── __init__.py +├── unit_of_work.py +├── property_repo.py +├── epc_cache_repo.py +├── site_notes_repo.py +├── landlord_overrides_repo.py +├── recommendations_repo.py +├── generic_data_repo.py +├── fuel_rates_repo.py +├── carbon_factors_repo.py +├── heating_system_assumptions_repo.py +└── subtask_repo.py +``` + +Each repo has a `Fake*Repo` companion in its service's test tree (typically `services/ara/tests/fakes/`) — dict-backed, no DB. + +DDL migrations are scoped to sub-PRD (iii); during Phase 0 repos may delegate into the legacy `backend/app/db/db_funcs.*` modules. diff --git a/packages/repos/pyproject.toml b/packages/repos/pyproject.toml new file mode 100644 index 00000000..53689812 --- /dev/null +++ b/packages/repos/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "domna-repos" +version = "0.1.0" +description = "Persistence layer — one repo per aggregate. Owns the SQL." +requires-python = ">=3.11" +dependencies = [ + "domna-domain", + "sqlalchemy>=2.0", +] + +[tool.uv.sources] +domna-domain = { workspace = true } + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/repos"] diff --git a/packages/repos/src/repos/__init__.py b/packages/repos/src/repos/__init__.py new file mode 100644 index 00000000..a981395a --- /dev/null +++ b/packages/repos/src/repos/__init__.py @@ -0,0 +1,4 @@ +"""Persistence layer for the Ara domain aggregates. + +One repo per aggregate. 
Owns SQL; exposes domain objects. See README.md. +""" diff --git a/packages/utils/README.md b/packages/utils/README.md new file mode 100644 index 00000000..1fba6457 --- /dev/null +++ b/packages/utils/README.md @@ -0,0 +1,15 @@ +# domna-utils + +Cross-cutting infrastructure helpers. Nothing domain-specific — anything in here should be portable across services. + +## Will live here (migrating from `utils/` and `backend/utils/`) + +- Logging — `logger.py` +- S3 — `s3.py` +- Pandas helpers — `pandas_utils.py` +- CloudWatch URL builder — `cloudwatch.py` +- SQS subtask helpers — `subtasks.py` + +## Will NOT live here + +Service-specific parsers (Osmosis condition report, full-SAP parser, SharePoint integration) move into the service that owns them, not here. diff --git a/packages/utils/pyproject.toml b/packages/utils/pyproject.toml new file mode 100644 index 00000000..cf739bbd --- /dev/null +++ b/packages/utils/pyproject.toml @@ -0,0 +1,15 @@ +[project] +name = "domna-utils" +version = "0.1.0" +description = "Cross-cutting infrastructure helpers — logging, S3, CloudWatch, SQS tasks." +requires-python = ">=3.11" +dependencies = [ + "boto3>=1.34", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/utils"] diff --git a/packages/utils/src/utils/__init__.py b/packages/utils/src/utils/__init__.py new file mode 100644 index 00000000..d010a3be --- /dev/null +++ b/packages/utils/src/utils/__init__.py @@ -0,0 +1,4 @@ +"""Cross-cutting infrastructure helpers — logging, S3, CloudWatch, SQS tasks. + +Nothing domain-specific belongs here. See README.md. +""" diff --git a/pyproject.toml b/pyproject.toml index 49108861..75aabc82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1 +1,13 @@ [tool.pyright] + +# uv workspace root. +# Each workspace member has its own pyproject.toml under packages/<name>/ or services/<name>/. +# Run `uv sync` at the root to install everything; `uv sync --package <name>` for one. 
+[tool.uv.workspace] +members = [ + "packages/domain", + "packages/repos", + "packages/fetchers", + "packages/utils", + "services/ara", +] diff --git a/services/README.md b/services/README.md new file mode 100644 index 00000000..c82ef6a4 --- /dev/null +++ b/services/README.md @@ -0,0 +1,13 @@ +# Services + +Each subdirectory is a deployable unit — typically a Lambda image. Own `pyproject.toml`, own `Dockerfile`, own deps. Lambda bundle contains only that service's deps + its workspace deps. + +| Service | Purpose | +|---------|---------| +| [`ara/`](./ara/) | The Domna retrofit modelling backend — ingestion + modelling pipelines, all 9 services in [PRD §9.2](../ara_backend_design.md). | + +Other Domna services (address2uprn, hubspot, pashub, ecmk, magicplan) live in the legacy `backend/` and `etl/` trees for now; they are slated to migrate here as their owners pick them up — see [PRD §11](../ara_backend_design.md). When that work starts, scaffold the service under `services/<name>/` and add it to the workspace members in the root `pyproject.toml`. + +## Service boundary + +A service can `import domain.*`, `import repos.*`, `import fetchers.*`, `import utils.*` (workspace deps). It **cannot** import another service's modules — they are separate distributions with no cross-import path. This is the structural enforcement of the modelling/ingestion separation ([ADR-0003](../docs/adr/0003-strict-ingestion-modelling-separation.md)). diff --git a/services/ara/Dockerfile b/services/ara/Dockerfile new file mode 100644 index 00000000..c45d6bc1 --- /dev/null +++ b/services/ara/Dockerfile @@ -0,0 +1,12 @@ +# Lambda image for the Ara modelling backend. +# +# This is a scaffold — final image will install only ara + its workspace deps +# (domna-domain, domna-repos, domna-fetchers, domna-utils) plus ML/data libraries. +# Build via uv to keep cold-start size contained. 
+ +FROM public.ecr.aws/lambda/python:3.11 + +# TODO: install uv, sync this service's deps from the workspace lock file, +# copy src/ara/ into ${LAMBDA_TASK_ROOT}/, set CMD to the Lambda handler. + +CMD ["ara.lambdas.handler.handler"] diff --git a/services/ara/README.md b/services/ara/README.md new file mode 100644 index 00000000..71e71a5d --- /dev/null +++ b/services/ara/README.md @@ -0,0 +1,30 @@ +# ara + +The Domna retrofit modelling backend. Replaces the legacy `backend/engine/engine.py` monolith with a service-oriented pipeline that survives the 30 May 2026 gov EPC API cut-over and that other team members can read, fix, and extend. + +Design document: [`../../ara_backend_design.md`](../../ara_backend_design.md). +Domain glossary: [`../../CONTEXT.md`](../../CONTEXT.md). + +## Layout + +``` +src/ara/ +├── services/ # the 9 domain services from PRD §9.2: +│ # EpcRemappingService, EpcPredictionService, +│ # FeatureBuilder, EpcEnergyDerivationService, +│ # RebaseliningService, RecommendationService, +│ # ImpactPredictionService, OptimiserService, +│ # ValuationService, ResultsPersister +├── orchestrators/ # IngestionPipeline, ModellingPipeline, RefreshOrchestrator +└── lambdas/ # one handler.py per Lambda + the event-shape contracts +``` + +## Pipeline + +See [PRD §9.4](../../ara_backend_design.md) for the per-batch step order. Briefly: per-property setup (steps 1–6) runs once per Property; the per-scenario × per-phase loop (steps 7–10) re-derives candidates and impact predictions against the rolling Effective EPC state; results are persisted under one Unit of Work per (Plan, Scenario). + +## Testing + +- `tests/unit/` — service tests against fakes from `tests/fakes/`. No DB, no network, no ML lambda. +- `tests/integration/` — real Postgres (testcontainers / localstack), fake fetchers + fake ML lambdas. +- ML transform contract tests live with `domain.ml.transform` in `packages/domain/`. 
diff --git a/services/ara/pyproject.toml b/services/ara/pyproject.toml new file mode 100644 index 00000000..3556a15f --- /dev/null +++ b/services/ara/pyproject.toml @@ -0,0 +1,28 @@ +[project] +name = "ara" +version = "0.1.0" +description = "The Domna retrofit modelling backend. Ingestion + modelling pipelines." +requires-python = ">=3.11" +dependencies = [ + "domna-domain", + "domna-repos", + "domna-fetchers", + "domna-utils", + "pandas>=2.0", + "pandas-stubs", + "numpy>=1.26", + "pydantic>=2.0", +] + +[tool.uv.sources] +domna-domain = { workspace = true } +domna-repos = { workspace = true } +domna-fetchers = { workspace = true } +domna-utils = { workspace = true } + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/ara"] diff --git a/services/ara/src/ara/__init__.py b/services/ara/src/ara/__init__.py new file mode 100644 index 00000000..26856c73 --- /dev/null +++ b/services/ara/src/ara/__init__.py @@ -0,0 +1,4 @@ +"""The Domna retrofit modelling backend. + +See README.md and ara_backend_design.md (repo root) for the architecture. +""" diff --git a/services/ara/src/ara/lambdas/__init__.py b/services/ara/src/ara/lambdas/__init__.py new file mode 100644 index 00000000..93b08582 --- /dev/null +++ b/services/ara/src/ara/lambdas/__init__.py @@ -0,0 +1,5 @@ +"""Lambda handlers + event-shape contracts. + +One handler per deployable Lambda. See PRD §4.6 for the ModelTriggerRequest +shape. +""" diff --git a/services/ara/src/ara/orchestrators/__init__.py b/services/ara/src/ara/orchestrators/__init__.py new file mode 100644 index 00000000..4d2c9a60 --- /dev/null +++ b/services/ara/src/ara/orchestrators/__init__.py @@ -0,0 +1,5 @@ +"""Orchestrators for the Ara pipeline. + +IngestionPipeline, ModellingPipeline, RefreshOrchestrator. The only place +where step order is encoded and where fetchers + services + repos meet. 
+""" diff --git a/services/ara/src/ara/services/__init__.py b/services/ara/src/ara/services/__init__.py new file mode 100644 index 00000000..b561f336 --- /dev/null +++ b/services/ara/src/ara/services/__init__.py @@ -0,0 +1,9 @@ +"""Domain services for the Ara modelling pipeline (PRD §9.2). + +EpcRemappingService, EpcPredictionService, FeatureBuilder, +EpcEnergyDerivationService, RebaseliningService, RecommendationService, +ImpactPredictionService, OptimiserService, ValuationService, ResultsPersister. + +Each service operates on `Properties` and depends only on repos + other services ++ domain objects. No external IO (per ADR-0003). +""" diff --git a/services/ara/tests/__init__.py b/services/ara/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/services/ara/tests/fakes/__init__.py b/services/ara/tests/fakes/__init__.py new file mode 100644 index 00000000..cc032044 --- /dev/null +++ b/services/ara/tests/fakes/__init__.py @@ -0,0 +1,4 @@ +"""Fake repos and fetchers for unit tests. + +One FakeRepo per real repo; dict-backed; no DB. Same for fetchers. +""" diff --git a/services/ara/tests/integration/__init__.py b/services/ara/tests/integration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/services/ara/tests/unit/__init__.py b/services/ara/tests/unit/__init__.py new file mode 100644 index 00000000..e69de29b