diff --git a/harness/console.py b/harness/console.py index f1b9675f..dcb0c541 100644 --- a/harness/console.py +++ b/harness/console.py @@ -39,6 +39,7 @@ from repositories.fuel_rates.fuel_rates_static_file_repository import ( ) from repositories.geospatial.geospatial_repository import GeospatialRepository from repositories.product.product_json_repository import ProductJsonRepository +from repositories.product.product_repository import ProductRepository from tests.orchestration.fakes import ( FakeEpcRepo, FakePlanRepository, @@ -173,6 +174,8 @@ def run_modelling( planning_restrictions: PlanningRestrictions = PlanningRestrictions(), solar_insights: Optional[dict[str, Any]] = None, considered_measures: Optional[frozenset[MeasureType]] = None, + products: Optional[ProductRepository] = None, + scenario: Optional[Scenario] = None, print_table: bool = True, ) -> Plan: """Run ONLY the Modelling stage over ``epc`` with no database — skipping @@ -184,7 +187,22 @@ def run_modelling( ``solar_insights`` is the Property's raw Google Solar ``buildingInsights`` JSON (as persisted by ``SolarRepository``); when given, the solar Recommendation Generator sees the dwelling's potential and can offer Solar - PV Options (ADR-0026).""" + PV Options (ADR-0026). + + ``products`` overrides the Product catalogue source (default: the JSON + sample catalogue) — pass a read-only ``ProductPostgresRepository`` to price + against the live ``material`` table. ``scenario`` overrides the default + Increasing-EPC-to-``goal_band`` Scenario — pass a Scenario read from the DB + so the run targets a real ``scenario_id`` (its ``goal_value``/budget drive + the Optimiser); the computed Plan is then keyed by that Scenario's id.""" + scenario_obj = scenario or Scenario( + id=_SCENARIO_ID, + goal="Increasing EPC", + goal_value=goal_band, + budget=None, + is_default=True, + ) + scenario_id = scenario_obj.id plan_repo = FakePlanRepository() property_repo = FakePropertyRepo( { @@ -208,18 +226,8 @@ def run_modelling( if solar_insights is not None else None ), - scenario=FakeScenarioRepository( - { - _SCENARIO_ID: Scenario( - id=_SCENARIO_ID, - goal="Increasing EPC", - goal_value=goal_band, - budget=None, - is_default=True, - ) - } - ), - product=ProductJsonRepository(catalogue_path), + scenario=FakeScenarioRepository({scenario_id: scenario_obj}), + product=products or ProductJsonRepository(catalogue_path), plan=plan_repo, ) @@ -229,12 +237,12 @@ def run_modelling( fuel_rates=FuelRatesStaticFileRepository(), ).run( property_ids=[_PROPERTY_ID], - scenario_ids=[_SCENARIO_ID], + scenario_ids=[scenario_id], portfolio_id=_PORTFOLIO_ID, considered_measures=considered_measures, ) - plan = plan_repo.saved[(_PROPERTY_ID, _SCENARIO_ID)] + plan = plan_repo.saved[(_PROPERTY_ID, scenario_id)] if print_table: print("\n" + format_plan_table(plan)) return plan diff --git a/scripts/run_modelling_e2e.py b/scripts/run_modelling_e2e.py index bb62c524..80a53db6 100644 --- a/scripts/run_modelling_e2e.py +++ b/scripts/run_modelling_e2e.py @@ -3,30 +3,41 @@ print the recommendations for inspection. The local DB's Properties have no linked, ingested EPC yet (Ingestion's source clients are still stubbed — #1136), so this script does the ingestion step -inline for inspection: it reads each Property's UPRN from the DB, fetches the -latest EPC **live** from the gov EPC API by UPRN, then runs the Modelling stage -in memory (every Recommendation Generator → the Optimiser → a costed, attributed -Plan). It is read-only on the DB (just the UPRN lookup) and persists nothing — -purely for inspecting recommendations. Prints a per-Property plan table and -writes a Markdown + CSV summary. +inline: it reads each Property's UPRN from the DB, fetches the latest EPC +**live** from the gov EPC API by UPRN, resolves the UPRN's spatial reference +from S3, and fetches Google Solar — then runs the Modelling stage (every +Recommendation Generator → the Optimiser → a costed, attributed Plan). The same +local computation runs whether or not you store the result: by default it +persists **nothing** (the run is for inspecting recommendations); pass +`--persist` to write the inputs + the Plan to the DB. + +To keep the inspected recommendations identical to what gets stored, **both +modes price against the live ``material`` catalogue (read-only)** and model +against a real **Scenario** read from the DB — not the JSON sample catalogue. +Pass `--scenario-id` to target a real Scenario (its ``goal_value`` drives the +band); without it the run synthesises an Increasing-EPC-to-``--goal`` Scenario. +``--measures`` restricts the run to a comma-separated set of measure types +(mirroring the legacy `inclusions`) — e.g. only HHRSH + Solar PV. Config: loads `backend/.env` for the DB creds (`DB_*`), the EPC API token (`EPC_AUTH_TOKEN`), the Google Solar key (`GOOGLE_SOLAR_API_KEY`) and the S3 reference bucket (`DATA_BUCKET`) — the agent never sees the secrets. AWS creds -come from the ambient `~/.aws` profile. Run from the worktree root so imports -resolve to this checkout: +come from the ambient `~/.aws` profile. Run from the worktree root: - python -m scripts.run_modelling_e2e 115 116 117 # goal band C (default) - python -m scripts.run_modelling_e2e --goal B 115 116 117 # a different target band - python -m scripts.run_modelling_e2e --no-solar 115 116 # skip the Google Solar leg + # inspect only (no DB writes), HHRSH + Solar PV, against Scenario 1263: + python -m scripts.run_modelling_e2e --scenario-id 1263 \ + --measures high_heat_retention_storage_heaters,solar_pv 115 116 117 + # same run, but persist the Plans (needs --portfolio-id): + python -m scripts.run_modelling_e2e --scenario-id 1263 --portfolio-id 4 \ + --measures high_heat_retention_storage_heaters,solar_pv --persist 115 116 117 + python -m scripts.run_modelling_e2e --no-solar 115 116 # skip the Google leg -Per Property the script resolves the UPRN's spatial reference from the Ordnance -Survey Open-UPRN parquet in S3 (`GeospatialS3Repository`): the planning -protections (conservation/listed/heritage) gate the wall + solar measures, and -the coordinates drive a live Google Solar `buildingInsights` fetch so the Solar -PV Options can fire (ADR-0026). Buildings S3 doesn't cover, or that Google has -no solar coverage for, fall back to unrestricted / no-solar and are still -modelled. Pass `--no-solar` to skip the Google leg entirely. +Per Property the spatial reference (S3 Open-UPRN parquet) gives the planning +protections (conservation/listed/heritage — gate the wall + solar measures) and +the coordinates that drive the Google Solar fetch (ADR-0026). Buildings S3 +doesn't cover, or that Google has no solar coverage for, fall back to +unrestricted / no-solar and are still modelled. Pass `--no-solar` to skip the +Google leg. """ from __future__ import annotations @@ -47,8 +58,10 @@ sys.path.insert(0, str(_REPO_ROOT)) # worktree root first — avoid the import from datatypes.epc.domain.epc_property_data import EpcPropertyData # noqa: E402 from domain.geospatial.planning_restrictions import PlanningRestrictions # noqa: E402 from domain.geospatial.spatial_reference import SpatialReference # noqa: E402 +from domain.modelling.measure_type import MeasureType # noqa: E402 from domain.modelling.plan import Plan, PlanMeasure # noqa: E402 -from harness.console import DEFAULT_CATALOGUE, run_modelling # noqa: E402 +from domain.modelling.scenario import Scenario # noqa: E402 +from harness.console import run_modelling # noqa: E402 from harness.plan_table import format_plan_table # noqa: E402 from infrastructure.epc_client.epc_client_service import EpcClientService # noqa: E402 from infrastructure.solar.google_solar_api_client import ( # noqa: E402 @@ -59,7 +72,15 @@ from repositories.geospatial.geospatial_s3_repository import ( # noqa: E402 GeospatialS3Repository, ParquetReader, ) -from sqlalchemy import create_engine, text # noqa: E402 +from repositories.product.product_postgres_repository import ( # noqa: E402 + ProductPostgresRepository, +) +from repositories.postgres_unit_of_work import PostgresUnitOfWork # noqa: E402 +from repositories.scenario.scenario_postgres_repository import ( # noqa: E402 + ScenarioPostgresRepository, +) +from sqlalchemy import Engine, create_engine, text # noqa: E402 +from sqlmodel import Session # noqa: E402 _ENV_PATH = _REPO_ROOT / "backend" / ".env" _MARKDOWN_PATH = Path("modelling_e2e.md") @@ -130,11 +151,15 @@ def _solar_insights_for( return None # no Google solar coverage at this point — model without it -def _uprns_for(property_ids: list[int]) -> dict[int, Optional[int]]: - """Read each Property's UPRN from the DB (read-only).""" - engine = create_engine( +def _engine() -> Engine: + """A connection-pooled engine to DevAssessmentModelDB (DB_* creds).""" + return create_engine( _db_url(), pool_pre_ping=True, connect_args={"connect_timeout": 10} ) + + +def _uprns_for(engine: Engine, property_ids: list[int]) -> dict[int, Optional[int]]: + """Read each Property's UPRN from the DB (read-only).""" with engine.connect() as conn: rows = conn.execute( text("SELECT id, uprn FROM property WHERE id = ANY(:ids)"), @@ -143,6 +168,26 @@ def _uprns_for(property_ids: list[int]) -> dict[int, Optional[int]]: return {int(pid): (int(uprn) if uprn is not None else None) for pid, uprn in rows} +def _scenario_for(session: Session, scenario_id: int) -> Scenario: + """Read the Scenario the run targets (read-only). An Increasing-EPC Scenario + must carry a ``goal_value`` (band) — the old null-band rows were a fixed bug + and crash the Optimiser's target — so reject one that does not.""" + scenario: Scenario = ScenarioPostgresRepository(session).get_many([scenario_id])[0] + if scenario.goal == "Increasing EPC" and not scenario.goal_value: + raise ValueError( + f"scenario {scenario_id} has no goal_value (band); pick a recent one" + ) + return scenario + + +def _parse_measures(raw: Optional[str]) -> Optional[frozenset[MeasureType]]: + """Parse `--measures a,b,c` into a `considered_measures` allowlist, or None + (consider every modelled measure) when unset. Raises on an unknown type.""" + if raw is None: + return None + return frozenset(MeasureType(token.strip()) for token in raw.split(",") if token.strip()) + + def _context_summary( spatial: Optional[SpatialReference], solar_insights: Optional[dict[str, Any]] ) -> str: @@ -173,10 +218,57 @@ def _measure_summary(measure: PlanMeasure) -> str: ) +def _persist( + engine: Engine, + *, + property_id: int, + uprn: int, + portfolio_id: int, + scenario: Scenario, + epc: EpcPropertyData, + spatial: Optional[SpatialReference], + solar_insights: Optional[dict[str, Any]], + plan: Plan, +) -> None: + """Write the run's inputs (EPC + spatial + solar) and the computed Plan to + the DB in one Unit of Work, then commit. ``PlanPostgresRepository`` replaces + any existing Plan for ``(property_id, scenario.id)`` (idempotent re-run).""" + with PostgresUnitOfWork(lambda: Session(engine)) as uow: + uow.epc.save(epc, property_id=property_id, portfolio_id=portfolio_id) + if spatial is not None: + uow.spatial.save(uprn, spatial) + if solar_insights is not None: + uow.solar.save(property_id, solar_insights) + uow.plan.save( + plan, + property_id=property_id, + scenario_id=scenario.id, + portfolio_id=portfolio_id, + is_default=scenario.is_default, + ) + uow.commit() + + def main() -> None: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("property_ids", type=int, nargs="+", help="Property ids to model") - parser.add_argument("--goal", default="C", help="target EPC band (default C)") + parser.add_argument("--goal", default="C", help="target band when no --scenario-id (default C)") + parser.add_argument( + "--scenario-id", type=int, default=None, help="model against this DB Scenario" + ) + parser.add_argument( + "--measures", + default=None, + help="comma-separated measure types to consider (default: all)", + ) + parser.add_argument( + "--portfolio-id", type=int, default=None, help="portfolio id (required for --persist)" + ) + parser.add_argument( + "--persist", + action="store_true", + help="WRITE the inputs + Plan to the DB (default: inspect only, no writes)", + ) parser.add_argument( "--no-solar", action="store_true", @@ -184,18 +276,39 @@ def main() -> None: ) args = parser.parse_args() + if args.persist and (args.scenario_id is None or args.portfolio_id is None): + parser.error("--persist requires --scenario-id and --portfolio-id") + _load_env(_ENV_PATH) epc_client = EpcClientService(os.environ["EPC_AUTH_TOKEN"]) geospatial = GeospatialS3Repository(_s3_parquet_reader(os.environ["DATA_BUCKET"])) solar_client = GoogleSolarApiClient(os.environ["GOOGLE_SOLAR_API_KEY"]) - uprns = _uprns_for(args.property_ids) - - print( - f"modelling {len(args.property_ids)} propertie(s) (goal band {args.goal}); " - f"EPCs fetched live by UPRN, modelled in memory — no DB writes...\n" + engine = _engine() + considered = _parse_measures(args.measures) + uprns = _uprns_for(engine, args.property_ids) + # One read-only session for the live `material` catalogue, reused across the + # batch so both store and no-store runs price against the same DB rows. + catalogue_session = Session(engine) + products = ProductPostgresRepository(catalogue_session) + scenario: Optional[Scenario] = ( + _scenario_for(catalogue_session, args.scenario_id) + if args.scenario_id is not None + else None ) - md_lines: list[str] = [f"# Modelling recommendations (goal band {args.goal})\n"] + target = ( + f"scenario {scenario.id} (band {scenario.goal_value})" + if scenario is not None + else f"synthesised Increasing-EPC band {args.goal}" + ) + measures_note = ",".join(sorted(considered)) if considered else "all measures" + mode = "PERSISTING to DB" if args.persist else "no DB writes" + print( + f"modelling {len(args.property_ids)} propertie(s) · {target} · {measures_note} · " + f"{mode} (DB material catalogue, live EPC/solar)...\n" + ) + + md_lines: list[str] = [f"# Modelling recommendations ({target}, {measures_note})\n"] csv_rows: list[str] = [ "property_id,uprn,baseline_sap,post_sap,measures,measure_types,cost_of_works" ] @@ -218,11 +331,26 @@ def main() -> None: plan: Plan = run_modelling( epc, goal_band=args.goal, - catalogue_path=DEFAULT_CATALOGUE, planning_restrictions=restrictions, solar_insights=solar_insights, + considered_measures=considered, + products=products, + scenario=scenario, print_table=False, ) + if args.persist: + assert scenario is not None # guaranteed by the --persist guard + _persist( + engine, + property_id=property_id, + uprn=uprn, + portfolio_id=args.portfolio_id, + scenario=scenario, + epc=epc, + spatial=spatial, + solar_insights=solar_insights, + plan=plan, + ) except Exception as error: # noqa: BLE001 — one bad property must not stop the run line = f"property {property_id} (uprn {uprn}): ERROR — {type(error).__name__}: {error}" print(line + "\n") @@ -255,6 +383,7 @@ def main() -> None: f"{'|'.join(measure_types)},{plan.cost_of_works:.0f}" ) + catalogue_session.close() _MARKDOWN_PATH.write_text("\n".join(md_lines) + "\n", encoding="utf-8") _CSV_PATH.write_text("\n".join(csv_rows) + "\n", encoding="utf-8") print(f"wrote {_MARKDOWN_PATH.resolve()}")