Allow multiple scenarios for one portfolio without skipping all properties in the second scenario

This commit is contained in:
Daniel Roth 2026-06-26 11:24:26 +00:00
parent 17a9f0aafc
commit 632465f03f
2 changed files with 25 additions and 20 deletions

View file

@ -0,0 +1,15 @@
'NN14 1JZ': [742012]
'NN15 6TD': [741992]
'NN14 1JS': [742051, 742052]
'LE16 8HG': [741987, 741988, 741989]
'LE16 8PP': [742044, 742045, 742046]
'NN14 1PY': [742000, 742001, 742002, 742003]
'LE16 8HF': [741983, 741984, 741985, 741986, 741990, 741991]
'LE16 8HT': [742057, 742058, 742059, 742060, 742061, 742062]
'LE16 8LD': [741993, 741994, 741995, 741996, 741997, 741998, 741999]
'NN14 1JP': [742047, 742048, 742049, 742050, 742053, 742054, 742055, 742056]
'NN14 1LA': [742013, 742014, 742015, 742016, 742017, 742018, 742019, 742020]
'NN14 1PT': [742004, 742005, 742006, 742007, 742008, 742009, 742010, 742011]
'NN14 1EL': [742021, 742022, 742023, 742024, 742025, 742026, 742027, 742028, 742029, 742030, 742031, 742032, 742033, 742034, 742035, 742036, 742037, 742038, 742039, 742040, 742041, 742042, 742043]
Total postcodes: 13, total properties: 80

View file

@ -23,8 +23,8 @@ from utilities.logger import setup_logger
# ---------------------------------------------------------------------------
# CONFIG — edit these before running
# ---------------------------------------------------------------------------
PORTFOLIO_ID: int = 796
SCENARIO_ID: int = 1268
PORTFOLIO_ID: int = 805
SCENARIO_ID: int = 1267
SQS_QUEUE_NAME: str = "modelling_e2e-queue-dev"
# Max number of properties to process this run (cost cap).
@ -42,16 +42,8 @@ COMPLETED_SINCE: datetime | None = datetime(
# True → Lambda runs the full pipeline but skips all DB writes (safe for testing).
DRY_RUN: bool = False
# False → Lambda skips the Google Solar fetch (re-uses stored Solar data).
REFETCH_SOLAR: bool = True
# False → use stored lodged EPC for properties that have one; properties with no
# stored lodged EPC are treated as EPC-less and routed to prediction (no API call).
REFETCH_EPC: bool = True
# False → use stored predicted EPC for EPC-less properties that have one; live
# prediction still runs when no stored predicted EPC exists for the property.
REPREDICT_EPC: bool = True
# True → Lambda skips the Google Solar fetch.
NO_SOLAR: bool = False
# ---------------------------------------------------------------------------
_REPO_ROOT = Path(__file__).resolve().parents[1]
@ -82,8 +74,8 @@ def _load_postcode_map() -> dict[str, list[int]]:
return result
def _completed_property_ids(since: datetime) -> set[int]:
"""Return property IDs with a completed modelling_e2e sub_task on or after *since*."""
def _completed_property_ids(since: datetime, scenario_id: int) -> set[int]:
"""Return property IDs with a completed modelling_e2e sub_task for *scenario_id* on or after *since*."""
load_env(ENV_PATH)
engine = build_engine()
with engine.connect() as conn:
@ -96,8 +88,9 @@ def _completed_property_ids(since: datetime) -> set[int]:
AND st.status = 'complete'
AND st.job_completed >= :since
AND (st.inputs::jsonb) ? 'property_id'
AND ((st.inputs::jsonb)->>'scenario_id')::int = :scenario_id
"""),
{"since": since},
{"since": since, "scenario_id": scenario_id},
).fetchall()
return {int(r[0]) for r in rows}
@ -158,8 +151,7 @@ def main() -> None:
logger.info(
f"sending {len(batches)} messages "
f"(portfolio={PORTFOLIO_ID}, scenario={SCENARIO_ID}, "
f"dry_run={DRY_RUN}, refetch_solar={REFETCH_SOLAR}, "
f"refetch_epc={REFETCH_EPC}, repredict_epc={REPREDICT_EPC}) → {sqs_url}"
f"dry_run={DRY_RUN}, no_solar={NO_SOLAR}) → {sqs_url}"
)
for batch in batches:
@ -170,9 +162,7 @@ def main() -> None:
"property_ids": batch,
"portfolio_id": PORTFOLIO_ID,
"scenario_id": SCENARIO_ID,
"refetch_solar": REFETCH_SOLAR,
"refetch_epc": REFETCH_EPC,
"repredict_epc": REPREDICT_EPC,
"no_solar": NO_SOLAR,
"dry_run": DRY_RUN,
}
),