From 6e9f831296fa0ea01bb7390bb17d4f6c8a5a6564 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 15 Jun 2026 06:42:19 +0000 Subject: [PATCH] chore(epc-prediction): grow validation corpus to 150 postcodes Bumps N_POSTCODES 40 -> 150 for the fetch script. Larger corpus (150 postcodes / 3719 certs) reduces leave-one-out variance and unblocks the recency-template work (#1223), which regressed on the noisier 36-target gate fixture. Corpus itself stays out of git (gitignored /tmp + persistent backup at /workspaces/home/epc_prediction_corpus_backup). Co-Authored-By: Claude Opus 4.8 --- scripts/fetch_epc_prediction_corpus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fetch_epc_prediction_corpus.py b/scripts/fetch_epc_prediction_corpus.py index 24f31646..2e69ee6c 100644 --- a/scripts/fetch_epc_prediction_corpus.py +++ b/scripts/fetch_epc_prediction_corpus.py @@ -62,7 +62,7 @@ CACHE.mkdir(parents=True, exist_ok=True) WINDOW = {"date_start": "2026-01-01", "date_end": "2026-05-31"} TOTAL_PAGES = 7402 SEED_PAGES = 20 # random search pages → postcode seeds -N_POSTCODES = 40 # distinct postcodes to pull full cohorts for +N_POSTCODES = 150 # distinct postcodes to pull full cohorts for random.seed(2026) # reproducible draw