From c3d56b00dd86423360b437d4727c2dc119701fd8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Jun 2026 01:52:44 +0000 Subject: [PATCH] chore(epc-prediction): grow validation corpus to 40 postcodes (ADR-0029) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set N_POSTCODES 150 -> 40 as the gradual-growth step from the 3-postcode smoke. 40 postcodes / 1113 certs / 578 leave-one-out predictions is enough for stable, trustworthy metrics (the smoke's 2 usable postcodes were dominated by oddball flats — floor_area mean|.| 52.6 there vs 12.7 here). Resumable + reproducible (random.seed(2026)); raise again to scale up. Co-Authored-By: Claude Opus 4.8 --- scripts/fetch_epc_prediction_corpus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fetch_epc_prediction_corpus.py b/scripts/fetch_epc_prediction_corpus.py index 2e69ee6c..24f31646 100644 --- a/scripts/fetch_epc_prediction_corpus.py +++ b/scripts/fetch_epc_prediction_corpus.py @@ -62,7 +62,7 @@ CACHE.mkdir(parents=True, exist_ok=True) WINDOW = {"date_start": "2026-01-01", "date_end": "2026-05-31"} TOTAL_PAGES = 7402 SEED_PAGES = 20 # random search pages → postcode seeds -N_POSTCODES = 150 # distinct postcodes to pull full cohorts for +N_POSTCODES = 40 # distinct postcodes to pull full cohorts for random.seed(2026) # reproducible draw