Slice 34: PCDB Table 172 postcode weather lookup (data layer)

Per SAP 10.2 Appendix U (p.124): "Weather data for each postcode district
are taken from the PCDB" — Table 172 of pcdb10.dat lodges ~3138 postcode
districts × monthly (temp, wind, solar). This is the data source for the
EPC's demand-side cascade (Current Carbon, Current Primary Energy, Fuel
Bill) — distinct from the rating-side cascade which uses UK-average
climate per the same Appendix U paragraph.

Adds:
- `PostcodeClimate` dataclass: area, district, region (1-21 fallback),
  country, height, lat/lon, monthly temp/wind/solar tuples.
- `_parse_table_172_rows(text)`: parser over the pcdb10.dat row format
  (45 comma-separated fields: 9 metadata + 12 T + 12 W + 12 R).
- `_split_postcode(postcode)`: outward-code splitter handling 1-2 letter
  area + 1-2 digit district (e.g. "bd19 3tf" → ("BD", 19)).
- `postcode_climate(postcode)`: cached lookup with None fallback for
  unknown postcodes (callers fall back to Appendix U region tables).

Verified BD3 (the Bradford district for Elmhurst fixture 000474) reproduces
U985 Block 2 wind exactly: (5.2, 5.2, 5.0, 4.4, 4.3, 3.9, 4.0, 3.8, 4.1,
4.4, 4.6, 4.9). 5 unit tests pinning the lookup, postcode parsing
(including 2-digit districts), case insensitivity, and graceful None
returns for unknown/malformed postcodes.

Data layer only — slice 35 plumbs this through cert_to_inputs as the
demand-side cascade. No changes to existing tests (1490/1490 still pass).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-24 09:07:50 +00:00
parent 729229ed61
commit 20b2bfa11d
2 changed files with 217 additions and 0 deletions

View file

@ -0,0 +1,130 @@
"""PCDB Table 172 — postcode-district weather data.
Per SAP 10.2 Appendix U (p.124): "Weather data for each postcode district
are taken from the PCDB and are used when the postcode district is known;
in other cases the data from Tables U1 to U4 are used." Table 172 is the
PCDB delivery format. ~3138 districts × monthly (temp, wind, solar).
The "rating" cascade (SAP rating, EI rating) uses UK-average climate per
Appendix U; the "demand" cascade (EPC emissions, primary energy, fuel
cost) uses the postcode-specific climate from this table.
Reference: PCDB10 data file `docs/sap-spec/pcdb10.dat`.
"""
from __future__ import annotations
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
from typing import Final, Optional
# Absolute path of the PCDB data file that holds Table 172. The path is
# built relative to this module: `parents[0]` is the directory containing
# this file, so `parents[7]` is eight levels above the file itself.
# NOTE(review): presumably that ancestor is the repository root — confirm,
# and re-check the index whenever this module is moved.
_PCDB_DAT_PATH: Final[Path] = (
    Path(__file__).resolve().parents[7] / "docs" / "sap-spec" / "pcdb10.dat"
)
# Line prefix that marks the start of Table 172 inside the data file; the
# parser looks for a line beginning with this text.
_TABLE_172_TAG: Final[str] = "$172"
@dataclass(frozen=True)
class PostcodeClimate:
    """Immutable monthly weather record for a single postcode district.

    Each monthly tuple is ordered January through December.

    Attributes:
        area: Postcode area letters, e.g. "BD".
        district: Numeric postcode district, e.g. 3.
        region: SAP climate region index (1-21). Kept so callers can fall
            back to region-only tables (such as the U3.2 solar
            transformations) that are not delivered per postcode.
        country: Country/jurisdiction code (1-5).
        height_m: District elevation in metres.
        latitude_deg: Latitude of the district centroid, in degrees.
        longitude_deg: Longitude of the district centroid, in degrees.
        monthly_external_temp_c: External temperature T(1..12), in °C.
        monthly_wind_speed_m_per_s: Wind speed W(1..12), in m/s.
        monthly_horizontal_solar_w_per_m2: Horizontal solar radiation
            R(1..12), in W/m².
    """

    # Identification of the district.
    area: str
    district: int

    # Fallback / jurisdiction codes.
    region: int
    country: int

    # Geography.
    height_m: float
    latitude_deg: float
    longitude_deg: float

    # Monthly series, January first.
    monthly_external_temp_c: tuple[float, ...]
    monthly_wind_speed_m_per_s: tuple[float, ...]
    monthly_horizontal_solar_w_per_m2: tuple[float, ...]
def _parse_table_172_rows(dat_text: str) -> dict[tuple[str, int], PostcodeClimate]:
"""Parse Table 172 (Postcodes) rows from the PCDB data file text into a
`{(area, district): PostcodeClimate}` lookup."""
out: dict[tuple[str, int], PostcodeClimate] = {}
in_table = False
for line in dat_text.splitlines():
if line.startswith(_TABLE_172_TAG):
in_table = True
continue
if not in_table:
continue
if line.startswith("$"):
break # next table starts
if line.startswith("#") or not line.strip():
continue
parts = line.split(",")
if len(parts) < 45:
continue
area = parts[0].strip().upper()
try:
district = int(parts[1])
except ValueError:
continue
temps = tuple(float(parts[9 + i]) for i in range(12))
winds = tuple(float(parts[21 + i]) for i in range(12))
solars = tuple(float(parts[33 + i]) for i in range(12))
out[(area, district)] = PostcodeClimate(
area=area,
district=district,
region=int(parts[3]),
country=int(parts[4]),
height_m=float(parts[6]),
latitude_deg=float(parts[7]),
longitude_deg=float(parts[8]),
monthly_external_temp_c=temps,
monthly_wind_speed_m_per_s=winds,
monthly_horizontal_solar_w_per_m2=solars,
)
return out
@lru_cache(maxsize=1)
def _postcode_climate_table() -> dict[tuple[str, int], PostcodeClimate]:
    """Read and parse Table 172 from the PCDB data file.

    The result is memoised by `lru_cache`, so the file is read on the first
    successful call and the same dict is returned afterwards.
    """
    # The PCDB delivery is latin-1 encoded (degree symbols, etc.), not UTF-8.
    dat_text = _PCDB_DAT_PATH.read_text(encoding="latin-1")
    table = _parse_table_172_rows(dat_text)
    return table
def _split_postcode(postcode: str) -> Optional[tuple[str, int]]:
"""Split a UK postcode into (area, district). "BD3 7XY" → ("BD", 3),
"bd19 3tf" ("BD", 19). Returns None when the format is unrecognised.
UK postcode structure: outward = 1-2 letter area + 1-2 digit district,
optionally followed by a letter (e.g. "EC1A"). For Table 172 the
district sub-letter is dropped only the numeric part is used."""
if not postcode:
return None
outward = postcode.strip().split()[0].upper()
i = 0
while i < len(outward) and outward[i].isalpha():
i += 1
area = outward[:i]
rest = outward[i:]
j = 0
while j < len(rest) and rest[j].isdigit():
j += 1
if not area or j == 0:
return None
return area, int(rest[:j])
def postcode_climate(postcode: Optional[str]) -> Optional[PostcodeClimate]:
    """Return the PCDB Table 172 weather record for a postcode's district.

    Args:
        postcode: UK postcode text, or None when the postcode is unknown.

    Returns:
        The matching `PostcodeClimate`, or None when `postcode` is None,
        cannot be split into an area and district, or names a district
        that is absent from the table. Callers are expected to fall back
        to the Appendix U region tables in that case.
    """
    district_key = None if postcode is None else _split_postcode(postcode)
    if district_key is None:
        return None
    table = _postcode_climate_table()
    return table.get(district_key)

View file

@ -0,0 +1,87 @@
"""Tests for the PCDB Table 172 (postcode weather) lookup module.
The lookup parses pcdb10.dat at first use and caches it as a
`{(area, district): PostcodeClimate}` dict. Callers invoke
`postcode_climate(postcode_str)` to obtain the per-district monthly
weather (temp, wind, solar) used by the demand-side cascade for EPC
emissions / primary energy.
Reference: BRE PCDB pcdb10.dat Table 172 (Postcodes).
"""
from __future__ import annotations
from domain.sap.tables.pcdb.postcode_weather import (
PostcodeClimate,
postcode_climate,
)
def test_postcode_climate_returns_bd3_record() -> None:
    """BD3 (Bradford district 3, the postcode district of Elmhurst fixture
    000474) resolves to a record whose wind speeds match Block 2 of
    U985-0001-000474.txt, i.e. the EPC demand-cascade climate."""
    # Arrange
    # Block 2 of U985-0001-000474.txt: Wind speed
    # 5.2 5.2 5.0 4.4 4.3 3.9 4.0 3.8 4.1 4.4 4.6 4.9 (22)
    expected_wind = (
        5.2, 5.2, 5.0, 4.4, 4.3, 3.9, 4.0, 3.8, 4.1, 4.4, 4.6, 4.9,
    )

    # Act
    record = postcode_climate("bd3 8aq")

    # Assert
    assert record is not None
    assert record.area == "BD"
    assert record.district == 3
    assert record.region == 11  # East Pennines
    assert record.monthly_wind_speed_m_per_s == expected_wind
def test_postcode_climate_parses_mixed_case() -> None:
    """Lower-case "bd4 7jr" and upper-case "BD4 7JR" resolve to equal
    records, because the lookup is case-insensitive."""
    # Arrange
    spellings = ("bd4 7jr", "BD4 7JR")

    # Act
    from_lower, from_upper = (postcode_climate(text) for text in spellings)

    # Assert
    assert from_lower is not None
    assert from_upper is not None
    assert from_lower == from_upper
def test_postcode_climate_handles_two_digit_district() -> None:
    """A two-digit district ("BD19") is read in full: both digits after the
    area letters form the district number."""
    # Arrange
    postcode = "bd19 3tf"

    # Act
    record = postcode_climate(postcode)

    # Assert
    assert record is not None
    assert record.area == "BD"
    assert record.district == 19
def test_postcode_climate_returns_none_for_unknown_postcode() -> None:
    """A well-formed postcode with no Table 172 entry (e.g. synthetic test
    data) yields None, letting callers fall back to UK-average climate."""
    # Arrange
    unknown = "ZZ99 9ZZ"

    # Act
    record = postcode_climate(unknown)

    # Assert
    assert record is None
def test_postcode_climate_returns_none_for_malformed() -> None:
    """Empty, missing (None) and letter-only postcodes give None instead of
    raising."""
    # Arrange
    malformed_inputs = ("", None, "XYZ")

    # Act / Assert
    for candidate in malformed_inputs:
        assert postcode_climate(candidate) is None