mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Slice 34: PCDB Table 172 postcode weather lookup (data layer)
Per SAP 10.2 Appendix U (p.124): "Weather data for each postcode district
are taken from the PCDB" — Table 172 of pcdb10.dat lodges ~3138 postcode
districts × monthly (temp, wind, solar). This is the data source for the
EPC's demand-side cascade (Current Carbon, Current Primary Energy, Fuel
Bill) — distinct from the rating-side cascade which uses UK-average
climate per the same Appendix U paragraph.
Adds:
- `PostcodeClimate` dataclass: area, district, region (1-21 fallback),
country, height, lat/lon, monthly temp/wind/solar tuples.
- `_parse_table_172_rows(text)`: parser over the pcdb10.dat row format
(45 comma-separated fields: 9 metadata + 12 T + 12 W + 12 R).
- `_split_postcode(postcode)`: outward-code splitter handling 1-2 letter
area + 1-2 digit district (e.g. "bd19 3tf" → ("BD", 19)).
- `postcode_climate(postcode)`: cached lookup with None fallback for
unknown postcodes (callers fall back to Appendix U region tables).
Verified BD3 (the Bradford district for Elmhurst fixture 000474) reproduces
U985 Block 2 wind exactly: (5.2, 5.2, 5.0, 4.4, 4.3, 3.9, 4.0, 3.8, 4.1,
4.4, 4.6, 4.9). 5 unit tests pinning the lookup, postcode parsing
(including 2-digit districts), case insensitivity, and graceful None
returns for unknown/malformed postcodes.
Data layer only — slice 35 plumbs this through cert_to_inputs as the
demand-side cascade. No changes to existing tests (1490/1490 still pass).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
729229ed61
commit
20b2bfa11d
2 changed files with 217 additions and 0 deletions
130
packages/domain/src/domain/sap/tables/pcdb/postcode_weather.py
Normal file
130
packages/domain/src/domain/sap/tables/pcdb/postcode_weather.py
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
"""PCDB Table 172 — postcode-district weather data.
|
||||
|
||||
Per SAP 10.2 Appendix U (p.124): "Weather data for each postcode district
|
||||
are taken from the PCDB and are used when the postcode district is known;
|
||||
in other cases the data from Tables U1 to U4 are used." Table 172 is the
|
||||
PCDB delivery format. ~3138 districts × monthly (temp, wind, solar).
|
||||
|
||||
The "rating" cascade (SAP rating, EI rating) uses UK-average climate per
|
||||
Appendix U; the "demand" cascade (EPC emissions, primary energy, fuel
|
||||
cost) uses the postcode-specific climate from this table.
|
||||
|
||||
Reference: PCDB10 data file `docs/sap-spec/pcdb10.dat`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Final, Optional
|
||||
|
||||
|
||||
# Table 172 lives in the PCDB data file kept under docs/sap-spec at the
# repository root. parents[7] climbs from this module's directory
# (packages/domain/src/domain/sap/tables/pcdb) up to that root.
_PCDB_DAT_PATH: Final[Path] = Path(__file__).resolve().parents[7].joinpath(
    "docs", "sap-spec", "pcdb10.dat"
)
# Line prefix that opens the Table 172 section inside the data file.
_TABLE_172_TAG: Final[str] = "$172"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PostcodeClimate:
    """Monthly weather record for a single postcode district.

    Each monthly tuple holds 12 values ordered January..December.

    Attributes:
        area: Postcode area letters, e.g. "BD".
        district: Numeric district within the area, e.g. 3.
        region: Fallback SAP climate region index (1-21) for this district.
            Useful when a caller needs to mix in region-only tables such as
            U3.2 (solar transformations) that are not delivered per postcode.
        country: Country/jurisdiction code (1-5).
        height_m: District elevation in metres.
        latitude_deg: Latitude of the district centroid.
        longitude_deg: Longitude of the district centroid.
        monthly_external_temp_c: T(1..12), external temperature in °C.
        monthly_wind_speed_m_per_s: W(1..12), wind speed in m/s.
        monthly_horizontal_solar_w_per_m2: R(1..12), horizontal solar
            radiation in W/m².
    """

    area: str
    district: int
    region: int
    country: int
    height_m: float
    latitude_deg: float
    longitude_deg: float
    monthly_external_temp_c: tuple[float, ...]
    monthly_wind_speed_m_per_s: tuple[float, ...]
    monthly_horizontal_solar_w_per_m2: tuple[float, ...]
|
||||
|
||||
|
||||
def _parse_table_172_rows(dat_text: str) -> dict[tuple[str, int], PostcodeClimate]:
    """Build the `{(area, district): PostcodeClimate}` lookup from the text
    of the PCDB data file.

    Only the Table 172 (Postcodes) section is read: everything before the
    line starting with the Table 172 tag is ignored, and parsing stops at the
    next line starting with "$". Comment lines ("#"), blank lines, rows with
    fewer than 45 comma-separated fields and rows whose district field is not
    an integer are skipped. Any other non-numeric field raises ValueError.
    """
    rows = iter(dat_text.splitlines())

    # Consume lines up to and including the Table 172 header.
    for header in rows:
        if header.startswith(_TABLE_172_TAG):
            break

    climates: dict[tuple[str, int], PostcodeClimate] = {}
    for row in rows:
        if row.startswith(_TABLE_172_TAG):
            # A repeated Table 172 tag is skipped, not treated as the end.
            continue
        if row.startswith("$"):
            break  # the next table begins here
        if row.startswith("#") or not row.strip():
            continue

        fields = row.split(",")
        if len(fields) < 45:
            continue
        try:
            district = int(fields[1])
        except ValueError:
            continue
        area = fields[0].strip().upper()

        # Field layout: 9 metadata columns, then 12 temperatures (9..20),
        # 12 wind speeds (21..32) and 12 solar values (33..44).
        temps = tuple(map(float, fields[9:21]))
        winds = tuple(map(float, fields[21:33]))
        solars = tuple(map(float, fields[33:45]))

        climates[(area, district)] = PostcodeClimate(
            area=area,
            district=district,
            region=int(fields[3]),
            country=int(fields[4]),
            height_m=float(fields[6]),
            latitude_deg=float(fields[7]),
            longitude_deg=float(fields[8]),
            monthly_external_temp_c=temps,
            monthly_wind_speed_m_per_s=winds,
            monthly_horizontal_solar_w_per_m2=solars,
        )
    return climates
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def _postcode_climate_table() -> dict[tuple[str, int], PostcodeClimate]:
    """Read and parse Table 172 once; later calls reuse the cached dict.

    Nothing is loaded at import time — the data file is only read the first
    time this function is called.
    """
    # The PCDB file is delivered as latin-1 rather than UTF-8 (it carries
    # degree symbols and similar characters).
    dat_text = _PCDB_DAT_PATH.read_text(encoding="latin-1")
    return _parse_table_172_rows(dat_text)
|
||||
|
||||
|
||||
def _split_postcode(postcode: str) -> Optional[tuple[str, int]]:
|
||||
"""Split a UK postcode into (area, district). "BD3 7XY" → ("BD", 3),
|
||||
"bd19 3tf" → ("BD", 19). Returns None when the format is unrecognised.
|
||||
|
||||
UK postcode structure: outward = 1-2 letter area + 1-2 digit district,
|
||||
optionally followed by a letter (e.g. "EC1A"). For Table 172 the
|
||||
district sub-letter is dropped — only the numeric part is used."""
|
||||
if not postcode:
|
||||
return None
|
||||
outward = postcode.strip().split()[0].upper()
|
||||
i = 0
|
||||
while i < len(outward) and outward[i].isalpha():
|
||||
i += 1
|
||||
area = outward[:i]
|
||||
rest = outward[i:]
|
||||
j = 0
|
||||
while j < len(rest) and rest[j].isdigit():
|
||||
j += 1
|
||||
if not area or j == 0:
|
||||
return None
|
||||
return area, int(rest[:j])
|
||||
|
||||
|
||||
def postcode_climate(postcode: Optional[str]) -> Optional[PostcodeClimate]:
    """Return the PCDB Table 172 weather record for a postcode's district.

    The result is None when no postcode is given, when its format is not
    recognised, or when the district has no entry in the table — callers are
    expected to fall back to the Appendix U region tables in those cases.
    """
    key = None if postcode is None else _split_postcode(postcode)
    # The table is only loaded once a usable (area, district) key exists.
    return None if key is None else _postcode_climate_table().get(key)
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
"""Tests for the PCDB Table 172 (postcode weather) lookup module.
|
||||
|
||||
The lookup parses pcdb10.dat at first use and caches it as a
|
||||
`{(area, district): PostcodeClimate}` dict. Callers invoke
|
||||
`postcode_climate(postcode_str)` to obtain the per-district monthly
|
||||
weather (temp, wind, solar) used by the demand-side cascade for EPC
|
||||
emissions / primary energy.
|
||||
|
||||
Reference: BRE PCDB pcdb10.dat Table 172 (Postcodes).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from domain.sap.tables.pcdb.postcode_weather import (
|
||||
PostcodeClimate,
|
||||
postcode_climate,
|
||||
)
|
||||
|
||||
|
||||
def test_postcode_climate_returns_bd3_record() -> None:
    """BD3 (Bradford district 3) is the postcode district of Elmhurst fixture
    000474. Its record must reproduce the U985 Block 2 wind speeds
    (5.2, 5.2, 5.0, ..., 4.9), i.e. the EPC demand-cascade climate."""
    # Arrange
    # Wind speed row from Block 2 of U985-0001-000474.txt:
    # 5.2 5.2 5.0 4.4 4.3 3.9 4.0 3.8 4.1 4.4 4.6 4.9 (22)
    expected_wind = (5.2, 5.2, 5.0, 4.4, 4.3, 3.9, 4.0, 3.8, 4.1, 4.4, 4.6, 4.9)

    # Act
    record = postcode_climate("bd3 8aq")

    # Assert
    assert record is not None
    assert (record.area, record.district) == ("BD", 3)
    assert record.region == 11  # East Pennines
    assert record.monthly_wind_speed_m_per_s == expected_wind
|
||||
|
||||
|
||||
def test_postcode_climate_parses_mixed_case() -> None:
    """Lookups are case-insensitive: the lower-case and upper-case spellings
    of the same postcode ("bd4 7jr" / "BD4 7JR") resolve to equal records."""
    # Arrange
    spellings = ("bd4 7jr", "BD4 7JR")

    # Act
    from_lower, from_upper = (postcode_climate(text) for text in spellings)

    # Assert
    assert from_lower is not None
    assert from_upper is not None
    assert from_lower == from_upper
|
||||
|
||||
|
||||
def test_postcode_climate_handles_two_digit_district() -> None:
    """A two-digit district ("BD19") is read in full — every digit after the
    letter prefix belongs to the district number."""
    # Arrange
    postcode = "bd19 3tf"

    # Act
    record = postcode_climate(postcode)

    # Assert
    assert record is not None
    assert (record.area, record.district) == ("BD", 19)
|
||||
|
||||
|
||||
def test_postcode_climate_returns_none_for_unknown_postcode() -> None:
    """A well-formed postcode with no Table 172 entry (e.g. synthetic test
    data) gives None, leaving the caller free to fall back to UK-average
    climate."""
    # Arrange
    unknown_postcode = "ZZ99 9ZZ"

    # Act
    record = postcode_climate(unknown_postcode)

    # Assert
    assert record is None
|
||||
|
||||
|
||||
def test_postcode_climate_returns_none_for_malformed() -> None:
    """Empty, missing, or letter-only postcodes give None instead of
    raising."""
    # Arrange
    malformed_inputs = ("", None, "XYZ")

    # Act / Assert
    for candidate in malformed_inputs:
        assert postcode_climate(candidate) is None
|
||||
Loading…
Add table
Reference in a new issue