mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Locality of reference — SAP-specific docs, specs, and runtime data
now live alongside the calculator that consumes them, mirroring the
prior packages→domain layout moves.
Move targets:
- Narrative MDs → domain/sap10_calculator/docs/
NEXT_AGENT_PROMPT.md, HANDOVER_NEXT.md, SAP_CALCULATOR.md
- Spec PDFs → domain/sap10_calculator/docs/specs/
RdSAP 10 Specification 10-06-2025.pdf
PCDF_Spec_Rev-06b_12_May_2021.pdf
sap-10-2-full-specification-2025-03-14.pdf
sap-10-3-full-specification-2026-01-13.pdf
- PCDB runtime data → domain/sap10_calculator/tables/pcdb/data/
pcdb10.dat (8.3MB) + 7× pcdb_table_*.jsonl (18MB total)
Path code rewrites (load-bearing):
- tables/pcdb/__init__.py: replaced parents[4]/'docs'/'sap-spec' with
Path(__file__).resolve().parent/'data' for Table 105 JSONL loading.
- tables/pcdb/postcode_weather.py: same rebase for the pcdb10.dat path
read by _postcode_climate_table().
- tables/pcdb/etl.py __main__: same rebase for the manual ETL invocation
(source + output_dir both now point inside the package).
- tests/test_pcdb_etl.py: _PCDB_DAT_PATH now derives from
parents[1]/'tables'/'pcdb'/'data' (was parents[3]/'docs'/'sap-spec').
Citation rewrites:
- 12 .py docstrings and 4 .md docs (ADRs + READMEs + narrative docs)
had `docs/sap-spec/<file>` strings rewritten to their new locations.
- Two cases where the catch-all sed misfired (an ADR-0009 line about a
PCDB extract; the pcdb __init__.py docstring about ETL output) were
hand-corrected to point at tables/pcdb/data/ rather than docs/specs/.
docs/sap-spec/ is now empty (will be removed in a follow-up sweep or
left as a vestigial empty dir for future repurposing). ADRs 0009 and
0010 remain at docs/adr/ — they're part of the chronological
cross-cutting decision log, not calculator-specific narrative.
Verified:
- Calculator's 1e-4 production gate
(test_api_001479_full_chain_sap_matches_worksheet_pdf_exactly) GREEN.
- Wider sweep (domain/sap10_calculator/ + domain/sap10_ml/): 1654
passed / 20 failed — exact pre-move baseline. All 20 failures
pre-existing (10 hand-built skeleton + 4 cohort chain + 6 cohort
diff).
- Pyright net-zero on the 4 touched runtime/test files (0 errors)
and unchanged on heat_transmission.py (13) / cert_to_inputs.py (35) /
mapper.py (33).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
130 lines
4.9 KiB
Python
130 lines
4.9 KiB
Python
"""PCDB Table 172 — postcode-district weather data.
|
||
|
||
Per SAP 10.2 Appendix U (p.124): "Weather data for each postcode district
are taken from the PCDB and are used when the postcode district is known;
in other cases the data from Tables U1 to U4 are used." Table 172 is the
PCDB delivery format. ~3138 districts × monthly (temp, wind, solar).

The "rating" cascade (SAP rating, EI rating) uses UK-average climate per
Appendix U; the "demand" cascade (EPC emissions, primary energy, fuel
cost) uses the postcode-specific climate from this table.

Reference: PCDB10 data file `domain/sap10_calculator/tables/pcdb/data/pcdb10.dat`.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from dataclasses import dataclass
|
||
from functools import lru_cache
|
||
from pathlib import Path
|
||
from typing import Final, Optional
|
||
|
||
|
||
# The PCDB data file lives in the `data` directory beside this module, so the
# path is derived from the module's own resolved location.
_PCDB_DAT_PATH: Final[Path] = Path(__file__).resolve().parent.joinpath(
    "data", "pcdb10.dat"
)

# Line prefix that marks the start of Table 172 inside the data file.
_TABLE_172_TAG: Final[str] = "$172"
|
||
|
||
|
||
@dataclass(frozen=True)
class PostcodeClimate:
    """Immutable weather record for a single postcode district.

    The three monthly series are tuples ordered January through December.

    `region` holds the SAP climate region index (1-21) for the district. It
    is the fallback for callers that need region-only tables such as U3.2
    (solar transformations), which have not been delivered per postcode.
    """

    # Postcode area letters, e.g. "BD".
    area: str
    # Numeric district within the area, e.g. 3.
    district: int
    # SAP region 1-21 (for fallbacks).
    region: int
    # Country/jurisdiction code, 1-5.
    country: int
    # District elevation in metres.
    height_m: float
    # District centroid latitude, degrees.
    latitude_deg: float
    # District centroid longitude, degrees.
    longitude_deg: float
    # T(1..12): monthly external temperature, °C.
    monthly_external_temp_c: tuple[float, ...]
    # W(1..12): monthly wind speed, m/s.
    monthly_wind_speed_m_per_s: tuple[float, ...]
    # R(1..12): monthly horizontal solar irradiance, W/m².
    monthly_horizontal_solar_w_per_m2: tuple[float, ...]
|
||
|
||
|
||
def _parse_table_172_rows(dat_text: str) -> dict[tuple[str, int], PostcodeClimate]:
    """Build a `{(area, district): PostcodeClimate}` lookup from the Table 172
    (Postcodes) section of the PCDB data file text.

    Scanning starts after the first line beginning with the Table 172 tag and
    stops at the next line beginning with "$" (the following table). Lines
    that start with "#", blank lines, rows with fewer than 45 comma-separated
    fields and rows whose district field is not an integer are skipped. When
    the same (area, district) occurs more than once, the later row wins.

    Field positions read from each row: 0 area, 1 district, 3 region,
    4 country, 6 height, 7 latitude, 8 longitude, 9-20 monthly temperature,
    21-32 monthly wind speed, 33-44 monthly horizontal solar.
    """
    lookup: dict[tuple[str, int], PostcodeClimate] = {}
    remaining = iter(dat_text.splitlines())

    # Phase 1: discard everything up to and including the Table 172 header.
    # If the header never appears the iterator is exhausted and phase 2 is a
    # no-op, so an empty lookup is returned.
    for raw in remaining:
        if raw.startswith(_TABLE_172_TAG):
            break

    # Phase 2: consume data rows until the next table header.
    for raw in remaining:
        # The tag itself starts with "$", so a repeated Table 172 header must
        # be recognised before the generic "next table" test below.
        if raw.startswith(_TABLE_172_TAG):
            continue
        if raw.startswith("$"):
            break
        if raw.startswith("#") or not raw.strip():
            continue

        fields = raw.split(",")
        if len(fields) < 45:
            continue

        try:
            district_no = int(fields[1])
        except ValueError:
            continue
        area_code = fields[0].strip().upper()

        # Three consecutive 12-month series; the length check above guarantees
        # each slice has exactly 12 entries.
        temperatures = tuple(map(float, fields[9:21]))
        wind_speeds = tuple(map(float, fields[21:33]))
        irradiance = tuple(map(float, fields[33:45]))

        lookup[(area_code, district_no)] = PostcodeClimate(
            area=area_code,
            district=district_no,
            region=int(fields[3]),
            country=int(fields[4]),
            height_m=float(fields[6]),
            latitude_deg=float(fields[7]),
            longitude_deg=float(fields[8]),
            monthly_external_temp_c=temperatures,
            monthly_wind_speed_m_per_s=wind_speeds,
            monthly_horizontal_solar_w_per_m2=irradiance,
        )

    return lookup
|
||
|
||
|
||
@lru_cache(maxsize=1)
def _postcode_climate_table() -> dict[tuple[str, int], PostcodeClimate]:
    """Return the parsed Table 172 lookup, loading it on first use.

    The result of the first successful call is memoised, so later postcode
    lookups reuse the same dict instead of re-reading the data file.
    """
    # PCDB delivery uses latin-1 (degree symbols, etc.) — not UTF-8.
    raw_text = _PCDB_DAT_PATH.read_text(encoding="latin-1")
    return _parse_table_172_rows(raw_text)
|
||
|
||
|
||
def _split_postcode(postcode: str) -> Optional[tuple[str, int]]:
    """Split a UK postcode into (area, district). "BD3 7XY" → ("BD", 3),
    "bd19 3tf" → ("BD", 19). Returns None when the format is unrecognised,
    including empty or whitespace-only input.

    UK postcode structure: outward = 1-2 letter area + 1-2 digit district,
    optionally followed by a letter (e.g. "EC1A"). For Table 172 the
    district sub-letter is dropped — only the numeric part is used.

    A full postcode written without a space ("BD37XY") is also accepted: the
    inward code is always one digit followed by two letters, so that trailing
    triple is removed before the outward code is parsed. Without this the
    inward digit would be absorbed into the district ("BD37XY" → 37).
    """
    if not postcode:
        return None
    tokens = postcode.split()
    if not tokens:
        # Whitespace-only input has no outward code to parse.
        return None
    outward = tokens[0].upper()

    if len(tokens) == 1:
        # Unspaced form: peel off a trailing digit+letter+letter inward code,
        # but only when what remains still contains a digit, so an outward
        # code on its own ("BD3", "EC1A") is never truncated.
        head, tail = outward[:-3], outward[-3:]
        if (
            len(head) >= 2
            and tail[0].isdigit()
            and tail[1:].isalpha()
            and any(ch.isdigit() for ch in head)
        ):
            outward = head

    # Leading run of letters is the area.
    i = 0
    while i < len(outward) and outward[i].isalpha():
        i += 1
    area = outward[:i]
    rest = outward[i:]

    # Following run of digits is the district; anything after is ignored.
    j = 0
    while j < len(rest) and rest[j].isdigit():
        j += 1
    if not area or j == 0:
        return None
    try:
        return area, int(rest[:j])
    except ValueError:
        # str.isdigit() accepts characters such as "²" that int() rejects.
        return None
|
||
|
||
|
||
def postcode_climate(postcode: Optional[str]) -> Optional[PostcodeClimate]:
    """Return the PCDB Table 172 weather record for a postcode's district.

    The result is None when the postcode is None, when it cannot be split
    into an (area, district) pair, or when that pair has no entry in the
    table. Callers are expected to fall back to the Appendix U region tables
    in those cases.
    """
    # A missing postcode and an unparseable one are handled identically.
    district_key = None if postcode is None else _split_postcode(postcode)
    if district_key is None:
        return None
    return _postcode_climate_table().get(district_key)
|