Merge branch 'main' of https://github.com/Hestia-Homes/Model into feature/hyde_make_it_more_accurate_with_tests

This commit is contained in:
Jun-te Kim 2026-06-18 16:24:26 +00:00
commit 269a7fdaa7
16 changed files with 797 additions and 198 deletions

View file

@ -1,168 +0,0 @@
{
"permissions": {
"allow": [
"Bash(python -c ' *)",
"Bash(python -m pytest tests/ -v --no-cov)",
"Bash(git -C /workspaces/model diff --stat)",
"Bash(python -c \"import moto; print\\('moto installed:', moto.__version__\\)\")",
"Bash(grep -E \"\\\\.\\(py|sql\\)$\")",
"Bash(xargs basename -a)",
"Bash(ls -la /workspaces/home 2>/dev/null | head -20)",
"Read(//workspaces/home/**)",
"Bash(command -v uv)",
"Bash(uv --version)",
"Bash(echo \"uv: $\\(uv --version\\)\")",
"Bash(python -m pyright --version)",
"Bash(npx --no-install pyright --version)",
"Bash(node /home/vscode/.npm/_npx/110e52990071af13/node_modules/pyright/dist/pyright.js --outputjson etl/hubspot/hubspotClient.py etl/hubspot/hubspotDataTodB.py etl/hubspot/project_data.py etl/hubspot/scripts/scraper/main.py backend/app/db/models/hubspot_project_data.py backend/app/db/models/hubspot_deal_data.py etl/hubspot/tests/test_scraper_handler.py etl/hubspot/tests/test_hubspot_data_to_db.py etl/hubspot/tests/test_hubspot_client_integration.py)",
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); s=d['summary']; print\\('errors',s['errorCount'],'warnings',s['warningCount']\\); [print\\(f\\\\\"{x['severity']}: {x['file'].split\\('/'\\)[-1]}:{x['range']['start']['line']+1} {x['rule'] if 'rule' in x else ''} -- {x['message'].splitlines\\(\\)[0]}\\\\\"\\) for x in d['generalDiagnostics'] if x['severity']=='error']\")",
"Bash(node /home/vscode/.npm/_npx/110e52990071af13/node_modules/pyright/dist/pyright.js etl/hubspot/project_data.py backend/app/db/models/hubspot_project_data.py)",
"Bash(python -c \"import sqlmodel, sqlalchemy, hubspot; print\\('deps importable'\\)\")",
"Bash(python -m pytest --version)",
"Bash(python -m pytest etl/hubspot/tests/ -m \"not integration\" -p no:cacheprovider -o addopts=\"\" -q)",
"Bash(node /home/vscode/.npm/_npx/110e52990071af13/node_modules/pyright/dist/pyright.js --pythonpath /usr/local/bin/python etl/hubspot/project_data.py backend/app/db/models/hubspot_project_data.py)",
"Bash(node /home/vscode/.npm/_npx/110e52990071af13/node_modules/pyright/dist/pyright.js --pythonpath /usr/local/bin/python etl/hubspot/company_data.py)",
"Bash(python /tmp/inspect_project.py)",
"Read(//home/vscode/github/**)",
"Bash(find / -maxdepth 6 -type d -name assessment-model)",
"Bash(terraform fmt *)",
"Bash(terraform init *)",
"Bash(terraform validate *)",
"Bash(python -m pytest tests/orchestration/test_landlord_description_overrides_orchestrator.py -q)",
"Bash(python -m pytest tests/orchestration/test_landlord_description_overrides_orchestrator.py tests/repositories/landlord_overrides -q --no-cov)",
"Bash(python -m py_compile tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py)",
"Bash(git add *)",
"Bash(git status *)",
"Bash(cp pyrightconfig.json /tmp/pyrightconfig.bak.json)",
"Bash(python3 -)",
"Bash(npx --yes pyright infrastructure/postgres/landlord_overrides_postgres_repository.py applications/landlord_description_overrides/handler.py tests/repositories/landlord_overrides/postgres/test_landlord_overrides_postgres_repository.py orchestration/classifiable_column.py)",
"Bash(git stash *)",
"Bash(npx --yes pyright applications/landlord_description_overrides/handler.py)",
"Bash(python3 -m pytest tests/repositories/landlord_overrides/postgres/test_landlord_overrides_postgres_repository.py -q)",
"Bash(python3 -m pytest tests/orchestration/test_landlord_description_overrides_orchestrator.py tests/repositories/landlord_overrides/ -q -p no:cov)",
"Bash(python3 -m pytest tests/orchestration/test_landlord_description_overrides_orchestrator.py tests/repositories/landlord_overrides/ -q)",
"Bash(cp pyrightconfig.json /tmp/pyrightconfig.bak2.json)",
"Bash(npx --yes pyright infrastructure/landlord_overrides/landlord_overrides_postgres_repository.py tests/repositories/landlord_overrides/postgres/test_landlord_overrides_postgres_repository.py)",
"Bash(GIT_EDITOR=true git rebase --continue)",
"Bash(git worktree *)",
"Bash(git branch *)",
"Bash(echo \"exit: $?\")",
"Bash(git reset *)",
"Bash(echo \"fetch-exit: $?\")",
"Bash(sed -n *)",
"Bash(set -e)",
"Bash(git rm *)",
"Bash(git ls-tree *)",
"Bash(command -v pyright)",
"Bash(git merge *)",
"Bash(git rev-list *)",
"Bash(git remote *)",
"Bash(git ls-remote *)",
"Bash(grep -v '\\\\.sample$')",
"Bash([ -f \".git/hooks/$h\" ])",
"Bash(python3 -m pytest tests/test_lambda_packaging.py -p no:cacheprovider --no-header -q)",
"Bash(python3 -m pytest tests/test_lambda_packaging.py -p no:cov --no-header -q)",
"Bash(python3 -m pytest tests/test_lambda_packaging.py --no-cov -p no:cacheprovider -q)",
"Bash(python3 -m pytest tests/test_lambda_packaging.py --no-cov -q)",
"Bash(python3 -m pyright tests/test_lambda_packaging.py)",
"Bash(python3 -m pytest tests/ --collect-only --no-cov -q)",
"Bash(python3 -c \"import yaml; [print\\(f, 'OK'\\) for f in ['.github/workflows/ddd_tests.yml','.github/workflows/unit_tests.yml'] if yaml.safe_load\\(open\\(f\\)\\)]\")",
"Bash(python3 -m pytest tests/ -q --no-cov)",
"Bash(python3 -c ' *)",
"Bash(python3 -m pyright --outputjson scripts/hyde/main.py)",
"Bash(python3 -c \"import json,sys; d=json.load\\(sys.stdin\\); s=d['summary']; print\\('errors:',s['errorCount'],'warnings:',s['warningCount']\\); [print\\(f\\\\\" L{e['range']['start']['line']+1}: {e['message'].splitlines\\(\\)[0]}\\\\\"\\) for e in d['generalDiagnostics'] if e['severity']=='error']\")",
"Bash(python3 -c \"import json,sys; d=json.load\\(sys.stdin\\); s=d['summary']; print\\('pyright errors:',s['errorCount'],'warnings:',s['warningCount']\\); [print\\(f\\\\\" L{e['range']['start']['line']+1}: {e['message'].splitlines\\(\\)[0]}\\\\\"\\) for e in d['generalDiagnostics'] if e['severity']=='error']\")",
"Bash(python3 main.py)",
"Bash(netstat -ltnp)",
"Bash(fuser 8000/tcp)",
"Bash(kill 351610 685390 351625)",
"Read(//proc/531213/net/**)",
"Bash(ps -p 351610 -o pid=)",
"Bash(python -)",
"Bash(python -m pytest \"datatypes/epc/domain/tests/test_from_rdsap_schema.py::TestRdSap20_0_0ReducedFieldSynthesis::test_cert_omitting_sap_windows_maps_without_missing_required_field\")",
"Bash(python -m pytest datatypes/epc/domain/tests/test_from_rdsap_schema.py infrastructure/epc_client/tests/test_mapper_corpus.py)",
"Bash(git -C /workspaces/model stash push -u -- datatypes/epc/schema/rdsap_schema_20_0_0.py datatypes/epc/domain/mapper.py datatypes/epc/domain/tests/test_from_rdsap_schema.py)",
"Bash(python -m pytest \"datatypes/epc/domain/tests/test_from_rdsap_schema.py::TestFromRdSapSchema21_0_1::test_total_floor_area\")",
"Bash(git -C /workspaces/model stash pop)",
"Bash(python -m pytest \"datatypes/epc/domain/tests/test_from_rdsap_schema.py::TestRdSap20_0_0ReducedFieldSynthesis::test_band_more_than_typical_scales_glazing_by_1_25\")",
"Bash(python -m pytest \"datatypes/epc/domain/tests/test_from_rdsap_schema.py::TestRdSap20_0_0ReducedFieldSynthesis\")",
"Bash(python -m pytest \"datatypes/epc/domain/tests/test_from_rdsap_schema.py::TestRdSap20_0_0ReducedFieldSynthesis::test_synthesised_glazing_type_routed_through_cascade\")",
"Bash(python -m pytest infrastructure/epc_client/tests/test_mapper_corpus.py -k \"wip_schema_20\" -q -p no:cov --no-header -rN)",
"Bash(python -m pytest infrastructure/epc_client/tests/test_mapper_corpus.py -k \"wip_schema_20\" -q --no-header)",
"Bash(timeout 150 python scripts/eon/find_epc_data.py)",
"Bash(python -m scripts.run_modelling_e2e --scenario-id 1266 --portfolio-id 785 --measures high_heat_retention_storage_heaters,solar_pv 709634 709635 709636 709638 709639 709640 709641 709642 709644)",
"Read(//home/vscode/.claude/skills/grill-me/**)",
"Bash(awk '{print $9, $5}')",
"Bash(grep -E \"\\\\.py$\")",
"Bash(python3 scripts/eon/harvest_certs.py)",
"Bash(python scripts/eon/harvest_certs.py)",
"Bash(git commit -q -m ' *)",
"Bash(timeout 250 npx -y pyright@1.1.410 datatypes/epc/domain/mapper.py)",
"Bash(git check-ignore *)",
"Bash(timeout 250 npx -y pyright@1.1.410 datatypes/epc/schema/rdsap_schema_18_0.py datatypes/epc/domain/mapper.py)",
"Bash(timeout 250 npx -y pyright@1.1.410 datatypes/epc/schema/rdsap_schema_17_1.py datatypes/epc/domain/mapper.py)",
"Bash(python -m scripts.run_modelling_e2e --scenario-id 1266 --portfolio-id 785 --measures high_heat_retention_storage_heaters,solar_pv 709634 709635 709636 709638 709639 709640 709641 709642 709643 709644 709645 709637)",
"Read(//workspaces/**)",
"Bash(python -m scripts.run_modelling_e2e --scenario-id 1266 --portfolio-id 785 --measures high_heat_retention_storage_heaters,solar_pv 709634)",
"Bash(python 2_export_data.py)",
"Bash(git commit -q -m 'Map RdSAP-Schema-17.0 certs to EpcPropertyData 🟥 *)",
"Bash(python -m py_compile tests/utilities/floats.py backend/app/db/functions/tests/test_portfolio_functions.py backend/documents_parser/tests/test_summary_pdf_mapper_chain.py backend/documents_parser/tests/test_heating_systems_corpus.py)",
"Bash(PYTHONPATH=/workspaces/model python -)",
"Bash(npx --no-install pyright datatypes/epc/schema/sap_schema_17_1.py datatypes/epc/domain/mapper.py datatypes/epc/domain/tests/test_from_sap_schema.py)",
"Bash(git commit -q -m 'Map full-SAP cert identity and scalar fields to EpcPropertyData 🟩 *)",
"Bash(npx --no-install pyright --outputjson datatypes/epc/domain/mapper.py)",
"Bash(git commit -q -m 'Carry full-SAP measured fabric U-value descriptions into the domain model 🟩 *)",
"Bash(python scripts/hyde/elmhurst_session.py --help)",
"Bash(python scripts/hyde/elmhurst_session.py status)",
"Bash(DISPLAY=:99 import -window root /tmp/hyde-viewer/elm_now2.png)",
"Bash(DISPLAY=:99 timeout 90 python scripts/hyde/elmhurst_explore.py \"https://rdsap10online.elmhurstenergy.co.uk/Processing/WebFormAddress.aspx?Guid=B44A0DB4-4C08-4241-B818-86F060172105&Referrer=https%3a%2f%2fmembers.elmhurstenergy.co.uk%2fRdsapZone%2fHome.aspx\")",
"Bash(DISPLAY=:99 timeout 90 python scripts/hyde/elmhurst_explore.py \"https://rdsap10online.elmhurstenergy.co.uk/Processing/WebFormPropertyDescription.aspx?Guid=B44A0DB4-4C08-4241-B818-86F060172105\")",
"Bash(DISPLAY=:99 timeout 90 python scripts/hyde/elmhurst_explore.py \"https://rdsap10online.elmhurstenergy.co.uk/Processing/WebFormAddress.aspx?Guid=B44A0DB4-4C08-4241-B818-86F060172105\")",
"Bash(DISPLAY=:99 timeout 150 python scripts/hyde/elmhurst_fill.py)",
"Bash(pkill -f \"rdsap10\\\\|for-testing\")",
"Bash(rm -f scripts/hyde/.elmhurst-session/Singleton*)",
"Bash(DISPLAY=:99 timeout 280 python -u scripts/hyde/elmhurst_fill.py --page property_description --commit)",
"Bash(npx --no-install pyright scripts/hyde/elmhurst_download.py)",
"Edit(/.claude/skills/expand-sap-accuracy-corpus/**)",
"Bash(PYTHONPATH=/workspaces/model python scripts/fetch_real_life_epc_sample.py 10093116528)",
"Bash(PYTHONPATH=/workspaces/model python -c ' *)",
"Bash(bash scripts/hyde/start_viewer.sh)",
"Bash(DISPLAY=:99 ELMHURST_URL=\"https://rdsap10online.elmhurstenergy.co.uk/Processing/WebFormAddress.aspx?Guid=B44A0DB4-4C08-4241-B818-86F060172105\" python scripts/hyde/elmhurst_session.py login)",
"Bash(break)",
"Bash(DISPLAY=:99 timeout 90 python /tmp/check_session.py)",
"Bash(pkill -9 -f \"elm_login_hold.py\")",
"Bash(pkill -9 -f \"elmhurst-session\")",
"Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 150 python /tmp/elm_build_dims.py)",
"Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_dump_full.py Walls WebFormWalls.aspx)",
"Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_walls_disc.py)",
"Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_roof_disc.py)",
"Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_dump_full.py Openings WebFormOpenings.aspx)",
"Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_win_probe.py)",
"Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 220 python /tmp/elm_build_openings.py)",
"Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_tab_probe.py)",
"Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_del_probe.py)",
"Bash(npx --no-install pyright datatypes/epc/domain/epc_property_data.py datatypes/epc/domain/mapper.py domain/sap10_calculator/rdsap/cert_to_inputs.py)",
"Bash(PYTHONPATH=/workspaces/model python -m pytest domain/sap10_calculator/tests/test_sap_accuracy_corpus.py -q --no-cov)",
"Bash(PYTHONPATH=/workspaces/model python -m pytest tests/domain/sap10_calculator/test_real_cert_sap_accuracy.py -q --no-cov)",
"Edit(/.claude/skills/epc-to-elmhurst-rdsap-inputs/**)",
"Bash(python3 -c \"import sys; sys.path.insert\\(0,'scripts/hyde'\\); import elmhurst_lib; print\\('elmhurst_lib imports OK:', [f for f in dir\\(elmhurst_lib\\) if not f.startswith\\('_'\\)][:12]\\)\")",
"Bash(PYTHONPATH=/workspaces/model python -m pytest tests/domain/sap10_calculator/test_real_cert_sap_accuracy.py tests/infrastructure/epc_client/test_sap_accuracy_corpus.py -q --no-cov)",
"Bash(PYTHONPATH=/workspaces/model python scripts/fetch_real_life_epc_sample.py 10093116543)",
"Bash(echo \"---EXIT $?---\")",
"Bash(sudo apt-get update -o Dir::Etc::sourceparts=-)",
"Bash(apt-cache policy *)",
"Bash(sudo apt-get update)",
"Bash(bash scripts/hyde/start_viewer.sh restart)",
"Bash(echo \"=== listen sockets \\(port 6080 / 5900\\) ===\"; ss -ltnp 2>/dev/null | grep -E ':6080|:5900' || netstat -ltnp 2>/dev/null | grep -E ':6080|:5900' || echo \"ss/netstat unavailable\" *)",
"Read(//proc/5476/**)",
"Bash(sudo python -m playwright install-deps chromium)",
"Bash(python -m playwright install chromium)",
"Bash(python -m playwright --version)",
"Bash(pip show *)"
],
"additionalDirectories": [
"/tmp",
"/workspaces/model/scripts/hyde/.elmhurst-session"
]
}
}

1
.gitignore vendored
View file

@ -304,6 +304,7 @@ backlog/*
# Local Claude config files
.claude/*modelling_cohort.csv
.claude/settings.local.json
.claude/settings.json
# Local EPC debug cache (scripts/eon)
scripts/eon/epc_cache.pkl

Binary file not shown.

View file

@ -42,7 +42,7 @@ Appendix L + U. RdSAP10 Table 32 (p.95) for fuel prices/CO2/PE factors.
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from dataclasses import dataclass, field, replace
from typing import Final, Optional, TYPE_CHECKING
from domain.sap10_calculator.climate.appendix_u import external_temperature_c
@ -863,6 +863,25 @@ class Sap10Calculator(SapCalculator):
"""
def calculate(self, epc: "EpcPropertyData") -> SapResult:
from domain.sap10_calculator.rdsap.cert_to_inputs import cert_to_inputs
# SAP 10.2 Appendix U paragraph 1 (p.124): the SAP and EI ratings are
# computed on UK-average climate (so ratings are nationally
# comparable), but "other calculations (such as for energy use and
# costs on EPCs) are done using local weather" — the EPC-displayed
# CO2 emissions and primary energy use postcode-district weather from
# the PCDB. So we run two climate cascades and graft the demand
# cascade's CO2/PE onto the rating cascade's SAP result. (Worked
# example: simulated case 45 — rating SAP 60.53/CO2 692.13 on
# UK-average; demand CO2 626.78/PE 6581.59 on the W6 postcode.)
from domain.sap10_calculator.rdsap.cert_to_inputs import (
cert_to_demand_inputs,
cert_to_inputs,
)
return calculate_sap_from_inputs(cert_to_inputs(epc))
rating = calculate_sap_from_inputs(cert_to_inputs(epc))
demand = calculate_sap_from_inputs(cert_to_demand_inputs(epc))
return replace(
rating,
co2_kg_per_yr=demand.co2_kg_per_yr,
primary_energy_kwh_per_yr=demand.primary_energy_kwh_per_yr,
primary_energy_kwh_per_m2=demand.primary_energy_kwh_per_m2,
)

View file

@ -2772,7 +2772,11 @@ def _secondary_fuel_cost_gbp_per_kwh(
meter_type, fuel_is_electric=True
):
return _secondary_off_peak_rate_gbp_per_kwh(meter_type)
return prices.unit_price_p_per_kwh(sec_fuel) * _PENCE_TO_GBP
# Normalise colliding gov-API enum codes (e.g. 9 dual fuel, whose
# value collides with Table-32 9 = LPG SC11F) before the price lookup,
# exactly as the main-fuel boundary does — otherwise the same-value
# Table lookup mis-prices the secondary at the colliding fuel's rate.
return prices.unit_price_p_per_kwh(canonical_fuel_code(sec_fuel)) * _PENCE_TO_GBP
def _pv_array_generation_kwh_per_yr(
@ -3927,6 +3931,10 @@ def _secondary_fuel_code(epc: EpcPropertyData) -> int:
code = _int_or_none(epc.sap_heating.secondary_fuel_type)
if code is None:
return _STANDARD_ELECTRICITY_FUEL_CODE
# Normalise colliding gov-API enum codes (e.g. 9 dual fuel, whose value
# collides with the LPG Table code) so the CO2/PE factor lookups resolve
# to the lodged fuel — mirrors the main-fuel boundary + the cost side.
code = canonical_fuel_code(code) or code
if code in CO2_KG_PER_KWH:
return code
return _table_12_factor_fuel_code(code)
@ -7218,7 +7226,21 @@ def cert_to_inputs(
epc=epc,
)
if apm_efficiencies is not None:
eff, water_eff = apm_efficiencies
# η_space (N3.6) always replaces the Table 4a default — the heat
# pump is the space main. η_water (N3.7a) applies ONLY when the DHW
# is actually heated by that main (WHC "from main": 901/902/914). A
# separate electric immersion (WHC 903) or other independent DHW
# source keeps its own water efficiency (immersion = 100%), not the
# HP's water SCOP — otherwise a HP-space + immersion-DHW dwelling
# under-counts its hot-water fuel (case 45: water 2130 -> 1894 kWh,
# +1.5 SAP, because 187.5% × 0.6 in-use = 112.5% was applied where
# the worksheet (216) uses 100%).
eff, apm_water_eff = apm_efficiencies
if (
epc.sap_heating.water_heating_code
in _WATER_INHERIT_FROM_MAIN_CODES
):
water_eff = apm_water_eff
if (
_is_heat_network_main(main)
and epc.sap_heating.water_heating_code in _WATER_INHERIT_FROM_MAIN_CODES

View file

@ -121,11 +121,17 @@ API_FUEL_TO_TABLE_32: Final[dict[int, int]] = {
# 33 = coal — Table-32 code 33 is the electricity 10-hour low rate
# 7.5 p vs house coal 3.67 p (and `is_electric_fuel_code(33)`
# wrongly classified the coal main as electric).
# DEFERRED (not included): API 9 = dual fuel (mineral + wood) is also a
# collision (Table-32 9 = LPG SC11F 3.48 p vs dual fuel 3.99 p) but the
# 0.45 p delta nets neutral-to-negative on the (outlier-dominated)
# dual-fuel certs and shifts them in a direction not yet understood —
# investigate separately.
# 9 = dual fuel (mineral + wood) — Table-32 code 9 is LPG SC11F
# 3.48 p vs dual fuel 3.99 p. The gov-API lodges API enum 9 for a
# dual-fuel appliance (description "Room heaters, dual fuel
# (mineral and wood)"), but the same-value Table-32 lookup returns
# LPG 3.48 p, under-pricing the (mostly secondary) dual-fuel heat.
# A prior session deferred this as "direction not understood"
# while the EPC PE/CO2 lens was confounded by the climate-cascade
# bug (fixed in fc7c4d2d); on the corrected lens the dual-fuel
# secondary cohort over-rates (SAP too high = cost too low) by
# +0.55 signed, and pricing UP to the dual-fuel 3.99 p row reduces
# that over-rate — the correct direction.
#
# COMMUNITY FUELS (handled elsewhere, NOT here): API 30 (waste
# combustion), 31 (biomass) and 32 (biogas) — all "(community)" in the
@ -140,7 +146,7 @@ API_FUEL_TO_TABLE_32: Final[dict[int, int]] = {
# cert_to_inputs), where the community meaning is unambiguous. Community
# fuels 20/25 do not collide with an electricity code, so they resolve
# correctly through the heat-network path without any special handling.
_GOV_API_COLLISION_FUELS: Final[frozenset[int]] = frozenset({5, 33})
_GOV_API_COLLISION_FUELS: Final[frozenset[int]] = frozenset({5, 9, 33})
def canonical_fuel_code(fuel_code: Optional[int]) -> Optional[int]:

View file

@ -1166,6 +1166,16 @@ def heat_transmission_from_cert(
# lodgement is authoritative. Mirrors the roof's "another dwelling
# above" override above. Cert 2115-4121-4711-9361-3686.
part_floor_is_party = "another dwelling below" in (part.floor_type or "").lower()
# A part whose floor_type is a GROUND floor sits in contact with the
# ground (RdSAP 10 §3.12) and is therefore a heat-loss floor, even when
# the dwelling-level flat heuristic (`_dwelling_exposure`) defaults a
# flat to has_exposed_floor=False. The Elmhurst Summary path lodges a
# ground-floor flat's position as a "Ground floor" floor_type (not the
# API floor_heat_loss=1 exposed code), so without this signal the
# cascade dropped its ground floor entirely — simulated case 45 (a
# ground-floor flat the mapper labelled "Top-floor flat"): worksheet
# (28a) = 47.0 × 0.54 = 25.38 W/K billed as 0, over-rating by +7 SAP.
part_floor_is_ground = "ground floor" in (part.floor_type or "").lower()
# A floor lodged as a heat-loss floor — *exposed* (API
# floor_heat_loss=1 → `is_exposed_floor`, "an exposed floor if there
# is an open space below") or *above a partially heated space* (API
@ -1178,6 +1188,7 @@ def heat_transmission_from_cert(
# the "another dwelling below" party signal overrides it downward.
part_has_exposed_floor = (
exposure.has_exposed_floor or is_exposed_floor or is_above_partial
or part_floor_is_ground
) and not part_floor_is_party
floor_area_total = _round_half_up(
geom["ground_floor_area_m2"] if part_has_exposed_floor else 0.0,

108
scripts/dive_cert.py Normal file
View file

@ -0,0 +1,108 @@
"""Deep-dive a single corpus cert: lodged vs computed SAP/CO2/PE + the full
intermediate line-ref dump + the mapped fabric/heat-loss inputs, so the
diverging line is visible WITHOUT an Elmhurst worksheet.
USAGE
PYTHONPATH=/workspaces/model python scripts/dive_cert.py <cert_number_or_substr>
PYTHONPATH=/workspaces/model python scripts/dive_cert.py --filter wall_insulation_type=3 [--n 8]
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
from typing import Any
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
from domain.sap10_calculator.rdsap.cert_to_inputs import (
SAP_10_2_SPEC_PRICES,
cert_to_demand_inputs,
cert_to_inputs,
)
from scripts.profile_api_error import features
# Corpus of RdSAP-Schema-21.0.1 API documents; `main` parses it as one JSON
# document per non-blank line. The path is relative, so it resolves against
# the current working directory.
_CORPUS = Path("backend/epc_api/json_samples/RdSAP-Schema-21.0.1/corpus.jsonl")
def _cert_id(doc: dict[str, Any]) -> str:
return str(
doc.get("certificate_number")
or doc.get("lmk_key")
or doc.get("uprn")
or "?"
)
def _dump(doc: dict[str, Any]) -> None:
    """Print the lodged-vs-computed comparison for one corpus document.

    Maps ``doc`` through ``EpcPropertyDataMapper.from_api_response`` and runs
    the calculator twice with ``SAP_10_2_SPEC_PRICES``: once on the rating
    inputs (``cert_to_inputs``) and once on the demand inputs
    (``cert_to_demand_inputs``). It then prints, in order:

    * the SAP score from the rating run against ``energy_rating_current``;
    * CO2 and primary energy from the demand run against
      ``co2_emissions_current`` / ``energy_consumption_current`` (each line
      is skipped when the lodged value is absent);
    * the per-end-use energy figures from the rating run;
    * a fixed selection of raw-API features (``features(doc)``);
    * every entry of the rating result's ``intermediate`` mapping, sorted by
      key.

    Args:
        doc: One parsed corpus document (raw API response).
    """
    cert = _cert_id(doc)
    lodged_sap = doc.get("energy_rating_current")
    lodged_co2 = doc.get("co2_emissions_current")
    lodged_pe = doc.get("energy_consumption_current")

    epc = EpcPropertyDataMapper.from_api_response(doc)
    rating = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )
    # SAP/EI rating is the UK-average rating cascade (`rating`); EPC CO2/PE use
    # the postcode demand cascade (SAP 10.2 Appendix U p.124). Display CO2/PE
    # from the demand cascade so they compare like-for-like with the lodged EPC.
    demand = calculate_sap_from_inputs(
        cert_to_demand_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    print("=" * 90)
    print(f"CERT {cert}")
    print(
        f" SAP lodged={lodged_sap} ours={rating.sap_score_continuous:.2f} "
        # A missing lodged SAP is treated as 0 for the delta only.
        f"d={rating.sap_score_continuous - (lodged_sap or 0):+.2f}"
    )
    if lodged_co2 is not None:
        # The calculator reports kg/yr; divide by 1000 to compare in tonnes.
        print(
            f" CO2 lodged={lodged_co2:.3f} ours={demand.co2_kg_per_yr / 1000:.3f} t "
            f"d={demand.co2_kg_per_yr / 1000 - lodged_co2:+.3f} (demand cascade)"
        )
    if lodged_pe is not None:
        print(
            f" PE lodged={lodged_pe:.1f} ours={demand.primary_energy_kwh_per_m2:.1f} "
            f"d={demand.primary_energy_kwh_per_m2 - lodged_pe:+.1f} kWh/m2 (demand cascade)"
        )
    print(
        f" energy kWh/yr: spaceheat={rating.space_heating_kwh_per_yr:.0f} "
        f"main={rating.main_heating_fuel_kwh_per_yr:.0f} "
        f"sec={rating.secondary_heating_fuel_kwh_per_yr:.0f} "
        f"hw={rating.hot_water_kwh_per_yr:.0f} light={rating.lighting_kwh_per_yr:.0f} "
        f"pumpfan={rating.pumps_fans_kwh_per_yr:.0f}"
    )

    print(" --- key mapped inputs ---")
    feats = features(doc)
    for k in (
        "property_type", "built_form", "age_band", "main_sap_code",
        "main_heat_cat", "main_fuel", "has_pcdb_main", "main_data_source",
        "wall_construction", "wall_insulation_type", "roof_codes",
        "roof_insulation_thickness", "whc", "water_fuel", "immersion_type",
        "has_cylinder", "has_secondary", "has_pv", "mains_gas", "n_building_parts",
    ):
        print(f" {k:26s}= {feats.get(k)}")

    print(" --- intermediate line refs ---")
    inter = rating.intermediate or {}
    for k in sorted(inter):
        print(f" {k:34s}= {inter[k]:.4f}")
def main() -> None:
    """Command-line entry point: dump one cert, or the first N filter matches.

    Two invocation forms are supported:

    * ``<cert_number_or_substr>`` - dump the first corpus document whose
      identifier (see ``_cert_id``) contains the given substring;
    * ``--filter key=value [--n N]`` - dump the first ``N`` (default 6)
      documents whose ``features(doc)[key]``, rendered with ``str``, equals
      ``value``.

    Raises:
        SystemExit: when no argument is given, or when ``--filter`` / ``--n``
            is the last token and therefore has no value.
    """
    usage = "usage: dive_cert.py <cert_number_or_substr> | --filter key=value [--n N]"
    args = sys.argv[1:]

    def option(flag: str, default: str = "") -> str:
        """Return the token that follows *flag*, or *default* if it is absent."""
        if flag not in args:
            return default
        pos = args.index(flag) + 1
        if pos >= len(args):
            raise SystemExit(f"{flag} requires a value\n{usage}")
        return args[pos]

    # Validate the command line before paying for the corpus load.
    if not args:
        raise SystemExit(usage)
    filter_spec = option("--filter")
    limit = int(option("--n", "6"))

    # JSON text is UTF-8; do not depend on the locale's default encoding.
    docs = [
        json.loads(line)
        for line in _CORPUS.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]

    if "--filter" in args:
        key, _, val = filter_spec.partition("=")
        hits = [doc for doc in docs if str(features(doc).get(key)) == val]
        print(f"{len(hits)} certs match {filter_spec}; dumping first {limit}")
        for doc in hits[:limit]:
            _dump(doc)
        return

    target = args[0]
    for doc in docs:
        if target in _cert_id(doc):
            _dump(doc)
            return
    print(f"no cert matching {target}")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,226 @@
"""Profile API-path SAP/CO2/PE error over the COMMITTED corpus (no /tmp cache).
WHAT THIS IS FOR
----------------
The accuracy thesis: the gov-API response carries the full SAP input set and our
calculator is deterministic, so EVERY cert should reproduce the lodged
SAP/CO2/PE. Any divergence is an input-handling bug, not irreducible noise.
This is the per-cert microscope for that loop. It runs the in-repo corpus
(``backend/epc_api/json_samples/RdSAP-Schema-21.0.1/corpus.jsonl``) through the
real ``from_api_response`` -> ``cert_to_inputs`` -> ``calculate_sap_from_inputs``
path, then:
1. buckets the signed SAP error by raw-API feature (reusing
``profile_api_error.features``) ranked by wasted accuracy, so a
dropped/mis-mapped field surfaces as a biased bucket;
2. for the worst over- and under-raters, prints the PE/CO2-vs-cost split so
each can be triaged WITHOUT a worksheet:
- PE & CO2 both ~match lodged but SAP off -> COST-side bug
(tariff / PV export / standing charge / secondary fuel);
- PE/CO2 also off -> DEMAND-side bug
(fabric / ventilation / gains / heating demand).
USAGE
-----
PYTHONPATH=/workspaces/model python scripts/profile_corpus_error.py
PYTHONPATH=/workspaces/model python scripts/profile_corpus_error.py --min-n 15 --worst 40
"""
from __future__ import annotations
import json
import statistics as stats
import sys
from collections import defaultdict
from pathlib import Path
from typing import Any, Optional
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
from domain.sap10_calculator.rdsap.cert_to_inputs import (
cert_to_demand_inputs,
SAP_10_2_SPEC_PRICES,
cert_to_inputs,
)
from scripts.profile_api_error import features
# In-repo corpus profiled by this script; `_load` parses it as one JSON
# document per non-blank line. The path is relative, so it resolves against
# the current working directory.
_CORPUS = Path("backend/epc_api/json_samples/RdSAP-Schema-21.0.1/corpus.jsonl")
class Row:
    """One profiled cert: lodged vs computed figures plus its raw features.

    A plain ``__slots__`` class, so instances carry exactly the attributes
    named below and no per-instance ``__dict__``.
    """

    __slots__ = (
        "cert",        # identifier used when printing the cert
        "sap_err",     # computed SAP minus lodged SAP
        "co2_err_t",   # computed minus lodged CO2, or None when not lodged
        "pe_err",      # computed minus lodged primary energy, or None
        "lodged_sap",
        "our_sap",
        "lodged_pe",
        "our_pe",
        "feats",       # raw-API feature mapping used for bucketing
    )

    def __init__(
        self,
        cert: str,
        sap_err: float,
        co2_err_t: Optional[float],
        pe_err: Optional[float],
        lodged_sap: float,
        our_sap: float,
        lodged_pe: Optional[float],
        our_pe: float,
        feats: dict[str, Any],
    ) -> None:
        # Identity and features.
        self.cert, self.feats = cert, feats
        # Signed errors.
        self.sap_err, self.co2_err_t, self.pe_err = sap_err, co2_err_t, pe_err
        # Absolute figures behind those errors.
        self.lodged_sap, self.our_sap = lodged_sap, our_sap
        self.lodged_pe, self.our_pe = lodged_pe, our_pe
def _load() -> list[dict[str, Any]]:
    """Read the corpus file and return its documents in file order.

    Each line of ``_CORPUS`` that is not blank (after stripping whitespace) is
    parsed with ``json.loads``.
    """
    documents: list[dict[str, Any]] = []
    for raw_line in _CORPUS.read_text().splitlines():
        if not raw_line.strip():
            continue
        documents.append(json.loads(raw_line))
    return documents
def _compute(corpus: list[dict[str, Any]]) -> tuple[list[Row], int, int]:
    """Run every corpus document through the calculator and collect the errors.

    Args:
        corpus: Parsed corpus documents (raw API responses).

    Returns:
        ``(rows, skipped, raised)`` where ``rows`` holds one ``Row`` per
        successfully profiled document, ``skipped`` counts documents with no
        ``energy_rating_current``, and ``raised`` counts documents for which
        mapping or calculation raised an exception.
    """
    profiled: list[Row] = []
    n_skipped = 0
    n_raised = 0

    for doc in corpus:
        lodged = doc.get("energy_rating_current")
        if lodged is None:
            # Nothing to compare against.
            n_skipped += 1
            continue

        try:
            epc = EpcPropertyDataMapper.from_api_response(doc)
            rating_inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
            rating = calculate_sap_from_inputs(rating_inputs)
            # SAP/EI rating is the UK-average rating cascade (`rating`);
            # the EPC-displayed CO2/PE use the postcode demand cascade
            # (SAP 10.2 Appendix U p.124). Use the demand cascade for the
            # PE/CO2-vs-cost triage so it is not confounded by the climate
            # difference (UK-average vs local weather).
            demand_inputs = cert_to_demand_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
            demand = calculate_sap_from_inputs(demand_inputs)
        except Exception:
            # Best-effort profiling: a failing cert is counted, not fatal.
            n_raised += 1
            continue

        # Fall back to the running row index when the document has no id.
        cert_key = (
            doc.get("certificate_number")
            or doc.get("lmk_key")
            or doc.get("uprn")
            or len(profiled)
        )
        lodged_co2_t = doc.get("co2_emissions_current")
        lodged_pe = doc.get("energy_consumption_current")

        co2_err_t = None
        if lodged_co2_t is not None:
            # Calculator output is kg/yr; the comparison is done in tonnes.
            co2_err_t = demand.co2_kg_per_yr / 1000.0 - lodged_co2_t
        pe_err = None
        if lodged_pe is not None:
            pe_err = demand.primary_energy_kwh_per_m2 - lodged_pe

        profiled.append(
            Row(
                cert=str(cert_key),
                sap_err=rating.sap_score_continuous - lodged,
                co2_err_t=co2_err_t,
                pe_err=pe_err,
                lodged_sap=lodged,
                our_sap=rating.sap_score_continuous,
                lodged_pe=lodged_pe,
                our_pe=demand.primary_energy_kwh_per_m2,
                feats=features(doc),
            )
        )

    return profiled, n_skipped, n_raised
def _triage(r: Row) -> str:
"""Cost vs demand label from the PE/CO2 split (~tolerant)."""
if r.pe_err is None or r.co2_err_t is None:
return "?"
pe_ok = abs(r.pe_err) < 5.0 # kWh/m2/yr
co2_ok = abs(r.co2_err_t) < 0.10 # t/yr
if pe_ok and co2_ok:
return "COST" # demand reproduces, cost-side off
return "DEMAND"
def main() -> None:
    """Profile the corpus and print the summary, bucket and worst-rater reports.

    Command-line options (both optional, read from ``sys.argv``):

    * ``--min-n N`` - minimum bucket size to report (default 12);
    * ``--worst N`` - how many over- and under-raters to list (default 30).

    Output, in order: a one-line summary (share within 0.5 SAP, signed mean,
    MAE), the COST/DEMAND split of the rows outside 0.5 SAP, the top 40
    feature buckets ranked by ``waste = n_outside_0.5 * mean|err|``, then the
    worst over-raters and under-raters. When no cert could be profiled, only
    a short notice is printed.
    """
    min_n = 12
    n_worst = 30
    if "--min-n" in sys.argv:
        min_n = int(sys.argv[sys.argv.index("--min-n") + 1])
    if "--worst" in sys.argv:
        n_worst = int(sys.argv[sys.argv.index("--worst") + 1])

    rows, skipped, raised = _compute(_load())
    n = len(rows)
    if n == 0:
        # Every statistic below divides by n or indexes rows[0]; bail out
        # cleanly instead of failing with ZeroDivisionError / StatisticsError.
        print(
            f"profiled 0 certs ({skipped} no-lodged-SAP, {raised} raised); "
            f"nothing to report"
        )
        return

    within = sum(1 for r in rows if abs(r.sap_err) < 0.5) / n * 100
    print(
        f"profiled {n} certs ({skipped} no-lodged-SAP, {raised} raised) | "
        f"within-0.5 = {within:.1f}% | "
        f"signed {stats.mean(r.sap_err for r in rows):+.3f} | "
        f"MAE {stats.mean(abs(r.sap_err) for r in rows):.3f}"
    )

    out = [r for r in rows if abs(r.sap_err) >= 0.5]
    # Classify each outlier once and count the labels.
    labels = [_triage(r) for r in out]
    cost_n = labels.count("COST")
    dem_n = labels.count("DEMAND")
    print(
        f"of {len(out)} outside-0.5: {dem_n} DEMAND-side (PE/CO2 also off), "
        f"{cost_n} COST-side (PE/CO2 match), {len(out) - cost_n - dem_n} unknown"
    )
    print("=" * 104)

    # Every row's feats come from the same `features` call, so the first
    # row's keys are used as the feature list.
    feat_names = list(rows[0].feats.keys())
    bucket_lines: list[tuple[float, str]] = []
    for fn in feat_names:
        groups: dict[str, list[float]] = defaultdict(list)
        for r in rows:
            groups[str(r.feats.get(fn))].append(r.sap_err)
        for val, es in groups.items():
            cnt = len(es)
            if cnt < min_n:
                continue  # too few certs for the bucket to be meaningful
            w05 = sum(1 for e in es if abs(e) < 0.5)
            mabs = stats.mean(abs(e) for e in es)
            # "Wasted accuracy": certs outside 0.5 weighted by the mean miss.
            waste = (cnt - w05) * mabs
            bucket_lines.append((waste, (
                f" {fn:22s}={val:<20.20s} n={cnt:4d} "
                f"within0.5={w05 / cnt * 100:4.0f}% "
                f"signed={stats.mean(es):+6.2f} mean|err|={mabs:5.2f} "
                f"[waste={waste:6.0f}]"
            )))

    print(f"TOP ERROR-CARRYING BUCKETS (n_out x mean|err|; min-n={min_n}):")
    for _, line in sorted(bucket_lines, key=lambda x: -x[0])[:40]:
        print(line)
    print("=" * 104)

    print(f"WORST {n_worst} OVER-RATERS (our SAP too high -> we under-count loss/cost):")
    _dump_worst(sorted(rows, key=lambda r: -r.sap_err)[:n_worst])
    print("-" * 104)
    print(f"WORST {n_worst} UNDER-RATERS (our SAP too low -> we over-count loss/cost):")
    _dump_worst(sorted(rows, key=lambda r: r.sap_err)[:n_worst])
def _dump_worst(rows: list[Row]) -> None:
    """Print a header line followed by one table line per row.

    Each line shows the cert id, lodged and computed SAP, the SAP / PE / CO2
    errors (``?`` where the lodged PE or CO2 is unknown), the ``_triage``
    label, and a compact signature built from the row's raw features.
    """
    header = (
        f" {'cert':>16s} {'lodgSAP':>7s} {'ourSAP':>7s} {'dSAP':>6s} "
        f"{'dPE':>6s} {'dCO2t':>6s} {'split':>6s} "
        f"heat/prop/wall/roof/fuel"
    )
    print(header)

    for row in rows:
        feats = row.feats
        signature = (
            f"{feats.get('main_sap_code')}/{feats.get('property_type')}/"
            f"{feats.get('wall_construction')}/{feats.get('roof_codes')}/"
            f"{feats.get('main_fuel')} pcdb={feats.get('has_pcdb_main')} "
            f"2nd={feats.get('has_secondary')} pv={feats.get('has_pv')}"
        )

        # Unknown errors are shown as a placeholder rather than a number.
        if row.pe_err is None:
            pe_cell = " ?"
        else:
            pe_cell = f"{row.pe_err:+6.1f}"
        if row.co2_err_t is None:
            co2_cell = " ?"
        else:
            co2_cell = f"{row.co2_err_t:+6.2f}"

        print(
            f" {row.cert:>16.16s} {row.lodged_sap:7.1f} {row.our_sap:7.2f} "
            f"{row.sap_err:+6.2f} {pe_cell} {co2_cell} {_triage(row):>6s} {signature}"
        )


if __name__ == "__main__":
    main()

View file

@ -686,12 +686,15 @@ def test_ashp_overlay_scores_the_vaillant_end_state_from_a_gas_boiler() -> None:
# dwelling's baseline fabric and so the ASHP end-state SAP. Still a snapshot
# of the Vaillant overlay's own output, validated transitively by the
# system-boiler pin below (which reproduces a real Vaillant cert at delta 0).
# CO2/PE are the postcode DEMAND cascade now that `Sap10Calculator.
# calculate` computes EPC emissions/PE on local weather (SAP 10.2
# Appendix U p.124); SAP is unchanged (UK-average rating cascade).
_assert_overlay_scores(
before,
option.overlay,
sap=51.99820176096402,
co2=1268.4645083243888,
pe=13080.20756425629,
co2=1065.7593506066496,
pe=10995.781557709413,
)
@ -715,12 +718,14 @@ def test_ashp_overlay_scores_the_vaillant_end_state_from_a_gas_boiler_instant_hw
# boiler-1 pin above); the same merge also resolved this cert's main-fuel
# mapper gap (§14.2 mains-gas derivation), so its raw before now baselines —
# see `test_gas_boiler_instant_hw_before_baselines`.
# CO2/PE are the postcode DEMAND cascade now (see the boiler-1 pin above);
# SAP is unchanged (UK-average rating cascade).
_assert_overlay_scores(
before,
option.overlay,
sap=39.00740809309464,
co2=2248.6089062232704,
pe=23094.10189037302,
co2=1845.8588018295509,
pe=18944.42568846759,
)

View file

@ -38,6 +38,7 @@ from datatypes.epc.domain.epc_property_data import (
from domain.sap10_ml.tests._fixtures import (
make_building_part,
make_floor_dimension,
make_main_heating_detail,
make_minimal_sap10_epc,
make_sap_heating,
make_window,
@ -79,6 +80,7 @@ from domain.sap10_calculator.rdsap.cert_to_inputs import (
_pv_overshading_factor, # pyright: ignore[reportPrivateUsage]
_pv_pitch_deg, # pyright: ignore[reportPrivateUsage]
_responsiveness, # pyright: ignore[reportPrivateUsage]
_secondary_fuel_code, # pyright: ignore[reportPrivateUsage]
_secondary_fuel_cost_gbp_per_kwh, # pyright: ignore[reportPrivateUsage]
_secondary_heating_fraction_for_category, # pyright: ignore[reportPrivateUsage]
_section_12_4_4_summer_immersion_applies, # pyright: ignore[reportPrivateUsage]
@ -2161,6 +2163,47 @@ def test_is_electric_main_dual_fuel_table_32_code_10_is_not_electric() -> None:
assert _is_electric_main(community_electric_main) is False
def test_dual_fuel_secondary_api_enum_9_prices_as_dual_fuel_not_lpg() -> None:
    # Arrange — a gas-boiler main with a secondary heater lodged under the
    # gov-API fuel enum 9 = "dual fuel (mineral and wood)". That enum value
    # COLLIDES with the same-valued RdSAP 10 Table 32 / SAP 10.2 Table 12
    # code 9 = "LPG (bulk, SC11F)". The secondary cost + CO2/PE paths used to
    # take the same-value lookup (LPG 3.48 p/kWh, CO2 0.241 kg/kWh) rather
    # than translating the enum to the dual-fuel row (3.99 p/kWh, CO2 0.087),
    # which under-costed the secondary (SAP over-rate) AND over-counted its
    # CO2 (LPG is fossil; dual fuel is part wood). Enum 9 is now in
    # `_GOV_API_COLLISION_FUELS` and both secondary paths canonicalise,
    # mirroring the main-fuel boundary. SAP 10.2 Table 12 (p.189) / RdSAP 10
    # Table 32 (p.95).
    boiler = MainHeatingDetail(
        has_fghrs=False,
        main_fuel_type=26,
        heat_emitter_type=1,
        emitter_temperature=1,
        main_heating_control=2106,
        main_heating_category=2,
        sap_main_heating_code=102,
    )
    heating = make_sap_heating(
        main_heating_details=[boiler],
        secondary_fuel_type=9,  # gov-API enum: dual fuel (mineral + wood)
        secondary_heating_type=631,
    )
    epc = make_minimal_sap10_epc(
        total_floor_area_m2=_TYPICAL_TFA_M2,
        habitable_rooms_count=4,
        country_code="ENG",
        sap_heating=heating,
    )
    standard_meter = 2  # standard (single-rate) meter

    # Act — the rating-cascade secondary price, then the CO2/PE fuel code.
    price_gbp_per_kwh = _secondary_fuel_cost_gbp_per_kwh(
        epc.sap_heating, boiler, standard_meter, SAP_10_2_SPEC_PRICES,
    )
    factor_code = _secondary_fuel_code(epc)

    # Assert — dual fuel 3.99 p/kWh (NOT LPG 3.48) and Table code 10 (NOT 9).
    price_gap = abs(price_gbp_per_kwh - 0.0399)
    assert price_gap <= 1e-6
    assert factor_code == 10
def test_is_electric_water_dual_fuel_table_32_code_10_is_not_electric() -> None:
# Arrange — same API/Table 32 collision as `_is_electric_main` per
# S0380.136 docstring.
@ -7581,3 +7624,55 @@ def test_index_less_mev_applies_table_4g_note_3_default_data_iuf() -> None:
# 2.5x the raw-0.8 value, not the raw default.
assert fan_kwh > 0.0
assert abs(fan_kwh - expected) <= 1e-9
def test_heat_pump_water_scop_not_applied_to_separate_immersion_dhw() -> None:
    # Arrange — SAP 10.2 Appendix N3.7(a): the PCDB water efficiency of a
    # heat pump (η_water) only applies to the DHW when the heat pump itself
    # heats the cylinder. A separate electric immersion (WHC 903) heats the
    # water at 100% whatever the space-heating system is, so the HP water
    # SCOP must NOT leak onto it. Invariant: WHC-903 immersion hot-water fuel
    # is INDEPENDENT of the main — a heat-pump main and a gas-boiler main
    # give the SAME immersion fuel (both 100%, no primary loss). Before the
    # fix the APM override set η_water = 187.5% × 0.6 in-use = 112.5% on the
    # HP cert, under-counting its immersion fuel. Worksheet-validated on
    # simulated case 45: water (62) = 2130.26 kWh at η_water=100%, not
    # 2130.26 / 1.125 = 1893.57.
    heat_pump = make_main_heating_detail(
        main_fuel_type=29,  # electricity
        heat_emitter_type=1,
        main_heating_category=4,  # heat pump
        main_heating_index_number=100053,  # PCDB Table 362 ASHP (ECODAN 5 kW)
        main_heating_data_source=1,
    )
    gas_boiler = make_main_heating_detail(
        main_fuel_type=26,  # mains gas
        heat_emitter_type=1,
        main_heating_category=2,  # gas boiler
        sap_main_heating_code=102,
    )

    def _epc_with_immersion(space_main: MainHeatingDetail) -> EpcPropertyData:
        # Same dwelling and cylinder every time; only the space main varies.
        immersion_heating = make_sap_heating(
            main_heating_details=[space_main],
            water_heating_code=903,  # separate electric immersion
            water_heating_fuel=30,  # standard electricity
            cylinder_size=2,
            cylinder_insulation_type=1,
            cylinder_insulation_thickness_mm=25,
        )
        return make_minimal_sap10_epc(
            total_floor_area_m2=_TYPICAL_TFA_M2,
            habitable_rooms_count=4,
            country_code="ENG",
            has_hot_water_cylinder=True,
            sap_heating=immersion_heating,
        )

    # Act
    hp_inputs = cert_to_inputs(_epc_with_immersion(heat_pump))
    hp_kwh = hp_inputs.hot_water_kwh_per_yr
    boiler_inputs = cert_to_inputs(_epc_with_immersion(gas_boiler))
    boiler_kwh = boiler_inputs.hot_water_kwh_per_yr

    # Assert — the immersion DHW fuel does not depend on whether the space
    # main is a heat pump or a gas boiler (no HP water SCOP applied).
    assert hp_kwh > 0.0
    fuel_gap = abs(hp_kwh - boiler_kwh)
    assert fuel_gap <= 1e-6

View file

@ -0,0 +1,107 @@
"""Mapper-driven cascade pin against the Elmhurst P960-0001-001431
"simulated case 45" worksheet: a ~47 m2 GROUND-FLOOR FLAT heated by an
air-source HEAT PUMP (PCDB 100053 ECODAN, radiators, MCS=No) with a
WHC-903 electric-immersion DHW and a 110 L cylinder, postcode W6 9BF
(SAP Region "Thames Valley").
Case 45 is the 1e-4 oracle for the SAP 10.2 Appendix U (PDF p.124) TWO-
CLIMATE-CASCADE split. The P960 prints the current dwelling TWICE:
* Block 1 "11a. SAP rating / 12a. CO2" computed on UK-AVERAGE
weather (Appendix U Tables U1-U3 region 0). Drives the SAP/EI rating.
Space-heat demand (98c) = 7333.79; SAP value (258) = 60.5318 (-> 61);
total CO2 (272) = 692.13.
* Block 2 "CALCULATION OF EPC COSTS, EMISSIONS AND PRIMARY ENERGY"
computed on POSTCODE-DISTRICT weather (PCDB Table 172, W6). Drives the
EPC-displayed figures. Space-heat demand (98c) = 5921.05; total CO2
(272) = 626.78; total primary energy (286) = 6581.59.
Per Appendix U paragraph 1: "Other calculations (such as for energy use
and costs on EPCs) are done using local weather." `Sap10Calculator.
calculate` therefore runs both cascades and grafts the demand cascade's
CO2/PE onto the rating cascade's SAP — this fixture pins BOTH.
Like the other `_elmhurst_worksheet_001431_case*` fixtures it does NOT
hand-build the EpcPropertyData: it routes the Summary PDF through
ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the pin exercises
the WHOLE extractor + mapper + calculator pipeline.
Source: user-simulated PDFs at `sap worksheets/golden fixture debugging/
simulated case 45/`. The Summary is mirrored into the tracked
`backend/documents_parser/tests/fixtures/Summary_001431_case45.pdf` so the
test runs without depending on the unstaged workspace.
Per [[feedback-zero-error-strict]]: pins are abs <= 1e-4 against the PDF.
"""
from __future__ import annotations
import re
import subprocess
from pathlib import Path
from typing import Final
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
# parents[0]=worksheet/, [1]=sap10_calculator/, [2]=domain/, [3]=tests/,
# [4]=repo root.
_SUMMARY_PDF: Final[Path] = (
Path(__file__).resolve().parents[4]
/ "backend" / "documents_parser" / "tests" / "fixtures"
/ "Summary_001431_case45.pdf"
)
# Block 1 — UK-average RATING cascade (`cert_to_inputs`).
RATING_SPACE_HEATING_KWH: Final[float] = 7333.7892 # (98c)
RATING_SAP_CONTINUOUS: Final[float] = 60.5318 # (258) un-rounded
RATING_SAP_INTEGER: Final[int] = 61 # (258)
RATING_CO2_KG_PER_YR: Final[float] = 692.1287 # (272)
# Block 2 — POSTCODE-district DEMAND cascade (`cert_to_demand_inputs`).
DEMAND_SPACE_HEATING_KWH: Final[float] = 5921.0486 # (98c)
DEMAND_CO2_KG_PER_YR: Final[float] = 626.7797 # (272)
DEMAND_PRIMARY_ENERGY_KWH: Final[float] = 6581.5936 # (286)
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Render a Summary PDF as one newline-joined token string per page.

    The page count is read from `pdfinfo`; each page is then dumped with
    `pdftotext -layout` and every non-blank line is split into tokens on
    runs of two or more whitespace characters. A blank line contributes a
    single empty token. This is the per-page label/value token format fed
    to ElmhurstSiteNotesExtractor.

    Raises RuntimeError when the `pdfinfo` output has no "Pages:" line;
    a failing subprocess raises through `check=True`.
    """
    pdfinfo = subprocess.run(
        ["pdfinfo", str(pdf_path)], capture_output=True, text=True, check=True,
    )
    count_match = re.search(r"Pages:\s+(\d+)", pdfinfo.stdout)
    if count_match is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    last_page = int(count_match.group(1))

    def _page_as_tokens(page_no: int) -> str:
        # Restrict pdftotext to exactly this page (-f == -l) and write the
        # text to stdout ("-").
        page_arg = str(page_no)
        dumped = subprocess.run(
            [
                "pdftotext", "-layout", "-f", page_arg, "-l", page_arg,
                str(pdf_path), "-",
            ],
            capture_output=True, text=True, check=True,
        )
        page_tokens: list[str] = []
        for raw_line in dumped.stdout.splitlines():
            stripped = raw_line.strip()
            if stripped:
                page_tokens.extend(
                    piece for piece in re.split(r"\s{2,}", stripped) if piece
                )
            else:
                # Blank lines are kept as empty tokens.
                page_tokens.append("")
        return "\n".join(page_tokens)

    return [_page_as_tokens(page_no) for page_no in range(1, last_page + 1)]
def build_epc() -> EpcPropertyData:
    """Build the case-45 EpcPropertyData from the Summary PDF.

    Nothing is hand-built: the Summary PDF is tokenised, run through
    ElmhurstSiteNotesExtractor, and mapped with
    `EpcPropertyDataMapper.from_elmhurst_site_notes`, so the extractor and
    mapper are part of the test target. This module is a pin PROVIDER
    (build_epc + constants); the collected assertions live in
    `test_section_cascade_pins`.
    """
    extractor = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_PDF)
    )
    return EpcPropertyDataMapper.from_elmhurst_site_notes(extractor.extract())

View file

@ -24,7 +24,10 @@ from typing import Final
import pytest
from domain.sap10_calculator.calculator import Sap10Calculator
from domain.sap10_calculator.calculator import (
Sap10Calculator,
calculate_sap_from_inputs,
)
from domain.sap10_calculator.rdsap.cert_to_inputs import (
cert_to_inputs,
water_heating_section_from_cert,
@ -338,8 +341,13 @@ def test_sap_result_pin(fixture_name: str, field_name: str) -> None:
epc = _FIXTURE_MODULES[fixture_name].build_epc()
expected = getattr(pin, field_name)
# Act
result = Sap10Calculator().calculate(epc)
# Act — these pins are the worksheet's Block-1 (energy-rating) line refs,
# i.e. the UK-average RATING cascade. `Sap10Calculator.calculate` now
# grafts the postcode DEMAND cascade's CO2/PE onto the result (SAP 10.2
# Appendix U p.124), so the rating-cascade fields are pinned via
# `cert_to_inputs` directly; the demand cascade is pinned separately
# (corpus gauge + simulated case 45 Block-2 pins).
result = calculate_sap_from_inputs(cert_to_inputs(epc))
actual = getattr(result, field_name)
# Assert

View file

@ -157,6 +157,70 @@ def test_mixed_flat_pitched_roof_does_not_contaminate_pitched_u_value() -> None:
assert abs(result.roof_w_per_k - 44.6) <= 2.0
def test_ground_floor_flat_bills_floor_despite_flat_dwelling_type() -> None:
    # Arrange — a ground-floor flat that the mapper labelled "Top-floor flat"
    # at dwelling level, so the dwelling-level exposure heuristic
    # (`_dwelling_exposure`) suppresses the floor on the assumption that a
    # heated dwelling sits below. Its building part, however, lodges a
    # "Ground floor" floor_type. A ground floor is in contact with the ground
    # (RdSAP 10 §3.12) -> heat-loss floor. The Elmhurst Summary path lodges
    # this as a "Ground floor" floor_type (not the API floor_heat_loss=1
    # exposed code), so without the per-part ground signal the cascade
    # dropped the floor. Worksheet-validated by simulated case 45:
    # (28a) = 47.0 × U=0.54 = 25.38 W/K, billed as 0 before this fix (+7 SAP).
    floor_dimension = make_floor_dimension(
        total_floor_area_m2=47.0,
        room_height_m=2.4,
        heat_loss_perimeter_m=15.8,
        party_wall_length_m=0.0,
        floor=0,
    )
    ground_part = make_building_part(
        identifier=BuildingPartIdentifier.MAIN,
        construction_age_band="C",
        floor_type="Ground floor",
        floor_dimensions=[floor_dimension],
    )
    flat_epc = make_minimal_sap10_epc(
        total_floor_area_m2=47.0,
        country_code="ENG",
        dwelling_type="Top-floor flat",
        property_type="Flat",
        sap_building_parts=[ground_part],
    )

    # Act
    transmission = heat_transmission_from_cert(flat_epc)

    # Assert — the ground floor carries heat loss (≈ 47 × 0.54), not 0.
    assert transmission.floor_w_per_k > 20.0
def test_top_floor_flat_with_party_floor_stays_suppressed() -> None:
    # Arrange — the contrast case: a flat lodging "(another dwelling below)"
    # sits over a heated dwelling, so its floor is a party floor with no heat
    # loss (RdSAP 10 §3). The ground-floor override must NOT fire, which
    # proves the discriminator is the floor_type and not the flat label.
    floor_dimension = make_floor_dimension(
        total_floor_area_m2=47.0,
        room_height_m=2.4,
        heat_loss_perimeter_m=15.8,
        party_wall_length_m=0.0,
        floor=0,
    )
    party_part = make_building_part(
        identifier=BuildingPartIdentifier.MAIN,
        construction_age_band="C",
        floor_type="To another dwelling below",
        floor_dimensions=[floor_dimension],
    )
    flat_epc = make_minimal_sap10_epc(
        total_floor_area_m2=47.0,
        country_code="ENG",
        dwelling_type="Top-floor flat",
        property_type="Flat",
        sap_building_parts=[party_part],
    )

    # Act
    transmission = heat_transmission_from_cert(flat_epc)

    # Assert — party floor, no heat loss.
    assert transmission.floor_w_per_k == 0.0
def test_part_geometry_floorless_part_honours_full_key_contract() -> None:
# Arrange — a building part lodged with NO sap_floor_dimensions (e.g.
# a party-wall-only or RR-only extension; observed on 5 certs in a

View file

@ -46,6 +46,7 @@ from tests.domain.sap10_calculator.worksheet import (
_elmhurst_worksheet_001431_case21 as _w001431_case21,
_elmhurst_worksheet_001431_case43 as _w001431_case43,
_elmhurst_worksheet_001431_case44 as _w001431_case44,
_elmhurst_worksheet_001431_case45 as _w001431_case45,
)
@ -491,6 +492,67 @@ def test_case44_blower_door_pressure_test_matches_pdf() -> None:
_pin(vent.effective_monthly_ach[0], 0.5812, "§2 (25) Jan case44")
def test_case45_heat_pump_two_climate_cascade_matches_pdf() -> None:
    """Pin both climate cascades of simulated case 45 at abs=1e-4.

    Simulated case 45 (heat-pump ground-floor flat, postcode W6) is the
    1e-4 oracle for the SAP 10.2 Appendix U (p.124) two-climate-cascade
    split. The P960 prints the current dwelling twice:

    * Block 1 ("11a SAP rating / 12a CO2") on UK-AVERAGE weather (region
      0): space heat (98c) 7333.79, SAP (258) 60.5318, CO2 (272) 692.13.
    * Block 2 ("EPC COSTS, EMISSIONS AND PRIMARY ENERGY") on POSTCODE
      weather (PCDB Table 172, W6): space heat (98c) 5921.05, CO2 (272)
      626.78, primary energy (286) 6581.59.

    The SAP/EI rating reads the rating cascade; the EPC-displayed CO2/PE
    read the demand cascade.
    """
    # Arrange
    from domain.sap10_calculator.calculator import calculate_sap_from_inputs
    from domain.sap10_calculator.rdsap.cert_to_inputs import cert_to_demand_inputs

    pins = _w001431_case45
    epc = pins.build_epc()
    # The split only exists because the postcode resolves to local weather.
    assert local_climate_for_cert(epc) is not None

    # Act — run both climate cascades from the one cert.
    rating_inputs = cert_to_inputs(epc)
    rating = calculate_sap_from_inputs(rating_inputs)
    demand_inputs = cert_to_demand_inputs(epc)
    demand = calculate_sap_from_inputs(demand_inputs)

    # Assert — Block 1 (UK-average rating cascade).
    _pin(
        rating.space_heating_kwh_per_yr,
        pins.RATING_SPACE_HEATING_KWH,
        "(98c) rating case45",
    )
    _pin(
        rating.sap_score_continuous,
        pins.RATING_SAP_CONTINUOUS,
        "(258) rating case45",
    )
    assert rating.sap_score == pins.RATING_SAP_INTEGER
    _pin(rating.co2_kg_per_yr, pins.RATING_CO2_KG_PER_YR, "(272) rating case45")

    # Assert — Block 2 (postcode demand cascade), in worksheet line order.
    demand_checks = (
        (
            demand.space_heating_kwh_per_yr,
            pins.DEMAND_SPACE_HEATING_KWH,
            "(98c) demand case45",
        ),
        (
            demand.co2_kg_per_yr,
            pins.DEMAND_CO2_KG_PER_YR,
            "(272) demand case45",
        ),
        (
            demand.primary_energy_kwh_per_yr,
            pins.DEMAND_PRIMARY_ENERGY_KWH,
            "(286) demand case45",
        ),
    )
    for actual, expected, label in demand_checks:
        _pin(actual, expected, label)
def test_case6_main_2_emitter_and_control_extracted() -> None:
"""Simulated case 6's §14.1 Main Heating2 lodges its OWN emitter
("Underfloor Heating") and control ("SAP code 2110, ...") the two

View file

@ -30,11 +30,7 @@ from typing import Any
import pytest
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
from domain.sap10_calculator.rdsap.cert_to_inputs import (
SAP_10_2_SPEC_PRICES,
cert_to_inputs,
)
from domain.sap10_calculator.calculator import Sap10Calculator
_CORPUS = Path(
"backend/epc_api/json_samples/RdSAP-Schema-21.0.1/corpus.jsonl"
@ -119,10 +115,45 @@ _CORPUS = Path(
# 100010129331 (roof 110.5 -> 31.3 W/K, +13.1 -> -0.05 SAP). within-0.5
# 68.8% -> 69.5% (MAE 0.888 -> 0.859; PE 13.9 -> 13.6); 3-part cohort 56% ->
# 61%. Pinned in test_heat_transmission (by_kind split + no-contamination).
_MIN_WITHIN_HALF_SAP = 0.69
_MAX_SAP_MAE = 0.86
_MAX_CO2_MAE_TONNES = 0.30 # t CO2 / yr vs co2_emissions_current
_MAX_PE_PER_M2_MAE = 14.0 # kWh / m2 / yr vs energy_consumption_current
# GROUND-FLOOR FLAT FLOOR EXPOSURE (RdSAP 10 §3.12): a ground-floor flat whose
# dwelling_type the mapper labelled "Top-floor flat" had its ground floor (in
# contact with the ground -> heat loss) dropped, because the flat exposure
# heuristic keys on dwelling_type and the Summary path lodges the position as a
# "Ground floor" floor_type (not the API floor_heat_loss=1 code). Treating a
# "ground floor" floor_type as exposed (worksheet-validated to 1e-4 on simulated
# case 45: floor (28a) 0 -> 25.38 W/K, fabric (33) 75.6 -> 101.01) -> 69.5% ->
# 69.7% (MAE 0.859 -> 0.854). Pinned in test_heat_transmission.
# POSTCODE DEMAND CASCADE (SAP 10.2 Appendix U paragraph 1, p.124): the
# CO2/PE over-estimate diagnosed above as "per-cert mapper/demand fidelity"
# was largely a CLIMATE-cascade bug. The SAP/EI rating is computed on
# UK-average weather (Tables U1-U3 region 0), but EPC-displayed energy use,
# CO2 emissions and primary energy use POSTCODE-DISTRICT weather from PCDB
# Table 172 — "other calculations (such as for energy use and costs on EPCs)
# are done using local weather". We were feeding the UK-average demand to all
# three outputs, so warm-region certs (most of England, warmer than the
# UK-average) over-counted heating demand → CO2/PE high. `Sap10Calculator.
# calculate` now grafts the demand cascade's CO2/PE onto the rating cascade's
# SAP. Across the corpus this moved CO2 MAE 0.26 -> 0.12 t/yr (bias +0.18 ->
# +0.04) and PE MAE 13.6 -> 3.8 kWh/m2/yr (bias +9.0 -> +0.24); SAP unchanged
# (rating cascade). Worksheet-validated to 1e-4 on simulated case 45 (rating
# CO2 692.13; demand CO2 626.78, PE 6581.59). The residual PE/CO2 spread is
# now the genuine per-cert mapper-fidelity tail.
# DUAL-FUEL SECONDARY COLLISION (RdSAP 10 Table 32 / SAP 10.2 Table 12): the
# gov-API lodges fuel enum 9 ("dual fuel, mineral and wood") for a dual-fuel
# room heater, but enum 9 collides with the same-valued Table-32/12 code 9
# (LPG SC11F), so the price (3.48 vs dual-fuel 3.99 p/kWh) AND the CO2/PE
# factors (LPG 0.241 / 1.163 vs dual fuel 0.087 / 1.049) resolved to LPG —
# the secondary was under-costed (→ SAP over-rate) and over-counted on CO2.
# Canonicalising enum 9 (now in `_GOV_API_COLLISION_FUELS`) on the secondary
# cost + factor paths took within-0.5 69.7% -> 70.2% (MAE 0.854 -> 0.845;
# dual-fuel-secondary cohort 42.9% -> 49.0%, signed +0.55 -> +0.41) and CO2
# MAE 0.12 -> 0.08 t/yr (bias +0.04 -> 0.00). A prior session deferred enum 9
# ("direction not understood") while the PE/CO2 lens was confounded by the
# climate-cascade bug (fc7c4d2d); the corrected lens shows the over-rate.
_MIN_WITHIN_HALF_SAP = 0.70
_MAX_SAP_MAE = 0.85
_MAX_CO2_MAE_TONNES = 0.09 # t CO2 / yr vs co2_emissions_current
_MAX_PE_PER_M2_MAE = 4.0 # kWh / m2 / yr vs energy_consumption_current
def _load_corpus() -> list[dict[str, Any]]:
@ -147,8 +178,12 @@ def test_api_path_sap_accuracy_on_rdsap_21_0_1_corpus(
co2_signed_errs_t: list[float] = [] # our lodged, tonnes/yr
pe_signed_errs: list[float] = [] # our lodged, kWh/m²/yr
skipped = 0
_calculator = Sap10Calculator()
# Act — run the API → EpcPropertyData → calculator pipeline per cert.
# `Sap10Calculator.calculate` runs both climate cascades (SAP 10.2
# Appendix U p.124): the SAP rating on UK-average weather, CO2/PE on
# postcode-district weather — exactly the two figures the EPC lodges.
for doc in corpus:
lodged_sap = doc.get("energy_rating_current")
if lodged_sap is None:
@ -156,9 +191,7 @@ def test_api_path_sap_accuracy_on_rdsap_21_0_1_corpus(
continue
try:
epc = EpcPropertyDataMapper.from_api_response(doc)
result = calculate_sap_from_inputs(
cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
)
result = _calculator.calculate(epc)
except Exception:
# A mapper / calculator raise is a coverage gap tracked elsewhere
# (eval_api_sap_accuracy.py); here we gauge the certs that compute.