diff --git a/.claude/settings.json b/.claude/settings.json deleted file mode 100644 index f9ab3156..00000000 --- a/.claude/settings.json +++ /dev/null @@ -1,168 +0,0 @@ -{ - "permissions": { - "allow": [ - "Bash(python -c ' *)", - "Bash(python -m pytest tests/ -v --no-cov)", - "Bash(git -C /workspaces/model diff --stat)", - "Bash(python -c \"import moto; print\\('moto installed:', moto.__version__\\)\")", - "Bash(grep -E \"\\\\.\\(py|sql\\)$\")", - "Bash(xargs basename -a)", - "Bash(ls -la /workspaces/home 2>/dev/null | head -20)", - "Read(//workspaces/home/**)", - "Bash(command -v uv)", - "Bash(uv --version)", - "Bash(echo \"uv: $\\(uv --version\\)\")", - "Bash(python -m pyright --version)", - "Bash(npx --no-install pyright --version)", - "Bash(node /home/vscode/.npm/_npx/110e52990071af13/node_modules/pyright/dist/pyright.js --outputjson etl/hubspot/hubspotClient.py etl/hubspot/hubspotDataTodB.py etl/hubspot/project_data.py etl/hubspot/scripts/scraper/main.py backend/app/db/models/hubspot_project_data.py backend/app/db/models/hubspot_deal_data.py etl/hubspot/tests/test_scraper_handler.py etl/hubspot/tests/test_hubspot_data_to_db.py etl/hubspot/tests/test_hubspot_client_integration.py)", - "Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); s=d['summary']; print\\('errors',s['errorCount'],'warnings',s['warningCount']\\); [print\\(f\\\\\"{x['severity']}: {x['file'].split\\('/'\\)[-1]}:{x['range']['start']['line']+1} {x['rule'] if 'rule' in x else ''} -- {x['message'].splitlines\\(\\)[0]}\\\\\"\\) for x in d['generalDiagnostics'] if x['severity']=='error']\")", - "Bash(node /home/vscode/.npm/_npx/110e52990071af13/node_modules/pyright/dist/pyright.js etl/hubspot/project_data.py backend/app/db/models/hubspot_project_data.py)", - "Bash(python -c \"import sqlmodel, sqlalchemy, hubspot; print\\('deps importable'\\)\")", - "Bash(python -m pytest --version)", - "Bash(python -m pytest etl/hubspot/tests/ -m \"not integration\" -p no:cacheprovider -o addopts=\"\" 
-q)", - "Bash(node /home/vscode/.npm/_npx/110e52990071af13/node_modules/pyright/dist/pyright.js --pythonpath /usr/local/bin/python etl/hubspot/project_data.py backend/app/db/models/hubspot_project_data.py)", - "Bash(node /home/vscode/.npm/_npx/110e52990071af13/node_modules/pyright/dist/pyright.js --pythonpath /usr/local/bin/python etl/hubspot/company_data.py)", - "Bash(python /tmp/inspect_project.py)", - "Read(//home/vscode/github/**)", - "Bash(find / -maxdepth 6 -type d -name assessment-model)", - "Bash(terraform fmt *)", - "Bash(terraform init *)", - "Bash(terraform validate *)", - "Bash(python -m pytest tests/orchestration/test_landlord_description_overrides_orchestrator.py -q)", - "Bash(python -m pytest tests/orchestration/test_landlord_description_overrides_orchestrator.py tests/repositories/landlord_overrides -q --no-cov)", - "Bash(python -m py_compile tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py)", - "Bash(git add *)", - "Bash(git status *)", - "Bash(cp pyrightconfig.json /tmp/pyrightconfig.bak.json)", - "Bash(python3 -)", - "Bash(npx --yes pyright infrastructure/postgres/landlord_overrides_postgres_repository.py applications/landlord_description_overrides/handler.py tests/repositories/landlord_overrides/postgres/test_landlord_overrides_postgres_repository.py orchestration/classifiable_column.py)", - "Bash(git stash *)", - "Bash(npx --yes pyright applications/landlord_description_overrides/handler.py)", - "Bash(python3 -m pytest tests/repositories/landlord_overrides/postgres/test_landlord_overrides_postgres_repository.py -q)", - "Bash(python3 -m pytest tests/orchestration/test_landlord_description_overrides_orchestrator.py tests/repositories/landlord_overrides/ -q -p no:cov)", - "Bash(python3 -m pytest tests/orchestration/test_landlord_description_overrides_orchestrator.py tests/repositories/landlord_overrides/ -q)", - "Bash(cp pyrightconfig.json /tmp/pyrightconfig.bak2.json)", - "Bash(npx --yes pyright 
infrastructure/landlord_overrides/landlord_overrides_postgres_repository.py tests/repositories/landlord_overrides/postgres/test_landlord_overrides_postgres_repository.py)", - "Bash(GIT_EDITOR=true git rebase --continue)", - "Bash(git worktree *)", - "Bash(git branch *)", - "Bash(echo \"exit: $?\")", - "Bash(git reset *)", - "Bash(echo \"fetch-exit: $?\")", - "Bash(sed -n *)", - "Bash(set -e)", - "Bash(git rm *)", - "Bash(git ls-tree *)", - "Bash(command -v pyright)", - "Bash(git merge *)", - "Bash(git rev-list *)", - "Bash(git remote *)", - "Bash(git ls-remote *)", - "Bash(grep -v '\\\\.sample$')", - "Bash([ -f \".git/hooks/$h\" ])", - "Bash(python3 -m pytest tests/test_lambda_packaging.py -p no:cacheprovider --no-header -q)", - "Bash(python3 -m pytest tests/test_lambda_packaging.py -p no:cov --no-header -q)", - "Bash(python3 -m pytest tests/test_lambda_packaging.py --no-cov -p no:cacheprovider -q)", - "Bash(python3 -m pytest tests/test_lambda_packaging.py --no-cov -q)", - "Bash(python3 -m pyright tests/test_lambda_packaging.py)", - "Bash(python3 -m pytest tests/ --collect-only --no-cov -q)", - "Bash(python3 -c \"import yaml; [print\\(f, 'OK'\\) for f in ['.github/workflows/ddd_tests.yml','.github/workflows/unit_tests.yml'] if yaml.safe_load\\(open\\(f\\)\\)]\")", - "Bash(python3 -m pytest tests/ -q --no-cov)", - "Bash(python3 -c ' *)", - "Bash(python3 -m pyright --outputjson scripts/hyde/main.py)", - "Bash(python3 -c \"import json,sys; d=json.load\\(sys.stdin\\); s=d['summary']; print\\('errors:',s['errorCount'],'warnings:',s['warningCount']\\); [print\\(f\\\\\" L{e['range']['start']['line']+1}: {e['message'].splitlines\\(\\)[0]}\\\\\"\\) for e in d['generalDiagnostics'] if e['severity']=='error']\")", - "Bash(python3 -c \"import json,sys; d=json.load\\(sys.stdin\\); s=d['summary']; print\\('pyright errors:',s['errorCount'],'warnings:',s['warningCount']\\); [print\\(f\\\\\" L{e['range']['start']['line']+1}: {e['message'].splitlines\\(\\)[0]}\\\\\"\\) for e in 
d['generalDiagnostics'] if e['severity']=='error']\")", - "Bash(python3 main.py)", - "Bash(netstat -ltnp)", - "Bash(fuser 8000/tcp)", - "Bash(kill 351610 685390 351625)", - "Read(//proc/531213/net/**)", - "Bash(ps -p 351610 -o pid=)", - "Bash(python -)", - "Bash(python -m pytest \"datatypes/epc/domain/tests/test_from_rdsap_schema.py::TestRdSap20_0_0ReducedFieldSynthesis::test_cert_omitting_sap_windows_maps_without_missing_required_field\")", - "Bash(python -m pytest datatypes/epc/domain/tests/test_from_rdsap_schema.py infrastructure/epc_client/tests/test_mapper_corpus.py)", - "Bash(git -C /workspaces/model stash push -u -- datatypes/epc/schema/rdsap_schema_20_0_0.py datatypes/epc/domain/mapper.py datatypes/epc/domain/tests/test_from_rdsap_schema.py)", - "Bash(python -m pytest \"datatypes/epc/domain/tests/test_from_rdsap_schema.py::TestFromRdSapSchema21_0_1::test_total_floor_area\")", - "Bash(git -C /workspaces/model stash pop)", - "Bash(python -m pytest \"datatypes/epc/domain/tests/test_from_rdsap_schema.py::TestRdSap20_0_0ReducedFieldSynthesis::test_band_more_than_typical_scales_glazing_by_1_25\")", - "Bash(python -m pytest \"datatypes/epc/domain/tests/test_from_rdsap_schema.py::TestRdSap20_0_0ReducedFieldSynthesis\")", - "Bash(python -m pytest \"datatypes/epc/domain/tests/test_from_rdsap_schema.py::TestRdSap20_0_0ReducedFieldSynthesis::test_synthesised_glazing_type_routed_through_cascade\")", - "Bash(python -m pytest infrastructure/epc_client/tests/test_mapper_corpus.py -k \"wip_schema_20\" -q -p no:cov --no-header -rN)", - "Bash(python -m pytest infrastructure/epc_client/tests/test_mapper_corpus.py -k \"wip_schema_20\" -q --no-header)", - "Bash(timeout 150 python scripts/eon/find_epc_data.py)", - "Bash(python -m scripts.run_modelling_e2e --scenario-id 1266 --portfolio-id 785 --measures high_heat_retention_storage_heaters,solar_pv 709634 709635 709636 709638 709639 709640 709641 709642 709644)", - "Read(//home/vscode/.claude/skills/grill-me/**)", - "Bash(awk 
'{print $9, $5}')", - "Bash(grep -E \"\\\\.py$\")", - "Bash(python3 scripts/eon/harvest_certs.py)", - "Bash(python scripts/eon/harvest_certs.py)", - "Bash(git commit -q -m ' *)", - "Bash(timeout 250 npx -y pyright@1.1.410 datatypes/epc/domain/mapper.py)", - "Bash(git check-ignore *)", - "Bash(timeout 250 npx -y pyright@1.1.410 datatypes/epc/schema/rdsap_schema_18_0.py datatypes/epc/domain/mapper.py)", - "Bash(timeout 250 npx -y pyright@1.1.410 datatypes/epc/schema/rdsap_schema_17_1.py datatypes/epc/domain/mapper.py)", - "Bash(python -m scripts.run_modelling_e2e --scenario-id 1266 --portfolio-id 785 --measures high_heat_retention_storage_heaters,solar_pv 709634 709635 709636 709638 709639 709640 709641 709642 709643 709644 709645 709637)", - "Read(//workspaces/**)", - "Bash(python -m scripts.run_modelling_e2e --scenario-id 1266 --portfolio-id 785 --measures high_heat_retention_storage_heaters,solar_pv 709634)", - "Bash(python 2_export_data.py)", - "Bash(git commit -q -m 'Map RdSAP-Schema-17.0 certs to EpcPropertyData ๐ŸŸฅ *)", - "Bash(python -m py_compile tests/utilities/floats.py backend/app/db/functions/tests/test_portfolio_functions.py backend/documents_parser/tests/test_summary_pdf_mapper_chain.py backend/documents_parser/tests/test_heating_systems_corpus.py)", - "Bash(PYTHONPATH=/workspaces/model python -)", - "Bash(npx --no-install pyright datatypes/epc/schema/sap_schema_17_1.py datatypes/epc/domain/mapper.py datatypes/epc/domain/tests/test_from_sap_schema.py)", - "Bash(git commit -q -m 'Map full-SAP cert identity and scalar fields to EpcPropertyData ๐ŸŸฉ *)", - "Bash(npx --no-install pyright --outputjson datatypes/epc/domain/mapper.py)", - "Bash(git commit -q -m 'Carry full-SAP measured fabric U-value descriptions into the domain model ๐ŸŸฉ *)", - "Bash(python scripts/hyde/elmhurst_session.py --help)", - "Bash(python scripts/hyde/elmhurst_session.py status)", - "Bash(DISPLAY=:99 import -window root /tmp/hyde-viewer/elm_now2.png)", - "Bash(DISPLAY=:99 timeout 
90 python scripts/hyde/elmhurst_explore.py \"https://rdsap10online.elmhurstenergy.co.uk/Processing/WebFormAddress.aspx?Guid=B44A0DB4-4C08-4241-B818-86F060172105&Referrer=https%3a%2f%2fmembers.elmhurstenergy.co.uk%2fRdsapZone%2fHome.aspx\")", - "Bash(DISPLAY=:99 timeout 90 python scripts/hyde/elmhurst_explore.py \"https://rdsap10online.elmhurstenergy.co.uk/Processing/WebFormPropertyDescription.aspx?Guid=B44A0DB4-4C08-4241-B818-86F060172105\")", - "Bash(DISPLAY=:99 timeout 90 python scripts/hyde/elmhurst_explore.py \"https://rdsap10online.elmhurstenergy.co.uk/Processing/WebFormAddress.aspx?Guid=B44A0DB4-4C08-4241-B818-86F060172105\")", - "Bash(DISPLAY=:99 timeout 150 python scripts/hyde/elmhurst_fill.py)", - "Bash(pkill -f \"rdsap10\\\\|for-testing\")", - "Bash(rm -f scripts/hyde/.elmhurst-session/Singleton*)", - "Bash(DISPLAY=:99 timeout 280 python -u scripts/hyde/elmhurst_fill.py --page property_description --commit)", - "Bash(npx --no-install pyright scripts/hyde/elmhurst_download.py)", - "Edit(/.claude/skills/expand-sap-accuracy-corpus/**)", - "Bash(PYTHONPATH=/workspaces/model python scripts/fetch_real_life_epc_sample.py 10093116528)", - "Bash(PYTHONPATH=/workspaces/model python -c ' *)", - "Bash(bash scripts/hyde/start_viewer.sh)", - "Bash(DISPLAY=:99 ELMHURST_URL=\"https://rdsap10online.elmhurstenergy.co.uk/Processing/WebFormAddress.aspx?Guid=B44A0DB4-4C08-4241-B818-86F060172105\" python scripts/hyde/elmhurst_session.py login)", - "Bash(break)", - "Bash(DISPLAY=:99 timeout 90 python /tmp/check_session.py)", - "Bash(pkill -9 -f \"elm_login_hold.py\")", - "Bash(pkill -9 -f \"elmhurst-session\")", - "Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 150 python /tmp/elm_build_dims.py)", - "Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_dump_full.py Walls WebFormWalls.aspx)", - "Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_walls_disc.py)", - 
"Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_roof_disc.py)", - "Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_dump_full.py Openings WebFormOpenings.aspx)", - "Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_win_probe.py)", - "Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 220 python /tmp/elm_build_openings.py)", - "Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_tab_probe.py)", - "Bash(ELM_ACCESS=\"P960-0001\" ELM_PWD=\"y22bseGUzr\" DISPLAY=:99 timeout 120 python /tmp/elm_del_probe.py)", - "Bash(npx --no-install pyright datatypes/epc/domain/epc_property_data.py datatypes/epc/domain/mapper.py domain/sap10_calculator/rdsap/cert_to_inputs.py)", - "Bash(PYTHONPATH=/workspaces/model python -m pytest domain/sap10_calculator/tests/test_sap_accuracy_corpus.py -q --no-cov)", - "Bash(PYTHONPATH=/workspaces/model python -m pytest tests/domain/sap10_calculator/test_real_cert_sap_accuracy.py -q --no-cov)", - "Edit(/.claude/skills/epc-to-elmhurst-rdsap-inputs/**)", - "Bash(python3 -c \"import sys; sys.path.insert\\(0,'scripts/hyde'\\); import elmhurst_lib; print\\('elmhurst_lib imports OK:', [f for f in dir\\(elmhurst_lib\\) if not f.startswith\\('_'\\)][:12]\\)\")", - "Bash(PYTHONPATH=/workspaces/model python -m pytest tests/domain/sap10_calculator/test_real_cert_sap_accuracy.py tests/infrastructure/epc_client/test_sap_accuracy_corpus.py -q --no-cov)", - "Bash(PYTHONPATH=/workspaces/model python scripts/fetch_real_life_epc_sample.py 10093116543)", - "Bash(echo \"---EXIT $?---\")", - "Bash(sudo apt-get update -o Dir::Etc::sourceparts=-)", - "Bash(apt-cache policy *)", - "Bash(sudo apt-get update)", - "Bash(bash scripts/hyde/start_viewer.sh restart)", - "Bash(echo \"=== listen sockets \\(port 6080 / 5900\\) ===\"; ss -ltnp 2>/dev/null | grep -E ':6080|:5900' || netstat 
-ltnp 2>/dev/null | grep -E ':6080|:5900' || echo \"ss/netstat unavailable\" *)", - "Read(//proc/5476/**)", - "Bash(sudo python -m playwright install-deps chromium)", - "Bash(python -m playwright install chromium)", - "Bash(python -m playwright --version)", - "Bash(pip show *)" - ], - "additionalDirectories": [ - "/tmp", - "/workspaces/model/scripts/hyde/.elmhurst-session" - ] - } -} diff --git a/.gitignore b/.gitignore index 9cb34cb3..9c77b311 100644 --- a/.gitignore +++ b/.gitignore @@ -304,6 +304,7 @@ backlog/* # Local Claude config files .claude/*modelling_cohort.csv .claude/settings.local.json +.claude/settings.json # Local EPC debug cache (scripts/eon) scripts/eon/epc_cache.pkl diff --git a/backend/documents_parser/tests/fixtures/Summary_001431_case45.pdf b/backend/documents_parser/tests/fixtures/Summary_001431_case45.pdf new file mode 100644 index 00000000..48a5273e Binary files /dev/null and b/backend/documents_parser/tests/fixtures/Summary_001431_case45.pdf differ diff --git a/domain/sap10_calculator/calculator.py b/domain/sap10_calculator/calculator.py index f7099f18..11cecf73 100644 --- a/domain/sap10_calculator/calculator.py +++ b/domain/sap10_calculator/calculator.py @@ -42,7 +42,7 @@ Appendix L + U. RdSAP10 Table 32 (p.95) for fuel prices/CO2/PE factors. 
from __future__ import annotations from abc import ABC, abstractmethod -from dataclasses import dataclass, field +from dataclasses import dataclass, field, replace from typing import Final, Optional, TYPE_CHECKING from domain.sap10_calculator.climate.appendix_u import external_temperature_c @@ -863,6 +863,25 @@ class Sap10Calculator(SapCalculator): """ def calculate(self, epc: "EpcPropertyData") -> SapResult: - from domain.sap10_calculator.rdsap.cert_to_inputs import cert_to_inputs + # SAP 10.2 Appendix U paragraph 1 (p.124): the SAP and EI ratings are + # computed on UK-average climate (so ratings are nationally + # comparable), but "other calculations (such as for energy use and + # costs on EPCs) are done using local weather" — the EPC-displayed + # CO2 emissions and primary energy use postcode-district weather from + # the PCDB. So we run two climate cascades and graft the demand + # cascade's CO2/PE onto the rating cascade's SAP result. (Worked + # example: simulated case 45 — rating SAP 60.53/CO2 692.13 on + # UK-average; demand CO2 626.78/PE 6581.59 on the W6 postcode.)
+ from domain.sap10_calculator.rdsap.cert_to_inputs import ( + cert_to_demand_inputs, + cert_to_inputs, + ) - return calculate_sap_from_inputs(cert_to_inputs(epc)) + rating = calculate_sap_from_inputs(cert_to_inputs(epc)) + demand = calculate_sap_from_inputs(cert_to_demand_inputs(epc)) + return replace( + rating, + co2_kg_per_yr=demand.co2_kg_per_yr, + primary_energy_kwh_per_yr=demand.primary_energy_kwh_per_yr, + primary_energy_kwh_per_m2=demand.primary_energy_kwh_per_m2, + ) diff --git a/domain/sap10_calculator/rdsap/cert_to_inputs.py b/domain/sap10_calculator/rdsap/cert_to_inputs.py index 60da4ce1..cd146109 100644 --- a/domain/sap10_calculator/rdsap/cert_to_inputs.py +++ b/domain/sap10_calculator/rdsap/cert_to_inputs.py @@ -2772,7 +2772,11 @@ def _secondary_fuel_cost_gbp_per_kwh( meter_type, fuel_is_electric=True ): return _secondary_off_peak_rate_gbp_per_kwh(meter_type) - return prices.unit_price_p_per_kwh(sec_fuel) * _PENCE_TO_GBP + # Normalise colliding gov-API enum codes (e.g. 9 dual fuel, whose + # value collides with Table-32 9 = LPG SC11F) before the price lookup, + # exactly as the main-fuel boundary does — otherwise the same-value + # Table lookup mis-prices the secondary at the colliding fuel's rate. + return prices.unit_price_p_per_kwh(canonical_fuel_code(sec_fuel)) * _PENCE_TO_GBP def _pv_array_generation_kwh_per_yr( @@ -3927,6 +3931,10 @@ def _secondary_fuel_code(epc: EpcPropertyData) -> int: code = _int_or_none(epc.sap_heating.secondary_fuel_type) if code is None: return _STANDARD_ELECTRICITY_FUEL_CODE + # Normalise colliding gov-API enum codes (e.g. 9 dual fuel, whose value + # collides with the LPG Table code) so the CO2/PE factor lookups resolve + # to the lodged fuel — mirrors the main-fuel boundary + the cost side.
+ code = canonical_fuel_code(code) or code if code in CO2_KG_PER_KWH: return code return _table_12_factor_fuel_code(code) @@ -7218,7 +7226,21 @@ def cert_to_inputs( epc=epc, ) if apm_efficiencies is not None: - eff, water_eff = apm_efficiencies + # η_space (N3.6) always replaces the Table 4a default — the heat + # pump is the space main. η_water (N3.7a) applies ONLY when the DHW + # is actually heated by that main (WHC "from main": 901/902/914). A + # separate electric immersion (WHC 903) or other independent DHW + # source keeps its own water efficiency (immersion = 100%), not the + # HP's water SCOP — otherwise a HP-space + immersion-DHW dwelling + # under-counts its hot-water fuel (case 45: water 2130 -> 1894 kWh, + # +1.5 SAP, because 187.5% × 0.6 in-use = 112.5% was applied where + # the worksheet (216) uses 100%). + eff, apm_water_eff = apm_efficiencies + if ( + epc.sap_heating.water_heating_code + in _WATER_INHERIT_FROM_MAIN_CODES + ): + water_eff = apm_water_eff if ( _is_heat_network_main(main) and epc.sap_heating.water_heating_code in _WATER_INHERIT_FROM_MAIN_CODES diff --git a/domain/sap10_calculator/tables/table_32.py b/domain/sap10_calculator/tables/table_32.py index 8377fe86..14544aea 100644 --- a/domain/sap10_calculator/tables/table_32.py +++ b/domain/sap10_calculator/tables/table_32.py @@ -121,11 +121,17 @@ API_FUEL_TO_TABLE_32: Final[dict[int, int]] = { # 33 = coal — Table-32 code 33 is the electricity 10-hour low rate # 7.5 p vs house coal 3.67 p (and `is_electric_fuel_code(33)` # wrongly classified the coal main as electric). -# DEFERRED (not included): API 9 = dual fuel (mineral + wood) is also a -# collision (Table-32 9 = LPG SC11F 3.48 p vs dual fuel 3.99 p) but the -# 0.45 p delta nets neutral-to-negative on the (outlier-dominated) -# dual-fuel certs and shifts them in a direction not yet understood — -# investigate separately. +# 9 = dual fuel (mineral + wood) — Table-32 code 9 is LPG SC11F +# 3.48 p vs dual fuel 3.99 p.
The gov-API lodges API enum 9 for a +# dual-fuel appliance (description "Room heaters, dual fuel +# (mineral and wood)"), but the same-value Table-32 lookup returns +# LPG 3.48 p, under-pricing the (mostly secondary) dual-fuel heat. +# A prior session deferred this as "direction not understood" +# while the EPC PE/CO2 lens was confounded by the climate-cascade +# bug (fixed in fc7c4d2d); on the corrected lens the dual-fuel +# secondary cohort over-rates (SAP too high = cost too low) by +# +0.55 signed, and pricing UP to the dual-fuel 3.99 p row reduces +# that over-rate — the correct direction. # # COMMUNITY FUELS (handled elsewhere, NOT here): API 30 (waste # combustion), 31 (biomass) and 32 (biogas) — all "(community)" in the @@ -140,7 +146,7 @@ API_FUEL_TO_TABLE_32: Final[dict[int, int]] = { # cert_to_inputs), where the community meaning is unambiguous. Community # fuels 20/25 do not collide with an electricity code, so they resolve # correctly through the heat-network path without any special handling. -_GOV_API_COLLISION_FUELS: Final[frozenset[int]] = frozenset({5, 33}) +_GOV_API_COLLISION_FUELS: Final[frozenset[int]] = frozenset({5, 9, 33}) def canonical_fuel_code(fuel_code: Optional[int]) -> Optional[int]: diff --git a/domain/sap10_calculator/worksheet/heat_transmission.py b/domain/sap10_calculator/worksheet/heat_transmission.py index 0fed5174..80c65aac 100644 --- a/domain/sap10_calculator/worksheet/heat_transmission.py +++ b/domain/sap10_calculator/worksheet/heat_transmission.py @@ -1166,6 +1166,16 @@ def heat_transmission_from_cert( # lodgement is authoritative. Mirrors the roof's "another dwelling # above" override above. Cert 2115-4121-4711-9361-3686.
part_floor_is_party = "another dwelling below" in (part.floor_type or "").lower() + # A part whose floor_type is a GROUND floor sits in contact with the + # ground (RdSAP 10 §3.12) and is therefore a heat-loss floor, even when + # the dwelling-level flat heuristic (`_dwelling_exposure`) defaults a + # flat to has_exposed_floor=False. The Elmhurst Summary path lodges a + # ground-floor flat's position as a "Ground floor" floor_type (not the + # API floor_heat_loss=1 exposed code), so without this signal the + # cascade dropped its ground floor entirely — simulated case 45 (a + # ground-floor flat the mapper labelled "Top-floor flat"): worksheet + # (28a) = 47.0 × 0.54 = 25.38 W/K billed as 0, over-rating by +7 SAP. + part_floor_is_ground = "ground floor" in (part.floor_type or "").lower() # A floor lodged as a heat-loss floor — *exposed* (API # floor_heat_loss=1 → `is_exposed_floor`, "an exposed floor if there # is an open space below") or *above a partially heated space* (API @@ -1178,6 +1188,7 @@ def heat_transmission_from_cert( # the "another dwelling below" party signal overrides it downward. part_has_exposed_floor = ( exposure.has_exposed_floor or is_exposed_floor or is_above_partial + or part_floor_is_ground ) and not part_floor_is_party floor_area_total = _round_half_up( geom["ground_floor_area_m2"] if part_has_exposed_floor else 0.0, diff --git a/scripts/dive_cert.py b/scripts/dive_cert.py new file mode 100644 index 00000000..8d6da1b4 --- /dev/null +++ b/scripts/dive_cert.py @@ -0,0 +1,108 @@ +"""Deep-dive a single corpus cert: lodged vs computed SAP/CO2/PE + the full +intermediate line-ref dump + the mapped fabric/heat-loss inputs, so the +diverging line is visible WITHOUT an Elmhurst worksheet.
+ +USAGE + PYTHONPATH=/workspaces/model python scripts/dive_cert.py + PYTHONPATH=/workspaces/model python scripts/dive_cert.py --filter wall_insulation_type=3 [--n 8] +""" +from __future__ import annotations + +import json +import sys +from pathlib import Path +from typing import Any + +from datatypes.epc.domain.mapper import EpcPropertyDataMapper +from domain.sap10_calculator.calculator import calculate_sap_from_inputs +from domain.sap10_calculator.rdsap.cert_to_inputs import ( + SAP_10_2_SPEC_PRICES, + cert_to_demand_inputs, + cert_to_inputs, +) +from scripts.profile_api_error import features + +_CORPUS = Path("backend/epc_api/json_samples/RdSAP-Schema-21.0.1/corpus.jsonl") + + +def _cert_id(doc: dict[str, Any]) -> str: + return str( + doc.get("certificate_number") + or doc.get("lmk_key") + or doc.get("uprn") + or "?" + ) + + +def _dump(doc: dict[str, Any]) -> None: + cert = _cert_id(doc) + lodged_sap = doc.get("energy_rating_current") + lodged_co2 = doc.get("co2_emissions_current") + lodged_pe = doc.get("energy_consumption_current") + epc = EpcPropertyDataMapper.from_api_response(doc) + r = calculate_sap_from_inputs(cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)) + # SAP/EI rating is the UK-average rating cascade (`r`); EPC CO2/PE use the + # postcode demand cascade (SAP 10.2 Appendix U p.124). Display CO2/PE from + # the demand cascade so they compare like-for-like with the lodged EPC. 
+ d = calculate_sap_from_inputs(cert_to_demand_inputs(epc, prices=SAP_10_2_SPEC_PRICES)) + print("=" * 90) + print(f"CERT {cert}") + print( + f" SAP lodged={lodged_sap} ours={r.sap_score_continuous:.2f} " + f"d={r.sap_score_continuous - (lodged_sap or 0):+.2f}" + ) + if lodged_co2 is not None: + print( + f" CO2 lodged={lodged_co2:.3f} ours={d.co2_kg_per_yr / 1000:.3f} t " + f"d={d.co2_kg_per_yr / 1000 - lodged_co2:+.3f} (demand cascade)" + ) + if lodged_pe is not None: + print( + f" PE lodged={lodged_pe:.1f} ours={d.primary_energy_kwh_per_m2:.1f} " + f"d={d.primary_energy_kwh_per_m2 - lodged_pe:+.1f} kWh/m2 (demand cascade)" + ) + print( + f" energy kWh/yr: spaceheat={r.space_heating_kwh_per_yr:.0f} " + f"main={r.main_heating_fuel_kwh_per_yr:.0f} " + f"sec={r.secondary_heating_fuel_kwh_per_yr:.0f} " + f"hw={r.hot_water_kwh_per_yr:.0f} light={r.lighting_kwh_per_yr:.0f} " + f"pumpfan={r.pumps_fans_kwh_per_yr:.0f}" + ) + d = epc.__dict__ + print(" --- key mapped inputs ---") + f = features(doc) + for k in ( + "property_type", "built_form", "age_band", "main_sap_code", + "main_heat_cat", "main_fuel", "has_pcdb_main", "main_data_source", + "wall_construction", "wall_insulation_type", "roof_codes", + "roof_insulation_thickness", "whc", "water_fuel", "immersion_type", + "has_cylinder", "has_secondary", "has_pv", "mains_gas", "n_building_parts", + ): + print(f" {k:26s}= {f.get(k)}") + print(" --- intermediate line refs ---") + inter = r.intermediate or {} + for k in sorted(inter): + print(f" {k:34s}= {inter[k]:.4f}") + + +def main() -> None: + docs = [json.loads(l) for l in _CORPUS.read_text().splitlines() if l.strip()] + if "--filter" in sys.argv: + spec = sys.argv[sys.argv.index("--filter") + 1] + key, _, val = spec.partition("=") + n = int(sys.argv[sys.argv.index("--n") + 1]) if "--n" in sys.argv else 6 + hits = [d for d in docs if str(features(d).get(key)) == val] + print(f"{len(hits)} certs match {spec}; dumping first {n}") + for d in hits[:n]: + _dump(d) + return + 
target = sys.argv[1] + for d in docs: + if target in _cert_id(d): + _dump(d) + return + print(f"no cert matching {target}") + + +if __name__ == "__main__": + main() diff --git a/scripts/profile_corpus_error.py b/scripts/profile_corpus_error.py new file mode 100644 index 00000000..6ef2c4e6 --- /dev/null +++ b/scripts/profile_corpus_error.py @@ -0,0 +1,226 @@ +"""Profile API-path SAP/CO2/PE error over the COMMITTED corpus (no /tmp cache). + +WHAT THIS IS FOR +---------------- +The accuracy thesis: the gov-API response carries the full SAP input set and our +calculator is deterministic, so EVERY cert should reproduce the lodged +SAP/CO2/PE. Any divergence is an input-handling bug, not irreducible noise. + +This is the per-cert microscope for that loop. It runs the in-repo corpus +(``backend/epc_api/json_samples/RdSAP-Schema-21.0.1/corpus.jsonl``) through the +real ``from_api_response`` -> ``cert_to_inputs`` -> ``calculate_sap_from_inputs`` +path, then: + 1. buckets the signed SAP error by raw-API feature (reusing + ``profile_api_error.features``) ranked by wasted accuracy, so a + dropped/mis-mapped field surfaces as a biased bucket; + 2. for the worst over- and under-raters, prints the PE/CO2-vs-cost split so + each can be triaged WITHOUT a worksheet: + - PE & CO2 both ~match lodged but SAP off -> COST-side bug + (tariff / PV export / standing charge / secondary fuel); + - PE/CO2 also off -> DEMAND-side bug + (fabric / ventilation / gains / heating demand). 
+ +USAGE +----- + PYTHONPATH=/workspaces/model python scripts/profile_corpus_error.py + PYTHONPATH=/workspaces/model python scripts/profile_corpus_error.py --min-n 15 --worst 40 +""" +from __future__ import annotations + +import json +import statistics as stats +import sys +from collections import defaultdict +from pathlib import Path +from typing import Any, Optional + +from datatypes.epc.domain.mapper import EpcPropertyDataMapper +from domain.sap10_calculator.calculator import calculate_sap_from_inputs +from domain.sap10_calculator.rdsap.cert_to_inputs import ( + cert_to_demand_inputs, + SAP_10_2_SPEC_PRICES, + cert_to_inputs, +) +from scripts.profile_api_error import features + +_CORPUS = Path("backend/epc_api/json_samples/RdSAP-Schema-21.0.1/corpus.jsonl") + + +class Row: + __slots__ = ( + "cert", "sap_err", "co2_err_t", "pe_err", "lodged_sap", + "our_sap", "lodged_pe", "our_pe", "feats", + ) + + def __init__( + self, + cert: str, + sap_err: float, + co2_err_t: Optional[float], + pe_err: Optional[float], + lodged_sap: float, + our_sap: float, + lodged_pe: Optional[float], + our_pe: float, + feats: dict[str, Any], + ) -> None: + self.cert = cert + self.sap_err = sap_err + self.co2_err_t = co2_err_t + self.pe_err = pe_err + self.lodged_sap = lodged_sap + self.our_sap = our_sap + self.lodged_pe = lodged_pe + self.our_pe = our_pe + self.feats = feats + + +def _load() -> list[dict[str, Any]]: + return [ + json.loads(line) + for line in _CORPUS.read_text().splitlines() + if line.strip() + ] + + +def _compute(corpus: list[dict[str, Any]]) -> tuple[list[Row], int, int]: + rows: list[Row] = [] + skipped = 0 + raised = 0 + for doc in corpus: + lodged_sap = doc.get("energy_rating_current") + if lodged_sap is None: + skipped += 1 + continue + try: + epc = EpcPropertyDataMapper.from_api_response(doc) + result = calculate_sap_from_inputs( + cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) + ) + # SAP/EI rating is the UK-average rating cascade (`result`); + # the 
def _triage(r: Row) -> str:
    """Label a row as cost-side or demand-side from its PE/CO2 errors.

    Returns:
        "?" when either the primary-energy or the CO2 error is missing;
        "COST" when both sit inside their tolerances (demand reproduces, so
        the SAP miss is attributed to the cost side); "DEMAND" otherwise.
    """
    if r.pe_err is None or r.co2_err_t is None:
        return "?"
    pe_ok = abs(r.pe_err) < 5.0  # kWh/m2/yr
    co2_ok = abs(r.co2_err_t) < 0.10  # t/yr
    if pe_ok and co2_ok:
        return "COST"  # demand reproduces, cost-side off
    return "DEMAND"


def _int_flag(argv: list[str], flag: str, default: int) -> int:
    """Return the integer that follows ``flag`` in ``argv``, else ``default``.

    Raises:
        SystemExit: ``flag`` is present but is the last argument, or its
            value is not an integer. This replaces the bare
            IndexError/ValueError traceback with a readable message.
    """
    if flag not in argv:
        return default
    value_pos = argv.index(flag) + 1
    if value_pos >= len(argv):
        raise SystemExit(f"{flag} expects an integer value")
    try:
        return int(argv[value_pos])
    except ValueError:
        raise SystemExit(
            f"{flag} expects an integer value, got {argv[value_pos]!r}"
        ) from None


def _report(
    rows: list[Row],
    skipped: int,
    raised: int,
    *,
    min_n: int,
    n_worst: int,
) -> None:
    """Print the accuracy summary, the feature buckets and the worst rows.

    Args:
        rows: computed rows; ``sap_err`` is our SAP minus the lodged SAP.
        skipped: count printed as "no-lodged-SAP".
        raised: count printed as "raised".
        min_n: smallest bucket size that is still listed.
        n_worst: how many over-raters and under-raters to dump.
    """
    n = len(rows)
    if n == 0:
        # The percentage, the means and rows[0] below all fail on an empty
        # list, so say so and stop instead of crashing.
        print(
            f"profiled 0 certs ({skipped} no-lodged-SAP, {raised} raised) | "
            f"nothing to profile"
        )
        return

    within = sum(1 for r in rows if abs(r.sap_err) < 0.5) / n * 100
    print(
        f"profiled {n} certs ({skipped} no-lodged-SAP, {raised} raised) | "
        f"within-0.5 = {within:.1f}% | "
        f"signed {stats.mean(r.sap_err for r in rows):+.3f} | "
        f"MAE {stats.mean(abs(r.sap_err) for r in rows):.3f}"
    )
    out = [r for r in rows if abs(r.sap_err) >= 0.5]
    labels = [_triage(r) for r in out]  # classify each outlier once
    cost_n = labels.count("COST")
    dem_n = labels.count("DEMAND")
    print(
        f"of {len(out)} outside-0.5: {dem_n} DEMAND-side (PE/CO2 also off), "
        f"{cost_n} COST-side (PE/CO2 match), {len(out) - cost_n - dem_n} unknown"
    )
    print("=" * 104)

    # Feature names come from the first row; other rows are read with .get,
    # so a row lacking a feature lands in the "None" bucket.
    feat_names = list(rows[0].feats)
    bucket_lines: list[tuple[float, str]] = []
    for fn in feat_names:
        groups: dict[str, list[float]] = defaultdict(list)
        for r in rows:
            groups[str(r.feats.get(fn))].append(r.sap_err)
        for val, es in groups.items():
            cnt = len(es)
            if cnt < min_n:
                continue
            w05 = sum(1 for e in es if abs(e) < 0.5)
            mabs = stats.mean(abs(e) for e in es)
            # Ranking score: rows outside 0.5 times the bucket's mean |err|.
            waste = (cnt - w05) * mabs
            bucket_lines.append((waste, (
                f" {fn:22s}={val:<20.20s} n={cnt:4d} "
                f"within0.5={w05 / cnt * 100:4.0f}% "
                f"signed={stats.mean(es):+6.2f} mean|err|={mabs:5.2f} "
                f"[waste={waste:6.0f}]"
            )))
    print(f"TOP ERROR-CARRYING BUCKETS (n_out x mean|err|; min-n={min_n}):")
    for _, line in sorted(bucket_lines, key=lambda x: -x[0])[:40]:
        print(line)

    print("=" * 104)
    print(f"WORST {n_worst} OVER-RATERS (our SAP too high -> we under-count loss/cost):")
    _dump_worst(sorted(rows, key=lambda r: -r.sap_err)[:n_worst])
    print("-" * 104)
    print(f"WORST {n_worst} UNDER-RATERS (our SAP too low -> we over-count loss/cost):")
    _dump_worst(sorted(rows, key=lambda r: r.sap_err)[:n_worst])


def main() -> None:
    """Profile the loaded corpus and print the report.

    Command-line flags (both optional, read from ``sys.argv``):
        --min-n N   smallest bucket size to list (default 12)
        --worst N   number of worst over-/under-raters to dump (default 30)
    """
    min_n = _int_flag(sys.argv, "--min-n", 12)
    n_worst = _int_flag(sys.argv, "--worst", 30)

    rows, skipped, raised = _compute(_load())
    _report(rows, skipped, raised, min_n=min_n, n_worst=n_worst)


def _dump_worst(rows: list[Row]) -> None:
    """Print one aligned line per row: SAP figures, errors, triage, features."""
    print(
        f" {'cert':>16s} {'lodgSAP':>7s} {'ourSAP':>7s} {'dSAP':>6s} "
        f"{'dPE':>6s} {'dCO2t':>6s} {'split':>6s} "
        f"heat/prop/wall/roof/fuel"
    )
    # Placeholder padded to the 6-character width of the numeric columns so
    # rows with a missing PE/CO2 error stay aligned.
    missing = f"{'?':>6s}"
    for r in rows:
        f = r.feats
        sig = (
            f"{f.get('main_sap_code')}/{f.get('property_type')}/"
            f"{f.get('wall_construction')}/{f.get('roof_codes')}/"
            f"{f.get('main_fuel')} pcdb={f.get('has_pcdb_main')} "
            f"2nd={f.get('has_secondary')} pv={f.get('has_pv')}"
        )
        pe = f"{r.pe_err:+6.1f}" if r.pe_err is not None else missing
        co2 = f"{r.co2_err_t:+6.2f}" if r.co2_err_t is not None else missing
        print(
            f" {r.cert:>16.16s} {r.lodged_sap:7.1f} {r.our_sap:7.2f} "
            f"{r.sap_err:+6.2f} {pe} {co2} {_triage(r):>6s} {sig}"
        )


if __name__ == "__main__":
    main()
_assert_overlay_scores( before, option.overlay, sap=51.99820176096402, - co2=1268.4645083243888, - pe=13080.20756425629, + co2=1065.7593506066496, + pe=10995.781557709413, ) @@ -715,12 +718,14 @@ def test_ashp_overlay_scores_the_vaillant_end_state_from_a_gas_boiler_instant_hw # boiler-1 pin above); the same merge also resolved this cert's main-fuel # mapper gap (ยง14.2 mains-gas derivation), so its raw before now baselines โ€” # see `test_gas_boiler_instant_hw_before_baselines`. + # CO2/PE are the postcode DEMAND cascade now (see the boiler-1 pin above); + # SAP is unchanged (UK-average rating cascade). _assert_overlay_scores( before, option.overlay, sap=39.00740809309464, - co2=2248.6089062232704, - pe=23094.10189037302, + co2=1845.8588018295509, + pe=18944.42568846759, ) diff --git a/tests/domain/sap10_calculator/rdsap/test_cert_to_inputs.py b/tests/domain/sap10_calculator/rdsap/test_cert_to_inputs.py index 24d594f7..d8934c16 100644 --- a/tests/domain/sap10_calculator/rdsap/test_cert_to_inputs.py +++ b/tests/domain/sap10_calculator/rdsap/test_cert_to_inputs.py @@ -38,6 +38,7 @@ from datatypes.epc.domain.epc_property_data import ( from domain.sap10_ml.tests._fixtures import ( make_building_part, make_floor_dimension, + make_main_heating_detail, make_minimal_sap10_epc, make_sap_heating, make_window, @@ -79,6 +80,7 @@ from domain.sap10_calculator.rdsap.cert_to_inputs import ( _pv_overshading_factor, # pyright: ignore[reportPrivateUsage] _pv_pitch_deg, # pyright: ignore[reportPrivateUsage] _responsiveness, # pyright: ignore[reportPrivateUsage] + _secondary_fuel_code, # pyright: ignore[reportPrivateUsage] _secondary_fuel_cost_gbp_per_kwh, # pyright: ignore[reportPrivateUsage] _secondary_heating_fraction_for_category, # pyright: ignore[reportPrivateUsage] _section_12_4_4_summer_immersion_applies, # pyright: ignore[reportPrivateUsage] @@ -2161,6 +2163,47 @@ def test_is_electric_main_dual_fuel_table_32_code_10_is_not_electric() -> None: assert 
def test_dual_fuel_secondary_api_enum_9_prices_as_dual_fuel_not_lpg() -> None:
    # Arrange — on the gov-API, secondary fuel enum 9 means "dual fuel
    # (mineral and wood)", but the integer COLLIDES with RdSAP 10 Table 32 /
    # SAP 10.2 Table 12 code 9 = "LPG (bulk, SC11F)". The secondary cost and
    # CO2/PE paths used to take the same-value lookup (LPG: 3.48 p/kWh, CO2
    # 0.241 kg/kWh) instead of the dual-fuel row (3.99 p/kWh, CO2 0.087),
    # which under-costed the secondary (SAP over-rate) and over-counted its
    # CO2 (LPG is fossil; dual fuel is part wood). Enum 9 is now listed in
    # `_GOV_API_COLLISION_FUELS` and both secondary paths canonicalise it,
    # mirroring the main-fuel boundary. SAP 10.2 Table 12 (p.189) / RdSAP 10
    # Table 32 (p.95).
    mains_gas_boiler = MainHeatingDetail(
        has_fghrs=False,
        main_fuel_type=26,
        heat_emitter_type=1,
        emitter_temperature=1,
        main_heating_control=2106,
        main_heating_category=2,
        sap_main_heating_code=102,
    )
    heating = make_sap_heating(
        main_heating_details=[mains_gas_boiler],
        secondary_fuel_type=9,  # gov-API enum: dual fuel (mineral + wood)
        secondary_heating_type=631,
    )
    epc = make_minimal_sap10_epc(
        total_floor_area_m2=_TYPICAL_TFA_M2,
        habitable_rooms_count=4,
        country_code="ENG",
        sap_heating=heating,
    )
    single_rate_meter = 2  # standard (single-rate) meter

    # Act — the rating-cascade secondary price, then the CO2/PE fuel code.
    price_gbp_per_kwh = _secondary_fuel_cost_gbp_per_kwh(
        epc.sap_heating,
        mains_gas_boiler,
        single_rate_meter,
        SAP_10_2_SPEC_PRICES,
    )
    factor_code = _secondary_fuel_code(epc)

    # Assert — dual fuel at 3.99 p/kWh (NOT LPG 3.48) and Table code 10
    # (NOT 9).
    assert abs(price_gbp_per_kwh - 0.0399) <= 1e-6
    assert factor_code == 10


def test_heat_pump_water_scop_not_applied_to_separate_immersion_dhw() -> None:
    # Arrange — SAP 10.2 Appendix N3.7(a): a heat pump's PCDB water
    # efficiency (η_water) applies to the DHW ONLY when the heat pump heats
    # the cylinder. A separate electric immersion (WHC 903) heats the water
    # at 100% whatever the space-heating system is, so the HP water SCOP must
    # not leak onto it. Invariant under test: the immersion's hot-water fuel
    # is INDEPENDENT of the main, so a heat-pump main and a gas-boiler main
    # give the SAME figure (both 100%, no primary loss). Before the fix the
    # APM override set η_water = 187.5% × 0.6 in-use = 112.5% on the HP cert
    # and under-counted its immersion fuel. Worksheet-validated on simulated
    # case 45: water (62) = 2130.26 kWh at η_water = 100%, not
    # 2130.26 / 1.125 = 1893.57.
    heat_pump = make_main_heating_detail(
        main_fuel_type=29,  # electricity
        heat_emitter_type=1,
        main_heating_category=4,  # heat pump
        main_heating_index_number=100053,  # PCDB Table 362 ASHP (ECODAN 5 kW)
        main_heating_data_source=1,
    )
    gas_boiler = make_main_heating_detail(
        main_fuel_type=26,  # mains gas
        heat_emitter_type=1,
        main_heating_category=2,  # gas boiler
        sap_main_heating_code=102,
    )

    def _epc_with_immersion(space_main: MainHeatingDetail) -> EpcPropertyData:
        # Same dwelling and cylinder every time; only the space main varies.
        immersion_heating = make_sap_heating(
            main_heating_details=[space_main],
            water_heating_code=903,  # separate electric immersion
            water_heating_fuel=30,  # standard electricity
            cylinder_size=2,
            cylinder_insulation_type=1,
            cylinder_insulation_thickness_mm=25,
        )
        return make_minimal_sap10_epc(
            total_floor_area_m2=_TYPICAL_TFA_M2,
            habitable_rooms_count=4,
            country_code="ENG",
            has_hot_water_cylinder=True,
            sap_heating=immersion_heating,
        )

    # Act — heat-pump cert first, then the gas-boiler cert.
    fuel_with_heat_pump, fuel_with_boiler = [
        cert_to_inputs(_epc_with_immersion(space_main)).hot_water_kwh_per_yr
        for space_main in (heat_pump, gas_boiler)
    ]

    # Assert — identical immersion DHW fuel for either space main (the HP
    # water SCOP does not apply to it).
    assert fuel_with_heat_pump > 0.0
    assert abs(fuel_with_heat_pump - fuel_with_boiler) <= 1e-6
+ +Case 45 is the 1e-4 oracle for the SAP 10.2 Appendix U (PDF p.124) TWO- +CLIMATE-CASCADE split. The P960 prints the current dwelling TWICE: + + * Block 1 — "11a. SAP rating / 12a. CO2" — computed on UK-AVERAGE + weather (Appendix U Tables U1-U3 region 0). Drives the SAP/EI rating. + Space-heat demand (98c) = 7333.79; SAP value (258) = 60.5318 (-> 61); + total CO2 (272) = 692.13. + * Block 2 — "CALCULATION OF EPC COSTS, EMISSIONS AND PRIMARY ENERGY" — + computed on POSTCODE-DISTRICT weather (PCDB Table 172, W6). Drives the + EPC-displayed figures. Space-heat demand (98c) = 5921.05; total CO2 + (272) = 626.78; total primary energy (286) = 6581.59. + +Per Appendix U paragraph 1: "Other calculations (such as for energy use +and costs on EPCs) are done using local weather." `Sap10Calculator. +calculate` therefore runs both cascades and grafts the demand cascade's +CO2/PE onto the rating cascade's SAP — this fixture pins BOTH. + +Like the other `_elmhurst_worksheet_001431_case*` fixtures it does NOT +hand-build the EpcPropertyData: it routes the Summary PDF through +ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the pin exercises +the WHOLE extractor + mapper + calculator pipeline. + +Source: user-simulated PDFs at `sap worksheets/golden fixture debugging/ +simulated case 45/`. The Summary is mirrored into the tracked +`backend/documents_parser/tests/fixtures/Summary_001431_case45.pdf` so the +test runs without depending on the unstaged workspace. + +Per [[feedback-zero-error-strict]]: pins are abs <= 1e-4 against the PDF. 
# Tracked copy of the case-45 Summary PDF. Counting up from this file:
# parents[0]=worksheet/, [1]=sap10_calculator/, [2]=domain/, [3]=tests/,
# [4]=repo root.
_SUMMARY_PDF: Final[Path] = Path(__file__).resolve().parents[4].joinpath(
    "backend",
    "documents_parser",
    "tests",
    "fixtures",
    "Summary_001431_case45.pdf",
)

# Worksheet Block 1 pins: the UK-average RATING cascade (`cert_to_inputs`).
RATING_SPACE_HEATING_KWH: Final[float] = 7333.7892  # (98c)
RATING_SAP_CONTINUOUS: Final[float] = 60.5318  # (258) un-rounded
RATING_SAP_INTEGER: Final[int] = 61  # (258)
RATING_CO2_KG_PER_YR: Final[float] = 692.1287  # (272)

# Worksheet Block 2 pins: the POSTCODE-district DEMAND cascade
# (`cert_to_demand_inputs`).
DEMAND_SPACE_HEATING_KWH: Final[float] = 5921.0486  # (98c)
DEMAND_CO2_KG_PER_YR: Final[float] = 626.7797  # (272)
DEMAND_PRIMARY_ENERGY_KWH: Final[float] = 6581.5936  # (286)


def _captured_stdout(command: list[str]) -> str:
    """Run ``command`` and return its stdout as text (non-zero exit raises)."""
    completed = subprocess.run(
        command, capture_output=True, text=True, check=True,
    )
    return completed.stdout


def _line_tokens(line: str) -> list[str]:
    """Split one layout line into cells on runs of two or more whitespace
    characters; a blank line yields a single empty token."""
    stripped = line.strip()
    if not stripped:
        return [""]
    return [cell for cell in re.split(r"\s{2,}", stripped) if cell]


def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Render a Summary PDF as one newline-joined token string per page.

    The page count is read from `pdfinfo`; each page is then dumped with
    `pdftotext -layout` and tokenised line by line. Per the fixture family's
    convention this is the label/value token format the
    ElmhurstSiteNotesExtractor is fed.

    Raises:
        RuntimeError: the `pdfinfo` output carries no "Pages:" count.
    """
    pdf = str(pdf_path)
    found = re.search(r"Pages:\s+(\d+)", _captured_stdout(["pdfinfo", pdf]))
    if found is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    last_page = int(found.group(1))

    pages: list[str] = []
    for page_no in range(1, last_page + 1):
        number = str(page_no)
        layout = _captured_stdout(
            ["pdftotext", "-layout", "-f", number, "-l", number, pdf, "-"]
        )
        pages.append(
            "\n".join(
                token
                for line in layout.splitlines()
                for token in _line_tokens(line)
            )
        )
    return pages


def build_epc() -> EpcPropertyData:
    """Build the case-45 EpcPropertyData from the Summary PDF via the
    extractor and the mapper (nothing is hand-built, so both are part of
    what the pins exercise). This module only provides `build_epc` and the
    pin constants; the collected assertions live in
    `test_section_cascade_pins`."""
    extractor = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_PDF)
    )
    return EpcPropertyDataMapper.from_elmhurst_site_notes(extractor.extract())
def test_ground_floor_flat_bills_floor_despite_flat_dwelling_type() -> None:
    # Arrange — a ground-floor flat that the mapper labelled "Top-floor
    # flat". The dwelling-level exposure heuristic (`_dwelling_exposure`)
    # then suppresses the floor, assuming a heated dwelling below, even
    # though the building part lodges a "Ground floor" floor_type. A ground
    # floor is in contact with the ground (RdSAP 10 §3.12), so it is a
    # heat-loss floor. The Elmhurst Summary path lodges this as a
    # "Ground floor" floor_type rather than the API floor_heat_loss=1 exposed
    # code, so without the per-part ground signal the cascade dropped the
    # floor. Worksheet-validated by simulated case 45: (28a) = 47.0 × U=0.54
    # = 25.38 W/K, billed as 0 before this fix (+7 SAP).
    storey = make_floor_dimension(
        total_floor_area_m2=47.0,
        room_height_m=2.4,
        heat_loss_perimeter_m=15.8,
        party_wall_length_m=0.0,
        floor=0,
    )
    main_part = make_building_part(
        identifier=BuildingPartIdentifier.MAIN,
        construction_age_band="C",
        floor_type="Ground floor",
        floor_dimensions=[storey],
    )
    flat = make_minimal_sap10_epc(
        total_floor_area_m2=47.0,
        country_code="ENG",
        dwelling_type="Top-floor flat",
        property_type="Flat",
        sap_building_parts=[main_part],
    )

    # Act
    transmission = heat_transmission_from_cert(flat)

    # Assert — the ground floor carries heat loss (≈ 47 × 0.54), not 0.
    assert transmission.floor_w_per_k > 20.0


def test_top_floor_flat_with_party_floor_stays_suppressed() -> None:
    # Arrange — the contrast case: a flat lodging "(another dwelling below)"
    # sits over a heated dwelling, so its floor is a party floor with no heat
    # loss (RdSAP 10 §3). The ground-floor override must NOT fire here,
    # which shows the discriminator is the floor_type, not the flat label.
    storey = make_floor_dimension(
        total_floor_area_m2=47.0,
        room_height_m=2.4,
        heat_loss_perimeter_m=15.8,
        party_wall_length_m=0.0,
        floor=0,
    )
    main_part = make_building_part(
        identifier=BuildingPartIdentifier.MAIN,
        construction_age_band="C",
        floor_type="To another dwelling below",
        floor_dimensions=[storey],
    )
    flat = make_minimal_sap10_epc(
        total_floor_area_m2=47.0,
        country_code="ENG",
        dwelling_type="Top-floor flat",
        property_type="Flat",
        sap_building_parts=[main_part],
    )

    # Act
    transmission = heat_transmission_from_cert(flat)

    # Assert — party floor, no heat loss.
    assert transmission.floor_w_per_k == 0.0
def test_case45_heat_pump_two_climate_cascade_matches_pdf() -> None:
    """Pin both climate cascades of simulated case 45 against its P960.

    Case 45 (heat-pump ground-floor flat, postcode W6) is the 1e-4 oracle for
    the SAP 10.2 Appendix U (p.124) two-climate-cascade split, because the
    worksheet prints the current dwelling twice:

    * Block 1 ("11a SAP rating / 12a CO2"), UK-AVERAGE weather (region 0):
      space heat (98c) 7333.79, SAP (258) 60.5318, CO2 (272) 692.13.
    * Block 2 ("EPC COSTS, EMISSIONS AND PRIMARY ENERGY"), POSTCODE weather
      (PCDB Table 172, W6): space heat (98c) 5921.05, CO2 (272) 626.78,
      primary energy (286) 6581.59.

    The SAP/EI rating reads the rating cascade and the EPC-displayed CO2/PE
    read the demand cascade; both ends are pinned at abs=1e-4."""
    # Arrange
    from domain.sap10_calculator.calculator import calculate_sap_from_inputs
    from domain.sap10_calculator.rdsap.cert_to_inputs import cert_to_demand_inputs

    pins = _w001431_case45
    epc = pins.build_epc()
    # Without a resolved local climate there would be no split to pin.
    assert local_climate_for_cert(epc) is not None

    # Act — run each climate cascade on the same cert.
    rating = calculate_sap_from_inputs(cert_to_inputs(epc))
    demand = calculate_sap_from_inputs(cert_to_demand_inputs(epc))

    # Assert — Block 1 (UK-average rating cascade).
    for actual, expected, label in (
        (
            rating.space_heating_kwh_per_yr,
            pins.RATING_SPACE_HEATING_KWH,
            "(98c) rating case45",
        ),
        (
            rating.sap_score_continuous,
            pins.RATING_SAP_CONTINUOUS,
            "(258) rating case45",
        ),
    ):
        _pin(actual, expected, label)
    assert rating.sap_score == pins.RATING_SAP_INTEGER
    _pin(rating.co2_kg_per_yr, pins.RATING_CO2_KG_PER_YR, "(272) rating case45")

    # Assert — Block 2 (postcode demand cascade).
    for actual, expected, label in (
        (
            demand.space_heating_kwh_per_yr,
            pins.DEMAND_SPACE_HEATING_KWH,
            "(98c) demand case45",
        ),
        (
            demand.co2_kg_per_yr,
            pins.DEMAND_CO2_KG_PER_YR,
            "(272) demand case45",
        ),
        (
            demand.primary_energy_kwh_per_yr,
            pins.DEMAND_PRIMARY_ENERGY_KWH,
            "(286) demand case45",
        ),
    ):
        _pin(actual, expected, label)
within-0.5 # 68.8% -> 69.5% (MAE 0.888 -> 0.859; PE 13.9 -> 13.6); 3-part cohort 56% -> # 61%. Pinned in test_heat_transmission (by_kind split + no-contamination). -_MIN_WITHIN_HALF_SAP = 0.69 -_MAX_SAP_MAE = 0.86 -_MAX_CO2_MAE_TONNES = 0.30 # t CO2 / yr vs co2_emissions_current -_MAX_PE_PER_M2_MAE = 14.0 # kWh / m2 / yr vs energy_consumption_current +# GROUND-FLOOR FLAT FLOOR EXPOSURE (RdSAP 10 §3.12): a ground-floor flat whose +# dwelling_type the mapper labelled "Top-floor flat" had its ground floor (in +# contact with the ground -> heat loss) dropped, because the flat exposure +# heuristic keys on dwelling_type and the Summary path lodges the position as a +# "Ground floor" floor_type (not the API floor_heat_loss=1 code). Treating a +# "ground floor" floor_type as exposed (worksheet-validated to 1e-4 on simulated +# case 45: floor (28a) 0 -> 25.38 W/K, fabric (33) 75.6 -> 101.01) -> 69.5% -> +# 69.7% (MAE 0.859 -> 0.854). Pinned in test_heat_transmission. +# POSTCODE DEMAND CASCADE (SAP 10.2 Appendix U paragraph 1, p.124): the +# CO2/PE over-estimate diagnosed above as "per-cert mapper/demand fidelity" +# was largely a CLIMATE-cascade bug. The SAP/EI rating is computed on +# UK-average weather (Tables U1-U3 region 0), but EPC-displayed energy use, +# CO2 emissions and primary energy use POSTCODE-DISTRICT weather from PCDB +# Table 172 — "other calculations (such as for energy use and costs on EPCs) +# are done using local weather". We were feeding the UK-average demand to all +# three outputs, so warm-region certs (most of England, warmer than the +# UK-average) over-counted heating demand → CO2/PE high. `Sap10Calculator. +# calculate` now grafts the demand cascade's CO2/PE onto the rating cascade's +# SAP. Across the corpus this moved CO2 MAE 0.26 -> 0.12 t/yr (bias +0.18 -> +# +0.04) and PE MAE 13.6 -> 3.8 kWh/m2/yr (bias +9.0 -> +0.24); SAP unchanged +# (rating cascade). 
Worksheet-validated to 1e-4 on simulated case 45 (rating +# CO2 692.13; demand CO2 626.78, PE 6581.59). The residual PE/CO2 spread is +# now the genuine per-cert mapper-fidelity tail. +# DUAL-FUEL SECONDARY COLLISION (RdSAP 10 Table 32 / SAP 10.2 Table 12): the +# gov-API lodges fuel enum 9 ("dual fuel, mineral and wood") for a dual-fuel +# room heater, but enum 9 collides with the same-valued Table-32/12 code 9 +# (LPG SC11F), so the price (3.48 vs dual-fuel 3.99 p/kWh) AND the CO2/PE +# factors (LPG 0.241 / 1.163 vs dual fuel 0.087 / 1.049) resolved to LPG — +# the secondary was under-costed (→ SAP over-rate) and over-counted on CO2. +# Canonicalising enum 9 (now in `_GOV_API_COLLISION_FUELS`) on the secondary +# cost + factor paths took within-0.5 69.7% -> 70.2% (MAE 0.854 -> 0.845; +# dual-fuel-secondary cohort 42.9% -> 49.0%, signed +0.55 -> +0.41) and CO2 +# MAE 0.12 -> 0.08 t/yr (bias +0.04 -> 0.00). A prior session deferred enum 9 +# ("direction not understood") while the PE/CO2 lens was confounded by the +# climate-cascade bug (fc7c4d2d); the corrected lens shows the over-rate. +_MIN_WITHIN_HALF_SAP = 0.70 +_MAX_SAP_MAE = 0.85 +_MAX_CO2_MAE_TONNES = 0.09 # t CO2 / yr vs co2_emissions_current +_MAX_PE_PER_M2_MAE = 4.0 # kWh / m2 / yr vs energy_consumption_current def _load_corpus() -> list[dict[str, Any]]: @@ -147,8 +178,12 @@ def test_api_path_sap_accuracy_on_rdsap_21_0_1_corpus( co2_signed_errs_t: list[float] = [] # our − lodged, tonnes/yr pe_signed_errs: list[float] = [] # our − lodged, kWh/m²/yr skipped = 0 + _calculator = Sap10Calculator() # Act — run the API → EpcPropertyData → calculator pipeline per cert. + # `Sap10Calculator.calculate` runs both climate cascades (SAP 10.2 + # Appendix U p.124): the SAP rating on UK-average weather, CO2/PE on + # postcode-district weather — exactly the two figures the EPC lodges. 
for doc in corpus: lodged_sap = doc.get("energy_rating_current") if lodged_sap is None: @@ -156,9 +191,7 @@ def test_api_path_sap_accuracy_on_rdsap_21_0_1_corpus( continue try: epc = EpcPropertyDataMapper.from_api_response(doc) - result = calculate_sap_from_inputs( - cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) - ) + result = _calculator.calculate(epc) except Exception: # A mapper / calculator raise is a coverage gap tracked elsewhere # (eval_api_sap_accuracy.py); here we gauge the certs that compute.