mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
Merge pull request #1219 from Hestia-Homes/feature/junte+khalim
rdSap 17, 18, 19, 20, now maps to EPCPropertyData
This commit is contained in:
commit
015ab9d17b
58 changed files with 9532 additions and 603 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -303,3 +303,6 @@ backlog/*
|
|||
|
||||
# Local Claude config files
|
||||
.claude/*modelling_cohort.csv
|
||||
|
||||
# Local EPC debug cache (scripts/eon)
|
||||
scripts/eon/epc_cache.pkl
|
||||
|
|
|
|||
|
|
@ -88,13 +88,17 @@ _Avoid_: patches (deprecated), corrections, manual EPC, edits
|
|||
### Modelling
|
||||
|
||||
**Effective EPC**:
|
||||
The assembled `EpcPropertyData` picture the modelling pipeline scores for a single Property. Assembled from whichever source applies: Site Notes alone; or the public EPC with **Landlord Overrides** applied; or — when the EPC is **old** — its schema re-mapped to current and gaps filled from neighbour predictions; or — when there is **no EPC** — components **estimated from surrounding properties**. Carries source-derived physical fields and originally recorded performance values; the performance scored from this picture is held separately in **Baseline Performance**.
|
||||
The assembled `EpcPropertyData` picture the modelling pipeline scores for a single Property. Assembled from whichever source applies: Site Notes alone; or the public EPC with **Landlord Overrides** applied; or — when the EPC is **old** — its schema re-mapped to current via **Reduced-Field Synthesis** (deterministic, from the cert plus calibrated coefficients — no neighbour data); or — when there is **no EPC** — components **estimated from surrounding properties** (a separate neighbour-prediction ML mechanism, not yet implemented). Carries source-derived physical fields and originally recorded performance values; the performance scored from this picture is held separately in **Baseline Performance**.
|
||||
_Avoid_: modelling EPC, working EPC, resolved EPC, derived EPC
|
||||
|
||||
**Rebaselining**:
|
||||
Establishing a Property's **Effective Performance** (SAP score, EPC Band, CO2, Primary Energy Intensity, space-heating & hot-water kWh) by **assembling the Effective EPC picture and scoring it** through **SAP10 Calculation** (the deterministic `Sap10Calculator`, which superseded the old ML-API rebaseliner; an ML residual head over the calculator is future — ADR-0009/0013). The *assembly* is the substance: apply **Landlord Overrides** (e.g. boiler → ASHP, wall insulated) as a simulation on the `EpcPropertyData`; estimate components from surrounding properties when there is no EPC; re-map an old-schema EPC to current and gap-fill from neighbour predictions. The calculator is the **scoring engine at the tail**, not the whole of Rebaselining — so its call lives inside the Rebaseliner, after assembly. Triggered whenever the assembled picture differs from the lodged record: (a) the EPC was lodged under a methodology the calculator supersedes (`sap_version < 10.2`), (b) Overrides / Site Notes changed the physical state (walls / heating / windows / etc.), or (c) the picture is estimated or remapped rather than a real current EPC. Produces Effective Performance; Lodged Performance is preserved unchanged. The same single scoring also yields the per-end-use kWh that **Bill Derivation** prices — one scoring, two products. kWh is an ML target per ADR-0007 — see [[epc-ml-transform]].
|
||||
Establishing a Property's **Effective Performance** (SAP score, EPC Band, CO2, Primary Energy Intensity, space-heating & hot-water kWh) by **assembling the Effective EPC picture and scoring it** through **SAP10 Calculation** (the deterministic `Sap10Calculator`, which superseded the old ML-API rebaseliner; an ML residual head over the calculator is future — ADR-0009/0013). The *assembly* is the substance: apply **Landlord Overrides** (e.g. boiler → ASHP, wall insulated) as a simulation on the `EpcPropertyData`; re-map an old-schema EPC to current via **Reduced-Field Synthesis** (deterministic, cert-only); estimate components from surrounding properties when there is no EPC (neighbour-prediction gap-fill — a separate ML mechanism, not yet implemented). The calculator is the **scoring engine at the tail**, not the whole of Rebaselining — so its call lives inside the Rebaseliner, after assembly. Triggered whenever the assembled picture differs from the lodged record: (a) the EPC was lodged under a methodology the calculator supersedes (`sap_version < 10.2`), (b) Overrides / Site Notes changed the physical state (walls / heating / windows / etc.), or (c) the picture is estimated or remapped rather than a real current EPC. Produces Effective Performance; Lodged Performance is preserved unchanged. The same single scoring also yields the per-end-use kWh that **Bill Derivation** prices — one scoring, two products. kWh is an ML target per ADR-0007 — see [[epc-ml-transform]].
|
||||
_Avoid_: re-scoring, re-prediction, performance recomputation, refresh (for cache-freshness)
|
||||
|
||||
**Reduced-Field Synthesis**:
|
||||
Deterministically translating an **old / reduced-data EPC schema** into the current `EpcPropertyData`, synthesising the *measured* fields the target expects from the source's *reduced or categorical* fields, using only the cert itself plus fixed calibrated coefficients — never neighbour data. Used when re-mapping a **pre-SAP10** cert (e.g. `RdSAP-Schema-20.0.0`) as part of assembling the **Effective EPC**: e.g. a glazing-area *band* + floor area → window m²; bath/shower *room counts* → bath and shower counts. A *best attempt* with no ground truth to validate against (per the **Validation Cohort** rule, a pre-SAP10 cert has no same-spec lodged figure to check), so each synthesis assumption is recorded explicitly in code and tests to keep it debuggable. Distinct from **neighbour-prediction gap-fill** (ML estimation of genuinely-absent fields from surrounding properties — the no-EPC path, a separate mechanism not yet implemented) and from the calculator's own RdSAP Table-5 defaulting in `cert_to_inputs` (which expands `EpcPropertyData` into the full SAP input set downstream).
|
||||
_Avoid_: gap-fill (means the neighbour-ML path), reduced-data expansion (overloaded with the calculator's Table-5 step), remapping (the schema-translation part only)
|
||||
|
||||
**Baseline Performance**:
|
||||
A Property's current performance aggregate, holding both Lodged Performance and Effective Performance plus the energy block: delivered kWh **per end use** (heating, hot water, lighting, appliances, cooking, pumps/fans, cooling) and the **annual bill** composed into per-section costs plus a total, produced by **Bill Derivation** from SAP10 Calculation's per-end-use kWh × current Fuel Rates. Persisted as one row (flat typed columns, per-section kWh + cost + total); surfaced as one block in the UI.
|
||||
_Avoid_: baseline predictions, predicted baseline, rebaselined values
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
FROM public.ecr.aws/lambda/python:3.10
|
||||
# 3.11 required: domain.modelling.measure_type uses enum.StrEnum (added in 3.11).
|
||||
FROM public.ecr.aws/lambda/python:3.11
|
||||
# FROM python:3.11.10-bullseye
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ from backend.app.db.models.recommendations import (
|
|||
ScenarioModel,
|
||||
)
|
||||
from domain.modelling.portfolio_goal import PortfolioGoal
|
||||
from tests.utilities.floats import assert_float_matches
|
||||
|
||||
|
||||
def _rec(
|
||||
|
|
@ -83,14 +84,14 @@ def test_aggregation_sums_default_measures_linked_by_plan_id(
|
|||
# Assert — the default measures' sums land on the Scenario row
|
||||
scenario = db_session.query(ScenarioModel).filter_by(id=7).one()
|
||||
assert scenario.cost is not None
|
||||
assert abs(scenario.cost - 1500.0) <= 1e-9 # 1000 + 500
|
||||
assert_float_matches(scenario.cost, 1500.0) # 1000 + 500
|
||||
assert scenario.energy_savings is not None
|
||||
assert abs(scenario.energy_savings - 800.0) <= 1e-9 # Σ kwh_savings
|
||||
assert_float_matches(scenario.energy_savings, 800.0) # Σ kwh_savings
|
||||
assert scenario.energy_cost_savings is not None
|
||||
assert abs(scenario.energy_cost_savings - 200.0) <= 1e-9 # 120 + 80
|
||||
assert_float_matches(scenario.energy_cost_savings, 200.0) # 120 + 80
|
||||
assert scenario.co2_equivalent_savings is not None
|
||||
assert abs(scenario.co2_equivalent_savings - 0.7) <= 1e-9 # 0.5 + 0.2
|
||||
assert_float_matches(scenario.co2_equivalent_savings, 0.7) # 0.5 + 0.2
|
||||
assert scenario.total_work_hours is not None
|
||||
assert abs(scenario.total_work_hours - 8.0) <= 1e-9 # 4 + 4
|
||||
assert_float_matches(scenario.total_work_hours, 8.0) # 4 + 4
|
||||
assert scenario.property_valuation_increase == 2500.0
|
||||
assert scenario.labour_days == 3.0
|
||||
|
|
|
|||
|
|
@ -17,6 +17,11 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|||
COPY utils/ utils/
|
||||
COPY backend/ backend/
|
||||
COPY datatypes/ datatypes/
|
||||
# backend.app.db.models.recommendations imports infrastructure.postgres.modelling,
|
||||
# which imports domain.modelling; without these the lambda fails at init with
|
||||
# "No module named 'infrastructure'" / "'domain'".
|
||||
COPY infrastructure/ infrastructure/
|
||||
COPY domain/ domain/
|
||||
|
||||
COPY backend/bulk_address2uprn_combiner/main.py .
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,11 @@ COPY utils/ utils/
|
|||
# NOTE: if build is ever slow we can be more specific with which files are copied
|
||||
COPY backend/ backend/
|
||||
COPY datatypes/ datatypes/
|
||||
# backend.app.db.models.recommendations imports infrastructure.postgres.modelling,
|
||||
# which imports domain.modelling; without these the lambda fails at init with
|
||||
# "No module named 'infrastructure'" / "'domain'".
|
||||
COPY infrastructure/ infrastructure/
|
||||
COPY domain/ domain/
|
||||
|
||||
|
||||
# -----------------------------
|
||||
|
|
|
|||
|
|
@ -43,6 +43,10 @@ COPY ./utils/ ./utils/
|
|||
# engine.py -> backend.apis.GoogleSolarApi -> infrastructure.solar; without this
|
||||
# the lambda fails at init with "No module named 'infrastructure'".
|
||||
COPY ./infrastructure/ ./infrastructure/
|
||||
# backend.app.db.models.portfolio re-exports PortfolioGoal from
|
||||
# domain.modelling.portfolio_goal (ADR-0017 amendment); without this the lambda
|
||||
# fails at init with "No module named 'domain'".
|
||||
COPY ./domain/ ./domain/
|
||||
COPY ./etl/epc/ ./etl/epc/
|
||||
COPY ./etl/epc_clean/ ./etl/epc_clean/
|
||||
COPY ./etl/bill_savings/ ./etl/bill_savings/
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ from domain.sap10_calculator.rdsap.cert_to_inputs import (
|
|||
cert_to_demand_inputs,
|
||||
cert_to_inputs,
|
||||
)
|
||||
from tests.utilities.floats import assert_float_matches
|
||||
|
||||
|
||||
_CORPUS_ROOT = (
|
||||
|
|
@ -984,9 +985,13 @@ def test_oil_6_no_room_thermostat_applies_table_4c2_minus_5pp_space_efficiency()
|
|||
|
||||
# Assert — Table 4b 80% winter less the Table 4c(2) -5pp interlock
|
||||
# penalty = 75% (matches worksheet (210)).
|
||||
assert abs(inputs.main_heating_efficiency - 0.75) <= 1e-9, (
|
||||
f"oil 6 space efficiency {inputs.main_heating_efficiency:.4f} "
|
||||
f"!= 0.75 (Table 4b 0.80 - Table 4c(2) 0.05 interlock penalty)"
|
||||
assert_float_matches(
|
||||
inputs.main_heating_efficiency,
|
||||
0.75,
|
||||
msg=(
|
||||
f"oil 6 space efficiency {inputs.main_heating_efficiency:.4f} "
|
||||
f"!= 0.75 (Table 4b 0.80 - Table 4c(2) 0.05 interlock penalty)"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -1012,9 +1017,13 @@ def test_oil_6_absent_room_thermostat_applies_table_4f_pump_1_3_multiplier() ->
|
|||
|
||||
# Assert — 41 x 1.3 (circulation pump) + 100 (oil flue fan/pump) =
|
||||
# 153.3 kWh (matches worksheet (231)).
|
||||
assert abs(inputs.pumps_fans_kwh_per_yr - 153.3) <= 1e-9, (
|
||||
f"oil 6 pumps/fans {inputs.pumps_fans_kwh_per_yr:.4f} kWh "
|
||||
f"!= 153.3 (41 x 1.3 absent-room-thermostat pump + 100 oil aux)"
|
||||
assert_float_matches(
|
||||
inputs.pumps_fans_kwh_per_yr,
|
||||
153.3,
|
||||
msg=(
|
||||
f"oil 6 pumps/fans {inputs.pumps_fans_kwh_per_yr:.4f} kWh "
|
||||
f"!= 153.3 (41 x 1.3 absent-room-thermostat pump + 100 oil aux)"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ from domain.sap10_calculator.rdsap.cert_to_inputs import (
|
|||
heat_transmission_section_from_cert,
|
||||
)
|
||||
from domain.sap10_ml.rdsap_uvalues import u_party_wall
|
||||
from tests.utilities.floats import assert_float_matches
|
||||
from tests.domain.sap10_calculator.worksheet import (
|
||||
_elmhurst_worksheet_000474 as _w000474,
|
||||
_elmhurst_worksheet_000477 as _w000477,
|
||||
|
|
@ -1565,8 +1566,8 @@ def test_extension_party_wall_type_read_independently_of_as_main_wall() -> None:
|
|||
f"expected Main=4 (CU, U=0.5) + Ext=0 (Unable, U=0.25), got {party_codes}"
|
||||
)
|
||||
# The two map to different SAP party-wall U-values.
|
||||
assert abs(u_party_wall(4) - 0.5) <= 1e-9
|
||||
assert abs(u_party_wall(0) - 0.25) <= 1e-9
|
||||
assert_float_matches(u_party_wall(4), 0.5)
|
||||
assert_float_matches(u_party_wall(0), 0.25)
|
||||
|
||||
|
||||
def test_summary_mapper_raises_on_unmapped_glazing_type_label() -> None:
|
||||
|
|
|
|||
1000
backend/epc_api/json_samples/RdSAP-Schema-17.0/corpus.jsonl
Normal file
1000
backend/epc_api/json_samples/RdSAP-Schema-17.0/corpus.jsonl
Normal file
File diff suppressed because one or more lines are too long
1000
backend/epc_api/json_samples/RdSAP-Schema-17.1/corpus.jsonl
Normal file
1000
backend/epc_api/json_samples/RdSAP-Schema-17.1/corpus.jsonl
Normal file
File diff suppressed because one or more lines are too long
1000
backend/epc_api/json_samples/RdSAP-Schema-18.0/corpus.jsonl
Normal file
1000
backend/epc_api/json_samples/RdSAP-Schema-18.0/corpus.jsonl
Normal file
File diff suppressed because one or more lines are too long
1000
backend/epc_api/json_samples/RdSAP-Schema-19.0/corpus.jsonl
Normal file
1000
backend/epc_api/json_samples/RdSAP-Schema-19.0/corpus.jsonl
Normal file
File diff suppressed because one or more lines are too long
1000
backend/epc_api/json_samples/RdSAP-Schema-20.0.0/corpus.jsonl
Normal file
1000
backend/epc_api/json_samples/RdSAP-Schema-20.0.0/corpus.jsonl
Normal file
File diff suppressed because one or more lines are too long
1000
backend/epc_api/json_samples/RdSAP-Schema-21.0.1/corpus.jsonl
Normal file
1000
backend/epc_api/json_samples/RdSAP-Schema-21.0.1/corpus.jsonl
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -19,6 +19,11 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|||
COPY utils/ utils/
|
||||
COPY backend/ backend/
|
||||
COPY datatypes/ datatypes/
|
||||
# backend.app.db.models.recommendations imports infrastructure.postgres.modelling,
|
||||
# which imports domain.modelling; without these the lambda fails at init with
|
||||
# "No module named 'infrastructure'" / "'domain'".
|
||||
COPY infrastructure/ infrastructure/
|
||||
COPY domain/ domain/
|
||||
|
||||
# Lambda handler
|
||||
CMD ["backend/ordnanceSurvey/main.handler"]
|
||||
|
|
|
|||
|
|
@ -19,6 +19,11 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|||
COPY utils/ utils/
|
||||
COPY backend/ backend/
|
||||
COPY datatypes/ datatypes/
|
||||
# backend.app.db.models.recommendations imports infrastructure.postgres.modelling,
|
||||
# which imports domain.modelling; without these the lambda fails at init with
|
||||
# "No module named 'infrastructure'" / "'domain'".
|
||||
COPY infrastructure/ infrastructure/
|
||||
COPY domain/ domain/
|
||||
|
||||
# Copy the handler
|
||||
COPY backend/postcode_splitter/main.py .
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ from typing import Final, List, Optional, Union
|
|||
|
||||
from datatypes.epc.domain.epc import Epc
|
||||
|
||||
|
||||
_API_EXTENSION = re.compile(r"^Extension\s+(\d+)$")
|
||||
|
||||
|
||||
|
|
@ -36,9 +35,7 @@ class BuildingPartIdentifier(Enum):
|
|||
OTHER = "other"
|
||||
|
||||
@classmethod
|
||||
def from_api_string(
|
||||
cls, api_identifier: Optional[str]
|
||||
) -> "BuildingPartIdentifier":
|
||||
def from_api_string(cls, api_identifier: Optional[str]) -> "BuildingPartIdentifier":
|
||||
"""Map a gov-EPC API `BuildingPart.identifier` to its canonical
|
||||
member. "Main Dwelling" → MAIN; "Extension N" → EXTENSION_N
|
||||
(for N in 1..4). `None` (permitted by the 21_0_1 schema) and
|
||||
|
|
@ -76,6 +73,7 @@ class Addendum:
|
|||
Present on ~43% of real RdSAP certs (stone-walls / system-build / a list of
|
||||
numeric improvement codes the assessor wanted to call out).
|
||||
"""
|
||||
|
||||
stone_walls: Optional[bool] = None
|
||||
system_build: Optional[bool] = None
|
||||
addendum_numbers: Optional[List[int]] = None
|
||||
|
|
@ -184,10 +182,12 @@ class SapVentilation:
|
|||
flueless_gas_fires_count: Optional[int] = None
|
||||
ventilation_in_pcdf_database: Optional[bool] = None
|
||||
# SAP10.2 §2 cert lodgements not previously surfaced on this type.
|
||||
sheltered_sides: Optional[int] = None # (19) — cert assessor lodge, 0..4
|
||||
sheltered_sides: Optional[int] = None # (19) — cert assessor lodge, 0..4
|
||||
has_suspended_timber_floor: Optional[bool] = None # (12) gate
|
||||
suspended_timber_floor_sealed: Optional[bool] = None
|
||||
has_draught_lobby: Optional[bool] = None # (13) gate (overrides .draught_lobby for §2 cascade)
|
||||
has_draught_lobby: Optional[bool] = (
|
||||
None # (13) gate (overrides .draught_lobby for §2 cascade)
|
||||
)
|
||||
# SAP 10.2 §2 (17a) — air permeability at 4 Pa from the low-pressure
|
||||
# Pulse pressure test, m³/h per m² of envelope area. When present the
|
||||
# cascade routes (18) via the AP4 formula `0.263 × AP4^0.924 + (8)`.
|
||||
|
|
@ -302,10 +302,11 @@ class PhotovoltaicArray:
|
|||
measured PV configuration; `photovoltaic_supply` carries the fallback
|
||||
`percent_roof_area` estimate when the surveyor could not confirm details.
|
||||
"""
|
||||
|
||||
peak_power: float
|
||||
pitch: int
|
||||
orientation: int
|
||||
overshading: int
|
||||
orientation: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -515,7 +516,9 @@ class SapBuildingPart:
|
|||
floor_u_value_known: Optional[bool] = None
|
||||
|
||||
roof_construction: Optional[int] = None
|
||||
roof_construction_type: Optional[str] = None # str from site notes e.g. "PS Pitched, sloping ceiling"
|
||||
roof_construction_type: Optional[str] = (
|
||||
None # str from site notes e.g. "PS Pitched, sloping ceiling"
|
||||
)
|
||||
roof_insulation_location: Optional[Union[int, str]] = (
|
||||
None # TODO: make enum/mapping?
|
||||
)
|
||||
|
|
@ -592,6 +595,7 @@ class RenewableHeatIncentive:
|
|||
baseline `space_heating_kwh` and `hot_water_kwh` for SAP10 properties (used as ML
|
||||
training targets per ADR-0007).
|
||||
"""
|
||||
|
||||
space_heating_kwh: float
|
||||
water_heating_kwh: float
|
||||
impact_of_loft_insulation_kwh: Optional[float] = None
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -307,6 +307,36 @@ class TestFromRdSapSchema21_0_0:
|
|||
# photovoltaic_supply is None when the measured shape is present
|
||||
assert result.sap_energy_source.photovoltaic_supply is None
|
||||
|
||||
def test_photovoltaic_array_orientation_nd_nulls_only_that_field(self) -> None:
    # Arrange — a dwelling with three PV arrays whose middle array lodges the
    # RdSAP 'ND' ("Not Defined") sentinel as its orientation. Regression for
    # the real 21.0.1 cert 5236-4425-7600-0474-2292: 'ND' must null ONLY that
    # array's orientation. It must not crash the int() coercion and drop
    # every array, which happened when 'ND' was handled in the shared
    # _measurement_value coercer instead of field-scoped _pv_orientation.
    payload = load("21_0_0.json")
    first = {"pitch": 3, "peak_power": 2.0, "orientation": 3, "overshading": 1}
    undefined = {
        "pitch": 1,
        "peak_power": 2.0,
        "orientation": "ND",
        "overshading": 1,
    }
    last = {"pitch": 3, "peak_power": 2.0, "orientation": 7, "overshading": 1}
    payload["sap_energy_source"]["photovoltaic_supply"] = [
        [first],
        [undefined],
        [last],
    ]
    schema = from_dict(RdSapSchema21_0_0, payload)

    # Act
    mapped = EpcPropertyDataMapper.from_rdsap_schema_21_0_0(schema)

    # Assert — all three arrays survive; only the 'ND' orientation is None,
    # while its sibling fields and the other arrays keep their real values.
    pv_arrays = mapped.sap_energy_source.photovoltaic_arrays
    assert pv_arrays is not None
    assert len(pv_arrays) == 3
    assert [pv.orientation for pv in pv_arrays] == [3, None, 7]
    middle = pv_arrays[1]
    assert middle.orientation is None
    assert (middle.peak_power, middle.pitch, middle.overshading) == (2.0, 1, 1)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema 21.0.1 (most comprehensive — full field coverage)
|
||||
|
|
@ -1101,3 +1131,805 @@ class TestApiRoofConstructionCode:
|
|||
|
||||
# Assert
|
||||
assert result == "Pitched, sloping ceiling"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema 20.0.0 — Reduced-Field Synthesis (ADR-0027)
|
||||
#
|
||||
# RdSAP 20.0.0 is a pre-SAP10 reduced-data schema: it records as categories or
|
||||
# aggregates the measured fields the calculator needs (a glazed_area *band*, not
|
||||
# window m²; bath/shower *room counts*, not bath counts). The mapper synthesises
|
||||
# the measured form from the cert alone (no neighbour data). Each test name
|
||||
# encodes the synthesis ASSUMPTION it pins, because a pre-SAP10 cert has no
|
||||
# same-spec lodged figure to validate against (Validation-Cohort rule).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_CORPUS_20_0_0 = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"../../../../backend/epc_api/json_samples/RdSAP-Schema-20.0.0/corpus.jsonl",
|
||||
)
|
||||
|
||||
|
||||
def _load_20_0_0_corpus() -> list[Dict[str, Any]]:
    """Load the harvested RdSAP-Schema-20.0.0 corpus as a list of cert dicts.

    The file at `_CORPUS_20_0_0` is read as JSON Lines: one JSON document per
    line, blank lines ignored.

    Returns:
        The parsed certs in file order, or an empty list when the corpus file
        does not exist.
    """
    if not os.path.exists(_CORPUS_20_0_0):
        return []
    # JSON text is UTF-8 by specification; pin the encoding so parsing does
    # not depend on the platform's locale default.
    with open(_CORPUS_20_0_0, encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]
|
||||
|
||||
|
||||
class TestRdSap20_0_0ReducedFieldSynthesis:
|
||||
|
||||
def test_cert_omitting_sap_windows_maps_without_missing_required_field(
|
||||
self,
|
||||
) -> None:
|
||||
# Arrange — 993/1000 corpus certs omit `sap_windows` entirely; the
|
||||
# placeholder schema declared it required, so every one failed to parse.
|
||||
# Required→optional (default []) must let them through.
|
||||
corpus = _load_20_0_0_corpus()
|
||||
if not corpus:
|
||||
pytest.skip("no RdSAP-Schema-20.0.0 corpus harvested")
|
||||
cert = next((c for c in corpus if "sap_windows" not in c), None)
|
||||
if cert is None:
|
||||
pytest.skip("no corpus cert omits sap_windows")
|
||||
|
||||
# Act
|
||||
result = EpcPropertyDataMapper.from_api_response(cert)
|
||||
|
||||
# Assert
|
||||
assert isinstance(result, EpcPropertyData)
|
||||
|
||||
def test_band_normal_synthesises_total_glazing_at_0_148_of_floor_area(
|
||||
self,
|
||||
) -> None:
|
||||
# Arrange — ADR-0027 assumption: 20.0.0 lodges only a glazed_area *band*
|
||||
# (1 = Normal), not window m². For Normal, synthesised total glazing =
|
||||
# 0.148 x total_floor_area (the median glazing/floor ratio measured from
|
||||
# the 1000 real 21.0.1 certs). A band-1 cert with no per-window array.
|
||||
corpus = _load_20_0_0_corpus()
|
||||
if not corpus:
|
||||
pytest.skip("no RdSAP-Schema-20.0.0 corpus harvested")
|
||||
cert = next(
|
||||
(
|
||||
c
|
||||
for c in corpus
|
||||
if not c.get("sap_windows") and c.get("glazed_area") == 1
|
||||
),
|
||||
None,
|
||||
)
|
||||
if cert is None:
|
||||
pytest.skip("no band-1 corpus cert without sap_windows")
|
||||
tfa = float(cert["total_floor_area"])
|
||||
|
||||
# Act
|
||||
result = EpcPropertyDataMapper.from_api_response(cert)
|
||||
|
||||
# Assert — 4 windows (N/E/S/W avg-orientation split), each height 1.0,
|
||||
# total width-sum (= total area, height=1) at 0.148 x TFA.
|
||||
assert len(result.sap_windows) == 4
|
||||
assert all(w.window_height == 1.0 for w in result.sap_windows)
|
||||
assert sorted(w.orientation for w in result.sap_windows) == [1, 3, 5, 7]
|
||||
total_area = sum(w.window_width * w.window_height for w in result.sap_windows)
|
||||
assert total_area == pytest.approx(0.148 * tfa)
|
||||
|
||||
def test_band_more_than_typical_scales_glazing_by_1_25(self) -> None:
|
||||
# Arrange — ADR-0027: glazed_area band scales the synthesised area off
|
||||
# the Normal ratio. Band 2 ("More than typical") = P75/P50 = 1.25, fit
|
||||
# from the same 21.0.1 ratio distribution as the 0.148 median.
|
||||
corpus = _load_20_0_0_corpus()
|
||||
if not corpus:
|
||||
pytest.skip("no RdSAP-Schema-20.0.0 corpus harvested")
|
||||
cert = next(
|
||||
(
|
||||
c
|
||||
for c in corpus
|
||||
if not c.get("sap_windows") and c.get("glazed_area") == 2
|
||||
),
|
||||
None,
|
||||
)
|
||||
if cert is None:
|
||||
pytest.skip("no band-2 corpus cert without sap_windows")
|
||||
tfa = float(cert["total_floor_area"])
|
||||
|
||||
# Act
|
||||
result = EpcPropertyDataMapper.from_api_response(cert)
|
||||
|
||||
# Assert
|
||||
total_area = sum(w.window_width * w.window_height for w in result.sap_windows)
|
||||
assert total_area == pytest.approx(0.148 * tfa * 1.25)
|
||||
|
||||
def test_synthesised_glazing_type_routed_through_cascade(self) -> None:
|
||||
# Arrange — ADR-0027: multiple_glazing_type uses the same code space as
|
||||
# 21.0.1, so route it through `_api_cascade_glazing_type` (as the working
|
||||
# 21.0.1 path does), NOT raw — else the calculator mis-reads code 1
|
||||
# ("double pre-2002") as single. A cert lodging multiple_glazing_type=1.
|
||||
corpus = _load_20_0_0_corpus()
|
||||
if not corpus:
|
||||
pytest.skip("no RdSAP-Schema-20.0.0 corpus harvested")
|
||||
cert = next(
|
||||
(
|
||||
c
|
||||
for c in corpus
|
||||
if not c.get("sap_windows") and c.get("multiple_glazing_type") == 1
|
||||
),
|
||||
None,
|
||||
)
|
||||
if cert is None:
|
||||
pytest.skip("no corpus cert with multiple_glazing_type=1")
|
||||
|
||||
# Act
|
||||
result = EpcPropertyDataMapper.from_api_response(cert)
|
||||
|
||||
# Assert — cascade remaps 1 ("DG pre-2002") -> 2 (double), not raw 1.
|
||||
assert all(w.glazing_type == 2 for w in result.sap_windows)
|
||||
|
||||
def test_lighting_counts_incandescent_remainder_and_low_energy_as_lel(
|
||||
self,
|
||||
) -> None:
|
||||
# Arrange — ADR-0027: 20.0.0 gives total + low-energy OUTLET counts, not
|
||||
# an LED/CFL/incandescent split. The non-low-energy remainder is
|
||||
# incandescent (else lighting energy is understated for the 439/1000
|
||||
# certs that have any); low-energy → the calculator's LEL path (unknown
|
||||
# LED/CFL split). A cert with some incandescent outlets.
|
||||
corpus = _load_20_0_0_corpus()
|
||||
if not corpus:
|
||||
pytest.skip("no RdSAP-Schema-20.0.0 corpus harvested")
|
||||
cert = next(
|
||||
(
|
||||
c
|
||||
for c in corpus
|
||||
if not c.get("sap_windows")
|
||||
and (c.get("fixed_lighting_outlets_count") or 0)
|
||||
> (c.get("low_energy_fixed_lighting_outlets_count") or 0)
|
||||
),
|
||||
None,
|
||||
)
|
||||
if cert is None:
|
||||
pytest.skip("no corpus cert with incandescent lighting")
|
||||
total = cert["fixed_lighting_outlets_count"]
|
||||
low = cert["low_energy_fixed_lighting_outlets_count"]
|
||||
|
||||
# Act
|
||||
result = EpcPropertyDataMapper.from_api_response(cert)
|
||||
|
||||
# Assert
|
||||
assert result.incandescent_fixed_lighting_bulbs_count == total - low
|
||||
assert result.low_energy_fixed_lighting_bulbs_count == low
|
||||
assert result.led_fixed_lighting_bulbs_count == 0
|
||||
assert result.cfl_fixed_lighting_bulbs_count == 0
|
||||
|
||||
def test_ventilation_maps_chimneys_draughtproofing_and_sheltered_sides(
|
||||
self,
|
||||
) -> None:
|
||||
# Arrange — ADR-0027: 20.0.0 lodges open_fireplaces_count (currently
|
||||
# dropped → -80 m³/h/chimney for 53 certs), percent_draughtproofed, and
|
||||
# built_form. Build sap_ventilation with sheltered_sides from built_form
|
||||
# (else the calculator defaults every dwelling to mid-terrace=2). A cert
|
||||
# with an open fireplace.
|
||||
from datatypes.epc.domain.mapper import _api_sheltered_sides # pyright: ignore[reportPrivateUsage]
|
||||
|
||||
corpus = _load_20_0_0_corpus()
|
||||
if not corpus:
|
||||
pytest.skip("no RdSAP-Schema-20.0.0 corpus harvested")
|
||||
cert = next(
|
||||
(
|
||||
c
|
||||
for c in corpus
|
||||
if not c.get("sap_windows") and (c.get("open_fireplaces_count") or 0) >= 1
|
||||
),
|
||||
None,
|
||||
)
|
||||
if cert is None:
|
||||
pytest.skip("no corpus cert with an open fireplace")
|
||||
|
||||
# Act
|
||||
result = EpcPropertyDataMapper.from_api_response(cert)
|
||||
|
||||
# Assert
|
||||
assert result.open_chimneys_count == cert["open_fireplaces_count"]
|
||||
assert result.percent_draughtproofed == cert["percent_draughtproofed"]
|
||||
assert result.sap_ventilation is not None
|
||||
assert result.sap_ventilation.sheltered_sides == _api_sheltered_sides(
|
||||
cert["built_form"]
|
||||
)
|
||||
|
||||
def test_hot_water_derives_bath_and_mixer_counts_from_room_counts(self) -> None:
|
||||
# Arrange — ADR-0027: 20.0.0's instantaneous_wwhrs carries bath/shower
|
||||
# ROOM counts (a false-friend for the WWHR device index). Derive
|
||||
# number_baths and mixer_shower_count from them so HW demand isn't pinned
|
||||
# to the calculator's modal 1-bath default (496/1000 have ≠1 bath).
|
||||
corpus = _load_20_0_0_corpus()
|
||||
if not corpus:
|
||||
pytest.skip("no RdSAP-Schema-20.0.0 corpus harvested")
|
||||
cert = next(
|
||||
(
|
||||
c
|
||||
for c in corpus
|
||||
if not c.get("sap_windows")
|
||||
and c.get("sap_heating", {}).get("instantaneous_wwhrs")
|
||||
),
|
||||
None,
|
||||
)
|
||||
if cert is None:
|
||||
pytest.skip("no corpus cert with instantaneous_wwhrs")
|
||||
iw = cert["sap_heating"]["instantaneous_wwhrs"]
|
||||
expected_baths = iw["rooms_with_bath_and_or_shower"] + iw[
|
||||
"rooms_with_bath_and_mixer_shower"
|
||||
]
|
||||
expected_mixers = iw["rooms_with_mixer_shower_no_bath"] + iw[
|
||||
"rooms_with_bath_and_mixer_shower"
|
||||
]
|
||||
|
||||
# Act
|
||||
result = EpcPropertyDataMapper.from_api_response(cert)
|
||||
|
||||
# Assert
|
||||
assert result.sap_heating.number_baths == expected_baths
|
||||
assert result.sap_heating.mixer_shower_count == expected_mixers
|
||||
|
||||
def test_conservatory_building_part_maps_without_missing_required_field(
|
||||
self,
|
||||
) -> None:
|
||||
# Arrange — ADR-0027: 17/1000 certs lodge a conservatory-shaped
|
||||
# sap_building_part carrying only {double_glazed, floor_area,
|
||||
# glazed_perimeter, room_height} — NOT the wall/roof/floor construction
|
||||
# fields. The placeholder schema declared identifier (and the
|
||||
# construction fields) required, so all 17 failed to parse. Following
|
||||
# the 21.0.1 precedent, every SapBuildingPart field is Optional and the
|
||||
# conservatory's effect is carried separately by conservatory_type, so
|
||||
# the all-None part flows through harmlessly.
|
||||
corpus = _load_20_0_0_corpus()
|
||||
if not corpus:
|
||||
pytest.skip("no RdSAP-Schema-20.0.0 corpus harvested")
|
||||
cert = next(
|
||||
(
|
||||
c
|
||||
for c in corpus
|
||||
if any(
|
||||
"identifier" not in part
|
||||
for part in c.get("sap_building_parts", [])
|
||||
)
|
||||
),
|
||||
None,
|
||||
)
|
||||
if cert is None:
|
||||
pytest.skip("no corpus cert with a conservatory building part")
|
||||
|
||||
# Act
|
||||
result = EpcPropertyDataMapper.from_api_response(cert)
|
||||
|
||||
# Assert
|
||||
assert isinstance(result, EpcPropertyData)
|
||||
|
||||
def test_rich_cert_uses_lodged_window_area_for_geometry(self) -> None:
    """Lodged per-window areas must become the mapped window geometry.

    ADR-0027: 7/1000 certs DO lodge a per-window sap_windows array
    (window_area as a Measurement). Those windows must use their lodged area
    as geometry (width = area, height = 1.0) rather than being synthesised —
    and must NOT be modelled windowless (width=height=0, the prior
    placeholder behaviour for the certs that actually carry the richest
    window data).
    """
    # Arrange — first cert that lodges a non-empty sap_windows array.
    certs = _load_20_0_0_corpus()
    if not certs:
        pytest.skip("no RdSAP-Schema-20.0.0 corpus harvested")
    cert = None
    for candidate in certs:
        if candidate.get("sap_windows"):
            cert = candidate
            break
    if cert is None:
        pytest.skip("no corpus cert lodging sap_windows")
    lodged_windows = cert["sap_windows"]
    lodged_area = sum(
        window["window_area"]["value"] for window in lodged_windows
    )

    # Act
    mapped = EpcPropertyDataMapper.from_api_response(cert)

    # Assert — one domain window per lodged window, and the total glazed area
    # (width x height, height=1) equals the lodged measurement total.
    mapped_windows = mapped.sap_windows
    assert len(mapped_windows) == len(lodged_windows)
    mapped_area = sum(
        window.window_width * window.window_height for window in mapped_windows
    )
    assert mapped_area == pytest.approx(lodged_area)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RdSAP 18.0 Reduced-Field Synthesis (ADR-0028 — inherit-and-validate). 18.0 is
|
||||
# the same pre-SAP10 reduced family as 20.0.0: glazed_area *band* not window m²,
|
||||
# bath/shower *room counts* not bath counts, lighting OUTLET counts not bulbs.
|
||||
# The mapper synthesises the measured form from the cert alone (no neighbour
|
||||
# data), reusing 20.0.0's coefficients (validated against 18.0's own band-4 rich
|
||||
# certs: observed 0.223 ≈ 0.148 × 1.51). Each test name pins one assumption,
|
||||
# because a pre-SAP10 cert has no same-spec lodged figure (Validation-Cohort).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Location of the harvested RdSAP-Schema-18.0 corpus, resolved relative to this
# test module. The file is read as JSON Lines: one JSON document per line.
_CORPUS_18_0 = os.path.join(
    os.path.dirname(__file__),
    "../../../../backend/epc_api/json_samples/RdSAP-Schema-18.0/corpus.jsonl",
)


def _load_18_0_corpus() -> list[Dict[str, Any]]:
    """Load the RdSAP-Schema-18.0 corpus.

    Returns:
        One parsed JSON object per non-blank line of the corpus file, in file
        order, or an empty list when the corpus file does not exist (callers
        use the empty list as their cue to skip).

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    if not os.path.exists(_CORPUS_18_0):
        return []
    # Decode explicitly as UTF-8: without it `open` falls back to the locale's
    # preferred encoding, which can mis-decode or reject non-ASCII cert text.
    with open(_CORPUS_18_0, encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]
|
||||
|
||||
|
||||
class TestRdSap18_0ReducedFieldSynthesis:
    """Reduced-Field Synthesis checks against real RdSAP-Schema-18.0 certs.

    Each test picks the first corpus cert of the shape it needs, maps it with
    `EpcPropertyDataMapper.from_api_response`, and pins one synthesis
    assumption (ADR-0028). A test skips when the corpus, or a cert of the
    required shape, is not available.
    """

    @staticmethod
    def _corpus_or_skip() -> list[Dict[str, Any]]:
        """Return the 18.0 corpus, skipping the calling test when it is empty."""
        corpus = _load_18_0_corpus()
        if not corpus:
            pytest.skip("no RdSAP-Schema-18.0 corpus harvested")
        return corpus

    def test_cert_dispatches_and_maps_without_missing_required_field(self) -> None:
        # Arrange — the placeholder 18.0 schema was generated from one example, so
        # 986/1000 corpus certs fail to parse (over-constrained required fields),
        # and `from_api_response` never dispatched RdSAP-Schema-18.0 at all.
        # Dispatch + required→optional must let a real cert through end-to-end.
        cert = self._corpus_or_skip()[0]

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert
        assert isinstance(result, EpcPropertyData)

    def test_rich_cert_uses_lodged_window_area_for_geometry(self) -> None:
        # Arrange — ADR-0028: 10/1000 18.0 certs lodge a per-window sap_windows
        # array (window_area as a Measurement), all band-4 ("much more glazed").
        # The placeholder 18.0 schema had NO sap_windows field, so this richest
        # window data was dropped at parse and the cert modelled windowless. Those
        # windows must use their lodged area as geometry (width = area, height =
        # 1.0), not be synthesised.
        corpus = self._corpus_or_skip()
        cert = next((c for c in corpus if c.get("sap_windows")), None)
        if cert is None:
            pytest.skip("no corpus cert lodging sap_windows")
        lodged = cert["sap_windows"]
        expected_total = sum(w["window_area"]["value"] for w in lodged)

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert — one domain window per lodged window, total glazed area
        # (width x height, height=1) preserved from the lodged measurement.
        assert len(result.sap_windows) == len(lodged)
        total_area = sum(w.window_width * w.window_height for w in result.sap_windows)
        assert total_area == pytest.approx(expected_total)

    def test_band_normal_synthesises_total_glazing_at_0_148_of_floor_area(
        self,
    ) -> None:
        # Arrange — ADR-0028 (inherit-and-validate): 18.0 lodges only a
        # glazed_area *band* (1 = Normal, 958/1000), not window m². The inherited
        # 20.0.0 coefficient — synthesised total glazing = 0.148 x total_floor_area
        # — is reused unchanged; validated against 18.0's own band-4 rich certs
        # (observed 0.223 ~ 0.148 x 1.51). A band-1 cert with no per-window array.
        corpus = self._corpus_or_skip()
        cert = next(
            (
                c
                for c in corpus
                if not c.get("sap_windows") and c.get("glazed_area") == 1
            ),
            None,
        )
        if cert is None:
            pytest.skip("no band-1 corpus cert without sap_windows")
        tfa = float(cert["total_floor_area"])

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert — 4 windows (N/E/S/W avg-orientation split), each height 1.0,
        # total width-sum (= total area, height=1) at 0.148 x TFA.
        assert len(result.sap_windows) == 4
        assert all(w.window_height == 1.0 for w in result.sap_windows)
        assert sorted(w.orientation for w in result.sap_windows) == [1, 3, 5, 7]
        total_area = sum(w.window_width * w.window_height for w in result.sap_windows)
        assert total_area == pytest.approx(0.148 * tfa)

    def test_band_more_than_typical_scales_glazing_by_1_25(self) -> None:
        # Arrange — ADR-0028: the glazed_area band scales synthesised area off the
        # Normal ratio. Band 2 ("More than typical") = 1.25, the inherited 20.0.0
        # multiplier (18.0's 26 band-2 windowless certs can't re-fit it — no
        # measured band-2 windows — so it is reused, not re-derived). A band-2
        # cert with no per-window array.
        corpus = self._corpus_or_skip()
        cert = next(
            (
                c
                for c in corpus
                if not c.get("sap_windows") and c.get("glazed_area") == 2
            ),
            None,
        )
        if cert is None:
            pytest.skip("no band-2 corpus cert without sap_windows")
        tfa = float(cert["total_floor_area"])

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert
        total_area = sum(w.window_width * w.window_height for w in result.sap_windows)
        assert total_area == pytest.approx(0.148 * tfa * 1.25)

    def test_synthesised_glazing_type_routed_through_cascade(self) -> None:
        # Arrange — ADR-0028: 18.0 multiple_glazing_type shares 20.0.0's code
        # space (verified vs epc_codes.csv), so route it through the verified
        # cascade — code 1 ("DG pre-2002") must remap to 2, not be read as single.
        # A windowless cert lodging multiple_glazing_type=1.
        corpus = self._corpus_or_skip()
        cert = next(
            (
                c
                for c in corpus
                if not c.get("sap_windows") and c.get("multiple_glazing_type") == 1
            ),
            None,
        )
        if cert is None:
            pytest.skip("no corpus cert with multiple_glazing_type=1")

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert — cascade remaps 1 ("DG pre-2002") -> 2 (double), not raw 1.
        # Require at least one window first: all() over an empty sequence is
        # True, so without this guard the check would pass with nothing mapped.
        assert result.sap_windows
        assert all(w.glazing_type == 2 for w in result.sap_windows)

    def test_synthesised_glazing_type_handles_not_defined_code(self) -> None:
        # Arrange — ADR-0028: 69/1000 18.0 certs lodge multiple_glazing_type "ND"
        # (Not Defined), a string the int-keyed cascade cannot map. The
        # synthesised window must carry a valid INTEGER glazing_type (treated as
        # DG-modal, matching the calculator's _G_LIGHT_DEFAULT), never the raw
        # "ND" string on an int field. A windowless cert lodging "ND".
        corpus = self._corpus_or_skip()
        cert = next(
            (
                c
                for c in corpus
                if not c.get("sap_windows") and c.get("multiple_glazing_type") == "ND"
            ),
            None,
        )
        if cert is None:
            pytest.skip("no windowless corpus cert with multiple_glazing_type ND")

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert — every synthesised window has an int glazing_type, not "ND".
        assert result.sap_windows
        assert all(isinstance(w.glazing_type, int) for w in result.sap_windows)

    def test_lighting_counts_incandescent_remainder_and_low_energy_as_lel(
        self,
    ) -> None:
        # Arrange — ADR-0028: 18.0 gives total + low-energy OUTLET counts, not an
        # LED/CFL/incandescent split (matches 20.0.0). The non-low-energy
        # remainder is incandescent (else lighting energy is understated); the
        # low-energy outlets feed the calculator's LEL path. A cert with some
        # incandescent (non-low-energy) outlets.
        corpus = self._corpus_or_skip()
        # Only accept certs that lodge BOTH counts: the expected values below
        # index and subtract them directly, so a missing or None count must
        # exclude the cert here instead of raising during Arrange.
        cert = next(
            (
                c
                for c in corpus
                if c.get("fixed_lighting_outlets_count") is not None
                and c.get("low_energy_fixed_lighting_outlets_count") is not None
                and c["fixed_lighting_outlets_count"]
                > c["low_energy_fixed_lighting_outlets_count"]
            ),
            None,
        )
        if cert is None:
            pytest.skip("no corpus cert with incandescent lighting")
        total = cert["fixed_lighting_outlets_count"]
        low = cert["low_energy_fixed_lighting_outlets_count"]

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert
        assert result.incandescent_fixed_lighting_bulbs_count == total - low
        assert result.low_energy_fixed_lighting_bulbs_count == low

    def test_ventilation_maps_chimneys_draughtproofing_and_sheltered_sides(
        self,
    ) -> None:
        # Arrange — ADR-0028: 18.0 lodges open_fireplaces_count (else dropped),
        # percent_draughtproofed, and built_form. Build sap_ventilation with
        # sheltered_sides from built_form (else the calculator defaults every
        # dwelling to mid-terrace=2). A cert with an open fireplace.
        from datatypes.epc.domain.mapper import _api_sheltered_sides  # pyright: ignore[reportPrivateUsage]

        corpus = self._corpus_or_skip()
        cert = next(
            (
                c
                for c in corpus
                if not c.get("sap_windows") and (c.get("open_fireplaces_count") or 0) >= 1
            ),
            None,
        )
        if cert is None:
            pytest.skip("no corpus cert with an open fireplace")

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert
        assert result.open_chimneys_count == cert["open_fireplaces_count"]
        assert result.percent_draughtproofed == cert["percent_draughtproofed"]
        assert result.sap_ventilation is not None
        assert result.sap_ventilation.sheltered_sides == _api_sheltered_sides(
            cert["built_form"]
        )

    def test_hot_water_derives_bath_and_mixer_counts_from_room_counts(self) -> None:
        # Arrange — ADR-0028: 18.0's instantaneous_wwhrs carries bath/shower ROOM
        # counts (a false-friend for the WWHR device index), populated 1000/1000.
        # Derive number_baths and mixer_shower_count so HW demand isn't pinned to
        # the calculator's modal 1-bath default.
        corpus = self._corpus_or_skip()
        cert = next(
            (
                c
                for c in corpus
                if not c.get("sap_windows")
                and c.get("sap_heating", {}).get("instantaneous_wwhrs")
            ),
            None,
        )
        if cert is None:
            pytest.skip("no corpus cert with instantaneous_wwhrs")
        iw = cert["sap_heating"]["instantaneous_wwhrs"]
        # Rooms with both a bath and a mixer shower count towards both totals.
        expected_baths = iw["rooms_with_bath_and_or_shower"] + iw[
            "rooms_with_bath_and_mixer_shower"
        ]
        expected_mixers = iw["rooms_with_mixer_shower_no_bath"] + iw[
            "rooms_with_bath_and_mixer_shower"
        ]

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert
        assert result.sap_heating.number_baths == expected_baths
        assert result.sap_heating.mixer_shower_count == expected_mixers
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RdSAP 17.1 Reduced-Field Synthesis (ADR-0028 — inherit-and-validate). 17.1 is
|
||||
# the same pre-SAP10 reduced family as 18.0/20.0.0 and reuses the same inherited
|
||||
# 20.0.0 coefficients via the shared `_synthesise_reduced_field_windows` core;
|
||||
# its own band-4 rich certs validate the transfer. Each test name pins one
|
||||
# synthesis assumption (Validation-Cohort rule: no same-spec lodged figure).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Location of the harvested RdSAP-Schema-17.1 corpus, resolved relative to this
# test module. The file is read as JSON Lines: one JSON document per line.
_CORPUS_17_1 = os.path.join(
    os.path.dirname(__file__),
    "../../../../backend/epc_api/json_samples/RdSAP-Schema-17.1/corpus.jsonl",
)


def _load_17_1_corpus() -> list[Dict[str, Any]]:
    """Load the RdSAP-Schema-17.1 corpus.

    Returns:
        One parsed JSON object per non-blank line of the corpus file, in file
        order, or an empty list when the corpus file does not exist (callers
        use the empty list as their cue to skip).

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    if not os.path.exists(_CORPUS_17_1):
        return []
    # Decode explicitly as UTF-8: without it `open` falls back to the locale's
    # preferred encoding, which can mis-decode or reject non-ASCII cert text.
    with open(_CORPUS_17_1, encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]
|
||||
|
||||
|
||||
class TestRdSap17_1ReducedFieldSynthesis:
    """Reduced-Field Synthesis checks against real RdSAP-Schema-17.1 certs.

    Each test picks the first corpus cert of the shape it needs, maps it with
    `EpcPropertyDataMapper.from_api_response`, and pins one synthesis
    assumption (ADR-0028). A test skips when the corpus, or a cert of the
    required shape, is not available.
    """

    @staticmethod
    def _corpus_or_skip() -> list[Dict[str, Any]]:
        """Return the 17.1 corpus, skipping the calling test when it is empty."""
        corpus = _load_17_1_corpus()
        if not corpus:
            pytest.skip("no RdSAP-Schema-17.1 corpus harvested")
        return corpus

    def test_cert_dispatches_and_maps_without_missing_required_field(self) -> None:
        # Arrange — the placeholder 17.1 schema over-constrains (only 4/1000
        # parse) and `from_api_response` never dispatched RdSAP-Schema-17.1.
        # Dispatch + required→optional must let a real cert through end-to-end.
        cert = self._corpus_or_skip()[0]

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert
        assert isinstance(result, EpcPropertyData)

    def test_rich_cert_uses_lodged_window_area_for_geometry(self) -> None:
        # Arrange — ADR-0028: 14/1000 17.1 certs lodge a per-window sap_windows
        # array (band-4); use lodged window_area as geometry, not synthesised.
        corpus = self._corpus_or_skip()
        cert = next((c for c in corpus if c.get("sap_windows")), None)
        if cert is None:
            pytest.skip("no corpus cert lodging sap_windows")
        lodged = cert["sap_windows"]
        expected_total = sum(w["window_area"]["value"] for w in lodged)

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert — one domain window per lodged window, total area preserved.
        assert len(result.sap_windows) == len(lodged)
        total_area = sum(w.window_width * w.window_height for w in result.sap_windows)
        assert total_area == pytest.approx(expected_total)

    def test_band_normal_synthesises_total_glazing_at_0_148_of_floor_area(
        self,
    ) -> None:
        # Arrange — ADR-0028: band-1 (969/1000) synthesises total glazing =
        # 0.148 x TFA, the inherited 20.0.0 coefficient (validated vs 17.1's
        # band-4 rich certs).
        corpus = self._corpus_or_skip()
        cert = next(
            (c for c in corpus if not c.get("sap_windows") and c.get("glazed_area") == 1),
            None,
        )
        if cert is None:
            pytest.skip("no band-1 corpus cert without sap_windows")
        tfa = float(cert["total_floor_area"])

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert — four windows of height 1.0 on orientations 1/3/5/7 whose
        # summed area is 0.148 x TFA.
        assert len(result.sap_windows) == 4
        assert all(w.window_height == 1.0 for w in result.sap_windows)
        assert sorted(w.orientation for w in result.sap_windows) == [1, 3, 5, 7]
        total_area = sum(w.window_width * w.window_height for w in result.sap_windows)
        assert total_area == pytest.approx(0.148 * tfa)

    def test_band_more_than_typical_scales_glazing_by_1_25(self) -> None:
        # Arrange — ADR-0028: band 2 ("More than typical") = inherited 1.25
        # multiplier.
        corpus = self._corpus_or_skip()
        cert = next(
            (c for c in corpus if not c.get("sap_windows") and c.get("glazed_area") == 2),
            None,
        )
        if cert is None:
            pytest.skip("no band-2 corpus cert without sap_windows")
        tfa = float(cert["total_floor_area"])

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert
        total_area = sum(w.window_width * w.window_height for w in result.sap_windows)
        assert total_area == pytest.approx(0.148 * tfa * 1.25)

    def test_synthesised_glazing_type_routed_through_cascade(self) -> None:
        # Arrange — ADR-0028: multiple_glazing_type shares 20.0.0's code space —
        # route through the cascade so code 1 ("DG pre-2002") remaps to 2, not
        # single.
        corpus = self._corpus_or_skip()
        cert = next(
            (
                c
                for c in corpus
                if not c.get("sap_windows") and c.get("multiple_glazing_type") == 1
            ),
            None,
        )
        if cert is None:
            pytest.skip("no corpus cert with multiple_glazing_type=1")

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert — require at least one window first: all() over an empty
        # sequence is True, so without this guard the check would pass with
        # nothing mapped.
        assert result.sap_windows
        assert all(w.glazing_type == 2 for w in result.sap_windows)

    def test_synthesised_glazing_type_handles_not_defined_code(self) -> None:
        # Arrange — ADR-0028: "ND" (56/1000) maps to a valid INTEGER glazing_type
        # (DG-modal), never the raw string on an int field.
        corpus = self._corpus_or_skip()
        cert = next(
            (
                c
                for c in corpus
                if not c.get("sap_windows") and c.get("multiple_glazing_type") == "ND"
            ),
            None,
        )
        if cert is None:
            pytest.skip("no windowless corpus cert with multiple_glazing_type ND")

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert
        assert result.sap_windows
        assert all(isinstance(w.glazing_type, int) for w in result.sap_windows)

    def test_lighting_counts_incandescent_remainder_and_low_energy_as_lel(
        self,
    ) -> None:
        # Arrange — ADR-0028: total + low-energy OUTLET counts -> incandescent
        # remainder + low-energy as LEL.
        corpus = self._corpus_or_skip()
        # Only accept certs that lodge BOTH counts: the expected values below
        # index and subtract them directly, so a missing or None count must
        # exclude the cert here instead of raising during Arrange.
        cert = next(
            (
                c
                for c in corpus
                if c.get("fixed_lighting_outlets_count") is not None
                and c.get("low_energy_fixed_lighting_outlets_count") is not None
                and c["fixed_lighting_outlets_count"]
                > c["low_energy_fixed_lighting_outlets_count"]
            ),
            None,
        )
        if cert is None:
            pytest.skip("no corpus cert with incandescent lighting")
        total = cert["fixed_lighting_outlets_count"]
        low = cert["low_energy_fixed_lighting_outlets_count"]

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert
        assert result.incandescent_fixed_lighting_bulbs_count == total - low
        assert result.low_energy_fixed_lighting_bulbs_count == low

    def test_ventilation_maps_chimneys_draughtproofing_and_sheltered_sides(
        self,
    ) -> None:
        # Arrange — ADR-0028: open_fireplaces_count -> chimneys,
        # percent_draughtproofed, sheltered_sides from built_form.
        from datatypes.epc.domain.mapper import _api_sheltered_sides  # pyright: ignore[reportPrivateUsage]

        corpus = self._corpus_or_skip()
        cert = next(
            (
                c
                for c in corpus
                if not c.get("sap_windows") and (c.get("open_fireplaces_count") or 0) >= 1
            ),
            None,
        )
        if cert is None:
            pytest.skip("no corpus cert with an open fireplace")

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert
        assert result.open_chimneys_count == cert["open_fireplaces_count"]
        assert result.percent_draughtproofed == cert["percent_draughtproofed"]
        assert result.sap_ventilation is not None
        assert result.sap_ventilation.sheltered_sides == _api_sheltered_sides(
            cert["built_form"]
        )

    def test_hot_water_derives_bath_and_mixer_counts_from_room_counts(self) -> None:
        # Arrange — ADR-0028: instantaneous_wwhrs ROOM counts ->
        # number_baths/mixer_shower.
        corpus = self._corpus_or_skip()
        cert = next(
            (
                c
                for c in corpus
                if not c.get("sap_windows")
                and c.get("sap_heating", {}).get("instantaneous_wwhrs")
            ),
            None,
        )
        if cert is None:
            pytest.skip("no corpus cert with instantaneous_wwhrs")
        iw = cert["sap_heating"]["instantaneous_wwhrs"]
        # Rooms with both a bath and a mixer shower count towards both totals.
        expected_baths = iw["rooms_with_bath_and_or_shower"] + iw[
            "rooms_with_bath_and_mixer_shower"
        ]
        expected_mixers = iw["rooms_with_mixer_shower_no_bath"] + iw[
            "rooms_with_bath_and_mixer_shower"
        ]

        # Act
        result = EpcPropertyDataMapper.from_api_response(cert)

        # Assert
        assert result.sap_heating.number_baths == expected_baths
        assert result.sap_heating.mixer_shower_count == expected_mixers
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from .common import CostAmount, DescriptionV1, Measurement
|
||||
|
|
@ -29,6 +29,10 @@ class MainHeatingDetail:
|
|||
main_heating_category: int
|
||||
main_heating_fraction: int
|
||||
main_heating_data_source: int
|
||||
boiler_flue_type: Optional[int] = None
|
||||
fan_flue_present: Optional[str] = None
|
||||
central_heating_pump_age: Optional[int] = None
|
||||
main_heating_index_number: Optional[int] = None
|
||||
sap_main_heating_code: Optional[int] = None
|
||||
|
||||
|
||||
|
|
@ -40,8 +44,13 @@ class SapHeating:
|
|||
instantaneous_wwhrs: Optional[InstantaneousWwhrs]
|
||||
main_heating_details: List[MainHeatingDetail]
|
||||
immersion_heating_type: Union[int, str]
|
||||
cylinder_insulation_type: int
|
||||
has_fixed_air_conditioning: str
|
||||
# ADR-0028: cylinder_insulation_type is absent in 308/1000 17.0 certs.
|
||||
cylinder_insulation_type: Optional[int] = None
|
||||
cylinder_thermostat: Optional[str] = None
|
||||
secondary_fuel_type: Optional[int] = None
|
||||
secondary_heating_type: Optional[int] = None
|
||||
cylinder_insulation_thickness: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -52,7 +61,8 @@ class PhotovoltaicSupplyNoneOrNoDetails:
|
|||
|
||||
@dataclass
|
||||
class PhotovoltaicSupply:
|
||||
none_or_no_details: PhotovoltaicSupplyNoneOrNoDetails
|
||||
# ADR-0028 data-driven required→optional: 3/1000 omit none_or_no_details.
|
||||
none_or_no_details: Optional[PhotovoltaicSupplyNoneOrNoDetails] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -69,27 +79,47 @@ class SapFloorDimension:
|
|||
floor: int
|
||||
room_height: Measurement
|
||||
total_floor_area: Measurement
|
||||
party_wall_length: Measurement
|
||||
party_wall_length: Union[Measurement, int]
|
||||
heat_loss_perimeter: Measurement
|
||||
floor_insulation: Optional[int] = None
|
||||
floor_construction: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class SapBuildingPart:
|
||||
identifier: str
|
||||
wall_dry_lined: str
|
||||
wall_thickness: int
|
||||
floor_heat_loss: int
|
||||
roof_construction: int
|
||||
wall_construction: int
|
||||
building_part_number: int
|
||||
sap_floor_dimensions: List[SapFloorDimension]
|
||||
wall_insulation_type: int
|
||||
class SapRoomInRoof:
|
||||
"""Room-in-roof details. floor_area is usually a Measurement but some certs
|
||||
lodge a plain number (ADR-0028) — read via `_measurement_value`."""
|
||||
|
||||
floor_area: Union[Measurement, int, float]
|
||||
insulation: str
|
||||
roof_room_connected: str
|
||||
construction_age_band: str
|
||||
party_wall_construction: Union[int, str]
|
||||
wall_thickness_measured: str
|
||||
roof_insulation_location: Union[int, str]
|
||||
roof_insulation_thickness: str
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class SapBuildingPart:
|
||||
# Data-driven required→optional (ADR-0028): a conservatory-shaped part can
|
||||
# carry only a subset of fields. Every field is Optional (the
|
||||
# 21.0.1/20.0.0/18.0 precedent). 17.0 corpus: 2/1000 omit identifier,
|
||||
# wall_thickness, or roof_insulation_thickness.
|
||||
identifier: Optional[str] = None
|
||||
wall_dry_lined: Optional[str] = None
|
||||
wall_thickness: Optional[int] = None
|
||||
floor_heat_loss: Optional[int] = None
|
||||
roof_construction: Optional[int] = None
|
||||
wall_construction: Optional[int] = None
|
||||
building_part_number: Optional[int] = None
|
||||
sap_floor_dimensions: Optional[List[SapFloorDimension]] = None
|
||||
wall_insulation_type: Optional[int] = None
|
||||
construction_age_band: Optional[str] = None
|
||||
party_wall_construction: Optional[Union[int, str]] = None
|
||||
wall_thickness_measured: Optional[str] = None
|
||||
roof_insulation_location: Optional[Union[int, str]] = None
|
||||
roof_insulation_thickness: Optional[Union[str, int]] = None
|
||||
sap_room_in_roof: Optional[SapRoomInRoof] = None
|
||||
wall_insulation_thickness: Optional[str] = None
|
||||
floor_insulation_thickness: Optional[str] = None
|
||||
flat_roof_insulation_thickness: Optional[Union[str, int]] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -117,15 +147,17 @@ class SuggestedImprovement:
|
|||
environmental_impact_rating: int
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(kw_only=True)
|
||||
class AlternativeImprovement:
|
||||
sequence: int
|
||||
typical_saving: CostAmount
|
||||
improvement_type: str
|
||||
improvement_details: ImprovementDetails
|
||||
improvement_category: int
|
||||
energy_performance_rating: int
|
||||
environmental_impact_rating: int
|
||||
# ADR-0028: some certs lodge a reduced alternative-improvement shape (only
|
||||
# improvement_details/-type). Parse-only — every field is Optional.
|
||||
sequence: Optional[int] = None
|
||||
typical_saving: Optional[CostAmount] = None
|
||||
improvement_type: Optional[str] = None
|
||||
improvement_details: Optional[ImprovementDetails] = None
|
||||
improvement_category: Optional[int] = None
|
||||
energy_performance_rating: Optional[int] = None
|
||||
environmental_impact_rating: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -137,6 +169,20 @@ class RenewableHeatIncentive:
|
|||
|
||||
|
||||
@dataclass
|
||||
class SapWindow:
|
||||
"""Per-window geometry. ADR-0028: only 10/1000 17.0 certs lodge this array;
|
||||
window_area arrives as a Measurement and is read via `_measurement_value`.
|
||||
Mirrors the 20.0.0/18.0 SapWindow shape. This is the per-spec Validation
|
||||
Cohort — its lodged geometry is used directly, never synthesised over."""
|
||||
|
||||
orientation: int
|
||||
window_area: float
|
||||
window_type: int
|
||||
glazing_type: int
|
||||
window_location: int
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class RdSapSchema17_0:
|
||||
uprn: int
|
||||
roofs: List[EnergyElement]
|
||||
|
|
@ -152,7 +198,8 @@ class RdSapSchema17_0:
|
|||
built_form: int
|
||||
door_count: int
|
||||
glazed_area: int
|
||||
glazing_gap: str
|
||||
# ADR-0028: glazing_gap lodged as int, str, or omitted (482/1000) — widen.
|
||||
glazing_gap: Optional[Union[int, str]] = None
|
||||
region_code: int
|
||||
report_type: int
|
||||
sap_heating: SapHeating
|
||||
|
|
@ -161,7 +208,9 @@ class RdSapSchema17_0:
|
|||
uprn_source: str
|
||||
country_code: str
|
||||
main_heating: List[EnergyElement]
|
||||
dwelling_type: DescriptionV1
|
||||
# ADR-0028: 182/1000 lodge dwelling_type as a plain str, not a localised
|
||||
# DescriptionV1 object. Widen so both shapes parse.
|
||||
dwelling_type: Union[str, DescriptionV1]
|
||||
language_code: int
|
||||
property_type: int
|
||||
address_line_1: str
|
||||
|
|
@ -174,11 +223,13 @@ class RdSapSchema17_0:
|
|||
transaction_type: int
|
||||
conservatory_type: int
|
||||
heated_room_count: int
|
||||
pvc_window_frames: str
|
||||
# ADR-0028: missing in 343/1000 — widen + default.
|
||||
pvc_window_frames: Optional[str] = None
|
||||
registration_date: str
|
||||
sap_energy_source: SapEnergySource
|
||||
secondary_heating: EnergyElement
|
||||
lzc_energy_sources: List[int]
|
||||
# ADR-0028: present in only 95/1000 — default to empty.
|
||||
lzc_energy_sources: List[int] = field(default_factory=list)
|
||||
sap_building_parts: List[SapBuildingPart]
|
||||
low_energy_lighting: int
|
||||
solar_water_heating: str
|
||||
|
|
@ -190,14 +241,17 @@ class RdSapSchema17_0:
|
|||
energy_rating_current: int
|
||||
lighting_cost_current: CostAmount
|
||||
main_heating_controls: List[EnergyElement]
|
||||
multiple_glazing_type: int
|
||||
# ADR-0028: int code (1-7) or the string "ND" (Not Defined, 54/1000) — widen
|
||||
# so both parse; the synthesis maps "ND" to a default.
|
||||
multiple_glazing_type: Union[int, str]
|
||||
open_fireplaces_count: int
|
||||
has_hot_water_cylinder: str
|
||||
# ADR-0028: a handful of 17.0 certs omit these boolean flags — default them.
|
||||
has_hot_water_cylinder: Optional[str] = None
|
||||
heating_cost_potential: CostAmount
|
||||
hot_water_cost_current: CostAmount
|
||||
mechanical_ventilation: int
|
||||
percent_draughtproofed: int
|
||||
suggested_improvements: List[SuggestedImprovement]
|
||||
suggested_improvements: List[SuggestedImprovement] = field(default_factory=list)
|
||||
co2_emissions_potential: float
|
||||
energy_rating_potential: int
|
||||
lighting_cost_potential: CostAmount
|
||||
|
|
@ -205,7 +259,7 @@ class RdSapSchema17_0:
|
|||
hot_water_cost_potential: CostAmount
|
||||
renewable_heat_incentive: RenewableHeatIncentive
|
||||
energy_consumption_current: int
|
||||
has_fixed_air_conditioning: str
|
||||
has_fixed_air_conditioning: Optional[str] = None
|
||||
multiple_glazed_proportion: int
|
||||
calculation_software_version: str
|
||||
energy_consumption_potential: int
|
||||
|
|
@ -213,10 +267,14 @@ class RdSapSchema17_0:
|
|||
fixed_lighting_outlets_count: int
|
||||
current_energy_efficiency_band: str
|
||||
environmental_impact_potential: int
|
||||
has_heated_separate_conservatory: str
|
||||
has_heated_separate_conservatory: Optional[str] = None
|
||||
potential_energy_efficiency_band: str
|
||||
co2_emissions_current_per_floor_area: int
|
||||
low_energy_fixed_lighting_outlets_count: int
|
||||
sap_flat_details: Optional[SapFlatDetails] = None
|
||||
address_line_2: Optional[str] = None
|
||||
alternative_improvements: Optional[List[AlternativeImprovement]] = None
|
||||
# ADR-0028: additive — the placeholder schema omitted sap_windows entirely.
|
||||
# The 10 rich certs use lodged window_area directly; the windowless majority
|
||||
# synthesise from the glazed_area band.
|
||||
sap_windows: List[SapWindow] = field(default_factory=list)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from .common import CostAmount, DescriptionV1, Measurement
|
||||
|
|
@ -36,7 +36,7 @@ class MainHeatingDetail:
|
|||
sap_main_heating_code: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(kw_only=True)
|
||||
class SapHeating:
|
||||
cylinder_size: int
|
||||
water_heating_code: int
|
||||
|
|
@ -44,8 +44,9 @@ class SapHeating:
|
|||
instantaneous_wwhrs: Optional[InstantaneousWwhrs]
|
||||
main_heating_details: List[MainHeatingDetail]
|
||||
immersion_heating_type: Union[int, str]
|
||||
cylinder_insulation_type: int
|
||||
has_fixed_air_conditioning: str
|
||||
# ADR-0028: 325/1000 omit cylinder_insulation_type — default it.
|
||||
cylinder_insulation_type: Optional[int] = None
|
||||
cylinder_thermostat: Optional[str] = None
|
||||
secondary_fuel_type: Optional[int] = None
|
||||
secondary_heating_type: Optional[int] = None
|
||||
|
|
@ -60,7 +61,7 @@ class PhotovoltaicSupplyNoneOrNoDetails:
|
|||
|
||||
@dataclass
|
||||
class PhotovoltaicSupply:
|
||||
none_or_no_details: PhotovoltaicSupplyNoneOrNoDetails
|
||||
none_or_no_details: Optional[PhotovoltaicSupplyNoneOrNoDetails] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -84,23 +85,26 @@ class SapFloorDimension:
|
|||
floor_construction: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(kw_only=True)
|
||||
class SapBuildingPart:
|
||||
identifier: str
|
||||
wall_dry_lined: str
|
||||
wall_thickness: int
|
||||
floor_heat_loss: int
|
||||
roof_construction: int
|
||||
wall_construction: int
|
||||
building_part_number: int
|
||||
sap_floor_dimensions: List[SapFloorDimension]
|
||||
wall_insulation_type: int
|
||||
construction_age_band: str
|
||||
party_wall_construction: Union[int, str]
|
||||
wall_thickness_measured: str
|
||||
roof_insulation_location: Union[int, str]
|
||||
# ADR-0028: 17/1000 lodge a conservatory-shaped part with none of the
|
||||
# construction fields. Every field is Optional (the 18.0/20.0.0 precedent);
|
||||
# the all-None part flows through harmlessly.
|
||||
identifier: Optional[str] = None
|
||||
wall_dry_lined: Optional[str] = None
|
||||
wall_thickness: Optional[int] = None
|
||||
floor_heat_loss: Optional[int] = None
|
||||
roof_construction: Optional[int] = None
|
||||
wall_construction: Optional[int] = None
|
||||
building_part_number: Optional[int] = None
|
||||
sap_floor_dimensions: Optional[List[SapFloorDimension]] = None
|
||||
wall_insulation_type: Optional[int] = None
|
||||
construction_age_band: Optional[str] = None
|
||||
party_wall_construction: Optional[Union[int, str]] = None
|
||||
wall_thickness_measured: Optional[str] = None
|
||||
roof_insulation_location: Optional[Union[int, str]] = None
|
||||
# Can be a thickness string (e.g. "100mm") or 0 for uninsulated flat roofs
|
||||
roof_insulation_thickness: Union[str, int]
|
||||
roof_insulation_thickness: Optional[Union[str, int]] = None
|
||||
wall_insulation_thickness: Optional[str] = None
|
||||
|
||||
|
||||
|
|
@ -129,15 +133,17 @@ class SuggestedImprovement:
|
|||
environmental_impact_rating: int
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(kw_only=True)
|
||||
class AlternativeImprovement:
|
||||
sequence: int
|
||||
typical_saving: CostAmount
|
||||
improvement_type: str
|
||||
improvement_details: ImprovementDetails
|
||||
improvement_category: int
|
||||
energy_performance_rating: int
|
||||
environmental_impact_rating: int
|
||||
# ADR-0028: parse-only (the mapper does not read alternative_improvements);
|
||||
# a reduced shape lodges only some fields, so every field is Optional.
|
||||
sequence: Optional[int] = None
|
||||
typical_saving: Optional[CostAmount] = None
|
||||
improvement_type: Optional[str] = None
|
||||
improvement_details: Optional[ImprovementDetails] = None
|
||||
improvement_category: Optional[int] = None
|
||||
energy_performance_rating: Optional[int] = None
|
||||
environmental_impact_rating: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -149,6 +155,19 @@ class RenewableHeatIncentive:
|
|||
|
||||
|
||||
@dataclass
|
||||
class SapWindow:
|
||||
"""Per-window geometry. ADR-0028: only 14/1000 17.1 certs lodge this array
|
||||
(all band-4); window_area arrives as a Measurement and is read via
|
||||
`_measurement_value`. The extra pvc_frame/glazing_gap keys are ignored."""
|
||||
|
||||
orientation: int
|
||||
window_area: float
|
||||
window_type: int
|
||||
glazing_type: int
|
||||
window_location: int
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class RdSapSchema17_1:
|
||||
uprn: int
|
||||
roofs: List[EnergyElement]
|
||||
|
|
@ -164,7 +183,8 @@ class RdSapSchema17_1:
|
|||
built_form: int
|
||||
door_count: int
|
||||
glazed_area: int
|
||||
glazing_gap: str
|
||||
# ADR-0028: lodged as int, str, or omitted (478/1000) — widen + default.
|
||||
glazing_gap: Optional[Union[int, str]] = None
|
||||
region_code: int
|
||||
report_type: int
|
||||
sap_heating: SapHeating
|
||||
|
|
@ -173,7 +193,8 @@ class RdSapSchema17_1:
|
|||
uprn_source: str
|
||||
country_code: str
|
||||
main_heating: List[EnergyElement]
|
||||
dwelling_type: DescriptionV1
|
||||
# ADR-0028: 259/1000 lodge dwelling_type as a plain str, not DescriptionV1.
|
||||
dwelling_type: Union[str, DescriptionV1]
|
||||
language_code: int
|
||||
property_type: int
|
||||
address_line_1: str
|
||||
|
|
@ -186,11 +207,11 @@ class RdSapSchema17_1:
|
|||
transaction_type: int
|
||||
conservatory_type: int
|
||||
heated_room_count: int
|
||||
pvc_window_frames: str
|
||||
pvc_window_frames: Optional[str] = None
|
||||
registration_date: str
|
||||
sap_energy_source: SapEnergySource
|
||||
secondary_heating: EnergyElement
|
||||
lzc_energy_sources: List[int]
|
||||
lzc_energy_sources: List[int] = field(default_factory=list)
|
||||
sap_building_parts: List[SapBuildingPart]
|
||||
low_energy_lighting: int
|
||||
solar_water_heating: str
|
||||
|
|
@ -202,14 +223,15 @@ class RdSapSchema17_1:
|
|||
energy_rating_current: int
|
||||
lighting_cost_current: CostAmount
|
||||
main_heating_controls: List[EnergyElement]
|
||||
multiple_glazing_type: int
|
||||
# ADR-0028: int code (1-7) or "ND" (Not Defined, 56/1000) — widen.
|
||||
multiple_glazing_type: Union[int, str]
|
||||
open_fireplaces_count: int
|
||||
has_hot_water_cylinder: str
|
||||
heating_cost_potential: CostAmount
|
||||
hot_water_cost_current: CostAmount
|
||||
mechanical_ventilation: int
|
||||
percent_draughtproofed: int
|
||||
suggested_improvements: List[SuggestedImprovement]
|
||||
suggested_improvements: List[SuggestedImprovement] = field(default_factory=list)
|
||||
co2_emissions_potential: float
|
||||
energy_rating_potential: int
|
||||
lighting_cost_potential: CostAmount
|
||||
|
|
@ -232,3 +254,6 @@ class RdSapSchema17_1:
|
|||
sap_flat_details: Optional[SapFlatDetails] = None
|
||||
address_line_2: Optional[str] = None
|
||||
alternative_improvements: Optional[List[AlternativeImprovement]] = None
|
||||
# ADR-0028: additive — the placeholder schema omitted sap_windows, dropping
|
||||
# the 14 rich certs' lodged geometry. Default [] = windowless (synthesised).
|
||||
sap_windows: List[SapWindow] = field(default_factory=list)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from .common import CostAmount, DescriptionV1, Measurement
|
||||
|
|
@ -60,7 +60,7 @@ class PhotovoltaicSupplyNoneOrNoDetails:
|
|||
|
||||
@dataclass
|
||||
class PhotovoltaicSupply:
|
||||
none_or_no_details: PhotovoltaicSupplyNoneOrNoDetails
|
||||
none_or_no_details: Optional[PhotovoltaicSupplyNoneOrNoDetails] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -85,30 +85,38 @@ class SapFloorDimension:
|
|||
|
||||
@dataclass
|
||||
class SapRoomInRoof:
|
||||
"""Room-in-roof details. floor_area is a Measurement object in schema 18.0."""
|
||||
"""Room-in-roof details. floor_area is usually a Measurement object in 18.0,
|
||||
but 6/1000 certs lodge it as a plain number (ADR-0028) — read via
|
||||
`_measurement_value`, which coerces both shapes."""
|
||||
|
||||
floor_area: Measurement
|
||||
floor_area: Union[Measurement, int, float]
|
||||
insulation: str
|
||||
roof_room_connected: str
|
||||
construction_age_band: str
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(kw_only=True)
|
||||
class SapBuildingPart:
|
||||
identifier: str
|
||||
wall_dry_lined: str
|
||||
wall_thickness: int
|
||||
floor_heat_loss: int
|
||||
roof_construction: int
|
||||
wall_construction: int
|
||||
building_part_number: int
|
||||
sap_floor_dimensions: List[SapFloorDimension]
|
||||
wall_insulation_type: int
|
||||
construction_age_band: str
|
||||
party_wall_construction: Union[int, str]
|
||||
wall_thickness_measured: str
|
||||
roof_insulation_location: Union[int, str]
|
||||
roof_insulation_thickness: Union[str, int]
|
||||
# Data-driven required→optional (ADR-0028): 17/1000 certs lodge a
|
||||
# conservatory-shaped part carrying only {double_glazed, floor_area,
|
||||
# glazed_perimeter, room_height} — none of the construction fields. Every
|
||||
# field is Optional (the 21.0.1/20.0.0 precedent); the all-None part flows
|
||||
# through harmlessly because the conservatory's effect is carried separately
|
||||
# by conservatory_type.
|
||||
identifier: Optional[str] = None
|
||||
wall_dry_lined: Optional[str] = None
|
||||
wall_thickness: Optional[int] = None
|
||||
floor_heat_loss: Optional[int] = None
|
||||
roof_construction: Optional[int] = None
|
||||
wall_construction: Optional[int] = None
|
||||
building_part_number: Optional[int] = None
|
||||
sap_floor_dimensions: Optional[List[SapFloorDimension]] = None
|
||||
wall_insulation_type: Optional[int] = None
|
||||
construction_age_band: Optional[str] = None
|
||||
party_wall_construction: Optional[Union[int, str]] = None
|
||||
wall_thickness_measured: Optional[str] = None
|
||||
roof_insulation_location: Optional[Union[int, str]] = None
|
||||
roof_insulation_thickness: Optional[Union[str, int]] = None
|
||||
sap_room_in_roof: Optional[SapRoomInRoof] = None
|
||||
wall_insulation_thickness: Optional[str] = None
|
||||
floor_insulation_thickness: Optional[str] = None
|
||||
|
|
@ -140,15 +148,18 @@ class SuggestedImprovement:
|
|||
environmental_impact_rating: int
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(kw_only=True)
|
||||
class AlternativeImprovement:
|
||||
sequence: int
|
||||
typical_saving: CostAmount
|
||||
improvement_type: str
|
||||
improvement_details: ImprovementDetails
|
||||
improvement_category: int
|
||||
energy_performance_rating: int
|
||||
environmental_impact_rating: int
|
||||
# ADR-0028: 165/1000 lodge a reduced alternative-improvement shape (only
|
||||
# improvement_details/-type). Parse-only — the mapper does not read
|
||||
# alternative_improvements — so every field is Optional.
|
||||
sequence: Optional[int] = None
|
||||
typical_saving: Optional[CostAmount] = None
|
||||
improvement_type: Optional[str] = None
|
||||
improvement_details: Optional[ImprovementDetails] = None
|
||||
improvement_category: Optional[int] = None
|
||||
energy_performance_rating: Optional[int] = None
|
||||
environmental_impact_rating: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -160,6 +171,19 @@ class RenewableHeatIncentive:
|
|||
|
||||
|
||||
@dataclass
|
||||
class SapWindow:
|
||||
"""Per-window geometry. ADR-0028: only 10/1000 18.0 certs lodge this array
|
||||
(all band-4); window_area arrives as a Measurement and is read via
|
||||
`_measurement_value`. Mirrors the 20.0.0 SapWindow shape."""
|
||||
|
||||
orientation: int
|
||||
window_area: float
|
||||
window_type: int
|
||||
glazing_type: int
|
||||
window_location: int
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class RdSapSchema18_0:
|
||||
uprn: int
|
||||
roofs: List[EnergyElement]
|
||||
|
|
@ -175,8 +199,9 @@ class RdSapSchema18_0:
|
|||
built_form: int
|
||||
door_count: int
|
||||
glazed_area: int
|
||||
# glazing_gap is an integer in 18.0 (e.g. 12 mm), unlike 17.x where it was a string
|
||||
glazing_gap: int
|
||||
# ADR-0028: glazing_gap is lodged as int (e.g. 12 mm), str ("16+"), or
|
||||
# omitted across the corpus (433/1000) — widen + default, not int-required.
|
||||
glazing_gap: Optional[Union[int, str]] = None
|
||||
region_code: int
|
||||
report_type: int
|
||||
sap_heating: SapHeating
|
||||
|
|
@ -185,7 +210,9 @@ class RdSapSchema18_0:
|
|||
uprn_source: str
|
||||
country_code: str
|
||||
main_heating: List[EnergyElement]
|
||||
dwelling_type: DescriptionV1
|
||||
# ADR-0028: 392/1000 lodge dwelling_type as a plain str, not a localised
|
||||
# DescriptionV1 object (matches 20.0.0). Widen so both shapes parse.
|
||||
dwelling_type: Union[str, DescriptionV1]
|
||||
language_code: int
|
||||
property_type: int
|
||||
address_line_1: str
|
||||
|
|
@ -198,11 +225,11 @@ class RdSapSchema18_0:
|
|||
transaction_type: int
|
||||
conservatory_type: int
|
||||
heated_room_count: int
|
||||
pvc_window_frames: str
|
||||
pvc_window_frames: Optional[str] = None
|
||||
registration_date: str
|
||||
sap_energy_source: SapEnergySource
|
||||
secondary_heating: EnergyElement
|
||||
lzc_energy_sources: List[int]
|
||||
lzc_energy_sources: List[int] = field(default_factory=list)
|
||||
sap_building_parts: List[SapBuildingPart]
|
||||
low_energy_lighting: int
|
||||
solar_water_heating: str
|
||||
|
|
@ -214,14 +241,16 @@ class RdSapSchema18_0:
|
|||
energy_rating_current: int
|
||||
lighting_cost_current: CostAmount
|
||||
main_heating_controls: List[EnergyElement]
|
||||
multiple_glazing_type: int
|
||||
# ADR-0028: lodged as an int code (1-7) or the string "ND" (Not Defined,
|
||||
# 69/1000) — widen so both parse; the synthesis maps "ND" to a default.
|
||||
multiple_glazing_type: Union[int, str]
|
||||
open_fireplaces_count: int
|
||||
has_hot_water_cylinder: str
|
||||
heating_cost_potential: CostAmount
|
||||
hot_water_cost_current: CostAmount
|
||||
mechanical_ventilation: int
|
||||
percent_draughtproofed: int
|
||||
suggested_improvements: List[SuggestedImprovement]
|
||||
suggested_improvements: List[SuggestedImprovement] = field(default_factory=list)
|
||||
co2_emissions_potential: float
|
||||
energy_rating_potential: int
|
||||
lighting_cost_potential: CostAmount
|
||||
|
|
@ -244,3 +273,8 @@ class RdSapSchema18_0:
|
|||
sap_flat_details: Optional[SapFlatDetails] = None
|
||||
address_line_2: Optional[str] = None
|
||||
alternative_improvements: Optional[List[AlternativeImprovement]] = None
|
||||
# ADR-0028: additive — the placeholder schema omitted sap_windows entirely,
|
||||
# silently dropping the 10 rich certs' lodged per-window geometry. Capture it
|
||||
# so the mapper can use lodged window_area directly (default [] = windowless,
|
||||
# synthesised from the glazed_area band).
|
||||
sap_windows: List[SapWindow] = field(default_factory=list)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from .common import CostAmount, DescriptionV1, Measurement
|
||||
|
|
@ -60,7 +60,9 @@ class PhotovoltaicSupplyNoneOrNoDetails:
|
|||
|
||||
@dataclass
|
||||
class PhotovoltaicSupply:
|
||||
none_or_no_details: PhotovoltaicSupplyNoneOrNoDetails
|
||||
# ADR-0028 data-driven required→optional: the photovoltaic_supply block can
|
||||
# arrive without its none_or_no_details child (matches 18.0).
|
||||
none_or_no_details: Optional[PhotovoltaicSupplyNoneOrNoDetails] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -85,28 +87,37 @@ class SapFloorDimension:
|
|||
|
||||
@dataclass
|
||||
class SapRoomInRoof:
|
||||
floor_area: Measurement
|
||||
"""Room-in-roof details. floor_area is usually a Measurement object but some
|
||||
certs lodge it as a plain number (ADR-0028, as in 18.0) — read via
|
||||
`_measurement_value`, which coerces both shapes."""
|
||||
|
||||
floor_area: Union[Measurement, int, float]
|
||||
insulation: str
|
||||
roof_room_connected: str
|
||||
construction_age_band: str
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(kw_only=True)
|
||||
class SapBuildingPart:
|
||||
identifier: str
|
||||
wall_dry_lined: str
|
||||
wall_thickness: int
|
||||
floor_heat_loss: int
|
||||
roof_construction: int
|
||||
wall_construction: int
|
||||
building_part_number: int
|
||||
sap_floor_dimensions: List[SapFloorDimension]
|
||||
wall_insulation_type: int
|
||||
construction_age_band: str
|
||||
party_wall_construction: Union[int, str]
|
||||
wall_thickness_measured: str
|
||||
roof_insulation_location: Union[int, str]
|
||||
roof_insulation_thickness: Union[str, int]
|
||||
# Data-driven required→optional (ADR-0028): a conservatory-shaped part can
|
||||
# carry only a subset of fields (none of the construction fields). Every
|
||||
# field is Optional (the 21.0.1/20.0.0/18.0 precedent); the sparse part flows
|
||||
# through harmlessly. 19.0 corpus: 6/1000 omit roof_insulation_thickness,
|
||||
# 2/1000 omit identifier.
|
||||
identifier: Optional[str] = None
|
||||
wall_dry_lined: Optional[str] = None
|
||||
wall_thickness: Optional[int] = None
|
||||
floor_heat_loss: Optional[int] = None
|
||||
roof_construction: Optional[int] = None
|
||||
wall_construction: Optional[int] = None
|
||||
building_part_number: Optional[int] = None
|
||||
sap_floor_dimensions: Optional[List[SapFloorDimension]] = None
|
||||
wall_insulation_type: Optional[int] = None
|
||||
construction_age_band: Optional[str] = None
|
||||
party_wall_construction: Optional[Union[int, str]] = None
|
||||
wall_thickness_measured: Optional[str] = None
|
||||
roof_insulation_location: Optional[Union[int, str]] = None
|
||||
roof_insulation_thickness: Optional[Union[str, int]] = None
|
||||
sap_room_in_roof: Optional[SapRoomInRoof] = None
|
||||
wall_insulation_thickness: Optional[str] = None
|
||||
floor_insulation_thickness: Optional[str] = None
|
||||
|
|
@ -145,15 +156,18 @@ class SuggestedImprovement:
|
|||
environmental_impact_rating: int
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(kw_only=True)
|
||||
class AlternativeImprovement:
|
||||
sequence: int
|
||||
typical_saving: CostAmount
|
||||
improvement_type: str
|
||||
improvement_details: ImprovementDetails
|
||||
improvement_category: int
|
||||
energy_performance_rating: int
|
||||
environmental_impact_rating: int
|
||||
# ADR-0028: some certs lodge a reduced alternative-improvement shape (only
|
||||
# improvement_details/-type). Parse-only — the mapper does not read
|
||||
# alternative_improvements — so every field is Optional.
|
||||
sequence: Optional[int] = None
|
||||
typical_saving: Optional[CostAmount] = None
|
||||
improvement_type: Optional[str] = None
|
||||
improvement_details: Optional[ImprovementDetails] = None
|
||||
improvement_category: Optional[int] = None
|
||||
energy_performance_rating: Optional[int] = None
|
||||
environmental_impact_rating: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -165,6 +179,20 @@ class RenewableHeatIncentive:
|
|||
|
||||
|
||||
@dataclass
|
||||
class SapWindow:
|
||||
"""Per-window geometry. ADR-0028: only 6/1000 19.0 certs lodge this array;
|
||||
window_area arrives as a Measurement and is read via `_measurement_value`.
|
||||
Mirrors the 20.0.0/18.0 SapWindow shape. This is the per-spec Validation
|
||||
Cohort — its lodged geometry is used directly, never synthesised over."""
|
||||
|
||||
orientation: int
|
||||
window_area: float
|
||||
window_type: int
|
||||
glazing_type: int
|
||||
window_location: int
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class RdSapSchema19_0:
|
||||
uprn: int
|
||||
roofs: List[EnergyElement]
|
||||
|
|
@ -180,6 +208,9 @@ class RdSapSchema19_0:
|
|||
built_form: int
|
||||
door_count: int
|
||||
glazed_area: int
|
||||
# ADR-0028: glazing_gap is lodged as int (162/1000), str (357/1000), or
|
||||
# omitted (481/1000) — widen + default, not int-required.
|
||||
glazing_gap: Optional[Union[int, str]] = None
|
||||
region_code: int
|
||||
report_type: int
|
||||
sap_heating: SapHeating
|
||||
|
|
@ -188,7 +219,9 @@ class RdSapSchema19_0:
|
|||
uprn_source: str
|
||||
country_code: str
|
||||
main_heating: List[EnergyElement]
|
||||
dwelling_type: DescriptionV1
|
||||
# ADR-0028: 503/1000 lodge dwelling_type as a plain str, not a localised
|
||||
# DescriptionV1 object (matches 20.0.0/18.0). Widen so both shapes parse.
|
||||
dwelling_type: Union[str, DescriptionV1]
|
||||
language_code: int
|
||||
property_type: int
|
||||
address_line_1: str
|
||||
|
|
@ -201,11 +234,13 @@ class RdSapSchema19_0:
|
|||
transaction_type: int
|
||||
conservatory_type: int
|
||||
heated_room_count: int
|
||||
pvc_window_frames: str
|
||||
# ADR-0028: missing in 314/1000 — widen + default.
|
||||
pvc_window_frames: Optional[str] = None
|
||||
registration_date: str
|
||||
sap_energy_source: SapEnergySource
|
||||
secondary_heating: EnergyElement
|
||||
lzc_energy_sources: List[int]
|
||||
# ADR-0028: present in only 35/1000 — default to empty.
|
||||
lzc_energy_sources: List[int] = field(default_factory=list)
|
||||
sap_building_parts: List[SapBuildingPart]
|
||||
low_energy_lighting: int
|
||||
solar_water_heating: str
|
||||
|
|
@ -217,21 +252,24 @@ class RdSapSchema19_0:
|
|||
energy_rating_current: int
|
||||
lighting_cost_current: CostAmount
|
||||
main_heating_controls: List[EnergyElement]
|
||||
multiple_glazing_type: int
|
||||
# ADR-0028: lodged as an int code (1-7) or the string "ND" (Not Defined,
|
||||
# 50/1000) — widen so both parse; the synthesis maps "ND" to a default.
|
||||
multiple_glazing_type: Union[int, str]
|
||||
open_fireplaces_count: int
|
||||
has_hot_water_cylinder: str
|
||||
heating_cost_potential: CostAmount
|
||||
hot_water_cost_current: CostAmount
|
||||
mechanical_ventilation: int
|
||||
percent_draughtproofed: int
|
||||
suggested_improvements: List[SuggestedImprovement]
|
||||
suggested_improvements: List[SuggestedImprovement] = field(default_factory=list)
|
||||
co2_emissions_potential: float
|
||||
energy_rating_potential: int
|
||||
lighting_cost_potential: CostAmount
|
||||
schema_version_original: str
|
||||
hot_water_cost_potential: CostAmount
|
||||
renewable_heat_incentive: RenewableHeatIncentive
|
||||
windows_transmission_details: WindowsTransmissionDetails
|
||||
# 19.0-specific block, absent in 713/1000 — Optional + default.
|
||||
windows_transmission_details: Optional[WindowsTransmissionDetails] = None
|
||||
energy_consumption_current: int
|
||||
has_fixed_air_conditioning: str
|
||||
multiple_glazed_proportion: int
|
||||
|
|
@ -247,5 +285,9 @@ class RdSapSchema19_0:
|
|||
low_energy_fixed_lighting_outlets_count: int
|
||||
sap_flat_details: Optional[SapFlatDetails] = None
|
||||
address_line_2: Optional[str] = None
|
||||
glazing_gap: Optional[Union[str, int]] = None
|
||||
alternative_improvements: Optional[List[AlternativeImprovement]] = None
|
||||
# ADR-0028: additive — the placeholder schema omitted sap_windows entirely,
|
||||
# silently dropping the 6 rich certs' lodged per-window geometry. Capture it
|
||||
# so the mapper can use lodged window_area directly (default [] = windowless,
|
||||
# synthesised from the glazed_area band).
|
||||
sap_windows: List[SapWindow] = field(default_factory=list)
|
||||
|
|
|
|||
|
|
@ -1,20 +1,24 @@
|
|||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from .common import Measurement
|
||||
from .common import DescriptionV1, Measurement
|
||||
|
||||
|
||||
@dataclass
|
||||
class EnergyElement:
|
||||
# description is a plain string in schema 20.0.0 onwards (no longer a localised object)
|
||||
description: str
|
||||
# ADR-0027: the corpus lodges description as EITHER a plain str OR a
|
||||
# localised {value,language} dict (DescriptionV1) — not str-only, as the
|
||||
# one-example placeholder schema assumed. Union so _coerce builds the right one.
|
||||
description: Union[str, DescriptionV1]
|
||||
energy_efficiency_rating: int
|
||||
environmental_efficiency_rating: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class Addendum:
|
||||
addendum_numbers: List[int]
|
||||
# ADR-0027: an addendum block can lodge only stone_walls/system_build flags
|
||||
# with no numbers list → optional.
|
||||
addendum_numbers: List[int] = field(default_factory=list)
|
||||
stone_walls: Optional[str] = None
|
||||
system_build: Optional[str] = None
|
||||
|
||||
|
|
@ -68,7 +72,9 @@ class PhotovoltaicSupplyNoneOrNoDetails:
|
|||
|
||||
@dataclass
|
||||
class PhotovoltaicSupply:
|
||||
none_or_no_details: PhotovoltaicSupplyNoneOrNoDetails
|
||||
# ADR-0027: a photovoltaic_supply block can lodge measured-array detail
|
||||
# instead of the none_or_no_details summary → optional (absent on ~10 certs).
|
||||
none_or_no_details: Optional[PhotovoltaicSupplyNoneOrNoDetails] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -122,19 +128,26 @@ class SapAlternativeWall:
|
|||
|
||||
@dataclass
|
||||
class SapBuildingPart:
|
||||
identifier: str
|
||||
wall_dry_lined: str
|
||||
floor_heat_loss: int
|
||||
roof_construction: int
|
||||
wall_construction: int
|
||||
building_part_number: int
|
||||
sap_floor_dimensions: List[SapFloorDimension]
|
||||
wall_insulation_type: int
|
||||
construction_age_band: str
|
||||
party_wall_construction: Union[int, str]
|
||||
wall_thickness_measured: str
|
||||
roof_insulation_location: Union[int, str]
|
||||
roof_insulation_thickness: Union[str, int]
|
||||
# ADR-0027: 17/1000 certs lodge a CONSERVATORY-shaped building part carrying
|
||||
# only {double_glazed, floor_area, glazed_perimeter, room_height} — none of
|
||||
# the wall/roof/floor construction fields below. Following the 21.0.1
|
||||
# precedent every field is Optional, so a conservatory part parses to an
|
||||
# all-None SapBuildingPart; its thermal effect is carried separately by the
|
||||
# cert-level conservatory_type, so the empty part flows through harmlessly.
|
||||
identifier: Optional[str] = None
|
||||
wall_dry_lined: Optional[str] = None
|
||||
floor_heat_loss: Optional[int] = None
|
||||
roof_construction: Optional[int] = None
|
||||
wall_construction: Optional[int] = None
|
||||
building_part_number: Optional[int] = None
|
||||
sap_floor_dimensions: Optional[List[SapFloorDimension]] = None
|
||||
wall_insulation_type: Optional[int] = None
|
||||
construction_age_band: Optional[str] = None
|
||||
party_wall_construction: Optional[Union[int, str]] = None
|
||||
wall_thickness_measured: Optional[str] = None
|
||||
roof_insulation_location: Optional[Union[int, str]] = None
|
||||
# ADR-0027: absent on 254/1506 building parts (flat-roof / no-loft) → optional.
|
||||
roof_insulation_thickness: Optional[Union[str, int]] = None
|
||||
sap_room_in_roof: Optional[SapRoomInRoof] = None
|
||||
wall_thickness: Optional[int] = None
|
||||
wall_insulation_thickness: Optional[str] = None
|
||||
|
|
@ -194,7 +207,12 @@ class RenewableHeatIncentive:
|
|||
impact_of_solid_wall_insulation: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
# ADR-0027: 20.0.0 is a reduced-data schema generated from a single example, so
|
||||
# it over-constrains — fields the corpus routinely omits were declared required,
|
||||
# failing 993/1000 certs at parse. Required→optional is data-driven (any field
|
||||
# present in <100% of the corpus gets a default); `kw_only=True` lifts the
|
||||
# dataclass non-default-after-default ordering rule so defaults can sit inline.
|
||||
@dataclass(kw_only=True)
|
||||
class RdSapSchema20_0_0:
|
||||
uprn: int
|
||||
roofs: List[EnergyElement]
|
||||
|
|
@ -214,13 +232,14 @@ class RdSapSchema20_0_0:
|
|||
report_type: int
|
||||
sap_heating: SapHeating
|
||||
sap_version: float
|
||||
sap_windows: List[SapWindow]
|
||||
# ADR-0027: 993/1000 omit this; synthesised by Reduced-Field Synthesis.
|
||||
sap_windows: List[SapWindow] = field(default_factory=list)
|
||||
schema_type: str
|
||||
uprn_source: str
|
||||
country_code: str
|
||||
main_heating: List[EnergyElement]
|
||||
# dwelling_type is a plain string in schema 20.0.0 onwards
|
||||
dwelling_type: str
|
||||
# ADR-0027: mixed str / localised-dict in the corpus (see EnergyElement).
|
||||
dwelling_type: Union[str, DescriptionV1]
|
||||
language_code: int
|
||||
property_type: int
|
||||
address_line_1: str
|
||||
|
|
@ -236,7 +255,7 @@ class RdSapSchema20_0_0:
|
|||
registration_date: str
|
||||
sap_energy_source: SapEnergySource
|
||||
secondary_heating: EnergyElement
|
||||
lzc_energy_sources: List[int]
|
||||
lzc_energy_sources: List[int] = field(default_factory=list)
|
||||
sap_building_parts: List[SapBuildingPart]
|
||||
low_energy_lighting: int
|
||||
solar_water_heating: str
|
||||
|
|
@ -252,24 +271,25 @@ class RdSapSchema20_0_0:
|
|||
open_fireplaces_count: int
|
||||
heating_cost_potential: float
|
||||
hot_water_cost_current: float
|
||||
insulated_door_u_value: float
|
||||
insulated_door_u_value: Optional[float] = None
|
||||
mechanical_ventilation: int
|
||||
percent_draughtproofed: int
|
||||
suggested_improvements: List[SuggestedImprovement]
|
||||
suggested_improvements: List[SuggestedImprovement] = field(default_factory=list)
|
||||
co2_emissions_potential: float
|
||||
energy_rating_potential: int
|
||||
lighting_cost_potential: float
|
||||
schema_version_original: str
|
||||
hot_water_cost_potential: float
|
||||
renewable_heat_incentive: RenewableHeatIncentive
|
||||
windows_transmission_details: WindowsTransmissionDetails
|
||||
# ADR-0027: cert-level U/g present in 687/1000; Table-24 default otherwise.
|
||||
windows_transmission_details: Optional[WindowsTransmissionDetails] = None
|
||||
energy_consumption_current: int
|
||||
multiple_glazed_proportion: int
|
||||
calculation_software_version: str
|
||||
energy_consumption_potential: int
|
||||
environmental_impact_current: int
|
||||
fixed_lighting_outlets_count: int
|
||||
multiple_glazed_proportion_nr: Optional[str]
|
||||
multiple_glazed_proportion_nr: Optional[str] = None
|
||||
current_energy_efficiency_band: str
|
||||
environmental_impact_potential: int
|
||||
potential_energy_efficiency_band: str
|
||||
|
|
|
|||
101
docs/adr/0027-rdsap-20-0-0-reduced-field-synthesis.md
Normal file
101
docs/adr/0027-rdsap-20-0-0-reduced-field-synthesis.md
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
---
|
||||
Status: accepted
|
||||
---
|
||||
|
||||
# The pre-SAP10 (RdSAP 20.0.0) mapper does best-attempt Reduced-Field Synthesis
|
||||
|
||||
Decided in a `/grill-me` → `/grill-with-docs` session (2026-06-10). Instantiates and extends
|
||||
[ADR-0015](0015-mappers-own-cert-normalization.md) (mappers own cert normalization); sits inside the
|
||||
**old-schema re-map** half of **Rebaselining** ([CONTEXT.md](../../CONTEXT.md): _Effective EPC_,
|
||||
_Rebaselining_, _Reduced-Field Synthesis_); relates to [ADR-0004](0004-baseline-performance-lodged-effective-pair.md)
|
||||
(lodged-vs-effective pair) and [ADR-0009](0009-deterministic-sap-calculator.md)/[ADR-0013](0013-calculator-produces-effective-performance-shadow-first.md)
|
||||
(the deterministic calculator that scores the result). Resume notes:
|
||||
[docs/grill-sessions/2026-06-09-rdsap-20-0-0-remapper.md](../grill-sessions/2026-06-09-rdsap-20-0-0-remapper.md).
|
||||
|
||||
## Context
|
||||
|
||||
`RdSAP-Schema-20.0.0` is a **pre-SAP10** schema (RdSAP 2012). Its certs are historical
|
||||
(2021–2024 bulk lodgements, harvested offline — the gov EPC API only returns 21.0.x), and we need
|
||||
them re-mapped to the current `EpcPropertyData` so they can be **Rebaselined**: assembled → scored by
|
||||
`Sap10Calculator` → **Effective Performance**. Per the **Validation Cohort** rule
|
||||
([CONTEXT.md](../../CONTEXT.md): _Spec Version_, _Calculated SAP10 Performance_), a pre-SAP10 cert
|
||||
has **no same-spec lodged figure to validate against** — the lodged 20.0.0 score is preserved as
|
||||
**Lodged Performance** but is *not* a 1:1 comparison target. So the calculator's output simply *is*
|
||||
the Effective Performance for these properties; there is no ground truth to check the mapping against.
|
||||
|
||||
The problem: `EpcPropertyData` (and the calculator behind it) expects **measured** fields that
|
||||
20.0.0 records only **categorically**, or not at all:
|
||||
|
||||
- **Windows** — 20.0.0 lodges a `glazed_area` *band* (Normal / More / Less, 945/1000 = Normal) and
|
||||
dwelling-level aggregates, **not** per-window m² (only 7/1000 carry a `sap_windows` array). The
|
||||
calculator needs `width × height` per window for heat-transmission and per-orientation solar gain.
|
||||
An empty `sap_windows=[]` does **not** crash — it silently models a windowless dwelling (zero solar
|
||||
gain, zero window heat loss), which is the worst outcome for a score that drives bills and packages.
|
||||
- **Hot water** — 20.0.0 lodges bath/shower *room counts*, not `number_baths` / `mixer_shower_count`.
|
||||
- **Lighting** — outlet counts + a low-energy count, not per-bulb-type counts.
|
||||
- **Ventilation / chimneys / sheltered sides** — partial or coded differently.
|
||||
|
||||
The placeholder `RdSapSchema20_0_0` (generated from a single example) also over-constrains: all 1000
|
||||
certs fail to even parse (993 of them on `sap_windows` alone) because fields the corpus routinely omits (`sap_windows`,
|
||||
`windows_transmission_details`, `lzc_energy_sources`, many `SapBuildingPart` fields) are declared
|
||||
required.
|
||||
|
||||
Three ways to fill the measured fields were genuinely on the table:
|
||||
|
||||
1. **Leave them empty/zero** — type-safe ingest only. Rejected: silently corrupts the score
|
||||
(windowless dwellings; under-counted baths; under-stated lighting for 439/1000 certs).
|
||||
2. **Neighbour-prediction gap-fill** — ML-estimate from surrounding properties. This is a *separate*
|
||||
mechanism, **not yet implemented**, reserved for the no-EPC case. Out of scope here.
|
||||
3. **Reduced-Field Synthesis** — deterministically synthesize the measured fields from the cert's own
|
||||
reduced/categorical fields plus fixed coefficients. **Chosen.**
|
||||
|
||||
## Decision
|
||||
|
||||
The 20.0.0 mapper produces a *complete* `EpcPropertyData` by **Reduced-Field Synthesis** — using the
|
||||
cert alone plus fixed, corpus-calibrated coefficients, never neighbour data — so the existing
|
||||
`Sap10Calculator` runs unchanged. The calculator + its tests are the acceptance criterion; the mapper
|
||||
owns all synthesis (extending ADR-0015 from code-normalization to reduced-field synthesis).
|
||||
|
||||
Load-bearing, surprising-without-context choices (the reason this is an ADR):
|
||||
|
||||
- **Window area** = `0.148 × total_floor_area × band_multiplier`. The `0.148` (median) and the band
|
||||
multipliers `{Normal 1.00, More 1.25, Less 0.81, MuchMore 1.51, MuchLess 0.62}` are fit from the
|
||||
**glazing-area ÷ floor-area ratio of all 1000 real 21.0.1 certs** (percentiles: P25 0.12 / P50 0.148 /
|
||||
P75 0.185 / P90 0.224), which the band labels map onto. Chosen over the published **RdSAP 2012**
|
||||
band→m² formula because that spec is retired and not in our possession (RdSAP10 / SAP10.2 both
|
||||
*measure* windows and dropped it), whereas our own corpus is data we hold and can validate against a
|
||||
held-out split. The 7 rich certs use their lodged `window_area` directly.
|
||||
- **Orientation** — 20.0.0 records none, so the synthesized area is **split 4-way across N/E/S/W**;
|
||||
the unchanged `solar_gains.py` then averages them (the avg-orientation treatment). The spec's literal
|
||||
default for unrecorded orientation is **E/W** (RdSAP10 §7 / §8.2); 4-way lands within ~3% of that and
|
||||
was chosen for the "distribute it" intuition. (The prior calculator behaviour — *skip* unknown
|
||||
orientation → zero solar gain — is a downward bias we are removing.)
|
||||
- **Window geometry representation** — each synthesized window is `width = area/4, height = 1.0`
|
||||
(width×height is the only quantity the calculator reads; exact, matching existing Elmhurst precedent).
|
||||
- **Everything the calculator already defaults, the mapper leaves to it.** `cert_to_inputs` is the
|
||||
RdSAP Table-5 expansion engine (extract-fans from age+rooms, suspended-timber sealing, draught-lobby,
|
||||
modal hot-water defaults). The mapper supplies raw reduced data only and does **not** re-derive these.
|
||||
- **Schema parse fix** — data-driven required→optional: any field present in <100% of the corpus
|
||||
gets a default (an empty list for the list fields, `Optional[...] = None` otherwise), so all 1000 certs parse.
|
||||
|
||||
Because there is no ground truth (per the Validation-Cohort rule), **every synthesis assumption is
|
||||
recorded explicitly in code comments and test names**, so a future debugger can see exactly which
|
||||
coefficient or default produced a surprising Effective Performance.
|
||||
|
||||
## Consequences
|
||||
|
||||
- **Every 20.0.0 property's Effective Performance depends on these coefficients.** Changing `0.148`,
|
||||
the band multipliers, or the 4-way split shifts every rebaselined 20.0.0 score — and any bill /
|
||||
package built on it. That is the "hard to reverse" cost; it is why they live in one named place with
|
||||
their derivation recorded, not scattered as magic numbers.
|
||||
- **Synthesis is a best attempt, not validated.** We cannot close the loop against a lodged SAP10
|
||||
figure for these certs. Revisit if either (a) the **RdSAP 2012** band→m² formula is sourced (cross-check
|
||||
/ replace the corpus fit), or (b) a same-spec **Validation Cohort** becomes available.
|
||||
- **Fidelity ceilings are accepted and documented:** 20.0.0 cannot give per-orientation glazing
|
||||
(single averaged treatment), cannot distinguish roof windows from wall windows (all treated as wall),
|
||||
and approximates 1 lighting outlet ≈ 1 bulb. These are inherent to the reduced schema, not bugs.
|
||||
- **Neighbour-prediction gap-fill stays out.** If a future slice wants to improve a synthesized field
|
||||
from surrounding properties, that is the separate (unimplemented) ML mechanism and a new ADR — not a
|
||||
tweak to this deterministic path.
|
||||
- The corpus test flips from `xfail` to a strict 1000/1000 **parse + scores-without-crashing** guard;
|
||||
it is a *mapper-correctness* vehicle, **not** a lodged-vs-effective accuracy check.
|
||||
106
docs/adr/0028-pre-sap10-rdsap-family-coefficient-transfer.md
Normal file
106
docs/adr/0028-pre-sap10-rdsap-family-coefficient-transfer.md
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
---
|
||||
Status: accepted
|
||||
---
|
||||
|
||||
# Pre-SAP10 RdSAP coefficients transfer across the family: inherit-and-validate, starting with 18.0
|
||||
|
||||
Decided in a `/grill-me` session (2026-06-11). **Extends** [ADR-0027](0027-rdsap-20-0-0-reduced-field-synthesis.md)
|
||||
(RdSAP 20.0.0 Reduced-Field Synthesis) from a single spec to the wider **pre-SAP10 RdSAP family**;
|
||||
sits inside the **old-schema re-map** half of **Rebaselining** ([CONTEXT.md](../../CONTEXT.md):
|
||||
_Effective EPC_, _Rebaselining_, _Reduced-Field Synthesis_, _Validation Cohort_, _Spec Version_).
|
||||
Relates to [ADR-0015](0015-mappers-own-cert-normalization.md) (mappers own cert normalization) and
|
||||
[ADR-0004](0004-baseline-performance-lodged-effective-pair.md) (lodged-vs-effective pair). Grill spec:
|
||||
[docs/grill-sessions/2026-06-10-pre-sap10-mapper-generalization.md](../grill-sessions/2026-06-10-pre-sap10-mapper-generalization.md).
|
||||
|
||||
## Context
|
||||
|
||||
ADR-0027 proved Reduced-Field Synthesis end-to-end for `RdSAP-Schema-20.0.0`. The pre-SAP10 RdSAP
|
||||
family has more orphaned siblings (`19.0`, `18.0`, `17.1`, `17.0`) whose mapper methods exist but are
|
||||
unreachable (`from_api_response` never dispatches to them) and whose placeholder schemas over-constrain
|
||||
identically. We want each re-mapped to the current `EpcPropertyData` so its historical certs can be
|
||||
**Rebaselined**. This ADR records the *family-level* coefficient decision; `18.0` is the first instance
|
||||
and the worked example. (Order set by direction 2026-06-11: **18.0 alone, end-to-end, first**; `17.1`
|
||||
is a separate later effort.)
|
||||
|
||||
ADR-0027 left one question open for the rest of the family: do later pre-SAP10 specs **reuse** 0027's
|
||||
fitted coefficients (`0.148 × total_floor_area × band_multiplier`, multipliers
|
||||
`{Normal 1.00, More 1.25, Less 0.81, MuchMore 1.51, MuchLess 0.62}`), or **re-fit** per spec? The
|
||||
initial direction (2026-06-10) was *re-fit from each new corpus's own data — do not inherit by default*.
|
||||
Profiling the harvested `18.0` corpus (1000 certs from `certificates-2018.json`, ~82% of that dump)
|
||||
showed why a literal re-fit is **not achievable**, and — more usefully — that it is **not necessary**:
|
||||
|
||||
- **The corpus cannot self-fit the glazing/floor ratio.** A reduced schema records `glazed_area` as a
|
||||
*band*, not per-window m². `18.0`'s population is **958/1000 band-1 (Normal)**, and only **10/1000**
|
||||
carry a lodged `sap_windows` array at all. So there is no measured glazing column to regress on for
|
||||
the band that dominates the stock — the exact constraint ADR-0027 anticipated.
|
||||
- **The 10 rich certs are systematically the outliers, not a representative sample.** They are
|
||||
**9× band-4 ("Much More Than Typical") + 1× band-5 ("Much Less")**, with **zero band-1**. The
|
||||
dwellings that bother to lodge full per-window geometry are the unusually-glazed ones. A "fit" off
|
||||
these would measure band-4 dwellings; dividing that result by the band-4 multiplier (1.51) only reconstructs
|
||||
`0.148` — circular.
|
||||
- **Where the corpus *can* be measured, it reproduces ADR-0027's model almost exactly:**
|
||||
|
||||
| Band | 18.0 observed glazing/floor (n) | ADR-0027 predicts (`0.148 × mult`) |
|
||||
|------|---------------------------------|------------------------------------|
|
||||
| 4 (MuchMore, ×1.51) | **0.223** (n=9) | **0.223** |
|
||||
| 5 (MuchLess, ×0.62) | **0.086** (n=1) | **0.092** |
|
||||
|
||||
So the new corpus's own data **validates** the inherited coefficients rather than contradicting them.
|
||||
- **Integer code spaces are identical.** `built_form`, `glazed_area`, `glazed_type`, and
|
||||
`mechanical_ventilation` were diffed against `datatypes/epc/domain/epc_codes.csv` for
|
||||
`18.0` / `17.1` / `20.0.0` / `21.0.1`: byte-identical for every code the corpus uses (`glazed_type`
|
||||
1-8 + ND; `built_form` 1-6 + NR; `glazed_area` 1-5 + ND). The cert-side codes never reach 21.0.1's
|
||||
later extensions. So the verified 21.0.1 glazing/sheltered-sides cascades apply verbatim — no per-spec
|
||||
override.
|
||||
|
||||
## Decision
|
||||
|
||||
For the pre-SAP10 RdSAP family, **inherit ADR-0027's coefficients and validate the transfer per spec —
|
||||
do not re-fit by default.** Concretely, for `18.0` (and as the rule for `17.x`/`19.0`):
|
||||
|
||||
- **Reuse `0.148` and the band multipliers unchanged.** The corpus structurally cannot self-fit them
|
||||
(96% band-1, zero measured band-1 windows), and where it can be measured it reproduces the inherited
|
||||
model to within rounding. Re-fit a spec **only if** its own rich certs contradict the inherited model;
|
||||
`18.0` does not.
|
||||
- **The rich certs are a per-spec Validation Cohort, not a fit set.** Their lodged `window_area` is used
|
||||
**directly** as geometry (the accuracy-where-we-have-it rule from ADR-0027 — synthesise only over the
|
||||
windowless majority, never over real measured data). For `18.0` that is 10 certs direct, 990
|
||||
synthesised.
|
||||
- **Route through the existing verified cascades verbatim** (glazing-type, sheltered-sides), per the
|
||||
code-space diff above.
|
||||
- **Schema parse fix = ADR-0027's mechanism plus one additive change.** (a) `@dataclass(kw_only=True)` +
|
||||
data-driven required→optional: any field present in <100% of the corpus gets a default (`[]` for
|
||||
lists, `None` otherwise) — for `18.0` that is `lzc_energy_sources`, `glazing_gap`
|
||||
(`Optional[Union[int, str]]` — the corpus lodges str, int, **and** absent), `pvc_window_frames`, and
|
||||
scattered `SapBuildingPart` / `AlternativeImprovement` / `PhotovoltaicSupply` fields; this takes the
|
||||
parse rate from 14/1000 to 1000/1000. (b) **Add a `sap_windows` field** — the placeholder `18.0`
|
||||
schema omits it entirely, so without this the 10 rich certs' lodged geometry is silently dropped at
|
||||
parse time, defeating the direct-use rule.
|
||||
|
||||
Because there is still no same-spec ground truth (**Validation Cohort** rule), every synthesis
|
||||
assumption is recorded in code comments + test names, exactly as ADR-0027 requires.
|
||||
|
||||
## Consequences
|
||||
|
||||
- **The coefficients are now shared across specs.** Changing `0.148`, a band multiplier, or the 4-way
|
||||
orientation split moves **every** rebaselined 20.0.0 **and** 18.0 score (and any 17.x/19.0 that later
|
||||
joins). The blast radius of ADR-0027's named-constant block grew; that is the cost of transfer and the
|
||||
reason the constants stay in one place with their derivation recorded.
|
||||
- **The transfer is validated, not the absolute fit.** The band-4 match (0.223 obs vs pred) confirms the
|
||||
*model shape* carries from 21.0.1-era stock to 2018-era stock; it does not independently establish the
|
||||
base ratio for band-1, which remains inherited. Revisit if (a) the retired **RdSAP 2012** band→m²
|
||||
formula is sourced, or (b) a same-spec Validation Cohort becomes available.
|
||||
- **No cross-spec anchor exists in the current corpora.** A dual-lodged UPRN (same dwelling certified
|
||||
under two specs) would let two re-scores cross-check, but the year-capped corpora have **zero** UPRN
|
||||
overlap (18.0∩20.0.0 = 0). A true anchor would have to be *manufactured* via a targeted dual-lodged
|
||||
harvest (scan the 2018 and 2022 dumps for shared UPRNs) — deferred, not part of landing 18.0.
|
||||
- **Acceptance bar matches 20.0.0 (ADR-0027):** the corpus test promotes `RdSAP-Schema-18.0` into the
|
||||
strict **parse + map** guard (1000/1000 return `EpcPropertyData`); it does **not** assert calculator
|
||||
scores. Scoring is spot-checked manually via `scripts/eon/find_epc_data.py`; the formal score-value
|
||||
test stays deferred. Expect wider lodged-vs-recalc deltas than 20.0.0 — the lodged 18.0 figure is on
|
||||
an older SAP version, so it is Lodged Performance, not a target.
|
||||
- **Synthesis stays copied for the first instance; the shared helper is deferred.** `18.0` adapts
|
||||
ADR-0027's synthesis inline (one new instance). The shared, spec-parameterised
|
||||
`_synthesise_reduced_field_windows` is extracted when `17.1` lands (the second instance), pulling
|
||||
20.0.0 + 18.0 + 17.1 through one coefficient block — avoiding abstraction from a single example while
|
||||
preventing three divergent copies.
|
||||
214
docs/grill-sessions/2026-06-09-rdsap-20-0-0-remapper.md
Normal file
214
docs/grill-sessions/2026-06-09-rdsap-20-0-0-remapper.md
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
# Grill session — RdSAP-Schema-20.0.0 → EpcPropertyData remapper
|
||||
|
||||
**Date:** 2026-06-09 (resumed 2026-06-10) · **Branch:** `feature/junte+khalim` · **Status:** IMPLEMENTED — Reduced-Field Synthesis shipped via TDD (ADR-0027). All 1000 corpus certs parse, map, and score; corpus guard flipped to strict.
|
||||
|
||||
Resume by re-running `/grill-me` and feeding it this file.
|
||||
|
||||
---
|
||||
|
||||
## IMPLEMENTED (2026-06-10 TDD session)
|
||||
|
||||
Shipped per ADR-0027 as vertical TDD slices (one test → one impl), commits tagged 🟩:
|
||||
|
||||
- **Schema parse fix** — `RdSapSchema20_0_0` made `kw_only` with data-driven required→optional defaults; `SapBuildingPart` fields all Optional (conservatory parts carry only `{double_glazed, floor_area, glazed_perimeter, room_height}`, no `identifier`). Parse rate 0/1000 → 1000/1000 (993 of the failures were the missing `sap_windows`).
|
||||
- **Window synthesis** — `_synthesise_20_0_0_sap_windows`: 4-way N/E/S/W split, `width = 0.148 × TFA × band_multiplier / 4`, `height = 1.0`, glazing via `_api_cascade_glazing_type`. The 7 rich certs use lodged `window_area` (`_measurement_value`) for geometry.
|
||||
- **Lighting** — low-energy outlets → LEL path; remainder → incandescent.
|
||||
- **Ventilation** — `open_fireplaces_count` → `open_chimneys_count`; `percent_draughtproofed` passthrough; `sheltered_sides` from `built_form`.
|
||||
- **Hot water** — `number_baths` / `mixer_shower_count` derived from `instantaneous_wwhrs` room counts.
|
||||
- **Corpus guard** — `test_mapper_corpus.py`: 20.0.0 promoted into `SUPPORTED`, xfail dropped → strict **2000/2000** (1000 21.0.1 + 1000 20.0.0) parse+map. Verified all 1000 also score through `Sap10Calculator` without crashing.
|
||||
|
||||
**Deliberately left to the calculator (ADR-0027 "leave defaults to the calculator"):**
|
||||
- `wet_rooms_count = 0` → calculator's `>0 else 1` fallback (only consumed on the decentralised-MEV path).
|
||||
- Window **U / g** — calculator defaults U from `glazing_type` (Table 24) when `window_transmission_details` is absent; verified scoring works without the mapper supplying it.
|
||||
- `mechanical_ventilation` decode (5 MEV certs) — not yet decoded to `mechanical_ventilation_kind`; calculator's natural-ventilation default applies.
|
||||
|
||||
**Deferred (user):** formal SAP-score validation test ("we'll do a sap calculation test later"). Scoring is proven crash-free for all 1000; the score-value guard is the next slice when picked up.
|
||||
|
||||
**Pre-existing failure (NOT a regression):** `TestFromRdSapSchema21_0_1::test_total_floor_area` fails on `main`-state too (confirmed via `git stash`).
|
||||
|
||||
---
|
||||
|
||||
## RESOLVED in 2026-06-10 grill
|
||||
|
||||
**Spec sources (authoritative):** RdSAP10 Specification (9 June 2025) + **SAP 10.2** (already in repo) for anything RdSAP10 doesn't cover. The band→m² glazing rule exists in *neither* (both measure all windows); it was a RdSAP-2012-only convention — hence Q2 below resolved by fitting from our own data.
|
||||
|
||||
**Q1 — bar for "correct".** RESOLVED → **(a) full SAP cascade parity.** Use case is a *counterfactual spec-change predictor*: take 2021–24 lodged 20.0.0 data, hold the building constant, run our 21.0.1 `sap10_calculator` to estimate "what EPC today, no changes" (replaces a surveyor visit). ⇒ mapping-fidelity matters (empty/zero windows would silently corrupt solar+heat-loss and conflate "spec changed" with "we mangled the windows"). Empty `sap_windows` rules out (b)/(c).
|
||||
|
||||
**Deliverable framing.** The job is the **mapper** (`from_rdsap_schema_20_0_0`), not the calculator. Make it produce a *complete* `EpcPropertyData` so the existing `sap10_calculator` runs unchanged. Calculator + its tests = the spec / acceptance criterion. Mapper owns ALL synthesis.
|
||||
|
||||
**Windows (gap B) — fully specified:**
|
||||
- **Area:** `window_area = 0.148 × total_floor_area × band_multiplier`. The `0.148` = median glazing/floor ratio measured from all 1000 real 21.0.1 certs (mean 0.155; ~constant across dwelling sizes 0.141–0.156, so a flat proportional rule is sound). Band 1 ("Normal") = ×1.0 and covers 945/1000 certs. The 7 rich certs (have `sap_windows`) use their lodged `window_area` directly.
|
||||
- **Band multipliers:** RESOLVED → `{1:1.00, 2:1.25, 3:0.81, 4:1.51, 5:0.62, ND:1.00}`, derived from the 21.0.1 ratio-distribution percentiles (Normal=P50, More=P75/P50, Less=P25/P50, MuchMore=P90/P50, MuchLess=P10/P50) — same source as the 0.148. This is an interpretation (human band → population percentile) but a defensible one; non-Normal bands cover only 55/1000 certs, so the impact is low. ADR-worthy.
|
||||
- **Orientation:** 20.0.0 records none → **split the synthesized area 4-way across N/E/S/W** (codes 1/3/5/7) so the *unchanged* `solar_gains.py` averages them (avg-orientation treatment; current code skips unknown-orientation → zero solar gain, which we're removing). Spec's literal default for unrecorded orientation is E/W (§7 + §8.2/Table 25 conservatory note); 4-way ≈ that average within ~3%, chosen for the "distribute it" intuition.
|
||||
- **Representation:** each of the 4 windows = `SapWindow(window_width = area/4, window_height = 1.0)`. width×height is the ONLY thing the calculator reads (verified: solar_gains/internal_gains/heat_transmission/ML all use the product); `width=area, height=1.0` is exact and matches existing precedent at `mapper.py:4448-4456` (Elmhurst path).
|
||||
- **U / g:** use `windows_transmission_details` (u_value, solar_transmittance) where present (687/1000); else default from **RdSAP10 Table 24** keyed by `multiple_glazing_type` + `glazing_gap` + age band (313/1000). Frame factor 0.7 PVC/wood, 0.8 metal.
|
||||
- **Glazing type:** RESOLVED → 20.0.0 `glazed_type` codes 1–8+ND are IDENTICAL to 21.0.1's (epc_codes.csv), so route through the existing `_api_cascade_glazing_type` verbatim: `glazing_type = _api_cascade_glazing_type(multiple_glazing_type)` for the 993, `_api_cascade_glazing_type(w.glazing_type)` for the 7. This (a) fixes a real bug — current mapper raw-passes the code, so the calculator mis-reads code 1 "double pre-2002" as single (62 certs); (b) needs no extension despite the cascade only remapping 1→2, because g⊥ comes from the per-window `window_transmission_details.solar_transmittance` (lodged for 687, Table-24-synthesized for 313) — so `glazing_type` only feeds g_L daylight, where the cascade is correct for every code present (code-5 single never appears; secondary g_L=0.80 ✓). `ND` is a string → calculator defaults g⊥ to 0.76 (double) naturally.
|
||||
|
||||
**Non-window gaps — spec resolutions found (RdSAP10 tables), most likely already in `sap10_calculator`; verify which the calculator applies vs. the mapper must supply:**
|
||||
- Hot-water demand (bath/shower counts): RESOLVED → derive from `instantaneous_wwhrs` room counts. NAME TRAP: domain `InstantaneousWwhrs` = WWHR *device* index numbers (correctly empty for 20.0.0); the 20.0.0 *schema* `instantaneous_wwhrs` carries *room counts* (`rooms_with_bath_and_or_shower`, `rooms_with_mixer_shower_no_bath`, `rooms_with_bath_and_mixer_shower`). Calculator reads `sap_heating.number_baths`/`mixer_shower_count`/`electric_shower_count` (modal-default 1 bath/1 mixer/0 electric when None — `cert_to_inputs.py:4613-4660`). Present in 1000/1000; **496/1000 have ≠1 bath**, so modal default systematically under-counts HW demand for multi-bath homes. Map: `number_baths = rooms_with_bath_and_or_shower + rooms_with_bath_and_mixer_shower`; `mixer_shower_count = rooms_with_mixer_shower_no_bath + rooms_with_bath_and_mixer_shower`; `electric_shower_count=0`. IMPL CAVEAT: confirm `rooms_with_bath_and_or_shower` means "has a bath" (not shower-only) against RdSAP data dictionary before trusting the bath arithmetic — bounded risk.
|
||||
- `region_code`: NOT a gap — calculator discards it (`cert_to_inputs.py:1409 _region_index`) and uses **UK-average climate for the SAP rating** per RdSAP §14. No mapping needed for the score; only relevant for postcode-accurate *costs* (future, secondary).
|
||||
- `wet_rooms_count`: calculator reads it raw (defaults to 1 when 0; `cert_to_inputs.py:606`), does NOT derive it. Only feeds mechanical-extract fan counts → irrelevant for 995 natural certs, matters for 5 MEV. → mapper derives from habitable rooms (RdSAP Table 5). Currently hardcoded 0.
|
||||
- `sap_roof_windows`: calculator None-safe (`... or []`). 20.0.0 has no rooflight signal → set None; all synthesized glazing = wall (vertical) windows. Minor fidelity ceiling. ADR-note.
|
||||
- `blocked_chimneys_count`: domain `Optional[int]=None`, calc treats None→0. No 20.0.0 source → leave None.
|
||||
- Energy scores/costs (gap E): cert OUTPUTS, Optional on domain, calculator RECOMPUTES (not inputs). → map 1:1 as the **comparison baseline** (lodged old-20.0.0 vs our recomputed 21.0.1-spec score). Zero calc impact.
|
||||
- `percent_draughtproofed`: schema has it → populate. EASY WIN.
|
||||
- `wet_rooms_count`: RdSAP10 Table 5 → derived from habitable rooms (1–2→K+1, 3–4→K+2, …).
|
||||
- `sheltered_sides`: Table 5 → from built form (0 detached, 1 semi/end, 3 enclosed-mid, 2 else).
|
||||
- extract fans / draught lobby / infiltration: Table 5 → from age band + rooms + built form.
|
||||
- living area, cylinder size/insulation, door area (1.85 m²): Tables 27/28/29, §3.7 defaults.
|
||||
- Ventilation: RESOLVED. **Key finding — the CALCULATOR is the RdSAP-expansion engine**: it applies Table-5 defaults internally (extract-fans from age+rooms via `_rdsap_extract_fans_default`, suspended-timber sealing, draught-lobby default), so the mapper supplies raw reduced-data only and must NOT re-derive them. Fixes/construction:
|
||||
- `open_chimneys_count = schema.open_fireplaces_count` (currently hardcoded 0 → drops 80 m³/h/chimney for 53 certs). BUG FIX.
|
||||
- `percent_draughtproofed = schema.percent_draughtproofed` (1000 certs present, currently unset). EASY WIN.
|
||||
- `sap_ventilation = SapVentilation(sheltered_sides=_api_sheltered_sides(schema.built_form), mechanical_ventilation_kind=<decode>)`. All flue/fan/vent counts left None → calculator defaults them. **sheltered_sides matters**: if sap_ventilation=None the calculator defaults it to 2 (mid-terrace) for ALL dwellings (wrong for detached=0, enclosed-mid=3).
|
||||
- `_api_sheltered_sides` reuse VERIFIED safe: 20.0.0 `built_form: int` shares the identical 1–6 code space as 21.0.1 (epc_codes.csv); all corpus values 1–6 mapped; 'NR' (absent) → None → calculator default, no crash.
|
||||
- `mechanical_ventilation` int decode (20.0.0 codes 0=natural, 1=mech supply+extract, 2=mech extract only): 0→NATURAL, 2→mech-extract kind (5 certs), 1→MV no-HR (0 certs; 20.0.0 has no HR flag → conservative). Calculator path: `cert_to_inputs.py:4522+ ventilation_from_cert`.
|
||||
- `sap_roof_windows`: 20.0.0 has no roof/wall distinction → treat all as wall windows.
|
||||
- Lighting (gap C): RESOLVED. 20.0.0 gives `fixed_lighting_outlets_count` (total) + `low_energy_fixed_lighting_outlets_count` (low-energy); `low_energy_lighting` is just the % (ignore). Current mapper hardcodes led/cfl/incandescent=0 → understates lighting for **439/1000** certs that have incandescent bulbs. Fix: `low_energy_fixed_lighting_bulbs_count = low_energy_outlets` (→ calculator LEL path, 15 W/80 Lm/W, per RdSAP10 §12-1 unknown-split), `incandescent_fixed_lighting_bulbs_count = total − low_energy`, led=cfl=0. Approximation: 1 outlet ≈ 1 bulb (20.0.0 has no bulb count; older-RdSAP behaviour). ADR-note both. Calculator path: `internal_gains.py:565 _lighting_capacity_and_efficacy_from_cert`.
|
||||
|
||||
**Verified facts worth keeping:**
|
||||
- 20.0.0 is **offline-only** (gov API returns 21.0.1/21.0.0 only); corpus built from bulk dumps (`scripts/harvest_certs.py`). Only the xfail corpus test exercises it.
|
||||
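- All 1000 fail at the **schema-parse layer** (993 on a missing `sap_windows`, 6 on `lzc_energy_sources`, 1 on `SapBuildingPart.roof_insulation_thickness` — the placeholder over-constrains required fields); fix = required→optional with defaults (`sap_windows`→`[]`, `lzc_energy_sources`→`[]`, `roof_insulation_thickness`→Optional).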
|
||||
- Corpus window-field presence: `glazed_area` 1000 (band: 1=945,2=45,4=7,3=3), `multiple_glazing_type` 1000, `multiple_glazed_proportion` 1000, `windows_transmission_details` 687, `sap_windows` 7.
|
||||
|
||||
---
|
||||
|
||||
## Goal
|
||||
|
||||
Make `RdSAP-Schema-20.0.0` certs map cleanly to `EpcPropertyData` via
|
||||
`EpcPropertyDataMapper`, the same way 21.0.1 already does. Two corpora of 1000
|
||||
real certs each:
|
||||
|
||||
- `backend/epc_api/json_samples/RdSAP-Schema-21.0.1/corpus.jsonl` — supported, 1000/1000 pass.
|
||||
- `backend/epc_api/json_samples/RdSAP-Schema-20.0.0/corpus.jsonl` — target, currently 1000/1000 **fail**.
|
||||
|
||||
User notes:
|
||||
- The current `RdSapSchema20_0_0` schema was a **placeholder generated from a single EPC JSON example** — that's why it over-constrains.
|
||||
- "Do it correctly for each one" — map field-by-field correctly, not just make it type-check.
|
||||
|
||||
---
|
||||
|
||||
## Key code locations
|
||||
|
||||
| Thing | Path |
|
||||
|-------|------|
|
||||
| Domain target | `datatypes/epc/domain/epc_property_data.py` (`EpcPropertyData` L607-788; `SapWindow` L255-270) |
|
||||
| Mapper (dispatcher) | `datatypes/epc/domain/mapper.py` — `from_api_response` L1984; dispatch by `schema_type` |
|
||||
| 20.0.0 mapper method | `datatypes/epc/domain/mapper.py` `from_rdsap_schema_20_0_0` L1078-1275 |
|
||||
| 21.0.1 mapper method (reference) | `datatypes/epc/domain/mapper.py` `from_rdsap_schema_21_0_1` L1556-1942 |
|
||||
| Placeholder schema | `datatypes/epc/schema/rdsap_schema_20_0_0.py` (`RdSapSchema20_0_0` L198-283; `SapWindow` L84-89) |
|
||||
| Corpus test | `infrastructure/epc_client/tests/test_mapper_corpus.py` (20.0.0 is `xfail`, strict=False) |
|
||||
| Schema validation helper | `datatypes/epc/schema/helpers.py:36` (raises "missing required field") |
|
||||
|
||||
---
|
||||
|
||||
## What actually fails today (1000/1000)
|
||||
|
||||
Failures are at the **schema-parse layer** (helpers.py:36), *not* the mapper method.
|
||||
Placeholder declares fields required that the real corpus often omits:
|
||||
|
||||
| Count | Missing required field |
|
||||
|------:|------------------------|
|
||||
| 993 | `sap_windows` |
|
||||
| 6 | `lzc_energy_sources` |
|
||||
| 1 | `SapBuildingPart.roof_insulation_thickness` |
|
||||
|
||||
Corpus field-presence (20.0.0, n=1000): `window` 1000, `sap_windows` **7**,
|
||||
`lzc_energy_sources` **68**.
|
||||
|
||||
---
|
||||
|
||||
## Central finding — windows
|
||||
|
||||
- 20.0.0 has **no `sap_windows` array**. Windows live as scalar aggregates:
|
||||
`glazed_area` (enum, e.g. 1), `glazing_gap` ("16+"), `multiple_glazing_type` (3),
|
||||
`multiple_glazed_proportion` (100), `pvc_window_frames` ("true"), plus the
|
||||
`window` energy element and cert-level `windows_transmission_details`
|
||||
(`u_value`, `data_source`, `solar_transmittance`).
|
||||
- **There is NO RdSAP default-window-area formula anywhere in the codebase.**
|
||||
SAP window area comes entirely from per-window `width × height` in `sap_windows`
|
||||
(`cert_to_inputs.py:3787` → `_window_total_area_and_avg_u` L1480). 21.0.1 certs
|
||||
supply that geometry; 20.0.0 supplies none (`glazed_area` is an enum, not m²).
|
||||
- Therefore `sap_windows=[]` **does not crash** but models the dwelling as
|
||||
**windowless**: zero solar gain, zero window heat loss. That's the core gap.
|
||||
|
||||
### Downstream consumers of `sap_windows` (from trace)
|
||||
- `worksheet/heat_transmission.py` L594-627, L711-731 — per-window U branch if all
|
||||
windows have `window_transmission_details.u_value`, else aggregate fallback;
|
||||
iterates width/height/location/wall_type for net wall area.
|
||||
- `worksheet/solar_gains.py` L389-397 — per-window orientation × area × solar_transmittance.
|
||||
- `worksheet/internal_gains.py` L617-627 — daylight factor; early-returns 1.433 if no windows + no rooflights.
|
||||
- `sap10_ml/transform.py` L1629-1707 `_window_aggregates` — tolerant of empty (zero-counts).
|
||||
- `modelling/scoring/overlay_applicator.py:48` — **indexes `sap_windows[index]`** → IndexError only if an overlay targets a window index that doesn't exist.
|
||||
- `modelling/generators/glazing_recommendation.py` L87-88 — safe on empty.
|
||||
|
||||
**Load-bearing SapWindow fields:** `window_width`, `window_height`, `orientation`,
|
||||
`glazing_type`, `window_transmission_details` (conditional), `window_location`,
|
||||
`window_wall_type`. ML-only: `frame_material`, `frame_factor`, `glazing_gap`,
|
||||
`draught_proofed`, shutters, `window_type`.
|
||||
|
||||
---
|
||||
|
||||
## Gap map
|
||||
|
||||
| # | Gap | 20.0.0 reality | Placeholder does now |
|
||||
|---|-----|----------------|----------------------|
|
||||
| **A** | Schema over-constrained (1-example origin) | `sap_windows` absent 993/1000; `lzc_energy_sources` absent 932; `roof_insulation_thickness` sometimes absent | declares required → all parse fail |
|
||||
| **B** | **Windows** | aggregates only (see above), no array | requires `sap_windows` array |
|
||||
| **C** | Lighting | `fixed_lighting_outlets_count`, `low_energy_fixed_lighting_outlets_count`, `low_energy_lighting` (outlets + low-energy bool) | hardcodes led/cfl/incandescent **bulb** counts = 0 |
|
||||
| **D** | Orphan counts | has `open_fireplaces_count`, `percent_draughtproofed`; no wet-rooms source | `wet_rooms=0`, `open_chimneys=0`, `draughtproofed_door=None` |
|
||||
| **E** | Energy scores/costs | present (`energy_rating_current`, costs, co2…) | not mapped at all |
|
||||
|
||||
---
|
||||
|
||||
## Decision tree / open question queue
|
||||
|
||||
### ▶ Q1 (ROOT, PENDING) — What is the bar for "correct"? Who consumes 20.0.0 output?
|
||||
Decides the windows strategy (B), which everything hangs off.
|
||||
- **(a) Full SAP cascade parity** — must run `sap10_calculator` accurately ⇒ must synthesize per-window geometry ⇒ must implement RdSAP standard window-area formula (Table S4: area from TFA + habitable rooms, scaled by `glazed_area` enum). Biggest scope.
|
||||
- **(b) ML/feature parity** — feeds `sap10_ml/transform.py`; tolerates empty windows, leans on aggregates. Synthesis optional.
|
||||
- **(c) Type-safe ingest only** — parse without crashing + map every field with a real source; windows empty.
|
||||
|
||||
**Recommendation:** target (a) but **stage it**: first ship schema fix + all
|
||||
directly-mappable fields (→ 1000/1000 *parsing*), then synthesize a **single
|
||||
aggregate `SapWindow`** from scalars (area = RdSAP standard formula × `glazed_area`
|
||||
enum; `glazing_type` ← `multiple_glazing_type`; U ← `windows_transmission_details`;
|
||||
draught/frame ← `pvc_window_frames`/`glazing_gap`). One window keeps total area + U
|
||||
correct for heat-transmission and ML; only loses per-orientation solar split, which
|
||||
20.0.0 cannot provide anyway.
|
||||
|
||||
**Sub-question carried with Q1:** agree that 20.0.0 fundamentally cannot give
|
||||
per-orientation window detail, so a single aggregate window is the ceiling?
|
||||
|
||||
### Queued (depend on Q1)
|
||||
- **Q2 (B, if synth):** Where does window AREA come from? RdSAP standard area formula (need to confirm exact RdSAP table/coefficients) × `glazed_area` enum interpretation (1/2/3 = typical/more/less?). Is there an existing helper, or net-new?
|
||||
- **Q3 (B):** Map `multiple_glazing_type` (3) → domain `glazing_type` code space — is the code space identical to 21.0.1's per-window `glazing_type`, or does it need translation?
|
||||
- **Q4 (B):** U-value — attach `windows_transmission_details.u_value` as the synthesized window's `window_transmission_details` (→ per-window branch) or leave None (→ aggregate fallback)? Both should agree for a single window; pick the simpler.
|
||||
- **Q5 (A):** Schema required→optional policy. Proposal: data-driven — any field present in <100% of corpus becomes `Optional` with sensible default; confirm the threshold + default-vs-None per field. (`sap_windows` → default `[]`; `lzc_energy_sources` → default `[]`; `roof_insulation_thickness` → Optional.)
|
||||
- **Q6 (C):** Lighting. 20.0.0 gives outlets + low-energy bool/count, not bulb split. Options: (i) leave bulb counts 0 (current), (ii) map low-energy → cfl, rest → incandescent, (iii) add outlet fields to domain. Which?
|
||||
- **Q7 (D):** `open_fireplaces_count` → `open_chimneys_count` — same concept? `percent_draughtproofed` → derive `draughtproofed_door_count`? `wet_rooms_count` — any source or stays 0?
|
||||
- **Q8 (E):** Map the energy scores/costs/co2 fields now (they exist in 20.0.0) or defer? Likely just wire them — they're present and 1:1.
|
||||
- **Q9 (test):** Flip plan — when 1000/1000 pass, drop `xfail` + keep strict guard (per test file comment L8-12). Add field-level assertions beyond `isinstance`?
|
||||
|
||||
---
|
||||
|
||||
## How to reproduce the failures
|
||||
|
||||
```bash
|
||||
# bucket the 20.0.0 mapping errors by type
|
||||
python - <<'EOF'
|
||||
import json, collections, traceback
|
||||
from pathlib import Path
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
certs=[json.loads(l) for l in Path("backend/epc_api/json_samples/RdSAP-Schema-20.0.0/corpus.jsonl").read_text().splitlines() if l.strip()]
|
||||
b=collections.Counter()
|
||||
for c in certs:
|
||||
try: EpcPropertyDataMapper.from_api_response(c)
|
||||
except Exception as e:
|
||||
tb=traceback.extract_tb(e.__traceback__)[-1]
|
||||
b[f"{type(e).__name__}: {str(e)[:80]} @ {tb.filename.split('/')[-1]}:{tb.lineno}"]+=1
|
||||
for k,v in b.most_common(): print(v,k)
|
||||
EOF
|
||||
|
||||
# run the corpus test (--runxfail ignores the xfail marker so the real failures are reported)
|
||||
python -m pytest infrastructure/epc_client/tests/test_mapper_corpus.py -k wip_schema_20 --runxfail -q
|
||||
```
|
||||
|
|
@ -0,0 +1,181 @@
|
|||
# Grill spec — generalise Reduced-Field Synthesis to the rest of the pre-SAP10 RdSAP family
|
||||
|
||||
**Date:** 2026-06-10 · **Branch:** `feature/junte+khalim` · **Status:** SPEC — READY TO GRILL.
|
||||
|
||||
Grill this by running `/grill-me` and feeding it this file. Start at **Q1 (ROOT)**.
|
||||
|
||||
---
|
||||
|
||||
## Why this exists
|
||||
|
||||
The RdSAP **20.0.0** mapper now works end-to-end: all 1000 corpus certs parse, map via
|
||||
**Reduced-Field Synthesis** (ADR-0027), and score through `Sap10Calculator` without crashing.
|
||||
`scripts/eon/find_epc_data.py` shows lodged-vs-our-calculated SAP side by side and the deltas are
|
||||
sane (mostly within ±7 SAP points, same band). The pattern is proven.
|
||||
|
||||
The goal now: **apply the same playbook to the other pre-SAP10 RdSAP specs** so historical EPC data
|
||||
across more lodgement years can be Rebaselined. This is pure leverage — the hard design thinking
|
||||
(synthesis coefficients, Validation-Cohort rule, schema-fix mechanism) is already done; what remains
|
||||
is per-spec drift and a decision about how much to share vs copy.
|
||||
|
||||
## What we already hold (the repeatable 20.0.0 playbook)
|
||||
|
||||
Each step below is *proven* for 20.0.0. The grill is about which steps change per spec.
|
||||
|
||||
1. **Harvest a corpus** — `scripts/eon/harvest_certs.py` streams a local bulk dump
|
||||
(`downloads/certificates-YYYY.json`) for the year that spec dominates, caps at 1000, writes
|
||||
`backend/epc_api/json_samples/<schema>/corpus.jsonl`. No API token needed.
|
||||
2. **Fix the placeholder schema** — every `rdsap_schema_*.py` was generated from ONE example so it
|
||||
over-constrains. Make it `@dataclass(kw_only=True)` + data-driven required→optional (any field
|
||||
present in <100% of the corpus gets a default; `[]` for lists, `None` otherwise) → all certs parse.
|
||||
3. **Synthesise the measured fields** the reduced schema only records categorically:
|
||||
windows (`glazed_area` band × floor area, 4-way N/E/S/W split), lighting LEL, hot-water bath/mixer
|
||||
counts, ventilation/chimneys/sheltered-sides, glazing cascade.
|
||||
4. **Leave calculator defaults to the calculator** — `cert_to_inputs` is the RdSAP Table-5 expansion
|
||||
engine; the mapper supplies raw reduced data only.
|
||||
5. **Wire dispatch + flip a strict guard** — add the `schema_type` branch to
|
||||
`from_api_response`, promote the corpus into the strict parse+map bucket in
|
||||
`infrastructure/epc_client/tests/test_mapper_corpus.py`.
|
||||
6. **Record every synthesis assumption in code comments + test names** (Validation-Cohort rule: no
|
||||
same-spec ground truth).
|
||||
|
||||
## Ground truth about the targets (verified 2026-06-10)
|
||||
|
||||
| Spec | Schema module | Mapper method | Dispatched? | Corpus? | Notes |
|
||||
|------|---------------|---------------|-------------|---------|-------|
|
||||
| 21.0.1 | ✅ | `from_rdsap_schema_21_0_1` | ✅ | ✅ 1000 | reference (rich, measured) |
|
||||
| 21.0.0 | ✅ | `from_rdsap_schema_21_0_0` | ✅ | ❌ | dispatched but unguarded |
|
||||
| **20.0.0** | ✅ | `from_rdsap_schema_20_0_0` | ✅ | ✅ 1000 | **DONE — the template** |
|
||||
| **19.0** | ✅ | `from_rdsap_schema_19_0` | ❌ | ❌ | orphaned; `sap_windows=[]` hardcoded |
|
||||
| **18.0** | ✅ | `from_rdsap_schema_18_0` | ❌ | ❌ | orphaned |
|
||||
| **17.1** | ✅ | `from_rdsap_schema_17_1` | ❌ | ❌ | orphaned |
|
||||
| **17.0** | ✅ | `from_rdsap_schema_17_0` | ❌ | ❌ | orphaned |
|
||||
|
||||
- 19.0 confirmed same reduced-field shape as 20.0.0: `glazed_area: int` band + `multiple_glazing_type:
|
||||
int`, and the mapper currently hardcodes `sap_windows=[]` — i.e. the exact windowless-corruption bug
|
||||
that 20.0.0's synthesis fixed. 18.0/17.1/17.0 are almost certainly the same family.
|
||||
- The 17–19 mapper methods **exist** but are unreachable: `from_api_response` only branches on 21.0.1 /
|
||||
21.0.0 / 20.0.0; everything else hits `raise ValueError(f"Unsupported EPC schema")`.
|
||||
- **Corpora are harvestable.** `downloads/README.txt` schema-by-year:
|
||||
`2020 → RdSAP-Schema-19.0 (1632)`, `2021–2024 → 20.0.0`, `2025–2026 → 21.0.1`. Older RdSAP (17.x/18.0)
|
||||
live in the 2012–2019 dumps (all present locally). `SAP-Schema-1x` (full/design SAP) and `CEPC-*`
|
||||
(commercial) are different families with no RdSAP mapper.
|
||||
|
||||
---
|
||||
|
||||
## Decision tree to grill (each has a recommended answer)
|
||||
|
||||
### Q1 (ROOT) — Target set and order. What are we generalising to, and in what order?
|
||||
**Recommend:** the **pre-SAP10 RdSAP family only**, one spec at a time, **19.0 first** (dominant in the
|
||||
2020 dump, closest sibling to 20.0.0, mapper already stubbed), then 18.0 → 17.1 → 17.0 as their dumps
|
||||
confirm volume. **Exclude** `SAP-Schema-1x` (full/design SAP — new-build, not reduced; a separate
|
||||
mapper family and ADR) and `CEPC-*` (non-domestic). **Carve out** 21.0.0 as a quick win: it's already
|
||||
dispatched, it just needs a harvested corpus to join the strict guard.
|
||||
*Sub-question:* do we batch all four 17–19 in one branch sweep, or land 19.0 fully (corpus → schema →
|
||||
synthesis → dispatch → guard) before starting 18.0? Recommend: **land 19.0 end-to-end first** — it
|
||||
either confirms the playbook transfers cleanly (then 18.0/17.x are fast) or surfaces drift early.
|
||||
|
||||
### Q2 — Coefficient reuse vs re-fit (the load-bearing, ADR-worthy one).
|
||||
20.0.0's glazing synthesis uses `0.148 × TFA × band_multiplier`, fit from the **21.0.1** corpus's
|
||||
glazing/floor ratio. For 19.0/18.0/17.x: reuse the same coefficients, or re-fit per spec?
|
||||
**DIRECTION (user, 2026-06-10): re-work the coefficients from each new corpus's own data — do not
|
||||
inherit the 21.0.1 fit by default.** Treat `0.148` + the band multipliers as a *starting hypothesis* to
|
||||
confirm or replace against what the new corpus actually shows, per spec. The empirical numbers lead; we
|
||||
only keep the 20.0.0 values if the new corpus reproduces them.
|
||||
|
||||
**The constraint this hits (must resolve while grilling):** a reduced schema does **not** measure
|
||||
per-window area (that's the whole reason synthesis exists), so a 19.0/18.0/17.x corpus *cannot
|
||||
self-fit the glazing/floor ratio* — there's no measured glazing column in it to regress on. So
|
||||
"work it out from the new corpus" splits into two parts:
|
||||
- **What the reduced corpus *can* give us directly** → re-derive per spec: the `glazed_area` band
|
||||
*distribution* (how many Normal/More/Less), `total_floor_area` distribution, and whether the band
|
||||
codes/semantics match 20.0.0. This validates (or breaks) the band-multiplier assumption empirically.
|
||||
- **The base ratio itself (the `0.148`)** → needs a *measured* reference from the same stock/era.
|
||||
Options to grill: (a) use the contemporaneous measured corpus if one exists for that year (e.g. a
|
||||
rich-window spec lodged alongside), (b) fit from the handful of rich certs the reduced corpus *does*
|
||||
carry (20.0.0 had 7/1000 with lodged `sap_windows` — check the count per spec), or (c) fall back to
|
||||
the 21.0.1 fit *only* if (a)/(b) yield too little signal, and say so explicitly.
|
||||
|
||||
This moves every rebaselined score for the spec, so the per-spec fit + its evidence wants an ADR
|
||||
(extends ADR-0027). Record the derivation (corpus, sample size, quartiles) the same way 20.0.0 did.
|
||||
|
||||
### Q3 — Code-space drift across versions.
|
||||
Do 17–19 use the same integer code spaces as 20.0.0 (glazing_type, built_form, orientation, fuel,
|
||||
heat-emitter, party-wall, roof/floor construction)? 20.0.0's codes turned out **identical** to 21.0.1's,
|
||||
so we routed through the existing cascades verbatim. **Recommend:** assume identical within the RdSAP
|
||||
family; cross-check each version against `epc_codes.csv` during grilling and add a per-spec cascade
|
||||
override *only* where the corpus proves a code diverged. Don't pre-build translation layers.
|
||||
|
||||
### Q4 — Schema-fix mechanism. Same `kw_only` + data-driven required→optional?
|
||||
**Recommend: yes, unchanged.** Each placeholder schema over-constrains identically (single-example
|
||||
generation). Run the one-pass corpus scan to enumerate all missing-required fields at once (not
|
||||
whack-a-mole), then default them. Mechanical, low-risk, proven.
|
||||
|
||||
### Q5 — Shared synthesis helper vs per-mapper copy (the architecture fork).
|
||||
20.0.0's synthesis lives in `_synthesise_20_0_0_sap_windows` + inline mapper blocks. With 19.0 we'll
|
||||
have a **second instance** — the classic extract trigger. **Recommend:** once 19.0 is green, extract a
|
||||
single spec-parameterised `_synthesise_reduced_field_windows(glazed_area, tfa, glazing_type)` (and
|
||||
shared lighting/hot-water/ventilation helpers) so 18.0/17.x are near-free and the coefficients live in
|
||||
exactly one place. Defer the extraction until 19.0 confirms the shape (avoid abstracting from one
|
||||
example). This is the `/improve-codebase-architecture` hook — a deep module behind a small interface.
|
||||
|
||||
### Q6 — Per-spec field availability.
|
||||
Do 17–19 actually lodge the synthesis *inputs* 20.0.0 relies on — `instantaneous_wwhrs` (bath room
|
||||
counts), `low_energy_fixed_lighting_outlets_count`, `percent_draughtproofed`, `open_fireplaces_count`,
|
||||
`multiple_glazing_type`? Older specs may omit or rename some. **Recommend:** profile each corpus up
|
||||
front (one-pass field-presence scan); where a 20.0.0 input is absent, degrade gracefully to the
|
||||
calculator's own default rather than fabricating — and record the gap in a test name.
|
||||
|
||||
### Q7 — Dispatch wiring + acceptance bar.
|
||||
**Recommend:** per spec, add the `schema_type` branch to `from_api_response` (wrapped in
|
||||
`_clear_basement_flag_when_system_built` like the others) and promote its corpus into the strict
|
||||
parse+map bucket in `test_mapper_corpus.py`. Smoke-check with `scripts/eon/find_epc_data.py` (extend
|
||||
the UPRN list with that spec's certs) — our re-score should track the lodged figure within a sane band.
|
||||
The formal SAP-score *value* test stays deferred (same as 20.0.0) until we choose to land it.
|
||||
|
||||
### Q8 — Validation-Cohort / is there ANY cross-check?
|
||||
Same rule as 20.0.0: a pre-SAP10 cert has no same-spec lodged figure to validate against, so synthesis
|
||||
assumptions go in code/test names. **But probe one opportunistic anchor:** a single UPRN re-lodged
|
||||
across spec versions (e.g. a dwelling with both a 19.0 and a 20.0.0 cert, unchanged between) — our
|
||||
re-score of both should roughly agree. **Recommend:** if dual-lodged UPRNs surface during harvest, keep
|
||||
a handful as a cross-spec regression anchor; don't block on it.
|
||||
|
||||
---
|
||||
|
||||
## How to reproduce / kick off (19.0 first)
|
||||
|
||||
```bash
|
||||
# 1. Confirm 19.0 volume + reduced-field shape in the 2020 dump
|
||||
python - <<'EOF'
|
||||
import json
|
||||
from collections import Counter
|
||||
# stream the first N lines of certificates-2020.json, bucket by schema_type,
|
||||
# and dump one RdSAP-Schema-19.0 document to inspect glazed_area / sap_windows
|
||||
EOF
|
||||
|
||||
# 2. Add a harvest source row and run it
|
||||
# scripts/eon/harvest_certs.py SOURCES += ("certificates-2020.json","RdSAP-Schema-19.0",1000)
|
||||
|
||||
# 3. Drive the (orphaned) 19.0 mapper against the new corpus to bucket parse failures
|
||||
python - <<'EOF'
|
||||
import json, collections
|
||||
from pathlib import Path
|
||||
from datatypes.epc.schema.rdsap_schema_19_0 import RdSapSchema19_0
|
||||
from datatypes.epc.schema.helpers import from_dict
|
||||
certs=[json.loads(l) for l in Path("backend/epc_api/json_samples/RdSAP-Schema-19.0/corpus.jsonl").read_text().splitlines() if l.strip()]
|
||||
b=collections.Counter()
|
||||
for c in certs:
|
||||
try: from_dict(RdSapSchema19_0, c)
|
||||
except Exception as e: b[f"{type(e).__name__}: {str(e)[:70]}"]+=1
|
||||
for k,v in b.most_common(): print(v,k)
|
||||
EOF
|
||||
```
|
||||
|
||||
## References
|
||||
|
||||
- **ADR-0027** (`docs/adr/0027-rdsap-20-0-0-reduced-field-synthesis.md`) — the synthesis decision,
|
||||
coefficients, rejected alternatives. Extend (not replace) for the family-wide coefficient choice (Q2).
|
||||
- **ADR-0015** (mappers own cert normalization), **ADR-0004** (lodged-vs-effective pair).
|
||||
- **CONTEXT.md** — _Reduced-Field Synthesis_, _Rebaselining_, _Lodged / Effective Performance_,
|
||||
_Validation Cohort_, _pre-SAP10_.
|
||||
- **20.0.0 resume doc** — `docs/grill-sessions/2026-06-09-rdsap-20-0-0-remapper.md` (the worked example).
|
||||
|
|
@ -718,7 +718,7 @@ _HOURS_PER_DAY_OVER_1000: Final[float] = 0.024
|
|||
_DAYS_PER_MONTH: Final[tuple[int, ...]] = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
|
||||
|
||||
def _pv_annual_s_kwh_per_m2(
|
||||
orientation_code: int,
|
||||
orientation_code: Optional[int],
|
||||
pitch_code: int,
|
||||
climate: "int | PostcodeClimate",
|
||||
) -> float:
|
||||
|
|
@ -727,8 +727,10 @@ def _pv_annual_s_kwh_per_m2(
|
|||
monthly Appendix U3.2 surface flux over the year. `climate` selects
|
||||
Table U3/U4 region (UK average = 0 for the rating cascade) or a
|
||||
`PostcodeClimate` from PCDB Table 172 for the demand cascade.
|
||||
Returns 0.0 for unrecognised orientation codes (cert octants outside
|
||||
1..8) — these PV arrays contribute nothing."""
|
||||
Returns 0.0 for an unknown orientation (None when the cert lodged 'ND',
|
||||
or a code outside 1..8) — these PV arrays contribute nothing."""
|
||||
if orientation_code is None:
|
||||
return 0.0
|
||||
orientation = ORIENTATION_BY_SAP10_CODE.get(orientation_code)
|
||||
if orientation is None:
|
||||
return 0.0
|
||||
|
|
@ -2475,8 +2477,10 @@ def _pv_array_monthly_generation_kwh(
|
|||
E_PV,m = 0.8 × kWp × ZPV × (days_m × S_m × 24 / 1000)
|
||||
where S_m is the §U3.2 surface flux (W/m²). Returns a 12-zero tuple
|
||||
for arrays whose orientation isn't mapped in
|
||||
`ORIENTATION_BY_SAP10_CODE` (defensive — current cert lodgements
|
||||
always cover 1..8)."""
|
||||
`ORIENTATION_BY_SAP10_CODE` (defensive — None when the cert lodged
|
||||
'ND', else a code outside 1..8)."""
|
||||
if array.orientation is None:
|
||||
return (0.0,) * 12
|
||||
orientation = ORIENTATION_BY_SAP10_CODE.get(array.orientation)
|
||||
if orientation is None:
|
||||
return (0.0,) * 12
|
||||
|
|
|
|||
|
|
@ -1229,7 +1229,7 @@ def _pv_aggregates(es: SapEnergySource) -> dict[str, Any]:
|
|||
total_power += a.peak_power
|
||||
weighted_pitch += a.pitch * a.peak_power
|
||||
weighted_overshading += a.overshading * a.peak_power
|
||||
if a.orientation in _OCTANT_NAMES:
|
||||
if a.orientation is not None and a.orientation in _OCTANT_NAMES:
|
||||
octant_power[_OCTANT_NAMES[a.orientation]] += a.peak_power
|
||||
aggregates["has_pv"] = True
|
||||
aggregates["pv_capacity_source"] = "measured"
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ DEFAULT_CATALOGUE = Path(__file__).resolve().parent / "sample_catalogue.json"
|
|||
_PROPERTY_ID = 1
|
||||
_SCENARIO_ID = 7
|
||||
_PORTFOLIO_ID = 1
|
||||
_UPRN = 12345
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -108,7 +109,7 @@ def run_one(
|
|||
portfolio_id=_PORTFOLIO_ID,
|
||||
postcode="A0 0AA",
|
||||
address="1 Some Street",
|
||||
uprn=12345,
|
||||
uprn=_UPRN,
|
||||
),
|
||||
current_market_value=current_market_value,
|
||||
)
|
||||
|
|
@ -211,7 +212,7 @@ def run_modelling(
|
|||
portfolio_id=_PORTFOLIO_ID,
|
||||
postcode="A0 0AA",
|
||||
address="1 Some Street",
|
||||
uprn=12345,
|
||||
uprn=_UPRN,
|
||||
),
|
||||
epc=epc,
|
||||
current_market_value=current_market_value,
|
||||
|
|
@ -222,7 +223,7 @@ def run_modelling(
|
|||
unit = FakeUnitOfWork(
|
||||
property=property_repo,
|
||||
solar=FakeSolarRepo(
|
||||
by_property={_PROPERTY_ID: solar_insights}
|
||||
by_uprn={_UPRN: solar_insights}
|
||||
if solar_insights is not None
|
||||
else None
|
||||
),
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ from __future__ import annotations
|
|||
|
||||
from typing import ClassVar, Optional, Union
|
||||
from sqlalchemy import Column
|
||||
from sqlalchemy import Enum as SAEnum
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlmodel import SQLModel, Field
|
||||
|
||||
|
|
@ -735,7 +736,29 @@ class EpcEnergyElementModel(SQLModel, table=True):
|
|||
id: Optional[int] = Field(default=None, primary_key=True)
|
||||
epc_property_id: int = Field(foreign_key="epc_property.id", nullable=False)
|
||||
|
||||
element_type: str # roof | wall | floor | main_heating | window | lighting | hot_water | secondary_heating | main_heating_controls
|
||||
# The live column is the ``energy_element_type`` Postgres enum (owned by the
|
||||
# Drizzle schema). Binding it as a bare ``str`` makes SQLAlchemy cast the
|
||||
# value to VARCHAR, which Postgres won't implicitly coerce to the enum on
|
||||
# INSERT — so declare the native enum explicitly (``create_type=False``: the
|
||||
# type already exists). The values stay plain strings on the domain side.
|
||||
element_type: str = Field(
|
||||
sa_column=Column(
|
||||
SAEnum(
|
||||
"roof",
|
||||
"wall",
|
||||
"floor",
|
||||
"main_heating",
|
||||
"window",
|
||||
"lighting",
|
||||
"hot_water",
|
||||
"secondary_heating",
|
||||
"main_heating_controls",
|
||||
name="energy_element_type",
|
||||
create_type=False,
|
||||
),
|
||||
nullable=False,
|
||||
)
|
||||
)
|
||||
description: str
|
||||
energy_efficiency_rating: int
|
||||
environmental_efficiency_rating: int
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import ClassVar, Optional, cast
|
||||
from typing import ClassVar, Optional, cast, get_args
|
||||
|
||||
from sqlalchemy import Column
|
||||
from sqlalchemy import Enum as SAEnum
|
||||
from sqlmodel import Field, SQLModel
|
||||
|
||||
from datatypes.epc.domain.epc import Epc
|
||||
|
|
@ -10,6 +12,11 @@ from domain.property_baseline.property_baseline_performance import PropertyBasel
|
|||
from domain.property_baseline.performance import Performance
|
||||
from domain.property_baseline.rebaseliner import RebaselineReason
|
||||
|
||||
# Native-enum labels for the ``epc`` / ``rebaseline_reason`` columns, sourced
|
||||
# from the domain types so the mirror can't drift from them.
|
||||
_EPC_BANDS: tuple[str, ...] = tuple(band.value for band in Epc)
|
||||
_REBASELINE_REASONS: tuple[str, ...] = get_args(RebaselineReason)
|
||||
|
||||
# Each Bill section's flat-column stem (``bill_{stem}_kwh`` / ``bill_{stem}_cost_gbp``).
|
||||
_SECTION_COLUMN_STEM: dict[BillSection, str] = {
|
||||
BillSection.HEATING: "heating",
|
||||
|
|
@ -35,17 +42,32 @@ class PropertyBaselinePerformanceModel(SQLModel, table=True):
|
|||
id: Optional[int] = Field(default=None, primary_key=True)
|
||||
property_id: int = Field(unique=True, index=True)
|
||||
|
||||
# ``*_epc_band`` and ``rebaseline_reason`` are native Postgres enums on the
|
||||
# live table (``epc`` / ``rebaseline_reason``, owned by Drizzle); declaring
|
||||
# them as bare ``str`` makes SQLAlchemy bind VARCHAR on INSERT, which
|
||||
# Postgres won't coerce to the enum. Bind the native enums explicitly
|
||||
# (``create_type=False``: the types already exist). Values stay plain
|
||||
# strings on the domain side (``Epc(...)`` / the RebaselineReason Literal).
|
||||
lodged_sap_score: int
|
||||
lodged_epc_band: str
|
||||
lodged_epc_band: str = Field(
|
||||
sa_column=Column(SAEnum(*_EPC_BANDS, name="epc", create_type=False), nullable=False)
|
||||
)
|
||||
lodged_co2_emissions_t_per_yr: float
|
||||
lodged_primary_energy_intensity_kwh_per_m2_yr: int
|
||||
|
||||
effective_sap_score: int
|
||||
effective_epc_band: str
|
||||
effective_epc_band: str = Field(
|
||||
sa_column=Column(SAEnum(*_EPC_BANDS, name="epc", create_type=False), nullable=False)
|
||||
)
|
||||
effective_co2_emissions_t_per_yr: float
|
||||
effective_primary_energy_intensity_kwh_per_m2_yr: int
|
||||
|
||||
rebaseline_reason: str
|
||||
rebaseline_reason: str = Field(
|
||||
sa_column=Column(
|
||||
SAEnum(*_REBASELINE_REASONS, name="rebaseline_reason", create_type=False),
|
||||
nullable=False,
|
||||
)
|
||||
)
|
||||
|
||||
space_heating_kwh: float
|
||||
water_heating_kwh: float
|
||||
|
|
|
|||
|
|
@ -2,21 +2,30 @@ from __future__ import annotations
|
|||
|
||||
from typing import Any, ClassVar, Optional
|
||||
|
||||
from sqlalchemy import Column
|
||||
from sqlalchemy import BigInteger, Column, Float
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlmodel import Field, SQLModel
|
||||
|
||||
|
||||
class SolarBuildingInsightsRow(SQLModel, table=True):
|
||||
"""Persisted Google Solar `buildingInsights` response for one Property.
|
||||
class SolarRow(SQLModel, table=True):
|
||||
"""Mirror of the live ``solar`` table (owned by the Drizzle schema): the raw
|
||||
Google Solar ``buildingInsights`` response for one UPRN, stored whole as
|
||||
JSONB so a future SolarPotential projection can be derived without
|
||||
re-fetching. Keyed by ``uprn`` (the live table carries no ``property_id``);
|
||||
``longitude``/``latitude`` are the coordinates the fetch was made against.
|
||||
|
||||
Stored as JSONB — the raw fetched insights are retained whole so the
|
||||
structured projection a future SolarPotential type needs can be derived
|
||||
without re-fetching. One row per Property.
|
||||
Only the columns this repo reads/writes are mirrored — ``created_at`` /
|
||||
``updated_at`` are left to the database's ``DEFAULT now()``.
|
||||
"""
|
||||
|
||||
__tablename__: ClassVar[str] = "solar_building_insights" # pyright: ignore[reportIncompatibleVariableOverride]
|
||||
__tablename__: ClassVar[str] = "solar" # pyright: ignore[reportIncompatibleVariableOverride]
|
||||
|
||||
id: Optional[int] = Field(default=None, primary_key=True)
|
||||
property_id: int = Field(index=True, unique=True)
|
||||
insights: dict[str, Any] = Field(sa_column=Column(JSONB, nullable=False))
|
||||
id: Optional[int] = Field(
|
||||
default=None, sa_column=Column(BigInteger, primary_key=True)
|
||||
)
|
||||
uprn: int = Field(sa_column=Column(BigInteger, nullable=False, index=True))
|
||||
longitude: float = Field(sa_column=Column(Float, nullable=False))
|
||||
latitude: float = Field(sa_column=Column(Float, nullable=False))
|
||||
google_api_response: dict[str, Any] = Field(
|
||||
sa_column=Column(JSONB, nullable=False)
|
||||
)
|
||||
|
|
|
|||
1
next_claude_prompt.txt
Normal file
1
next_claude_prompt.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
/grill-me docs/grill-sessions/2026-06-10-pre-sap10-mapper-generalization.md
|
||||
|
|
@ -99,8 +99,20 @@ class IngestionOrchestrator:
|
|||
for item in fetched:
|
||||
if item.epc is not None:
|
||||
uow.epc.save(item.epc, property_id=item.property_id)
|
||||
if item.solar_insights is not None:
|
||||
uow.solar.save(item.property_id, item.solar_insights)
|
||||
# The live `solar` table is keyed by UPRN and needs the fetch's
|
||||
# coordinates; insights are only set when those coordinates were
|
||||
# resolved, so spatial.coordinates is non-None alongside them.
|
||||
if (
|
||||
item.solar_insights is not None
|
||||
and item.spatial is not None
|
||||
and item.spatial.coordinates is not None
|
||||
):
|
||||
uow.solar.save(
|
||||
item.uprn,
|
||||
longitude=item.spatial.coordinates.longitude,
|
||||
latitude=item.spatial.coordinates.latitude,
|
||||
insights=item.solar_insights,
|
||||
)
|
||||
if item.spatial is not None:
|
||||
uow.spatial.save(item.uprn, item.spatial)
|
||||
uow.commit()
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ class ModellingOrchestrator:
|
|||
# threaded into the solar Generator (ADR-0026). None when no
|
||||
# solar data was fetched — the Generator then offers nothing.
|
||||
solar_potential: Optional[SolarPotential] = _solar_potential_for(
|
||||
uow.solar, property_id
|
||||
uow.solar, prop.identity.uprn
|
||||
)
|
||||
for scenario in scenarios:
|
||||
plan = self._plan_for(
|
||||
|
|
@ -228,12 +228,15 @@ def _bill_for(bill_derivation: BillDerivation, score: Score) -> Bill:
|
|||
|
||||
|
||||
def _solar_potential_for(
|
||||
solar_repo: SolarRepository, property_id: int
|
||||
solar_repo: SolarRepository, uprn: Optional[int]
|
||||
) -> Optional[SolarPotential]:
|
||||
"""Project the Property's persisted Google Solar `buildingInsights` JSON
|
||||
into a typed `SolarPotential` (ADR-0026), or None when none was fetched /
|
||||
the lookup returned an error payload (no `solarPotential` block)."""
|
||||
insights = solar_repo.get(property_id)
|
||||
"""Project the UPRN's persisted Google Solar `buildingInsights` JSON
|
||||
into a typed `SolarPotential` (ADR-0026), or None when there is no UPRN /
|
||||
none was fetched / the lookup returned an error payload (no `solarPotential`
|
||||
block). Solar is keyed by UPRN to match the live ``solar`` table."""
|
||||
if uprn is None:
|
||||
return None
|
||||
insights = solar_repo.get(uprn)
|
||||
if not insights or "solarPotential" not in insights:
|
||||
return None
|
||||
return SolarPotential.from_building_insights(insights)
|
||||
|
|
|
|||
|
|
@ -8,6 +8,21 @@ from infrastructure.postgres.product_table import MaterialRow
|
|||
from repositories.product.product_repository import ProductRepository
|
||||
|
||||
|
||||
# The domain ``MeasureType`` vocabulary and the catalogue's ``material.type``
|
||||
# pgEnum drifted apart: these five measures are spelled differently on the
|
||||
# catalogue side (and querying the domain spelling raises a pgEnum DataError
|
||||
# that poisons the session's transaction). Translate them to the catalogue's
|
||||
# own vocabulary at this boundary so the domain enum stays stable. Every other
|
||||
# MeasureType already matches its material.type and maps to itself.
|
||||
_MATERIAL_TYPE_BY_MEASURE: dict[str, str] = {
|
||||
"low_energy_lighting": "low_energy_lighting_installation",
|
||||
"gas_boiler_upgrade": "boiler_upgrade",
|
||||
"system_tune_up": "roomstat_programmer_trvs",
|
||||
"system_tune_up_zoned": "time_temperature_zone_control",
|
||||
"sloping_ceiling_insulation": "room_roof_insulation",
|
||||
}
|
||||
|
||||
|
||||
class ProductPostgresRepository(ProductRepository):
|
||||
"""Reads the ``material`` catalogue table and maps an active row to a
|
||||
Product: `total_cost` becomes the fully-loaded `unit_cost_per_m2`, and the
|
||||
|
|
@ -17,13 +32,16 @@ class ProductPostgresRepository(ProductRepository):
|
|||
self._session = session
|
||||
|
||||
def get(self, measure_type: str) -> Product:
|
||||
# Resolve the domain MeasureType to the catalogue's ``material.type``
|
||||
# spelling (identity for all but the five drifted types above).
|
||||
catalogue_type = _MATERIAL_TYPE_BY_MEASURE.get(measure_type, measure_type)
|
||||
# The live catalogue holds many active rows per type; order by id so the
|
||||
# pick is deterministic (a re-seed prices the same) rather than relying
|
||||
# on the database's physical row order.
|
||||
row: MaterialRow | None = self._session.exec(
|
||||
select(MaterialRow)
|
||||
.where(
|
||||
col(MaterialRow.type) == measure_type,
|
||||
col(MaterialRow.type) == catalogue_type,
|
||||
col(MaterialRow.is_active).is_(True),
|
||||
)
|
||||
.order_by(col(MaterialRow.id))
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from typing import Any, Optional
|
|||
|
||||
from sqlmodel import Session, select
|
||||
|
||||
from infrastructure.postgres.solar_table import SolarBuildingInsightsRow
|
||||
from infrastructure.postgres.solar_table import SolarRow
|
||||
from repositories.solar.solar_repository import SolarRepository
|
||||
|
||||
|
||||
|
|
@ -12,24 +12,29 @@ class SolarPostgresRepository(SolarRepository):
|
|||
def __init__(self, session: Session) -> None:
|
||||
self._session = session
|
||||
|
||||
def save(self, property_id: int, insights: dict[str, Any]) -> None:
|
||||
def save(
|
||||
self, uprn: int, *, longitude: float, latitude: float, insights: dict[str, Any]
|
||||
) -> None:
|
||||
existing = self._session.exec(
|
||||
select(SolarBuildingInsightsRow).where(
|
||||
SolarBuildingInsightsRow.property_id == property_id
|
||||
)
|
||||
select(SolarRow).where(SolarRow.uprn == uprn)
|
||||
).first()
|
||||
if existing is None:
|
||||
self._session.add(
|
||||
SolarBuildingInsightsRow(property_id=property_id, insights=insights)
|
||||
SolarRow(
|
||||
uprn=uprn,
|
||||
longitude=longitude,
|
||||
latitude=latitude,
|
||||
google_api_response=insights,
|
||||
)
|
||||
)
|
||||
else:
|
||||
existing.insights = insights
|
||||
existing.longitude = longitude
|
||||
existing.latitude = latitude
|
||||
existing.google_api_response = insights
|
||||
self._session.add(existing)
|
||||
|
||||
def get(self, property_id: int) -> Optional[dict[str, Any]]:
|
||||
def get(self, uprn: int) -> Optional[dict[str, Any]]:
|
||||
row = self._session.exec(
|
||||
select(SolarBuildingInsightsRow).where(
|
||||
SolarBuildingInsightsRow.property_id == property_id
|
||||
)
|
||||
select(SolarRow).where(SolarRow.uprn == uprn)
|
||||
).first()
|
||||
return row.insights if row is not None else None
|
||||
return row.google_api_response if row is not None else None
|
||||
|
|
|
|||
|
|
@ -5,15 +5,19 @@ from typing import Any, Optional
|
|||
|
||||
|
||||
class SolarRepository(ABC):
|
||||
"""Persists and loads a Property's Google Solar building insights.
|
||||
"""Persists and loads a UPRN's Google Solar building insights.
|
||||
|
||||
Thin save/get over the raw fetched insights (a future SolarPotential domain
|
||||
type will derive its fields from these). Written by Ingestion, read by
|
||||
Baseline/Modelling — never re-fetched downstream (ADR-0003).
|
||||
Baseline/Modelling — never re-fetched downstream (ADR-0003). Keyed by
|
||||
``uprn`` to match the live ``solar`` table; ``longitude``/``latitude`` are
|
||||
the coordinates the fetch was made against (NOT NULL on the live table).
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def save(self, property_id: int, insights: dict[str, Any]) -> None: ...
|
||||
def save(
|
||||
self, uprn: int, *, longitude: float, latitude: float, insights: dict[str, Any]
|
||||
) -> None: ...
|
||||
|
||||
@abstractmethod
|
||||
def get(self, property_id: int) -> Optional[dict[str, Any]]: ...
|
||||
def get(self, uprn: int) -> Optional[dict[str, Any]]: ...
|
||||
|
|
|
|||
114
scripts/eon/find_epc_data.py
Normal file
114
scripts/eon/find_epc_data.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
import pandas as pd
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from datatypes.epc.domain.epc import Epc
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
|
||||
from domain.sap10_calculator.rdsap.cert_to_inputs import cert_to_inputs
|
||||
from infrastructure.epc_client.epc_client_service import EpcClientService
|
||||
|
||||
# UPRNs to compare. Most are RdSAP 20.0.0 (pre-SAP10) certs — the ones the
|
||||
# Reduced-Field Synthesis mapper (ADR-0027) re-maps so the SAP10 calculator can
|
||||
# re-score them. The commented-out rows are further candidates (mostly 20.0.0, plus a few 17.1 / 18.0 neighbours) kept for context.
|
||||
UPRNS: list[int] = [
|
||||
10003318624, # 20.0.0 Flat 1, 6 Alexandra Gardens, PO38 1EE
|
||||
# 10003318625, # 20.0.0 Flat 2, 6 Alexandra Gardens, PO38 1EE
|
||||
# 10003318626, # 20.0.0 Flat 3, 6 Alexandra Gardens, PO38 1EE
|
||||
# 10003318698, # 17.1 Flat 4, 6 Alexandra Gardens, PO38 1EE
|
||||
# 100062430247, # 20.0.0 Flat 5, Adelaide Court, Adelaide Place, PO33 3DG
|
||||
# 100062430248, # 20.0.0 Flat 6, Adelaide Court, Adelaide Place, PO33 3DG
|
||||
# 100062430250, # 20.0.0 Flat 8, Adelaide Court, Adelaide Place, PO33 3DG
|
||||
# 100062429797, # 20.0.0 Flat 1, 10-11 Cross Street, PO33 2AD
|
||||
# 10003320577, # 20.0.0 Flat 3, 10-11 Cross Street, PO33 2AD
|
||||
# 10003320573, # 18.0 Flat 7, 10-11 Cross Street, PO33 2AD
|
||||
# 10024248769, # 20.0.0 Flat 8, 10-11 Cross Street, PO33 2AD
|
||||
# 10024248772, # 18.0 Flat 9, 10-11 Cross Street, PO33 2AD
|
||||
]
|
||||
|
||||
|
||||
def fetch_raw_cert(service: EpcClientService, uprn: int) -> Optional[dict[str, Any]]:
    """Return the most recently registered raw certificate for ``uprn``.

    The raw certificate dict is returned (rather than the mapped
    EpcPropertyData) because the lodged SAP score is read from it as
    ``energy_rating_current``, which the mapper does not carry onto the
    domain object.

    Returns None when the search yields no results for the UPRN.
    """

    def _registered(hit: Any) -> Any:
        # Ordering key: the registration date reported by the search result.
        return hit.registration_date

    matches = service._search(uprn=uprn)  # pyright: ignore[reportPrivateUsage]
    if matches:
        newest = max(matches, key=_registered)
        number = newest.certificate_number
        return service._fetch_certificate(number)  # pyright: ignore[reportPrivateUsage]
    return None
|
||||
|
||||
|
||||
def compare_sap(raw: dict[str, Any]) -> dict[str, object]:
    """Score one raw certificate with our SAP10 calculator and pair the result
    with the score lodged on the certificate.

    The certificate is mapped to the domain object, converted to calculator
    inputs and scored. For a 20.0.0 cert the calculated value is the
    counterfactual "what EPC would this get under today's spec" (ADR-0027).

    Returns a flat row: address, postcode, SAP version, lodged score/band,
    our score/band, and the delta (ours minus lodged). When the certificate
    carries no ``energy_rating_current`` the lodged band is "?" and the delta
    is None.
    """
    epc = EpcPropertyDataMapper.from_api_response(raw)
    inputs = cert_to_inputs(epc)
    result = calculate_sap_from_inputs(inputs)

    # Lodged Performance: the surveyor's original score, straight off the raw
    # cert. Both bands are derived from a score through the same Epc helper.
    lodged_score = raw.get("energy_rating_current")
    if lodged_score is None:
        lodged_band = "?"
    else:
        lodged_band = Epc.from_sap_score(lodged_score).value
    our_band = Epc.from_sap_score(result.sap_score).value

    row: dict[str, object] = {
        "address": epc.address_line_1,
        "postcode": epc.postcode,
        # The SAP methodology version (RdSAP 2012 lodges 9.9x); the *schema*
        # version (20.0.0) is annotated in the UPRNS list above.
        "sap_ver": raw.get("sap_version"),
        "lodged_sap": lodged_score,
        "lodged_band": lodged_band,
        "our_sap": result.sap_score,
        "our_band": our_band,
    }
    if lodged_score is None:
        row["delta"] = None
    else:
        row["delta"] = result.sap_score - lodged_score
    return row
|
||||
|
||||
|
||||
def main() -> None:
    """Fetch, re-score and tabulate every UPRN in ``UPRNS``.

    Loads ``OPEN_EPC_API_TOKEN`` from ``backend/.env`` (mirroring conftest.py)
    so the script runs standalone (`python scripts/eon/find_epc_data.py`),
    then prints one comparison row per UPRN that could be scored.

    Raises:
        RuntimeError: when the token is missing or empty.
    """
    repo_root = Path(__file__).resolve().parents[2]
    load_dotenv(repo_root / "backend" / ".env")

    token = os.getenv("OPEN_EPC_API_TOKEN")
    # `not token` also rejects an empty `OPEN_EPC_API_TOKEN=` entry, which
    # would otherwise only surface later as an opaque auth failure.
    if not token:
        raise RuntimeError("OPEN_EPC_API_TOKEN not defined in env")
    service = EpcClientService(auth_token=token)

    rows: list[dict[str, object]] = []
    for uprn in UPRNS:
        # The fetch is guarded as well as the scoring: a failed lookup for one
        # UPRN must not abort the rest of the sweep.
        try:
            raw = fetch_raw_cert(service, uprn)
        except Exception as exc:  # surface, don't abort the whole sweep
            print(f"UPRN {uprn}: failed to fetch — {type(exc).__name__}: {exc}")
            continue
        if raw is None:
            print(f"UPRN {uprn}: no EPC found")
            continue
        try:
            rows.append({"uprn": uprn, **compare_sap(raw)})
        except Exception as exc:  # surface, don't abort the whole sweep
            print(f"UPRN {uprn}: failed to score — {type(exc).__name__}: {exc}")

    if not rows:
        print("No certs scored.")
        return

    table = pd.DataFrame(rows)
    # Lift pandas' column/width limits so the whole table prints untruncated.
    with pd.option_context("display.max_columns", None, "display.width", None):
        print(table.to_string(index=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
101
scripts/eon/harvest_certs.py
Normal file
101
scripts/eon/harvest_certs.py
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
"""Harvest raw EPC certificates into a JSONL corpus for mapper tests.
|
||||
|
||||
Source: the bulk EPC dumps in downloads/certificates-YYYY.json. Each line is
|
||||
|
||||
{"certificate_number": "...", "document": "<json string>", ...}
|
||||
|
||||
where ``document`` is the cert in the exact shape
|
||||
``EpcClientService._fetch_certificate`` returns and
|
||||
``EpcPropertyDataMapper.from_api_response`` consumes (it has ``schema_type``,
|
||||
``roofs``, ``walls`` ... and matches the committed json_samples).
|
||||
|
||||
We want a balanced sample per schema so we can build out and regression-test
|
||||
the mappers (notably the incomplete ``RdSapSchema20.0.0``). Schema version
|
||||
tracks the dump year, so we read each target schema from a year that's rich in
|
||||
it and stop once its cap is full — no need to stream whole multi-GB files.
|
||||
|
||||
Year -> dominant schema (see downloads/README.txt):
|
||||
2026 -> RdSAP-Schema-21.0.1
|
||||
2021-2024 -> RdSAP-Schema-20.0.0
|
||||
|
||||
SAP-Schema-18.0.0 is a minority schema (~12% of the 2021 dump) but each year
|
||||
holds ~1.6M lines, so 2021 still yields well over 1000 — it just scans deeper
|
||||
before the cap fills. SAP-Schema-17.1 is richest in the 2019 dump (~20%).
|
||||
|
||||
21.0.0 is skipped — it's effectively absent from these dumps.
|
||||
|
||||
Run cell by cell. No API token needed — this is pure local streaming.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
DOWNLOADS = Path("downloads")
|
||||
SAMPLES = Path("backend/epc_api/json_samples")
|
||||
|
||||
# One corpus per schema, written into that schema's own json_samples folder
|
||||
# (alongside its epc.json) as corpus.jsonl. Each schema is read from a year
|
||||
# where it dominates, so we hit the cap within the first few-thousand lines.
|
||||
SOURCES: list[tuple[str, str, int]] = [
|
||||
# ("certificates-2026.json", "RdSAP-Schema-21.0.1", 1000),
|
||||
# ("certificates-2022.json", "RdSAP-Schema-20.0.0", 1000),
|
||||
# pre-SAP10 RdSAP family — NOT the SAP-Schema-* full/design-SAP family.
|
||||
# schema_type scan (first 300k lines of each dump):
|
||||
# 18.0 ~82% of certificates-2018.json
|
||||
# 17.1 dominant in 2017
|
||||
# 19.0 dominant in certificates-2020.json (~59%); only ~21% in 2019
|
||||
# (behind 18.0), so harvest from 2020.
|
||||
# 17.0 dominant in certificates-2015.json (~89%); 2016 a fallback.
|
||||
# ("certificates-2018.json", "RdSAP-Schema-18.0", 1000),
|
||||
# ("certificates-2017.json", "RdSAP-Schema-17.1", 1000),
|
||||
# ("certificates-2020.json", "RdSAP-Schema-19.0", 1000),
|
||||
("certificates-2015.json", "RdSAP-Schema-17.0", 1000),
|
||||
]
|
||||
|
||||
|
||||
def corpus_path(schema: str) -> Path:
    """Return the ``corpus.jsonl`` path inside ``schema``'s json_samples folder."""
    return SAMPLES.joinpath(schema, "corpus.jsonl")
|
||||
|
||||
|
||||
# %%
|
||||
def harvest_one(filename: str, schema: str, cap: int) -> list[dict[str, object]]:
    """Stream `filename`, returning up to `cap` cert docs of `schema`.

    Each line of the dump is a JSON object whose ``document`` field holds the
    certificate as a JSON string. A line is skipped when it is not valid JSON,
    is not an object with a ``document`` key, or its ``document`` does not
    decode to a JSON object. Reading stops once `cap` matching docs are
    collected, so the rest of the file is never scanned.

    Prints a one-line summary (matches found, lines scanned) before returning.
    """
    path = DOWNLOADS / filename
    docs: list[dict[str, object]] = []
    scanned = 0
    # Read as UTF-8 explicitly rather than with the platform default encoding
    # (assumes the dumps are UTF-8, the JSON interchange default — confirm
    # against downloads/README.txt).
    with path.open(encoding="utf-8") as fh:
        for line in fh:
            if len(docs) >= cap:
                break
            scanned += 1
            try:
                doc = json.loads(json.loads(line)["document"])
            except (json.JSONDecodeError, KeyError, TypeError):
                # TypeError: the line decoded to a non-object, or `document`
                # is not a string — malformed for our purposes, so skip it.
                continue
            # A document that decodes to a list/scalar has no schema_type.
            if isinstance(doc, dict) and doc.get("schema_type") == schema:
                docs.append(doc)
    print(f"{schema}: {len(docs)}/{cap} from {filename} (scanned {scanned} lines)")
    return docs
|
||||
|
||||
|
||||
# %%
|
||||
# Build one corpus per schema, into that schema's json_samples folder.
|
||||
# Overwrites each run — deterministic and cheap.
|
||||
for filename, schema, cap in SOURCES:
    # Each schema's corpus sits in its own json_samples folder; create the
    # folder on first run and overwrite the corpus on every run.
    out_path = corpus_path(schema)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w") as out:
        # One JSON document per line (JSONL).
        for doc in harvest_one(filename, schema, cap):
            serialised = json.dumps(doc)
            out.write(serialised + "\n")
    print(f"wrote {out_path}")
|
||||
|
||||
# %%
|
||||
# Sanity-check each corpus: line count per schema.
|
||||
for _, schema, _ in SOURCES:
    path = corpus_path(schema)
    # Count the non-blank lines: one certificate per line.
    n = 0
    for line in path.read_text().splitlines():
        if line.strip():
            n += 1
    print(f"{schema}: {n} ({path})")
|
||||
104
scripts/eon/profile_corpus.py
Normal file
104
scripts/eon/profile_corpus.py
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
"""Profile a harvested RdSAP corpus — the ADR-0028 "seeing the data" table.
|
||||
|
||||
For a pre-SAP10 RdSAP corpus this prints the evidence that the inherited
|
||||
ADR-0027 coefficients transfer safely to the spec (ADR-0028 §Context):
|
||||
|
||||
* glazed_area band mix — the windowless-majority structure that forces
|
||||
synthesis (the corpus structurally cannot self-fit band-1);
|
||||
* the Validation Cohort — certs that lodge a real per-window `sap_windows`
|
||||
array, used directly rather than synthesised over;
|
||||
* observed glazing/floor ratio per band vs the inherited model's prediction
|
||||
(`0.148 x band_multiplier`) — the per-spec transfer check;
|
||||
* sentinel / shape counts (multiple_glazing_type "ND", dwelling_type as a
|
||||
plain str) that drive the schema's required->optional widening.
|
||||
|
||||
Usage (cell-by-cell or standalone):
|
||||
|
||||
python scripts/eon/profile_corpus.py RdSAP-Schema-19.0
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from collections import Counter, defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
SAMPLES = Path("backend/epc_api/json_samples")
|
||||
|
||||
# Inherited ADR-0027 coefficients (the single home is mapper.py; mirrored here
|
||||
# read-only for the transfer-check column).
|
||||
GLAZING_RATIO = 0.148
|
||||
BAND_MULTIPLIER = {1: 1.00, 2: 1.25, 3: 0.81, 4: 1.51, 5: 0.62}
|
||||
|
||||
|
||||
def _load(schema: str) -> list[dict[str, Any]]:
    """Read ``schema``'s ``corpus.jsonl`` and return one decoded cert per
    non-blank line."""
    corpus_file = SAMPLES / schema / "corpus.jsonl"
    certs: list[dict[str, Any]] = []
    for row in corpus_file.read_text().splitlines():
        if row.strip():
            certs.append(json.loads(row))
    return certs
|
||||
|
||||
|
||||
def _measurement_value(raw: Any) -> Optional[float]:
|
||||
"""Window/floor areas lodge as {"value": x, ...} or a bare number."""
|
||||
if isinstance(raw, dict):
|
||||
v = raw.get("value")
|
||||
return float(v) if v is not None else None
|
||||
if isinstance(raw, (int, float)):
|
||||
return float(raw)
|
||||
return None
|
||||
|
||||
|
||||
def profile(schema: str) -> None:
    """Print the profiling tables for ``schema``'s harvested corpus.

    Sections, in order: the glazed_area band mix; the Validation Cohort
    (certs that lodge a non-empty ``sap_windows`` array) and its band mix; the
    cohort's observed glazing/floor ratio per band against the inherited
    prediction (``GLAZING_RATIO x BAND_MULTIPLIER``); and the sentinel/shape
    counts for ``multiple_glazing_type`` and ``dwelling_type``.

    Certs with no usable floor area or no usable window areas are left out of
    the ratio table rather than aborting the run.
    """
    certs = _load(schema)
    n = len(certs)
    print(f"\n=== {schema} — {n} certs ===\n")

    def _band_order(band: Any) -> tuple[bool, str, Any]:
        # Sort key shared by every band table: a missing (None) band goes
        # last, and bands of different types are grouped by type name so they
        # are never compared against each other directly.
        return (band is None, type(band).__name__, band)

    def _floor_area(raw: Any) -> Optional[float]:
        # Accepts a bare number/numeric string or the {"value": x} shape;
        # anything else counts as "no floor area".
        value = raw.get("value") if isinstance(raw, dict) else raw
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    # glazed_area band mix
    bands = Counter(c.get("glazed_area") for c in certs)
    print("glazed_area band mix:")
    for band in sorted(bands, key=_band_order):
        count = bands[band]
        print(f" band {band}: {count:4d} ({100 * count / n:.1f}%)")

    # Validation Cohort — certs with a lodged per-window array
    cohort = [c for c in certs if c.get("sap_windows")]
    cohort_bands = Counter(c.get("glazed_area") for c in cohort)
    ordered_cohort_bands = dict(
        sorted(cohort_bands.items(), key=lambda item: _band_order(item[0]))
    )
    print(f"\nValidation Cohort (lodged sap_windows): {len(cohort)}/{n}")
    print(f" cohort bands: {ordered_cohort_bands}")

    # observed glazing/floor ratio per band (cohort only) vs inherited prediction
    by_band: dict[Any, list[float]] = defaultdict(list)
    for c in cohort:
        tfa = _floor_area(c.get("total_floor_area"))
        areas = [
            _measurement_value(w.get("window_area"))
            for w in c["sap_windows"]
            if isinstance(w, dict)
        ]
        areas = [a for a in areas if a is not None]
        # A zero/missing floor area cannot form a ratio.
        if tfa and areas:
            by_band[c.get("glazed_area")].append(sum(areas) / tfa)
    print("\nobserved glazing/floor ratio vs inherited 0.148 x multiplier:")
    print(" band observed (n) predicted")
    for band in sorted(by_band, key=_band_order):
        obs = by_band[band]
        mean = sum(obs) / len(obs)
        pred = GLAZING_RATIO * BAND_MULTIPLIER.get(band, 1.0)
        # `!s` so a None band formats as text instead of raising on `<4`.
        print(f" {band!s:<4} {mean:.3f} (n={len(obs):>2}) {pred:.3f}")

    # sentinels / shapes driving the schema widening
    mgt_int = Counter(
        c["multiple_glazing_type"]
        for c in certs
        if isinstance(c.get("multiple_glazing_type"), int)
    )
    mgt_nd = sum(1 for c in certs if c.get("multiple_glazing_type") == "ND")
    dt_str = sum(1 for c in certs if isinstance(c.get("dwelling_type"), str))
    print("\nsentinels / shapes:")
    print(f" multiple_glazing_type int codes: {dict(sorted(mgt_int.items()))}")
    print(f" multiple_glazing_type 'ND': {mgt_nd}/{n}")
    print(f" dwelling_type as plain str: {dt_str}/{n}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
schema = sys.argv[1] if len(sys.argv) > 1 else "RdSAP-Schema-19.0"
|
||||
profile(schema)
|
||||
|
|
@ -124,16 +124,16 @@ def _s3_parquet_reader(bucket: str) -> ParquetReader:
|
|||
return read
|
||||
|
||||
|
||||
def _spatial_for(
|
||||
repo: GeospatialS3Repository, uprn: int
|
||||
) -> Optional[SpatialReference]:
|
||||
def _spatial_for(repo: GeospatialS3Repository, uprn: int) -> Optional[SpatialReference]:
|
||||
"""The UPRN's spatial reference (coordinates + planning protections), or
|
||||
None when S3 doesn't cover it — a missing reference must not abort the run,
|
||||
so a lookup error degrades to None (unrestricted, no solar)."""
|
||||
try:
|
||||
return repo.spatial_for(uprn)
|
||||
except Exception as error: # noqa: BLE001 — S3/parquet hiccup is non-fatal
|
||||
print(f" spatial lookup failed for uprn {uprn}: {type(error).__name__}: {error}")
|
||||
print(
|
||||
f" spatial lookup failed for uprn {uprn}: {type(error).__name__}: {error}"
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -186,7 +186,9 @@ def _parse_measures(raw: Optional[str]) -> Optional[frozenset[MeasureType]]:
|
|||
(consider every modelled measure) when unset. Raises on an unknown type."""
|
||||
if raw is None:
|
||||
return None
|
||||
return frozenset(MeasureType(token.strip()) for token in raw.split(",") if token.strip())
|
||||
return frozenset(
|
||||
MeasureType(token.strip()) for token in raw.split(",") if token.strip()
|
||||
)
|
||||
|
||||
|
||||
def _context_summary(
|
||||
|
|
@ -238,8 +240,17 @@ def _persist(
|
|||
uow.epc.save(epc, property_id=property_id, portfolio_id=portfolio_id)
|
||||
if spatial is not None:
|
||||
uow.spatial.save(uprn, spatial)
|
||||
# The live `solar` table is keyed by UPRN and needs the fetch's
|
||||
# coordinates; insights are only present when those coordinates were
|
||||
# (see `_solar_insights_for`), so `spatial.coordinates` is non-None here.
|
||||
if solar_insights is not None:
|
||||
uow.solar.save(property_id, solar_insights)
|
||||
assert spatial is not None and spatial.coordinates is not None
|
||||
uow.solar.save(
|
||||
uprn,
|
||||
longitude=spatial.coordinates.longitude,
|
||||
latitude=spatial.coordinates.latitude,
|
||||
insights=solar_insights,
|
||||
)
|
||||
uow.plan.save(
|
||||
plan,
|
||||
property_id=property_id,
|
||||
|
|
@ -252,8 +263,12 @@ def _persist(
|
|||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("property_ids", type=int, nargs="+", help="Property ids to model")
|
||||
parser.add_argument("--goal", default="C", help="target band when no --scenario-id (default C)")
|
||||
parser.add_argument(
|
||||
"property_ids", type=int, nargs="+", help="Property ids to model"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--goal", default="C", help="target band when no --scenario-id (default C)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--scenario-id", type=int, default=None, help="model against this DB Scenario"
|
||||
)
|
||||
|
|
@ -263,12 +278,16 @@ def main() -> None:
|
|||
help="comma-separated measure types to consider (default: all)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--portfolio-id", type=int, default=None, help="portfolio id (required for --persist)"
|
||||
"--portfolio-id",
|
||||
type=int,
|
||||
default=None,
|
||||
help="portfolio id (required for --persist)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--persist",
|
||||
action="store_true",
|
||||
help="WRITE the inputs + Plan to the DB (default: inspect only, no writes)",
|
||||
default=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-solar",
|
||||
|
|
@ -355,7 +374,14 @@ def main() -> None:
|
|||
solar_insights=solar_insights,
|
||||
plan=plan,
|
||||
)
|
||||
except Exception as error: # noqa: BLE001 — one bad property must not stop the run
|
||||
except (
|
||||
Exception
|
||||
) as error: # noqa: BLE001 — one bad property must not stop the run
|
||||
# A failed catalogue query (e.g. a `material.type` enum mismatch)
|
||||
# aborts the shared session's transaction; without a rollback every
|
||||
# subsequent property reports `InFailedSqlTransaction` and masks its
|
||||
# own real error. Reset so each property surfaces what's wrong.
|
||||
catalogue_session.rollback()
|
||||
line = f"property {property_id} (uprn {uprn}): ERROR — {type(error).__name__}: {error}"
|
||||
print(line + "\n")
|
||||
md_lines.append(f"## Property {property_id}\n\n`{line}`\n")
|
||||
|
|
|
|||
|
|
@ -2,9 +2,19 @@
|
|||
This script prepares the data for the financial model
|
||||
"""
|
||||
|
||||
import os
|
||||
from datetime import date, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(".env.local")
|
||||
# The retired `property_details_epc` table is no longer populated under the new
|
||||
# backend, so the EPC descriptive fields are sourced live from the EPC service
|
||||
# instead (which needs OPEN_EPC_API_TOKEN — also lives in backend/.env).
|
||||
_REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
load_dotenv(_REPO_ROOT / "backend" / ".env")
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
|
@ -14,25 +24,97 @@ from backend.app.db.connection import db_engine, db_read_session
|
|||
from backend.app.db.models.recommendations import (
|
||||
Recommendation,
|
||||
PlanModel,
|
||||
PlanRecommendations,
|
||||
RecommendationMaterials,
|
||||
)
|
||||
from backend.app.db.models.portfolio import (
|
||||
PropertyModel,
|
||||
PropertyDetailsEpcModel,
|
||||
PropertyDetailsSpatial,
|
||||
)
|
||||
from backend.app.db.functions.materials_functions import get_materials
|
||||
from infrastructure.epc_client.epc_client_service import EpcClientService
|
||||
from collections import defaultdict
|
||||
from sqlalchemy import func
|
||||
|
||||
PORTFOLIO_ID = 632
|
||||
SCENARIOS = [1144]
|
||||
|
||||
def _description_text(item: Any) -> str:
|
||||
"""Display text for one raw-cert EPC feature. Handles both schema shapes:
|
||||
20.0.0 stores ``description`` as a plain string; 17.1 wraps it as a
|
||||
``{"value": ..., "language": ...}`` LanguageString."""
|
||||
if not isinstance(item, dict):
|
||||
return ""
|
||||
desc = item.get("description")
|
||||
if isinstance(desc, dict):
|
||||
desc = desc.get("value")
|
||||
return str(desc or "")
|
||||
|
||||
|
||||
def _join_descriptions(value: Any) -> str:
    """Flatten a raw-cert EPC feature into one display string.

    The new EPC API returns a feature as a list of feature dicts
    (walls/roofs/floors/main_heating), a single feature dict
    (hot_water/window/lighting), or null. A list is joined with "; ",
    dropping entries that have no text; anything else is rendered as a
    single feature.
    """
    if not isinstance(value, list):
        return _description_text(value)
    texts = [_description_text(feature) for feature in value]
    return "; ".join(filter(None, texts))
|
||||
|
||||
|
||||
def _is_expired(registration_date: Optional[str]) -> Optional[bool]:
|
||||
"""An EPC is valid for 10 years from its lodgement (registration) date."""
|
||||
if not registration_date:
|
||||
return None
|
||||
try:
|
||||
lodged = datetime.fromisoformat(registration_date[:10]).date()
|
||||
except ValueError:
|
||||
return None
|
||||
return (date.today() - lodged).days > 365 * 10
|
||||
|
||||
|
||||
def epc_details_from_service(svc: EpcClientService, uprn: Optional[int]) -> dict[str, Any]:
    """Stand in for the retired ``property_details_epc`` row using the live
    EPC service.

    Fetches the UPRN's most recently registered raw certificate and flattens
    the descriptive fields the export needs. Returns ``{}`` when there is no
    UPRN or the search finds no EPC, so the property carries blank EPC
    columns rather than being dropped.
    """
    if uprn is None:
        return {}
    hits = svc._search(uprn=uprn)  # pyright: ignore[reportPrivateUsage]
    if not hits:
        return {}
    newest = max(hits, key=lambda hit: hit.registration_date)
    raw = svc._fetch_certificate(newest.certificate_number)  # pyright: ignore[reportPrivateUsage]

    # The lodged SAP score as an int; None when absent or not integer-like.
    try:
        current_sap: Optional[int] = int(raw.get("energy_rating_current"))
    except (TypeError, ValueError):
        current_sap = None

    # Export column -> raw-cert field holding that feature's description(s).
    feature_fields = (
        ("walls", "walls"),
        ("roof", "roofs"),
        ("floor", "floors"),
        ("windows", "window"),
        ("heating", "main_heating"),
        ("heating_controls", "main_heating_controls"),
        ("hot_water", "hot_water"),
        ("lighting", "lighting"),
    )
    details: dict[str, Any] = {
        column: _join_descriptions(raw.get(field)) for column, field in feature_fields
    }
    details["total_floor_area"] = raw.get("total_floor_area")
    details["lodgement_date"] = raw.get("registration_date")
    details["is_expired"] = _is_expired(raw.get("registration_date"))
    # Baseline SAP/band/postcode aren't on the new `property` table, so take
    # the lodged figures off the cert (the assessment re-scores from these).
    details["postcode"] = raw.get("postcode")
    details["current_epc_rating"] = raw.get("current_energy_efficiency_band")
    details["current_sap_points"] = current_sap
    details["original_sap_points"] = current_sap
    return details
|
||||
|
||||
PORTFOLIO_ID = 785
|
||||
SCENARIOS = [1266]
|
||||
scenario_names = {
|
||||
1144: "EPC C",
|
||||
1266: "EPC C",
|
||||
}
|
||||
|
||||
project_name = "Calico Refresh"
|
||||
project_name = "Small request for EON"
|
||||
|
||||
|
||||
def get_data(portfolio_id, scenario_ids):
|
||||
|
|
@ -42,29 +124,29 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# --------------------
|
||||
# Properties
|
||||
# --------------------
|
||||
# `property_details_epc` is dead under the new backend, so read the base
|
||||
# Property rows and source the EPC descriptive fields live from the EPC
|
||||
# service (one cert fetch per property).
|
||||
properties_query = (
|
||||
session.query(PropertyModel, PropertyDetailsEpcModel)
|
||||
.join(
|
||||
PropertyDetailsEpcModel,
|
||||
PropertyModel.id == PropertyDetailsEpcModel.property_id,
|
||||
)
|
||||
session.query(PropertyModel)
|
||||
.filter(PropertyModel.portfolio_id == portfolio_id)
|
||||
.all()
|
||||
)
|
||||
|
||||
properties_data = [
|
||||
{
|
||||
**{
|
||||
col.name: getattr(p.PropertyModel, col.name)
|
||||
for col in PropertyModel.__table__.columns
|
||||
},
|
||||
**{
|
||||
col.name: getattr(p.PropertyDetailsEpcModel, col.name)
|
||||
for col in PropertyDetailsEpcModel.__table__.columns
|
||||
},
|
||||
}
|
||||
for p in properties_query
|
||||
]
|
||||
epc_service = EpcClientService(os.environ["OPEN_EPC_API_TOKEN"])
|
||||
properties_data = []
|
||||
for p in properties_query:
|
||||
base = {col.name: getattr(p, col.name) for col in PropertyModel.__table__.columns}
|
||||
# `property_id` is the key the recommendations merge joins on; the
|
||||
# Property's own PK is its `id`.
|
||||
base["property_id"] = p.id
|
||||
# Fill EPC fields from the service; for columns that also exist on the
|
||||
# Property row (postcode, SAP points, rating), only fill when the row's
|
||||
# value is missing so genuine Property data is never clobbered.
|
||||
for key, value in epc_details_from_service(epc_service, p.uprn).items():
|
||||
if base.get(key) is None:
|
||||
base[key] = value
|
||||
properties_data.append(base)
|
||||
|
||||
# --------------------
|
||||
# Plans
|
||||
|
|
@ -115,17 +197,13 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# --------------------
|
||||
# Recommendations (NO materials yet)
|
||||
# --------------------
|
||||
# The `plan_recommendations` m2m is retired (ADR-0017): a Recommendation
|
||||
# links to its Plan directly via `recommendation.plan_id`.
|
||||
recommendations_query = (
|
||||
session.query(
|
||||
Recommendation, PlanModel.scenario_id, PlanRecommendations.plan_id
|
||||
)
|
||||
.join(
|
||||
PlanRecommendations,
|
||||
Recommendation.id == PlanRecommendations.recommendation_id,
|
||||
)
|
||||
.join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
|
||||
session.query(Recommendation, PlanModel.scenario_id)
|
||||
.join(PlanModel, PlanModel.id == Recommendation.plan_id)
|
||||
.filter(
|
||||
PlanRecommendations.plan_id.in_(plan_ids),
|
||||
Recommendation.plan_id.in_(plan_ids),
|
||||
Recommendation.default.is_(True),
|
||||
Recommendation.already_installed.is_(False),
|
||||
)
|
||||
|
|
@ -135,7 +213,7 @@ def get_data(portfolio_id, scenario_ids):
|
|||
recommendations_data = [
|
||||
{
|
||||
**{
|
||||
col.name: getattr(r.Recommendation, col.name)
|
||||
col.name: getattr(r[0], col.name)
|
||||
for col in Recommendation.__table__.columns
|
||||
},
|
||||
"scenario_id": r.scenario_id,
|
||||
|
|
@ -328,7 +406,33 @@ for scenario_id in SCENARIOS:
|
|||
if col not in df.columns:
|
||||
df[col] = ""
|
||||
|
||||
# A per-recommendation detail sheet (one row per recommended measure) so the
|
||||
# measures and their costs are readable directly, not just pivoted into the
|
||||
# wide `properties` sheet.
|
||||
recs_detail = recommendations_df[
|
||||
recommendations_df["scenario_id"] == scenario_id
|
||||
].copy()
|
||||
recs_detail = recs_detail[recs_detail["default"]]
|
||||
detail_cols = [
|
||||
c
|
||||
for c in [
|
||||
"property_id",
|
||||
"measure_type",
|
||||
"description",
|
||||
"estimated_cost",
|
||||
"sap_points",
|
||||
"co2_equivalent_savings",
|
||||
"kwh_savings",
|
||||
"energy_cost_savings",
|
||||
]
|
||||
if c in recs_detail.columns
|
||||
]
|
||||
recs_detail = recs_detail[detail_cols].sort_values(
|
||||
["property_id", "estimated_cost"], ascending=[True, False]
|
||||
)
|
||||
|
||||
# Create excel to store to
|
||||
filename = f"{scenario_names[scenario_id]} - {project_name}.xlsx"
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
df.to_excel(writer, sheet_name="properties", index=False)
|
||||
recs_detail.to_excel(writer, sheet_name="recommendations", index=False)
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from infrastructure.epc_client.epc_client_service import EpcClientService
|
|||
from datatypes.epc.search import EpcSearchResult
|
||||
from infrastructure.epc_client.exceptions import EpcNotFoundError, EpcRateLimitError
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from infrastructure.epc_client.tests.conftest import make_search_row
|
||||
from tests.infrastructure.epc_client.conftest import make_search_row
|
||||
|
||||
|
||||
def _mock_response(status_code=200, json_data=None, headers=None):
|
||||
105
tests/infrastructure/epc_client/test_mapper_corpus.py
Normal file
105
tests/infrastructure/epc_client/test_mapper_corpus.py
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
"""Drive EpcPropertyDataMapper against the harvested cert corpus.
|
||||
|
||||
The corpus (backend/epc_api/json_samples/<schema>/corpus.jsonl) is a balanced
|
||||
sample of raw API certs across schema versions, produced by
|
||||
scripts/eon/harvest_certs.py. Each line is one cert in the exact shape
|
||||
``from_api_response`` consumes.
|
||||
|
||||
* 21.0.0 / 21.0.1 — supported today; these are a regression guard.
|
||||
* 20.0.0 — pre-SAP10 Reduced-Field Synthesis (ADR-0027).
|
||||
* 19.0/18.0/17.1/17.0 — pre-SAP10 family, inherited coefficients (ADR-0028).
|
||||
All 1000 certs in each bucket parse and map; the strict guard
|
||||
below keeps the whole bucket honest.
|
||||
|
||||
If the corpus hasn't been harvested yet, every parametrisation is skipped.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
|
||||
SAMPLES = Path("backend/epc_api/json_samples")
|
||||
SUPPORTED = {
|
||||
"RdSAP-Schema-21.0.1",
|
||||
"RdSAP-Schema-20.0.0",
|
||||
"RdSAP-Schema-19.0",
|
||||
"RdSAP-Schema-18.0",
|
||||
"RdSAP-Schema-17.1",
|
||||
"RdSAP-Schema-17.0",
|
||||
}
|
||||
|
||||
|
||||
def _load(schema: str) -> list[dict[str, Any]]:
    """Load one schema's harvested corpus (json_samples/<schema>/corpus.jsonl).

    Returns one parsed cert per non-blank line, or an empty list when the
    schema's corpus file does not exist.
    """
    path = SAMPLES / schema / "corpus.jsonl"
    if not path.exists():
        return []
    # Decode explicitly as UTF-8 rather than the platform locale, and iterate
    # the file handle so records are split on newlines only: str.splitlines()
    # also breaks on U+2028/U+2029 and other separators that may appear
    # unescaped inside a JSON string, which would cut a record in half.
    with path.open(encoding="utf-8") as handle:
        return [json.loads(line) for line in handle if line.strip()]
|
||||
|
||||
|
||||
def _cases(schemas: set[str]) -> list[Any]:
    """Build the ``pytest.param`` list covering every cert of ``schemas``.

    Each harvested cert becomes one parameter whose id is
    ``<schema>-<running index>``. When no corpus is available for any of the
    schemas, a single skipped placeholder parameter is returned instead, so
    the parametrised test is reported as skipped rather than silently
    collecting nothing.
    """
    # A set of strings iterates in hash order, which differs between
    # processes under hash randomisation. Sort so the parameter order — and
    # therefore the generated test ids — is identical on every run/worker.
    certs = [(s, c) for s in sorted(schemas) for c in _load(s)]
    if not certs:
        return [
            pytest.param(
                None,
                marks=pytest.mark.skip(reason=f"no {schemas} corpus harvested"),
                id="empty",
            )
        ]
    return [
        pytest.param(c, id=f"{s}-{i}") for i, (s, c) in enumerate(certs)
    ]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("cert", _cases(SUPPORTED))
def test_supported_schemas_map(cert: dict[str, Any]) -> None:
    """Every harvested cert of a supported schema maps to EpcPropertyData."""
    # Act
    mapped = EpcPropertyDataMapper.from_api_response(cert)

    # Assert
    assert isinstance(mapped, EpcPropertyData)
|
||||
|
||||
|
||||
def test_rdsap_19_0_synthesises_windows_for_windowless_cert() -> None:
    """ADR-0028 Reduced-Field Synthesis: a reduced 19.0 cert lodges no
    per-window geometry, only a glazed_area band — the mapper synthesises a
    4-way N/E/S/W split rather than leaving the dwelling windowless."""
    # Arrange — a band-1 cert with no lodged sap_windows array
    certs = _load("RdSAP-Schema-19.0")
    if not certs:
        pytest.skip("no RdSAP-Schema-19.0 corpus harvested")
    # Supply a default so a corpus without such a cert is reported as an
    # explicit skip instead of surfacing as a bare StopIteration error.
    cert = next(
        (
            c
            for c in certs
            if c.get("glazed_area") == 1 and not c.get("sap_windows")
        ),
        None,
    )
    if cert is None:
        pytest.skip("no band-1 windowless cert in the RdSAP-Schema-19.0 corpus")

    # Act
    result = EpcPropertyDataMapper.from_api_response(cert)

    # Assert — N, E, S, W (SAP orientation codes 1, 3, 5, 7)
    assert [w.orientation for w in result.sap_windows] == [1, 3, 5, 7]
|
||||
|
||||
|
||||
def test_rdsap_17_0_synthesises_windows_for_windowless_cert() -> None:
    """ADR-0028 Reduced-Field Synthesis: a reduced 17.0 cert lodges no
    per-window geometry, only a glazed_area band — the mapper synthesises a
    4-way N/E/S/W split rather than leaving the dwelling windowless."""
    # Arrange — a band-1 cert with no lodged sap_windows array
    certs = _load("RdSAP-Schema-17.0")
    if not certs:
        pytest.skip("no RdSAP-Schema-17.0 corpus harvested")
    # Supply a default so a corpus without such a cert is reported as an
    # explicit skip instead of surfacing as a bare StopIteration error.
    cert = next(
        (
            c
            for c in certs
            if c.get("glazed_area") == 1 and not c.get("sap_windows")
        ),
        None,
    )
    if cert is None:
        pytest.skip("no band-1 windowless cert in the RdSAP-Schema-17.0 corpus")

    # Act
    result = EpcPropertyDataMapper.from_api_response(cert)

    # Assert — N, E, S, W (SAP orientation codes 1, 3, 5, 7)
    assert [w.orientation for w in result.sap_windows] == [1, 3, 5, 7]
|
||||
|
|
@ -105,23 +105,24 @@ class FakeEpcRepo(EpcRepository):
|
|||
|
||||
|
||||
class FakeSolarRepo(SolarRepository):
|
||||
"""In-memory Google Solar insights store. Seed `by_property` to hydrate a
|
||||
Property's potential for a Modelling read; `saved` records writes for an
|
||||
Ingestion-side assertion. Returns None for an unseeded Property (no solar
|
||||
data fetched) — the solar Generator then offers nothing."""
|
||||
"""In-memory Google Solar insights store keyed by UPRN. Seed `by_uprn` to
|
||||
hydrate a UPRN's potential for a Modelling read; `saved` records writes
|
||||
(uprn, longitude, latitude, insights) for an Ingestion-side assertion.
|
||||
Returns None for an unseeded UPRN (no solar data fetched) — the solar
|
||||
Generator then offers nothing."""
|
||||
|
||||
def __init__(
|
||||
self, by_property: Optional[dict[int, dict[str, Any]]] = None
|
||||
def __init__(self, by_uprn: Optional[dict[int, dict[str, Any]]] = None) -> None:
|
||||
self.saved: list[tuple[int, float, float, dict[str, Any]]] = []
|
||||
self._by_uprn: dict[int, dict[str, Any]] = dict(by_uprn or {})
|
||||
|
||||
def save(
|
||||
self, uprn: int, *, longitude: float, latitude: float, insights: dict[str, Any]
|
||||
) -> None:
|
||||
self.saved: list[tuple[int, dict[str, Any]]] = []
|
||||
self._by_property: dict[int, dict[str, Any]] = dict(by_property or {})
|
||||
self.saved.append((uprn, longitude, latitude, insights))
|
||||
self._by_uprn[uprn] = insights
|
||||
|
||||
def save(self, property_id: int, insights: dict[str, Any]) -> None:
|
||||
self.saved.append((property_id, insights))
|
||||
self._by_property[property_id] = insights
|
||||
|
||||
def get(self, property_id: int) -> Optional[dict[str, Any]]:
|
||||
return self._by_property.get(property_id)
|
||||
def get(self, uprn: int) -> Optional[dict[str, Any]]:
|
||||
return self._by_uprn.get(uprn)
|
||||
|
||||
|
||||
class FakeSpatialRepo(SpatialRepository):
|
||||
|
|
|
|||
|
|
@ -162,7 +162,7 @@ def test_first_run_baselines_through_repos_and_is_idempotent_on_rerun(
|
|||
),
|
||||
MaterialRow(
|
||||
id=4,
|
||||
type="low_energy_lighting",
|
||||
type="low_energy_lighting_installation",
|
||||
total_cost=8.0,
|
||||
cost_unit="gbp_per_unit",
|
||||
is_active=True,
|
||||
|
|
@ -170,7 +170,7 @@ def test_first_run_baselines_through_repos_and_is_idempotent_on_rerun(
|
|||
),
|
||||
MaterialRow(
|
||||
id=6,
|
||||
type="gas_boiler_upgrade",
|
||||
type="boiler_upgrade",
|
||||
total_cost=3000.0,
|
||||
cost_unit="gbp_per_unit",
|
||||
is_active=True,
|
||||
|
|
@ -178,7 +178,7 @@ def test_first_run_baselines_through_repos_and_is_idempotent_on_rerun(
|
|||
),
|
||||
MaterialRow(
|
||||
id=7,
|
||||
type="system_tune_up",
|
||||
type="roomstat_programmer_trvs",
|
||||
total_cost=500.0,
|
||||
cost_unit="gbp_per_unit",
|
||||
is_active=True,
|
||||
|
|
@ -186,7 +186,7 @@ def test_first_run_baselines_through_repos_and_is_idempotent_on_rerun(
|
|||
),
|
||||
MaterialRow(
|
||||
id=8,
|
||||
type="system_tune_up_zoned",
|
||||
type="time_temperature_zone_control",
|
||||
total_cost=900.0,
|
||||
cost_unit="gbp_per_unit",
|
||||
is_active=True,
|
||||
|
|
@ -318,7 +318,7 @@ def test_modelling_optimises_and_persists_a_multi_measure_plan(
|
|||
),
|
||||
MaterialRow(
|
||||
id=4,
|
||||
type="low_energy_lighting",
|
||||
type="low_energy_lighting_installation",
|
||||
total_cost=8.0,
|
||||
cost_unit="gbp_per_unit",
|
||||
is_active=True,
|
||||
|
|
@ -507,7 +507,7 @@ def test_modelling_recommends_nothing_when_already_at_the_target_band(
|
|||
),
|
||||
MaterialRow(
|
||||
id=13,
|
||||
type="low_energy_lighting",
|
||||
type="low_energy_lighting_installation",
|
||||
total_cost=8.0,
|
||||
cost_unit="gbp_per_unit",
|
||||
is_active=True,
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ def test_ingestion_persists_epc_and_threads_coords_into_solar() -> None:
|
|||
# fetcher, solar persisted, batch committed once.
|
||||
assert epc_repo.saved == [(epc, 10)]
|
||||
assert solar_fetcher.calls == [(-0.1278, 51.5074)]
|
||||
assert solar_repo.saved == [(10, insights)]
|
||||
assert solar_repo.saved == [(12345, -0.1278, 51.5074, insights)]
|
||||
assert uow.commits == 1
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -62,24 +62,24 @@ def test_solar_potential_for_returns_none_when_no_insights() -> None:
|
|||
solar = FakeSolarRepo()
|
||||
|
||||
# Act / Assert
|
||||
assert _solar_potential_for(solar, property_id=42) is None
|
||||
assert _solar_potential_for(solar, uprn=42) is None
|
||||
|
||||
|
||||
def test_solar_potential_for_returns_none_for_an_error_payload() -> None:
|
||||
# Arrange — the Solar API found no building; Ingestion persisted the error
|
||||
# dict (no `solarPotential` block).
|
||||
solar = FakeSolarRepo(by_property={7: {"error": "ENTITY_NOT_FOUND"}})
|
||||
solar = FakeSolarRepo(by_uprn={7: {"error": "ENTITY_NOT_FOUND"}})
|
||||
|
||||
# Act / Assert
|
||||
assert _solar_potential_for(solar, property_id=7) is None
|
||||
assert _solar_potential_for(solar, uprn=7) is None
|
||||
|
||||
|
||||
def test_solar_potential_for_projects_valid_insights() -> None:
|
||||
# Arrange
|
||||
solar = FakeSolarRepo(by_property={7: _insights()})
|
||||
solar = FakeSolarRepo(by_uprn={7: _insights()})
|
||||
|
||||
# Act
|
||||
potential = _solar_potential_for(solar, property_id=7)
|
||||
potential = _solar_potential_for(solar, uprn=7)
|
||||
|
||||
# Assert — the real London example projects to the 46-rung ladder.
|
||||
assert potential is not None
|
||||
|
|
@ -91,7 +91,7 @@ def test_candidate_recommendations_includes_solar_when_potential_present() -> No
|
|||
# Arrange — a solar-eligible house with a feasible potential.
|
||||
epc = _eligible_house()
|
||||
potential = _solar_potential_for(
|
||||
FakeSolarRepo(by_property={1: _insights()}), property_id=1
|
||||
FakeSolarRepo(by_uprn={1: _insights()}), uprn=1
|
||||
)
|
||||
|
||||
# Act
|
||||
|
|
@ -121,7 +121,7 @@ def test_considered_measures_restricts_candidates_to_the_allowlist() -> None:
|
|||
# unrestricted run offers Solar PV alongside any fabric/heating candidates.
|
||||
epc = _eligible_house()
|
||||
potential = _solar_potential_for(
|
||||
FakeSolarRepo(by_property={1: json.loads(_INSIGHTS_FIXTURE.read_text())}), 1
|
||||
FakeSolarRepo(by_uprn={1: json.loads(_INSIGHTS_FIXTURE.read_text())}), 1
|
||||
)
|
||||
|
||||
# Act — restrict the run to Solar PV only.
|
||||
|
|
|
|||
|
|
@ -42,6 +42,38 @@ def test_get_maps_active_material_to_product_with_contingency(
|
|||
assert abs(product.contingency_rate - 0.10) <= 1e-9
|
||||
|
||||
|
||||
def test_get_resolves_a_drifted_measure_type_to_the_catalogue_spelling(
    db_engine: Engine,
) -> None:
    # Arrange — the catalogue row carries the live `material.type` spelling
    # (`low_energy_lighting_installation`), while the domain MeasureType is
    # `low_energy_lighting`. The repo has to translate between the two for the
    # lookup to hit, and the Product it returns must still carry the *domain*
    # measure type (what the Plan/Recommendation store), not the catalogue one.
    catalogue_row = MaterialRow(
        id=1,
        type="low_energy_lighting_installation",
        total_cost=5.0,
        cost_unit="gbp_per_unit",
        is_active=True,
        description="Low energy lighting",
    )
    with Session(db_engine) as write_session:
        write_session.add(catalogue_row)
        write_session.commit()

    # Act
    with Session(db_engine) as read_session:
        repo = ProductPostgresRepository(read_session)
        product: Product = repo.get("low_energy_lighting")

    # Assert
    assert product.measure_type == "low_energy_lighting"
    assert abs(product.unit_cost_per_m2 - 5.0) <= 1e-9
|
||||
|
||||
|
||||
def test_get_picks_the_lowest_id_when_several_active_rows_share_a_type(
|
||||
db_engine: Engine,
|
||||
) -> None:
|
||||
|
|
|
|||
|
|
@ -23,7 +23,9 @@ def test_building_insights_round_trip(db_engine: Engine) -> None:
|
|||
|
||||
# Act
|
||||
with Session(db_engine) as session:
|
||||
SolarPostgresRepository(session).save(property_id=5, insights=insights)
|
||||
SolarPostgresRepository(session).save(
|
||||
5, longitude=-0.1, latitude=51.5, insights=insights
|
||||
)
|
||||
session.commit()
|
||||
with Session(db_engine) as session:
|
||||
reloaded = SolarPostgresRepository(session).get(5)
|
||||
|
|
|
|||
29
tests/utilities/floats.py
Normal file
29
tests/utilities/floats.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
"""Human-readable float assertions for tests.
|
||||
|
||||
Replaces the cryptic ``assert abs(actual - expected) <= 1e-9`` idiom with a
|
||||
named helper that says what it checks and prints a useful message on failure.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def assert_float_matches(
    actual: float,
    expected: float,
    *,
    tol: float = 1e-9,
    msg: Optional[str] = None,
) -> None:
    """Assert ``actual`` equals ``expected`` within an absolute tolerance.

    ``tol`` defaults to ``1e-9`` for exact-arithmetic checks; pass a looser
    value (e.g. ``tol=1e-4``) where the comparison is physically approximate.

    Args:
        actual: The value produced by the code under test.
        expected: The value it should match.
        tol: Maximum permitted absolute difference (inclusive).
        msg: Optional extra context appended to the failure message.

    Raises:
        AssertionError: If the values differ by more than ``tol``. NaN never
            matches anything, including another NaN.
    """
    # Exactly equal values always match. This also covers equal infinities,
    # whose difference is NaN and would otherwise fail the tolerance check.
    if actual == expected:
        return
    diff = abs(actual - expected)
    if diff <= tol:
        return
    detail = f"\n{msg}" if msg else ""
    # Raise explicitly instead of using a bare `assert`, which is stripped
    # under `python -O` and would turn this helper into a silent no-op.
    raise AssertionError(
        f"expected {expected!r} but got {actual!r} "
        f"(|diff| {diff:g} > tol {tol:g}){detail}"
    )
|
||||
Loading…
Add table
Reference in a new issue