mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
added test for a 1000 examples
This commit is contained in:
parent
06cb4f7b6e
commit
3b7d26fe34
10 changed files with 2490 additions and 252 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -303,3 +303,6 @@ backlog/*
|
|||
|
||||
# Local Claude config files
|
||||
.claude/*modelling_cohort.csv
|
||||
|
||||
# Local EPC debug cache (scripts/eon)
|
||||
scripts/eon/epc_cache.pkl
|
||||
|
|
|
|||
1000
backend/epc_api/json_samples/RdSAP-Schema-20.0.0/corpus.jsonl
Normal file
1000
backend/epc_api/json_samples/RdSAP-Schema-20.0.0/corpus.jsonl
Normal file
File diff suppressed because one or more lines are too long
1000
backend/epc_api/json_samples/RdSAP-Schema-21.0.1/corpus.jsonl
Normal file
1000
backend/epc_api/json_samples/RdSAP-Schema-21.0.1/corpus.jsonl
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -6,7 +6,6 @@ from typing import Final, List, Optional, Union
|
|||
|
||||
from datatypes.epc.domain.epc import Epc
|
||||
|
||||
|
||||
_API_EXTENSION = re.compile(r"^Extension\s+(\d+)$")
|
||||
|
||||
|
||||
|
|
@ -36,9 +35,7 @@ class BuildingPartIdentifier(Enum):
|
|||
OTHER = "other"
|
||||
|
||||
@classmethod
|
||||
def from_api_string(
|
||||
cls, api_identifier: Optional[str]
|
||||
) -> "BuildingPartIdentifier":
|
||||
def from_api_string(cls, api_identifier: Optional[str]) -> "BuildingPartIdentifier":
|
||||
"""Map a gov-EPC API `BuildingPart.identifier` to its canonical
|
||||
member. "Main Dwelling" → MAIN; "Extension N" → EXTENSION_N
|
||||
(for N in 1..4). `None` (permitted by the 21_0_1 schema) and
|
||||
|
|
@ -76,6 +73,7 @@ class Addendum:
|
|||
Present on ~43% of real RdSAP certs (stone-walls / system-build / a list of
|
||||
numeric improvement codes the assessor wanted to call out).
|
||||
"""
|
||||
|
||||
stone_walls: Optional[bool] = None
|
||||
system_build: Optional[bool] = None
|
||||
addendum_numbers: Optional[List[int]] = None
|
||||
|
|
@ -184,10 +182,12 @@ class SapVentilation:
|
|||
flueless_gas_fires_count: Optional[int] = None
|
||||
ventilation_in_pcdf_database: Optional[bool] = None
|
||||
# SAP10.2 §2 cert lodgements not previously surfaced on this type.
|
||||
sheltered_sides: Optional[int] = None # (19) — cert assessor lodge, 0..4
|
||||
sheltered_sides: Optional[int] = None # (19) — cert assessor lodge, 0..4
|
||||
has_suspended_timber_floor: Optional[bool] = None # (12) gate
|
||||
suspended_timber_floor_sealed: Optional[bool] = None
|
||||
has_draught_lobby: Optional[bool] = None # (13) gate (overrides .draught_lobby for §2 cascade)
|
||||
has_draught_lobby: Optional[bool] = (
|
||||
None # (13) gate (overrides .draught_lobby for §2 cascade)
|
||||
)
|
||||
# SAP 10.2 §2 (17a) — air permeability at 4 Pa from the low-pressure
|
||||
# Pulse pressure test, m³/h per m² of envelope area. When present the
|
||||
# cascade routes (18) via the AP4 formula `0.263 × AP4^0.924 + (8)`.
|
||||
|
|
@ -302,10 +302,11 @@ class PhotovoltaicArray:
|
|||
measured PV configuration; `photovoltaic_supply` carries the fallback
|
||||
`percent_roof_area` estimate when the surveyor could not confirm details.
|
||||
"""
|
||||
|
||||
peak_power: float
|
||||
pitch: int
|
||||
orientation: int
|
||||
overshading: int
|
||||
orientation: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -515,7 +516,9 @@ class SapBuildingPart:
|
|||
floor_u_value_known: Optional[bool] = None
|
||||
|
||||
roof_construction: Optional[int] = None
|
||||
roof_construction_type: Optional[str] = None # str from site notes e.g. "PS Pitched, sloping ceiling"
|
||||
roof_construction_type: Optional[str] = (
|
||||
None # str from site notes e.g. "PS Pitched, sloping ceiling"
|
||||
)
|
||||
roof_insulation_location: Optional[Union[int, str]] = (
|
||||
None # TODO: make enum/mapping?
|
||||
)
|
||||
|
|
@ -592,6 +595,7 @@ class RenewableHeatIncentive:
|
|||
baseline `space_heating_kwh` and `hot_water_kwh` for SAP10 properties (used as ML
|
||||
training targets per ADR-0007).
|
||||
"""
|
||||
|
||||
space_heating_kwh: float
|
||||
water_heating_kwh: float
|
||||
impact_of_loft_insulation_kwh: Optional[float] = None
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -307,6 +307,36 @@ class TestFromRdSapSchema21_0_0:
|
|||
# photovoltaic_supply is None when the measured shape is present
|
||||
assert result.sap_energy_source.photovoltaic_supply is None
|
||||
|
||||
def test_photovoltaic_array_orientation_nd_nulls_only_that_field(self) -> None:
    """An 'ND' orientation nulls only that array's orientation field.

    Regression for the real 21.0.1 cert 5236-4425-7600-0474-2292: the RdSAP
    'ND' ("Not Defined") sentinel must not crash the int() coercion and drop
    every array, which happened when 'ND' was handled in the shared
    _measurement_value coercer instead of the field-scoped _pv_orientation.
    """

    def pv_entry(pitch: int, orientation: object) -> list:
        # One lodged array in the nested-list shape of `photovoltaic_supply`.
        return [
            {
                "pitch": pitch,
                "peak_power": 2.0,
                "orientation": orientation,
                "overshading": 1,
            }
        ]

    # Arrange — a 3-array dwelling whose middle array lodges 'ND'.
    payload = load("21_0_0.json")
    payload["sap_energy_source"]["photovoltaic_supply"] = [
        pv_entry(3, 3),
        pv_entry(1, "ND"),
        pv_entry(3, 7),
    ]
    parsed = from_dict(RdSapSchema21_0_0, payload)

    # Act
    mapped = EpcPropertyDataMapper.from_rdsap_schema_21_0_0(parsed)

    # Assert — all three arrays survive and only the 'ND' orientation is None.
    pv_arrays = mapped.sap_energy_source.photovoltaic_arrays
    assert pv_arrays is not None
    assert len(pv_arrays) == 3
    assert [pv.orientation for pv in pv_arrays] == [3, None, 7]

    # The 'ND' array keeps its sibling fields.
    undefined = pv_arrays[1]
    assert undefined.orientation is None
    assert undefined.peak_power == 2.0
    assert undefined.pitch == 1
    assert undefined.overshading == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema 21.0.1 (most comprehensive — full field coverage)
|
||||
|
|
|
|||
|
|
@ -718,7 +718,7 @@ _HOURS_PER_DAY_OVER_1000: Final[float] = 0.024
|
|||
_DAYS_PER_MONTH: Final[tuple[int, ...]] = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
|
||||
|
||||
def _pv_annual_s_kwh_per_m2(
|
||||
orientation_code: int,
|
||||
orientation_code: Optional[int],
|
||||
pitch_code: int,
|
||||
climate: "int | PostcodeClimate",
|
||||
) -> float:
|
||||
|
|
@ -727,8 +727,10 @@ def _pv_annual_s_kwh_per_m2(
|
|||
monthly Appendix U3.2 surface flux over the year. `climate` selects
|
||||
Table U3/U4 region (UK average = 0 for the rating cascade) or a
|
||||
`PostcodeClimate` from PCDB Table 172 for the demand cascade.
|
||||
Returns 0.0 for unrecognised orientation codes (cert octants outside
|
||||
1..8) — these PV arrays contribute nothing."""
|
||||
Returns 0.0 for an unknown orientation (None when the cert lodged 'ND',
|
||||
or a code outside 1..8) — these PV arrays contribute nothing."""
|
||||
if orientation_code is None:
|
||||
return 0.0
|
||||
orientation = ORIENTATION_BY_SAP10_CODE.get(orientation_code)
|
||||
if orientation is None:
|
||||
return 0.0
|
||||
|
|
@ -2475,8 +2477,10 @@ def _pv_array_monthly_generation_kwh(
|
|||
E_PV,m = 0.8 × kWp × ZPV × (days_m × S_m × 24 / 1000)
|
||||
where S_m is the §U3.2 surface flux (W/m²). Returns a 12-zero tuple
|
||||
for arrays whose orientation isn't mapped in
|
||||
`ORIENTATION_BY_SAP10_CODE` (defensive — current cert lodgements
|
||||
always cover 1..8)."""
|
||||
`ORIENTATION_BY_SAP10_CODE` (defensive — None when the cert lodged
|
||||
'ND', else a code outside 1..8)."""
|
||||
if array.orientation is None:
|
||||
return (0.0,) * 12
|
||||
orientation = ORIENTATION_BY_SAP10_CODE.get(array.orientation)
|
||||
if orientation is None:
|
||||
return (0.0,) * 12
|
||||
|
|
|
|||
|
|
@ -1229,7 +1229,7 @@ def _pv_aggregates(es: SapEnergySource) -> dict[str, Any]:
|
|||
total_power += a.peak_power
|
||||
weighted_pitch += a.pitch * a.peak_power
|
||||
weighted_overshading += a.overshading * a.peak_power
|
||||
if a.orientation in _OCTANT_NAMES:
|
||||
if a.orientation is not None and a.orientation in _OCTANT_NAMES:
|
||||
octant_power[_OCTANT_NAMES[a.orientation]] += a.peak_power
|
||||
aggregates["has_pv"] = True
|
||||
aggregates["pv_capacity_source"] = "measured"
|
||||
|
|
|
|||
68
infrastructure/epc_client/tests/test_mapper_corpus.py
Normal file
68
infrastructure/epc_client/tests/test_mapper_corpus.py
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
"""Drive EpcPropertyDataMapper against the harvested cert corpus.
|
||||
|
||||
The corpus (backend/epc_api/json_samples/<schema>/corpus.jsonl) is a balanced
|
||||
sample of raw API certs across schema versions, produced by
|
||||
scripts/eon/harvest_certs.py. Each line is one cert in the exact shape
|
||||
``from_api_response`` consumes.
|
||||
|
||||
* 21.0.1 — supported today; these are a regression guard (no 21.0.0 corpus is loaded by SUPPORTED yet).
|
||||
* 20.0.0 — mapper is incomplete, so these are xfail. As
|
||||
``from_rdsap_schema_20_0_0`` is built out they flip to
|
||||
xpass; when the whole bucket passes, drop the xfail
|
||||
marker and the strict guard below will keep it honest.
|
||||
|
||||
If the corpus hasn't been harvested yet, every parametrisation is skipped.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
|
||||
SAMPLES = Path("backend/epc_api/json_samples")
|
||||
SUPPORTED = {"RdSAP-Schema-21.0.1"}
|
||||
WIP = {"RdSAP-Schema-20.0.0"}
|
||||
|
||||
|
||||
def _load(schema: str) -> list[dict[str, Any]]:
    """Load one schema's harvested corpus (json_samples/<schema>/corpus.jsonl).

    Args:
        schema: Corpus directory name under ``SAMPLES``, e.g.
            ``"RdSAP-Schema-21.0.1"``.

    Returns:
        One parsed cert per non-blank line, or an empty list when the corpus
        file does not exist.
    """
    path = SAMPLES / schema / "corpus.jsonl"
    if not path.exists():
        return []
    # Decode explicitly as UTF-8 rather than the platform locale encoding, and
    # iterate physical lines: str.splitlines() would also split on characters
    # such as U+2028 that may appear unescaped inside a JSON string.
    with path.open(encoding="utf-8") as lines:
        return [json.loads(line) for line in lines if line.strip()]
|
||||
|
||||
|
||||
def _cases(schemas: set[str]) -> list[Any]:
    """Build the pytest parameter list for every harvested cert in ``schemas``.

    Args:
        schemas: Corpus directory names to load via ``_load``.

    Returns:
        One ``pytest.param`` per cert with id ``"<schema>-<index>"``, or a
        single skipped placeholder parameter when no corpus was found.
    """
    # A set of strings iterates in hash order, which differs between
    # interpreter processes; sort so parameter order and ids are reproducible.
    certs = [(s, c) for s in sorted(schemas) for c in _load(s)]
    if not certs:
        return [
            pytest.param(
                None,
                marks=pytest.mark.skip(reason=f"no {schemas} corpus harvested"),
                id="empty",
            )
        ]
    return [pytest.param(c, id=f"{s}-{i}") for i, (s, c) in enumerate(certs)]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("cert", _cases(SUPPORTED))
def test_supported_schemas_map(cert: dict[str, Any]) -> None:
    """Each harvested cert of a supported schema maps to an EpcPropertyData."""
    mapped = EpcPropertyDataMapper.from_api_response(cert)
    assert isinstance(mapped, EpcPropertyData)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="RdSapSchema20.0.0 mapper is incomplete", strict=False)
@pytest.mark.parametrize("cert", _cases(WIP))
def test_wip_schema_20_maps(cert: dict[str, Any]) -> None:
    """Each harvested 20.0.0 cert should map to an EpcPropertyData.

    The xfail marker is non-strict, so certs that already map are reported as
    XPASS instead of failing the run.
    """
    mapped = EpcPropertyDataMapper.from_api_response(cert)
    assert isinstance(mapped, EpcPropertyData)
|
||||
|
|
@ -25,5 +25,6 @@ testpaths =
|
|||
etl/epc_clean/tests
|
||||
etl/hubspot/tests
|
||||
etl/spatial/tests
|
||||
infrastructure/epc_client/tests/
|
||||
markers =
|
||||
integration: mark a test as an integration test
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue