slice 11a: PV array aggregates + capacity_source flag

Fifteen PV features land: has_pv (bool), pv_capacity_source (str
categorical: measured / estimated_from_roof_area / none),
pv_array_count, pv_total_peak_power_kw, eight peak-power-by-octant
columns (pv_peak_power_kw_{N..NW}), peak-power-weighted
pv_avg_pitch and pv_avg_overshading (nullable), and
pv_percent_roof_area (nullable — populated only on the estimated
branch).

Dispatches on the SAP10 EpcPropertyData.SapEnergySource shapes added
in slice 10.5: photovoltaic_arrays populated → measured;
photovoltaic_supply.none_or_no_details.percent_roof_area > 0 →
estimated; everything else → none. percent_roof_area == 0 is the
canonical no-PV payload and surfaces as 'none', not 'estimated'.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-16 16:04:15 +00:00
parent b050348927
commit 706d1b5b66
3 changed files with 260 additions and 0 deletions

View file

@ -15,6 +15,9 @@ from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
InstantaneousWwhrs,
MainHeatingDetail,
PhotovoltaicArray,
PhotovoltaicSupply,
PhotovoltaicSupplyNoneOrNoDetails,
RenewableHeatIncentive,
SapBuildingPart,
SapEnergySource,
@ -26,6 +29,22 @@ from datatypes.epc.domain.epc_property_data import (
)
def make_pv_array(
    *,
    peak_power: float = 2.0,
    pitch: int = 2,
    orientation: int = 5,
    overshading: int = 1,
) -> PhotovoltaicArray:
    """Create a PhotovoltaicArray test fixture; every field is keyword-overridable.

    Called with no arguments this yields the SAP10 default array: 2 kW peak
    power, S-facing (orientation code 5), pitch code 2, overshading code 1.
    """
    field_values = {
        "peak_power": peak_power,
        "pitch": pitch,
        "orientation": orientation,
        "overshading": overshading,
    }
    return PhotovoltaicArray(**field_values)
def make_main_heating_detail(
*,
main_fuel_type: Union[int, str] = 26, # mains gas (not community)
@ -191,6 +210,8 @@ def make_minimal_sap10_epc(
sap_windows: Optional[list[SapWindow]] = None,
sap_building_parts: Optional[list[SapBuildingPart]] = None,
sap_heating: Optional[SapHeating] = None,
photovoltaic_arrays: Optional[list[PhotovoltaicArray]] = None,
photovoltaic_supply_percent_roof_area: Optional[int] = None,
) -> EpcPropertyData:
"""Construct a minimal valid SAP10 EpcPropertyData with parametrisable targets."""
return EpcPropertyData(
@ -221,6 +242,18 @@ def make_minimal_sap10_epc(
is_dwelling_export_capable=False,
wind_turbines_terrain_type="Suburban",
electricity_smart_meter_present=False,
photovoltaic_arrays=list(photovoltaic_arrays)
if photovoltaic_arrays is not None
else None,
photovoltaic_supply=(
PhotovoltaicSupply(
none_or_no_details=PhotovoltaicSupplyNoneOrNoDetails(
percent_roof_area=photovoltaic_supply_percent_roof_area
)
)
if photovoltaic_supply_percent_roof_area is not None
else None
),
),
sap_building_parts=list(sap_building_parts) if sap_building_parts is not None else [],
solar_water_heating=solar_water_heating,

View file

@ -9,6 +9,7 @@ from domain.ml.tests._fixtures import (
make_floor_dimension,
make_main_heating_detail,
make_minimal_sap10_epc,
make_pv_array,
make_sap_heating,
make_window,
)
@ -812,6 +813,135 @@ def test_to_row_returns_primary_heating_nones_when_no_main_heating_details() ->
assert row["primary_central_heating_pump_age"] is None
_PV_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
# name → (dtype, nullable, categorical)
"has_pv": (bool, False, False),
"pv_capacity_source": (str, False, True),
"pv_array_count": (int, False, False),
"pv_total_peak_power_kw": (float, False, False),
"pv_peak_power_kw_N": (float, False, False),
"pv_peak_power_kw_NE": (float, False, False),
"pv_peak_power_kw_E": (float, False, False),
"pv_peak_power_kw_SE": (float, False, False),
"pv_peak_power_kw_S": (float, False, False),
"pv_peak_power_kw_SW": (float, False, False),
"pv_peak_power_kw_W": (float, False, False),
"pv_peak_power_kw_NW": (float, False, False),
"pv_avg_pitch": (float, True, False),
"pv_avg_overshading": (float, True, False),
"pv_percent_roof_area": (int, True, False),
}
def test_schema_advertises_pv_features() -> None:
    """Each PV feature is in the schema with the expected dtype and flags."""
    # Arrange
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert — the feature name is the assertion message so a failure
    # pinpoints the offending column.
    for feature, expected in _PV_FEATURES_NULLABLE.items():
        dtype, nullable, categorical = expected
        assert feature in schema.feature_columns, feature
        spec = schema.feature_columns[feature]
        assert spec.dtype is dtype, feature
        assert spec.nullable is nullable, feature
        assert spec.categorical is categorical, feature
def test_to_row_aggregates_measured_pv_arrays() -> None:
    """Measured arrays drive count, total/per-octant power and weighted means."""
    # Arrange — three arrays given as (peak_power kW, pitch, orientation,
    # overshading): two S-facing (orientation 5) and one NW-facing (orientation 8).
    array_specs = [
        (2.04, 2, 5, 1),
        (1.86, 3, 5, 2),
        (1.0, 2, 8, 1),
    ]
    pv_arrays = [
        make_pv_array(
            peak_power=kw,
            pitch=pitch,
            orientation=orientation,
            overshading=overshading,
        )
        for kw, pitch, orientation, overshading in array_specs
    ]
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        photovoltaic_arrays=pv_arrays,
    )
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert — headline flags and totals (2.04 + 1.86 + 1.0 = 4.9 kW)
    assert row["has_pv"] is True
    assert row["pv_capacity_source"] == "measured"
    assert row["pv_array_count"] == 3
    assert row["pv_total_peak_power_kw"] == pytest.approx(4.9)

    # Per-octant power: S = 2.04 + 1.86 = 3.9, NW = 1.0, all others stay 0.0
    assert row["pv_peak_power_kw_S"] == pytest.approx(3.9)
    assert row["pv_peak_power_kw_NW"] == pytest.approx(1.0)
    for empty_octant in ("N", "NE", "E", "SE", "SW", "W"):
        assert row[f"pv_peak_power_kw_{empty_octant}"] == 0.0

    # Power-weighted pitch: (2.04*2 + 1.86*3 + 1.0*2) / 4.9 = 11.66 / 4.9 ≈ 2.380
    assert row["pv_avg_pitch"] == pytest.approx(11.66 / 4.9)
    # Power-weighted overshading: (2.04*1 + 1.86*2 + 1.0*1) / 4.9 = 6.76 / 4.9 ≈ 1.379
    assert row["pv_avg_overshading"] == pytest.approx(6.76 / 4.9)

    # percent_roof_area is not reported on the measured branch
    assert row["pv_percent_roof_area"] is None
def test_to_row_uses_percent_roof_area_when_pv_not_measured() -> None:
    """With only percent_roof_area known, PV is reported as estimated."""
    # Arrange — no per-array details; the payload carries just a roof-area percentage
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        photovoltaic_supply_percent_roof_area=25,
    )
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert — PV is present, but only via the roof-area estimate
    assert row["has_pv"] is True
    assert row["pv_capacity_source"] == "estimated_from_roof_area"
    assert row["pv_percent_roof_area"] == 25

    # Measured-array aggregates stay at their empty values
    assert row["pv_array_count"] == 0
    assert row["pv_total_peak_power_kw"] == 0.0
    assert row["pv_avg_pitch"] is None
    assert row["pv_avg_overshading"] is None
def test_to_row_returns_pv_no_when_no_pv_data() -> None:
    """An EPC without any PV payload yields the all-empty 'none' PV columns."""
    # Arrange — neither measured arrays nor a percent_roof_area are supplied
    epc = make_minimal_sap10_epc(energy_rating_current=82)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert — flags and counters report no PV
    assert row["has_pv"] is False
    assert row["pv_capacity_source"] == "none"
    assert row["pv_array_count"] == 0
    assert row["pv_total_peak_power_kw"] == 0.0

    # Every octant column is zero
    for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW"):
        assert row[f"pv_peak_power_kw_{octant}"] == 0.0

    # Nullable columns are all null
    assert row["pv_percent_roof_area"] is None
    assert row["pv_avg_pitch"] is None
    assert row["pv_avg_overshading"] is None
def test_to_row_treats_zero_percent_roof_area_as_no_pv() -> None:
    """percent_roof_area == 0 is reported as 'none', not as an estimate."""
    # Arrange — `photovoltaic_supply.none_or_no_details.percent_roof_area = 0`
    # is the canonical "no PV" payload on schema-21 EPCs.
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        photovoltaic_supply_percent_roof_area=0,
    )
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    assert row["has_pv"] is False
    assert row["pv_capacity_source"] == "none"
    assert row["pv_percent_roof_area"] is None
def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None:
# Arrange — two windows with transmission details; one without.
sap_windows = [

View file

@ -16,6 +16,7 @@ from datatypes.epc.domain.epc import Epc
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
SapBuildingPart,
SapEnergySource,
SapHeating,
SapWindow,
)
@ -337,6 +338,48 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
dtype=int, nullable=True, categorical=True,
description="Secondary heating fuel SAP10 code (shares main_fuel enum).",
),
# PV — has-pv + measured-vs-estimated capacity + array aggregates
"has_pv": ColumnSpec(
dtype=bool, nullable=False,
description="True if the property has any photovoltaic system (measured or estimated).",
),
"pv_capacity_source": ColumnSpec(
dtype=str, nullable=False, categorical=True,
description=(
"How PV capacity is known: 'measured' (per-array peak_power available), "
"'estimated_from_roof_area' (only percent_roof_area), or 'none'."
),
),
"pv_array_count": ColumnSpec(
dtype=int, nullable=False,
description="Number of measured PV arrays (0 unless capacity_source is 'measured').",
),
"pv_total_peak_power_kw": ColumnSpec(
dtype=float, nullable=False,
description="Sum of peak_power (kW) across measured PV arrays.",
),
**{
f"pv_peak_power_kw_{name}": ColumnSpec(
dtype=float, nullable=False,
description=(
f"Sum of peak_power (kW) for measured PV arrays facing {name} "
"(SAP orientation code)."
),
)
for name in _OCTANT_NAMES.values()
},
"pv_avg_pitch": ColumnSpec(
dtype=float, nullable=True,
description="Peak-power-weighted mean array pitch (SAP code); null when no measured arrays.",
),
"pv_avg_overshading": ColumnSpec(
dtype=float, nullable=True,
description="Peak-power-weighted mean overshading (SAP code); null when no measured arrays.",
),
"pv_percent_roof_area": ColumnSpec(
dtype=int, nullable=True,
description="Percent of roof covered by PV — populated only when capacity_source = 'estimated_from_roof_area'.",
),
}
@ -414,6 +457,7 @@ class EpcMlTransform:
window_aggregates = _window_aggregates(epc.sap_windows)
building_part_aggregates = _building_part_aggregates(epc.sap_building_parts)
heating_aggregates = _heating_aggregates(epc.sap_heating)
pv_aggregates = _pv_aggregates(epc.sap_energy_source)
return {
# Features — geometry
"total_floor_area_m2": epc.total_floor_area_m2,
@ -450,6 +494,8 @@ class EpcMlTransform:
**building_part_aggregates,
# Features — heating system (primary slot + water + secondary)
**heating_aggregates,
# Features — PV (capacity source + array aggregates by SAP octant)
**pv_aggregates,
# Targets
"sap_score": epc.energy_rating_current,
"co2_emissions": epc.co2_emissions_current,
@ -472,6 +518,57 @@ def _peui_ucl(epc: EpcPropertyData) -> Optional[float]:
return apply_ucl_correction(float(epc.energy_consumption_current), band)
def _pv_aggregates(es: SapEnergySource) -> dict[str, Any]:
    """Aggregate the PV side of sap_energy_source into 15 columns.

    `pv_capacity_source` discriminates the three PV states:

    - 'measured': es.photovoltaic_arrays is non-empty; the array aggregates
      (count, total and per-octant peak power, weighted pitch/overshading)
      are populated and the function returns without consulting
      photovoltaic_supply.
    - 'estimated_from_roof_area': no arrays, but
      photovoltaic_supply.none_or_no_details.percent_roof_area > 0.
    - 'none': everything else — no payload, missing details, a missing
      percentage, or percent_roof_area == 0.

    Args:
        es: Energy-source section of the EPC; only its `photovoltaic_arrays`
            and `photovoltaic_supply` attributes are read.

    Returns:
        A dict with the keys has_pv, pv_capacity_source, pv_array_count,
        pv_total_peak_power_kw, one pv_peak_power_kw_<octant> per value of
        _OCTANT_NAMES, pv_avg_pitch, pv_avg_overshading and
        pv_percent_roof_area.
    """
    # Start from the "no PV" row; the branches below overwrite what they know.
    aggregates: dict[str, Any] = {
        "has_pv": False,
        "pv_capacity_source": "none",
        "pv_array_count": 0,
        "pv_total_peak_power_kw": 0.0,
        **{f"pv_peak_power_kw_{name}": 0.0 for name in _OCTANT_NAMES.values()},
        "pv_avg_pitch": None,
        "pv_avg_overshading": None,
        "pv_percent_roof_area": None,
    }

    arrays = es.photovoltaic_arrays
    if arrays:
        total_power = 0.0
        weighted_pitch = 0.0
        weighted_overshading = 0.0
        for array in arrays:
            power = array.peak_power
            total_power += power
            weighted_pitch += array.pitch * power
            weighted_overshading += array.overshading * power
            # An orientation code missing from _OCTANT_NAMES still counts
            # towards the total but is not attributed to any octant column.
            octant = _OCTANT_NAMES.get(array.orientation)
            if octant is not None:
                aggregates[f"pv_peak_power_kw_{octant}"] += power

        aggregates["has_pv"] = True
        aggregates["pv_capacity_source"] = "measured"
        aggregates["pv_array_count"] = len(arrays)
        aggregates["pv_total_peak_power_kw"] = total_power
        # Weighted means are undefined when the arrays sum to zero power;
        # leave them null rather than divide by zero.
        if total_power > 0:
            aggregates["pv_avg_pitch"] = weighted_pitch / total_power
            aggregates["pv_avg_overshading"] = weighted_overshading / total_power
        return aggregates

    # Estimated branch. Each level of the payload is treated as possibly
    # absent so an incomplete supply record degrades to 'none' instead of
    # raising AttributeError / TypeError.
    supply = es.photovoltaic_supply
    details = supply.none_or_no_details if supply is not None else None
    percent_roof_area = details.percent_roof_area if details is not None else None
    if percent_roof_area is not None and percent_roof_area > 0:
        aggregates["has_pv"] = True
        aggregates["pv_capacity_source"] = "estimated_from_roof_area"
        aggregates["pv_percent_roof_area"] = percent_roof_area
    return aggregates
def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]:
"""Aggregate sap_heating into 15 heating-feature columns.