From 706d1b5b662b71d5bedabf184eb18d81766db322 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 16 May 2026 16:04:15 +0000 Subject: [PATCH] slice 11a: PV array aggregates + capacity_source flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fifteen PV features land: has_pv (bool), pv_capacity_source (str categorical: measured / estimated_from_roof_area / none), pv_array_count, pv_total_peak_power_kw, eight peak-power-by-octant columns (pv_peak_power_kw_{N..NW}), peak-power-weighted pv_avg_pitch and pv_avg_overshading (nullable), and pv_percent_roof_area (nullable — populated only on the estimated branch). Dispatches on the SAP10 EpcPropertyData.SapEnergySource shapes added in slice 10.5: photovoltaic_arrays populates → measured; photovoltaic_supply.none_or_no_details.percent_roof_area > 0 → estimated; everything else → none. percent_roof_area == 0 is the canonical no-PV payload and surfaces as 'none', not 'estimated'. Co-Authored-By: Claude Opus 4.7 --- .../domain/src/domain/ml/tests/_fixtures.py | 33 +++++ .../src/domain/ml/tests/test_transform.py | 130 ++++++++++++++++++ packages/domain/src/domain/ml/transform.py | 97 +++++++++++++ 3 files changed, 260 insertions(+) diff --git a/packages/domain/src/domain/ml/tests/_fixtures.py b/packages/domain/src/domain/ml/tests/_fixtures.py index 5f34cfca..51cde6dc 100644 --- a/packages/domain/src/domain/ml/tests/_fixtures.py +++ b/packages/domain/src/domain/ml/tests/_fixtures.py @@ -15,6 +15,9 @@ from datatypes.epc.domain.epc_property_data import ( EpcPropertyData, InstantaneousWwhrs, MainHeatingDetail, + PhotovoltaicArray, + PhotovoltaicSupply, + PhotovoltaicSupplyNoneOrNoDetails, RenewableHeatIncentive, SapBuildingPart, SapEnergySource, @@ -26,6 +29,22 @@ from datatypes.epc.domain.epc_property_data import ( ) +def make_pv_array( + *, + peak_power: float = 2.0, + pitch: int = 2, + orientation: int = 5, + overshading: int = 1, +) -> PhotovoltaicArray: + """Build a 
PhotovoltaicArray with SAP10 defaults (2 kW, S-facing).""" + return PhotovoltaicArray( + peak_power=peak_power, + pitch=pitch, + orientation=orientation, + overshading=overshading, + ) + + def make_main_heating_detail( *, main_fuel_type: Union[int, str] = 26, # mains gas (not community) @@ -191,6 +210,8 @@ def make_minimal_sap10_epc( sap_windows: Optional[list[SapWindow]] = None, sap_building_parts: Optional[list[SapBuildingPart]] = None, sap_heating: Optional[SapHeating] = None, + photovoltaic_arrays: Optional[list[PhotovoltaicArray]] = None, + photovoltaic_supply_percent_roof_area: Optional[int] = None, ) -> EpcPropertyData: """Construct a minimal valid SAP10 EpcPropertyData with parametrisable targets.""" return EpcPropertyData( @@ -221,6 +242,18 @@ def make_minimal_sap10_epc( is_dwelling_export_capable=False, wind_turbines_terrain_type="Suburban", electricity_smart_meter_present=False, + photovoltaic_arrays=list(photovoltaic_arrays) + if photovoltaic_arrays is not None + else None, + photovoltaic_supply=( + PhotovoltaicSupply( + none_or_no_details=PhotovoltaicSupplyNoneOrNoDetails( + percent_roof_area=photovoltaic_supply_percent_roof_area + ) + ) + if photovoltaic_supply_percent_roof_area is not None + else None + ), ), sap_building_parts=list(sap_building_parts) if sap_building_parts is not None else [], solar_water_heating=solar_water_heating, diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py index 0e43344e..c358893b 100644 --- a/packages/domain/src/domain/ml/tests/test_transform.py +++ b/packages/domain/src/domain/ml/tests/test_transform.py @@ -9,6 +9,7 @@ from domain.ml.tests._fixtures import ( make_floor_dimension, make_main_heating_detail, make_minimal_sap10_epc, + make_pv_array, make_sap_heating, make_window, ) @@ -812,6 +813,135 @@ def test_to_row_returns_primary_heating_nones_when_no_main_heating_details() -> assert row["primary_central_heating_pump_age"] is None 
# Expected schema entry for every PV feature column:
# name → (dtype, nullable, categorical)
_PV_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
    "has_pv": (bool, False, False),
    "pv_capacity_source": (str, False, True),
    "pv_array_count": (int, False, False),
    "pv_total_peak_power_kw": (float, False, False),
    **{
        f"pv_peak_power_kw_{octant}": (float, False, False)
        for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    },
    "pv_avg_pitch": (float, True, False),
    "pv_avg_overshading": (float, True, False),
    "pv_percent_roof_area": (int, True, False),
}


def test_schema_advertises_pv_features() -> None:
    # Arrange
    transform = EpcMlTransform()

    # Act
    feature_columns = transform.schema().feature_columns

    # Assert — every PV column is present with the advertised spec
    for name, expected in _PV_FEATURES_NULLABLE.items():
        dtype, nullable, categorical = expected
        assert name in feature_columns, name
        spec = feature_columns[name]
        assert spec.dtype is dtype, name
        assert spec.nullable is nullable, name
        assert spec.categorical is categorical, name


def test_to_row_aggregates_measured_pv_arrays() -> None:
    # Arrange — S-facing arrays of 2.04 kW (pitch 2, overshading 1) and 1.86 kW
    # (pitch 3, overshading 2), plus a single 1.0 kW NW-facing array.
    south_a = make_pv_array(peak_power=2.04, pitch=2, orientation=5, overshading=1)
    south_b = make_pv_array(peak_power=1.86, pitch=3, orientation=5, overshading=2)
    north_west = make_pv_array(peak_power=1.0, pitch=2, orientation=8, overshading=1)
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        photovoltaic_arrays=[south_a, south_b, north_west],
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    assert row["has_pv"] is True
    assert row["pv_capacity_source"] == "measured"
    assert row["pv_array_count"] == 3
    assert row["pv_total_peak_power_kw"] == pytest.approx(4.9)
    # Per-octant power: S = 2.04 + 1.86 = 3.9, NW = 1.0, all others untouched
    assert row["pv_peak_power_kw_S"] == pytest.approx(3.9)
    assert row["pv_peak_power_kw_NW"] == pytest.approx(1.0)
    empty_octants = ("N", "NE", "E", "SE", "SW", "W")
    for octant in empty_octants:
        assert row[f"pv_peak_power_kw_{octant}"] == 0.0
    # Pitch weighted by power: (2.04*2 + 1.86*3 + 1.0*2) / 4.9 = 11.66 / 4.9 ≈ 2.380
    assert row["pv_avg_pitch"] == pytest.approx(11.66 / 4.9)
    # Overshading weighted by power: (2.04*1 + 1.86*2 + 1.0*1) / 4.9 = 6.76 / 4.9 ≈ 1.379
    assert row["pv_avg_overshading"] == pytest.approx(6.76 / 4.9)
    # The measured branch never reports a roof-area percentage
    assert row["pv_percent_roof_area"] is None


def test_to_row_uses_percent_roof_area_when_pv_not_measured() -> None:
    # Arrange — the array configuration is unconfirmed; percent_roof_area is
    # the only PV information on the certificate
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        photovoltaic_supply_percent_roof_area=25,
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    assert row["has_pv"] is True
    assert row["pv_capacity_source"] == "estimated_from_roof_area"
    assert row["pv_array_count"] == 0
    assert row["pv_total_peak_power_kw"] == 0.0
    assert row["pv_percent_roof_area"] == 25
    assert row["pv_avg_pitch"] is None
    assert row["pv_avg_overshading"] is None


def test_to_row_returns_pv_no_when_no_pv_data() -> None:
    # Arrange — neither measured arrays nor a percent_roof_area payload
    epc = make_minimal_sap10_epc(energy_rating_current=82)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    assert row["has_pv"] is False
    assert row["pv_capacity_source"] == "none"
    assert row["pv_array_count"] == 0
    assert row["pv_total_peak_power_kw"] == 0.0
    all_octants = ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    for octant in all_octants:
        assert row[f"pv_peak_power_kw_{octant}"] == 0.0
    assert row["pv_percent_roof_area"] is None
    assert row["pv_avg_pitch"] is None
    assert row["pv_avg_overshading"] is None


def test_to_row_treats_zero_percent_roof_area_as_no_pv() -> None:
    # Arrange — `photovoltaic_supply.none_or_no_details.percent_roof_area = 0` is
    # the canonical "no PV" payload on schema-21 EPCs.
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        photovoltaic_supply_percent_roof_area=0,
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    assert row["has_pv"] is False
    assert row["pv_capacity_source"] == "none"
    assert row["pv_percent_roof_area"] is None
sap_windows = [ diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py index 95ddf45b..9aa9a595 100644 --- a/packages/domain/src/domain/ml/transform.py +++ b/packages/domain/src/domain/ml/transform.py @@ -16,6 +16,7 @@ from datatypes.epc.domain.epc import Epc from datatypes.epc.domain.epc_property_data import ( EpcPropertyData, SapBuildingPart, + SapEnergySource, SapHeating, SapWindow, ) @@ -337,6 +338,48 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = { dtype=int, nullable=True, categorical=True, description="Secondary heating fuel SAP10 code (shares main_fuel enum).", ), + # PV — has-pv + measured-vs-estimated capacity + array aggregates + "has_pv": ColumnSpec( + dtype=bool, nullable=False, + description="True if the property has any photovoltaic system (measured or estimated).", + ), + "pv_capacity_source": ColumnSpec( + dtype=str, nullable=False, categorical=True, + description=( + "How PV capacity is known: 'measured' (per-array peak_power available), " + "'estimated_from_roof_area' (only percent_roof_area), or 'none'." + ), + ), + "pv_array_count": ColumnSpec( + dtype=int, nullable=False, + description="Number of measured PV arrays (0 unless capacity_source is 'measured').", + ), + "pv_total_peak_power_kw": ColumnSpec( + dtype=float, nullable=False, + description="Sum of peak_power (kW) across measured PV arrays.", + ), + **{ + f"pv_peak_power_kw_{name}": ColumnSpec( + dtype=float, nullable=False, + description=( + f"Sum of peak_power (kW) for measured PV arrays facing {name} " + "(SAP orientation code)." 
def _pv_aggregates(es: SapEnergySource) -> dict[str, Any]:
    """Aggregate the PV side of sap_energy_source into 15 columns.

    `pv_capacity_source` discriminates the three PV states:
    - 'measured': es.photovoltaic_arrays is non-empty — array aggregates populate
    - 'estimated_from_roof_area': only percent_roof_area > 0 is known
    - 'none': no PV (no payload, a supply payload without a usable
      percent_roof_area, or percent_roof_area == 0)

    Measured arrays take precedence: when arrays are present the supply
    payload is never consulted and `pv_percent_roof_area` stays None.

    Args:
        es: Energy-source section of the EPC. Only `photovoltaic_arrays` and
            `photovoltaic_supply` are read.

    Returns:
        A dict keyed by the PV feature-column names. Count and power columns
        default to 0 / 0.0; `pv_avg_pitch`, `pv_avg_overshading` and
        `pv_percent_roof_area` default to None.
    """
    aggregates: dict[str, Any] = {
        "has_pv": False,
        "pv_capacity_source": "none",
        "pv_array_count": 0,
        "pv_total_peak_power_kw": 0.0,
        **{f"pv_peak_power_kw_{name}": 0.0 for name in _OCTANT_NAMES.values()},
        "pv_avg_pitch": None,
        "pv_avg_overshading": None,
        "pv_percent_roof_area": None,
    }

    arrays = es.photovoltaic_arrays
    if arrays:
        total_power = 0.0
        weighted_pitch = 0.0
        weighted_overshading = 0.0
        for array in arrays:
            power = array.peak_power
            total_power += power
            weighted_pitch += array.pitch * power
            weighted_overshading += array.overshading * power
            # An orientation code with no octant mapping still counts towards
            # the total, but is not attributed to any per-octant column.
            octant = _OCTANT_NAMES.get(array.orientation)
            if octant is not None:
                aggregates[f"pv_peak_power_kw_{octant}"] += power
        aggregates["has_pv"] = True
        aggregates["pv_capacity_source"] = "measured"
        aggregates["pv_array_count"] = len(arrays)
        aggregates["pv_total_peak_power_kw"] = total_power
        # Weighted means are undefined when every array reports zero power;
        # leave them as None rather than dividing by zero.
        if total_power > 0:
            aggregates["pv_avg_pitch"] = weighted_pitch / total_power
            aggregates["pv_avg_overshading"] = weighted_overshading / total_power
        return aggregates

    # NOTE(review): `none_or_no_details` and `percent_roof_area` are presumably
    # Optional on the dataclasses (confirm against epc_property_data). Guarding
    # each hop makes a partially-populated supply payload fall through to
    # 'none' instead of raising AttributeError/TypeError.
    supply = es.photovoltaic_supply
    details = supply.none_or_no_details if supply is not None else None
    percent_roof_area = details.percent_roof_area if details is not None else None
    if percent_roof_area is not None and percent_roof_area > 0:
        aggregates["has_pv"] = True
        aggregates["pv_capacity_source"] = "estimated_from_roof_area"
        aggregates["pv_percent_roof_area"] = percent_roof_area

    return aggregates