mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
slice 11a: PV array aggregates + capacity_source flag
Fifteen PV features land: has_pv (bool), pv_capacity_source (str
categorical: measured / estimated_from_roof_area / none),
pv_array_count, pv_total_peak_power_kw, eight peak-power-by-octant
columns (pv_peak_power_kw_{N..NW}), peak-power-weighted
pv_avg_pitch and pv_avg_overshading (nullable), and
pv_percent_roof_area (nullable — populated only on the estimated
branch).
Dispatches on the SAP10 EpcPropertyData.SapEnergySource shapes added
in slice 10.5: photovoltaic_arrays populated → measured;
photovoltaic_supply.none_or_no_details.percent_roof_area > 0 →
estimated; everything else → none. percent_roof_area == 0 is the
canonical no-PV payload and surfaces as 'none', not 'estimated'.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
b050348927
commit
706d1b5b66
3 changed files with 260 additions and 0 deletions
|
|
@ -15,6 +15,9 @@ from datatypes.epc.domain.epc_property_data import (
|
|||
EpcPropertyData,
|
||||
InstantaneousWwhrs,
|
||||
MainHeatingDetail,
|
||||
PhotovoltaicArray,
|
||||
PhotovoltaicSupply,
|
||||
PhotovoltaicSupplyNoneOrNoDetails,
|
||||
RenewableHeatIncentive,
|
||||
SapBuildingPart,
|
||||
SapEnergySource,
|
||||
|
|
@ -26,6 +29,22 @@ from datatypes.epc.domain.epc_property_data import (
|
|||
)
|
||||
|
||||
|
||||
def make_pv_array(
    *,
    peak_power: float = 2.0,
    pitch: int = 2,
    orientation: int = 5,
    overshading: int = 1,
) -> PhotovoltaicArray:
    """Return a PhotovoltaicArray fixture; the defaults describe a 2 kW, S-facing array.

    Every argument is keyword-only and is forwarded unchanged to the
    ``PhotovoltaicArray`` constructor under the same field name.
    """
    array_fields = {
        "peak_power": peak_power,
        "pitch": pitch,
        "orientation": orientation,
        "overshading": overshading,
    }
    return PhotovoltaicArray(**array_fields)
|
||||
|
||||
|
||||
def make_main_heating_detail(
|
||||
*,
|
||||
main_fuel_type: Union[int, str] = 26, # mains gas (not community)
|
||||
|
|
@ -191,6 +210,8 @@ def make_minimal_sap10_epc(
|
|||
sap_windows: Optional[list[SapWindow]] = None,
|
||||
sap_building_parts: Optional[list[SapBuildingPart]] = None,
|
||||
sap_heating: Optional[SapHeating] = None,
|
||||
photovoltaic_arrays: Optional[list[PhotovoltaicArray]] = None,
|
||||
photovoltaic_supply_percent_roof_area: Optional[int] = None,
|
||||
) -> EpcPropertyData:
|
||||
"""Construct a minimal valid SAP10 EpcPropertyData with parametrisable targets."""
|
||||
return EpcPropertyData(
|
||||
|
|
@ -221,6 +242,18 @@ def make_minimal_sap10_epc(
|
|||
is_dwelling_export_capable=False,
|
||||
wind_turbines_terrain_type="Suburban",
|
||||
electricity_smart_meter_present=False,
|
||||
photovoltaic_arrays=list(photovoltaic_arrays)
|
||||
if photovoltaic_arrays is not None
|
||||
else None,
|
||||
photovoltaic_supply=(
|
||||
PhotovoltaicSupply(
|
||||
none_or_no_details=PhotovoltaicSupplyNoneOrNoDetails(
|
||||
percent_roof_area=photovoltaic_supply_percent_roof_area
|
||||
)
|
||||
)
|
||||
if photovoltaic_supply_percent_roof_area is not None
|
||||
else None
|
||||
),
|
||||
),
|
||||
sap_building_parts=list(sap_building_parts) if sap_building_parts is not None else [],
|
||||
solar_water_heating=solar_water_heating,
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from domain.ml.tests._fixtures import (
|
|||
make_floor_dimension,
|
||||
make_main_heating_detail,
|
||||
make_minimal_sap10_epc,
|
||||
make_pv_array,
|
||||
make_sap_heating,
|
||||
make_window,
|
||||
)
|
||||
|
|
@ -812,6 +813,135 @@ def test_to_row_returns_primary_heating_nones_when_no_main_heating_details() ->
|
|||
assert row["primary_central_heating_pump_age"] is None
|
||||
|
||||
|
||||
# Expected schema entry for each PV feature column, checked by
# test_schema_advertises_pv_features.
_PV_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
    # name → (dtype, nullable, categorical)
    "has_pv": (bool, False, False),
    "pv_capacity_source": (str, False, True),
    "pv_array_count": (int, False, False),
    "pv_total_peak_power_kw": (float, False, False),
    # One non-nullable, non-categorical float column per compass octant.
    **{
        f"pv_peak_power_kw_{octant}": (float, False, False)
        for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    },
    "pv_avg_pitch": (float, True, False),
    "pv_avg_overshading": (float, True, False),
    "pv_percent_roof_area": (int, True, False),
}
|
||||
|
||||
|
||||
def test_schema_advertises_pv_features() -> None:
    """Each PV feature appears in the schema with the expected dtype and flags."""
    # Arrange
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert — the feature name is the assertion message so a failure
    # identifies the offending column.
    for name, expected in _PV_FEATURES_NULLABLE.items():
        expected_dtype, expected_nullable, expected_categorical = expected
        assert name in schema.feature_columns, name
        column = schema.feature_columns[name]
        assert column.dtype is expected_dtype, name
        assert column.nullable is expected_nullable, name
        assert column.categorical is expected_categorical, name
|
||||
|
||||
|
||||
def test_to_row_aggregates_measured_pv_arrays() -> None:
    """Measured arrays drive the count, totals, per-octant power and weighted means."""
    # Arrange — three arrays given as (peak_power kW, pitch, orientation,
    # overshading): two S-facing (orientation 5) and one NW-facing (orientation 8).
    array_specs = [
        (2.04, 2, 5, 1),
        (1.86, 3, 5, 2),
        (1.0, 2, 8, 1),
    ]
    arrays = [
        make_pv_array(
            peak_power=power,
            pitch=pitch,
            orientation=orientation,
            overshading=overshading,
        )
        for power, pitch, orientation, overshading in array_specs
    ]
    epc = make_minimal_sap10_epc(energy_rating_current=82, photovoltaic_arrays=arrays)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert — headline flags and totals
    assert row["has_pv"] is True
    assert row["pv_capacity_source"] == "measured"
    assert row["pv_array_count"] == 3
    assert row["pv_total_peak_power_kw"] == pytest.approx(4.9)

    # Power by orientation: S = 2.04 + 1.86 = 3.9; NW = 1.0; every other octant 0.0
    assert row["pv_peak_power_kw_S"] == pytest.approx(3.9)
    assert row["pv_peak_power_kw_NW"] == pytest.approx(1.0)
    for empty_octant in ("N", "NE", "E", "SE", "SW", "W"):
        assert row[f"pv_peak_power_kw_{empty_octant}"] == 0.0

    # Power-weighted pitch: (2.04*2 + 1.86*3 + 1.0*2) / 4.9 = 11.66 / 4.9 ≈ 2.380
    expected_avg_pitch = 11.66 / 4.9
    assert row["pv_avg_pitch"] == pytest.approx(expected_avg_pitch)

    # Power-weighted overshading: (2.04*1 + 1.86*2 + 1.0*1) / 4.9 = 6.76 / 4.9 ≈ 1.379
    expected_avg_overshading = 6.76 / 4.9
    assert row["pv_avg_overshading"] == pytest.approx(expected_avg_overshading)

    # percent_roof_area stays null on the measured branch
    assert row["pv_percent_roof_area"] is None
|
||||
|
||||
|
||||
def test_to_row_uses_percent_roof_area_when_pv_not_measured() -> None:
    """A positive percent_roof_area without arrays yields the 'estimated' branch."""
    # Arrange — no per-array configuration; only percent_roof_area is known
    transform = EpcMlTransform()
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        photovoltaic_supply_percent_roof_area=25,
    )

    # Act
    row = transform.to_row(epc)

    # Assert — PV is present, but only the roof-area column carries a value
    assert row["has_pv"] is True
    assert row["pv_capacity_source"] == "estimated_from_roof_area"
    assert row["pv_percent_roof_area"] == 25

    # Array-derived columns keep their empty values
    assert row["pv_array_count"] == 0
    assert row["pv_total_peak_power_kw"] == 0.0
    assert row["pv_avg_pitch"] is None
    assert row["pv_avg_overshading"] is None
|
||||
|
||||
|
||||
def test_to_row_returns_pv_no_when_no_pv_data() -> None:
    """With neither arrays nor percent_roof_area, every PV column is empty."""
    # Arrange — fixture built without any PV arguments
    transform = EpcMlTransform()
    epc = make_minimal_sap10_epc(energy_rating_current=82)

    # Act
    row = transform.to_row(epc)

    # Assert — flags and counters report "no PV"
    assert row["has_pv"] is False
    assert row["pv_capacity_source"] == "none"
    assert row["pv_array_count"] == 0
    assert row["pv_total_peak_power_kw"] == 0.0

    # All eight octant columns are exactly zero
    all_octants = ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    for octant in all_octants:
        assert row[f"pv_peak_power_kw_{octant}"] == 0.0

    # Nullable columns stay null
    assert row["pv_percent_roof_area"] is None
    assert row["pv_avg_pitch"] is None
    assert row["pv_avg_overshading"] is None
|
||||
|
||||
|
||||
def test_to_row_treats_zero_percent_roof_area_as_no_pv() -> None:
    """percent_roof_area == 0 surfaces as 'none', not 'estimated_from_roof_area'."""
    # Arrange — `photovoltaic_supply.none_or_no_details.percent_roof_area = 0` is
    # the canonical "no PV" payload on schema-21 EPCs.
    transform = EpcMlTransform()
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        photovoltaic_supply_percent_roof_area=0,
    )

    # Act
    row = transform.to_row(epc)

    # Assert
    assert row["has_pv"] is False
    assert row["pv_capacity_source"] == "none"
    assert row["pv_percent_roof_area"] is None
|
||||
|
||||
|
||||
def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None:
|
||||
# Arrange — two windows with transmission details; one without.
|
||||
sap_windows = [
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from datatypes.epc.domain.epc import Epc
|
|||
from datatypes.epc.domain.epc_property_data import (
|
||||
EpcPropertyData,
|
||||
SapBuildingPart,
|
||||
SapEnergySource,
|
||||
SapHeating,
|
||||
SapWindow,
|
||||
)
|
||||
|
|
@ -337,6 +338,48 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
|
|||
dtype=int, nullable=True, categorical=True,
|
||||
description="Secondary heating fuel SAP10 code (shares main_fuel enum).",
|
||||
),
|
||||
# PV — has-pv + measured-vs-estimated capacity + array aggregates
|
||||
"has_pv": ColumnSpec(
|
||||
dtype=bool, nullable=False,
|
||||
description="True if the property has any photovoltaic system (measured or estimated).",
|
||||
),
|
||||
"pv_capacity_source": ColumnSpec(
|
||||
dtype=str, nullable=False, categorical=True,
|
||||
description=(
|
||||
"How PV capacity is known: 'measured' (per-array peak_power available), "
|
||||
"'estimated_from_roof_area' (only percent_roof_area), or 'none'."
|
||||
),
|
||||
),
|
||||
"pv_array_count": ColumnSpec(
|
||||
dtype=int, nullable=False,
|
||||
description="Number of measured PV arrays (0 unless capacity_source is 'measured').",
|
||||
),
|
||||
"pv_total_peak_power_kw": ColumnSpec(
|
||||
dtype=float, nullable=False,
|
||||
description="Sum of peak_power (kW) across measured PV arrays.",
|
||||
),
|
||||
**{
|
||||
f"pv_peak_power_kw_{name}": ColumnSpec(
|
||||
dtype=float, nullable=False,
|
||||
description=(
|
||||
f"Sum of peak_power (kW) for measured PV arrays facing {name} "
|
||||
"(SAP orientation code)."
|
||||
),
|
||||
)
|
||||
for name in _OCTANT_NAMES.values()
|
||||
},
|
||||
"pv_avg_pitch": ColumnSpec(
|
||||
dtype=float, nullable=True,
|
||||
description="Peak-power-weighted mean array pitch (SAP code); null when no measured arrays.",
|
||||
),
|
||||
"pv_avg_overshading": ColumnSpec(
|
||||
dtype=float, nullable=True,
|
||||
description="Peak-power-weighted mean overshading (SAP code); null when no measured arrays.",
|
||||
),
|
||||
"pv_percent_roof_area": ColumnSpec(
|
||||
dtype=int, nullable=True,
|
||||
description="Percent of roof covered by PV — populated only when capacity_source = 'estimated_from_roof_area'.",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -414,6 +457,7 @@ class EpcMlTransform:
|
|||
window_aggregates = _window_aggregates(epc.sap_windows)
|
||||
building_part_aggregates = _building_part_aggregates(epc.sap_building_parts)
|
||||
heating_aggregates = _heating_aggregates(epc.sap_heating)
|
||||
pv_aggregates = _pv_aggregates(epc.sap_energy_source)
|
||||
return {
|
||||
# Features — geometry
|
||||
"total_floor_area_m2": epc.total_floor_area_m2,
|
||||
|
|
@ -450,6 +494,8 @@ class EpcMlTransform:
|
|||
**building_part_aggregates,
|
||||
# Features — heating system (primary slot + water + secondary)
|
||||
**heating_aggregates,
|
||||
# Features — PV (capacity source + array aggregates by SAP octant)
|
||||
**pv_aggregates,
|
||||
# Targets
|
||||
"sap_score": epc.energy_rating_current,
|
||||
"co2_emissions": epc.co2_emissions_current,
|
||||
|
|
@ -472,6 +518,57 @@ def _peui_ucl(epc: EpcPropertyData) -> Optional[float]:
|
|||
return apply_ucl_correction(float(epc.energy_consumption_current), band)
|
||||
|
||||
|
||||
def _pv_aggregates(es: SapEnergySource) -> dict[str, Any]:
    """Aggregate the PV side of sap_energy_source into 15 columns.

    `pv_capacity_source` discriminates the three PV states:
    - 'measured': es.photovoltaic_arrays is non-empty — array aggregates populate
    - 'estimated_from_roof_area': only percent_roof_area > 0 is known
    - 'none': no PV (no payload, a supply without none_or_no_details or
      without percent_roof_area, or percent_roof_area == 0)

    Args:
        es: Energy-source record; only `photovoltaic_arrays` and
            `photovoltaic_supply` are read.

    Returns:
        A dict with `has_pv`, `pv_capacity_source`, `pv_array_count`,
        `pv_total_peak_power_kw`, one `pv_peak_power_kw_<octant>` entry per
        value of `_OCTANT_NAMES`, `pv_avg_pitch`, `pv_avg_overshading` and
        `pv_percent_roof_area`.
    """
    # Start from the "no PV" row; each branch below overrides what it knows.
    aggregates: dict[str, Any] = {
        "has_pv": False,
        "pv_capacity_source": "none",
        "pv_array_count": 0,
        "pv_total_peak_power_kw": 0.0,
        **{f"pv_peak_power_kw_{name}": 0.0 for name in _OCTANT_NAMES.values()},
        "pv_avg_pitch": None,
        "pv_avg_overshading": None,
        "pv_percent_roof_area": None,
    }

    arrays = es.photovoltaic_arrays
    if arrays:
        total_power = 0.0
        weighted_pitch = 0.0
        weighted_overshading = 0.0
        for array in arrays:
            power = array.peak_power
            total_power += power
            weighted_pitch += array.pitch * power
            weighted_overshading += array.overshading * power
            # An orientation code outside _OCTANT_NAMES still counts toward
            # the total but is not attributed to any octant column.
            if array.orientation in _OCTANT_NAMES:
                octant = _OCTANT_NAMES[array.orientation]
                aggregates[f"pv_peak_power_kw_{octant}"] += power

        aggregates["has_pv"] = True
        aggregates["pv_capacity_source"] = "measured"
        aggregates["pv_array_count"] = len(arrays)
        aggregates["pv_total_peak_power_kw"] = total_power
        # Leave the weighted means null when total power is not positive,
        # which also avoids dividing by zero.
        if total_power > 0:
            aggregates["pv_avg_pitch"] = weighted_pitch / total_power
            aggregates["pv_avg_overshading"] = weighted_overshading / total_power
        return aggregates

    # Estimated branch. Walk the optional chain one level at a time so a
    # supply without none_or_no_details, or without a percent_roof_area,
    # falls through to 'none' instead of raising AttributeError / TypeError.
    supply = es.photovoltaic_supply
    details = supply.none_or_no_details if supply is not None else None
    percent_roof_area = details.percent_roof_area if details is not None else None
    if percent_roof_area is not None and percent_roof_area > 0:
        aggregates["has_pv"] = True
        aggregates["pv_capacity_source"] = "estimated_from_roof_area"
        aggregates["pv_percent_roof_area"] = percent_roof_area

    return aggregates
|
||||
|
||||
|
||||
def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]:
|
||||
"""Aggregate sap_heating into 15 heating-feature columns.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue