"""Tests for EpcMlTransform v0.1.0 — schema-contract surface and target extraction.""" import pandas as pd import pytest from datatypes.epc.domain.epc_property_data import ( BuildingPartIdentifier, SapRoomInRoof, WindowTransmissionDetails, ) from domain.sap10_ml.schema import ColumnSpec, TransformSchema from domain.sap10_ml.tests._fixtures import ( make_building_part, make_floor_dimension, make_main_heating_detail, make_minimal_sap10_epc, make_pv_array, make_sap_heating, make_window, ) from domain.sap10_ml.transform import EpcMlTransform _EXPECTED_TARGET_DTYPES: dict[str, type] = { "sap_score": int, "co2_emissions": float, "peui_raw": int, "peui_ucl": float, "space_heating_kwh": float, "hot_water_kwh": float, } def test_transform_advertises_version_and_target_columns() -> None: # Arrange transform = EpcMlTransform() # Act schema = transform.schema() # Assert assert isinstance(schema, TransformSchema) assert schema.transform_version == "2.7.1" assert schema.transform_version == EpcMlTransform.VERSION assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys()) for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items(): column = schema.target_columns[target_name] assert isinstance(column, ColumnSpec) assert column.dtype is expected_dtype def test_to_row_extracts_targets_from_epc_property_data() -> None: # Arrange epc = make_minimal_sap10_epc( energy_rating_current=82, co2_emissions_current=2.7, energy_consumption_current=232, space_heating_kwh=10128.81, water_heating_kwh=2166.19, ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["sap_score"] == 82 assert row["co2_emissions"] == 2.7 assert row["peui_raw"] == 232 assert row["space_heating_kwh"] == 10128.81 assert row["hot_water_kwh"] == 2166.19 def test_to_row_applies_ucl_correction_in_band_e() -> None: # Arrange — SAP 45 = band E; Few et al. 2023 band-E correction is non-trivial epc = make_minimal_sap10_epc( energy_rating_current=45, energy_consumption_current=300, ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert # Band E: gradient=-0.70, intercept=160 → cd = -0.70*300 + 160 = -50 # adjusted = 300 + (-50) = 250.0 assert row["peui_ucl"] == 250.0 def test_to_row_clamps_ucl_correction_when_band_b_would_increase_peui() -> None: # Arrange — SAP 82 = band B; per-band linear correction yields a *positive* # consumption_difference for this PEUI, which must be clamped to zero # (EPCs over-predict only — we never adjust upwards). epc = make_minimal_sap10_epc( energy_rating_current=82, energy_consumption_current=232, ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert # Band B: gradient=-0.10, intercept=28 → cd = -0.10*232 + 28 = +4.8 → clamp to 0 # adjusted = 232 + 0 = 232.0 assert row["peui_ucl"] == 232.0 def test_schema_advertises_total_floor_area_m2_feature() -> None: # Arrange transform = EpcMlTransform() # Act schema = transform.schema() # Assert assert "total_floor_area_m2" in schema.feature_columns column = schema.feature_columns["total_floor_area_m2"] assert isinstance(column, ColumnSpec) assert column.dtype is float assert column.nullable is False def test_to_row_extracts_total_floor_area_m2() -> None: # Arrange epc = make_minimal_sap10_epc(energy_rating_current=82) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert # make_minimal_sap10_epc sets total_floor_area_m2=70.0 by default assert row["total_floor_area_m2"] == 70.0 _EXPECTED_COUNT_FEATURES: dict[str, type] = { "door_count": int, "habitable_rooms_count": int, "heated_rooms_count": int, "wet_rooms_count": int, "extensions_count": int, "open_chimneys_count": int, "insulated_door_count": int, "cfl_fixed_lighting_bulbs_count": int, "led_fixed_lighting_bulbs_count": int, "incandescent_fixed_lighting_bulbs_count": int, } def test_schema_advertises_count_features() -> None: # Arrange transform = EpcMlTransform() # Act schema = transform.schema() # Assert for feature_name, expected_dtype in _EXPECTED_COUNT_FEATURES.items(): assert feature_name in schema.feature_columns, feature_name column = schema.feature_columns[feature_name] assert isinstance(column, ColumnSpec) assert column.dtype is expected_dtype assert column.nullable is False def test_to_row_extracts_count_features() -> None: # Arrange epc = make_minimal_sap10_epc( energy_rating_current=82, door_count=3, habitable_rooms_count=5, heated_rooms_count=4, wet_rooms_count=1, extensions_count=1, open_chimneys_count=0, insulated_door_count=2, cfl_fixed_lighting_bulbs_count=0, led_fixed_lighting_bulbs_count=8, incandescent_fixed_lighting_bulbs_count=2, ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["door_count"] == 3 assert row["habitable_rooms_count"] == 5 assert row["heated_rooms_count"] == 4 assert row["wet_rooms_count"] == 1 assert row["extensions_count"] == 1 assert row["open_chimneys_count"] == 0 assert row["insulated_door_count"] == 2 assert row["cfl_fixed_lighting_bulbs_count"] == 0 assert row["led_fixed_lighting_bulbs_count"] == 8 assert row["incandescent_fixed_lighting_bulbs_count"] == 2 _EXPECTED_FLAT_BOOLEAN_FEATURES: tuple[str, ...] = ( "solar_water_heating", "has_hot_water_cylinder", "has_fixed_air_conditioning", ) _EXPECTED_OPTIONAL_INT_FEATURES: tuple[str, ...] = ( "percent_draughtproofed", ) def test_schema_advertises_boolean_and_optional_int_features() -> None: # Arrange transform = EpcMlTransform() # Act schema = transform.schema() # Assert for feature_name in _EXPECTED_FLAT_BOOLEAN_FEATURES: assert feature_name in schema.feature_columns, feature_name column = schema.feature_columns[feature_name] assert column.dtype is bool assert column.nullable is False for feature_name in _EXPECTED_OPTIONAL_INT_FEATURES: assert feature_name in schema.feature_columns, feature_name column = schema.feature_columns[feature_name] assert column.dtype is int assert column.nullable is True def test_to_row_extracts_boolean_and_optional_int_features() -> None: # Arrange epc = make_minimal_sap10_epc( energy_rating_current=82, solar_water_heating=True, has_hot_water_cylinder=True, has_fixed_air_conditioning=False, percent_draughtproofed=100, ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["solar_water_heating"] is True assert row["has_hot_water_cylinder"] is True assert row["has_fixed_air_conditioning"] is False assert row["percent_draughtproofed"] == 100 _NULLABLE_CATEGORICAL_FEATURES: tuple[str, ...] = ( "property_type", "built_form", "region_code", "country_code", ) _NON_NULLABLE_CATEGORICAL_FEATURES: tuple[str, ...] = ( "dwelling_type", "transaction_type", ) def test_schema_advertises_categorical_features() -> None: # Arrange transform = EpcMlTransform() # Act schema = transform.schema() # Assert for feature_name in _NULLABLE_CATEGORICAL_FEATURES: assert feature_name in schema.feature_columns, feature_name column = schema.feature_columns[feature_name] assert column.dtype is str assert column.categorical is True assert column.nullable is True for feature_name in _NON_NULLABLE_CATEGORICAL_FEATURES: assert feature_name in schema.feature_columns, feature_name column = schema.feature_columns[feature_name] assert column.dtype is str assert column.categorical is True assert column.nullable is False def test_to_row_extracts_categorical_features() -> None: # Arrange epc = make_minimal_sap10_epc( energy_rating_current=82, dwelling_type="End-terrace house", transaction_type="8", property_type="0", built_form="2", region_code="6", country_code="ENG", ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["dwelling_type"] == "End-terrace house" assert "tenure" not in row assert row["transaction_type"] == "8" assert row["property_type"] == "0" assert row["built_form"] == "2" assert row["region_code"] == "6" assert row["country_code"] == "ENG" _WINDOW_PHYSICS_FEATURES_NULLABLE: dict[str, tuple[type, bool]] = { "window_count": (int, False), "window_total_area_m2": (float, False), "window_area_orientation_N": (float, False), "window_area_orientation_NE": (float, False), "window_area_orientation_E": (float, False), "window_area_orientation_SE": (float, False), "window_area_orientation_S": (float, False), "window_area_orientation_SW": (float, False), "window_area_orientation_W": (float, False), "window_area_orientation_NW": (float, False), "window_pct_draught_proofed": (float, True), "window_avg_u_value": (float, True), "window_avg_solar_transmittance": (float, True), } def test_schema_advertises_window_physics_features() -> None: # Arrange transform = EpcMlTransform() # Act schema = transform.schema() # Assert for feature_name, (expected_dtype, expected_nullable) in _WINDOW_PHYSICS_FEATURES_NULLABLE.items(): assert feature_name in schema.feature_columns, feature_name column = schema.feature_columns[feature_name] assert column.dtype is expected_dtype assert column.nullable is expected_nullable assert column.categorical is False def test_to_row_aggregates_window_physics_and_orientation() -> None: # Arrange — 3 windows: 2.0 m² S, 1.5 m² N, 1.0 m² E (orientations 5/1/3) sap_windows = [ make_window(orientation=5, width=1.0, height=2.0, draught_proofed=True), make_window(orientation=1, width=1.0, height=1.5, draught_proofed=False), make_window(orientation=3, width=1.0, height=1.0, draught_proofed=True), ] epc = make_minimal_sap10_epc(energy_rating_current=82, sap_windows=sap_windows) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["window_count"] == 3 assert row["window_total_area_m2"] == pytest.approx(4.5) assert row["window_area_orientation_N"] == pytest.approx(1.5) assert row["window_area_orientation_NE"] == 0.0 assert row["window_area_orientation_E"] == pytest.approx(1.0) assert row["window_area_orientation_SE"] == 0.0 assert row["window_area_orientation_S"] == pytest.approx(2.0) assert row["window_area_orientation_SW"] == 0.0 assert row["window_area_orientation_W"] == 0.0 assert row["window_area_orientation_NW"] == 0.0 # area-weighted draught-proofing: (2.0 + 1.0) / 4.5 * 100 = 66.66...% assert row["window_pct_draught_proofed"] == pytest.approx(66.666, abs=0.01) assert row["window_avg_u_value"] is None assert row["window_avg_solar_transmittance"] is None def test_to_row_skips_windows_with_unrecorded_orientation() -> None: # Arrange — two S windows + one with orientation=0 (horizontal/unrecorded); # the unrecorded one contributes to count and total_area but to no octant. sap_windows = [ make_window(orientation=5, width=1.0, height=2.0), make_window(orientation=5, width=1.0, height=1.0), make_window(orientation=0, width=1.0, height=0.5), ] epc = make_minimal_sap10_epc(energy_rating_current=82, sap_windows=sap_windows) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["window_count"] == 3 assert row["window_total_area_m2"] == pytest.approx(3.5) assert row["window_area_orientation_S"] == pytest.approx(3.0) # The horizontal window's 0.5 m² is not assigned to any octant sum_octants = sum( row[f"window_area_orientation_{c}"] for c in ("N", "NE", "E", "SE", "S", "SW", "W", "NW") ) assert sum_octants == pytest.approx(3.0) def test_to_row_returns_window_zeros_for_property_with_no_windows() -> None: # Arrange epc = make_minimal_sap10_epc(energy_rating_current=82) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["window_count"] == 0 assert row["window_total_area_m2"] == 0.0 for cardinal in ("N", "NE", "E", "SE", "S", "SW", "W", "NW"): assert row[f"window_area_orientation_{cardinal}"] == 0.0 assert row["window_pct_draught_proofed"] is None assert row["window_avg_u_value"] is None assert row["window_avg_solar_transmittance"] is None _GLAZED_TYPE_CODES: tuple[int, ...] = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) def test_schema_advertises_window_categorical_share_features() -> None: # Arrange transform = EpcMlTransform() # Act schema = transform.schema() # Assert — one float share per known glazed_type code + `_other`, plus pvc_frame share for code in _GLAZED_TYPE_CODES: name = f"window_pct_glazed_type_{code}" assert name in schema.feature_columns, name column = schema.feature_columns[name] assert column.dtype is float assert column.nullable is False assert column.categorical is False assert "window_pct_glazed_type_other" in schema.feature_columns assert "window_pct_pvc_frame" in schema.feature_columns assert schema.feature_columns["window_pct_pvc_frame"].dtype is float assert schema.feature_columns["window_pct_pvc_frame"].nullable is True def test_to_row_aggregates_glazed_type_and_pvc_frame_shares() -> None: # Arrange — three windows: 3.0 m² glazed_type=2 PVC, 1.5 m² glazed_type=13 PVC, # 0.5 m² glazed_type=5 (single, no PVC). Total area = 5.0 m². sap_windows = [ make_window(width=1.5, height=2.0, glazing_type=2, frame_material="PVC"), make_window(width=1.0, height=1.5, glazing_type=13, frame_material="PVC"), make_window(width=0.5, height=1.0, glazing_type=5, frame_material=None), ] epc = make_minimal_sap10_epc(energy_rating_current=82, sap_windows=sap_windows) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert # Shares (area-weighted) — 3.0/5.0=0.6 type 2; 1.5/5.0=0.3 type 13; 0.5/5.0=0.1 type 5. assert row["window_pct_glazed_type_2"] == pytest.approx(0.6) assert row["window_pct_glazed_type_13"] == pytest.approx(0.3) assert row["window_pct_glazed_type_5"] == pytest.approx(0.1) # All other known glazed_type codes are zero. for code in _GLAZED_TYPE_CODES: if code not in (2, 5, 13): assert row[f"window_pct_glazed_type_{code}"] == 0.0 assert row["window_pct_glazed_type_other"] == 0.0 # PVC frame area share: (3.0 + 1.5) / 5.0 = 0.9 assert row["window_pct_pvc_frame"] == pytest.approx(0.9) def test_to_row_routes_unknown_glazed_type_to_other_bucket() -> None: # Arrange — one window has glazing_type=99 (not in the SAP10 enum 1-15) sap_windows = [ make_window(width=2.0, height=1.0, glazing_type=2, frame_material="PVC"), make_window(width=1.0, height=1.0, glazing_type=99, frame_material="PVC"), ] epc = make_minimal_sap10_epc(energy_rating_current=82, sap_windows=sap_windows) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert # Total area = 3.0; known type 2 = 2.0/3.0; unknown 99 → _other = 1.0/3.0 assert row["window_pct_glazed_type_2"] == pytest.approx(2 / 3) assert row["window_pct_glazed_type_other"] == pytest.approx(1 / 3) def test_to_row_returns_window_share_zeros_for_property_with_no_windows() -> None: # Arrange epc = make_minimal_sap10_epc(energy_rating_current=82) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert for code in _GLAZED_TYPE_CODES: assert row[f"window_pct_glazed_type_{code}"] == 0.0 assert row["window_pct_glazed_type_other"] == 0.0 assert row["window_pct_pvc_frame"] is None _BUILDING_PART_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = { # name → (dtype, nullable, categorical) "building_parts_count": (int, False, False), "total_heat_loss_perimeter_m": (float, False, False), "total_party_wall_length_m": (float, False, False), "total_floor_area_from_parts_m2": (float, False, False), "avg_room_height_m": (float, True, False), "main_dwelling_heat_loss_perimeter_m": (float, True, False), "main_dwelling_party_wall_length_m": (float, True, False), "main_dwelling_total_floor_area_m2": (float, True, False), "main_dwelling_avg_room_height_m": (float, True, False), "main_dwelling_has_room_in_roof": (bool, True, False), "main_dwelling_construction_age_band": (str, True, True), "main_dwelling_wall_construction": (int, True, True), "main_dwelling_roof_construction": (int, True, True), } def test_schema_advertises_building_part_features() -> None: # Arrange transform = EpcMlTransform() # Act schema = transform.schema() # Assert for name, (expected_dtype, expected_nullable, expected_categorical) in ( _BUILDING_PART_FEATURES_NULLABLE.items() ): assert name in schema.feature_columns, name column = schema.feature_columns[name] assert column.dtype is expected_dtype, name assert column.nullable is expected_nullable, name assert column.categorical is expected_categorical, name def test_to_row_aggregates_building_parts_with_main_dwelling_carveout() -> None: # Arrange — Main Dwelling (two floors, age band B, wall 3, roof 4) plus one extension. main = make_building_part( identifier=BuildingPartIdentifier.MAIN, construction_age_band="B", wall_construction=3, roof_construction=4, floor_dimensions=[ make_floor_dimension( total_floor_area_m2=30.0, room_height_m=2.5, party_wall_length_m=6.0, heat_loss_perimeter_m=20.0, ), make_floor_dimension( total_floor_area_m2=28.0, room_height_m=2.4, party_wall_length_m=6.0, heat_loss_perimeter_m=18.0, ), ], ) extension = make_building_part( identifier=BuildingPartIdentifier.EXTENSION_1, construction_age_band="L", wall_construction=4, roof_construction=5, floor_dimensions=[ make_floor_dimension( total_floor_area_m2=12.0, room_height_m=2.6, party_wall_length_m=0.0, heat_loss_perimeter_m=10.0, ), ], ) epc = make_minimal_sap10_epc( energy_rating_current=82, sap_building_parts=[main, extension], ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert — cross-all aggregates assert row["building_parts_count"] == 2 assert row["total_heat_loss_perimeter_m"] == pytest.approx(48.0) assert row["total_party_wall_length_m"] == pytest.approx(12.0) assert row["total_floor_area_from_parts_m2"] == pytest.approx(70.0) # avg_room_height area-weighted across all floors: (2.5*30 + 2.4*28 + 2.6*12) / 70 # = (75 + 67.2 + 31.2) / 70 = 173.4 / 70 = 2.4771... assert row["avg_room_height_m"] == pytest.approx(2.4771, abs=0.001) # Main Dwelling aggregates assert row["main_dwelling_heat_loss_perimeter_m"] == pytest.approx(38.0) assert row["main_dwelling_party_wall_length_m"] == pytest.approx(12.0) assert row["main_dwelling_total_floor_area_m2"] == pytest.approx(58.0) # main avg height = (2.5*30 + 2.4*28) / 58 = (75 + 67.2) / 58 = 142.2 / 58 = 2.4517 assert row["main_dwelling_avg_room_height_m"] == pytest.approx(2.4517, abs=0.001) assert row["main_dwelling_has_room_in_roof"] is False # Main Dwelling categoricals assert row["main_dwelling_construction_age_band"] == "B" assert row["main_dwelling_wall_construction"] == 3 assert row["main_dwelling_roof_construction"] == 4 def test_to_row_flags_room_in_roof_when_main_dwelling_has_it() -> None: # Arrange main = make_building_part( identifier=BuildingPartIdentifier.MAIN, sap_room_in_roof=SapRoomInRoof(floor_area=15.0, construction_age_band="B"), ) epc = make_minimal_sap10_epc(energy_rating_current=82, sap_building_parts=[main]) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["main_dwelling_has_room_in_roof"] is True def test_to_row_returns_building_part_nones_when_no_main_dwelling_identified() -> None: # Arrange — single part with identifier that doesn't match "Main Dwelling" sole_part = make_building_part(identifier=BuildingPartIdentifier.EXTENSION_1) epc = make_minimal_sap10_epc( energy_rating_current=82, sap_building_parts=[sole_part] ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert — cross-all aggregates still populate assert row["building_parts_count"] == 1 assert row["total_heat_loss_perimeter_m"] == pytest.approx(20.0) # Main-dwelling-specific columns are None — honest about data quality assert row["main_dwelling_heat_loss_perimeter_m"] is None assert row["main_dwelling_party_wall_length_m"] is None assert row["main_dwelling_total_floor_area_m2"] is None assert row["main_dwelling_avg_room_height_m"] is None assert row["main_dwelling_has_room_in_roof"] is None assert row["main_dwelling_construction_age_band"] is None assert row["main_dwelling_wall_construction"] is None assert row["main_dwelling_roof_construction"] is None def test_to_row_returns_building_part_zeros_for_property_with_no_parts() -> None: # Arrange epc = make_minimal_sap10_epc(energy_rating_current=82) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["building_parts_count"] == 0 assert row["total_heat_loss_perimeter_m"] == 0.0 assert row["total_party_wall_length_m"] == 0.0 assert row["total_floor_area_from_parts_m2"] == 0.0 assert row["avg_room_height_m"] is None assert row["main_dwelling_heat_loss_perimeter_m"] is None assert row["main_dwelling_construction_age_band"] is None assert row["main_dwelling_wall_construction"] is None _HEATING_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = { # name → (dtype, nullable, categorical) "main_heating_count": (int, False, False), "primary_main_fuel_type": (int, True, True), "primary_heat_emitter_type": (int, True, True), "primary_main_heating_control": (int, True, True), "primary_main_heating_category": (int, True, True), "primary_has_fghrs": (bool, True, False), "primary_fan_flue_present": (bool, True, False), "primary_boiler_flue_type": (int, True, True), "primary_central_heating_pump_age": (int, True, True), "water_heating_code": (int, True, True), "water_heating_fuel": (int, True, True), "cylinder_size": (int, True, False), "cylinder_insulation_thickness_mm": (int, True, False), "has_secondary_heating": (bool, False, False), "secondary_fuel_type": (int, True, True), } def test_schema_advertises_heating_features() -> None: # Arrange transform = EpcMlTransform() # Act schema = transform.schema() # Assert for name, (expected_dtype, expected_nullable, expected_categorical) in ( _HEATING_FEATURES_NULLABLE.items() ): assert name in schema.feature_columns, name column = schema.feature_columns[name] assert column.dtype is expected_dtype, name assert column.nullable is expected_nullable, name assert column.categorical is expected_categorical, name def test_to_row_extracts_primary_heating_from_first_main_heating_detail() -> None: # Arrange — mains-gas boiler with a fan flue, modern control, no FGHRS primary = make_main_heating_detail( main_fuel_type=26, # mains gas (not community) heat_emitter_type=1, main_heating_control=2106, main_heating_category=2, has_fghrs=False, fan_flue_present=True, boiler_flue_type=2, central_heating_pump_age=0, ) epc = make_minimal_sap10_epc( energy_rating_current=82, sap_heating=make_sap_heating(main_heating_details=[primary]), ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["main_heating_count"] == 1 assert row["primary_main_fuel_type"] == 26 assert row["primary_heat_emitter_type"] == 1 assert row["primary_main_heating_control"] == 2106 assert row["primary_main_heating_category"] == 2 assert row["primary_has_fghrs"] is False assert row["primary_fan_flue_present"] is True assert row["primary_boiler_flue_type"] == 2 assert row["primary_central_heating_pump_age"] == 0 def test_to_row_extracts_water_heating_fields() -> None: # Arrange epc = make_minimal_sap10_epc( energy_rating_current=82, sap_heating=make_sap_heating( water_heating_code=901, water_heating_fuel=26, cylinder_size=2, cylinder_insulation_thickness_mm=38, ), ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["water_heating_code"] == 901 assert row["water_heating_fuel"] == 26 assert row["cylinder_size"] == 2 assert row["cylinder_insulation_thickness_mm"] == 38 def test_to_row_flags_secondary_heating_when_present() -> None: # Arrange — secondary heating: bottled-LPG (code 38) epc = make_minimal_sap10_epc( energy_rating_current=82, sap_heating=make_sap_heating(secondary_fuel_type=38), ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["has_secondary_heating"] is True assert row["secondary_fuel_type"] == 38 def test_to_row_returns_no_secondary_heating_when_absent() -> None: # Arrange epc = make_minimal_sap10_epc( energy_rating_current=82, sap_heating=make_sap_heating(secondary_fuel_type=None), ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["has_secondary_heating"] is False assert row["secondary_fuel_type"] is None def test_to_row_returns_primary_heating_nones_when_no_main_heating_details() -> None: # Arrange — sap_heating present but main_heating_details is empty epc = make_minimal_sap10_epc( energy_rating_current=82, sap_heating=make_sap_heating(main_heating_details=[]), ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["main_heating_count"] == 0 assert row["primary_main_fuel_type"] is None assert row["primary_heat_emitter_type"] is None assert row["primary_main_heating_control"] is None assert row["primary_main_heating_category"] is None assert row["primary_has_fghrs"] is None assert row["primary_fan_flue_present"] is None assert row["primary_boiler_flue_type"] is None assert row["primary_central_heating_pump_age"] is None _PV_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = { # name → (dtype, nullable, categorical) "has_pv": (bool, False, False), "pv_capacity_source": (str, False, True), "pv_array_count": (int, False, False), "pv_total_peak_power_kw": (float, False, False), "pv_peak_power_kw_N": (float, False, False), "pv_peak_power_kw_NE": (float, False, False), "pv_peak_power_kw_E": (float, False, False), "pv_peak_power_kw_SE": (float, False, False), "pv_peak_power_kw_S": (float, False, False), "pv_peak_power_kw_SW": (float, False, False), "pv_peak_power_kw_W": (float, False, False), "pv_peak_power_kw_NW": (float, False, False), "pv_avg_pitch": (float, True, False), "pv_avg_overshading": (float, True, False), "pv_percent_roof_area": (int, True, False), } def test_schema_advertises_pv_features() -> None: # Arrange transform = EpcMlTransform() # Act schema = transform.schema() # Assert for name, (expected_dtype, expected_nullable, expected_categorical) in ( _PV_FEATURES_NULLABLE.items() ): assert name in schema.feature_columns, name column = schema.feature_columns[name] assert column.dtype is expected_dtype, name assert column.nullable is expected_nullable, name assert column.categorical is expected_categorical, name def test_to_row_aggregates_measured_pv_arrays() -> None: # Arrange — two S-facing arrays (one with 2.04 kW pitch 2 overshading 1; one # with 1.86 kW pitch 3 overshading 2) and one NW array (1.0 kW). arrays = [ make_pv_array(peak_power=2.04, pitch=2, orientation=5, overshading=1), make_pv_array(peak_power=1.86, pitch=3, orientation=5, overshading=2), make_pv_array(peak_power=1.0, pitch=2, orientation=8, overshading=1), ] epc = make_minimal_sap10_epc( energy_rating_current=82, photovoltaic_arrays=arrays ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["has_pv"] is True assert row["pv_capacity_source"] == "measured" assert row["pv_array_count"] == 3 assert row["pv_total_peak_power_kw"] == pytest.approx(4.9) # Power by orientation: S = 2.04 + 1.86 = 3.9; NW = 1.0; rest 0.0 assert row["pv_peak_power_kw_S"] == pytest.approx(3.9) assert row["pv_peak_power_kw_NW"] == pytest.approx(1.0) for other in ("N", "NE", "E", "SE", "SW", "W"): assert row[f"pv_peak_power_kw_{other}"] == 0.0 # Power-weighted pitch: (2.04*2 + 1.86*3 + 1.0*2) / 4.9 = (4.08 + 5.58 + 2.0) / 4.9 = 11.66/4.9 ≈ 2.380 assert row["pv_avg_pitch"] == pytest.approx(11.66 / 4.9) # Power-weighted overshading: (2.04*1 + 1.86*2 + 1.0*1) / 4.9 = 6.76 / 4.9 ≈ 1.379 assert row["pv_avg_overshading"] == pytest.approx(6.76 / 4.9) # No percent_roof_area when measured assert row["pv_percent_roof_area"] is None def test_to_row_uses_percent_roof_area_when_pv_not_measured() -> None: # Arrange — surveyor couldn't confirm config; only percent_roof_area is known epc = make_minimal_sap10_epc( energy_rating_current=82, photovoltaic_supply_percent_roof_area=25 ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["has_pv"] is True assert row["pv_capacity_source"] == "estimated_from_roof_area" assert row["pv_array_count"] == 0 assert row["pv_total_peak_power_kw"] == 0.0 assert row["pv_percent_roof_area"] == 25 assert row["pv_avg_pitch"] is None assert row["pv_avg_overshading"] is None def test_to_row_returns_pv_no_when_no_pv_data() -> None: # Arrange — no measured arrays, no percent_roof_area, no PV at all epc = make_minimal_sap10_epc(energy_rating_current=82) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["has_pv"] is False assert row["pv_capacity_source"] == "none" assert row["pv_array_count"] == 0 assert row["pv_total_peak_power_kw"] == 0.0 for cardinal in ("N", "NE", "E", "SE", "S", "SW", "W", "NW"): assert row[f"pv_peak_power_kw_{cardinal}"] == 0.0 assert row["pv_percent_roof_area"] is None assert row["pv_avg_pitch"] is None assert row["pv_avg_overshading"] is None def test_to_row_treats_zero_percent_roof_area_as_no_pv() -> None: # Arrange — `photovoltaic_supply.none_or_no_details.percent_roof_area = 0` is # the canonical "no PV" payload on schema-21 EPCs. epc = make_minimal_sap10_epc( energy_rating_current=82, photovoltaic_supply_percent_roof_area=0 ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["has_pv"] is False assert row["pv_capacity_source"] == "none" assert row["pv_percent_roof_area"] is None _ENERGY_SOURCE_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = { # name → (dtype, nullable, categorical) "has_pv_battery": (bool, False, False), "pv_battery_count": (int, False, False), "pv_battery_capacity_kwh": (float, True, False), "has_wind_turbine": (bool, False, False), "wind_turbine_count": (int, False, False), "mains_gas": (bool, False, False), "electricity_smart_meter_present": (bool, False, False), "gas_smart_meter_present": (bool, False, False), "is_dwelling_export_capable": (bool, False, False), } def test_schema_advertises_energy_source_features() -> None: # Arrange transform = EpcMlTransform() # Act schema = transform.schema() # Assert for name, (expected_dtype, expected_nullable, expected_categorical) in ( _ENERGY_SOURCE_FEATURES_NULLABLE.items() ): assert name in schema.feature_columns, name column = schema.feature_columns[name] assert column.dtype is expected_dtype, name assert column.nullable is expected_nullable, name assert column.categorical is expected_categorical, name def test_to_row_extracts_pv_battery_and_capacity() -> None: # Arrange — two batteries of 5.0 kWh each epc = make_minimal_sap10_epc( energy_rating_current=82, pv_battery_count=2, pv_battery_capacity_per_unit_kwh=5.0, ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["has_pv_battery"] is True assert row["pv_battery_count"] == 2 assert row["pv_battery_capacity_kwh"] == pytest.approx(10.0) def test_to_row_returns_no_pv_battery_when_count_zero() -> None: # Arrange — no battery epc = make_minimal_sap10_epc(energy_rating_current=82) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["has_pv_battery"] is False assert row["pv_battery_count"] == 0 assert row["pv_battery_capacity_kwh"] is None def test_to_row_flags_wind_turbine() -> None: # Arrange epc = make_minimal_sap10_epc(energy_rating_current=82, wind_turbines_count=1) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["has_wind_turbine"] is True assert row["wind_turbine_count"] == 1 def test_to_row_extracts_energy_source_booleans() -> None: # Arrange — gas + electricity smart meters, export capable epc = make_minimal_sap10_epc( energy_rating_current=82, mains_gas=True, electricity_smart_meter_present=True, gas_smart_meter_present=True, is_dwelling_export_capable=True, ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["mains_gas"] is True assert row["electricity_smart_meter_present"] is True assert row["gas_smart_meter_present"] is True assert row["is_dwelling_export_capable"] is True _VENTILATION_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = { "mechanical_ventilation": (int, True, True), "mechanical_vent_duct_type": (int, True, True), "blocked_chimneys_count": (int, True, False), "pressure_test": (int, True, False), } def test_schema_advertises_ventilation_features() -> None: # Arrange transform = EpcMlTransform() # Act schema = transform.schema() # Assert for name, (expected_dtype, expected_nullable, expected_categorical) in ( _VENTILATION_FEATURES_NULLABLE.items() ): assert name in schema.feature_columns, name column = schema.feature_columns[name] assert column.dtype is expected_dtype, name assert column.nullable is expected_nullable, name assert column.categorical is expected_categorical, name def test_to_row_extracts_ventilation_features() -> None: # Arrange — MVHR (mechanical_ventilation code 4), duct type 3 epc = make_minimal_sap10_epc( energy_rating_current=82, mechanical_ventilation=4, mechanical_vent_duct_type=3, blocked_chimneys_count=1, pressure_test=4, ) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert assert row["mechanical_ventilation"] == 4 assert row["mechanical_vent_duct_type"] == 3 assert row["blocked_chimneys_count"] == 1 assert row["pressure_test"] == 4 def test_to_rows_returns_dataframe_with_one_row_per_property() -> None: # Arrange — two properties with different floor areas + SAP scores epcs = [ make_minimal_sap10_epc(energy_rating_current=82, total_floor_area_m2=70.0), make_minimal_sap10_epc(energy_rating_current=45, total_floor_area_m2=120.0), ] transform = EpcMlTransform() # Act df = transform.to_rows(epcs) # Assert assert isinstance(df, pd.DataFrame) assert len(df) == 2 assert df.loc[0, "sap_score"] == 82 assert df.loc[1, "sap_score"] == 45 assert df.loc[0, "total_floor_area_m2"] == 70.0 assert df.loc[1, "total_floor_area_m2"] == 120.0 def test_to_rows_returns_empty_dataframe_for_empty_input() -> None: # Arrange transform = EpcMlTransform() # Act df = transform.to_rows([]) # Assert assert isinstance(df, pd.DataFrame) assert len(df) == 0 # Every advertised column appears as an output column even for empty input. schema = transform.schema() for name in schema.feature_columns: assert name in df.columns for name in schema.target_columns: assert name in df.columns def test_to_rows_casts_categorical_columns_to_pd_categorical_dtype() -> None: # Arrange — minimal property with a categorical feature populated epcs = [ make_minimal_sap10_epc( energy_rating_current=82, dwelling_type="Mid-terrace house" ), make_minimal_sap10_epc( energy_rating_current=45, dwelling_type="Detached house" ), ] transform = EpcMlTransform() # Act df = transform.to_rows(epcs) # Assert — every column flagged ColumnSpec.categorical=True is a pd.Categorical schema = transform.schema() for name, spec in schema.feature_columns.items(): if spec.categorical: assert isinstance(df[name].dtype, pd.CategoricalDtype), name def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None: # Arrange — two windows with transmission details; one without. sap_windows = [ make_window( orientation=5, width=2.0, height=1.0, window_transmission_details=WindowTransmissionDetails( u_value=1.4, data_source=2, solar_transmittance=0.72 ), ), make_window( orientation=1, width=1.0, height=1.0, window_transmission_details=WindowTransmissionDetails( u_value=2.0, data_source=2, solar_transmittance=0.60 ), ), make_window(orientation=3, width=1.0, height=1.0), # no details ] epc = make_minimal_sap10_epc(energy_rating_current=82, sap_windows=sap_windows) transform = EpcMlTransform() # Act row = transform.to_row(epc) # Assert # Area-weighted u: (1.4 * 2.0 + 2.0 * 1.0) / (2.0 + 1.0) = 4.8 / 3.0 = 1.6 assert row["window_avg_u_value"] == pytest.approx(1.6) # Area-weighted solar transmittance: (0.72 * 2.0 + 0.60 * 1.0) / 3.0 = 2.04 / 3.0 = 0.68 assert row["window_avg_solar_transmittance"] == pytest.approx(0.68) def test_to_row_extracts_main_dwelling_wall_roof_floor_fabric_inputs() -> None: # Arrange from datatypes.epc.domain.epc_property_data import SapBuildingPart, SapFloorDimension ground = SapFloorDimension( room_height_m=2.4, total_floor_area_m2=50.0, party_wall_length_m=5.0, heat_loss_perimeter_m=20.0, floor=0, floor_insulation=2, floor_construction=1, ) upstairs = SapFloorDimension( room_height_m=2.4, total_floor_area_m2=50.0, party_wall_length_m=5.0, heat_loss_perimeter_m=20.0, floor=1, floor_insulation=0, floor_construction=0, ) main = SapBuildingPart( identifier=BuildingPartIdentifier.MAIN, construction_age_band="C", wall_construction=3, wall_insulation_type=4, wall_thickness_measured=True, party_wall_construction=2, sap_floor_dimensions=[ground, upstairs], wall_dry_lined=False, wall_thickness_mm=300, wall_insulation_thickness="50mm", floor_heat_loss=7, floor_insulation_thickness="100mm", roof_construction=5, roof_insulation_location=6, roof_insulation_thickness="270mm", ) epc = make_minimal_sap10_epc(energy_rating_current=70, sap_building_parts=[main]) # Act row = EpcMlTransform().to_row(epc) # Assert — wall fabric assert row["main_dwelling_wall_insulation_type"] == 4 assert row["main_dwelling_wall_insulation_thickness_mm"] == 50 assert row["main_dwelling_wall_dry_lined"] is False assert row["main_dwelling_wall_thickness_mm"] == 300 assert row["main_dwelling_party_wall_construction"] == 2 # Assert — roof fabric assert row["main_dwelling_roof_insulation_location"] == 6 assert row["main_dwelling_roof_insulation_thickness_mm"] == 270 # Assert — floor fabric, taken from ground-floor SapFloorDimension assert row["main_dwelling_floor_construction"] == 1 assert row["main_dwelling_floor_insulation"] == 2 assert row["main_dwelling_floor_insulation_thickness_mm"] == 100 assert row["main_dwelling_floor_heat_loss"] == 7 def test_to_row_parses_no_insulation_sentinel_as_zero_mm() -> None: # Arrange from datatypes.epc.domain.epc_property_data import SapBuildingPart main = SapBuildingPart( identifier=BuildingPartIdentifier.MAIN, construction_age_band="C", wall_construction=3, wall_insulation_type=4, wall_thickness_measured=True, party_wall_construction=2, sap_floor_dimensions=[], wall_insulation_thickness="NI", roof_insulation_thickness="ND", # unparseable sentinel ) epc = make_minimal_sap10_epc(energy_rating_current=70, sap_building_parts=[main]) # Act row = EpcMlTransform().to_row(epc) # Assert assert row["main_dwelling_wall_insulation_thickness_mm"] == 0 assert row["main_dwelling_roof_insulation_thickness_mm"] is None def test_schema_advertises_envelope_heat_loss_feature() -> None: # Arrange transform = EpcMlTransform() # Act schema = transform.schema() # Assert assert "envelope_heat_loss_w_per_k" in schema.feature_columns column = schema.feature_columns["envelope_heat_loss_w_per_k"] assert column.dtype is float assert column.nullable is False def test_to_row_emits_positive_envelope_heat_loss_for_sap10_epc() -> None: # Arrange from domain.sap10_ml.tests._fixtures import make_building_part, make_floor_dimension main = make_building_part( identifier=BuildingPartIdentifier.MAIN, construction_age_band="G", wall_construction=4, wall_insulation_type=4, party_wall_construction=1, roof_construction=4, floor_dimensions=[ make_floor_dimension( total_floor_area_m2=100.0, room_height_m=2.5, party_wall_length_m=5.0, heat_loss_perimeter_m=40.0, floor=0, ) ], ) epc = make_minimal_sap10_epc( energy_rating_current=70, sap_building_parts=[main], total_floor_area_m2=100.0, country_code="ENG", ) # Act row = EpcMlTransform().to_row(epc) # Assert — envelope_heat_loss in plausible range for a 100 m^2 age-G semi. assert row["envelope_heat_loss_w_per_k"] > 100.0 assert row["envelope_heat_loss_w_per_k"] < 400.0 def test_to_row_threads_top_level_fabric_and_demand_scalars() -> None: # Arrange from dataclasses import replace base = make_minimal_sap10_epc(energy_rating_current=72) epc = replace( base, multiple_glazed_proportion=85, extract_fans_count=2, sap_heating=replace(base.sap_heating, number_baths=2, number_baths_wwhrs=1), ) # Act row = EpcMlTransform().to_row(epc) # Assert assert row["multiple_glazed_proportion"] == 85 assert row["extract_fans_count"] == 2 assert row["number_baths"] == 2 assert row["number_baths_wwhrs"] == 1