mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
slice 13: to_rows(properties) returns pd.DataFrame
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
ebceb4bf2b
commit
3abcee6a53
3 changed files with 90 additions and 2 deletions
|
|
@ -3,7 +3,10 @@ name = "domna-domain"
|
|||
version = "0.1.0"
|
||||
description = "Shared domain types for the Ara modelling pipeline and sibling Domna services."
|
||||
requires-python = ">=3.11"
|
||||
dependencies = []
|
||||
dependencies = [
|
||||
"pandas>=2.0",
|
||||
"pandas-stubs",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
"""Tests for EpcMlTransform v0.1.0 — schema-contract surface and target extraction."""
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import SapRoomInRoof, WindowTransmissionDetails
|
||||
|
|
@ -1087,6 +1088,66 @@ def test_to_row_extracts_ventilation_features() -> None:
|
|||
assert row["pressure_test"] == 4
|
||||
|
||||
|
||||
def test_to_rows_returns_dataframe_with_one_row_per_property() -> None:
    """`to_rows` yields one DataFrame row per input property, in input order."""
    # Arrange — (SAP score, floor area) pairs, one per property
    expected = [(82, 70.0), (45, 120.0)]
    properties = [
        make_minimal_sap10_epc(energy_rating_current=score, total_floor_area_m2=area)
        for score, area in expected
    ]
    transform = EpcMlTransform()

    # Act
    frame = transform.to_rows(properties)

    # Assert — container type and row count first, then per-row values
    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 2
    for position, (score, _) in enumerate(expected):
        assert frame.loc[position, "sap_score"] == score
    for position, (_, area) in enumerate(expected):
        assert frame.loc[position, "total_floor_area_m2"] == area
|
||||
|
||||
|
||||
def test_to_rows_returns_empty_dataframe_for_empty_input() -> None:
    """An empty input still produces a DataFrame carrying every schema column."""
    # Arrange
    transform = EpcMlTransform()

    # Act
    frame = transform.to_rows([])

    # Assert
    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 0
    # Feature columns are checked before target columns; each advertised
    # name must be present as an output column even with zero rows.
    schema = transform.schema()
    for column_group in (schema.feature_columns, schema.target_columns):
        for name in column_group:
            assert name in frame.columns
|
||||
|
||||
|
||||
def test_to_rows_casts_categorical_columns_to_pd_categorical_dtype() -> None:
    """Feature columns whose spec is flagged categorical come back as categorical dtype."""
    # Arrange — two minimal properties with a categorical feature populated
    properties = [
        make_minimal_sap10_epc(energy_rating_current=score, dwelling_type=dwelling)
        for score, dwelling in (
            (82, "Mid-terrace house"),
            (45, "Detached house"),
        )
    ]
    transform = EpcMlTransform()

    # Act
    frame = transform.to_rows(properties)

    # Assert — collect the names flagged categorical, then check each dtype;
    # the column name is the assertion message so a failure identifies it.
    categorical_names = [
        name
        for name, spec in transform.schema().feature_columns.items()
        if spec.categorical
    ]
    for name in categorical_names:
        assert isinstance(frame[name].dtype, pd.CategoricalDtype), name
|
||||
|
||||
|
||||
def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None:
|
||||
# Arrange — two windows with transmission details; one without.
|
||||
sap_windows = [
|
||||
|
|
|
|||
|
|
@ -10,7 +10,9 @@ are added in subsequent slices.
|
|||
See docs/adr/0007-kwh-as-ml-target.md for the target set and rationale.
|
||||
"""
|
||||
|
||||
from typing import Any, Optional
|
||||
from typing import Any, Iterable, Optional
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from datatypes.epc.domain.epc import Epc
|
||||
from datatypes.epc.domain.epc_property_data import (
|
||||
|
|
@ -502,6 +504,28 @@ class EpcMlTransform:
|
|||
target_columns=dict(_TARGET_COLUMNS),
|
||||
)
|
||||
|
||||
def to_rows(self, properties: Iterable[EpcPropertyData]) -> pd.DataFrame:
    """Build a DataFrame by running `to_row` over each property.

    The output columns are fixed to the schema's feature columns followed
    by its target columns, so an empty input still yields a frame with
    every advertised column. Any column whose spec has `categorical` set
    is converted with `astype("category")`; all other columns keep the
    dtype pandas infers.
    """
    schema = self.schema()
    feature_specs = schema.feature_columns
    target_specs = schema.target_columns

    # Column order: features first, then targets.
    ordered_names = [*feature_specs.keys(), *target_specs.keys()]
    records = [self.to_row(item) for item in properties]
    frame = pd.DataFrame(records, columns=ordered_names)

    # Same cast rule for both groups; features are processed before targets.
    for specs in (feature_specs, target_specs):
        for column, spec in specs.items():
            if spec.categorical:
                frame[column] = frame[column].astype("category")
    return frame
|
||||
|
||||
def to_row(self, epc: EpcPropertyData) -> dict[str, Any]:
|
||||
"""Map an EpcPropertyData to a single row of features + targets.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue