tooling: per-end-use PEUI decomposition in parity probe

Adds primary-energy breakdown (space heating, hot water, lighting,
pumps, PV) per cert plus stratified bias reports by main_heating_
category, construction_age_band, and dwelling_type. Used to localise
the +51 kWh/m² PEUI bias to envelope-side over-prediction on pre-1996
fabric, which the bare SAP-residual ranking didn't surface.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-18 20:14:39 +00:00
parent 743f77d54c
commit 1c0cb9ac07

View file

@ -75,6 +75,20 @@ def main(argv: list[str] | None = None) -> None:
inputs = cert_to_inputs(epc, prices=prices)
result = calculate_sap_from_inputs(inputs)
cert_primary = epc.energy_consumption_current
tfa = epc.total_floor_area_m2 or 1.0
pf_space = inputs.space_heating_primary_factor
pf_hw = inputs.hot_water_primary_factor
pf_other = inputs.other_primary_factor
main_detail = (
epc.sap_heating.main_heating_details[0]
if epc.sap_heating and epc.sap_heating.main_heating_details
else None
)
age_band = (
epc.sap_building_parts[0].construction_age_band
if epc.sap_building_parts
else None
)
results.append({
"cert": cn,
"actual": actual,
@ -91,6 +105,23 @@ def main(argv: list[str] | None = None) -> None:
if cert_primary is not None
else None
),
# End-use primary-energy split, kWh/m² of TFA (calculator side).
"space_pe_m2": round(
(result.main_heating_fuel_kwh_per_yr + result.secondary_heating_fuel_kwh_per_yr)
* pf_space / tfa, 1
),
"hw_pe_m2": round(result.hot_water_kwh_per_yr * pf_hw / tfa, 1),
"lighting_pe_m2": round(result.lighting_kwh_per_yr * pf_other / tfa, 1),
"pumps_pe_m2": round(result.pumps_fans_kwh_per_yr * pf_other / tfa, 1),
"pv_pe_m2": round(
-inputs.pv_generation_kwh_per_yr * pf_other / tfa, 1
),
# Strata for residual decomposition.
"main_cat": main_detail.main_heating_category if main_detail else None,
"main_fuel": main_detail.main_fuel_type if main_detail else None,
"age": age_band,
"water_code": epc.sap_heating.water_heating_code if epc.sap_heating else None,
"has_cyl": epc.sap_heating.cylinder_size is not None if epc.sap_heating else False,
})
except Exception as e: # noqa: BLE001 — exploratory probe
errors.append({"cert": cn, "actual": actual, "error": f"{type(e).__name__}: {e}"})
@ -108,10 +139,77 @@ def main(argv: list[str] | None = None) -> None:
print(f"within ±{thr}: {pct:.1f}%")
print("\nresidual distribution:")
print(df["residual"].describe(percentiles=[0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95]))
print("\nworst 15 by |residual|:")
print("\nworst 15 by |residual| (SAP):")
print(df.nlargest(15, "abs_resid")[
["cert", "actual", "predicted", "residual", "ecf", "tfa", "ext", "dwelling"]
].to_string(index=False))
# ------------------------------------------------------------------
# Primary-energy decomposition. The cert reports a single PEUI
# (`energy_consumption_current`, kWh/m² TFA), so we can't see the
# cert's per-end-use mix — but we can see ours, and stratify the
# residual by cert attributes to localise which population drives
# the bias.
# ------------------------------------------------------------------
pe_df = df.dropna(subset=["primary_resid"]).copy()
if not pe_df.empty:
pe_df["abs_pe_resid"] = pe_df["primary_resid"].abs()
print(f"\n=== Primary energy (kWh/m² TFA) ===")
print(f"PE MAE: {pe_df['abs_pe_resid'].mean():.2f}")
print(f"PE bias: {pe_df['primary_resid'].mean():.2f}")
print(f"PE RMSE: {((pe_df['primary_resid'] ** 2).mean()) ** 0.5:.2f}")
print("PE residual distribution:")
print(pe_df["primary_resid"].describe(
percentiles=[0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95]
))
print("\nCorpus mean OUR primary energy by end-use (kWh/m² TFA):")
mix_cols = ["space_pe_m2", "hw_pe_m2", "lighting_pe_m2", "pumps_pe_m2", "pv_pe_m2"]
for col in mix_cols:
print(f" {col:14s} {pe_df[col].mean():6.1f}")
print(f" {'TOTAL (ours)':14s} {pe_df['our_primary_kwh_m2'].mean():6.1f}")
print(f" {'TOTAL (cert)':14s} {pe_df['cert_primary_kwh_m2'].mean():6.1f}")
# Stratified bias by main heating category.
print("\nPE residual stratified by main_heating_category:")
by_cat = pe_df.groupby("main_cat", dropna=False).agg(
n=("cert", "count"),
pe_mae=("abs_pe_resid", "mean"),
pe_bias=("primary_resid", "mean"),
our_pe=("our_primary_kwh_m2", "mean"),
cert_pe=("cert_primary_kwh_m2", "mean"),
space_pe=("space_pe_m2", "mean"),
hw_pe=("hw_pe_m2", "mean"),
light_pe=("lighting_pe_m2", "mean"),
).round(1)
print(by_cat.to_string())
# Stratified bias by age band — narrows the envelope-side hypothesis.
print("\nPE residual stratified by construction_age_band:")
by_age = pe_df.groupby("age", dropna=False).agg(
n=("cert", "count"),
pe_mae=("abs_pe_resid", "mean"),
pe_bias=("primary_resid", "mean"),
space_pe=("space_pe_m2", "mean"),
).round(1).sort_index()
print(by_age.to_string())
# Stratified bias by dwelling type — flat vs detached should split
# if envelope-side (party-wall surfaces) is the dominant residual.
print("\nPE residual stratified by dwelling_type:")
by_dwel = pe_df.groupby("dwelling", dropna=False).agg(
n=("cert", "count"),
pe_mae=("abs_pe_resid", "mean"),
pe_bias=("primary_resid", "mean"),
space_pe=("space_pe_m2", "mean"),
).round(1)
print(by_dwel.to_string())
print("\nworst 15 by |PE residual|:")
print(pe_df.nlargest(15, "abs_pe_resid")[
["cert", "actual", "primary_resid",
"our_primary_kwh_m2", "cert_primary_kwh_m2",
"space_pe_m2", "hw_pe_m2", "main_cat", "dwelling"]
].to_string(index=False))
if errors:
print("\nerrors:")
for e in errors[:10]: