Slice 99d: surface PV array from Elmhurst Summary §19.0

Cert 9501 lodges measured PV: 2.36 kWp South-West, 45° pitch, "None
Or Little" overshading. The worksheet's §10a credit (-250.02 GBP =
PV used in dwelling £-129.49 + PV exported £-120.53) depends on the
Appendix M / Appendix U3.3 cascade reading these from
`SapEnergySource.photovoltaic_arrays`. The prior extractor only
captured the `photovoltaic_panel: "Panel details"` label — the
actual kW / orientation / elevation / overshading were silently
dropped, so the cascade computed total cost ~£250 too high → ECF
2.92 vs worksheet 2.26 → SAP 59.26 vs 68.53 (Δ -9.27).

Changes:
- Extend `surveys.elmhurst_site_notes.Renewables` with 4 new
  optional fields: pv_peak_power_kw / pv_orientation /
  pv_elevation_deg / pv_overshading.
- Add `ElmhurstSiteNotesExtractor._extract_pv_array_detail` —
  anchors on "Photovoltaic panel details" then reads the 4
  consecutive value lines (kWp, orientation, elevation, overshading).
- Add `_elmhurst_pv_arrays` mapper helper to build the
  `[PhotovoltaicArray(...)]` list when all 4 values are present;
  return None for the "PV absent" path the cascade already handles.
- Add `_ELMHURST_PV_OVERSHADING_TO_RDSAP` map: "None Or Little" → 1
  (ZPV=1.0 per cert_to_inputs._PV_OVERSHADING_FACTOR), "Modest" →
  2, "Significant" → 3, "Heavy" → 4. RdSAP omits SAP10.2 Table M1's
  5th "Severe" bucket.
- Wire `photovoltaic_arrays=_elmhurst_pv_arrays(survey.renewables)`
  into `from_elmhurst_site_notes`'s `SapEnergySource(...)` call.

Effect on cert 9501 Summary path:
- sap_continuous 59.2585 → 68.7577 (target 68.5252; Δ +0.23)
- total_fuel_cost £1099 → £843 (worksheet £849; -£6 over-credit)
- ECF 2.92 → 2.24 (worksheet 2.26; -0.02 over-credit)

The remaining +0.23 SAP / -£6 cost drift is a precision gap in the
Appendix M cost-offset cascade for measured PV (not a missing-data
gap); next slice closes it to 1e-4.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-26 21:38:14 +00:00
parent e9575b529f
commit 4264e0ad4b
4 changed files with 149 additions and 0 deletions

View file

@ -1089,6 +1089,8 @@ class ElmhurstSiteNotesExtractor:
hydro_raw = self._next_val("Electricity generated [kWh/year]")
hydro = float(hydro_raw) if hydro_raw else 0.0
pv = self._extract_pv_array_detail()
return Renewables(
solar_water_heating=self._bool_val("Solar Water Heating"),
wwhrs_present=self._bool_val("Is WWHRS present in the property?"),
@ -1098,8 +1100,70 @@ class ElmhurstSiteNotesExtractor:
wind_turbine_present=self._bool_val("Wind turbine present?"),
wind_turbines_terrain_type=terrain,
hydro_electricity_generated_kwh=hydro,
pv_peak_power_kw=pv[0],
pv_orientation=pv[1],
pv_elevation_deg=pv[2],
pv_overshading=pv[3],
)
def _extract_pv_array_detail(
self,
) -> tuple[Optional[float], Optional[str], Optional[int], Optional[str]]:
"""Parse the Elmhurst Summary §19.0 PV Panel section. Returns
(kw_peak, orientation, elevation_deg, overshading) when the cert
lodges measured PV; (None, None, None, None) when absent.
The Summary's PV block looks like:
Photovoltaic panel details
PV Cells kW Peak Orientation
Elevation
Overshading
2.36
South-West
45°
None Or Little
the 4 values follow the header block in a known order, one
per line. Anchor on "Photovoltaic panel details" skip the
header lines read 4 values.
"""
anchor = "Photovoltaic panel details"
try:
idx = next(i for i, l in enumerate(self._lines) if l == anchor)
except StopIteration:
return (None, None, None, None)
# The 4 header lines after the anchor are:
# "PV Cells kW Peak Orientation", "Elevation", "Overshading"
# followed by 4 value lines. Slice the next ~10 lines and
# filter the first 4 entries that look like values (not
# headers).
tail = self._lines[idx + 1 : idx + 12]
header_tokens = {"pv cells", "kw peak", "orientation", "elevation", "overshading"}
values: List[str] = []
for line in tail:
stripped = line.strip()
if not stripped:
continue
lower = stripped.lower()
if any(h in lower for h in header_tokens):
continue
values.append(stripped)
if len(values) == 4:
break
if len(values) < 4:
return (None, None, None, None)
try:
kwp = float(values[0])
except ValueError:
return (None, None, None, None)
orientation = values[1]
# Elevation lodged as "45°" — strip trailing degree symbol.
m = re.match(r"^(\d+)", values[2])
elevation = int(m.group(1)) if m else None
overshading = values[3]
return (kwp, orientation, elevation, overshading)
def extract(self) -> ElmhurstSiteNotes:
emissions_raw = self._next_val("Emissions (t/year)")
co2 = float(emissions_raw.split()[0]) if emissions_raw else 0.0

View file

@ -381,6 +381,32 @@ def test_summary_9501_rr_gable_walls_route_to_external_walls_hlc() -> None:
assert abs(ht.walls_w_per_k - worksheet_walls_w_per_k) <= 1e-2
def test_summary_9501_pv_array_surfaced_from_elmhurst_section_19() -> None:
    # Arrange — the Elmhurst §19.0 PV section for cert 9501 lodges a
    # measured array: 2.36 kWp, South-West, 45° elevation, "None Or
    # Little" overshading. The worksheet's §10a PV credit (-250.02 GBP =
    # -129.49 used in dwelling + -120.53 exported) relies on Appendix M /
    # Appendix U3.3 reading that detail from
    # `SapEnergySource.photovoltaic_arrays`. When the array is not
    # surfaced the cascade's total cost is ~£250 too high → ECF 2.92 vs
    # worksheet 2.26 → SAP 59.26 vs 68.53 (Δ -9.27 after Slice 99c closed
    # the fabric heat loss).
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF)
    survey = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert — exactly one array carrying the lodged values.
    pv_arrays = mapped.sap_energy_source.photovoltaic_arrays
    assert pv_arrays is not None
    assert len(pv_arrays) == 1
    only_array = pv_arrays[0]
    assert abs(only_array.peak_power - 2.36) <= 1e-4
    assert only_array.orientation == 6  # SAP octant: South-West
    assert only_array.pitch == 45
    assert only_array.overshading == 1  # RdSAP code: None or very little
def test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
# Arrange — cert 001479 (Summary_001479.pdf / P960-0001-001479.pdf)
# is the first cohort cert with a real GOV.UK EPB API counterpart

View file

@ -67,6 +67,7 @@ from datatypes.epc.surveys.elmhurst_site_notes import (
ElmhurstSiteNotes,
FloorDetails as ElmhurstFloorDetails,
MainHeating as ElmhurstMainHeating,
Renewables as ElmhurstRenewables,
RoofDetails as ElmhurstRoofDetails,
RoomInRoof as ElmhurstRoomInRoof,
RoomInRoofSurface as ElmhurstRoomInRoofSurface,
@ -321,6 +322,7 @@ class EpcPropertyDataMapper:
is_dwelling_export_capable=survey.renewables.export_capable_meter,
wind_turbines_terrain_type=survey.renewables.wind_turbines_terrain_type,
electricity_smart_meter_present=survey.meters.electricity_smart_meter,
photovoltaic_arrays=_elmhurst_pv_arrays(survey.renewables),
),
sap_building_parts=_map_elmhurst_building_parts(
survey, is_flat=property_type.lower() == "flat",
@ -2903,6 +2905,55 @@ def _map_elmhurst_room_in_roof(
)
# Elmhurst PV-overshading description → RdSAP code per SAP10.2 Table M1
# (collapsed to the 4 RdSAP buckets per cert_to_inputs._PV_OVERSHADING_
# FACTOR). Strings are the §19.0 PV-block values lodged by the Elmhurst
# Summary PDF; lower-cased for case-insensitive matching.
_ELMHURST_PV_OVERSHADING_TO_RDSAP: Dict[str, int] = {
"none or little": 1, # SAP "None or very little" — ZPV=1.0
"none or very little": 1,
"modest": 2,
"significant": 3,
"heavy": 4,
}
def _elmhurst_pv_arrays(
    renewables: ElmhurstRenewables,
) -> Optional[List[PhotovoltaicArray]]:
    """Turn the Elmhurst Summary §19.0 PV detail into the array list
    consumed by the Appendix M / Appendix U3.3 cost-offset cascade.

    Returns None when no usable measured PV is lodged, i.e. when the
    kW peak is None or not above zero, or when the orientation or the
    elevation is None. In that case the cascade defers to the legacy
    `photovoltaic_supply.percent_roof_area` fallback (the PV-absent
    path it already handles).

    Overshading is not part of that gate: a missing or unrecognised
    description is mapped by `_elmhurst_pv_overshading_int`.

    Otherwise returns a one-element list holding the single array.
    """
    peak_kw = renewables.pv_peak_power_kw
    if peak_kw is None or peak_kw <= 0.0:
        return None

    compass = renewables.pv_orientation
    tilt_deg = renewables.pv_elevation_deg
    if compass is None or tilt_deg is None:
        return None

    array = PhotovoltaicArray(
        peak_power=peak_kw,
        pitch=tilt_deg,
        orientation=_elmhurst_orientation_int(compass),
        overshading=_elmhurst_pv_overshading_int(renewables.pv_overshading),
    )
    return [array]
def _elmhurst_pv_overshading_int(description: Optional[str]) -> int:
    """Translate an Elmhurst PV-overshading label into its RdSAP code.

    The label is trimmed and lower-cased before the lookup in
    `_ELMHURST_PV_OVERSHADING_TO_RDSAP`. A missing (None) or
    unrecognised label yields 1 ("None or very little", ZPV=1.0) — the
    modal lodging assumption.
    """
    fallback_code = 1
    if description is not None:
        lookup_key = description.strip().lower()
        return _ELMHURST_PV_OVERSHADING_TO_RDSAP.get(lookup_key, fallback_code)
    return fallback_code
# Elmhurst orientation strings → SAP10 octant integer (1=N..8=NW).
# Covers the orderings the layout-style window parser produces, both
# single-direction ("East") and combined ("North-West") forms.

View file

@ -241,6 +241,14 @@ class Renewables:
wind_turbine_present: bool
wind_turbines_terrain_type: str
hydro_electricity_generated_kwh: float
# PV array detail (Elmhurst Summary §19.0 "Photovoltaic Panel"
# block: kW Peak, Orientation, Elevation, Overshading). Populated
# when the cert lodges measured PV; absent (None / "" / 0.0)
# otherwise. Drives Appendix M / Appendix U3.3 cost-offset cascade.
pv_peak_power_kw: Optional[float] = None
pv_orientation: Optional[str] = None # e.g. "South-West"
pv_elevation_deg: Optional[int] = None # e.g. 45
pv_overshading: Optional[str] = None # e.g. "None Or Little"
@dataclass