mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Slice S0380.9: multi-array PV support + close cert 0350 to ASHP spec floor
Refactors Elmhurst `Renewables` PV detail from four scalar fields
(pv_peak_power_kw / pv_orientation / pv_elevation_deg / pv_overshading
— single-array shape) to `pv_arrays: List[ElmhurstPvArray]`, then
walks the §19.0 PV Panel block in 4-tuples so dwellings with multiple
PV arrays surface every array.
Forced by cert 0350-2968-2650-2796-5255 (Summary_000903.pdf), the
second ASHP cohort cert through the Summary path and first to lodge
multiple PV arrays — the dr87 worksheet pins 2 arrays at 1.50 kWp
each (one SE at 45°, one NW at 45°). Pre-slice the extractor's
hardcoded "break at len(values) == 4" capped output at one array
regardless of how many the PDF lodged.
Three-layer end-to-end change:
1. `datatypes/epc/surveys/elmhurst_site_notes.py` — add
`ElmhurstPvArray` dataclass (peak_power_kw, orientation, elevation_deg,
overshading); replace four `Renewables.pv_*` scalars with
`pv_arrays: List[ElmhurstPvArray] = field(default_factory=list)`.
2. `backend/documents_parser/elmhurst_extractor.py` — rename
`_extract_pv_array_detail` → `_extract_pv_arrays`; walk values
after the "Photovoltaic panel details" anchor in 4-tuples until a
stop token ("batteries"/"export capable meter"/etc.) or a §-header closes the
block. §-header regex tightened to `\d{1,2}\.\d\s+\w` so kWp
values like "1.50" don't trip the close (without the `\s+\w` the
regex matched both "20.0 Wind Turbine" AND "1.50").
3. `datatypes/epc/domain/mapper.py` — `_elmhurst_pv_arrays` iterates
the list and emits one `PhotovoltaicArray` per row; collapses
empty list → None so the cascade keeps its no-PV fallback.
Forcing function: cert 0350 first-attempt Summary SAP closes from
Δ -4.5829 (Slice 8 baseline) to Δ **+0.0458** — within the ±0.07
ASHP-cohort spec-precision floor. PV export credit GBP moves from
158.91 (one array surfaced) to 265.99 (both arrays surfaced) — the
extra ~107 GBP of avoided cost lifts cert 0350's SAP by ~4.6 points.
This validates the structural-debt-amortizes hypothesis: cert 0350
needed only TWO new slices (S0380.8 inheritance + S0380.9 multi-PV)
beyond the cert 0380 closure work, vs cert 0380's 6 slices from
scratch. Subsequent cohort certs should converge similarly fast as
fixture-specific gaps are paid down.
Added two tests:
- `test_summary_0350_surfaces_two_pv_arrays` — unit test pinning
the multi-array contract on the mapper boundary.
- `test_summary_0350_full_chain_sap_within_spec_floor_of_worksheet`
— chain test pinning |Δ| < 0.07 (matches cert 0380's chain test).
Cert 0380 (single-array, 3 kWp) continues to pass its chain test +
all 6 unit-level pins — the refactor preserves single-array behaviour.
Pyright net-zero across all four edited files:
datatypes/epc/domain/mapper.py: 32 (baseline)
datatypes/epc/surveys/elmhurst_site_notes.py: 0
backend/documents_parser/elmhurst_extractor.py: 0
backend/documents_parser/tests/test_summary_pdf_mapper_chain.py: 0
Regression suite: 677 pass + 10 fail (= handover baseline of 669 pass
+ 10 fail, plus 8 new GREEN unit+chain tests across Slices S0380.2..S0380.9).
Fixtures added: `backend/documents_parser/tests/fixtures/Summary_
000903.pdf` (copied from `sap worksheets/Additional data with api/
0350-2968-2650-2796-5255/`).
Spec refs:
- SAP 10.2 Appendix M (PDF p.103) — multiple PV arrays sum to total
electricity generation per Equation M-1 (each array's surface flux
computed independently per Appendix U3.3).
- SAP 10.2 Appendix U3.3 (PDF p.124) — per-array surface flux keyed
on orientation + tilt + overshading.
- Cert 0350 worksheet `dr87-0001-000903.pdf` (29a Main 19.4575 W/K
+ Ext1 1.3025 W/K = 20.7600 ≡ Summary cascade walls_w_per_k; (39)
avg HTC 173.4202 ≡ Summary cascade; (64) HW 2084.66 ÷ (216) HW eff
1.7285 = 1206.04 ≡ Summary cascade hot_water_kwh_per_yr).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
2f92edb050
commit
8e6560d744
5 changed files with 161 additions and 62 deletions
|
|
@ -21,6 +21,7 @@ from datatypes.epc.surveys.elmhurst_site_notes import (
|
|||
Shower,
|
||||
SurveyorInfo,
|
||||
VentilationAndCooling,
|
||||
ElmhurstPvArray,
|
||||
WallDetails,
|
||||
WaterHeating,
|
||||
Window,
|
||||
|
|
@ -1153,8 +1154,6 @@ class ElmhurstSiteNotesExtractor:
|
|||
hydro_raw = self._next_val("Electricity generated [kWh/year]")
|
||||
hydro = float(hydro_raw) if hydro_raw else 0.0
|
||||
|
||||
pv = self._extract_pv_array_detail()
|
||||
|
||||
return Renewables(
|
||||
solar_water_heating=self._bool_val("Solar Water Heating"),
|
||||
wwhrs_present=self._bool_val("Is WWHRS present in the property?"),
|
||||
|
|
@ -1164,69 +1163,94 @@ class ElmhurstSiteNotesExtractor:
|
|||
wind_turbine_present=self._bool_val("Wind turbine present?"),
|
||||
wind_turbines_terrain_type=terrain,
|
||||
hydro_electricity_generated_kwh=hydro,
|
||||
pv_peak_power_kw=pv[0],
|
||||
pv_orientation=pv[1],
|
||||
pv_elevation_deg=pv[2],
|
||||
pv_overshading=pv[3],
|
||||
pv_arrays=self._extract_pv_arrays(),
|
||||
)
|
||||
|
||||
def _extract_pv_array_detail(
|
||||
self,
|
||||
) -> tuple[Optional[float], Optional[str], Optional[int], Optional[str]]:
|
||||
def _extract_pv_arrays(self) -> List[ElmhurstPvArray]:
|
||||
"""Parse the Elmhurst Summary §19.0 PV Panel section. Returns
|
||||
(kw_peak, orientation, elevation_deg, overshading) when the cert
|
||||
lodges measured PV; (None, None, None, None) when absent.
|
||||
one `ElmhurstPvArray` per lodged array, or [] when absent.
|
||||
|
||||
The Summary's PV block looks like:
|
||||
The Summary's PV block looks like (single-array, e.g. cert 0380):
|
||||
Photovoltaic panel details
|
||||
PV Cells kW Peak Orientation
|
||||
Elevation
|
||||
Overshading
|
||||
|
||||
2.36
|
||||
South-West
|
||||
3.00
|
||||
South-East
|
||||
45°
|
||||
None Or Little
|
||||
|
||||
— the 4 values follow the header block in a known order, one
|
||||
per line. Anchor on "Photovoltaic panel details" → skip the
|
||||
header lines → read 4 values.
|
||||
Multi-array (e.g. cert 0350 lodges 2 arrays):
|
||||
...
|
||||
1.50
|
||||
South-East
|
||||
45°
|
||||
None Or Little
|
||||
1.50
|
||||
North-West
|
||||
45°
|
||||
None Or Little
|
||||
|
||||
— each array is 4 values in (kW Peak, Orientation, Elevation,
|
||||
Overshading) order. Anchor on "Photovoltaic panel details",
|
||||
skip header lines, then read values in 4-tuples until the
|
||||
section breaks at the next §header or end-of-array tokens
|
||||
(Batteries / Export / Capacity / etc.).
|
||||
"""
|
||||
anchor = "Photovoltaic panel details"
|
||||
try:
|
||||
idx = next(i for i, l in enumerate(self._lines) if l == anchor)
|
||||
except StopIteration:
|
||||
return (None, None, None, None)
|
||||
# The 4 header lines after the anchor are:
|
||||
# "PV Cells kW Peak Orientation", "Elevation", "Overshading"
|
||||
# followed by 4 value lines. Slice the next ~10 lines and
|
||||
# filter the first 4 entries that look like values (not
|
||||
# headers).
|
||||
tail = self._lines[idx + 1 : idx + 12]
|
||||
return []
|
||||
# The header lines after the anchor are: "PV Cells kW Peak
|
||||
# Orientation", "Elevation", "Overshading". Subsequent lines
|
||||
# carry values for one OR MORE arrays. Stop at the next
|
||||
# §-header (a "20.0" or "21.0") or post-PV section tokens
|
||||
# ("Batteries", "Connected to", "Diverter", "Capacity", etc.).
|
||||
header_tokens = {"pv cells", "kw peak", "orientation", "elevation", "overshading"}
|
||||
stop_tokens = {
|
||||
"batteries", "capacity known", "capacity",
|
||||
"connected to the dwelling's meter", "diverter present",
|
||||
"export capable meter",
|
||||
}
|
||||
values: List[str] = []
|
||||
for line in tail:
|
||||
for line in self._lines[idx + 1:]:
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
continue
|
||||
lower = stripped.lower()
|
||||
if lower in stop_tokens:
|
||||
break
|
||||
# Next §-header (e.g. "20.0 Wind Turbine") closes the block —
|
||||
# match "<digits>.<digit><whitespace><word>" so kWp values
|
||||
# like "1.50" don't trip the close.
|
||||
if re.match(r"^\d{1,2}\.\d\s+\w", stripped):
|
||||
break
|
||||
if any(h in lower for h in header_tokens):
|
||||
continue
|
||||
values.append(stripped)
|
||||
if len(values) == 4:
|
||||
break
|
||||
if len(values) < 4:
|
||||
return (None, None, None, None)
|
||||
try:
|
||||
kwp = float(values[0])
|
||||
except ValueError:
|
||||
return (None, None, None, None)
|
||||
orientation = values[1]
|
||||
# Elevation lodged as "45°" — strip trailing degree symbol.
|
||||
m = re.match(r"^(\d+)", values[2])
|
||||
elevation = int(m.group(1)) if m else None
|
||||
overshading = values[3]
|
||||
return (kwp, orientation, elevation, overshading)
|
||||
# Walk values in 4-tuples; an incomplete trailing tuple is dropped.
|
||||
arrays: List[ElmhurstPvArray] = []
|
||||
for i in range(0, len(values) - 3, 4):
|
||||
try:
|
||||
kwp = float(values[i])
|
||||
except ValueError:
|
||||
continue
|
||||
orientation = values[i + 1]
|
||||
# Elevation lodged as "45°" — strip trailing degree symbol.
|
||||
m = re.match(r"^(\d+)", values[i + 2])
|
||||
if m is None:
|
||||
continue
|
||||
elevation = int(m.group(1))
|
||||
overshading = values[i + 3]
|
||||
arrays.append(ElmhurstPvArray(
|
||||
peak_power_kw=kwp,
|
||||
orientation=orientation,
|
||||
elevation_deg=elevation,
|
||||
overshading=overshading,
|
||||
))
|
||||
return arrays
|
||||
|
||||
def extract(self) -> ElmhurstSiteNotes:
|
||||
emissions_raw = self._next_val("Emissions (t/year)")
|
||||
|
|
|
|||
BIN
backend/documents_parser/tests/fixtures/Summary_000903.pdf
vendored
Normal file
BIN
backend/documents_parser/tests/fixtures/Summary_000903.pdf
vendored
Normal file
Binary file not shown.
|
|
@ -621,6 +621,34 @@ def test_summary_0380_cylinder_block_surfaces_full_15_1_lodging() -> None:
|
|||
assert epc.sap_heating.cylinder_thermostat == "Y"
|
||||
|
||||
|
||||
def test_summary_0350_surfaces_two_pv_arrays() -> None:
|
||||
# Arrange — cert 0350's Summary §19.0 Photovoltaic Panel block
|
||||
# lodges TWO arrays (L 503-510):
|
||||
# 1.50 kWp / South-East / 45° / None Or Little
|
||||
# 1.50 kWp / North-West / 45° / None Or Little
|
||||
# The Elmhurst extractor's `_extract_pv_array_detail` hardcodes a
|
||||
# single 4-value reader (loop breaks at `len(values) == 4`) and
|
||||
# the `Renewables` dataclass exposes only 4 scalar PV fields —
|
||||
# together they cap output at one array regardless of how many the
|
||||
# PDF lodges. Cert 0380 (single-array) is unaffected; cert 0350
|
||||
# is the first multi-array cohort cert. Without both arrays the
|
||||
# cascade halves the PV export credit and the SAP score drops.
|
||||
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000903_PDF)
|
||||
site_notes = ElmhurstSiteNotesExtractor(pages).extract()
|
||||
|
||||
# Act
|
||||
epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
|
||||
|
||||
# Assert
|
||||
assert epc.sap_energy_source is not None
|
||||
arrays = epc.sap_energy_source.photovoltaic_arrays
|
||||
assert arrays is not None
|
||||
assert len(arrays) == 2
|
||||
# Both arrays at 1.5 kWp; order matches PDF row order.
|
||||
assert arrays[0].peak_power == 1.5
|
||||
assert arrays[1].peak_power == 1.5
|
||||
|
||||
|
||||
def test_summary_0350_ext1_inherits_main_wall_insulation_thickness() -> None:
|
||||
# Arrange — cert 0350-2968-2650-2796-5255 is a multi-bp dwelling
|
||||
# (Main + 1st Extension). Its Summary §7 Walls block lodges
|
||||
|
|
@ -650,6 +678,39 @@ def test_summary_0350_ext1_inherits_main_wall_insulation_thickness() -> None:
|
|||
assert ext1_bp.wall_insulation_thickness == "100mm"
|
||||
|
||||
|
||||
def test_summary_0350_full_chain_sap_within_spec_floor_of_worksheet() -> None:
|
||||
# Arrange — cert 0350-2968-2650-2796-5255 (Summary_000903.pdf /
|
||||
# dr87-0001-000903.pdf) is the second heat-pump cert under per-cert
|
||||
# Summary-path mapper validation and the first multi-bp cohort
|
||||
# cert: Mitsubishi PUZ-WM50VHA ASHP (PCDB index 104568), main
|
||||
# dwelling + 1 extension, 2 PV arrays (2x 1.5 kWp at SE / NW).
|
||||
# Worksheet PDF "SAP value" line lodges unrounded SAP **84.1367**.
|
||||
#
|
||||
# First-attempt closure (validating the structural-debt-amortizes
|
||||
# hypothesis): after Slices S0380.2..S0380.6 (which were forced by
|
||||
# cert 0380) the cohort HP routing + cylinder block were already
|
||||
# in place; cert 0350 needed only TWO new slices:
|
||||
# - Slice S0380.8: extension "As Main Wall" inheritance copies
|
||||
# `insulation_thickness_mm` (cert 0380 was single-bp, didn't
|
||||
# exercise the inheritance path).
|
||||
# - Slice S0380.9: refactor Elmhurst `Renewables` to support
|
||||
# multiple PV arrays per dwelling (cert 0380 was single-array,
|
||||
# didn't exercise multi-array PV).
|
||||
# Both fixes are structural and apply cohort-wide.
|
||||
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000903_PDF)
|
||||
site_notes = ElmhurstSiteNotesExtractor(pages).extract()
|
||||
epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
|
||||
|
||||
# Act
|
||||
result = calculate_sap_from_inputs(
|
||||
cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
|
||||
)
|
||||
|
||||
# Assert — ±0.07 ASHP-cohort spec-floor tolerance.
|
||||
worksheet_unrounded_sap = 84.1367
|
||||
assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < _ASHP_COHORT_CHAIN_TOLERANCE
|
||||
|
||||
|
||||
def test_summary_0380_full_chain_sap_within_spec_floor_of_worksheet() -> None:
|
||||
# Arrange — cert 0380-2471-3250-2596-8761 (Summary_000899.pdf /
|
||||
# dr87-0001-000899.pdf) is the first heat-pump cert under per-cert
|
||||
|
|
|
|||
|
|
@ -3067,26 +3067,29 @@ def _elmhurst_pv_arrays(
|
|||
) -> Optional[List[PhotovoltaicArray]]:
|
||||
"""Build the Appendix M / Appendix U3.3 cost-offset cascade's input
|
||||
list from the Elmhurst Summary §19.0 PV detail. Returns None when
|
||||
the cert hasn't lodged measured PV (no kW Peak value) — the cohort
|
||||
PV-absent path the cascade already handles correctly.
|
||||
the cert hasn't lodged measured PV — the cohort PV-absent path the
|
||||
cascade already handles correctly.
|
||||
|
||||
All four §19.0 inputs (kW peak + orientation + elevation +
|
||||
overshading) are required for a meaningful Appendix M output;
|
||||
missing any of them collapses to None so the cascade defers to
|
||||
the legacy `photovoltaic_supply.percent_roof_area` fallback.
|
||||
Each lodged §19.0 row (a `ElmhurstPvArray`) becomes one
|
||||
`PhotovoltaicArray` entry. Single-array dwellings (cohort cert
|
||||
0380: 3 kWp) and multi-array dwellings (cohort cert 0350: 2x 1.5
|
||||
kWp at distinct orientations) go through the same iterator.
|
||||
"""
|
||||
if renewables.pv_peak_power_kw is None or renewables.pv_peak_power_kw <= 0.0:
|
||||
if not renewables.pv_arrays:
|
||||
return None
|
||||
if renewables.pv_orientation is None or renewables.pv_elevation_deg is None:
|
||||
return None
|
||||
return [
|
||||
PhotovoltaicArray(
|
||||
peak_power=renewables.pv_peak_power_kw,
|
||||
pitch=_elmhurst_pv_pitch_code(renewables.pv_elevation_deg),
|
||||
orientation=_elmhurst_orientation_int(renewables.pv_orientation),
|
||||
overshading=_elmhurst_pv_overshading_int(renewables.pv_overshading),
|
||||
out: List[PhotovoltaicArray] = []
|
||||
for arr in renewables.pv_arrays:
|
||||
if arr.peak_power_kw <= 0.0:
|
||||
continue
|
||||
out.append(
|
||||
PhotovoltaicArray(
|
||||
peak_power=arr.peak_power_kw,
|
||||
pitch=_elmhurst_pv_pitch_code(arr.elevation_deg),
|
||||
orientation=_elmhurst_orientation_int(arr.orientation),
|
||||
overshading=_elmhurst_pv_overshading_int(arr.overshading),
|
||||
)
|
||||
)
|
||||
]
|
||||
return out or None
|
||||
|
||||
|
||||
# RdSAP 10 §11.1 PV pitch enum (degrees → integer code consumed by
|
||||
|
|
|
|||
|
|
@ -259,13 +259,24 @@ class Renewables:
|
|||
wind_turbines_terrain_type: str
|
||||
hydro_electricity_generated_kwh: float
|
||||
# PV array detail (Elmhurst Summary §19.0 "Photovoltaic Panel"
|
||||
# block: kW Peak, Orientation, Elevation, Overshading). Populated
|
||||
# when the cert lodges measured PV; absent (None / "" / 0.0)
|
||||
# otherwise. Drives Appendix M / Appendix U3.3 cost-offset cascade.
|
||||
pv_peak_power_kw: Optional[float] = None
|
||||
pv_orientation: Optional[str] = None # e.g. "South-West"
|
||||
pv_elevation_deg: Optional[int] = None # e.g. 45
|
||||
pv_overshading: Optional[str] = None # e.g. "None Or Little"
|
||||
# block: a list of (kW Peak, Orientation, Elevation, Overshading)
|
||||
# rows). Empty list when the cert hasn't lodged measured PV.
|
||||
# Drives Appendix M / Appendix U3.3 cost-offset cascade — both the
|
||||
# single-array (cohort cert 0380) and multi-array (cohort cert
|
||||
# 0350: 2x 1.5 kWp) layouts go through the same list.
|
||||
pv_arrays: List["ElmhurstPvArray"] = field(
|
||||
default_factory=lambda: [] # type: ignore[reportUnknownLambdaType]
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ElmhurstPvArray:
|
||||
"""One Photovoltaic array row from Summary §19.0. The four fields
|
||||
match the columns in the PDF's PV Panel block."""
|
||||
peak_power_kw: float
|
||||
orientation: str # e.g. "South-West"
|
||||
elevation_deg: int # e.g. 45
|
||||
overshading: str # e.g. "None Or Little"
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue