From 4264e0ad4b4cb8473ecb11be619ef64cbaeb4560 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 26 May 2026 21:38:14 +0000 Subject: [PATCH] =?UTF-8?q?Slice=2099d:=20surface=20PV=20array=20from=20El?= =?UTF-8?q?mhurst=20Summary=20=C2=A719.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cert 9501 lodges measured PV: 2.36 kWp South-West, 45° pitch, "None Or Little" overshading. The worksheet's §10a credit (-250.02 GBP = PV used in dwelling £-129.49 + PV exported £-120.53) depends on the Appendix M / Appendix U3.3 cascade reading these from `SapEnergySource.photovoltaic_arrays`. The prior extractor only captured the `photovoltaic_panel: "Panel details"` label — the actual kW / orientation / elevation / overshading were silently dropped, so the cascade computed total cost ~£250 too high → ECF 2.92 vs worksheet 2.26 → SAP 59.26 vs 68.53 (Δ -9.27). Changes: - Extend `surveys.elmhurst_site_notes.Renewables` with 4 new optional fields: pv_peak_power_kw / pv_orientation / pv_elevation_deg / pv_overshading. - Add `ElmhurstSiteNotesExtractor._extract_pv_array_detail` — anchors on "Photovoltaic panel details" then reads the 4 consecutive value lines (kWp, orientation, elevation, overshading). - Add `_elmhurst_pv_arrays` mapper helper to build the `[PhotovoltaicArray(...)]` list when all 4 values are present; return None for the "PV absent" path the cascade already handles. - Add `_ELMHURST_PV_OVERSHADING_TO_RDSAP` map: "None Or Little" → 1 (ZPV=1.0 per cert_to_inputs._PV_OVERSHADING_FACTOR), "Modest" → 2, "Significant" → 3, "Heavy" → 4. RdSAP omits SAP10.2 Table M1's 5th "Severe" bucket. - Wire `photovoltaic_arrays=_elmhurst_pv_arrays(survey.renewables)` into `from_elmhurst_site_notes`'s `SapEnergySource(...)` call. 
Effect on cert 9501 Summary path: - sap_continuous 59.2585 → 68.7577 (target 68.5252; Δ +0.23) - total_fuel_cost £1099 → £843 (worksheet £849; -£6 over-credit) - ECF 2.92 → 2.24 (worksheet 2.26; -0.02 over-credit) The remaining +0.23 SAP / +£6 cost drift is a precision gap in the Appendix M cost-offset cascade for measured PV (not a missing-data gap); next slice closes it to 1e-4. Co-Authored-By: Claude Opus 4.7 --- .../documents_parser/elmhurst_extractor.py | 64 +++++++++++++++++++ .../tests/test_summary_pdf_mapper_chain.py | 26 ++++++++ datatypes/epc/domain/mapper.py | 51 +++++++++++++++ datatypes/epc/surveys/elmhurst_site_notes.py | 8 +++ 4 files changed, 149 insertions(+) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index 07b02248..78a86d97 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -1089,6 +1089,8 @@ class ElmhurstSiteNotesExtractor: hydro_raw = self._next_val("Electricity generated [kWh/year]") hydro = float(hydro_raw) if hydro_raw else 0.0 + pv = self._extract_pv_array_detail() + return Renewables( solar_water_heating=self._bool_val("Solar Water Heating"), wwhrs_present=self._bool_val("Is WWHRS present in the property?"), @@ -1098,8 +1100,70 @@ class ElmhurstSiteNotesExtractor: wind_turbine_present=self._bool_val("Wind turbine present?"), wind_turbines_terrain_type=terrain, hydro_electricity_generated_kwh=hydro, + pv_peak_power_kw=pv[0], + pv_orientation=pv[1], + pv_elevation_deg=pv[2], + pv_overshading=pv[3], ) + def _extract_pv_array_detail( + self, + ) -> tuple[Optional[float], Optional[str], Optional[int], Optional[str]]: + """Parse the Elmhurst Summary §19.0 PV Panel section. Returns + (kw_peak, orientation, elevation_deg, overshading) when the cert + lodges measured PV; (None, None, None, None) when absent. 
+ + The Summary's PV block looks like: + Photovoltaic panel details + PV Cells kW Peak Orientation + Elevation + Overshading + + 2.36 + South-West + 45° + None Or Little + + — the 4 values follow the header block in a known order, one + per line. Anchor on "Photovoltaic panel details" → skip the + header lines → read 4 values. + """ + anchor = "Photovoltaic panel details" + try: + idx = next(i for i, l in enumerate(self._lines) if l == anchor) + except StopIteration: + return (None, None, None, None) + # The 3 header lines after the anchor are: + # "PV Cells kW Peak Orientation", "Elevation", "Overshading" + # followed by 4 value lines. Slice the next 11 lines and + # keep the first 4 non-blank entries that do not contain a + # header token. + tail = self._lines[idx + 1 : idx + 12] + header_tokens = {"pv cells", "kw peak", "orientation", "elevation", "overshading"} + values: List[str] = [] + for line in tail: + stripped = line.strip() + if not stripped: + continue + lower = stripped.lower() + if any(h in lower for h in header_tokens): + continue + values.append(stripped) + if len(values) == 4: + break + if len(values) < 4: + return (None, None, None, None) + try: + kwp = float(values[0]) + except ValueError: + return (None, None, None, None) + orientation = values[1] + # Elevation lodged as "45°" — strip trailing degree symbol.
+ m = re.match(r"^(\d+)", values[2]) + elevation = int(m.group(1)) if m else None + overshading = values[3] + return (kwp, orientation, elevation, overshading) + def extract(self) -> ElmhurstSiteNotes: emissions_raw = self._next_val("Emissions (t/year)") co2 = float(emissions_raw.split()[0]) if emissions_raw else 0.0 diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index e8fd503d..82594163 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -381,6 +381,32 @@ def test_summary_9501_rr_gable_walls_route_to_external_walls_hlc() -> None: assert abs(ht.walls_w_per_k - worksheet_walls_w_per_k) <= 1e-2 +def test_summary_9501_pv_array_surfaced_from_elmhurst_section_19() -> None: + # Arrange — cert 9501's Elmhurst §19.0 PV section lodges measured + # array detail (2.36 kWp, South-West orientation, 45° elevation, + # "None Or Little" overshading). The worksheet's §10a PV credit + # of -250.02 GBP (-129.49 used in dwelling + -120.53 exported) + # depends on Appendix M / Appendix U3.3 reading these from the + # cascade's `SapEnergySource.photovoltaic_arrays` list. Without + # the array surfacing the cascade computes total cost +£250 too + # high → ECF 2.92 vs worksheet 2.26 → SAP 59.26 vs 68.53 (current + # Δ -9.27 after Slice 99c closed the fabric heat loss). 
+ pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + arrays = epc.sap_energy_source.photovoltaic_arrays + assert arrays is not None + assert len(arrays) == 1 + assert abs(arrays[0].peak_power - 2.36) <= 1e-4 + assert arrays[0].orientation == 6 # SAP octant: South-West + assert arrays[0].pitch == 45 + assert arrays[0].overshading == 1 # RdSAP code: None or very little + + def test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert 001479 (Summary_001479.pdf / P960-0001-001479.pdf) # is the first cohort cert with a real GOV.UK EPB API counterpart diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index ea9bfec1..91a3a888 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -67,6 +67,7 @@ from datatypes.epc.surveys.elmhurst_site_notes import ( ElmhurstSiteNotes, FloorDetails as ElmhurstFloorDetails, MainHeating as ElmhurstMainHeating, + Renewables as ElmhurstRenewables, RoofDetails as ElmhurstRoofDetails, RoomInRoof as ElmhurstRoomInRoof, RoomInRoofSurface as ElmhurstRoomInRoofSurface, @@ -321,6 +322,7 @@ class EpcPropertyDataMapper: is_dwelling_export_capable=survey.renewables.export_capable_meter, wind_turbines_terrain_type=survey.renewables.wind_turbines_terrain_type, electricity_smart_meter_present=survey.meters.electricity_smart_meter, + photovoltaic_arrays=_elmhurst_pv_arrays(survey.renewables), ), sap_building_parts=_map_elmhurst_building_parts( survey, is_flat=property_type.lower() == "flat", @@ -2903,6 +2905,55 @@ def _map_elmhurst_room_in_roof( ) +# Elmhurst PV-overshading description → RdSAP code per SAP10.2 Table M1 +# (collapsed to the 4 RdSAP buckets per cert_to_inputs._PV_OVERSHADING_ +# FACTOR). 
Strings are the §19.0 PV-block values lodged by the Elmhurst +# Summary PDF; lower-cased for case-insensitive matching. +_ELMHURST_PV_OVERSHADING_TO_RDSAP: Dict[str, int] = { + "none or little": 1, # SAP "None or very little" — ZPV=1.0 + "none or very little": 1, + "modest": 2, + "significant": 3, + "heavy": 4, +} + + +def _elmhurst_pv_arrays( + renewables: ElmhurstRenewables, +) -> Optional[List[PhotovoltaicArray]]: + """Build the Appendix M / Appendix U3.3 cost-offset cascade's input + list from the Elmhurst Summary §19.0 PV detail. Returns None when + the cert hasn't lodged measured PV (no kW Peak value) — the cohort + PV-absent path the cascade already handles correctly. + + kW peak, orientation and elevation are required; missing any + collapses to None so the cascade defers to the legacy + `photovoltaic_supply.percent_roof_area` fallback. A missing or + unrecognised overshading defaults to code 1 instead. + """ + if renewables.pv_peak_power_kw is None or renewables.pv_peak_power_kw <= 0.0: + return None + if renewables.pv_orientation is None or renewables.pv_elevation_deg is None: + return None + return [ + PhotovoltaicArray( + peak_power=renewables.pv_peak_power_kw, + pitch=renewables.pv_elevation_deg, + orientation=_elmhurst_orientation_int(renewables.pv_orientation), + overshading=_elmhurst_pv_overshading_int(renewables.pv_overshading), + ) + ] + + +def _elmhurst_pv_overshading_int(description: Optional[str]) -> int: + """Map an Elmhurst PV-overshading description to the RdSAP integer + code. Falls back to 1 (None or very little, ZPV=1.0) when missing + or unrecognised — modal lodging assumption.""" + if description is None: + return 1 + return _ELMHURST_PV_OVERSHADING_TO_RDSAP.get(description.strip().lower(), 1) + + # Elmhurst orientation strings → SAP10 octant integer (1=N..8=NW). # Covers the orderings the layout-style window parser produces, both # single-direction ("East") and combined ("North-West") forms.
diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index d4f95665..a110517b 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -241,6 +241,14 @@ class Renewables: wind_turbine_present: bool wind_turbines_terrain_type: str hydro_electricity_generated_kwh: float + # PV array detail (Elmhurst Summary §19.0 "Photovoltaic Panel" + # block: kW Peak, Orientation, Elevation, Overshading). Populated + # when the cert lodges measured PV; each field defaults to None + # otherwise. Drives Appendix M / Appendix U3.3 cost-offset cascade. + pv_peak_power_kw: Optional[float] = None + pv_orientation: Optional[str] = None # e.g. "South-West" + pv_elevation_deg: Optional[int] = None # e.g. 45 + pv_overshading: Optional[str] = None # e.g. "None Or Little" @dataclass