diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py
index 07b02248..78a86d97 100644
--- a/backend/documents_parser/elmhurst_extractor.py
+++ b/backend/documents_parser/elmhurst_extractor.py
@@ -1089,6 +1089,8 @@ class ElmhurstSiteNotesExtractor:
         hydro_raw = self._next_val("Electricity generated [kWh/year]")
         hydro = float(hydro_raw) if hydro_raw else 0.0
 
+        pv_kwp, pv_orient, pv_elev, pv_shade = self._extract_pv_array_detail()
+
         return Renewables(
             solar_water_heating=self._bool_val("Solar Water Heating"),
             wwhrs_present=self._bool_val("Is WWHRS present in the property?"),
@@ -1098,8 +1100,80 @@ class ElmhurstSiteNotesExtractor:
             wind_turbine_present=self._bool_val("Wind turbine present?"),
             wind_turbines_terrain_type=terrain,
             hydro_electricity_generated_kwh=hydro,
+            pv_peak_power_kw=pv_kwp,
+            pv_orientation=pv_orient,
+            pv_elevation_deg=pv_elev,
+            pv_overshading=pv_shade,
         )
 
+    def _extract_pv_array_detail(
+        self,
+    ) -> tuple[Optional[float], Optional[str], Optional[int], Optional[str]]:
+        """Parse the Elmhurst Summary §19.0 PV Panel section.
+
+        Returns ``(kw_peak, orientation, elevation_deg, overshading)``
+        when the cert lodges measured PV, or ``(None, None, None, None)``
+        when the anchor line is absent, fewer than four value lines are
+        found after it, or the first value does not parse as a float.
+        In a populated tuple ``elevation_deg`` alone may still be
+        ``None`` when the elevation line does not start with digits.
+
+        The Summary's PV block looks like:
+            Photovoltaic panel details
+            PV Cells kW Peak Orientation
+            Elevation
+            Overshading
+
+            2.36
+            South-West
+            45°
+            None Or Little
+
+        — the 4 values follow the header block in a known order, one
+        per line. Anchor on "Photovoltaic panel details" → skip the
+        header lines → read 4 values.
+        """
+        absent = (None, None, None, None)
+        anchor = "Photovoltaic panel details"
+        # Compare stripped text so the anchor tolerates the same
+        # padding the value lines below are already stripped of.
+        idx = next(
+            (i for i, line in enumerate(self._lines) if line.strip() == anchor),
+            None,
+        )
+        if idx is None:
+            return absent
+        # The header lines after the anchor are:
+        # "PV Cells kW Peak Orientation", "Elevation", "Overshading"
+        # followed by 4 value lines. Scan the next 11 lines and keep
+        # the first 4 non-blank entries that look like values (not
+        # headers).
+        tail = self._lines[idx + 1 : idx + 12]
+        header_tokens = {"pv cells", "kw peak", "orientation", "elevation", "overshading"}
+        values: List[str] = []
+        for line in tail:
+            stripped = line.strip()
+            if not stripped:
+                continue
+            lower = stripped.lower()
+            if any(h in lower for h in header_tokens):
+                continue
+            values.append(stripped)
+            if len(values) == 4:
+                break
+        if len(values) < 4:
+            return absent
+        try:
+            kwp = float(values[0])
+        except ValueError:
+            return absent
+        orientation = values[1]
+        # Elevation lodged as "45°" — keep only the leading digits.
+        m = re.match(r"^(\d+)", values[2])
+        elevation = int(m.group(1)) if m else None
+        overshading = values[3]
+        return (kwp, orientation, elevation, overshading)
+
     def extract(self) -> ElmhurstSiteNotes:
         emissions_raw = self._next_val("Emissions (t/year)")
         co2 = float(emissions_raw.split()[0]) if emissions_raw else 0.0
diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
index e8fd503d..82594163 100644
--- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
+++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
@@ -381,6 +381,32 @@ def test_summary_9501_rr_gable_walls_route_to_external_walls_hlc() -> None:
     assert abs(ht.walls_w_per_k - worksheet_walls_w_per_k) <= 1e-2
 
 
+def test_summary_9501_pv_array_surfaced_from_elmhurst_section_19() -> None:
+    # Arrange — cert 9501's Elmhurst §19.0 PV section lodges measured
+    # array detail (2.36 kWp, South-West orientation, 45° elevation,
+    # "None Or Little" overshading). The worksheet's §10a PV credit
+    # of -250.02 GBP (-129.49 used in dwelling + -120.53 exported)
+    # depends on Appendix M / Appendix U3.3 reading these from the
+    # cascade's `SapEnergySource.photovoltaic_arrays` list. Without
+    # the array surfacing the cascade computes total cost +£250 too
+    # high → ECF 2.92 vs worksheet 2.26 → SAP 59.26 vs 68.53 (current
+    # Δ -9.27 after Slice 99c closed the fabric heat loss).
+    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF)
+    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+
+    # Act
+    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+
+    # Assert
+    arrays = epc.sap_energy_source.photovoltaic_arrays
+    assert arrays is not None
+    assert len(arrays) == 1
+    assert abs(arrays[0].peak_power - 2.36) <= 1e-4
+    assert arrays[0].orientation == 6  # SAP octant: South-West
+    assert arrays[0].pitch == 45
+    assert arrays[0].overshading == 1  # RdSAP code: None or very little
+
+
 def test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
     # Arrange — cert 001479 (Summary_001479.pdf / P960-0001-001479.pdf)
     # is the first cohort cert with a real GOV.UK EPB API counterpart
diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py
index ea9bfec1..91a3a888 100644
--- a/datatypes/epc/domain/mapper.py
+++ b/datatypes/epc/domain/mapper.py
@@ -67,6 +67,7 @@ from datatypes.epc.surveys.elmhurst_site_notes import (
     ElmhurstSiteNotes,
     FloorDetails as ElmhurstFloorDetails,
     MainHeating as ElmhurstMainHeating,
+    Renewables as ElmhurstRenewables,
     RoofDetails as ElmhurstRoofDetails,
     RoomInRoof as ElmhurstRoomInRoof,
     RoomInRoofSurface as ElmhurstRoomInRoofSurface,
@@ -321,6 +322,7 @@ class EpcPropertyDataMapper:
                 is_dwelling_export_capable=survey.renewables.export_capable_meter,
                 wind_turbines_terrain_type=survey.renewables.wind_turbines_terrain_type,
                 electricity_smart_meter_present=survey.meters.electricity_smart_meter,
+                photovoltaic_arrays=_elmhurst_pv_arrays(survey.renewables),
             ),
             sap_building_parts=_map_elmhurst_building_parts(
                 survey, is_flat=property_type.lower() == "flat",
@@ -2903,6 +2905,57 @@ def _map_elmhurst_room_in_roof(
     )
 
 
+# Elmhurst PV-overshading description → RdSAP code per SAP10.2 Table M1
+# (collapsed to the 4 RdSAP buckets per
+# cert_to_inputs._PV_OVERSHADING_FACTOR). Strings are the §19.0 PV-block
+# values lodged by the Elmhurst Summary PDF; keys are lower-case so
+# lookups can be case-insensitive.
+_ELMHURST_PV_OVERSHADING_TO_RDSAP: Dict[str, int] = {
+    "none or little": 1,  # SAP "None or very little" — ZPV=1.0
+    "none or very little": 1,
+    "modest": 2,
+    "significant": 3,
+    "heavy": 4,
+}
+
+
+def _elmhurst_pv_arrays(
+    renewables: ElmhurstRenewables,
+) -> Optional[List[PhotovoltaicArray]]:
+    """Build the Appendix M / Appendix U3.3 cost-offset cascade's input
+    list from the Elmhurst Summary §19.0 PV detail. Returns None when
+    the cert hasn't lodged measured PV (no positive kW Peak value) — the
+    cohort PV-absent path the cascade already handles correctly.
+
+    kW peak, orientation and elevation are all required for a
+    meaningful Appendix M output; missing any of them collapses to
+    None so the cascade defers to the legacy
+    `photovoltaic_supply.percent_roof_area` fallback. Overshading is
+    optional: a missing or unrecognised description maps to code 1.
+    """
+    if renewables.pv_peak_power_kw is None or renewables.pv_peak_power_kw <= 0.0:
+        return None
+    if renewables.pv_orientation is None or renewables.pv_elevation_deg is None:
+        return None
+    return [
+        PhotovoltaicArray(
+            peak_power=renewables.pv_peak_power_kw,
+            pitch=renewables.pv_elevation_deg,
+            orientation=_elmhurst_orientation_int(renewables.pv_orientation),
+            overshading=_elmhurst_pv_overshading_int(renewables.pv_overshading),
+        )
+    ]
+
+
+def _elmhurst_pv_overshading_int(description: Optional[str]) -> int:
+    """Map an Elmhurst PV-overshading description to the RdSAP integer
+    code. Falls back to 1 (None or very little, ZPV=1.0) when missing
+    or unrecognised — modal lodging assumption."""
+    if description is None:
+        return 1
+    return _ELMHURST_PV_OVERSHADING_TO_RDSAP.get(description.strip().lower(), 1)
+
+
 # Elmhurst orientation strings → SAP10 octant integer (1=N..8=NW).
 # Covers the orderings the layout-style window parser produces, both
 # single-direction ("East") and combined ("North-West") forms.
diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py
index d4f95665..a110517b 100644
--- a/datatypes/epc/surveys/elmhurst_site_notes.py
+++ b/datatypes/epc/surveys/elmhurst_site_notes.py
@@ -241,6 +241,14 @@ class Renewables:
     wind_turbine_present: bool
     wind_turbines_terrain_type: str
     hydro_electricity_generated_kwh: float
+    # PV array detail (Elmhurst Summary §19.0 "Photovoltaic Panel"
+    # block: kW Peak, Orientation, Elevation, Overshading). Populated
+    # when the cert lodges measured PV; all four stay None otherwise.
+    # Drives Appendix M / Appendix U3.3 cost-offset cascade.
+    pv_peak_power_kw: Optional[float] = None
+    pv_orientation: Optional[str] = None  # e.g. "South-West"
+    pv_elevation_deg: Optional[int] = None  # e.g. 45
+    pv_overshading: Optional[str] = None  # e.g. "None Or Little"
 
 
 @dataclass