From bdf703ea0002c88cc443997fdb048f9c6df520db Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 22 May 2026 14:00:33 +0000 Subject: [PATCH] updated rdsap option; seperated s3 location in infrastrucutre; added open ai api --- datatypes/epc/domain/epc_property_data.py | 22 +- datatypes/epc/schema/rdsap_schema_17_0.py | 2 +- datatypes/epc/schema/rdsap_schema_17_1.py | 2 +- datatypes/epc/schema/rdsap_schema_18_0.py | 3 +- datatypes/epc/schema/rdsap_schema_19_0.py | 2 +- datatypes/epc/schema/rdsap_schema_20_0_0.py | 3 +- datatypes/epc/schema/rdsap_schema_21_0_0.py | 6 + datatypes/epc/schema/rdsap_schema_21_0_1.py | 7 +- domain/epc/__init__.py | 4 + domain/epc/epc_record.py | 21 ++ infrastructure/epc/__init__.py | 13 ++ infrastructure/epc/epc_client.py | 41 ++++ infrastructure/epc/exceptions.py | 17 ++ infrastructure/epc/gov_uk/__init__.py | 6 + infrastructure/epc/gov_uk/_retry.py | 34 +++ .../epc/gov_uk/gov_uk_epc_client.py | 132 +++++++++++ .../epc/gov_uk/gov_uk_property_type.py | 25 +++ .../__init__.py | 5 + ...orical_open_data_communities_epc_client.py | 24 ++ infrastructure/openai/__init__.py | 0 infrastructure/openai/exceptions.py | 2 + infrastructure/openai/openai_client.py | 60 +++++ tests/infrastructure/epc/__init__.py | 0 tests/infrastructure/epc/gov_uk/__init__.py | 0 tests/infrastructure/epc/gov_uk/conftest.py | 49 ++++ .../epc/gov_uk/test_gov_uk_epc_client.py | 211 ++++++++++++++++++ 26 files changed, 679 insertions(+), 12 deletions(-) create mode 100644 domain/epc/epc_record.py create mode 100644 infrastructure/epc/__init__.py create mode 100644 infrastructure/epc/epc_client.py create mode 100644 infrastructure/epc/exceptions.py create mode 100644 infrastructure/epc/gov_uk/__init__.py create mode 100644 infrastructure/epc/gov_uk/_retry.py create mode 100644 infrastructure/epc/gov_uk/gov_uk_epc_client.py create mode 100644 infrastructure/epc/gov_uk/gov_uk_property_type.py create mode 100644 infrastructure/epc/historical_open_data_communities/__init__.py create mode 100644 infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py create mode 100644 infrastructure/openai/__init__.py create mode 100644 infrastructure/openai/exceptions.py create mode 100644 infrastructure/openai/openai_client.py create mode 100644 tests/infrastructure/epc/__init__.py create mode 100644 tests/infrastructure/epc/gov_uk/__init__.py create mode 100644 tests/infrastructure/epc/gov_uk/conftest.py create mode 100644 tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py diff --git a/datatypes/epc/domain/epc_property_data.py b/datatypes/epc/domain/epc_property_data.py index d0d4bf10..a8980174 100644 --- a/datatypes/epc/domain/epc_property_data.py +++ b/datatypes/epc/domain/epc_property_data.py @@ -98,7 +98,9 @@ class MainHeatingDetail: boiler_flue_type: Optional[int] = None # TODO: make enum? boiler_ignition_type: Optional[int] = None # TODO: make enum? central_heating_pump_age: Optional[int] = None - central_heating_pump_age_str: Optional[str] = None # str from site notes e.g. "Unknown", "Pre 2013" + central_heating_pump_age_str: Optional[str] = ( + None # str from site notes e.g. "Unknown", "Pre 2013" + ) main_heating_index_number: Optional[int] = None sap_main_heating_code: Optional[int] = None # TODO: make enum? main_heating_number: Optional[int] = None @@ -123,7 +125,7 @@ class ShowerOutlets: @dataclass class SapHeating: - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] has_fixed_air_conditioning: bool cylinder_size: Optional[Union[int, str]] = ( @@ -136,7 +138,9 @@ class SapHeating: cylinder_insulation_type: Optional[Union[int, str]] = None cylinder_thermostat: Optional[str] = None secondary_fuel_type: Optional[int] = None - secondary_heating_type: Optional[Union[int, str]] = None # int from API; str from site notes + secondary_heating_type: Optional[Union[int, str]] = ( + None # int from API; str from site notes + ) cylinder_insulation_thickness_mm: Optional[int] = None # SAP10 hot-water demand inputs from sap_heating. number_baths: Optional[int] = None @@ -159,7 +163,9 @@ class SapHeating: class SapVentilation: ventilation_type: Optional[str] = None draught_lobby: Optional[bool] = None - pressure_test: Optional[str] = None # str from site notes e.g. "No test"; int in API via mechanical_ventilation + pressure_test: Optional[str] = ( + None # str from site notes e.g. "No test"; int in API via mechanical_ventilation + ) open_flues_count: Optional[int] = None closed_flues_count: Optional[int] = None boiler_flues_count: Optional[int] = None @@ -467,8 +473,12 @@ class SapBuildingPart: None # TODO: make enum/mapping? ) floor_type: Optional[str] = None # str from site notes e.g. "Ground Floor" - floor_construction_type: Optional[str] = None # str from site notes; distinct from floor_construction: int in SapFloorDimension - floor_insulation_type_str: Optional[str] = None # str from site notes e.g. "As Built" + floor_construction_type: Optional[str] = ( + None # str from site notes; distinct from floor_construction: int in SapFloorDimension + ) + floor_insulation_type_str: Optional[str] = ( + None # str from site notes e.g. "As Built" + ) floor_u_value_known: Optional[bool] = None roof_construction: Optional[int] = None diff --git a/datatypes/epc/schema/rdsap_schema_17_0.py b/datatypes/epc/schema/rdsap_schema_17_0.py index 22aaded4..9cbedf97 100644 --- a/datatypes/epc/schema/rdsap_schema_17_0.py +++ b/datatypes/epc/schema/rdsap_schema_17_0.py @@ -37,7 +37,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] cylinder_insulation_type: int diff --git a/datatypes/epc/schema/rdsap_schema_17_1.py b/datatypes/epc/schema/rdsap_schema_17_1.py index a4c007ed..b0af07e6 100644 --- a/datatypes/epc/schema/rdsap_schema_17_1.py +++ b/datatypes/epc/schema/rdsap_schema_17_1.py @@ -41,7 +41,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] cylinder_insulation_type: int diff --git a/datatypes/epc/schema/rdsap_schema_18_0.py b/datatypes/epc/schema/rdsap_schema_18_0.py index a038dc9b..4ce2f887 100644 --- a/datatypes/epc/schema/rdsap_schema_18_0.py +++ b/datatypes/epc/schema/rdsap_schema_18_0.py @@ -41,7 +41,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] has_fixed_air_conditioning: str @@ -86,6 +86,7 @@ class SapFloorDimension: @dataclass class SapRoomInRoof: """Room-in-roof details. floor_area is a Measurement object in schema 18.0.""" + floor_area: Measurement insulation: str roof_room_connected: str diff --git a/datatypes/epc/schema/rdsap_schema_19_0.py b/datatypes/epc/schema/rdsap_schema_19_0.py index b94d9bb3..b3c77ec4 100644 --- a/datatypes/epc/schema/rdsap_schema_19_0.py +++ b/datatypes/epc/schema/rdsap_schema_19_0.py @@ -41,7 +41,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] has_fixed_air_conditioning: str diff --git a/datatypes/epc/schema/rdsap_schema_20_0_0.py b/datatypes/epc/schema/rdsap_schema_20_0_0.py index 8f3986a2..9deb235e 100644 --- a/datatypes/epc/schema/rdsap_schema_20_0_0.py +++ b/datatypes/epc/schema/rdsap_schema_20_0_0.py @@ -49,7 +49,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] has_fixed_air_conditioning: str @@ -103,6 +103,7 @@ class SapFloorDimension: @dataclass class SapRoomInRoof: """Room-in-roof details. floor_area is a plain number in schema 20.0.0 (not a Measurement object).""" + floor_area: Union[int, float] insulation: str roof_room_connected: str diff --git a/datatypes/epc/schema/rdsap_schema_21_0_0.py b/datatypes/epc/schema/rdsap_schema_21_0_0.py index 383a4a6e..a080a2ff 100644 --- a/datatypes/epc/schema/rdsap_schema_21_0_0.py +++ b/datatypes/epc/schema/rdsap_schema_21_0_0.py @@ -33,6 +33,7 @@ class ShowerOutlets: @dataclass class InstantaneousWwhrs: """Changed in 21.0.0: references WWHRS product index numbers instead of room counts.""" + wwhrs_index_number1: Optional[int] = None wwhrs_index_number2: Optional[int] = None @@ -61,6 +62,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] has_fixed_air_conditioning: str @@ -113,6 +115,7 @@ class PhotovoltaicArray: `PhotovoltaicSupply`. The Union type on SapEnergySource.photovoltaic_supply accepts either shape. """ + peak_power: float pitch: int orientation: int @@ -179,6 +182,7 @@ class RoomInRoofType1: full enum not yet mapped). `gable_wall_length_*` is the run of the external gable in metres. Heights are NOT lodged here — the cascade applies the §3.9.1 default storey height (2.45 m).""" + gable_wall_type_1: Optional[int] = None gable_wall_type_2: Optional[int] = None gable_wall_length_1: Optional[float] = None @@ -189,6 +193,7 @@ class RoomInRoofType1: class RoomInRoofDetails: """RdSAP §3.9 Detailed RR — per-surface lengths + heights + flat-ceiling detail. See `rdsap_schema_21_0_1.RoomInRoofDetails`.""" + gable_wall_type_1: Optional[int] = None gable_wall_type_2: Optional[int] = None gable_wall_length_1: Optional[float] = None @@ -204,6 +209,7 @@ class RoomInRoofDetails: @dataclass class SapRoomInRoof: """Room-in-roof details. insulation and roof_room_connected removed in schema 21.0.0.""" + floor_area: Union[int, float] construction_age_band: str room_in_roof_type_1: Optional[RoomInRoofType1] = None diff --git a/datatypes/epc/schema/rdsap_schema_21_0_1.py b/datatypes/epc/schema/rdsap_schema_21_0_1.py index e8925863..b1827083 100644 --- a/datatypes/epc/schema/rdsap_schema_21_0_1.py +++ b/datatypes/epc/schema/rdsap_schema_21_0_1.py @@ -50,7 +50,7 @@ class MainHeatingDetail: main_heating_data_source: int emitter_temperature: Optional[Union[int, str]] = None boiler_flue_type: Optional[int] = None - fan_flue_present: Optional[str] = None # TODO: make bool + fan_flue_present: Optional[str] = None # TODO: make bool boiler_ignition_type: Optional[int] = None central_heating_pump_age: Optional[int] = None main_heating_index_number: Optional[int] = None @@ -62,6 +62,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] has_fixed_air_conditioning: str @@ -118,6 +119,7 @@ class PhotovoltaicSupplyNoneOrNoDetails: @dataclass class SchemaPhotovoltaicArray: """One measured PV array under `photovoltaic_supply.pv_arrays`.""" + peak_power: Optional[float] = None pitch: Optional[int] = None orientation: Optional[int] = None @@ -143,6 +145,7 @@ class PhotovoltaicArray: `PhotovoltaicSupply`. The Union type on SapEnergySource.photovoltaic_supply accepts either shape. Some certs wrap the scalars in Measurement dicts. """ + peak_power: Union[Measurement, int, float] pitch: Union[Measurement, int] orientation: Union[Measurement, int] @@ -211,6 +214,7 @@ class RoomInRoofType1: full enum not yet mapped). `gable_wall_length_*` is the run of the external gable in metres. Heights are NOT lodged here — the cascade applies the §3.9.1 default storey height (2.45 m).""" + gable_wall_type_1: Optional[int] = None gable_wall_type_2: Optional[int] = None gable_wall_length_1: Optional[float] = None @@ -225,6 +229,7 @@ class RoomInRoofDetails: by `EpcPropertyDataMapper.from_api_response` to populate `SapRoomInRoof.detailed_surfaces` with `gable_wall_external` / `flat_ceiling` entries the cascade's Detailed-RR branch consumes.""" + gable_wall_type_1: Optional[int] = None gable_wall_type_2: Optional[int] = None gable_wall_length_1: Optional[float] = None diff --git a/domain/epc/__init__.py b/domain/epc/__init__.py index e69de29b..e49fea42 100644 --- a/domain/epc/__init__.py +++ b/domain/epc/__init__.py @@ -0,0 +1,4 @@ +from domain.epc.epc_record import EpcRecord +from domain.epc.property_type import PropertyType + +__all__ = ["EpcRecord", "PropertyType"] diff --git a/domain/epc/epc_record.py b/domain/epc/epc_record.py new file mode 100644 index 00000000..7194d1d6 --- /dev/null +++ b/domain/epc/epc_record.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Optional + +from domain.epc.property_type import PropertyType + + +@dataclass(frozen=True) +class EpcRecord: + """A streamlined record of EPC property data. + + A focused subset of the full ``EpcPropertyData``: a property's identity + plus its typed property type. Grow this with further fields as the + domain needs them. + """ + + address_line_1: str + postcode: str + uprn: Optional[int] + property_type: PropertyType diff --git a/infrastructure/epc/__init__.py b/infrastructure/epc/__init__.py new file mode 100644 index 00000000..f99a7cb3 --- /dev/null +++ b/infrastructure/epc/__init__.py @@ -0,0 +1,13 @@ +from infrastructure.epc.epc_client import EpcClient +from infrastructure.epc.exceptions import ( + EpcApiError, + EpcNotFoundError, + EpcRateLimitError, +) + +__all__ = [ + "EpcApiError", + "EpcClient", + "EpcNotFoundError", + "EpcRateLimitError", +] diff --git a/infrastructure/epc/epc_client.py b/infrastructure/epc/epc_client.py new file mode 100644 index 00000000..d1f8639c --- /dev/null +++ b/infrastructure/epc/epc_client.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Optional + +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.search import EpcSearchResult + + +class EpcClient(ABC): + """Interface for retrieving EPC (Energy Performance Certificate) data. + + Implementations fetch from a data source and return domain objects; + callers depend only on this interface, not on a concrete transport. + """ + + @abstractmethod + def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]: + """Return the EPC certificates registered at ``postcode``. + + Returns an empty list when the postcode has no certificates. + """ + ... + + @abstractmethod + def get_by_certificate_number( + self, certificate_number: str + ) -> EpcPropertyData: + """Return the full EPC record for a certificate number. + + Raises EpcNotFoundError when no such certificate exists. + """ + ... + + @abstractmethod + def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]: + """Return the most recent EPC record for ``uprn``. + + Returns None when the UPRN has no certificates. + """ + ... diff --git a/infrastructure/epc/exceptions.py b/infrastructure/epc/exceptions.py new file mode 100644 index 00000000..8e2e5165 --- /dev/null +++ b/infrastructure/epc/exceptions.py @@ -0,0 +1,17 @@ +from typing import Optional + + +class EpcApiError(Exception): + """Base for all EPC client errors.""" + + +class EpcNotFoundError(EpcApiError): + """Raised when the API returns 404 for a resource that must exist.""" + + +class EpcRateLimitError(EpcApiError): + """Raised when the API returns 429 and all retries are exhausted.""" + + def __init__(self, message: str, retry_after: Optional[float] = None) -> None: + super().__init__(message) + self.retry_after = retry_after diff --git a/infrastructure/epc/gov_uk/__init__.py b/infrastructure/epc/gov_uk/__init__.py new file mode 100644 index 00000000..d491a1ef --- /dev/null +++ b/infrastructure/epc/gov_uk/__init__.py @@ -0,0 +1,6 @@ +from infrastructure.epc.gov_uk.gov_uk_epc_client import GovUkEpcClient +from infrastructure.epc.gov_uk.gov_uk_property_type import ( + property_type_from_gov_uk_code, +) + +__all__ = ["GovUkEpcClient", "property_type_from_gov_uk_code"] diff --git a/infrastructure/epc/gov_uk/_retry.py b/infrastructure/epc/gov_uk/_retry.py new file mode 100644 index 00000000..db92b131 --- /dev/null +++ b/infrastructure/epc/gov_uk/_retry.py @@ -0,0 +1,34 @@ +import time +from typing import Callable, Optional, TypeVar + +from infrastructure.epc.exceptions import EpcRateLimitError + +T = TypeVar("T") + + +def call_with_retry( + fn: Callable[[], T], + max_retries: int = 5, + backoff_base: float = 1.0, + backoff_multiplier: float = 2.0, + max_backoff: float = 60.0, +) -> T: + """Call ``fn``, retrying on EpcRateLimitError with exponential backoff. + + Honours the API's ``Retry-After`` header when present, otherwise backs off + ``backoff_base * backoff_multiplier ** attempt`` (capped at ``max_backoff``). + """ + last_exc: Optional[EpcRateLimitError] = None + for attempt in range(max_retries + 1): + try: + return fn() + except EpcRateLimitError as exc: + last_exc = exc + if attempt < max_retries: + if exc.retry_after is not None: + delay = exc.retry_after + else: + delay = backoff_base * (backoff_multiplier**attempt) + time.sleep(min(delay, max_backoff)) + assert last_exc is not None + raise last_exc diff --git a/infrastructure/epc/gov_uk/gov_uk_epc_client.py b/infrastructure/epc/gov_uk/gov_uk_epc_client.py new file mode 100644 index 00000000..ac0db09f --- /dev/null +++ b/infrastructure/epc/gov_uk/gov_uk_epc_client.py @@ -0,0 +1,132 @@ +# Spec: https://raw.githubusercontent.com/communitiesuk/epb-data-warehouse/main/api/api.yml +from __future__ import annotations + +from typing import Any, Optional + +import httpx + +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.domain.mapper import EpcPropertyDataMapper +from datatypes.epc.search import EpcSearchResult +from infrastructure.epc.epc_client import EpcClient +from infrastructure.epc.exceptions import ( + EpcApiError, + EpcNotFoundError, + EpcRateLimitError, +) +from infrastructure.epc.gov_uk._retry import call_with_retry + + +class GovUkEpcClient(EpcClient): + """EpcClient backed by the live gov.uk EPC API. + + Endpoint: https://api.get-energy-performance-data.communities.gov.uk + """ + + BASE_URL = "https://api.get-energy-performance-data.communities.gov.uk" + REQUEST_TIMEOUT = 10.0 + + def __init__(self, auth_token: str) -> None: + self._headers = { + "Authorization": f"Bearer {auth_token}", + "Accept": "application/json", + } + + def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]: + normalised = self._normalise_postcode(postcode) + return call_with_retry(lambda: self._search(postcode=normalised)) + + def get_by_certificate_number( + self, certificate_number: str + ) -> EpcPropertyData: + raw = call_with_retry(lambda: self._fetch_certificate(certificate_number)) + return EpcPropertyDataMapper.from_api_response(raw) + + def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]: + results = call_with_retry(lambda: self._search(uprn=uprn)) + if not results: + return None + latest = max(results, key=lambda r: r.registration_date) + return self.get_by_certificate_number(latest.certificate_number) + + # ------------------------------------------------------------------ + # Private helpers + # ------------------------------------------------------------------ + + @staticmethod + def _normalise_postcode(postcode: str) -> str: + """Return the postcode with all spaces removed and uppercased.""" + return postcode.replace(" ", "").upper() + + @staticmethod + def _parse_retry_after(resp: httpx.Response) -> Optional[float]: + header = resp.headers.get("Retry-After") + if header is None: + return None + try: + return float(header) + except (TypeError, ValueError): + return None + + def _fetch_certificate(self, certificate_number: str) -> dict[str, Any]: + resp = httpx.get( + f"{self.BASE_URL}/api/certificate", + params={"certificate_number": certificate_number}, + headers=self._headers, + timeout=self.REQUEST_TIMEOUT, + ) + if resp.status_code == 404: + raise EpcNotFoundError(certificate_number) + if resp.status_code == 429: + raise EpcRateLimitError( + "Rate limited by EPC API", + retry_after=self._parse_retry_after(resp), + ) + if not resp.is_success: + raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") + return resp.json()["data"] + + def _search( + self, + postcode: Optional[str] = None, + uprn: Optional[int] = None, + ) -> list[EpcSearchResult]: + params: dict[str, str | int] = {} + if postcode: + params["postcode"] = postcode + if uprn is not None: + params["uprn"] = uprn + + resp = httpx.get( + f"{self.BASE_URL}/api/domestic/search", + params=params, + headers=self._headers, + timeout=self.REQUEST_TIMEOUT, + ) + if resp.status_code == 404: + return [] + if resp.status_code == 429: + raise EpcRateLimitError( + "Rate limited by EPC API", + retry_after=self._parse_retry_after(resp), + ) + if not resp.is_success: + raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") + + rows = resp.json().get("data", []) + return [self._parse_search_result(row) for row in rows] + + @staticmethod + def _parse_search_result(row: dict[str, Any]) -> EpcSearchResult: + return EpcSearchResult( + certificate_number=row["certificateNumber"], + address_line_1=row["addressLine1"], + address_line_2=row.get("addressLine2"), + address_line_3=row.get("addressLine3"), + address_line_4=row.get("addressLine4"), + postcode=row["postcode"], + post_town=row["postTown"], + uprn=row.get("uprn"), + current_energy_efficiency_band=row["currentEnergyEfficiencyBand"], + registration_date=row["registrationDate"], + ) diff --git a/infrastructure/epc/gov_uk/gov_uk_property_type.py b/infrastructure/epc/gov_uk/gov_uk_property_type.py new file mode 100644 index 00000000..a0f4a7a3 --- /dev/null +++ b/infrastructure/epc/gov_uk/gov_uk_property_type.py @@ -0,0 +1,25 @@ +from domain.epc.property_type import PropertyType + +# GOV.UK EPC API ``property_type`` integer codes mapped to the domain type. +# This translation is GOV.UK-specific and lives in the infrastructure layer so +# the domain ``PropertyType`` stays free of any source encoding. +_PROPERTY_TYPE_BY_GOV_UK_CODE: dict[int, PropertyType] = { + 0: PropertyType.HOUSE, + 1: PropertyType.BUNGALOW, + 2: PropertyType.FLAT, + 3: PropertyType.MAISONETTE, + 4: PropertyType.PARK_HOME, +} + + +def property_type_from_gov_uk_code(code: int) -> PropertyType: + """Translate a GOV.UK EPC ``property_type`` code to the domain PropertyType. + + Raises ValueError for a code GOV.UK has not been mapped here yet. + """ + try: + return _PROPERTY_TYPE_BY_GOV_UK_CODE[code] + except KeyError: + raise ValueError( + f"Unknown GOV.UK EPC property type code: {code}" + ) from None diff --git a/infrastructure/epc/historical_open_data_communities/__init__.py b/infrastructure/epc/historical_open_data_communities/__init__.py new file mode 100644 index 00000000..88a69081 --- /dev/null +++ b/infrastructure/epc/historical_open_data_communities/__init__.py @@ -0,0 +1,5 @@ +from infrastructure.epc.historical_open_data_communities.historical_open_data_communities_epc_client import ( + HistoricalOpenDataCommunitiesEpcClient, +) + +__all__ = ["HistoricalOpenDataCommunitiesEpcClient"] diff --git a/infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py b/infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py new file mode 100644 index 00000000..d8c7f9ac --- /dev/null +++ b/infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import Optional + +from domain.epc.epc_record import EpcRecord + + +class HistoricalOpenDataCommunitiesEpcClient: + """EPC client backed by Open Data Communities' historical EPC data. + + Stub — not yet implemented. Every method raises NotImplementedError for + now. Unlike GovUkEpcClient it returns the domain ``EpcRecord`` directly; + once the ``EpcClient`` port is migrated to return ``EpcRecord``, this + adapter should implement it. + """ + + def search_by_postcode(self, postcode: str) -> list[EpcRecord]: + raise NotImplementedError + + def get_by_certificate_number(self, certificate_number: str) -> EpcRecord: + raise NotImplementedError + + def get_by_uprn(self, uprn: int) -> Optional[EpcRecord]: + raise NotImplementedError diff --git a/infrastructure/openai/__init__.py b/infrastructure/openai/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/infrastructure/openai/exceptions.py b/infrastructure/openai/exceptions.py new file mode 100644 index 00000000..14cf95a2 --- /dev/null +++ b/infrastructure/openai/exceptions.py @@ -0,0 +1,2 @@ +class OpenAiClientError(Exception): + """Base for all OpenAI client errors.""" diff --git a/infrastructure/openai/openai_client.py b/infrastructure/openai/openai_client.py new file mode 100644 index 00000000..34af4290 --- /dev/null +++ b/infrastructure/openai/openai_client.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +import os +from typing import Optional + +from openai import OpenAI +from openai.types.chat import ChatCompletionMessageParam + +from infrastructure.openai.exceptions import OpenAiClientError + + +class OpenAiChatClient: + """Thin wrapper over the OpenAI Chat Completions API. + + Sends a single prompt and returns the assistant's reply as plain text. + """ + + DEFAULT_MODEL = "gpt-4o-mini" + + def __init__( + self, + api_key: Optional[str] = None, + model: Optional[str] = None, + ) -> None: + key = api_key or os.environ.get("OPENAI_API_KEY") + if not key: + raise OpenAiClientError( + "No OpenAI API key provided. " + "Pass api_key or set the OPENAI_API_KEY environment variable." + ) + self._client = OpenAI(api_key=key) + self._model = model or self.DEFAULT_MODEL + + def generate( + self, + prompt: str, + system_prompt: Optional[str] = None, + ) -> str: + """Send a prompt to the model and return its reply text. + + Args: + prompt: The user message to send. + system_prompt: Optional instruction that sets the model's behaviour. + + Raises: + OpenAiClientError: If the model returns an empty response. + """ + messages: list[ChatCompletionMessageParam] = [] + if system_prompt: + messages.append({"role": "system", "content": system_prompt}) + messages.append({"role": "user", "content": prompt}) + + response = self._client.chat.completions.create( + model=self._model, + messages=messages, + ) + content = response.choices[0].message.content + if content is None: + raise OpenAiClientError("OpenAI returned an empty response.") + return content diff --git a/tests/infrastructure/epc/__init__.py b/tests/infrastructure/epc/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/infrastructure/epc/gov_uk/__init__.py b/tests/infrastructure/epc/gov_uk/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/infrastructure/epc/gov_uk/conftest.py b/tests/infrastructure/epc/gov_uk/conftest.py new file mode 100644 index 00000000..8fbd3094 --- /dev/null +++ b/tests/infrastructure/epc/gov_uk/conftest.py @@ -0,0 +1,49 @@ +import json +import pathlib + +import pytest + +from infrastructure.epc.gov_uk.gov_uk_epc_client import GovUkEpcClient + +SAMPLES_DIR = pathlib.Path("backend/epc_api/json_samples") + + +@pytest.fixture +def rdsap_21_0_0_cert(): + return json.loads((SAMPLES_DIR / "RdSAP-Schema-21.0.0/epc.json").read_text()) + + +@pytest.fixture +def rdsap_21_0_1_cert(): + return json.loads((SAMPLES_DIR / "RdSAP-Schema-21.0.1/epc.json").read_text()) + + +@pytest.fixture +def epc_client(): + return GovUkEpcClient(auth_token="test-token") + + +def make_search_row( + cert_num="CERT-001", + address_line_1="1 Test Street", + postcode="SW1A 1AA", + post_town="London", + uprn=100023336956, + band="D", + registration_date="2024-01-01", + address_line_2=None, + address_line_3=None, + address_line_4=None, +): + return { + "certificateNumber": cert_num, + "addressLine1": address_line_1, + "addressLine2": address_line_2, + "addressLine3": address_line_3, + "addressLine4": address_line_4, + "postcode": postcode, + "postTown": post_town, + "uprn": uprn, + "currentEnergyEfficiencyBand": band, + "registrationDate": registration_date, + } diff --git a/tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py b/tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py new file mode 100644 index 00000000..46164a0e --- /dev/null +++ b/tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py @@ -0,0 +1,211 @@ +from unittest.mock import MagicMock, call, patch + +import pytest + +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.search import EpcSearchResult +from infrastructure.epc.exceptions import EpcNotFoundError +from tests.infrastructure.epc.gov_uk.conftest import make_search_row + +_SLEEP = "infrastructure.epc.gov_uk._retry.time.sleep" + + +def _mock_response(status_code=200, json_data=None, headers=None): + resp = MagicMock() + resp.status_code = status_code + resp.is_success = 200 <= status_code < 300 + resp.json.return_value = json_data or {} + resp.text = str(json_data) + resp.headers = headers or {} + return resp + + +# --------------------------------------------------------------------------- +# Test 1: get_by_certificate_number happy path +# --------------------------------------------------------------------------- + + +def test_get_by_certificate_number_returns_epc_property_data( + epc_client, rdsap_21_0_1_cert +): + cert_response = {"data": rdsap_21_0_1_cert} + with patch("httpx.get", return_value=_mock_response(200, cert_response)): + result = epc_client.get_by_certificate_number("CERT-001") + + assert isinstance(result, EpcPropertyData) + + +# --------------------------------------------------------------------------- +# Test 2: get_by_certificate_number 404 -> EpcNotFoundError +# --------------------------------------------------------------------------- + + +def test_get_by_certificate_number_404_raises_not_found(epc_client): + with patch("httpx.get", return_value=_mock_response(404)): + with pytest.raises(EpcNotFoundError): + epc_client.get_by_certificate_number("BAD-CERT") + + +# --------------------------------------------------------------------------- +# Test 3: 429 retried, succeeds on 3rd attempt +# --------------------------------------------------------------------------- + + +def test_get_by_certificate_number_retries_on_429_and_succeeds( + epc_client, rdsap_21_0_1_cert +): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429), + _mock_response(429), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch(_SLEEP): + result = epc_client.get_by_certificate_number("CERT-001") + + assert isinstance(result, EpcPropertyData) + + +# --------------------------------------------------------------------------- +# Test 3b: 429 with Retry-After header -> sleeps for that value +# --------------------------------------------------------------------------- + + +def test_429_retry_after_header_drives_sleep_duration( + epc_client, rdsap_21_0_1_cert +): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429, headers={"Retry-After": "7"}), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep: + epc_client.get_by_certificate_number("CERT-001") + + mock_sleep.assert_called_once_with(7.0) + + +# --------------------------------------------------------------------------- +# Test 3c: 429 without Retry-After -> falls back to exponential backoff +# --------------------------------------------------------------------------- + + +def test_429_without_retry_after_uses_exponential_backoff( + epc_client, rdsap_21_0_1_cert +): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429), + _mock_response(429), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep: + epc_client.get_by_certificate_number("CERT-001") + + assert mock_sleep.call_args_list == [call(1.0), call(2.0)] + + +# --------------------------------------------------------------------------- +# Test 3d: malformed Retry-After header -> falls back to exponential backoff +# --------------------------------------------------------------------------- + + +def test_429_malformed_retry_after_falls_back_to_backoff( + epc_client, rdsap_21_0_1_cert +): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429, headers={"Retry-After": "Wed, 21 Oct 2026 07:28:00 GMT"}), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep: + epc_client.get_by_certificate_number("CERT-001") + + mock_sleep.assert_called_once_with(1.0) + + +# --------------------------------------------------------------------------- +# Test 3e: Retry-After capped by max_backoff to avoid hostile/buggy values +# --------------------------------------------------------------------------- + + +def test_429_retry_after_capped_by_max_backoff(epc_client, rdsap_21_0_1_cert): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429, headers={"Retry-After": "9999"}), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep: + epc_client.get_by_certificate_number("CERT-001") + + mock_sleep.assert_called_once_with(60.0) + + +# --------------------------------------------------------------------------- +# Test 4: get_by_uprn empty search -> None +# --------------------------------------------------------------------------- + + +def test_get_by_uprn_returns_none_when_no_results(epc_client): + with patch("httpx.get", return_value=_mock_response(200, {"data": []})): + result = epc_client.get_by_uprn(100023336956) + + assert result is None + + +# --------------------------------------------------------------------------- +# Test 5: get_by_uprn multiple results -> fetches latest by registration_date +# --------------------------------------------------------------------------- + + +def test_get_by_uprn_picks_most_recent_certificate(epc_client, rdsap_21_0_1_cert): + search_rows = [ + make_search_row(cert_num="CERT-OLD", registration_date="2022-01-01"), + make_search_row(cert_num="CERT-NEW", registration_date="2024-06-01"), + make_search_row(cert_num="CERT-MID", registration_date="2023-03-15"), + ] + cert_response = {"data": rdsap_21_0_1_cert} + + def fake_get(url, params=None, **kwargs): + if "search" in url: + return _mock_response(200, {"data": search_rows}) + return _mock_response(200, cert_response) + + with patch("httpx.get", side_effect=fake_get) as mock_get: + result = epc_client.get_by_uprn(100023336956) + + assert isinstance(result, EpcPropertyData) + # Second call must be for the most recent cert + cert_call = mock_get.call_args_list[1] + assert cert_call.kwargs["params"]["certificate_number"] == "CERT-NEW" + + +# --------------------------------------------------------------------------- +# Test 6: search_by_postcode returns list[EpcSearchResult] +# --------------------------------------------------------------------------- + + +def test_search_by_postcode_returns_results(epc_client): + rows = [ + make_search_row(cert_num="CERT-A", address_line_1="1 High Street"), + make_search_row(cert_num="CERT-B", address_line_1="2 High Street"), + ] + with patch("httpx.get", return_value=_mock_response(200, {"data": rows})): + results = epc_client.search_by_postcode("SW1A 1AA") + + assert len(results) == 2 + assert all(isinstance(r, EpcSearchResult) for r in results) + assert results[0].certificate_number == "CERT-A" + assert results[1].address_line_1 == "2 High Street" + + +# --------------------------------------------------------------------------- +# Test 7: search_by_postcode 404 -> empty list +# --------------------------------------------------------------------------- + + +def test_search_by_postcode_404_returns_empty_list(epc_client): + with patch("httpx.get", return_value=_mock_response(404)): + results = epc_client.search_by_postcode("ZZ9 9ZZ") + + assert results == []