From 675aa089c937c51aa6c6b59df52aa19814e9a3de Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 22 May 2026 14:00:33 +0000 Subject: [PATCH] updated rdsap option; seperated s3 location in infrastrucutre; added open ai api --- applications/SAL/handler.py | 37 +-- applications/postcode_splitter/handler.py | 2 +- datatypes/epc/domain/epc_property_data.py | 22 +- datatypes/epc/schema/rdsap_schema_17_0.py | 2 +- datatypes/epc/schema/rdsap_schema_17_1.py | 2 +- datatypes/epc/schema/rdsap_schema_18_0.py | 3 +- datatypes/epc/schema/rdsap_schema_19_0.py | 2 +- datatypes/epc/schema/rdsap_schema_20_0_0.py | 3 +- datatypes/epc/schema/rdsap_schema_21_0_0.py | 4 +- datatypes/epc/schema/rdsap_schema_21_0_1.py | 4 +- domain/epc/__init__.py | 4 + domain/epc/epc_record.py | 21 ++ domain/epc/property_type.py | 9 + infrastructure/epc/__init__.py | 13 ++ infrastructure/epc/epc_client.py | 41 ++++ infrastructure/epc/exceptions.py | 17 ++ infrastructure/epc/gov_uk/__init__.py | 6 + infrastructure/epc/gov_uk/_retry.py | 34 +++ .../epc/gov_uk/gov_uk_epc_client.py | 132 +++++++++++ .../epc/gov_uk/gov_uk_property_type.py | 25 +++ .../__init__.py | 5 + ...orical_open_data_communities_epc_client.py | 24 ++ infrastructure/openai/__init__.py | 0 infrastructure/openai/exceptions.py | 2 + infrastructure/openai/openai_client.py | 60 +++++ infrastructure/s3/__init__.py | 0 infrastructure/{ => s3}/csv_s3_client.py | 4 +- infrastructure/{ => s3}/s3_client.py | 0 infrastructure/{ => s3}/s3_uri.py | 0 ...dardised_address_list_csv_s3_repository.py | 2 +- tests/infrastructure/epc/__init__.py | 0 tests/infrastructure/epc/gov_uk/__init__.py | 0 tests/infrastructure/epc/gov_uk/conftest.py | 49 ++++ .../epc/gov_uk/test_gov_uk_epc_client.py | 211 ++++++++++++++++++ tests/infrastructure/test_csv_s3_client.py | 2 +- tests/infrastructure/test_s3_client.py | 2 +- tests/infrastructure/test_s3_uri.py | 2 +- .../test_postcode_splitter_orchestrator.py | 2 +- ...dardised_address_list_csv_s3_repository.py | 2 +- 39 files changed, 709 insertions(+), 41 deletions(-) create mode 100644 domain/epc/__init__.py create mode 100644 domain/epc/epc_record.py create mode 100644 domain/epc/property_type.py create mode 100644 infrastructure/epc/__init__.py create mode 100644 infrastructure/epc/epc_client.py create mode 100644 infrastructure/epc/exceptions.py create mode 100644 infrastructure/epc/gov_uk/__init__.py create mode 100644 infrastructure/epc/gov_uk/_retry.py create mode 100644 infrastructure/epc/gov_uk/gov_uk_epc_client.py create mode 100644 infrastructure/epc/gov_uk/gov_uk_property_type.py create mode 100644 infrastructure/epc/historical_open_data_communities/__init__.py create mode 100644 infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py create mode 100644 infrastructure/openai/__init__.py create mode 100644 infrastructure/openai/exceptions.py create mode 100644 infrastructure/openai/openai_client.py create mode 100644 infrastructure/s3/__init__.py rename infrastructure/{ => s3}/csv_s3_client.py (95%) rename infrastructure/{ => s3}/s3_client.py (100%) rename infrastructure/{ => s3}/s3_uri.py (100%) create mode 100644 tests/infrastructure/epc/__init__.py create mode 100644 tests/infrastructure/epc/gov_uk/__init__.py create mode 100644 tests/infrastructure/epc/gov_uk/conftest.py create mode 100644 tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py diff --git a/applications/SAL/handler.py b/applications/SAL/handler.py index 6076a662..f354171c 100644 --- a/applications/SAL/handler.py +++ b/applications/SAL/handler.py @@ -3,12 +3,14 @@ import boto3 from orchestration.sal_orchestrator import ( SALOrchestrator, ) -from infrastructure.csv_s3_client import CsvS3Client +from infrastructure.s3.csv_s3_client import CsvS3Client from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import ( UnstandardisedAddressListCsvS3Repository, ) from domain.addresses.unstandardised_address import AddressList +from infrastructure.epc.gov_uk import GovUkEpcClient + def handler( body: dict[str, Any], @@ -24,7 +26,9 @@ def handler( boto_s3: Any = boto3_client("s3") csv_client = CsvS3Client(boto_s3, bucket) - unstandardised_address_repo = UnstandardisedAddressListCsvS3Repository(csv_client, bucket) + unstandardised_address_repo = UnstandardisedAddressListCsvS3Repository( + csv_client, bucket + ) sal = SALOrchestrator( unstandardised_address_repo=unstandardised_address_repo, @@ -36,20 +40,17 @@ def handler( list_of_unstandardised_address=addressList ) - # Read csv of user input - # get the column and unique variations of each description - # { walls: "wall variation 1", "wall varition 2"} - # Call chatgpt(input from landlord, our way of understanding the mapping) Retrun -> lanlordMapped + """ + ---- + # TODO Property Type: + # 1) Make a small enum with all property types (5 enum) + # 2) Make an interface with ChatGPTAi to get wall field description and map it to enum + # 3) Stroe in landlord overrides + # TODO Wall Type: + # 1) Make a small enum with all property types (5 enum) + # 2) Make an interface with ChatGPTAi to get wall field description and map it to enum + # 3) Stroe in landlord overrides + --- + """ - - ENUM Walls: - cavity_wall_1976: 1 - - # 1) COuld download site notes from pashub and get - # 2) Open Data communites API -> - # 3) new api - - # User story: - # cavity: asbuilt (1976 - 1982): - - return {"hello world": ["hello world"]} + return {"hello": ["200"]} diff --git a/applications/postcode_splitter/handler.py b/applications/postcode_splitter/handler.py index ac2c4e99..e34a6af3 100644 --- a/applications/postcode_splitter/handler.py +++ b/applications/postcode_splitter/handler.py @@ -9,7 +9,7 @@ from applications.postcode_splitter.postcode_splitter_trigger_body import ( PostcodeSplitterTriggerBody, ) from infrastructure.address2uprn_queue_client import Address2UprnQueueClient -from infrastructure.csv_s3_client import CsvS3Client +from infrastructure.s3.csv_s3_client import CsvS3Client from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator from orchestration.task_orchestrator import TaskOrchestrator from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import ( diff --git a/datatypes/epc/domain/epc_property_data.py b/datatypes/epc/domain/epc_property_data.py index 8795b389..68a25205 100644 --- a/datatypes/epc/domain/epc_property_data.py +++ b/datatypes/epc/domain/epc_property_data.py @@ -29,7 +29,9 @@ class MainHeatingDetail: boiler_flue_type: Optional[int] = None # TODO: make enum? boiler_ignition_type: Optional[int] = None # TODO: make enum? central_heating_pump_age: Optional[int] = None - central_heating_pump_age_str: Optional[str] = None # str from site notes e.g. "Unknown", "Pre 2013" + central_heating_pump_age_str: Optional[str] = ( + None # str from site notes e.g. "Unknown", "Pre 2013" + ) main_heating_index_number: Optional[int] = None sap_main_heating_code: Optional[int] = None # TODO: make enum? main_heating_number: Optional[int] = None @@ -54,7 +56,7 @@ class ShowerOutlets: @dataclass class SapHeating: - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] has_fixed_air_conditioning: bool cylinder_size: Optional[Union[int, str]] = ( @@ -67,7 +69,9 @@ class SapHeating: cylinder_insulation_type: Optional[Union[int, str]] = None cylinder_thermostat: Optional[str] = None secondary_fuel_type: Optional[int] = None - secondary_heating_type: Optional[Union[int, str]] = None # int from API; str from site notes + secondary_heating_type: Optional[Union[int, str]] = ( + None # int from API; str from site notes + ) cylinder_insulation_thickness_mm: Optional[int] = None @@ -75,7 +79,9 @@ class SapHeating: class SapVentilation: ventilation_type: Optional[str] = None draught_lobby: Optional[bool] = None - pressure_test: Optional[str] = None # str from site notes e.g. "No test"; int in API via mechanical_ventilation + pressure_test: Optional[str] = ( + None # str from site notes e.g. "No test"; int in API via mechanical_ventilation + ) open_flues_count: Optional[int] = None closed_flues_count: Optional[int] = None boiler_flues_count: Optional[int] = None @@ -219,8 +225,12 @@ class SapBuildingPart: None # TODO: make enum/mapping? ) floor_type: Optional[str] = None # str from site notes e.g. "Ground Floor" - floor_construction_type: Optional[str] = None # str from site notes; distinct from floor_construction: int in SapFloorDimension - floor_insulation_type_str: Optional[str] = None # str from site notes e.g. "As Built" + floor_construction_type: Optional[str] = ( + None # str from site notes; distinct from floor_construction: int in SapFloorDimension + ) + floor_insulation_type_str: Optional[str] = ( + None # str from site notes e.g. "As Built" + ) floor_u_value_known: Optional[bool] = None roof_construction: Optional[int] = None diff --git a/datatypes/epc/schema/rdsap_schema_17_0.py b/datatypes/epc/schema/rdsap_schema_17_0.py index 22aaded4..9cbedf97 100644 --- a/datatypes/epc/schema/rdsap_schema_17_0.py +++ b/datatypes/epc/schema/rdsap_schema_17_0.py @@ -37,7 +37,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] cylinder_insulation_type: int diff --git a/datatypes/epc/schema/rdsap_schema_17_1.py b/datatypes/epc/schema/rdsap_schema_17_1.py index a4c007ed..b0af07e6 100644 --- a/datatypes/epc/schema/rdsap_schema_17_1.py +++ b/datatypes/epc/schema/rdsap_schema_17_1.py @@ -41,7 +41,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] cylinder_insulation_type: int diff --git a/datatypes/epc/schema/rdsap_schema_18_0.py b/datatypes/epc/schema/rdsap_schema_18_0.py index a038dc9b..4ce2f887 100644 --- a/datatypes/epc/schema/rdsap_schema_18_0.py +++ b/datatypes/epc/schema/rdsap_schema_18_0.py @@ -41,7 +41,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] has_fixed_air_conditioning: str @@ -86,6 +86,7 @@ class SapFloorDimension: @dataclass class SapRoomInRoof: """Room-in-roof details. floor_area is a Measurement object in schema 18.0.""" + floor_area: Measurement insulation: str roof_room_connected: str diff --git a/datatypes/epc/schema/rdsap_schema_19_0.py b/datatypes/epc/schema/rdsap_schema_19_0.py index b94d9bb3..b3c77ec4 100644 --- a/datatypes/epc/schema/rdsap_schema_19_0.py +++ b/datatypes/epc/schema/rdsap_schema_19_0.py @@ -41,7 +41,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] has_fixed_air_conditioning: str diff --git a/datatypes/epc/schema/rdsap_schema_20_0_0.py b/datatypes/epc/schema/rdsap_schema_20_0_0.py index 8f3986a2..9deb235e 100644 --- a/datatypes/epc/schema/rdsap_schema_20_0_0.py +++ b/datatypes/epc/schema/rdsap_schema_20_0_0.py @@ -49,7 +49,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] has_fixed_air_conditioning: str @@ -103,6 +103,7 @@ class SapFloorDimension: @dataclass class SapRoomInRoof: """Room-in-roof details. floor_area is a plain number in schema 20.0.0 (not a Measurement object).""" + floor_area: Union[int, float] insulation: str roof_room_connected: str diff --git a/datatypes/epc/schema/rdsap_schema_21_0_0.py b/datatypes/epc/schema/rdsap_schema_21_0_0.py index eee00cb8..8d19e5f9 100644 --- a/datatypes/epc/schema/rdsap_schema_21_0_0.py +++ b/datatypes/epc/schema/rdsap_schema_21_0_0.py @@ -33,6 +33,7 @@ class ShowerOutlets: @dataclass class InstantaneousWwhrs: """Changed in 21.0.0: references WWHRS product index numbers instead of room counts.""" + wwhrs_index_number1: Optional[int] = None wwhrs_index_number2: Optional[int] = None @@ -61,7 +62,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] has_fixed_air_conditioning: str @@ -154,6 +155,7 @@ class SapFloorDimension: @dataclass class SapRoomInRoof: """Room-in-roof details. insulation and roof_room_connected removed in schema 21.0.0.""" + floor_area: Union[int, float] construction_age_band: str diff --git a/datatypes/epc/schema/rdsap_schema_21_0_1.py b/datatypes/epc/schema/rdsap_schema_21_0_1.py index 9b3dbd1d..f6be7cc3 100644 --- a/datatypes/epc/schema/rdsap_schema_21_0_1.py +++ b/datatypes/epc/schema/rdsap_schema_21_0_1.py @@ -50,7 +50,7 @@ class MainHeatingDetail: main_heating_fraction: int main_heating_data_source: int boiler_flue_type: Optional[int] = None - fan_flue_present: Optional[str] = None # TODO: make bool + fan_flue_present: Optional[str] = None # TODO: make bool boiler_ignition_type: Optional[int] = None central_heating_pump_age: Optional[int] = None main_heating_index_number: Optional[int] = None @@ -62,7 +62,7 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: InstantaneousWwhrs + instantaneous_wwhrs: Optional[InstantaneousWwhrs] main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] has_fixed_air_conditioning: str diff --git a/domain/epc/__init__.py b/domain/epc/__init__.py new file mode 100644 index 00000000..e49fea42 --- /dev/null +++ b/domain/epc/__init__.py @@ -0,0 +1,4 @@ +from domain.epc.epc_record import EpcRecord +from domain.epc.property_type import PropertyType + +__all__ = ["EpcRecord", "PropertyType"] diff --git a/domain/epc/epc_record.py b/domain/epc/epc_record.py new file mode 100644 index 00000000..7194d1d6 --- /dev/null +++ b/domain/epc/epc_record.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Optional + +from domain.epc.property_type import PropertyType + + +@dataclass(frozen=True) +class EpcRecord: + """A streamlined record of EPC property data. + + A focused subset of the full ``EpcPropertyData``: a property's identity + plus its typed property type. Grow this with further fields as the + domain needs them. + """ + + address_line_1: str + postcode: str + uprn: Optional[int] + property_type: PropertyType diff --git a/domain/epc/property_type.py b/domain/epc/property_type.py new file mode 100644 index 00000000..707988aa --- /dev/null +++ b/domain/epc/property_type.py @@ -0,0 +1,9 @@ +from enum import Enum + + +class PropertyType(Enum): + HOUSE = "House" + BUNGALOW = "Bungalow" + FLAT = "Flat" + MAISONETTE = "Maisonette" + PARK_HOME = "Park home" diff --git a/infrastructure/epc/__init__.py b/infrastructure/epc/__init__.py new file mode 100644 index 00000000..f99a7cb3 --- /dev/null +++ b/infrastructure/epc/__init__.py @@ -0,0 +1,13 @@ +from infrastructure.epc.epc_client import EpcClient +from infrastructure.epc.exceptions import ( + EpcApiError, + EpcNotFoundError, + EpcRateLimitError, +) + +__all__ = [ + "EpcApiError", + "EpcClient", + "EpcNotFoundError", + "EpcRateLimitError", +] diff --git a/infrastructure/epc/epc_client.py b/infrastructure/epc/epc_client.py new file mode 100644 index 00000000..d1f8639c --- /dev/null +++ b/infrastructure/epc/epc_client.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Optional + +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.search import EpcSearchResult + + +class EpcClient(ABC): + """Interface for retrieving EPC (Energy Performance Certificate) data. + + Implementations fetch from a data source and return domain objects; + callers depend only on this interface, not on a concrete transport. + """ + + @abstractmethod + def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]: + """Return the EPC certificates registered at ``postcode``. + + Returns an empty list when the postcode has no certificates. + """ + ... + + @abstractmethod + def get_by_certificate_number( + self, certificate_number: str + ) -> EpcPropertyData: + """Return the full EPC record for a certificate number. + + Raises EpcNotFoundError when no such certificate exists. + """ + ... + + @abstractmethod + def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]: + """Return the most recent EPC record for ``uprn``. + + Returns None when the UPRN has no certificates. + """ + ... diff --git a/infrastructure/epc/exceptions.py b/infrastructure/epc/exceptions.py new file mode 100644 index 00000000..8e2e5165 --- /dev/null +++ b/infrastructure/epc/exceptions.py @@ -0,0 +1,17 @@ +from typing import Optional + + +class EpcApiError(Exception): + """Base for all EPC client errors.""" + + +class EpcNotFoundError(EpcApiError): + """Raised when the API returns 404 for a resource that must exist.""" + + +class EpcRateLimitError(EpcApiError): + """Raised when the API returns 429 and all retries are exhausted.""" + + def __init__(self, message: str, retry_after: Optional[float] = None) -> None: + super().__init__(message) + self.retry_after = retry_after diff --git a/infrastructure/epc/gov_uk/__init__.py b/infrastructure/epc/gov_uk/__init__.py new file mode 100644 index 00000000..d491a1ef --- /dev/null +++ b/infrastructure/epc/gov_uk/__init__.py @@ -0,0 +1,6 @@ +from infrastructure.epc.gov_uk.gov_uk_epc_client import GovUkEpcClient +from infrastructure.epc.gov_uk.gov_uk_property_type import ( + property_type_from_gov_uk_code, +) + +__all__ = ["GovUkEpcClient", "property_type_from_gov_uk_code"] diff --git a/infrastructure/epc/gov_uk/_retry.py b/infrastructure/epc/gov_uk/_retry.py new file mode 100644 index 00000000..db92b131 --- /dev/null +++ b/infrastructure/epc/gov_uk/_retry.py @@ -0,0 +1,34 @@ +import time +from typing import Callable, Optional, TypeVar + +from infrastructure.epc.exceptions import EpcRateLimitError + +T = TypeVar("T") + + +def call_with_retry( + fn: Callable[[], T], + max_retries: int = 5, + backoff_base: float = 1.0, + backoff_multiplier: float = 2.0, + max_backoff: float = 60.0, +) -> T: + """Call ``fn``, retrying on EpcRateLimitError with exponential backoff. + + Honours the API's ``Retry-After`` header when present, otherwise backs off + ``backoff_base * backoff_multiplier ** attempt`` (capped at ``max_backoff``). + """ + last_exc: Optional[EpcRateLimitError] = None + for attempt in range(max_retries + 1): + try: + return fn() + except EpcRateLimitError as exc: + last_exc = exc + if attempt < max_retries: + if exc.retry_after is not None: + delay = exc.retry_after + else: + delay = backoff_base * (backoff_multiplier**attempt) + time.sleep(min(delay, max_backoff)) + assert last_exc is not None + raise last_exc diff --git a/infrastructure/epc/gov_uk/gov_uk_epc_client.py b/infrastructure/epc/gov_uk/gov_uk_epc_client.py new file mode 100644 index 00000000..ac0db09f --- /dev/null +++ b/infrastructure/epc/gov_uk/gov_uk_epc_client.py @@ -0,0 +1,132 @@ +# Spec: https://raw.githubusercontent.com/communitiesuk/epb-data-warehouse/main/api/api.yml +from __future__ import annotations + +from typing import Any, Optional + +import httpx + +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.domain.mapper import EpcPropertyDataMapper +from datatypes.epc.search import EpcSearchResult +from infrastructure.epc.epc_client import EpcClient +from infrastructure.epc.exceptions import ( + EpcApiError, + EpcNotFoundError, + EpcRateLimitError, +) +from infrastructure.epc.gov_uk._retry import call_with_retry + + +class GovUkEpcClient(EpcClient): + """EpcClient backed by the live gov.uk EPC API. + + Endpoint: https://api.get-energy-performance-data.communities.gov.uk + """ + + BASE_URL = "https://api.get-energy-performance-data.communities.gov.uk" + REQUEST_TIMEOUT = 10.0 + + def __init__(self, auth_token: str) -> None: + self._headers = { + "Authorization": f"Bearer {auth_token}", + "Accept": "application/json", + } + + def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]: + normalised = self._normalise_postcode(postcode) + return call_with_retry(lambda: self._search(postcode=normalised)) + + def get_by_certificate_number( + self, certificate_number: str + ) -> EpcPropertyData: + raw = call_with_retry(lambda: self._fetch_certificate(certificate_number)) + return EpcPropertyDataMapper.from_api_response(raw) + + def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]: + results = call_with_retry(lambda: self._search(uprn=uprn)) + if not results: + return None + latest = max(results, key=lambda r: r.registration_date) + return self.get_by_certificate_number(latest.certificate_number) + + # ------------------------------------------------------------------ + # Private helpers + # ------------------------------------------------------------------ + + @staticmethod + def _normalise_postcode(postcode: str) -> str: + """Return the postcode with all spaces removed and uppercased.""" + return postcode.replace(" ", "").upper() + + @staticmethod + def _parse_retry_after(resp: httpx.Response) -> Optional[float]: + header = resp.headers.get("Retry-After") + if header is None: + return None + try: + return float(header) + except (TypeError, ValueError): + return None + + def _fetch_certificate(self, certificate_number: str) -> dict[str, Any]: + resp = httpx.get( + f"{self.BASE_URL}/api/certificate", + params={"certificate_number": certificate_number}, + headers=self._headers, + timeout=self.REQUEST_TIMEOUT, + ) + if resp.status_code == 404: + raise EpcNotFoundError(certificate_number) + if resp.status_code == 429: + raise EpcRateLimitError( + "Rate limited by EPC API", + retry_after=self._parse_retry_after(resp), + ) + if not resp.is_success: + raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") + return resp.json()["data"] + + def _search( + self, + postcode: Optional[str] = None, + uprn: Optional[int] = None, + ) -> list[EpcSearchResult]: + params: dict[str, str | int] = {} + if postcode: + params["postcode"] = postcode + if uprn is not None: + params["uprn"] = uprn + + resp = httpx.get( + f"{self.BASE_URL}/api/domestic/search", + params=params, + headers=self._headers, + timeout=self.REQUEST_TIMEOUT, + ) + if resp.status_code == 404: + return [] + if resp.status_code == 429: + raise EpcRateLimitError( + "Rate limited by EPC API", + retry_after=self._parse_retry_after(resp), + ) + if not resp.is_success: + raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") + + rows = resp.json().get("data", []) + return [self._parse_search_result(row) for row in rows] + + @staticmethod + def _parse_search_result(row: dict[str, Any]) -> EpcSearchResult: + return EpcSearchResult( + certificate_number=row["certificateNumber"], + address_line_1=row["addressLine1"], + address_line_2=row.get("addressLine2"), + address_line_3=row.get("addressLine3"), + address_line_4=row.get("addressLine4"), + postcode=row["postcode"], + post_town=row["postTown"], + uprn=row.get("uprn"), + current_energy_efficiency_band=row["currentEnergyEfficiencyBand"], + registration_date=row["registrationDate"], + ) diff --git a/infrastructure/epc/gov_uk/gov_uk_property_type.py b/infrastructure/epc/gov_uk/gov_uk_property_type.py new file mode 100644 index 00000000..a0f4a7a3 --- /dev/null +++ b/infrastructure/epc/gov_uk/gov_uk_property_type.py @@ -0,0 +1,25 @@ +from domain.epc.property_type import PropertyType + +# GOV.UK EPC API ``property_type`` integer codes mapped to the domain type. +# This translation is GOV.UK-specific and lives in the infrastructure layer so +# the domain ``PropertyType`` stays free of any source encoding. +_PROPERTY_TYPE_BY_GOV_UK_CODE: dict[int, PropertyType] = { + 0: PropertyType.HOUSE, + 1: PropertyType.BUNGALOW, + 2: PropertyType.FLAT, + 3: PropertyType.MAISONETTE, + 4: PropertyType.PARK_HOME, +} + + +def property_type_from_gov_uk_code(code: int) -> PropertyType: + """Translate a GOV.UK EPC ``property_type`` code to the domain PropertyType. + + Raises ValueError for a code GOV.UK has not been mapped here yet. + """ + try: + return _PROPERTY_TYPE_BY_GOV_UK_CODE[code] + except KeyError: + raise ValueError( + f"Unknown GOV.UK EPC property type code: {code}" + ) from None diff --git a/infrastructure/epc/historical_open_data_communities/__init__.py b/infrastructure/epc/historical_open_data_communities/__init__.py new file mode 100644 index 00000000..88a69081 --- /dev/null +++ b/infrastructure/epc/historical_open_data_communities/__init__.py @@ -0,0 +1,5 @@ +from infrastructure.epc.historical_open_data_communities.historical_open_data_communities_epc_client import ( + HistoricalOpenDataCommunitiesEpcClient, +) + +__all__ = ["HistoricalOpenDataCommunitiesEpcClient"] diff --git a/infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py b/infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py new file mode 100644 index 00000000..d8c7f9ac --- /dev/null +++ b/infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import Optional + +from domain.epc.epc_record import EpcRecord + + +class HistoricalOpenDataCommunitiesEpcClient: + """EPC client backed by Open Data Communities' historical EPC data. + + Stub — not yet implemented. Every method raises NotImplementedError for + now. Unlike GovUkEpcClient it returns the domain ``EpcRecord`` directly; + once the ``EpcClient`` port is migrated to return ``EpcRecord``, this + adapter should implement it. + """ + + def search_by_postcode(self, postcode: str) -> list[EpcRecord]: + raise NotImplementedError + + def get_by_certificate_number(self, certificate_number: str) -> EpcRecord: + raise NotImplementedError + + def get_by_uprn(self, uprn: int) -> Optional[EpcRecord]: + raise NotImplementedError diff --git a/infrastructure/openai/__init__.py b/infrastructure/openai/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/infrastructure/openai/exceptions.py b/infrastructure/openai/exceptions.py new file mode 100644 index 00000000..14cf95a2 --- /dev/null +++ b/infrastructure/openai/exceptions.py @@ -0,0 +1,2 @@ +class OpenAiClientError(Exception): + """Base for all OpenAI client errors.""" diff --git a/infrastructure/openai/openai_client.py b/infrastructure/openai/openai_client.py new file mode 100644 index 00000000..34af4290 --- /dev/null +++ b/infrastructure/openai/openai_client.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +import os +from typing import Optional + +from openai import OpenAI +from openai.types.chat import ChatCompletionMessageParam + +from infrastructure.openai.exceptions import OpenAiClientError + + +class OpenAiChatClient: + """Thin wrapper over the OpenAI Chat Completions API. + + Sends a single prompt and returns the assistant's reply as plain text. + """ + + DEFAULT_MODEL = "gpt-4o-mini" + + def __init__( + self, + api_key: Optional[str] = None, + model: Optional[str] = None, + ) -> None: + key = api_key or os.environ.get("OPENAI_API_KEY") + if not key: + raise OpenAiClientError( + "No OpenAI API key provided. " + "Pass api_key or set the OPENAI_API_KEY environment variable." + ) + self._client = OpenAI(api_key=key) + self._model = model or self.DEFAULT_MODEL + + def generate( + self, + prompt: str, + system_prompt: Optional[str] = None, + ) -> str: + """Send a prompt to the model and return its reply text. + + Args: + prompt: The user message to send. + system_prompt: Optional instruction that sets the model's behaviour. + + Raises: + OpenAiClientError: If the model returns an empty response. + """ + messages: list[ChatCompletionMessageParam] = [] + if system_prompt: + messages.append({"role": "system", "content": system_prompt}) + messages.append({"role": "user", "content": prompt}) + + response = self._client.chat.completions.create( + model=self._model, + messages=messages, + ) + content = response.choices[0].message.content + if content is None: + raise OpenAiClientError("OpenAI returned an empty response.") + return content diff --git a/infrastructure/s3/__init__.py b/infrastructure/s3/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/infrastructure/csv_s3_client.py b/infrastructure/s3/csv_s3_client.py similarity index 95% rename from infrastructure/csv_s3_client.py rename to infrastructure/s3/csv_s3_client.py index d058ba53..67c9a8d4 100644 --- a/infrastructure/csv_s3_client.py +++ b/infrastructure/s3/csv_s3_client.py @@ -1,8 +1,8 @@ import csv from io import StringIO -from infrastructure.s3_client import S3Client -from infrastructure.s3_uri import parse_s3_uri +from infrastructure.s3.s3_client import S3Client +from infrastructure.s3.s3_uri import parse_s3_uri def _dedupe_fieldnames(fieldnames: list[str]) -> list[str]: diff --git a/infrastructure/s3_client.py b/infrastructure/s3/s3_client.py similarity index 100% rename from infrastructure/s3_client.py rename to infrastructure/s3/s3_client.py diff --git a/infrastructure/s3_uri.py b/infrastructure/s3/s3_uri.py similarity index 100% rename from infrastructure/s3_uri.py rename to infrastructure/s3/s3_uri.py diff --git a/repositories/unstandardised_address/unstandardised_address_list_csv_s3_repository.py b/repositories/unstandardised_address/unstandardised_address_list_csv_s3_repository.py index 260fce1d..20bae20c 100644 --- a/repositories/unstandardised_address/unstandardised_address_list_csv_s3_repository.py +++ b/repositories/unstandardised_address/unstandardised_address_list_csv_s3_repository.py @@ -6,7 +6,7 @@ from typing import Optional from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress from domain.postcode import Postcode -from infrastructure.csv_s3_client import CsvS3Client +from infrastructure.s3.csv_s3_client import CsvS3Client from repositories.unstandardised_address.unstandardised_address_list_repository import ( UnstandardisedAddressListRepository, ) diff --git a/tests/infrastructure/epc/__init__.py b/tests/infrastructure/epc/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/infrastructure/epc/gov_uk/__init__.py b/tests/infrastructure/epc/gov_uk/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/infrastructure/epc/gov_uk/conftest.py b/tests/infrastructure/epc/gov_uk/conftest.py new file mode 100644 index 00000000..8fbd3094 --- /dev/null +++ b/tests/infrastructure/epc/gov_uk/conftest.py @@ -0,0 +1,49 @@ +import json +import pathlib + +import pytest + +from infrastructure.epc.gov_uk.gov_uk_epc_client import GovUkEpcClient + +SAMPLES_DIR = pathlib.Path("backend/epc_api/json_samples") + + +@pytest.fixture +def rdsap_21_0_0_cert(): + return json.loads((SAMPLES_DIR / "RdSAP-Schema-21.0.0/epc.json").read_text()) + + +@pytest.fixture +def rdsap_21_0_1_cert(): + return json.loads((SAMPLES_DIR / "RdSAP-Schema-21.0.1/epc.json").read_text()) + + +@pytest.fixture +def epc_client(): + return GovUkEpcClient(auth_token="test-token") + + +def make_search_row( + cert_num="CERT-001", + address_line_1="1 Test Street", + postcode="SW1A 1AA", + post_town="London", + uprn=100023336956, + band="D", + registration_date="2024-01-01", + address_line_2=None, + address_line_3=None, + address_line_4=None, +): + return { + "certificateNumber": cert_num, + "addressLine1": address_line_1, + "addressLine2": address_line_2, + "addressLine3": address_line_3, + "addressLine4": address_line_4, + "postcode": postcode, + "postTown": post_town, + "uprn": uprn, + "currentEnergyEfficiencyBand": band, + "registrationDate": registration_date, + } diff --git a/tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py b/tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py new file mode 100644 index 00000000..46164a0e --- /dev/null +++ b/tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py @@ -0,0 +1,211 @@ +from unittest.mock import MagicMock, call, patch + +import pytest + +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.search import EpcSearchResult +from infrastructure.epc.exceptions import EpcNotFoundError +from tests.infrastructure.epc.gov_uk.conftest import make_search_row + +_SLEEP = "infrastructure.epc.gov_uk._retry.time.sleep" + + +def _mock_response(status_code=200, json_data=None, headers=None): + resp = MagicMock() + resp.status_code = status_code + resp.is_success = 200 <= status_code < 300 + resp.json.return_value = json_data or {} + resp.text = str(json_data) + resp.headers = headers or {} + return resp + + +# --------------------------------------------------------------------------- +# Test 1: get_by_certificate_number happy path +# --------------------------------------------------------------------------- + + +def test_get_by_certificate_number_returns_epc_property_data( + epc_client, rdsap_21_0_1_cert +): + cert_response = {"data": rdsap_21_0_1_cert} + with patch("httpx.get", return_value=_mock_response(200, cert_response)): + result = epc_client.get_by_certificate_number("CERT-001") + + assert isinstance(result, EpcPropertyData) + + +# --------------------------------------------------------------------------- +# Test 2: get_by_certificate_number 404 -> EpcNotFoundError +# --------------------------------------------------------------------------- + + +def test_get_by_certificate_number_404_raises_not_found(epc_client): + with patch("httpx.get", return_value=_mock_response(404)): + with pytest.raises(EpcNotFoundError): + epc_client.get_by_certificate_number("BAD-CERT") + + +# --------------------------------------------------------------------------- +# Test 3: 429 retried, succeeds on 3rd attempt +# --------------------------------------------------------------------------- + + +def test_get_by_certificate_number_retries_on_429_and_succeeds( + epc_client, rdsap_21_0_1_cert +): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429), + _mock_response(429), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch(_SLEEP): + result = epc_client.get_by_certificate_number("CERT-001") + + assert isinstance(result, EpcPropertyData) + + +# --------------------------------------------------------------------------- +# Test 3b: 429 with Retry-After header -> sleeps for that value +# --------------------------------------------------------------------------- + + +def test_429_retry_after_header_drives_sleep_duration( + epc_client, rdsap_21_0_1_cert +): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429, headers={"Retry-After": "7"}), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep: + epc_client.get_by_certificate_number("CERT-001") + + mock_sleep.assert_called_once_with(7.0) + + +# --------------------------------------------------------------------------- +# Test 3c: 429 without Retry-After -> falls back to exponential backoff +# --------------------------------------------------------------------------- + + +def test_429_without_retry_after_uses_exponential_backoff( + epc_client, rdsap_21_0_1_cert +): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429), + _mock_response(429), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep: + epc_client.get_by_certificate_number("CERT-001") + + assert mock_sleep.call_args_list == [call(1.0), call(2.0)] + + +# --------------------------------------------------------------------------- +# Test 3d: malformed Retry-After header -> falls back to exponential backoff +# --------------------------------------------------------------------------- + + +def test_429_malformed_retry_after_falls_back_to_backoff( + epc_client, rdsap_21_0_1_cert +): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429, headers={"Retry-After": "Wed, 21 Oct 2026 07:28:00 GMT"}), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep: + epc_client.get_by_certificate_number("CERT-001") + + mock_sleep.assert_called_once_with(1.0) + + +# --------------------------------------------------------------------------- +# Test 3e: Retry-After capped by max_backoff to avoid hostile/buggy values +# --------------------------------------------------------------------------- + + +def test_429_retry_after_capped_by_max_backoff(epc_client, rdsap_21_0_1_cert): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429, headers={"Retry-After": "9999"}), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep: + epc_client.get_by_certificate_number("CERT-001") + + mock_sleep.assert_called_once_with(60.0) + + +# --------------------------------------------------------------------------- +# Test 4: get_by_uprn empty search -> None +# --------------------------------------------------------------------------- + + +def test_get_by_uprn_returns_none_when_no_results(epc_client): + with patch("httpx.get", return_value=_mock_response(200, {"data": []})): + result = epc_client.get_by_uprn(100023336956) + + assert result is None + + +# --------------------------------------------------------------------------- +# Test 5: get_by_uprn multiple results -> fetches latest by registration_date +# --------------------------------------------------------------------------- + + +def test_get_by_uprn_picks_most_recent_certificate(epc_client, rdsap_21_0_1_cert): + search_rows = [ + make_search_row(cert_num="CERT-OLD", registration_date="2022-01-01"), + make_search_row(cert_num="CERT-NEW", registration_date="2024-06-01"), + make_search_row(cert_num="CERT-MID", registration_date="2023-03-15"), + ] + cert_response = {"data": rdsap_21_0_1_cert} + + def fake_get(url, params=None, **kwargs): + if "search" in url: + return _mock_response(200, {"data": search_rows}) + return _mock_response(200, cert_response) + + with patch("httpx.get", side_effect=fake_get) as mock_get: + result = epc_client.get_by_uprn(100023336956) + + assert isinstance(result, EpcPropertyData) + # Second call must be for the most recent cert + cert_call = mock_get.call_args_list[1] + assert cert_call.kwargs["params"]["certificate_number"] == "CERT-NEW" + + +# --------------------------------------------------------------------------- +# Test 6: search_by_postcode returns list[EpcSearchResult] +# --------------------------------------------------------------------------- + + +def test_search_by_postcode_returns_results(epc_client): + rows = [ + make_search_row(cert_num="CERT-A", address_line_1="1 High Street"), + make_search_row(cert_num="CERT-B", address_line_1="2 High Street"), + ] + with patch("httpx.get", return_value=_mock_response(200, {"data": rows})): + results = epc_client.search_by_postcode("SW1A 1AA") + + assert len(results) == 2 + assert all(isinstance(r, EpcSearchResult) for r in results) + assert results[0].certificate_number == "CERT-A" + assert results[1].address_line_1 == "2 High Street" + + +# --------------------------------------------------------------------------- +# Test 7: search_by_postcode 404 -> empty list +# --------------------------------------------------------------------------- + + +def test_search_by_postcode_404_returns_empty_list(epc_client): + with patch("httpx.get", return_value=_mock_response(404)): + results = epc_client.search_by_postcode("ZZ9 9ZZ") + + assert results == [] diff --git a/tests/infrastructure/test_csv_s3_client.py b/tests/infrastructure/test_csv_s3_client.py index e7ec7eab..048a1cbe 100644 --- a/tests/infrastructure/test_csv_s3_client.py +++ b/tests/infrastructure/test_csv_s3_client.py @@ -3,7 +3,7 @@ from collections.abc import Iterator import pytest from moto import mock_aws -from infrastructure.csv_s3_client import CsvS3Client +from infrastructure.s3.csv_s3_client import CsvS3Client from tests.infrastructure import make_boto_client BUCKET = "csv-bucket" diff --git a/tests/infrastructure/test_s3_client.py b/tests/infrastructure/test_s3_client.py index 67db4f58..bdac6be1 100644 --- a/tests/infrastructure/test_s3_client.py +++ b/tests/infrastructure/test_s3_client.py @@ -3,7 +3,7 @@ from collections.abc import Iterator import pytest from moto import mock_aws -from infrastructure.s3_client import S3Client +from infrastructure.s3.s3_client import S3Client from tests.infrastructure import make_boto_client BUCKET = "test-bucket" diff --git a/tests/infrastructure/test_s3_uri.py b/tests/infrastructure/test_s3_uri.py index 32fd710f..f0865865 100644 --- a/tests/infrastructure/test_s3_uri.py +++ b/tests/infrastructure/test_s3_uri.py @@ -1,6 +1,6 @@ import pytest -from infrastructure.s3_uri import parse_s3_uri +from infrastructure.s3.s3_uri import parse_s3_uri def test_parses_simple_s3_uri() -> None: diff --git a/tests/orchestration/test_postcode_splitter_orchestrator.py b/tests/orchestration/test_postcode_splitter_orchestrator.py index d21bcfba..9ad56094 100644 --- a/tests/orchestration/test_postcode_splitter_orchestrator.py +++ b/tests/orchestration/test_postcode_splitter_orchestrator.py @@ -13,7 +13,7 @@ from sqlalchemy import Engine from sqlmodel import Session from infrastructure.address2uprn_queue_client import Address2UprnQueueClient -from infrastructure.csv_s3_client import CsvS3Client +from infrastructure.s3.csv_s3_client import CsvS3Client from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator from orchestration.task_orchestrator import TaskOrchestrator from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository diff --git a/tests/repositories/unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py b/tests/repositories/unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py index 866d6f2d..f86878c3 100644 --- a/tests/repositories/unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py +++ b/tests/repositories/unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py @@ -5,7 +5,7 @@ from moto import mock_aws from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress from domain.postcode import Postcode -from infrastructure.csv_s3_client import CsvS3Client +from infrastructure.s3.csv_s3_client import CsvS3Client from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import ( UnstandardisedAddressListCsvS3Repository, )