From 6d4942c0136f58721920c4ab9f5756cc72d71428 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 25 Apr 2026 15:03:07 +0000 Subject: [PATCH 01/44] adding to dev container to create shared network on start up --- .devcontainer/backend/devcontainer.json | 1 + Makefile | 10 +++++++++- README.md | 21 +++++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json index a9b7352a..ee37224f 100644 --- a/.devcontainer/backend/devcontainer.json +++ b/.devcontainer/backend/devcontainer.json @@ -4,6 +4,7 @@ "service": "model-backend", "remoteUser": "vscode", "workspaceFolder": "/workspaces/model", + "initializeCommand": "docker network create shared-dev 2>/dev/null || true", "postStartCommand": "bash .devcontainer/backend/post-install.sh", "mounts": [ "source=${localEnv:HOME},target=/workspaces/home,type=bind", diff --git a/Makefile b/Makefile index 00942acd..255e2abf 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ PYTHON = python -.PHONY: setup test lint typecheck check clean +.PHONY: setup test lint typecheck check clean network-setup dev-setup # Install dev dependencies + tox setup: @@ -28,3 +28,11 @@ check: lint typecheck test # Clean up tox environments clean: rm -rf .tox + +# Create shared Docker network required by dev container (idempotent) +network-setup: + docker network create shared-dev 2>/dev/null || true + +# First-time dev environment setup +dev-setup: network-setup + @echo "Dev environment ready. Open the repo in VS Code and select 'Reopen in Container'." 
diff --git a/README.md b/README.md index b470e12c..0f88328a 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,27 @@ The different folders in this repository relate to services that can be used independently, or can be imported and used as part of a larger application +# Getting Started + +## Prerequisites + +- [Docker Desktop](https://www.docker.com/products/docker-desktop/) +- [VS Code](https://code.visualstudio.com/) with the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) + +## Dev Container Setup + +This repo uses a Docker Compose-based dev container. The `model-backend` service joins a `shared-dev` Docker network so it can communicate with other local services (e.g. a frontend container) running on your machine. + +**VS Code users:** The `initializeCommand` in `devcontainer.json` creates the `shared-dev` network automatically before the container starts. No manual step required — just open the repo and select **Reopen in Container**. + +**Non-VS Code / CI workflows:** Run the following once before starting the container: + +```commandline +make dev-setup +``` + +This is idempotent and safe to re-run if the network already exists. 
+ # Folders ### backend/ From f8d785411bd9862e56dff2baea906ed78a6263af Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 25 Apr 2026 18:54:41 +0000 Subject: [PATCH 02/44] setting up new claude dev instructions --- .devcontainer/backend/Dockerfile | 10 ++++-- .../backend/install-claude-skills.sh | 15 +++++++++ CLAUDE.md | 31 +++++++++++++++++++ UBIQUITOUS_LANGUAGE.md | 9 ++++++ 4 files changed, 63 insertions(+), 2 deletions(-) create mode 100755 .devcontainer/backend/install-claude-skills.sh create mode 100644 UBIQUITOUS_LANGUAGE.md diff --git a/.devcontainer/backend/Dockerfile b/.devcontainer/backend/Dockerfile index a92d37f6..983670e4 100644 --- a/.devcontainer/backend/Dockerfile +++ b/.devcontainer/backend/Dockerfile @@ -86,10 +86,16 @@ USER ${USER} # Bootstrap LazyVim starter config RUN git clone https://github.com/LazyVim/starter /home/${USER}/.config/nvim \ && rm -rf /home/${USER}/.config/nvim/.git -# Install Claude +# Install Claude + plugins RUN curl -fsSL https://claude.ai/install.sh | bash \ && export PATH="/home/${USER}/.local/bin:${PATH}" \ && claude plugin marketplace add JuliusBrussee/caveman \ - && claude plugin install caveman@caveman + && claude plugin install caveman@caveman \ + && claude plugin marketplace add mattpocock/skills \ + && claude plugin install skills@grill-me \ + && claude plugin install skills@to-prd \ + && claude plugin install skills@ubiquitous-language \ + && claude plugin install skills@tdd \ + && claude plugin install skills@improve-codebase-architecture ENV PATH="/home/vscode/.local/bin:${PATH}" USER root diff --git a/.devcontainer/backend/install-claude-skills.sh b/.devcontainer/backend/install-claude-skills.sh new file mode 100755 index 00000000..71727e4d --- /dev/null +++ b/.devcontainer/backend/install-claude-skills.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Run this in an existing container to install the mattpocock skills +# without rebuilding the image. New containers get them automatically via Dockerfile. 
+set -euo pipefail + +echo "Installing Claude Code skills (mattpocock/skills)..." + +claude plugin marketplace add mattpocock/skills +claude plugin install skills@grill-me +claude plugin install skills@to-prd +claude plugin install skills@ubiquitous-language +claude plugin install skills@tdd +claude plugin install skills@improve-codebase-architecture + +echo "Done. Available: /grill-me /to-prd /ubiquitous-language /tdd /improve-codebase-architecture" diff --git a/CLAUDE.md b/CLAUDE.md index de2917f2..263679ff 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -28,3 +28,34 @@ You MUST read the overview resource to understand the complete workflow. The inf +## Available Skills + +Five Claude Code skills are installed in this repo's dev container. Each maps to a phase of the feature lifecycle. + +| Skill | Invoke | When to use | +|-------|--------|-------------| +| **grill-me** | `/grill-me` | Before implementing — stress-tests a design through sequential questioning | +| **to-prd** | `/to-prd` | After a planning conversation — formalises context into a GitHub issue PRD | +| **ubiquitous-language** | `/ubiquitous-language` | When domain terms are drifting or ambiguous — builds/updates `UBIQUITOUS_LANGUAGE.md` | +| **tdd** | `/tdd` | During implementation — enforces vertical-slice TDD (one test → one impl → repeat) | +| **improve-codebase-architecture** | `/improve-codebase-architecture` | During refactoring — surfaces shallow modules and proposes deepening opportunities | + +### Typical session chains + +**Feature planning:** +`/grill-me` → `/to-prd` → `/ubiquitous-language` + +**Implementation:** +`/tdd` (+ `/grill-me` if a design fork appears mid-session) + +**Refactoring:** +`/improve-codebase-architecture` → `/grill-me` → `/tdd` → `/ubiquitous-language` + +### First time setting up? + +New containers install all skills automatically via the Dockerfile. 
If you're in an existing container, run: + +```bash +bash .devcontainer/backend/install-claude-skills.sh +``` + diff --git a/UBIQUITOUS_LANGUAGE.md b/UBIQUITOUS_LANGUAGE.md new file mode 100644 index 00000000..3f2c3fe3 --- /dev/null +++ b/UBIQUITOUS_LANGUAGE.md @@ -0,0 +1,9 @@ +# Ubiquitous Language + +Domain terminology glossary for this project. Generated and maintained by the `/ubiquitous-language` Claude Code skill. + +Invoke `/ubiquitous-language` in any session to extract new terms from the conversation, flag ambiguities, and update this file with canonical definitions. + +--- + + From 9ce1928b1e7cabc375263677319336745f61c094 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 25 Apr 2026 20:28:43 +0000 Subject: [PATCH 03/44] added new skills to repo and reduced size of dev container (Sorry Jun-te) --- .devcontainer/backend/Dockerfile | 19 +++++++++---------- .devcontainer/backend/devcontainer.json | 4 ---- .../backend/install-claude-skills.sh | 11 +++++------ 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/.devcontainer/backend/Dockerfile b/.devcontainer/backend/Dockerfile index 983670e4..ebe405a0 100644 --- a/.devcontainer/backend/Dockerfile +++ b/.devcontainer/backend/Dockerfile @@ -10,7 +10,7 @@ ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y --no-install-recommends \ sudo jq vim curl git ca-certificates wget \ build-essential pkg-config automake autoconf libtool \ - ripgrep fd-find make unzip \ + ripgrep fd-find make unzip bash-completion \ && rm -rf /var/lib/apt/lists/* # Neovim latest (LazyVim needs >=0.9) @@ -65,8 +65,8 @@ RUN echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] \ https://apt.releases.hashicorp.com $(lsb_release -cs) main" | \ tee /etc/apt/sources.list.d/hashicorp.list RUN apt update -RUN apt-get install terraform -RUN terraform -install-autocomplete +RUN apt-get install -y terraform +RUN terraform -install-autocomplete || true # Install postgres RUN apt 
install -y wget gnupg2 lsb-release @@ -86,16 +86,15 @@ USER ${USER} # Bootstrap LazyVim starter config RUN git clone https://github.com/LazyVim/starter /home/${USER}/.config/nvim \ && rm -rf /home/${USER}/.config/nvim/.git -# Install Claude + plugins +# Install Claude + plugins + skills RUN curl -fsSL https://claude.ai/install.sh | bash \ && export PATH="/home/${USER}/.local/bin:${PATH}" \ && claude plugin marketplace add JuliusBrussee/caveman \ && claude plugin install caveman@caveman \ - && claude plugin marketplace add mattpocock/skills \ - && claude plugin install skills@grill-me \ - && claude plugin install skills@to-prd \ - && claude plugin install skills@ubiquitous-language \ - && claude plugin install skills@tdd \ - && claude plugin install skills@improve-codebase-architecture + && npx skills@latest add --global --yes mattpocock/skills/grill-me \ + && npx skills@latest add --global --yes mattpocock/skills/to-prd \ + && npx skills@latest add --global --yes mattpocock/skills/ubiquitous-language \ + && npx skills@latest add --global --yes mattpocock/skills/tdd \ + && npx skills@latest add --global --yes mattpocock/skills/improve-codebase-architecture ENV PATH="/home/vscode/.local/bin:${PATH}" USER root diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json index ee37224f..54e45095 100644 --- a/.devcontainer/backend/devcontainer.json +++ b/.devcontainer/backend/devcontainer.json @@ -17,7 +17,6 @@ "ms-toolsai.jupyter", "mechatroner.rainbow-csv", "ms-toolsai.datawrangler", - "lindacong.vscode-book-reader", "4ops.terraform", "fabiospampinato.vscode-todo-plus", "jgclark.vscode-todo-highlight", @@ -26,9 +25,6 @@ "ms-python.black-formatter", "waderyan.gitblame", "GrapeCity.gc-excelviewer", - "jakobhoeg.vscode-pokemon", - "github.vscode-github-actions", - "me-dutour-mathieu.vscode-github-actions", "anthropic.claude-code", "eamodio.gitlens" ], diff --git a/.devcontainer/backend/install-claude-skills.sh 
b/.devcontainer/backend/install-claude-skills.sh index 71727e4d..a54f69e0 100755 --- a/.devcontainer/backend/install-claude-skills.sh +++ b/.devcontainer/backend/install-claude-skills.sh @@ -5,11 +5,10 @@ set -euo pipefail echo "Installing Claude Code skills (mattpocock/skills)..." -claude plugin marketplace add mattpocock/skills -claude plugin install skills@grill-me -claude plugin install skills@to-prd -claude plugin install skills@ubiquitous-language -claude plugin install skills@tdd -claude plugin install skills@improve-codebase-architecture +npx skills@latest add --global --yes mattpocock/skills/grill-me +npx skills@latest add --global --yes mattpocock/skills/to-prd +npx skills@latest add --global --yes mattpocock/skills/ubiquitous-language +npx skills@latest add --global --yes mattpocock/skills/tdd +npx skills@latest add --global --yes mattpocock/skills/improve-codebase-architecture echo "Done. Available: /grill-me /to-prd /ubiquitous-language /tdd /improve-codebase-architecture" From 3ed25030d44edf2f01e37637bd4f02110285c55a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 25 Apr 2026 22:17:38 +0000 Subject: [PATCH 04/44] added new api call for new epc api --- backend/address2UPRN/main.py | 128 ++------------------------ backend/utils/addressMatch.py | 46 +++++++++ datatypes/epc/domain/mapper.py | 22 +++++ datatypes/epc/schema/tests/helpers.py | 78 +--------------- pytest.ini | 2 +- 5 files changed, 80 insertions(+), 196 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 28ad344f..bd562bc7 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -1,8 +1,6 @@ from typing import Optional -from epc_api.client import EpcClient import os -from urllib.parse import urlencode import pandas as pd from utils.logger import setup_logger import json @@ -16,7 +14,7 @@ from utils.s3 import ( ) from datetime import datetime -from backend.utils.addressMatch import AddressMatch +from 
backend.utils.addressMatch import AddressMatch, get_uprn_candidates, df_has_single_uprn, score_addresses logger = setup_logger() @@ -29,122 +27,14 @@ if EPC_AUTH_TOKEN is None: raise RuntimeError("EPC_AUTH_TOKEN not defined in env") -def score_addresses( - df: pd.DataFrame, - user_address: str, - column: str = "address", -) -> pd.Series: - if column not in df.columns: - raise ValueError(f"Missing column: {column}") - - return df[column].apply(lambda x: AddressMatch.score(user_address, x)) - - -def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3): - """ - Recursively fetch EPC data by postcode. - If results hit the size limit, retry with double size up to max_attempts. - """ - client = EpcClient(auth_token=EPC_AUTH_TOKEN) - - url = os.path.join(client.domestic.host, "search") - - if size: - url += "?" + urlencode({"size": size}) - - search_resp = client.domestic.call( - url=url, - method="get", - params={"postcode": postcode}, - ) - if not search_resp or "rows" not in search_resp: - return pd.DataFrame() - - results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"]) - - row_count = len(results_df) - - # If we hit the size limit, there *may* be more results - if row_count == size: - print( - f"⚠️ Warning: hit size limit ({size}) for postcode '{postcode}'. " - f"Attempt {attempt}/{max_attempts}." - ) - - if attempt < max_attempts: - print(f"🔁 Retrying with size={size * 2}") - return get_epc_data_with_postcode( - postcode=postcode, - size=size * 2, - attempt=attempt + 1, - max_attempts=max_attempts, - ) - else: - print( - "🚨 Max attempts reached. Results may be truncated. " - "(Please do a manual review by the tech team.)" - ) - - return results_df - - -def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool: - """ - Returns True if all non-null UPRNs in df match the given uprn. - Returns False otherwise. 
- """ - - if column not in df.columns: - return False - - # Drop nulls and normalise to string - uprns = df[column].dropna().astype(str).str.strip().unique() - - # No valid UPRNs to compare - if len(uprns) == 0: - return False - - # Exactly one unique UPRN and it matches - return len(uprns) == 1 and uprns[0] == str(uprn) - - -def get_uprn_candidates( - df: pd.DataFrame, - user_address: str, - address_column: str = "address", - uprn_column: str = "uprn", -) -> pd.DataFrame: - """ - Annotate EPC results with lexicographical similarity scores and ranks. - - Returns a DataFrame sorted by descending lexiscore. - DOES NOT choose or return a UPRN. - """ - - if address_column not in df.columns: - raise ValueError(f"Missing column: {address_column}") - - if uprn_column not in df.columns: - raise ValueError(f"Missing column: {uprn_column}") - - out = df.copy() - - user_norm = AddressMatch.normalise_address(user_address) - - out["lexiscore"] = out[address_column].apply( - lambda x: AddressMatch.levenshtein(user_norm, x) - ) - - # Normalise UPRN to string - out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True) - - # Rank: 1 = best match - out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int) - - return out.sort_values( - ["lexirank", "lexiscore"], - ascending=[True, False], - ) +def get_epc_data_with_postcode(postcode: str) -> pd.DataFrame: + from backend.epc_client.client import EpcClientService + service = EpcClientService(auth_token=EPC_AUTH_TOKEN) + results = service.search_by_postcode(postcode) + return pd.DataFrame([ + {"address": r.address_line_1, "uprn": r.uprn} + for r in results + ]) def get_uprn_with_epc_df( diff --git a/backend/utils/addressMatch.py b/backend/utils/addressMatch.py index 411bb07c..12c1ac53 100644 --- a/backend/utils/addressMatch.py +++ b/backend/utils/addressMatch.py @@ -2,6 +2,7 @@ import re from typing import Any, Optional from difflib import SequenceMatcher import requests +import 
pandas as pd class AddressMatch: @@ -199,3 +200,48 @@ class AddressMatch: 0.65 * token_score + 0.35 * char_score, 4, ) + + +def score_addresses( + df: pd.DataFrame, + user_address: str, + column: str = "address", +) -> pd.Series: + if column not in df.columns: + raise ValueError(f"Missing column: {column}") + return df[column].apply(lambda x: AddressMatch.score(user_address, x)) + + +def get_uprn_candidates( + df: pd.DataFrame, + user_address: str, + address_column: str = "address", + uprn_column: str = "uprn", +) -> pd.DataFrame: + """ + Annotate EPC results with lexicographical similarity scores and ranks. + Returns a DataFrame sorted by descending lexiscore. + """ + if address_column not in df.columns: + raise ValueError(f"Missing column: {address_column}") + if uprn_column not in df.columns: + raise ValueError(f"Missing column: {uprn_column}") + + out = df.copy() + user_norm = AddressMatch.normalise_address(user_address) + out["lexiscore"] = out[address_column].apply( + lambda x: AddressMatch.levenshtein(user_norm, x) + ) + out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True) + out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int) + return out.sort_values(["lexirank", "lexiscore"], ascending=[True, False]) + + +def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool: + """Returns True if all non-null UPRNs in df match the given uprn.""" + if column not in df.columns: + return False + uprns = df[column].dropna().astype(str).str.strip().unique() + if len(uprns) == 0: + return False + return len(uprns) == 1 and uprns[0] == str(uprn) diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 1afade5c..7ef74340 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -1447,6 +1447,28 @@ class EpcPropertyDataMapper: ) -> List[EnergyElement]: return [EpcPropertyDataMapper._map_energy_element(e) for e in elements] + @staticmethod + 
def from_api_response(data: dict) -> "EpcPropertyData": + """ + Dispatch to the correct schema mapper based on schema_type. + Supports RdSAP-Schema-21.0.0 and RdSAP-Schema-21.0.1 only. + Raises ValueError for unsupported schemas — add cases here as needed. + """ + from datatypes.epc.schema.helpers import from_dict + + schema = data.get("schema_type", "") + if schema == "RdSAP-Schema-21.0.1": + from datatypes.epc.schema.rdsap_schema_21_0_1 import RdSapSchema21_0_1 + return EpcPropertyDataMapper.from_rdsap_schema_21_0_1( + from_dict(RdSapSchema21_0_1, data) + ) + if schema == "RdSAP-Schema-21.0.0": + from datatypes.epc.schema.rdsap_schema_21_0_0 import RdSapSchema21_0_0 + return EpcPropertyDataMapper.from_rdsap_schema_21_0_0( + from_dict(RdSapSchema21_0_0, data) + ) + raise ValueError(f"Unsupported EPC schema: {schema!r}") + # --------------------------------------------------------------------------- # Private helpers diff --git a/datatypes/epc/schema/tests/helpers.py b/datatypes/epc/schema/tests/helpers.py index 22f132d2..06338c0a 100644 --- a/datatypes/epc/schema/tests/helpers.py +++ b/datatypes/epc/schema/tests/helpers.py @@ -1,77 +1,3 @@ -import dataclasses -import typing -from datetime import date -from typing import Any, Dict, Type, TypeVar +from datatypes.epc.schema.helpers import from_dict -T = TypeVar("T") - - -def from_dict(cls: Type[T], data: Dict[str, Any]) -> T: - """ - Recursively convert a plain dict (e.g. from json.loads) into the given - dataclass type, using the field type hints to convert nested structures. - - Handles: - - Nested dataclasses - - List[SomeDataclass] - - Optional[X] / Union[X, None] - - Union[DataclassType, primitive] (e.g. Union[Measurement, int]) - - Primitive pass-through for Union[str, int] etc. 
- """ - return _from_dict_impl(cls, data) # type: ignore[return-value] - - -def _from_dict_impl(cls: Any, data: Any) -> Any: - hints = typing.get_type_hints(cls) - kwargs: Dict[str, Any] = {} - - for field in dataclasses.fields(cls): # type: ignore[arg-type] - has_default = ( - field.default is not dataclasses.MISSING - or field.default_factory is not dataclasses.MISSING # type: ignore[misc] - ) - if field.name not in data: - if has_default: - continue - raise ValueError(f"{cls.__name__}: missing required field '{field.name}'") - - kwargs[field.name] = _coerce(data[field.name], hints[field.name]) - - return cls(**kwargs) - - -def _coerce(value: Any, hint: Any) -> Any: - if value is None: - return None - - origin = typing.get_origin(hint) - args = typing.get_args(hint) - - # Union (includes Optional[X] which is Union[X, None]) - if origin is typing.Union: - if value is None: - return None - non_none_args = [a for a in args if a is not type(None)] - if len(non_none_args) == 1: - # Optional[X] — recurse so List[X] and nested dataclasses are handled - return _coerce(value, non_none_args[0]) - # Multi-type Union (e.g. Union[Measurement, int]): try dataclasses first - for arg in non_none_args: - if dataclasses.is_dataclass(arg) and isinstance(value, dict): - return _from_dict_impl(arg, value) - # All remaining args are primitives — return value as-is - return value - - # List[X] - if origin is list: - item_hint = args[0] - return [_coerce(item, item_hint) for item in value] - - # Plain dataclass - if dataclasses.is_dataclass(hint) and isinstance(value, dict): - return _from_dict_impl(hint, value) - - if hint is date and isinstance(value, str): - return date.fromisoformat(value) - - return value +__all__ = ["from_dict"] diff --git a/pytest.ini b/pytest.ini index 33231c61..1ddc8747 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,6 +3,6 @@ pythonpath = . 
log_cli = true log_cli_level = INFO addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ backend/documents_parser/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ backend/documents_parser/tests backend/epc_client/tests markers = integration: mark a test as an integration test From fa0c77af782e661a8254d5882e8cb27708faf617 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 25 Apr 2026 22:24:26 +0000 Subject: [PATCH 05/44] updated ubiqutous language --- UBIQUITOUS_LANGUAGE.md | 71 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/UBIQUITOUS_LANGUAGE.md b/UBIQUITOUS_LANGUAGE.md index 3f2c3fe3..1765cbc8 100644 --- a/UBIQUITOUS_LANGUAGE.md +++ b/UBIQUITOUS_LANGUAGE.md @@ -6,4 +6,73 @@ Invoke `/ubiquitous-language` in any session to extract new terms from the conve --- - +## Energy Performance Certificates + +| Term | Definition | Aliases to avoid | +|------|------------|------------------| +| **EPC** | An Energy Performance Certificate — a government-issued document rating a dwelling's energy efficiency from A (best) to G (worst). 
| "energy certificate", "energy report" | +| **Certificate Number** | The unique identifier assigned to an EPC by the government registry. | "cert number", "EPC ID" | +| **Registration Date** | The date an EPC was lodged with the government register; used to identify the most recent certificate for a dwelling. | "assessment date", "submission date" | +| **EPC Band** | A single letter A–G representing a dwelling's current or potential energy efficiency rating. | "energy rating", "EPC grade", "EPC score" | +| **Schema Type** | The versioned RdSAP or SAP schema that describes the structure of a certificate's raw data (e.g. `RdSAP-Schema-21.0.1`). | "schema version", "EPC format" | +| **Domestic Certificate** | An EPC issued for a residential dwelling, as opposed to a commercial one. | "residential EPC", "home EPC" | + +## Properties and Addresses + +| Term | Definition | Aliases to avoid | +|------|------------|------------------| +| **UPRN** | Unique Property Reference Number — the government-issued permanent identifier for a physical address in the UK. | "property ID", "address ID", "code" | +| **Postcode** | A UK postal code used to group nearby addresses; the primary search key for finding EPC records. | "zip code", "postal code" | +| **User Address** | A free-text address string provided by a user or imported from a customer dataset, before any normalisation or matching. | "user input", "raw address", "user_inputed_address" | +| **Dwelling** | A single residential unit that can hold an EPC — a house, flat, or maisonette. | "property", "unit", "home" | + +## Address Matching + +| Term | Definition | Aliases to avoid | +|------|------------|------------------| +| **Lexiscore** | A similarity score in [0, 1] between a user address and a candidate EPC address; combines token overlap and character-level similarity. | "score", "match score", "similarity" | +| **Lexirank** | Dense rank of candidates sorted by lexiscore descending; rank 1 = best match. 
| "rank", "position" | +| **UPRN Candidate** | An EPC search result that is a plausible match for a given user address, before scoring selects the best match. | "match candidate", "result" | +| **Score Threshold** | The minimum lexiscore (currently 0.6) below which no match is returned even if a candidate exists. | "minimum score", "cutoff" | +| **Ambiguous Match** | A matching outcome where two or more candidates share lexirank 1, making it impossible to select a unique best match. | "tie", "draw", "duplicate" | +| **Best Match** | The single UPRN candidate with lexirank 1 that meets or exceeds the score threshold. | "winner", "top result" | + +## API and Integration + +| Term | Definition | Aliases to avoid | +|------|------------|------------------| +| **EPC Search Result** | A lightweight record returned by the government domestic search endpoint — contains address lines, postcode, UPRN, band, and certificate number but not the full certificate data. | "search row", "EPC row", "result" | +| **EPC Property Data** | The fully mapped domain object produced after fetching and parsing a complete EPC certificate. | "EPC data", "certificate data", "parsed EPC" | +| **Old EPC API** | The retired government API (`epc.opendatacommunities.org`) using HTTP Basic auth; decommissioned May 2026. | "legacy API" | +| **New EPC API** | The replacement government API (`api.get-energy-performance-data.communities.gov.uk`) using Bearer token auth. | "new API", "current API" | +| **Bearer Token** | The auth credential required by the new EPC API; stored in the `EPC_AUTH_TOKEN` environment variable. | "API key", "auth token", "secret" | + +## Relationships + +- An **EPC** belongs to exactly one **Dwelling** and has one **Certificate Number**. +- A **Dwelling** may have multiple **EPCs** across time; the one with the most recent **Registration Date** is the current one. +- A **UPRN** identifies a **Dwelling** permanently; it does not change when the dwelling changes owner. 
+- An **EPC Search Result** is a summary; it points to a full **EPC** via its **Certificate Number**. +- **Address Matching** uses a **User Address** and **Postcode** to find a **UPRN** by scoring **UPRN Candidates** from an EPC search. +- A **Lexirank** of 1 with no **Ambiguous Match** and a **Lexiscore** ≥ the **Score Threshold** produces a **Best Match**. + +## Example dialogue + +> **Dev:** "We have a user address and postcode. How do we find the UPRN?" + +> **Domain expert:** "Search the **New EPC API** by **Postcode** — you get back a list of **EPC Search Results** for that area. Each one has an address and a **UPRN**. Score each against the **User Address** using the **Lexiscore**. If the top **UPRN Candidate** scores above the **Score Threshold** and there's no **Ambiguous Match**, that's your **Best Match**." + +> **Dev:** "What if two results share the same address line 1?" + +> **Domain expert:** "That's an **Ambiguous Match** — two candidates at **Lexirank** 1. Fall back to scoring on the full address using all address lines joined together. If that still ties, return nothing." + +> **Dev:** "Once we have the best match, do we use the UPRN or fetch the full EPC?" + +> **Domain expert:** "Depends on what you need. The **EPC Search Result** gives you the **EPC Band** and **Certificate Number**. If you need energy efficiency detail, use the **Certificate Number** to fetch the full **EPC Property Data**." + +## Flagged ambiguities + +- **"address"** appears as both the raw **User Address** (free-text from customer data) and a structured field on an **EPC Search Result** (normalised address lines). Always qualify: "user address" vs "EPC address" or "address line 1". +- **"score"** is used for the `AddressMatch.score()` function output, the `lexiscore` DataFrame column, and informally in conversation. Prefer **Lexiscore** in domain discussions; reserve "score" for method-level code comments. 
+- **"user_inputed_address"** in `backend/address2UPRN/main.py` is a misspelling and a synonym for **User Address** — the canonical term. New code should use `user_address`. +- **"EPC"** is overloaded as both the document (an Energy Performance Certificate) and the rating band letter. Use **EPC** for the document and **EPC Band** for the letter. From d338be867b0938580f4c4c90ab9e0b52245dec97 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 25 Apr 2026 22:41:57 +0000 Subject: [PATCH 06/44] added missing files --- backend/epc_client/__init__.py | 3 + backend/epc_client/_retry.py | 23 ++ backend/epc_client/client.py | 175 ++++++++++++++ backend/epc_client/exceptions.py | 10 + backend/epc_client/requirements.txt | 1 + backend/epc_client/tests/__init__.py | 0 backend/epc_client/tests/conftest.py | 48 ++++ backend/epc_client/tests/test_client.py | 224 ++++++++++++++++++ .../tests/test_mapper_dispatcher.py | 31 +++ datatypes/epc/schema/helpers.py | 77 ++++++ 10 files changed, 592 insertions(+) create mode 100644 backend/epc_client/__init__.py create mode 100644 backend/epc_client/_retry.py create mode 100644 backend/epc_client/client.py create mode 100644 backend/epc_client/exceptions.py create mode 100644 backend/epc_client/requirements.txt create mode 100644 backend/epc_client/tests/__init__.py create mode 100644 backend/epc_client/tests/conftest.py create mode 100644 backend/epc_client/tests/test_client.py create mode 100644 backend/epc_client/tests/test_mapper_dispatcher.py create mode 100644 datatypes/epc/schema/helpers.py diff --git a/backend/epc_client/__init__.py b/backend/epc_client/__init__.py new file mode 100644 index 00000000..720594f7 --- /dev/null +++ b/backend/epc_client/__init__.py @@ -0,0 +1,3 @@ +from backend.epc_client.client import EpcClientService, EpcSearchResult + +__all__ = ["EpcClientService", "EpcSearchResult"] diff --git a/backend/epc_client/_retry.py b/backend/epc_client/_retry.py new file mode 100644 index 00000000..e290e95b --- 
/dev/null +++ b/backend/epc_client/_retry.py @@ -0,0 +1,23 @@ +import time +from typing import Callable, TypeVar + +from backend.epc_client.exceptions import EpcRateLimitError + +T = TypeVar("T") + + +def call_with_retry( + fn: Callable[[], T], + max_retries: int = 5, + backoff_base: float = 1.0, + backoff_multiplier: float = 2.0, +) -> T: + last_exc: EpcRateLimitError | None = None + for attempt in range(max_retries + 1): + try: + return fn() + except EpcRateLimitError as exc: + last_exc = exc + if attempt < max_retries: + time.sleep(backoff_base * (backoff_multiplier ** attempt)) + raise last_exc # type: ignore[misc] diff --git a/backend/epc_client/client.py b/backend/epc_client/client.py new file mode 100644 index 00000000..33f25ef5 --- /dev/null +++ b/backend/epc_client/client.py @@ -0,0 +1,175 @@ +# Spec: https://raw.githubusercontent.com/communitiesuk/epb-data-warehouse/main/api/api.yml +from __future__ import annotations + +from dataclasses import dataclass +from typing import Callable, Optional + +import httpx +import pandas as pd + +from backend.epc_client.exceptions import EpcApiError, EpcNotFoundError, EpcRateLimitError +from backend.epc_client._retry import call_with_retry +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.domain.mapper import EpcPropertyDataMapper + + +@dataclass +class EpcSearchResult: + certificate_number: str + address_line_1: str + address_line_2: Optional[str] + address_line_3: Optional[str] + address_line_4: Optional[str] + postcode: str + post_town: str + uprn: Optional[int] + current_energy_efficiency_band: str + registration_date: str + + def full_address(self) -> str: + parts = [ + self.address_line_1, + self.address_line_2, + self.address_line_3, + self.address_line_4, + ] + return ", ".join(p for p in parts if p) + + +class EpcClientService: + BASE_URL = "https://api.get-energy-performance-data.communities.gov.uk" + _MIN_MATCH_SCORE = 0.6 + + def __init__(self, auth_token: str) -> None: 
+ self._headers = { + "Authorization": f"Bearer {auth_token}", + "Accept": "application/json", + } + + def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData: + raw = call_with_retry(lambda: self._fetch_certificate(cert_num)) + return EpcPropertyDataMapper.from_api_response(raw) + + def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]: + results = call_with_retry(lambda: self._search(uprn=uprn)) + if not results: + return None + latest = max(results, key=lambda r: r.registration_date) + return self.get_by_certificate_number(latest.certificate_number) + + def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]: + return call_with_retry(lambda: self._search(postcode=postcode)) + + def find_best_match(self, postcode: str, address: str) -> Optional[EpcPropertyData]: + from backend.utils.addressMatch import get_uprn_candidates + + candidates = self.search_by_postcode(postcode) + if not candidates: + return None + + # Round 1: score on addressLine1 only + cert_num = self._pick_best_cert(candidates, address, use_full_address=False, fn=get_uprn_candidates) + if cert_num: + return self._safe_get(cert_num) + + # Round 2: score on all address lines joined + cert_num = self._pick_best_cert(candidates, address, use_full_address=True, fn=get_uprn_candidates) + if cert_num: + return self._safe_get(cert_num) + + return None + + # ------------------------------------------------------------------ + # Private helpers + # ------------------------------------------------------------------ + + def _fetch_certificate(self, cert_num: str) -> dict: + resp = httpx.get( + f"{self.BASE_URL}/api/certificate", + params={"certificate_number": cert_num}, + headers=self._headers, + ) + if resp.status_code == 404: + raise EpcNotFoundError(cert_num) + if resp.status_code == 429: + raise EpcRateLimitError("Rate limited by EPC API") + if not resp.is_success: + raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") + return resp.json()["data"] + + def 
_search( + self, + postcode: Optional[str] = None, + uprn: Optional[int] = None, + ) -> list[EpcSearchResult]: + params: dict[str, str | int] = {} + if postcode: + params["postcode"] = postcode + if uprn is not None: + params["uprn"] = uprn + + resp = httpx.get( + f"{self.BASE_URL}/api/domestic/search", + params=params, + headers=self._headers, + ) + if resp.status_code == 404: + return [] + if resp.status_code == 429: + raise EpcRateLimitError("Rate limited by EPC API") + if not resp.is_success: + raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") + + rows = resp.json().get("data", []) + return [self._parse_search_result(r) for r in rows] + + @staticmethod + def _parse_search_result(row: dict) -> EpcSearchResult: + return EpcSearchResult( + certificate_number=row["certificateNumber"], + address_line_1=row["addressLine1"], + address_line_2=row.get("addressLine2"), + address_line_3=row.get("addressLine3"), + address_line_4=row.get("addressLine4"), + postcode=row["postcode"], + post_town=row["postTown"], + uprn=row.get("uprn"), + current_energy_efficiency_band=row["currentEnergyEfficiencyBand"], + registration_date=row["registrationDate"], + ) + + def _pick_best_cert( + self, + candidates: list[EpcSearchResult], + user_address: str, + use_full_address: bool, + fn: Callable[..., pd.DataFrame], + ) -> Optional[str]: + df = pd.DataFrame([ + { + "address": r.full_address() if use_full_address else r.address_line_1, + "uprn": str(r.uprn) if r.uprn is not None else "", + "certificate_number": r.certificate_number, + } + for r in candidates + ]) + + scored = fn(df, user_address=user_address) + if scored.empty: + return None + + best_score = scored.iloc[0]["lexiscore"] + if best_score < self._MIN_MATCH_SCORE: + return None + + top = scored[scored["lexirank"] == 1] + if len(top) != 1: + return None + + return str(top.iloc[0]["certificate_number"]) + + def _safe_get(self, cert_num: str) -> Optional[EpcPropertyData]: + try: + return 
self.get_by_certificate_number(cert_num) + except EpcNotFoundError: + return None diff --git a/backend/epc_client/exceptions.py b/backend/epc_client/exceptions.py new file mode 100644 index 00000000..49f1542a --- /dev/null +++ b/backend/epc_client/exceptions.py @@ -0,0 +1,10 @@ +class EpcApiError(Exception): + """Base for all EPC client errors.""" + + +class EpcNotFoundError(EpcApiError): + """Raised when the API returns 404.""" + + +class EpcRateLimitError(EpcApiError): + """Raised when the API returns 429 and all retries are exhausted.""" diff --git a/backend/epc_client/requirements.txt b/backend/epc_client/requirements.txt new file mode 100644 index 00000000..aa69c38b --- /dev/null +++ b/backend/epc_client/requirements.txt @@ -0,0 +1 @@ +httpx>=0.27.0 diff --git a/backend/epc_client/tests/__init__.py b/backend/epc_client/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/epc_client/tests/conftest.py b/backend/epc_client/tests/conftest.py new file mode 100644 index 00000000..2ed444af --- /dev/null +++ b/backend/epc_client/tests/conftest.py @@ -0,0 +1,48 @@ +import json +import pathlib +import pytest + +from backend.epc_client.client import EpcClientService + +SAMPLES_DIR = pathlib.Path("backend/epc_api/json_samples") + + +@pytest.fixture +def rdsap_21_0_0_cert(): + return json.loads((SAMPLES_DIR / "RdSAP-Schema-21.0.0/epc.json").read_text()) + + +@pytest.fixture +def rdsap_21_0_1_cert(): + return json.loads((SAMPLES_DIR / "RdSAP-Schema-21.0.1/epc.json").read_text()) + + +@pytest.fixture +def epc_service(): + return EpcClientService(auth_token="test-token") + + +def make_search_row( + cert_num="CERT-001", + address_line_1="1 Test Street", + postcode="SW1A 1AA", + post_town="London", + uprn=100023336956, + band="D", + registration_date="2024-01-01", + address_line_2=None, + address_line_3=None, + address_line_4=None, +): + return { + "certificateNumber": cert_num, + "addressLine1": address_line_1, + "addressLine2": address_line_2, 
+ "addressLine3": address_line_3, + "addressLine4": address_line_4, + "postcode": postcode, + "postTown": post_town, + "uprn": uprn, + "currentEnergyEfficiencyBand": band, + "registrationDate": registration_date, + } diff --git a/backend/epc_client/tests/test_client.py b/backend/epc_client/tests/test_client.py new file mode 100644 index 00000000..51dd2a12 --- /dev/null +++ b/backend/epc_client/tests/test_client.py @@ -0,0 +1,224 @@ +from unittest.mock import MagicMock, patch, call +import pytest + +from backend.epc_client.client import EpcClientService, EpcSearchResult +from backend.epc_client.exceptions import EpcNotFoundError, EpcRateLimitError +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from backend.epc_client.tests.conftest import make_search_row + + +def _mock_response(status_code=200, json_data=None): + resp = MagicMock() + resp.status_code = status_code + resp.is_success = 200 <= status_code < 300 + resp.json.return_value = json_data or {} + resp.text = str(json_data) + return resp + + +# --------------------------------------------------------------------------- +# Test 1: get_by_certificate_number happy path +# --------------------------------------------------------------------------- + +def test_get_by_certificate_number_returns_epc_property_data(epc_service, rdsap_21_0_1_cert): + cert_response = {"data": rdsap_21_0_1_cert} + with patch("httpx.get", return_value=_mock_response(200, cert_response)): + result = epc_service.get_by_certificate_number("CERT-001") + + assert isinstance(result, EpcPropertyData) + + +# --------------------------------------------------------------------------- +# Test 2: get_by_certificate_number 404 → EpcNotFoundError +# --------------------------------------------------------------------------- + +def test_get_by_certificate_number_404_raises_not_found(epc_service): + with patch("httpx.get", return_value=_mock_response(404)): + with pytest.raises(EpcNotFoundError): + 
epc_service.get_by_certificate_number("BAD-CERT") + + +# --------------------------------------------------------------------------- +# Test 3: 429 retried, succeeds on 3rd attempt +# --------------------------------------------------------------------------- + +def test_get_by_certificate_number_retries_on_429_and_succeeds(epc_service, rdsap_21_0_1_cert): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429), + _mock_response(429), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch("time.sleep"): + result = epc_service.get_by_certificate_number("CERT-001") + + assert isinstance(result, EpcPropertyData) + + +# --------------------------------------------------------------------------- +# Test 4: get_by_uprn empty search → None +# --------------------------------------------------------------------------- + +def test_get_by_uprn_returns_none_when_no_results(epc_service): + with patch("httpx.get", return_value=_mock_response(200, {"data": []})): + result = epc_service.get_by_uprn(100023336956) + + assert result is None + + +# --------------------------------------------------------------------------- +# Test 5: get_by_uprn multiple results → fetches latest by registration_date +# --------------------------------------------------------------------------- + +def test_get_by_uprn_picks_most_recent_certificate(epc_service, rdsap_21_0_1_cert): + search_rows = [ + make_search_row(cert_num="CERT-OLD", registration_date="2022-01-01"), + make_search_row(cert_num="CERT-NEW", registration_date="2024-06-01"), + make_search_row(cert_num="CERT-MID", registration_date="2023-03-15"), + ] + cert_response = {"data": rdsap_21_0_1_cert} + + def fake_get(url, params=None, **kwargs): + if "search" in url: + return _mock_response(200, {"data": search_rows}) + return _mock_response(200, cert_response) + + with patch("httpx.get", side_effect=fake_get) as mock_get: + result = epc_service.get_by_uprn(100023336956) + 
+ assert isinstance(result, EpcPropertyData) + # Second call must be for the most recent cert + cert_call = mock_get.call_args_list[1] + assert cert_call.kwargs["params"]["certificate_number"] == "CERT-NEW" + + +# --------------------------------------------------------------------------- +# Test 6: search_by_postcode returns list[EpcSearchResult] +# --------------------------------------------------------------------------- + +def test_search_by_postcode_returns_results(epc_service): + rows = [ + make_search_row(cert_num="CERT-A", address_line_1="1 High Street"), + make_search_row(cert_num="CERT-B", address_line_1="2 High Street"), + ] + with patch("httpx.get", return_value=_mock_response(200, {"data": rows})): + results = epc_service.search_by_postcode("SW1A 1AA") + + assert len(results) == 2 + assert all(isinstance(r, EpcSearchResult) for r in results) + assert results[0].certificate_number == "CERT-A" + assert results[1].address_line_1 == "2 High Street" + + +# --------------------------------------------------------------------------- +# Test 7: search_by_postcode 404 → empty list +# --------------------------------------------------------------------------- + +def test_search_by_postcode_404_returns_empty_list(epc_service): + with patch("httpx.get", return_value=_mock_response(404)): + results = epc_service.search_by_postcode("ZZ9 9ZZ") + + assert results == [] + + +# --------------------------------------------------------------------------- +# Tests 8-10: find_best_match +# --------------------------------------------------------------------------- + +def _make_scored_df(rows, scores, ranks): + import pandas as pd + df = pd.DataFrame(rows) + df["lexiscore"] = scores + df["lexirank"] = ranks + return df.sort_values("lexirank") + + +def test_find_best_match_round1_clear_winner(epc_service, rdsap_21_0_1_cert): + search_rows = [ + make_search_row(cert_num="CERT-WIN", address_line_1="1 High Street"), + make_search_row(cert_num="CERT-LOSE", address_line_1="99 
Nowhere Lane"), + ] + cert_response = {"data": rdsap_21_0_1_cert} + + df_rows = [ + {"address": "1 High Street", "uprn": "100023336956", "certificate_number": "CERT-WIN"}, + {"address": "99 Nowhere Lane", "uprn": "100023336956", "certificate_number": "CERT-LOSE"}, + ] + scored = _make_scored_df(df_rows, [0.9, 0.1], [1, 2]) + + def fake_get(url, params=None, **kwargs): + if "search" in url: + return _mock_response(200, {"data": search_rows}) + return _mock_response(200, cert_response) + + with patch("httpx.get", side_effect=fake_get), \ + patch("backend.utils.addressMatch.get_uprn_candidates", return_value=scored): + result = epc_service.find_best_match("SW1A 1AA", "1 High Street") + + assert isinstance(result, EpcPropertyData) + + +def test_find_best_match_round1_ambiguous_round2_resolves(epc_service, rdsap_21_0_1_cert): + search_rows = [ + make_search_row( + cert_num="CERT-A", address_line_1="1 High Street", + address_line_2="Ground Floor", + ), + make_search_row( + cert_num="CERT-B", address_line_1="1 High Street", + address_line_2="First Floor", + ), + ] + cert_response = {"data": rdsap_21_0_1_cert} + + # Round 1: both score equally — ambiguous (two rank-1s) + ambiguous = _make_scored_df( + [ + {"address": "1 High Street", "uprn": "111", "certificate_number": "CERT-A"}, + {"address": "1 High Street", "uprn": "222", "certificate_number": "CERT-B"}, + ], + [0.9, 0.9], + [1, 1], + ) + # Round 2: CERT-A wins on full address + resolved = _make_scored_df( + [ + {"address": "1 High Street, Ground Floor", "uprn": "111", "certificate_number": "CERT-A"}, + {"address": "1 High Street, First Floor", "uprn": "222", "certificate_number": "CERT-B"}, + ], + [0.85, 0.4], + [1, 2], + ) + + call_count = {"n": 0} + + def fake_candidates(df, user_address, **kwargs): + call_count["n"] += 1 + return ambiguous if call_count["n"] == 1 else resolved + + def fake_get(url, params=None, **kwargs): + if "search" in url: + return _mock_response(200, {"data": search_rows}) + return 
_mock_response(200, cert_response) + + with patch("httpx.get", side_effect=fake_get), \ + patch("backend.utils.addressMatch.get_uprn_candidates", side_effect=fake_candidates): + result = epc_service.find_best_match("SW1A 1AA", "1 High Street Ground Floor") + + assert isinstance(result, EpcPropertyData) + + +def test_find_best_match_returns_none_when_no_good_match(epc_service): + search_rows = [make_search_row(cert_num="CERT-X", address_line_1="99 Nowhere Lane")] + + low_score = _make_scored_df( + [{"address": "99 Nowhere Lane", "uprn": "111", "certificate_number": "CERT-X"}], + [0.1], + [1], + ) + + with patch("httpx.get", return_value=_mock_response(200, {"data": search_rows})), \ + patch("backend.utils.addressMatch.get_uprn_candidates", return_value=low_score): + result = epc_service.find_best_match("SW1A 1AA", "1 Completely Different Road") + + assert result is None diff --git a/backend/epc_client/tests/test_mapper_dispatcher.py b/backend/epc_client/tests/test_mapper_dispatcher.py new file mode 100644 index 00000000..efb9c4ec --- /dev/null +++ b/backend/epc_client/tests/test_mapper_dispatcher.py @@ -0,0 +1,31 @@ +import pytest + +from datatypes.epc.domain.mapper import EpcPropertyDataMapper +from datatypes.epc.domain.epc_property_data import EpcPropertyData + + +# --------------------------------------------------------------------------- +# Test 1: from_api_response with RdSAP-Schema-21.0.0 fixture → EpcPropertyData +# --------------------------------------------------------------------------- + +def test_from_api_response_rdsap_21_0_0(rdsap_21_0_0_cert): + result = EpcPropertyDataMapper.from_api_response(rdsap_21_0_0_cert) + assert isinstance(result, EpcPropertyData) + + +# --------------------------------------------------------------------------- +# Test 2: from_api_response with RdSAP-Schema-21.0.1 fixture → EpcPropertyData +# --------------------------------------------------------------------------- + +def 
test_from_api_response_rdsap_21_0_1(rdsap_21_0_1_cert): + result = EpcPropertyDataMapper.from_api_response(rdsap_21_0_1_cert) + assert isinstance(result, EpcPropertyData) + + +# --------------------------------------------------------------------------- +# Test 3: unknown schema_type → ValueError +# --------------------------------------------------------------------------- + +def test_from_api_response_unknown_schema_raises(): + with pytest.raises(ValueError, match="Unsupported EPC schema"): + EpcPropertyDataMapper.from_api_response({"schema_type": "RdSAP-Schema-99.0.0"}) diff --git a/datatypes/epc/schema/helpers.py b/datatypes/epc/schema/helpers.py new file mode 100644 index 00000000..22f132d2 --- /dev/null +++ b/datatypes/epc/schema/helpers.py @@ -0,0 +1,77 @@ +import dataclasses +import typing +from datetime import date +from typing import Any, Dict, Type, TypeVar + +T = TypeVar("T") + + +def from_dict(cls: Type[T], data: Dict[str, Any]) -> T: + """ + Recursively convert a plain dict (e.g. from json.loads) into the given + dataclass type, using the field type hints to convert nested structures. + + Handles: + - Nested dataclasses + - List[SomeDataclass] + - Optional[X] / Union[X, None] + - Union[DataclassType, primitive] (e.g. Union[Measurement, int]) + - Primitive pass-through for Union[str, int] etc. 
+ """ + return _from_dict_impl(cls, data) # type: ignore[return-value] + + +def _from_dict_impl(cls: Any, data: Any) -> Any: + hints = typing.get_type_hints(cls) + kwargs: Dict[str, Any] = {} + + for field in dataclasses.fields(cls): # type: ignore[arg-type] + has_default = ( + field.default is not dataclasses.MISSING + or field.default_factory is not dataclasses.MISSING # type: ignore[misc] + ) + if field.name not in data: + if has_default: + continue + raise ValueError(f"{cls.__name__}: missing required field '{field.name}'") + + kwargs[field.name] = _coerce(data[field.name], hints[field.name]) + + return cls(**kwargs) + + +def _coerce(value: Any, hint: Any) -> Any: + if value is None: + return None + + origin = typing.get_origin(hint) + args = typing.get_args(hint) + + # Union (includes Optional[X] which is Union[X, None]) + if origin is typing.Union: + if value is None: + return None + non_none_args = [a for a in args if a is not type(None)] + if len(non_none_args) == 1: + # Optional[X] — recurse so List[X] and nested dataclasses are handled + return _coerce(value, non_none_args[0]) + # Multi-type Union (e.g. 
Union[Measurement, int]): try dataclasses first + for arg in non_none_args: + if dataclasses.is_dataclass(arg) and isinstance(value, dict): + return _from_dict_impl(arg, value) + # All remaining args are primitives — return value as-is + return value + + # List[X] + if origin is list: + item_hint = args[0] + return [_coerce(item, item_hint) for item in value] + + # Plain dataclass + if dataclasses.is_dataclass(hint) and isinstance(value, dict): + return _from_dict_impl(hint, value) + + if hint is date and isinstance(value, str): + return date.fromisoformat(value) + + return value From 0d3189beee875487296100a2d3c72ec7c446ee70 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 25 Apr 2026 22:55:35 +0000 Subject: [PATCH 07/44] added httpx dependency --- backend/engine/requirements.txt | 4 +++- backend/epc_client/requirements.txt | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/engine/requirements.txt b/backend/engine/requirements.txt index 5cca1211..41d07a1a 100644 --- a/backend/engine/requirements.txt +++ b/backend/engine/requirements.txt @@ -23,4 +23,6 @@ pyarrow==17.0.0 fastparquet==2024.5.0 aiohttp==3.10.10 # find my epc -beautifulsoup4 \ No newline at end of file +beautifulsoup4 +# HTTP client (epc_client module) +httpx==0.28.1 \ No newline at end of file diff --git a/backend/epc_client/requirements.txt b/backend/epc_client/requirements.txt index aa69c38b..cee32373 100644 --- a/backend/epc_client/requirements.txt +++ b/backend/epc_client/requirements.txt @@ -1 +1 @@ -httpx>=0.27.0 +httpx==0.28.1 From 077f14764b36bf51cf6a8954d10a31dc2412d58f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 25 Apr 2026 23:03:11 +0000 Subject: [PATCH 08/44] updated test for 101 columns with new fields on property_details_epc --- backend/export/tests/test_export.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/export/tests/test_export.py b/backend/export/tests/test_export.py index 
b00d1744..f13ef374 100644 --- a/backend/export/tests/test_export.py +++ b/backend/export/tests/test_export.py @@ -284,8 +284,8 @@ def test_default_export_integration(db_session): assert df.shape == ( 10, - 100, - ), "Expected dataframe shape to be (10, 100), got {}".format(df.shape) + 101, + ), "Expected dataframe shape to be (10, 101), got {}".format(df.shape) def test_solar_with_battery_example(db_session): From 09558629731f6e8f2281dd593324f257bd0b7586 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 27 Apr 2026 11:32:44 +0000 Subject: [PATCH 09/44] working on integrating new EPC api into address2UPRN --- .github/workflows/deploy_fastapi_backend.yml | 5 ++++ .github/workflows/unit_tests.yml | 1 + .gitignore | 3 ++ backend/.env.example | 3 +- backend/address2UPRN/main.py | 29 +++++++++++--------- backend/address2UPRN/tests/test_csv.py | 1 - backend/app/config.py | 1 + conftest.py | 10 ++++--- 8 files changed, 34 insertions(+), 19 deletions(-) diff --git a/.github/workflows/deploy_fastapi_backend.yml b/.github/workflows/deploy_fastapi_backend.yml index 5ad4d6ac..cb861d31 100644 --- a/.github/workflows/deploy_fastapi_backend.yml +++ b/.github/workflows/deploy_fastapi_backend.yml @@ -51,6 +51,10 @@ jobs: id: set_auth_token run: echo "::set-output name=auth_token::${{ secrets[format('{0}_EPC_AUTH_TOKEN', github.ref_name)] }}" + - name: Set Open EPC API token + id: set_open_epc_token + run: echo "::set-output name=open_epc_token::${{ secrets[format('{0}_OPEN_EPC_API_TOKEN', github.ref_name)] }}" + # Store port, name and host in github secrets - name: Set DB credentials id: set_db_credentials @@ -127,6 +131,7 @@ jobs: GOOGLE_SOLAR_API_KEY: ${{ steps.set_api_secrets.outputs.google_solar_api_key }} DOMAIN_NAME: ${{ steps.set_domain.outputs.domain }} EPC_AUTH_TOKEN: ${{ steps.set_auth_token.outputs.auth_token }} + OPEN_EPC_API_TOKEN: ${{ steps.set_open_epc_token.outputs.open_epc_token }} DB_HOST: ${{ steps.set_db_credentials.outputs.db_host }} DB_PORT: ${{ 
steps.set_db_credentials.outputs.db_port }} DB_NAME: ${{ steps.set_db_credentials.outputs.db_name }} diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 436428f9..e1f4fb48 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -49,6 +49,7 @@ jobs: docker run --rm \ --network host \ -e EPC_AUTH_TOKEN=${{ secrets.DEV_EPC_AUTH_TOKEN }} \ + -e OPEN_EPC_API_TOKEN=${{ secrets.DEV_OPEN_EPC_API_TOKEN }} \ -e HUBSPOT_API_KEY=${{ secrets.HUBSPOT_API_KEY }} \ -e DB_HOST=localhost \ -e DB_NAME=test \ diff --git a/.gitignore b/.gitignore index d6d23313..888d527a 100644 --- a/.gitignore +++ b/.gitignore @@ -292,3 +292,6 @@ pyrightconfig.json # playwright output */pashub_fetcher/videos/* backlog/* + +# Local Claude config files +.claude/* \ No newline at end of file diff --git a/backend/.env.example b/backend/.env.example index 352192d0..04611719 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -1,4 +1,5 @@ API_KEY = example-api-key ENVIRONMENT = local SECRET_KEY = YOUR_SECRET_KEY -ALGORITHM = HS256 \ No newline at end of file +ALGORITHM = HS256 +OPEN_EPC_API_TOKEN = your_token_here \ No newline at end of file diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index bd562bc7..98f8c65b 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -14,27 +14,30 @@ from utils.s3 import ( ) from datetime import datetime -from backend.utils.addressMatch import AddressMatch, get_uprn_candidates, df_has_single_uprn, score_addresses +from backend.utils.addressMatch import ( + AddressMatch, + get_uprn_candidates, + df_has_single_uprn, + score_addresses, +) logger = setup_logger() -EPC_AUTH_TOKEN = os.getenv( - "EPC_AUTH_TOKEN", -) +OPEN_EPC_API_TOKEN = os.getenv("OPEN_EPC_API_TOKEN") -if EPC_AUTH_TOKEN is None: - raise RuntimeError("EPC_AUTH_TOKEN not defined in env") +if OPEN_EPC_API_TOKEN is None: + raise RuntimeError("OPEN_EPC_API_TOKEN not defined in env") def 
get_epc_data_with_postcode(postcode: str) -> pd.DataFrame: from backend.epc_client.client import EpcClientService - service = EpcClientService(auth_token=EPC_AUTH_TOKEN) + + service = EpcClientService(auth_token=OPEN_EPC_API_TOKEN) results = service.search_by_postcode(postcode) - return pd.DataFrame([ - {"address": r.address_line_1, "uprn": r.uprn} - for r in results - ]) + return pd.DataFrame( + [{"address": r.address_line_1, "uprn": r.uprn} for r in results] + ) def get_uprn_with_epc_df( @@ -58,8 +61,8 @@ def get_uprn_with_epc_df( best_score = scored_df.iloc[0]["lexiscore"] # # Return None if score is below threshold - # if best_score < 0.7: - # return None + if best_score < 0.7: + return None # All rank-1 rows (possible draw) top_rank_df = scored_df[scored_df["lexirank"] == 1] diff --git a/backend/address2UPRN/tests/test_csv.py b/backend/address2UPRN/tests/test_csv.py index 70e7a9f9..a8f0b1b4 100644 --- a/backend/address2UPRN/tests/test_csv.py +++ b/backend/address2UPRN/tests/test_csv.py @@ -31,7 +31,6 @@ def test_uprn_resolution_matches_manual( postcode: str, expected_uprn: str, ): - from utils.logger import setup_logger uprn = get_uprn(user_input, postcode) if uprn: diff --git a/backend/app/config.py b/backend/app/config.py index 70a6b50c..44826d24 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -44,6 +44,7 @@ class Settings(BaseSettings): # Third parties EPC_AUTH_TOKEN: str = "changeme" + OPEN_EPC_API_TOKEN: str = "changeme" GOOGLE_SOLAR_API_KEY: str = "changeme" # Database settings diff --git a/conftest.py b/conftest.py index 2ea20ebb..0689853b 100644 --- a/conftest.py +++ b/conftest.py @@ -1,11 +1,9 @@ import os +from pathlib import Path from backend.app.config import get_settings -import os from dotenv import load_dotenv -import os -# Load .env in conftest.py directory for local development -load_dotenv() +load_dotenv(Path(__file__).resolve().parent / "backend" / ".env") DEFAULT_ENV = { "API_KEY": "test", @@ -18,6 +16,10 @@ DEFAULT_ENV = 
{ "EPC_AUTH_TOKEN", "test", ), # overridden in GitHub Actions + "OPEN_EPC_API_TOKEN": os.getenv( + "OPEN_EPC_API_TOKEN", + "test", + ), # overridden in GitHub Actions "GOOGLE_SOLAR_API_KEY": "test", "DB_HOST": "localhost", "DB_USERNAME": "test", From 1af6bc674831f28dc62cd865f048f09b74a6dc90 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 27 Apr 2026 12:15:30 +0000 Subject: [PATCH 10/44] creating lodgment dates data, using old EPC api, to verify test failures --- .../tests/populate_lodgement_dates.py | 81 ++ backend/address2UPRN/tests/test_csv.py | 46 +- .../tests/test_lodgement_dates.json | 1230 +++++++++++++++++ 3 files changed, 1348 insertions(+), 9 deletions(-) create mode 100644 backend/address2UPRN/tests/populate_lodgement_dates.py create mode 100644 backend/address2UPRN/tests/test_lodgement_dates.json diff --git a/backend/address2UPRN/tests/populate_lodgement_dates.py b/backend/address2UPRN/tests/populate_lodgement_dates.py new file mode 100644 index 00000000..0726596b --- /dev/null +++ b/backend/address2UPRN/tests/populate_lodgement_dates.py @@ -0,0 +1,81 @@ +import csv +import json +import os +from pathlib import Path +from urllib.parse import urlencode + +import pandas as pd +from epc_api.client import EpcClient + +FIXTURE_PATH = Path(__file__).parent / "test_data.csv" +SIDECAR_PATH = Path(__file__).parent / "test_lodgement_dates.json" + + +def fetch_postcode_records(client: EpcClient, postcode: str) -> pd.DataFrame: + url = os.path.join(client.domestic.host, "search") + url += "?" 
+ urlencode({"size": 500}) + resp = client.domestic.call(url=url, method="get", params={"postcode": postcode}) + if not resp or "rows" not in resp: + return pd.DataFrame() + return pd.DataFrame(resp["rows"], columns=resp["column-names"]) + + +def main(): + auth_token = os.getenv("EPC_AUTH_TOKEN") + if not auth_token: + raise RuntimeError("EPC_AUTH_TOKEN not set") + + client = EpcClient(auth_token=auth_token) + + sidecar = {} + if SIDECAR_PATH.exists(): + sidecar = json.loads(SIDECAR_PATH.read_text()) + + with open(FIXTURE_PATH, newline="", encoding="utf-8") as f: + rows = list(csv.DictReader(f)) + + by_postcode: dict[str, list[dict]] = {} + for row in rows: + if row["Manual UPRN Code"] == "None": + continue + by_postcode.setdefault(row["Postcode"], []).append(row) + + for postcode, postcode_rows in by_postcode.items(): + print(f"Fetching {postcode} ({len(postcode_rows)} rows)...") + try: + epc_df = fetch_postcode_records(client, postcode) + except Exception as e: + print(f" ERROR: {e}") + continue + + if epc_df.empty: + print(f" No results from old API for {postcode}") + continue + + epc_df["uprn"] = epc_df["uprn"].astype(str).str.replace(r"\.0$", "", regex=True) + + for row in postcode_rows: + key = f"{row['User Input']}|{row['Postcode']}" + if key in sidecar: + continue + + expected_uprn = str(row["Manual UPRN Code"]).strip() + match = epc_df[epc_df["uprn"] == expected_uprn] + + if match.empty: + print(f" WARN: UPRN {expected_uprn} not found in old API for {postcode}") + sidecar[key] = {"lodgement_date": None, "found_in_old_api": False} + else: + lodgement_date = match.iloc[0].get("lodgement-date") + sidecar[key] = { + "lodgement_date": str(lodgement_date) if lodgement_date else None, + "found_in_old_api": True, + } + print(f" {row['User Input']}: {lodgement_date}") + + SIDECAR_PATH.write_text(json.dumps(sidecar, indent=2)) + print(f"\nWritten to {SIDECAR_PATH}") + + +if __name__ == "__main__": + main() diff --git a/backend/address2UPRN/tests/test_csv.py 
b/backend/address2UPRN/tests/test_csv.py index a8f0b1b4..d8f54c39 100644 --- a/backend/address2UPRN/tests/test_csv.py +++ b/backend/address2UPRN/tests/test_csv.py @@ -1,25 +1,54 @@ # tests/test_address_to_uprn_csv.py import csv +import json import pytest +from datetime import date from pathlib import Path from backend.address2UPRN.main import get_uprn FIXTURE_PATH = Path(__file__).parent / "test_data.csv" +SIDECAR_PATH = Path(__file__).parent / "test_lodgement_dates.json" +NEW_API_CUTOFF = date(2012, 1, 1) + + +def _load_sidecar() -> dict: + if SIDECAR_PATH.exists(): + return json.loads(SIDECAR_PATH.read_text()) + return {} def load_test_cases(): + sidecar = _load_sidecar() with open(FIXTURE_PATH, newline="", encoding="utf-8") as f: reader = csv.DictReader(f) - return [ - pytest.param( - row["User Input"], - row["Postcode"], - row["Manual UPRN Code"], - id=f'{row["User Input"]} [{row["Postcode"]}]', + cases = [] + for row in reader: + key = f"{row['User Input']}|{row['Postcode']}" + entry = sidecar.get(key, {}) + lodgement_date = entry.get("lodgement_date") + + marks = [] + if lodgement_date: + parsed = date.fromisoformat(lodgement_date[:10]) + if parsed < NEW_API_CUTOFF: + marks.append( + pytest.mark.xfail( + reason=f"EPC lodged {lodgement_date} — predates new API coverage (Jan 2012)", + strict=False, + ) + ) + + cases.append( + pytest.param( + row["User Input"], + row["Postcode"], + row["Manual UPRN Code"], + id=f'{row["User Input"]} [{row["Postcode"]}]', + marks=marks, + ) ) - for row in reader - ] + return cases @pytest.mark.parametrize( @@ -31,7 +60,6 @@ def test_uprn_resolution_matches_manual( postcode: str, expected_uprn: str, ): - uprn = get_uprn(user_input, postcode) if uprn: assert uprn == expected_uprn diff --git a/backend/address2UPRN/tests/test_lodgement_dates.json b/backend/address2UPRN/tests/test_lodgement_dates.json new file mode 100644 index 00000000..c58be704 --- /dev/null +++ b/backend/address2UPRN/tests/test_lodgement_dates.json @@ -0,0 +1,1230 
@@ +{ + "47 The Fairway|OX16 0RR": { + "lodgement_date": "2010-03-16", + "found_in_old_api": true + }, + "11 REGENT COURT|SL1 3LG": { + "lodgement_date": "2022-05-04", + "found_in_old_api": true + }, + "3/137a Windmill Road|TW8 9NH": { + "lodgement_date": "2025-01-30", + "found_in_old_api": true + }, + "Flat 33|SW18 4BE": { + "lodgement_date": "2022-04-27", + "found_in_old_api": true + }, + "FLAT 1 Brendon Grove|N2 8JE": { + "lodgement_date": "2011-02-17", + "found_in_old_api": true + }, + "Flat 15|KT8 2NE": { + "lodgement_date": "2018-03-26", + "found_in_old_api": true + }, + "FLAT 5 Stonehill Road|W4 3AH": { + "lodgement_date": "2025-09-22", + "found_in_old_api": true + }, + "Flat 10|W4 3AH": { + "lodgement_date": "2023-06-15", + "found_in_old_api": true + }, + "Flat 11|W4 3AH": { + "lodgement_date": "2023-10-19", + "found_in_old_api": true + }, + "Flat 12, Forbes House|W4 3AH": { + "lodgement_date": "2023-10-04", + "found_in_old_api": true + }, + "Flat 13|W4 3AH": { + "lodgement_date": "2012-05-14", + "found_in_old_api": true + }, + "Flat 14|W4 3AH": { + "lodgement_date": "2022-10-15", + "found_in_old_api": true + }, + "Flat 15|W4 3AH": { + "lodgement_date": "2009-08-25", + "found_in_old_api": true + }, + "Flat 16|W4 3AH": { + "lodgement_date": "2012-05-23", + "found_in_old_api": true + }, + "Flat 17|W4 3AH": { + "lodgement_date": "2023-08-31", + "found_in_old_api": true + }, + "Flat 19|W4 3AH": { + "lodgement_date": "2025-07-16", + "found_in_old_api": true + }, + "Flat 20|W4 3AH": { + "lodgement_date": "2024-10-27", + "found_in_old_api": true + }, + "Flat 21|W4 3AH": { + "lodgement_date": "2023-08-08", + "found_in_old_api": true + }, + "Flat 22|W4 3AH": { + "lodgement_date": "2022-10-15", + "found_in_old_api": true + }, + "Flat 23|W4 3AH": { + "lodgement_date": "2022-10-15", + "found_in_old_api": true + }, + "Flat 24|W4 3AH": { + "lodgement_date": "2024-01-12", + "found_in_old_api": true + }, + "10 Douglas Court|SL7 1UQ": { + "lodgement_date": "2018-10-25", + 
"found_in_old_api": true + }, + "1 Windmill Road|HP17 8JA": { + "lodgement_date": "2009-08-25", + "found_in_old_api": true + }, + "31 Denewood|HP13 7LH": { + "lodgement_date": "2009-03-23", + "found_in_old_api": true + }, + "10, Greenways Drive|TW4 5DD": { + "lodgement_date": "2012-11-29", + "found_in_old_api": true + }, + "Flat 11|TW4 5DD": { + "lodgement_date": "2012-11-29", + "found_in_old_api": true + }, + "12, Greenways Drive|TW4 5DD": { + "lodgement_date": "2012-11-29", + "found_in_old_api": true + }, + "Flat 13|TW4 5DD": { + "lodgement_date": "2012-11-29", + "found_in_old_api": true + }, + "Flat 14|TW4 5DD": { + "lodgement_date": "2012-11-29", + "found_in_old_api": true + }, + "Flat 15|TW4 5DD": { + "lodgement_date": "2012-11-29", + "found_in_old_api": true + }, + "Flat 16|TW4 5DD": { + "lodgement_date": "2025-02-26", + "found_in_old_api": true + }, + "Flat 17|TW4 5DD": { + "lodgement_date": "2012-11-29", + "found_in_old_api": true + }, + "Flat 18|TW4 5DD": { + "lodgement_date": "2022-12-28", + "found_in_old_api": true + }, + "FLAT 1 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 2 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 3 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 4 Goodstone Court|HA1 4FL": { + "lodgement_date": "2022-12-14", + "found_in_old_api": true + }, + "FLAT 5 Goodstone Court|HA1 4FL": { + "lodgement_date": "2016-10-04", + "found_in_old_api": true + }, + "FLAT 6 Goodstone Court|HA1 4FL": { + "lodgement_date": "2024-06-05", + "found_in_old_api": true + }, + "FLAT 7 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 8 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 9 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 10 
Goodstone Court|HA1 4FL": { + "lodgement_date": "2023-09-21", + "found_in_old_api": true + }, + "FLAT 11 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 12 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 13 Goodstone Court|HA1 4FL": { + "lodgement_date": "2022-12-13", + "found_in_old_api": true + }, + "FLAT 14 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 15 Goodstone Court|HA1 4FL": { + "lodgement_date": "2024-02-09", + "found_in_old_api": true + }, + "FLAT 16 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 17 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 18 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 19 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 20 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 21 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 22 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 23 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 24 Goodstone Court|HA1 4FL": { + "lodgement_date": "2024-10-24", + "found_in_old_api": true + }, + "FLAT 25 Goodstone Court|HA1 4FL": { + "lodgement_date": "2020-01-18", + "found_in_old_api": true + }, + "FLAT 26 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 27 Goodstone Court|HA1 4FL": { + "lodgement_date": "2022-11-04", + "found_in_old_api": true + }, + "FLAT 28 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 29 
Goodstone Court|HA1 4FL": { + "lodgement_date": "2023-10-13", + "found_in_old_api": true + }, + "FLAT 30 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 31 Goodstone Court|HA1 4FL": { + "lodgement_date": "2023-04-19", + "found_in_old_api": true + }, + "FLAT 32 Goodstone Court|HA1 4FL": { + "lodgement_date": "2025-11-18", + "found_in_old_api": true + }, + "FLAT 33 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 34 Goodstone Court|HA1 4FL": { + "lodgement_date": "2022-09-19", + "found_in_old_api": true + }, + "FLAT 35 Goodstone Court|HA1 4FL": { + "lodgement_date": "2021-10-13", + "found_in_old_api": true + }, + "FLAT 36 Goodstone Court|HA1 4FL": { + "lodgement_date": "2022-10-12", + "found_in_old_api": true + }, + "FLAT 37 Goodstone Court|HA1 4FL": { + "lodgement_date": "2024-08-26", + "found_in_old_api": true + }, + "FLAT 38 Goodstone Court|HA1 4FL": { + "lodgement_date": "2023-05-26", + "found_in_old_api": true + }, + "FLAT 39 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 40 Goodstone Court|HA1 4FL": { + "lodgement_date": "2023-10-05", + "found_in_old_api": true + }, + "FLAT 41 Goodstone Court|HA1 4FL": { + "lodgement_date": "2025-11-24", + "found_in_old_api": true + }, + "FLAT 42 Goodstone Court|HA1 4FL": { + "lodgement_date": "2012-11-06", + "found_in_old_api": true + }, + "FLAT 43 Goodstone Court|HA1 4FL": { + "lodgement_date": "2025-07-08", + "found_in_old_api": true + }, + "30c, Bosanquet Close|UB8 3PE": { + "lodgement_date": "2019-05-27", + "found_in_old_api": true + }, + "30e, Bosanquet Close|UB8 3PE": { + "lodgement_date": "2024-07-30", + "found_in_old_api": true + }, + "13 Stubwick Court, Old Saw Mill Place|HP6 6FF": { + "lodgement_date": "2025-07-05", + "found_in_old_api": true + }, + "14 Stubwick Court, Old Saw Mill Place|HP6 6FF": { + "lodgement_date": "2012-07-18", + "found_in_old_api": 
true + }, + "15 Stubwick Court, Old Saw Mill Place|HP6 6FF": { + "lodgement_date": "2012-06-11", + "found_in_old_api": true + }, + "16 Stubwick Court, Old Saw Mill Place|HP6 6FF": { + "lodgement_date": "2022-07-01", + "found_in_old_api": true + }, + "17 Stubwick Court, Old Saw Mill Place|HP6 6FF": { + "lodgement_date": "2025-01-07", + "found_in_old_api": true + }, + "18 Stubwick Court, Old Saw Mill Place|HP6 6FF": { + "lodgement_date": "2012-07-18", + "found_in_old_api": true + }, + "19 Stubwick Court, Old Saw Mill Place|HP6 6FF": { + "lodgement_date": "2025-03-22", + "found_in_old_api": true + }, + "20 Stubwick Court, Old Saw Mill Place|HP6 6FF": { + "lodgement_date": "2022-08-15", + "found_in_old_api": true + }, + "21 Stubwick Court, Old Saw Mill Place|HP6 6FF": { + "lodgement_date": "2012-07-18", + "found_in_old_api": true + }, + "90a Murray Road|W5 4DA": { + "lodgement_date": "2013-12-12", + "found_in_old_api": true + }, + "Flat 1, 6 Wolverton Gardens|W5 3LJ": { + "lodgement_date": "2017-10-13", + "found_in_old_api": true + }, + "1, Monsted House|UB1 1FG": { + "lodgement_date": "2019-02-08", + "found_in_old_api": true + }, + "10, Monsted House|UB1 1FG": { + "lodgement_date": "2019-02-08", + "found_in_old_api": true + }, + "20, Monsted House|UB1 1FG": { + "lodgement_date": "2019-02-08", + "found_in_old_api": true + }, + "2, Monsted House|UB1 1FG": { + "lodgement_date": "2019-02-08", + "found_in_old_api": true + }, + "3, Monsted House|UB1 1FG": { + "lodgement_date": "2019-02-08", + "found_in_old_api": true + }, + "4, Monsted House|UB1 1FG": { + "lodgement_date": "2019-02-08", + "found_in_old_api": true + }, + "5, Monsted House|UB1 1FG": { + "lodgement_date": "2019-02-08", + "found_in_old_api": true + }, + "6, Monsted House|UB1 1FG": { + "lodgement_date": "2019-02-08", + "found_in_old_api": true + }, + "7, Monsted House|UB1 1FG": { + "lodgement_date": "2019-02-08", + "found_in_old_api": true + }, + "8, Monsted House|UB1 1FG": { + "lodgement_date": "2019-02-08", + 
"found_in_old_api": true + }, + "9, Monsted House|UB1 1FG": { + "lodgement_date": "2019-02-08", + "found_in_old_api": true + }, + "1 Cullis House, 1, Accolade Avenue|UB1 1FH": { + "lodgement_date": "2018-11-05", + "found_in_old_api": true + }, + "2 Cullis House, 1, Accolade Avenue|UB1 1FH": { + "lodgement_date": "2018-11-05", + "found_in_old_api": true + }, + "3 Cullis House, 1, Accolade Avenue|UB1 1FH": { + "lodgement_date": "2018-11-05", + "found_in_old_api": true + }, + "4 Cullis House, 1, Accolade Avenue|UB1 1FH": { + "lodgement_date": "2018-11-05", + "found_in_old_api": true + }, + "5 Cullis House, 1, Accolade Avenue|UB1 1FH": { + "lodgement_date": "2018-11-05", + "found_in_old_api": true + }, + "6 Cullis House, 1, Accolade Avenue|UB1 1FH": { + "lodgement_date": "2018-11-05", + "found_in_old_api": true + }, + "1 Genteel House Samara Drive|UB1 1FJ": { + "lodgement_date": "2019-05-10", + "found_in_old_api": true + }, + "2 Genteel House Samara Drive|UB1 1FJ": { + "lodgement_date": "2019-05-10", + "found_in_old_api": true + }, + "3 Genteel House Samara Drive|UB1 1FJ": { + "lodgement_date": "2019-05-13", + "found_in_old_api": true + }, + "4 Genteel House Samara Drive|UB1 1FJ": { + "lodgement_date": "2019-05-13", + "found_in_old_api": true + }, + "5 Genteel House Samara Drive|UB1 1FJ": { + "lodgement_date": "2019-05-13", + "found_in_old_api": true + }, + "6 Genteel House Samara Drive|UB1 1FJ": { + "lodgement_date": "2019-05-13", + "found_in_old_api": true + }, + "7 Genteel House Samara Drive|UB1 1FJ": { + "lodgement_date": "2019-05-13", + "found_in_old_api": true + }, + "8 Genteel House Samara Drive|UB1 1FJ": { + "lodgement_date": "2019-05-13", + "found_in_old_api": true + }, + "9 Genteel House Samara Drive|UB1 1FJ": { + "lodgement_date": "2019-05-13", + "found_in_old_api": true + }, + "10 Genteel House Samara Drive|UB1 1FJ": { + "lodgement_date": "2019-05-13", + "found_in_old_api": true + }, + "Flat 1 Ash Tree House, 2, Thompson Avenue|SE5 0TE": { + 
"lodgement_date": "2018-09-05", + "found_in_old_api": true + }, + "Flat 3 ASH TREE HOUSE|SE5 0TE": { + "lodgement_date": "2018-09-05", + "found_in_old_api": true + }, + "Flat 5 ASH TREE HOUSE|SE5 0TE": { + "lodgement_date": "2019-09-12", + "found_in_old_api": true + }, + "Flat 8 ASH TREE HOUSE|SE5 0TE": { + "lodgement_date": "2011-10-26", + "found_in_old_api": true + }, + "Flat 12 ASH TREE HOUSE|SE5 0TE": { + "lodgement_date": "2018-09-05", + "found_in_old_api": true + }, + "FLAT 1 599 HARROW ROAD|W10 4RA": { + "lodgement_date": "2017-01-12", + "found_in_old_api": true + }, + "FLAT 2 599 HARROW ROAD|W10 4RA": { + "lodgement_date": "2020-07-28", + "found_in_old_api": true + }, + "FLAT 5 599 HARROW ROAD|W10 4RA": { + "lodgement_date": "2017-01-12", + "found_in_old_api": true + }, + "Flat 1, Ohio Building|SE13 7RX": { + "lodgement_date": "2023-08-15", + "found_in_old_api": true + }, + "Flat 2, Ohio Building|SE13 7RX": { + "lodgement_date": "2017-06-09", + "found_in_old_api": true + }, + "Apartment 1 Block B, 105, Benwell Road|N7 7BW": { + "lodgement_date": "2017-01-05", + "found_in_old_api": true + }, + "Apartment 2 Block B, 105, Benwell Road|N7 7BW": { + "lodgement_date": "2014-01-22", + "found_in_old_api": true + }, + "Apartment 3 Block B, 105, Benwell Road|N7 7BW": { + "lodgement_date": "2009-02-25", + "found_in_old_api": true + }, + "Apartment 4 Block B, 105, Benwell Road|N7 7BW": { + "lodgement_date": "2017-01-05", + "found_in_old_api": true + }, + "Apartment 5 Block B, 105, Benwell Road|N7 7BW": { + "lodgement_date": "2009-02-25", + "found_in_old_api": true + }, + "Apartment 6 Block B, 105, Benwell Road|N7 7BW": { + "lodgement_date": "2009-02-25", + "found_in_old_api": true + }, + "Apartment 7 Block B, 105, Benwell Road|N7 7BW": { + "lodgement_date": "2022-10-24", + "found_in_old_api": true + }, + "Apartment 8 Block B, 105, Benwell Road|N7 7BW": { + "lodgement_date": "2009-02-25", + "found_in_old_api": true + }, + "Apartment 9 Block B, 105, Benwell Road|N7 7BW": 
{ + "lodgement_date": "2009-02-25", + "found_in_old_api": true + }, + "Apartment 10 Block B, 105, Benwell Road|N7 7BW": { + "lodgement_date": "2009-02-25", + "found_in_old_api": true + }, + "Apartment 11 Block B, 105, Benwell Road|N7 7BW": { + "lodgement_date": "2011-08-17", + "found_in_old_api": true + }, + "Apartment 12 Block B, 105, Benwell Road|N7 7BW": { + "lodgement_date": "2009-02-25", + "found_in_old_api": true + }, + "Apartment 13 Block B, 105, Benwell Road|N7 7BW": { + "lodgement_date": "2009-02-25", + "found_in_old_api": true + }, + "Apartment 1 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2023-07-19", + "found_in_old_api": true + }, + "Apartment 2 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2022-10-20", + "found_in_old_api": true + }, + "Apartment 3 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2009-05-15", + "found_in_old_api": true + }, + "Apartment 4 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2009-05-15", + "found_in_old_api": true + }, + "Apartment 5 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2009-05-15", + "found_in_old_api": true + }, + "Apartment 6 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2012-11-08", + "found_in_old_api": true + }, + "Apartment 7 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2015-08-30", + "found_in_old_api": true + }, + "Apartment 8 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2020-08-02", + "found_in_old_api": true + }, + "Apartment 9 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2021-10-12", + "found_in_old_api": true + }, + "Apartment 10 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2009-05-15", + "found_in_old_api": true + }, + "Apartment 11 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2009-05-15", + "found_in_old_api": true + }, + "Apartment 12 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2022-02-22", + "found_in_old_api": true + }, + "Apartment 13 Block D, 32, 
Hornsey Road|N7 7AT": { + "lodgement_date": "2009-05-15", + "found_in_old_api": true + }, + "Apartment 14 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2009-05-15", + "found_in_old_api": true + }, + "Apartment 15 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2009-05-15", + "found_in_old_api": true + }, + "Apartment 16 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2009-05-15", + "found_in_old_api": true + }, + "Apartment 17Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2019-01-22", + "found_in_old_api": true + }, + "Apartment 18 Block D, 32, Hornsey Road|N7 7AT": { + "lodgement_date": "2013-06-03", + "found_in_old_api": true + }, + "FLAT B 158 LEAHURST ROAD|SE13 5NL": { + "lodgement_date": "2014-01-24", + "found_in_old_api": true + }, + "2 COLLEGE HOUSE|CM7 1JS": { + "lodgement_date": "2017-01-12", + "found_in_old_api": true + }, + "3 COLLEGE HOUSE|CM7 1JS": { + "lodgement_date": "2017-01-12", + "found_in_old_api": true + }, + "2 Anita Street|M4 5DU": { + "lodgement_date": "2019-10-18", + "found_in_old_api": true + }, + "5 Anita Street|M4 5DU": { + "lodgement_date": "2012-12-21", + "found_in_old_api": true + }, + "6 Anita Street|M4 5DU": { + "lodgement_date": "2021-02-16", + "found_in_old_api": true + }, + "10 Anita Street|M4 5DU": { + "lodgement_date": "2021-07-01", + "found_in_old_api": true + }, + "12 Anita Street|M4 5DU": { + "lodgement_date": "2025-08-08", + "found_in_old_api": true + }, + "26 Anita Street|M4 5DU": { + "lodgement_date": "2010-06-25", + "found_in_old_api": true + }, + "33 Anita Street|M4 5DU": { + "lodgement_date": "2017-03-10", + "found_in_old_api": true + }, + "35 Anita Street|M4 5DU": { + "lodgement_date": "2015-11-18", + "found_in_old_api": true + }, + "36 Anita Street|M4 5DU": { + "lodgement_date": "2013-09-12", + "found_in_old_api": true + }, + "23 George Leigh Street|M4 5DR": { + "lodgement_date": "2025-03-11", + "found_in_old_api": true + }, + "35 George Leigh Street|M4 5DR": { + 
"lodgement_date": "2024-05-29", + "found_in_old_api": true + }, + "39 George Leigh Street|M4 5DR": { + "lodgement_date": "2024-05-28", + "found_in_old_api": true + }, + "51 George Leigh Street|M4 5DR": { + "lodgement_date": "2022-02-03", + "found_in_old_api": true + }, + "1a, Victoria Square|M4 5DX": { + "lodgement_date": "2016-01-08", + "found_in_old_api": true + }, + "4a, Victoria Square|M4 5DX": { + "lodgement_date": "2012-09-19", + "found_in_old_api": true + }, + "5a Victoria Square|M4 5DX": { + "lodgement_date": "2012-06-25", + "found_in_old_api": true + }, + " 6a Victoria Square|M4 5DX": { + "lodgement_date": "2023-02-13", + "found_in_old_api": true + }, + "7a Victoria Square|M4 5DX": { + "lodgement_date": "2017-03-15", + "found_in_old_api": true + }, + "8a Victoria Square|M4 5DX": { + "lodgement_date": "2019-11-25", + "found_in_old_api": true + }, + "9a Victoria Square|M4 5DX": { + "lodgement_date": "2026-02-24", + "found_in_old_api": true + }, + "10a Victoria Square|M4 5DX": { + "lodgement_date": "2013-10-16", + "found_in_old_api": true + }, + "11a Victoria Square|M4 5DX": { + "lodgement_date": "2015-11-06", + "found_in_old_api": true + }, + "12a Victoria Square|M4 5DX": { + "lodgement_date": "2022-11-08", + "found_in_old_api": true + }, + "13a Victoria Square|M4 5DX": { + "lodgement_date": "2025-04-27", + "found_in_old_api": true + }, + "14a Victoria Square|M4 5DX": { + "lodgement_date": "2010-11-09", + "found_in_old_api": true + }, + "15a Victoria Square|M4 5DX": { + "lodgement_date": "2012-03-26", + "found_in_old_api": true + }, + "16a Victoria Square|M4 5DX": { + "lodgement_date": "2009-05-28", + "found_in_old_api": true + }, + "17a Victoria Square|M4 5DX": { + "lodgement_date": "2012-12-20", + "found_in_old_api": true + }, + "18a Victoria Square|M4 5DX": { + "lodgement_date": "2022-07-21", + "found_in_old_api": true + }, + "19a Victoria Square|M4 5DX": { + "lodgement_date": "2009-08-18", + "found_in_old_api": true + }, + "20a Victoria Square|M4 5DX": { 
+ "lodgement_date": "2014-05-27", + "found_in_old_api": true + }, + "21a Victoria Square|M4 5DY": { + "lodgement_date": "2010-04-08", + "found_in_old_api": true + }, + "23a Victoria Square|M4 5DY": { + "lodgement_date": "2016-04-05", + "found_in_old_api": true + }, + "24a Victoria Square|M4 5DY": { + "lodgement_date": "2022-03-23", + "found_in_old_api": true + }, + "25a Victoria Square|M4 5DY": { + "lodgement_date": "2024-10-13", + "found_in_old_api": true + }, + "26a Victoria Square|M4 5DY": { + "lodgement_date": "2024-03-25", + "found_in_old_api": true + }, + "27a Victoria Square|M4 5DY": { + "lodgement_date": "2009-10-05", + "found_in_old_api": true + }, + "29a Victoria Square|M4 5DY": { + "lodgement_date": "2024-05-27", + "found_in_old_api": true + }, + "30a Victoria Square|M4 5DY": { + "lodgement_date": "2011-09-07", + "found_in_old_api": true + }, + "31a Victoria Square|M4 5DY": { + "lodgement_date": "2010-12-09", + "found_in_old_api": true + }, + "32a Victoria Square|M4 5DY": { + "lodgement_date": "2021-02-17", + "found_in_old_api": true + }, + "33a Victoria Square|M4 5DY": { + "lodgement_date": "2011-04-05", + "found_in_old_api": true + }, + "34a Victoria Square|M4 5DY": { + "lodgement_date": "2021-08-13", + "found_in_old_api": true + }, + "36a Victoria Square|M4 5DY": { + "lodgement_date": "2011-04-05", + "found_in_old_api": true + }, + "37a Victoria Square|M4 5DY": { + "lodgement_date": "2018-07-02", + "found_in_old_api": true + }, + "38a Victoria Square|M4 5DY": { + "lodgement_date": "2010-02-02", + "found_in_old_api": true + }, + "39a Victoria Square|M4 5DY": { + "lodgement_date": "2018-01-04", + "found_in_old_api": true + }, + "41a Victoria Square|M4 5DY": { + "lodgement_date": "2011-05-23", + "found_in_old_api": true + }, + "42a Victoria Square|M4 5DY": { + "lodgement_date": "2010-10-14", + "found_in_old_api": true + }, + "43a Victoria Square|M4 5DY": { + "lodgement_date": "2018-10-11", + "found_in_old_api": true + }, + "44a Victoria Square|M4 5DY": { 
+ "lodgement_date": "2010-06-08", + "found_in_old_api": true + }, + "45a Victoria Square|M4 5DY": { + "lodgement_date": "2023-03-08", + "found_in_old_api": true + }, + "46a Victoria Square|M4 5DY": { + "lodgement_date": "2010-12-09", + "found_in_old_api": true + }, + "47a Victoria Square|M4 5DY": { + "lodgement_date": "2010-02-09", + "found_in_old_api": true + }, + "48a Victoria Square|M4 5DY": { + "lodgement_date": "2011-04-12", + "found_in_old_api": true + }, + "49a Victoria Square|M4 5DY": { + "lodgement_date": "2010-11-09", + "found_in_old_api": true + }, + "50a Victoria Square|M4 5DY": { + "lodgement_date": "2025-09-06", + "found_in_old_api": true + }, + "51a Victoria Square|M4 5DY": { + "lodgement_date": "2009-10-05", + "found_in_old_api": true + }, + "52a Victoria Square|M4 5DY": { + "lodgement_date": "2010-12-17", + "found_in_old_api": true + }, + "53a Victoria Square|M4 5DY": { + "lodgement_date": "2022-11-10", + "found_in_old_api": true + }, + "54a Victoria Square|M4 5DY": { + "lodgement_date": "2021-01-08", + "found_in_old_api": true + }, + "55a Victoria Square|M4 5DY": { + "lodgement_date": "2009-08-18", + "found_in_old_api": true + }, + "56a Victoria Square|M4 5DZ": { + "lodgement_date": "2019-03-15", + "found_in_old_api": true + }, + "58a Victoria Square|M4 5DZ": { + "lodgement_date": "2018-11-14", + "found_in_old_api": true + }, + "59a Victoria Square|M4 5DZ": { + "lodgement_date": "2013-11-26", + "found_in_old_api": true + }, + "60a Victoria Square|M4 5DZ": { + "lodgement_date": "2024-06-12", + "found_in_old_api": true + }, + "61a Victoria Square|M4 5DZ": { + "lodgement_date": "2024-08-05", + "found_in_old_api": true + }, + "62a Victoria Square|M4 5DZ": { + "lodgement_date": "2013-05-24", + "found_in_old_api": true + }, + "64a Victoria Square|M4 5DZ": { + "lodgement_date": "2021-07-29", + "found_in_old_api": true + }, + "65a Victoria Square|M4 5DZ": { + "lodgement_date": "2011-08-26", + "found_in_old_api": true + }, + "68a Victoria Square|M4 5DZ": { 
+ "lodgement_date": "2022-03-29", + "found_in_old_api": true + }, + "69a Victoria Square|M4 5DZ": { + "lodgement_date": "2011-01-19", + "found_in_old_api": true + }, + "70a Victoria Square|M4 5DZ": { + "lodgement_date": "2011-07-27", + "found_in_old_api": true + }, + "71a Victoria Square|M4 5DZ": { + "lodgement_date": "2016-11-22", + "found_in_old_api": true + }, + "72a Victoria Square|M4 5DZ": { + "lodgement_date": "2019-01-07", + "found_in_old_api": true + }, + "73a Victoria Square|M4 5DZ": { + "lodgement_date": "2014-07-25", + "found_in_old_api": true + }, + "75a Victoria Square|M4 5DZ": { + "lodgement_date": "2016-01-20", + "found_in_old_api": true + }, + "76a Victoria Square|M4 5DZ": { + "lodgement_date": "2018-01-26", + "found_in_old_api": true + }, + "78a Victoria Square|M4 5DZ": { + "lodgement_date": "2011-06-02", + "found_in_old_api": true + }, + "79a Victoria Square|M4 5DZ": { + "lodgement_date": "2022-01-26", + "found_in_old_api": true + }, + "80a Victoria Square|M4 5DZ": { + "lodgement_date": "2018-11-05", + "found_in_old_api": true + }, + "81a Victoria Square|M4 5DZ": { + "lodgement_date": "2017-03-05", + "found_in_old_api": true + }, + "83a Victoria Square|M4 5DZ": { + "lodgement_date": "2012-05-01", + "found_in_old_api": true + }, + "85a Victoria Square|M4 5DZ": { + "lodgement_date": "2009-10-21", + "found_in_old_api": true + }, + "86a Victoria Square|M4 5DZ": { + "lodgement_date": "2024-05-29", + "found_in_old_api": true + }, + "87a Victoria Square|M4 5DZ": { + "lodgement_date": "2025-07-13", + "found_in_old_api": true + }, + "89a Victoria Square|M4 5DZ": { + "lodgement_date": "2016-05-12", + "found_in_old_api": true + }, + "90a Victoria Square|M4 5DZ": { + "lodgement_date": "2012-05-09", + "found_in_old_api": true + }, + "91a Victoria Square|M4 5DZ": { + "lodgement_date": "2025-04-30", + "found_in_old_api": true + }, + "92a Victoria Square|M4 5DZ": { + "lodgement_date": "2021-07-29", + "found_in_old_api": true + }, + "93a Victoria Square|M4 5EA": { 
+ "lodgement_date": "2013-02-26", + "found_in_old_api": true + }, + "95a Victoria Square|M4 5EA": { + "lodgement_date": "2020-09-06", + "found_in_old_api": true + }, + "96a Victoria Square|M4 5EA": { + "lodgement_date": "2022-06-30", + "found_in_old_api": true + }, + "97a Victoria Square|M4 5EA": { + "lodgement_date": "2016-09-05", + "found_in_old_api": true + }, + "98a Victoria Square|M4 5EA": { + "lodgement_date": "2019-12-19", + "found_in_old_api": true + }, + "99a Victoria Square|M4 5EA": { + "lodgement_date": "2009-03-05", + "found_in_old_api": true + }, + "100a Victoria Square|M4 5EA": { + "lodgement_date": "2011-03-31", + "found_in_old_api": true + }, + "103a Victoria Square|M4 5EA": { + "lodgement_date": "2009-03-05", + "found_in_old_api": true + }, + "104a Victoria Square|M4 5EA": { + "lodgement_date": "2010-01-21", + "found_in_old_api": true + }, + "106a Victoria Square|M4 5EA": { + "lodgement_date": "2015-12-10", + "found_in_old_api": true + }, + "107a Victoria Square|M4 5EA": { + "lodgement_date": "2013-07-01", + "found_in_old_api": true + }, + "108a Victoria Square|M4 5EA": { + "lodgement_date": "2023-03-01", + "found_in_old_api": true + }, + "109a Victoria Square|M4 5EA": { + "lodgement_date": "2010-03-24", + "found_in_old_api": true + }, + "110a Victoria Square|M4 5EA": { + "lodgement_date": "2019-02-25", + "found_in_old_api": true + }, + "111a Victoria Square|M4 5EA": { + "lodgement_date": "2010-02-01", + "found_in_old_api": true + }, + "113a Victoria Square|M4 5EA": { + "lodgement_date": "2012-11-21", + "found_in_old_api": true + }, + "114a Victoria Square|M4 5EA": { + "lodgement_date": "2013-12-06", + "found_in_old_api": true + }, + "115a Victoria Square|M4 5EA": { + "lodgement_date": "2022-08-25", + "found_in_old_api": true + }, + "116a Victoria Square|M4 5EA": { + "lodgement_date": "2011-02-25", + "found_in_old_api": true + }, + "119a Victoria Square|M4 5EA": { + "lodgement_date": "2024-04-12", + "found_in_old_api": true + }, + "120a Victoria 
Square|M4 5EA": { + "lodgement_date": "2011-04-04", + "found_in_old_api": true + }, + "121a Victoria Square|M4 5EA": { + "lodgement_date": "2010-11-09", + "found_in_old_api": true + }, + "122a Victoria Square|M4 5EA": { + "lodgement_date": "2012-05-01", + "found_in_old_api": true + }, + "123a Victoria Square|M4 5EA": { + "lodgement_date": "2022-01-12", + "found_in_old_api": true + }, + "125a Victoria Square|M4 5EA": { + "lodgement_date": "2023-11-22", + "found_in_old_api": true + }, + "126a Victoria Square|M4 5EA": { + "lodgement_date": "2010-08-24", + "found_in_old_api": true + }, + "127a Victoria Square|M4 5EA": { + "lodgement_date": "2020-03-01", + "found_in_old_api": true + }, + "128a Victoria Square|M4 5EA": { + "lodgement_date": "2015-02-04", + "found_in_old_api": true + }, + "129a Victoria Square|M4 5EA": { + "lodgement_date": "2010-07-07", + "found_in_old_api": true + }, + "130a Victoria Square|M4 5FA": { + "lodgement_date": "2026-02-11", + "found_in_old_api": true + }, + "131a Victoria Square|M4 5FA": { + "lodgement_date": "2025-05-29", + "found_in_old_api": true + }, + "132a Victoria Square|M4 5FA": { + "lodgement_date": "2019-12-24", + "found_in_old_api": true + }, + "134a Victoria Square|M4 5FA": { + "lodgement_date": "2011-08-18", + "found_in_old_api": true + }, + "135a Victoria Square|M4 5FA": { + "lodgement_date": "2019-09-05", + "found_in_old_api": true + }, + "136a Victoria Square|M4 5FA": { + "lodgement_date": "2025-02-14", + "found_in_old_api": true + }, + "137a Victoria Square|M4 5FA": { + "lodgement_date": "2024-07-17", + "found_in_old_api": true + }, + "138a Victoria Square|M4 5FA": { + "lodgement_date": "2023-10-11", + "found_in_old_api": true + }, + "139a Victoria Square|M4 5FA": { + "lodgement_date": "2021-06-22", + "found_in_old_api": true + }, + "140a Victoria Square|M4 5FA": { + "lodgement_date": "2020-06-15", + "found_in_old_api": true + }, + "141a Victoria Square|M4 5FA": { + "lodgement_date": "2025-12-22", + "found_in_old_api": true + 
}, + "142a Victoria Square|M4 5FA": { + "lodgement_date": "2025-12-22", + "found_in_old_api": true + }, + "143a Victoria Square|M4 5FA": { + "lodgement_date": "2023-01-18", + "found_in_old_api": true + }, + "144a Victoria Square|M4 5FA": { + "lodgement_date": "2011-04-04", + "found_in_old_api": true + }, + "146a Victoria Square|M4 5FA": { + "lodgement_date": "2022-09-21", + "found_in_old_api": true + }, + "147a Victoria Square|M4 5FA": { + "lodgement_date": "2011-05-04", + "found_in_old_api": true + }, + "148a Victoria Square|M4 5FA": { + "lodgement_date": "2014-11-18", + "found_in_old_api": true + }, + "149a Victoria Square|M4 5FA": { + "lodgement_date": "2009-12-14", + "found_in_old_api": true + }, + "150a Victoria Square|M4 5FA": { + "lodgement_date": "2009-12-14", + "found_in_old_api": true + }, + "152a Victoria Square|M4 5FA": { + "lodgement_date": "2017-06-23", + "found_in_old_api": true + }, + "154a Victoria Square|M4 5FA": { + "lodgement_date": "2025-04-29", + "found_in_old_api": true + }, + "156a Victoria Square|M4 5FA": { + "lodgement_date": "2011-04-05", + "found_in_old_api": true + }, + "157a Victoria Square|M4 5FA": { + "lodgement_date": "2023-09-11", + "found_in_old_api": true + }, + "158a Victoria Square|M4 5FA": { + "lodgement_date": "2021-12-07", + "found_in_old_api": true + }, + "160a Victoria Square|M4 5FA": { + "lodgement_date": "2011-02-04", + "found_in_old_api": true + }, + "163a Victoria Square|M4 5FA": { + "lodgement_date": "2010-02-02", + "found_in_old_api": true + }, + "164a Victoria Square|M4 5FA": { + "lodgement_date": "2020-10-19", + "found_in_old_api": true + }, + "165a Victoria Square|M4 5FA": { + "lodgement_date": "2019-12-13", + "found_in_old_api": true + } +} \ No newline at end of file From 8f2885474bb8c0959a23e4c60398a60f07c5987e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Apr 2026 11:53:52 +0000 Subject: [PATCH 11/44] fixing address2uprn tests --- backend/address2UPRN/tests/test_data.csv | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/address2UPRN/tests/test_data.csv b/backend/address2UPRN/tests/test_data.csv index ee23813b..aaeee66d 100644 --- a/backend/address2UPRN/tests/test_data.csv +++ b/backend/address2UPRN/tests/test_data.csv @@ -117,14 +117,14 @@ FLAT 43 Goodstone Court,HA1 4FL,10070269095 10 Genteel House Samara Drive,UB1 1FJ,12189844 1 ASH TREE HOUSE,SE5 0TE,None "Flat 1 Ash Tree House, 2, Thompson Avenue",SE5 0TE,10009803979 -3 ASH TREE HOUSE,SE5 0TE,None +3 ASH TREE HOUSE,SE5 0TE,10009803981 Flat 3 ASH TREE HOUSE,SE5 0TE,10009803981 -5 ASH TREE HOUSE,SE5 0TE,None +5 ASH TREE HOUSE,SE5 0TE,10009803983 Flat 5 ASH TREE HOUSE,SE5 0TE,10009803983 Flat 8 ASH TREE HOUSE,SE5 0TE,10009803986 8 ASH TREE HOUSE,SE5 0TE,None Flat 12 ASH TREE HOUSE,SE5 0TE,10009803990 -12 ASH TREE HOUSE,SE5 0TE,None +12 ASH TREE HOUSE,SE5 0TE,10009803990 FLAT 1 599 HARROW ROAD,W10 4RA,217113930 FLAT 2 599 HARROW ROAD,W10 4RA,217113931 FLAT 3 599 HARROW ROAD,W10 4RA,None From 8ec6eecc4d28157db264258c1555a6ae464129ff Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Apr 2026 12:00:19 +0000 Subject: [PATCH 12/44] reverting manually tweaked tests --- backend/address2UPRN/main.py | 3 ++- backend/address2UPRN/tests/test_data.csv | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 98f8c65b..fad5c64e 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -78,7 +78,8 @@ def get_uprn_with_epc_df( # Safe to return the agreed UPRN found_uprn = top_rank_df.iloc[0]["uprn"] - if found_uprn == "": + # Handling numeric missingness in new api + if found_uprn in ["", "nan"]: return None if verbose: diff --git a/backend/address2UPRN/tests/test_data.csv b/backend/address2UPRN/tests/test_data.csv index aaeee66d..ee23813b 100644 --- a/backend/address2UPRN/tests/test_data.csv +++ b/backend/address2UPRN/tests/test_data.csv @@ -117,14 +117,14 @@ FLAT 
43 Goodstone Court,HA1 4FL,10070269095 10 Genteel House Samara Drive,UB1 1FJ,12189844 1 ASH TREE HOUSE,SE5 0TE,None "Flat 1 Ash Tree House, 2, Thompson Avenue",SE5 0TE,10009803979 -3 ASH TREE HOUSE,SE5 0TE,10009803981 +3 ASH TREE HOUSE,SE5 0TE,None Flat 3 ASH TREE HOUSE,SE5 0TE,10009803981 -5 ASH TREE HOUSE,SE5 0TE,10009803983 +5 ASH TREE HOUSE,SE5 0TE,None Flat 5 ASH TREE HOUSE,SE5 0TE,10009803983 Flat 8 ASH TREE HOUSE,SE5 0TE,10009803986 8 ASH TREE HOUSE,SE5 0TE,None Flat 12 ASH TREE HOUSE,SE5 0TE,10009803990 -12 ASH TREE HOUSE,SE5 0TE,10009803990 +12 ASH TREE HOUSE,SE5 0TE,None FLAT 1 599 HARROW ROAD,W10 4RA,217113930 FLAT 2 599 HARROW ROAD,W10 4RA,217113931 FLAT 3 599 HARROW ROAD,W10 4RA,None From 821a0a08f7508a72c1d71fb8cfc46963d3f60b39 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Apr 2026 12:02:34 +0000 Subject: [PATCH 13/44] addressing feedback on from_api_response --- datatypes/epc/domain/mapper.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 7ef74340..d5212fe5 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -1,5 +1,5 @@ from datetime import date -from typing import List, Optional, Sequence, Union +from typing import List, Optional, Sequence, Union, Dict, Any from datatypes.epc.domain.epc_property_data import ( EnergyElement, @@ -1448,7 +1448,7 @@ class EpcPropertyDataMapper: return [EpcPropertyDataMapper._map_energy_element(e) for e in elements] @staticmethod - def from_api_response(data: dict) -> "EpcPropertyData": + def from_api_response(data: Dict[str, Any]) -> "EpcPropertyData": """ Dispatch to the correct schema mapper based on schema_type. Supports RdSAP-Schema-21.0.0 and RdSAP-Schema-21.0.1 only. 
@@ -1459,11 +1459,13 @@ class EpcPropertyDataMapper: schema = data.get("schema_type", "") if schema == "RdSAP-Schema-21.0.1": from datatypes.epc.schema.rdsap_schema_21_0_1 import RdSapSchema21_0_1 + return EpcPropertyDataMapper.from_rdsap_schema_21_0_1( from_dict(RdSapSchema21_0_1, data) ) if schema == "RdSAP-Schema-21.0.0": from datatypes.epc.schema.rdsap_schema_21_0_0 import RdSapSchema21_0_0 + return EpcPropertyDataMapper.from_rdsap_schema_21_0_0( from_dict(RdSapSchema21_0_0, data) ) @@ -1596,7 +1598,11 @@ def _map_sap_heating( fuel_type = ( _raw_fuel if _raw_fuel - else ("Electricity" if main.system_type.lower() in _ELECTRIC_SYSTEM_TYPES else _raw_fuel) + else ( + "Electricity" + if main.system_type.lower() in _ELECTRIC_SYSTEM_TYPES + else _raw_fuel + ) ) return SapHeating( @@ -1618,7 +1624,11 @@ def _map_sap_heating( secondary_fuel_type=secondary_fuel_type, secondary_heating_type=heating.secondary_heating.secondary_system, shower_outlets=shower_outlets, - cylinder_size=heating.water_heating.cylinder_size if heating.water_heating.cylinder_size != "No Cylinder" else None, + cylinder_size=( + heating.water_heating.cylinder_size + if heating.water_heating.cylinder_size != "No Cylinder" + else None + ), cylinder_insulation_type=heating.water_heating.insulation_type, cylinder_insulation_thickness_mm=heating.water_heating.insulation_thickness_mm, immersion_heating_type=heating.water_heating.immersion_type, From 001e9ce88235f1231d9c87eea2136a44daf04b91 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Apr 2026 12:03:39 +0000 Subject: [PATCH 14/44] remove inline import --- datatypes/epc/domain/mapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index d5212fe5..cc960f87 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -1,5 +1,6 @@ from datetime import date from typing import List, Optional, Sequence, Union, Dict, Any +from 
datatypes.epc.schema.helpers import from_dict from datatypes.epc.domain.epc_property_data import ( EnergyElement, @@ -1454,7 +1455,6 @@ class EpcPropertyDataMapper: Supports RdSAP-Schema-21.0.0 and RdSAP-Schema-21.0.1 only. Raises ValueError for unsupported schemas — add cases here as needed. """ - from datatypes.epc.schema.helpers import from_dict schema = data.get("schema_type", "") if schema == "RdSAP-Schema-21.0.1": From cadf8836d13a3249bb591fb0abc626d86f8ac9a3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Apr 2026 12:04:46 +0000 Subject: [PATCH 15/44] making full_address property --- backend/epc_client/client.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/backend/epc_client/client.py b/backend/epc_client/client.py index 33f25ef5..0e3b48fc 100644 --- a/backend/epc_client/client.py +++ b/backend/epc_client/client.py @@ -7,7 +7,11 @@ from typing import Callable, Optional import httpx import pandas as pd -from backend.epc_client.exceptions import EpcApiError, EpcNotFoundError, EpcRateLimitError +from backend.epc_client.exceptions import ( + EpcApiError, + EpcNotFoundError, + EpcRateLimitError, +) from backend.epc_client._retry import call_with_retry from datatypes.epc.domain.epc_property_data import EpcPropertyData from datatypes.epc.domain.mapper import EpcPropertyDataMapper @@ -26,6 +30,7 @@ class EpcSearchResult: current_energy_efficiency_band: str registration_date: str + @property def full_address(self) -> str: parts = [ self.address_line_1, @@ -68,12 +73,16 @@ class EpcClientService: return None # Round 1: score on addressLine1 only - cert_num = self._pick_best_cert(candidates, address, use_full_address=False, fn=get_uprn_candidates) + cert_num = self._pick_best_cert( + candidates, address, use_full_address=False, fn=get_uprn_candidates + ) if cert_num: return self._safe_get(cert_num) # Round 2: score on all address lines joined - cert_num = self._pick_best_cert(candidates, 
address, use_full_address=True, fn=get_uprn_candidates) + cert_num = self._pick_best_cert( + candidates, address, use_full_address=True, fn=get_uprn_candidates + ) if cert_num: return self._safe_get(cert_num) @@ -145,14 +154,18 @@ class EpcClientService: use_full_address: bool, fn: Callable[..., pd.DataFrame], ) -> Optional[str]: - df = pd.DataFrame([ - { - "address": r.full_address() if use_full_address else r.address_line_1, - "uprn": str(r.uprn) if r.uprn is not None else "", - "certificate_number": r.certificate_number, - } - for r in candidates - ]) + df = pd.DataFrame( + [ + { + "address": ( + r.full_address() if use_full_address else r.address_line_1 + ), + "uprn": str(r.uprn) if r.uprn is not None else "", + "certificate_number": r.certificate_number, + } + for r in candidates + ] + ) scored = fn(df, user_address=user_address) if scored.empty: From a1b207ba558e391c4c37ca65dd8d7bf5432d76d9 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Apr 2026 13:46:09 +0000 Subject: [PATCH 16/44] bolstering testing --- CLAUDE.md | 6 ++ backend/app/requirements/requirements.txt | 7 +- backend/epc_client/__init__.py | 4 +- backend/epc_client/client.py | 97 +---------------------- backend/epc_client/requirements.txt | 1 - backend/epc_client/tests/test_client.py | 78 +++++------------- backend/tests/test_address_match.py | 60 ++++++++++++++ backend/utils/addressMatch.py | 9 ++- backend/utils/epc_address_match.py | 67 ++++++++++++++++ datatypes/epc/search/__init__.py | 3 + datatypes/epc/search/epc_search_result.py | 28 +++++++ pyproject.toml | 2 - 12 files changed, 201 insertions(+), 161 deletions(-) delete mode 100644 backend/epc_client/requirements.txt create mode 100644 backend/tests/test_address_match.py create mode 100644 backend/utils/epc_address_match.py create mode 100644 datatypes/epc/search/__init__.py create mode 100644 datatypes/epc/search/epc_search_result.py diff --git a/CLAUDE.md b/CLAUDE.md index 263679ff..23d465a7 100644 --- a/CLAUDE.md +++ 
b/CLAUDE.md @@ -59,3 +59,9 @@ New containers install all skills automatically via the Dockerfile. If you're in bash .devcontainer/backend/install-claude-skills.sh ``` +## Type Safety + +All new code must pass `pyright` with zero errors under `typeCheckingMode = strict`. +Annotate all function return types. Use `dict[str, Any]` for untyped external API +payloads — never bare `dict`. Add `pandas-stubs` when introducing pandas to a module. + diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index 9fdbfe4c..80907a79 100644 --- a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -13,4 +13,9 @@ boto3==1.35.44 openpyxl==3.1.5 # Basic pytz -sqlmodel \ No newline at end of file +sqlmodel +# HTTP client +httpx==0.28.1 +# Data +pandas +pandas-stubs \ No newline at end of file diff --git a/backend/epc_client/__init__.py b/backend/epc_client/__init__.py index 720594f7..ab46a266 100644 --- a/backend/epc_client/__init__.py +++ b/backend/epc_client/__init__.py @@ -1,3 +1,3 @@ -from backend.epc_client.client import EpcClientService, EpcSearchResult +from backend.epc_client.client import EpcClientService -__all__ = ["EpcClientService", "EpcSearchResult"] +__all__ = ["EpcClientService"] diff --git a/backend/epc_client/client.py b/backend/epc_client/client.py index 0e3b48fc..d00a164f 100644 --- a/backend/epc_client/client.py +++ b/backend/epc_client/client.py @@ -1,11 +1,9 @@ # Spec: https://raw.githubusercontent.com/communitiesuk/epb-data-warehouse/main/api/api.yml from __future__ import annotations -from dataclasses import dataclass -from typing import Callable, Optional +from typing import Any, Optional import httpx -import pandas as pd from backend.epc_client.exceptions import ( EpcApiError, @@ -15,35 +13,11 @@ from backend.epc_client.exceptions import ( from backend.epc_client._retry import call_with_retry from datatypes.epc.domain.epc_property_data import EpcPropertyData from 
datatypes.epc.domain.mapper import EpcPropertyDataMapper - - -@dataclass -class EpcSearchResult: - certificate_number: str - address_line_1: str - address_line_2: Optional[str] - address_line_3: Optional[str] - address_line_4: Optional[str] - postcode: str - post_town: str - uprn: Optional[int] - current_energy_efficiency_band: str - registration_date: str - - @property - def full_address(self) -> str: - parts = [ - self.address_line_1, - self.address_line_2, - self.address_line_3, - self.address_line_4, - ] - return ", ".join(p for p in parts if p) +from datatypes.epc.search import EpcSearchResult class EpcClientService: BASE_URL = "https://api.get-energy-performance-data.communities.gov.uk" - _MIN_MATCH_SCORE = 0.6 def __init__(self, auth_token: str) -> None: self._headers = { @@ -65,34 +39,11 @@ class EpcClientService: def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]: return call_with_retry(lambda: self._search(postcode=postcode)) - def find_best_match(self, postcode: str, address: str) -> Optional[EpcPropertyData]: - from backend.utils.addressMatch import get_uprn_candidates - - candidates = self.search_by_postcode(postcode) - if not candidates: - return None - - # Round 1: score on addressLine1 only - cert_num = self._pick_best_cert( - candidates, address, use_full_address=False, fn=get_uprn_candidates - ) - if cert_num: - return self._safe_get(cert_num) - - # Round 2: score on all address lines joined - cert_num = self._pick_best_cert( - candidates, address, use_full_address=True, fn=get_uprn_candidates - ) - if cert_num: - return self._safe_get(cert_num) - - return None - # ------------------------------------------------------------------ # Private helpers # ------------------------------------------------------------------ - def _fetch_certificate(self, cert_num: str) -> dict: + def _fetch_certificate(self, cert_num: str) -> dict[str, Any]: resp = httpx.get( f"{self.BASE_URL}/api/certificate", params={"certificate_number": cert_num}, @@ 
-133,7 +84,7 @@ class EpcClientService: return [self._parse_search_result(r) for r in rows] @staticmethod - def _parse_search_result(row: dict) -> EpcSearchResult: + def _parse_search_result(row: dict[str, Any]) -> EpcSearchResult: return EpcSearchResult( certificate_number=row["certificateNumber"], address_line_1=row["addressLine1"], @@ -146,43 +97,3 @@ class EpcClientService: current_energy_efficiency_band=row["currentEnergyEfficiencyBand"], registration_date=row["registrationDate"], ) - - def _pick_best_cert( - self, - candidates: list[EpcSearchResult], - user_address: str, - use_full_address: bool, - fn: Callable[..., pd.DataFrame], - ) -> Optional[str]: - df = pd.DataFrame( - [ - { - "address": ( - r.full_address() if use_full_address else r.address_line_1 - ), - "uprn": str(r.uprn) if r.uprn is not None else "", - "certificate_number": r.certificate_number, - } - for r in candidates - ] - ) - - scored = fn(df, user_address=user_address) - if scored.empty: - return None - - best_score = scored.iloc[0]["lexiscore"] - if best_score < self._MIN_MATCH_SCORE: - return None - - top = scored[scored["lexirank"] == 1] - if len(top) != 1: - return None - - return str(top.iloc[0]["certificate_number"]) - - def _safe_get(self, cert_num: str) -> Optional[EpcPropertyData]: - try: - return self.get_by_certificate_number(cert_num) - except EpcNotFoundError: - return None diff --git a/backend/epc_client/requirements.txt b/backend/epc_client/requirements.txt deleted file mode 100644 index cee32373..00000000 --- a/backend/epc_client/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -httpx==0.28.1 diff --git a/backend/epc_client/tests/test_client.py b/backend/epc_client/tests/test_client.py index 51dd2a12..7933f21d 100644 --- a/backend/epc_client/tests/test_client.py +++ b/backend/epc_client/tests/test_client.py @@ -1,7 +1,9 @@ from unittest.mock import MagicMock, patch, call import pytest -from backend.epc_client.client import EpcClientService, EpcSearchResult +from 
backend.epc_client.client import EpcClientService +from backend.utils.epc_address_match import find_best_epc_match +from datatypes.epc.search import EpcSearchResult from backend.epc_client.exceptions import EpcNotFoundError, EpcRateLimitError from datatypes.epc.domain.epc_property_data import EpcPropertyData from backend.epc_client.tests.conftest import make_search_row @@ -122,88 +124,51 @@ def test_search_by_postcode_404_returns_empty_list(epc_service): # --------------------------------------------------------------------------- -# Tests 8-10: find_best_match +# Tests 8-10: find_best_epc_match — real scoring, only HTTP mocked # --------------------------------------------------------------------------- -def _make_scored_df(rows, scores, ranks): - import pandas as pd - df = pd.DataFrame(rows) - df["lexiscore"] = scores - df["lexirank"] = ranks - return df.sort_values("lexirank") - - -def test_find_best_match_round1_clear_winner(epc_service, rdsap_21_0_1_cert): +def test_find_best_match_clear_winner_on_first_pass(epc_service, rdsap_21_0_1_cert): search_rows = [ make_search_row(cert_num="CERT-WIN", address_line_1="1 High Street"), make_search_row(cert_num="CERT-LOSE", address_line_1="99 Nowhere Lane"), ] cert_response = {"data": rdsap_21_0_1_cert} - df_rows = [ - {"address": "1 High Street", "uprn": "100023336956", "certificate_number": "CERT-WIN"}, - {"address": "99 Nowhere Lane", "uprn": "100023336956", "certificate_number": "CERT-LOSE"}, - ] - scored = _make_scored_df(df_rows, [0.9, 0.1], [1, 2]) - def fake_get(url, params=None, **kwargs): if "search" in url: return _mock_response(200, {"data": search_rows}) return _mock_response(200, cert_response) - with patch("httpx.get", side_effect=fake_get), \ - patch("backend.utils.addressMatch.get_uprn_candidates", return_value=scored): - result = epc_service.find_best_match("SW1A 1AA", "1 High Street") + with patch("httpx.get", side_effect=fake_get): + result = find_best_epc_match(epc_service, "SW1A 1AA", "1 High 
Street") assert isinstance(result, EpcPropertyData) -def test_find_best_match_round1_ambiguous_round2_resolves(epc_service, rdsap_21_0_1_cert): +def test_find_best_match_resolves_on_second_pass_using_full_address(epc_service, rdsap_21_0_1_cert): + # Both candidates share address_line_1 — round 1 is ambiguous. + # Round 2 scores against full_address and picks the correct floor. search_rows = [ make_search_row( - cert_num="CERT-A", address_line_1="1 High Street", + cert_num="CERT-A", + address_line_1="1 High Street", address_line_2="Ground Floor", ), make_search_row( - cert_num="CERT-B", address_line_1="1 High Street", + cert_num="CERT-B", + address_line_1="1 High Street", address_line_2="First Floor", ), ] cert_response = {"data": rdsap_21_0_1_cert} - # Round 1: both score equally — ambiguous (two rank-1s) - ambiguous = _make_scored_df( - [ - {"address": "1 High Street", "uprn": "111", "certificate_number": "CERT-A"}, - {"address": "1 High Street", "uprn": "222", "certificate_number": "CERT-B"}, - ], - [0.9, 0.9], - [1, 1], - ) - # Round 2: CERT-A wins on full address - resolved = _make_scored_df( - [ - {"address": "1 High Street, Ground Floor", "uprn": "111", "certificate_number": "CERT-A"}, - {"address": "1 High Street, First Floor", "uprn": "222", "certificate_number": "CERT-B"}, - ], - [0.85, 0.4], - [1, 2], - ) - - call_count = {"n": 0} - - def fake_candidates(df, user_address, **kwargs): - call_count["n"] += 1 - return ambiguous if call_count["n"] == 1 else resolved - def fake_get(url, params=None, **kwargs): if "search" in url: return _mock_response(200, {"data": search_rows}) return _mock_response(200, cert_response) - with patch("httpx.get", side_effect=fake_get), \ - patch("backend.utils.addressMatch.get_uprn_candidates", side_effect=fake_candidates): - result = epc_service.find_best_match("SW1A 1AA", "1 High Street Ground Floor") + with patch("httpx.get", side_effect=fake_get): + result = find_best_epc_match(epc_service, "SW1A 1AA", "1 High Street Ground 
Floor") assert isinstance(result, EpcPropertyData) @@ -211,14 +176,7 @@ def test_find_best_match_round1_ambiguous_round2_resolves(epc_service, rdsap_21_ def test_find_best_match_returns_none_when_no_good_match(epc_service): search_rows = [make_search_row(cert_num="CERT-X", address_line_1="99 Nowhere Lane")] - low_score = _make_scored_df( - [{"address": "99 Nowhere Lane", "uprn": "111", "certificate_number": "CERT-X"}], - [0.1], - [1], - ) - - with patch("httpx.get", return_value=_mock_response(200, {"data": search_rows})), \ - patch("backend.utils.addressMatch.get_uprn_candidates", return_value=low_score): - result = epc_service.find_best_match("SW1A 1AA", "1 Completely Different Road") + with patch("httpx.get", return_value=_mock_response(200, {"data": search_rows})): + result = find_best_epc_match(epc_service, "SW1A 1AA", "1 Completely Different Road") assert result is None diff --git a/backend/tests/test_address_match.py b/backend/tests/test_address_match.py new file mode 100644 index 00000000..f6a564df --- /dev/null +++ b/backend/tests/test_address_match.py @@ -0,0 +1,60 @@ +from backend.utils.addressMatch import AddressMatch + + +class TestNormaliseAddress: + def test_lowercases_input(self): + assert AddressMatch.normalise_address("1 HIGH STREET") == "1 high street" + + def test_expands_road_abbreviation(self): + assert AddressMatch.normalise_address("1 Moreton Rd") == "1 moreton road" + + def test_expands_avenue_abbreviation(self): + assert AddressMatch.normalise_address("2 Park Ave") == "2 park avenue" + + def test_removes_punctuation_keeps_slash(self): + result = AddressMatch.normalise_address("Flat 1/A, Some Road") + assert "," not in result + assert "/" in result + + def test_splits_digit_letter_suffix(self): + assert "42 a" in AddressMatch.normalise_address("42a Some Road") + + def test_empty_string_returns_empty(self): + assert AddressMatch.normalise_address("") == "" + + def test_removes_no_prefix(self): + result = AddressMatch.normalise_address("No 5 
High Street") + assert "no" not in result.split() + assert "5" in result + + +class TestScore: + def test_identical_address_scores_one(self): + assert AddressMatch.score("1 High Street", "1 High Street") == 1.0 + + def test_case_insensitive(self): + assert AddressMatch.score("1 HIGH STREET", "1 high street") == 1.0 + + def test_street_type_synonym_scores_one(self): + # "Rd" expands to "road" during normalisation — should be identical + assert AddressMatch.score("1 High Rd", "1 High Road") == 1.0 + + def test_different_building_numbers_score_zero(self): + assert AddressMatch.score("1 High Street", "2 High Street") == 0.0 + + def test_disjoint_number_sets_score_zero(self): + assert AddressMatch.score("1 High Street", "99 Nowhere Lane") == 0.0 + + def test_user_address_has_number_but_epc_does_not_scores_zero(self): + assert AddressMatch.score("1 High Street", "High Street") == 0.0 + + def test_partial_address_scores_above_threshold(self): + # Extra token in user address ("London") — same building number, high overlap + score = AddressMatch.score("1 High Street London", "1 High Street") + assert 0.6 <= score < 1.0 + + def test_flat_number_mismatch_scores_zero(self): + # User has two numbers but no "flat" token; EPC has different flat number + # Triggers the order-sensitive flat guard + score = AddressMatch.score("3 42 High Street", "Flat 7 42 High Street") + assert score == 0.0 diff --git a/backend/utils/addressMatch.py b/backend/utils/addressMatch.py index 12c1ac53..a0c6ebdf 100644 --- a/backend/utils/addressMatch.py +++ b/backend/utils/addressMatch.py @@ -1,8 +1,13 @@ +from __future__ import annotations + import re -from typing import Any, Optional from difflib import SequenceMatcher +from typing import TYPE_CHECKING, Any, Optional + import requests -import pandas as pd + +if TYPE_CHECKING: + import pandas as pd class AddressMatch: diff --git a/backend/utils/epc_address_match.py b/backend/utils/epc_address_match.py new file mode 100644 index 00000000..f73d6d1d --- 
/dev/null +++ b/backend/utils/epc_address_match.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +from backend.utils.addressMatch import AddressMatch +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.search import EpcSearchResult + +if TYPE_CHECKING: + from backend.epc_client.client import EpcClientService + +_MIN_MATCH_SCORE = 0.6 + + +def find_best_epc_match( + service: EpcClientService, + postcode: str, + address: str, +) -> Optional[EpcPropertyData]: + candidates = service.search_by_postcode(postcode) + if not candidates: + return None + + cert_num = _pick_best_cert(candidates, address, use_full_address=False) + if cert_num: + return _safe_get(service, cert_num) + + cert_num = _pick_best_cert(candidates, address, use_full_address=True) + if cert_num: + return _safe_get(service, cert_num) + + return None + + +def _pick_best_cert( + candidates: list[EpcSearchResult], + user_address: str, + use_full_address: bool, +) -> Optional[str]: + scored: list[tuple[float, str]] = [ + ( + AddressMatch.score( + user_address, + r.full_address if use_full_address else r.address_line_1, + ), + r.certificate_number, + ) + for r in candidates + ] + if not scored: + return None + best_score = max(s for s, _ in scored) + if best_score < _MIN_MATCH_SCORE: + return None + top = [cert for s, cert in scored if s == best_score] + if len(top) != 1: + return None + return top[0] + + +def _safe_get(service: EpcClientService, cert_num: str) -> Optional[EpcPropertyData]: + from backend.epc_client.exceptions import EpcNotFoundError + + try: + return service.get_by_certificate_number(cert_num) + except EpcNotFoundError: + return None diff --git a/datatypes/epc/search/__init__.py b/datatypes/epc/search/__init__.py new file mode 100644 index 00000000..3e08a56e --- /dev/null +++ b/datatypes/epc/search/__init__.py @@ -0,0 +1,3 @@ +from datatypes.epc.search.epc_search_result import EpcSearchResult + +__all__ 
= ["EpcSearchResult"] diff --git a/datatypes/epc/search/epc_search_result.py b/datatypes/epc/search/epc_search_result.py new file mode 100644 index 00000000..b6f47caf --- /dev/null +++ b/datatypes/epc/search/epc_search_result.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class EpcSearchResult: + certificate_number: str + address_line_1: str + address_line_2: Optional[str] + address_line_3: Optional[str] + address_line_4: Optional[str] + postcode: str + post_town: str + uprn: Optional[int] + current_energy_efficiency_band: str + registration_date: str + + @property + def full_address(self) -> str: + parts = [ + self.address_line_1, + self.address_line_2, + self.address_line_3, + self.address_line_4, + ] + return ", ".join(p for p in parts if p) diff --git a/pyproject.toml b/pyproject.toml index 72ec3f0c..49108861 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1 @@ [tool.pyright] -reportUnknownMemberType = false -reportUnknownVariableType = false \ No newline at end of file From 87afac86315bae7ee19b8f029bcce4ab872add1a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 30 Apr 2026 09:58:24 +0100 Subject: [PATCH 17/44] minor exporting data --- backend/export/property_scenarios/main.py | 6 +++--- sfr/principal_pitch/2_export_data.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index 64627e01..100e34e8 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -83,7 +83,7 @@ def process_export( else: scenario_recs = recommendations_df[ recommendations_df["scenario_id"] == group_key - ] + ] if scenario_recs.empty: logger.info( @@ -140,8 +140,8 @@ def handler( body_dict = { "task_id": "test", "subtask_id": "test", - "portfolio_id": 682, - "scenario_ids": [1210], + "portfolio_id": 632, + 
"scenario_ids": [1144], "default_plans_only": False, } :param event: Lambda event containing export request details diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index b275086d..9fdff9f6 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -26,13 +26,13 @@ from backend.app.db.functions.materials_functions import get_materials from collections import defaultdict from sqlalchemy import func -PORTFOLIO_ID = 711 -SCENARIOS = [1233] +PORTFOLIO_ID = 632 +SCENARIOS = [1144] scenario_names = { - 1233: "Reach EPC C", + 1144: "EPC C", } -project_name = "Novus" +project_name = "Calico" def get_data(portfolio_id, scenario_ids): @@ -230,7 +230,7 @@ for scenario_id in SCENARIOS: # Get recs for this scenario recommended_measures_df = recommendations_df[ recommendations_df["scenario_id"] == scenario_id - ][["property_id", "measure_type", "estimated_cost", "default"]] + ][["property_id", "measure_type", "estimated_cost", "default"]] recommended_measures_df = recommended_measures_df[ recommended_measures_df["default"] ] @@ -238,7 +238,7 @@ for scenario_id in SCENARIOS: post_install_sap = recommendations_df[ recommendations_df["scenario_id"] == scenario_id - ][["property_id", "default", "sap_points"]] + ][["property_id", "default", "sap_points"]] post_install_sap = post_install_sap[post_install_sap["default"]] # Sum up the sap points by property id post_install_sap = ( From c53d8b2a339853c1128c865aecd6acd43fb14b90 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Sun, 10 May 2026 21:07:16 +0000 Subject: [PATCH 18/44] basic end up check --- .github/workflows/deploy_fastapi_backend.yml | 15 ++++ backend/app/dependencies.py | 4 +- backend/app/main.py | 76 ++------------------ 3 files changed, 24 insertions(+), 71 deletions(-) diff --git a/.github/workflows/deploy_fastapi_backend.yml b/.github/workflows/deploy_fastapi_backend.yml index 5ad4d6ac..ede816b3 100644 --- 
a/.github/workflows/deploy_fastapi_backend.yml +++ b/.github/workflows/deploy_fastapi_backend.yml @@ -148,3 +148,18 @@ jobs: # Deploy to AWS Lambda via Serverless sls deploy --stage ${{ github.ref_name }} --verbose + - name: Smoke test deployed /health + env: + EXPECTED_SHA: ${{ github.sha }} + HEALTH_URL: https://api.${{ steps.set_domain.outputs.domain }}/health + run: | + set -euo pipefail + echo "Probing $HEALTH_URL" + RESPONSE=$(curl -fsSL --max-time 30 --retry 3 --retry-delay 5 --retry-connrefused "$HEALTH_URL") + echo "Response: $RESPONSE" + ACTUAL_SHA=$(echo "$RESPONSE" | jq -r '.sha') + if [[ "$ACTUAL_SHA" != "$EXPECTED_SHA" ]]; then + echo "::error::SHA mismatch. expected=$EXPECTED_SHA actual=$ACTUAL_SHA" + exit 1 + fi + echo "Health check passed. sha=$ACTUAL_SHA" diff --git a/backend/app/dependencies.py b/backend/app/dependencies.py index 027cfe40..757973f2 100644 --- a/backend/app/dependencies.py +++ b/backend/app/dependencies.py @@ -19,7 +19,9 @@ api_key_header = APIKeyHeader(name=get_settings().API_KEY_NAME, auto_error=False oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") -async def validate_api_key(api_key_header: str = Depends(api_key_header)): +async def validate_api_key(request: Request, api_key_header: str = Depends(api_key_header)): + if request.url.path == "/health": + return None if api_key_header != get_settings().API_KEY: raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, detail="Could not validate credentials" diff --git a/backend/app/main.py b/backend/app/main.py index c9733c18..80c3e038 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,4 +1,5 @@ import logging +import os from fastapi.responses import JSONResponse from fastapi import FastAPI, Depends, Request, status from fastapi.exceptions import RequestValidationError @@ -22,7 +23,6 @@ else: app = FastAPI(dependencies=[Depends(validate_api_key)]) -# Handle 422 errors (validation failures) @app.exception_handler(RequestValidationError) async def 
validation_exception_handler(request: Request, exc: RequestValidationError): logger.error(f"422 Validation Error at {request.url}") @@ -37,7 +37,6 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE ) -# Handle generic HTTP exceptions (optional, useful for catching 404, 403, etc.) @app.exception_handler(StarletteHTTPException) async def http_exception_handler(request: Request, exc: StarletteHTTPException): logger.warning(f"{exc.status_code} Error at {request.url} - Detail: {exc.detail}") @@ -47,7 +46,6 @@ async def http_exception_handler(request: Request, exc: StarletteHTTPException): ) -# Middleware to log requests @app.middleware("http") async def log_requests(request: Request, call_next): logger.info(f"Incoming request: {request.method} {request.url}") @@ -56,6 +54,11 @@ async def log_requests(request: Request, call_next): return response +@app.get("/health") +async def health(): + return {"status": "ok", "sha": os.getenv("GITHUB_SHA", "unknown")} + + app.include_router(portfolio_router.router, prefix="/v1") app.include_router(plan_router.router, prefix="/v1") app.include_router(whlg_router.router, prefix="/v1") @@ -67,70 +70,3 @@ if get_settings().ENVIRONMENT == "local": app.include_router(local_router.router) handler = Mangum(app) -import logging -from fastapi.responses import JSONResponse -from fastapi import FastAPI, Depends, Request, status -from fastapi.exceptions import RequestValidationError -from fastapi.encoders import jsonable_encoder -from starlette.exceptions import HTTPException as StarletteHTTPException -from mangum import Mangum -from backend.app.portfolio import router as portfolio_router -from backend.app.whlg import router as whlg_router -from backend.app.plan import router as plan_router -from backend.app.bulk_uploads import router as bulk_uploads_router -from backend.app.dependencies import validate_api_key -from backend.app.config import get_settings - -logger = logging.getLogger("uvicorn.error") 
-logging.basicConfig(level=logging.INFO) - -if get_settings().ENVIRONMENT == "local": - app = FastAPI() -else: - app = FastAPI(dependencies=[Depends(validate_api_key)]) - - -# Handle 422 errors (validation failures) -@app.exception_handler(RequestValidationError) -async def validation_exception_handler(request: Request, exc: RequestValidationError): - logger.error(f"422 Validation Error at {request.url}") - logger.error(f"Body: {exc.body}") - logger.error(f"Validation Errors: {exc.errors()}") - return JSONResponse( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - content=jsonable_encoder({ - "detail": exc.errors(), - "body": exc.body - }), - ) - - -# Handle generic HTTP exceptions (optional, useful for catching 404, 403, etc.) -@app.exception_handler(StarletteHTTPException) -async def http_exception_handler(request: Request, exc: StarletteHTTPException): - logger.warning(f"{exc.status_code} Error at {request.url} - Detail: {exc.detail}") - return JSONResponse( - status_code=exc.status_code, - content={"detail": exc.detail}, - ) - - -# Middleware to log requests -@app.middleware("http") -async def log_requests(request: Request, call_next): - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - - -app.include_router(portfolio_router.router, prefix="/v1") -app.include_router(plan_router.router, prefix="/v1") -app.include_router(whlg_router.router, prefix="/v1") -app.include_router(bulk_uploads_router.router, prefix="/v1") - -if get_settings().ENVIRONMENT == "local": - from app.local import router as local_router - app.include_router(local_router.router) - -handler = Mangum(app) From 9aae5bf482a522e5b2fc4cb41174b7ef05ff1b07 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 11 May 2026 15:20:17 +0000 Subject: [PATCH 19/44] added logic to deal with flats --- backend/address2UPRN/main.py | 92 ++++++++++++++++++++++++++++++----- 
backend/utils/addressMatch.py | 23 +++++++++ 2 files changed, 102 insertions(+), 13 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index fad5c64e..0938a53b 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -24,22 +24,53 @@ from backend.utils.addressMatch import ( logger = setup_logger() -OPEN_EPC_API_TOKEN = os.getenv("OPEN_EPC_API_TOKEN") - -if OPEN_EPC_API_TOKEN is None: - raise RuntimeError("OPEN_EPC_API_TOKEN not defined in env") - - def get_epc_data_with_postcode(postcode: str) -> pd.DataFrame: from backend.epc_client.client import EpcClientService - service = EpcClientService(auth_token=OPEN_EPC_API_TOKEN) + token = os.getenv("OPEN_EPC_API_TOKEN") + if token is None: + raise RuntimeError("OPEN_EPC_API_TOKEN not defined in env") + + service = EpcClientService(auth_token=token) results = service.search_by_postcode(postcode) return pd.DataFrame( [{"address": r.address_line_1, "uprn": r.uprn} for r in results] ) +def get_uprn_from_historic_epc( + user_inputed_address: str, + postcode: str, +) -> Optional[tuple[str, str, float]]: + """Resolve a UPRN via historic EPC S3 data. + + Returns (uprn, address, lexiscore) when the historic dataset agrees on a + single rank-1 UPRN, None otherwise (missing postcode file, zero score, + or ambiguous top rank). The score gate is `unambiguous_uprn`'s own + (score > 0); the 0.7 heuristic used for the new-EPC source isn't applied + here because historic addresses use a more verbose format that + systematically depresses lexiscores. 
+ """ + from datatypes.epc.domain.historic_epc_matching import ( + match_addresses_for_postcode, + ) + + try: + result = match_addresses_for_postcode(user_inputed_address, postcode) + except FileNotFoundError: + return None + + uprn = result.unambiguous_uprn() + if not uprn or uprn in ("", "nan"): + return None + + top = result.top() + if top is None: + return None + + return (uprn, top.record.address, top.lexiscore) + + def get_uprn_with_epc_df( user_inputed_address: str, epc_df: pd.DataFrame, @@ -95,20 +126,37 @@ def get_uprn( ): """ Return uprn (str) - Return False if failed to find a sensible matching epc - Return None when epc found but no UPRN + Return None when no sensible match is found in either EPC source. - This function fetches EPC data via API for a single postcode. - For processing multiple addresses in the same postcode, use get_uprn_with_epc_df instead. + Tries the new EPC API first; if that yields no confident match, falls + back to the historic EPC dataset on S3. + + For processing multiple addresses in the same postcode, use + get_uprn_with_epc_df instead. 
""" df = get_epc_data_with_postcode(postcode=postcode) - return get_uprn_with_epc_df( + result = get_uprn_with_epc_df( user_inputed_address=user_inputed_address, epc_df=df, - verbose=verbose, + verbose=True, ) + if not result: + result = get_uprn_from_historic_epc( + user_inputed_address=user_inputed_address, + postcode=postcode, + ) + if result: + logger.info( + f"Historic EPC matched {user_inputed_address} in {postcode}" + ) + + if not result: + return None + + return result if verbose else result[0] + def resolve_uprns_for_postcode_group( group_df: pd.DataFrame, @@ -379,6 +427,7 @@ def handler(event, context, local=False): ) continue + # Process each address in this postcode with the same EPC data for row in postcode_rows: try: @@ -404,6 +453,23 @@ def handler(event, context, local=False): verbose=True, ) + # Fallback to historic EPC if new EPC produced no match + if not result: + try: + result = get_uprn_from_historic_epc( + user_inputed_address=address2uprn_user_input, + postcode=postcode, + ) + except Exception as e: + logger.error( + f"Historic EPC lookup failed for {address2uprn_user_input} in {postcode}: {e}" + ) + result = None + if result: + logger.info( + f"Historic EPC matched {address2uprn_user_input} in {postcode}" + ) + # Parse result tuple if successful if result: uprn, found_address, score = result diff --git a/backend/utils/addressMatch.py b/backend/utils/addressMatch.py index a0c6ebdf..1435a629 100644 --- a/backend/utils/addressMatch.py +++ b/backend/utils/addressMatch.py @@ -178,6 +178,29 @@ class AddressMatch: tok in a_norm for tok in ("flat", "apt", "apartment", "unit") ) has_flat_token_epc = "flat" in b_norm + # Slash-format like "3/137a" is an implicit flat reference + # (flat 3 of 137a) even without a "flat" keyword. + has_implicit_flat_user = bool(re.search(r"\d+\s*/\s*\d+", a_norm)) + # If the user named a street, their leading number is a house number, + # not a flat number — so an EPC "Flat N, …" candidate is a wrong unit. 
+ # Without a street token (e.g. "2 College House"), the user may be + # implicitly naming a flat in a named building; don't apply the guard. + STREET_TYPE_TOKENS = { + "road", "street", "lane", "avenue", "close", "way", + "crescent", "court", "drive", "place", "terrace", "mews", + "gardens", "square", "grove", "park", "walk", "row", + "green", "hill", "rise", "parade", "broadway", + } + user_tokens = set(a_norm.split()) + has_street_type_user = bool(user_tokens & STREET_TYPE_TOKENS) + + if ( + has_flat_token_epc + and not has_flat_token_user + and not has_implicit_flat_user + and has_street_type_user + ): + return 0.0 if ( len(seq_a) == 2 From 1934c889b0892ef521e38274797df967bd282bfb Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 11 May 2026 16:23:03 +0000 Subject: [PATCH 20/44] refactored test to deal with flats better --- backend/address2UPRN/tests/test_data.csv | 5 +++-- backend/utils/addressMatch.py | 28 ++++++++++-------------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/backend/address2UPRN/tests/test_data.csv b/backend/address2UPRN/tests/test_data.csv index ee23813b..408edc29 100644 --- a/backend/address2UPRN/tests/test_data.csv +++ b/backend/address2UPRN/tests/test_data.csv @@ -168,8 +168,8 @@ FLAT 8 599 HARROW ROAD,W10 4RA,None "Apartment 18 Block D, 32, Hornsey Road",N7 7AT,10012792383 24b Honley Road,SE6 2HZ,None FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974 -2 COLLEGE HOUSE,CM7 1JS,100091449870 -3 COLLEGE HOUSE,CM7 1JS,100091449871 +2 COLLEGE HOUSE,CM7 1JS,None +3 COLLEGE HOUSE,CM7 1JS,None 1 Anita Street,M4 5DU,None 2 Anita Street,M4 5DU,77123061 5 Anita Street,M4 5DU,77123081 @@ -279,6 +279,7 @@ FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974 80a Victoria Square,M4 5DZ,77211231 81a Victoria Square,M4 5DZ,77211232 82 Victoria Square,M4 5DZ,None +82a Victoria Square,M4 5DZ,77211233 83a Victoria Square,M4 5DZ,77211234 84a Victoria Square,M4 5DZ,None 85a Victoria Square,M4 5DZ,77211236 diff --git a/backend/utils/addressMatch.py 
b/backend/utils/addressMatch.py index 1435a629..ee9d1004 100644 --- a/backend/utils/addressMatch.py +++ b/backend/utils/addressMatch.py @@ -127,6 +127,7 @@ class AddressMatch: Assumes formats like: - '42 moreton road' - 'flat 3 42 moreton road' + - '82 a victoria square' (recombined to '82a') """ tokens = s.split() @@ -142,9 +143,15 @@ class AddressMatch: continue cleaned.append(t) - # first remaining number is building number - for t in cleaned: - if re.fullmatch(r"\d+[a-z]?", t): + # first remaining number is building number; recombine with a + # single-letter suffix when normalisation has split "82a" → "82 a" + for i, t in enumerate(cleaned): + if re.fullmatch(r"\d+[a-z]", t): + return t + if re.fullmatch(r"\d+", t): + nxt = cleaned[i + 1] if i + 1 < len(cleaned) else None + if nxt is not None and re.fullmatch(r"[a-z]", nxt): + return t + nxt return t return None @@ -181,24 +188,13 @@ class AddressMatch: # Slash-format like "3/137a" is an implicit flat reference # (flat 3 of 137a) even without a "flat" keyword. has_implicit_flat_user = bool(re.search(r"\d+\s*/\s*\d+", a_norm)) - # If the user named a street, their leading number is a house number, - # not a flat number — so an EPC "Flat N, …" candidate is a wrong unit. - # Without a street token (e.g. "2 College House"), the user may be - # implicitly naming a flat in a named building; don't apply the guard. - STREET_TYPE_TOKENS = { - "road", "street", "lane", "avenue", "close", "way", - "crescent", "court", "drive", "place", "terrace", "mews", - "gardens", "square", "grove", "park", "walk", "row", - "green", "hill", "rise", "parade", "broadway", - } - user_tokens = set(a_norm.split()) - has_street_type_user = bool(user_tokens & STREET_TYPE_TOKENS) + # EPC says it's a flat but user gave no flat indication + # (neither keyword nor slash-format). Unlikely to be the right unit. 
if ( has_flat_token_epc and not has_flat_token_user and not has_implicit_flat_user - and has_street_type_user ): return 0.0 From 5edae06a659a3b6853df56ff8e68b5c2d186e4c9 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 09:37:23 +0000 Subject: [PATCH 21/44] added imports at the top of the file instead of function --- backend/address2UPRN/main.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 0938a53b..a7378fbe 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -20,12 +20,15 @@ from backend.utils.addressMatch import ( df_has_single_uprn, score_addresses, ) +from datatypes.epc.domain.historic_epc_matching import ( + match_addresses_for_postcode, +) +from backend.epc_client.client import EpcClientService logger = setup_logger() def get_epc_data_with_postcode(postcode: str) -> pd.DataFrame: - from backend.epc_client.client import EpcClientService token = os.getenv("OPEN_EPC_API_TOKEN") if token is None: @@ -51,9 +54,6 @@ def get_uprn_from_historic_epc( here because historic addresses use a more verbose format that systematically depresses lexiscores. 
""" - from datatypes.epc.domain.historic_epc_matching import ( - match_addresses_for_postcode, - ) try: result = match_addresses_for_postcode(user_inputed_address, postcode) @@ -148,9 +148,7 @@ def get_uprn( postcode=postcode, ) if result: - logger.info( - f"Historic EPC matched {user_inputed_address} in {postcode}" - ) + logger.info(f"Historic EPC matched {user_inputed_address} in {postcode}") if not result: return None @@ -427,7 +425,6 @@ def handler(event, context, local=False): ) continue - # Process each address in this postcode with the same EPC data for row in postcode_rows: try: From c22528299ce1ba240263a3315537734dc0e456fd Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 09:40:12 +0000 Subject: [PATCH 22/44] added type hinting to uprn --- backend/address2UPRN/main.py | 2 +- scripts/historic_epc_demo.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index a7378fbe..6b684cef 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -60,7 +60,7 @@ def get_uprn_from_historic_epc( except FileNotFoundError: return None - uprn = result.unambiguous_uprn() + uprn: Optional[str] = result.unambiguous_uprn() if not uprn or uprn in ("", "nan"): return None diff --git a/scripts/historic_epc_demo.py b/scripts/historic_epc_demo.py index b47c3a3c..31e1ee28 100644 --- a/scripts/historic_epc_demo.py +++ b/scripts/historic_epc_demo.py @@ -12,6 +12,7 @@ Usage: import sys from datatypes.epc.domain.historic_epc_matching import match_addresses_for_postcode +from typing import Optional def main(user_address: str, postcode: str) -> None: @@ -29,7 +30,7 @@ def main(user_address: str, postcode: str) -> None: ) print() - uprn = result.unambiguous_uprn() + uprn: Optional[str] = result.unambiguous_uprn() if uprn: print(f"Unambiguous UPRN: {uprn}") else: From b72d5fbf42f530eb439d5e8804a9fb270a035f53 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 
09:43:40 +0000 Subject: [PATCH 23/44] fix nitpick --- backend/address2UPRN/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 6b684cef..e49088f4 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -61,7 +61,7 @@ def get_uprn_from_historic_epc( return None uprn: Optional[str] = result.unambiguous_uprn() - if not uprn or uprn in ("", "nan"): + if not uprn or uprn == "nan": return None top = result.top() From e06ead55d0226ec216969fa749de861ece1f4ce8 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 09:48:21 +0000 Subject: [PATCH 24/44] add more type hint --- backend/address2UPRN/main.py | 3 +- .../tests/test_historic_epc_matching.py | 211 ++++++++++++------ 2 files changed, 150 insertions(+), 64 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index e49088f4..642733a7 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -24,6 +24,7 @@ from datatypes.epc.domain.historic_epc_matching import ( match_addresses_for_postcode, ) from backend.epc_client.client import EpcClientService +from datatypes.epc.domain.historic_epc_matching import ScoredHistoricEpc logger = setup_logger() @@ -64,7 +65,7 @@ def get_uprn_from_historic_epc( if not uprn or uprn == "nan": return None - top = result.top() + top: Optional[ScoredHistoricEpc] = result.top() if top is None: return None diff --git a/datatypes/epc/domain/tests/test_historic_epc_matching.py b/datatypes/epc/domain/tests/test_historic_epc_matching.py index 1c3ee6d4..ce86e5c0 100644 --- a/datatypes/epc/domain/tests/test_historic_epc_matching.py +++ b/datatypes/epc/domain/tests/test_historic_epc_matching.py @@ -1,3 +1,4 @@ +from typing import Optional from unittest.mock import patch import numpy as np @@ -13,40 +14,103 @@ from datatypes.epc.domain.historic_epc_matching import ( match_addresses_for_postcode, ) - # Columns required by the 
HistoricEpc dataclass (lower-cased CSV columns). # The matcher only reads ADDRESS + UPRN to score; everything else is filled # with "" but must be present for HistoricEpc(**kwargs) to construct. _FULL_COLUMN_FIELDS = [ - "LMK_KEY", "ADDRESS1", "ADDRESS2", "ADDRESS3", "POSTCODE", - "BUILDING_REFERENCE_NUMBER", "CURRENT_ENERGY_RATING", "POTENTIAL_ENERGY_RATING", - "CURRENT_ENERGY_EFFICIENCY", "POTENTIAL_ENERGY_EFFICIENCY", "PROPERTY_TYPE", - "BUILT_FORM", "INSPECTION_DATE", "LOCAL_AUTHORITY", "CONSTITUENCY", "COUNTY", - "LODGEMENT_DATE", "TRANSACTION_TYPE", "ENVIRONMENT_IMPACT_CURRENT", - "ENVIRONMENT_IMPACT_POTENTIAL", "ENERGY_CONSUMPTION_CURRENT", - "ENERGY_CONSUMPTION_POTENTIAL", "CO2_EMISSIONS_CURRENT", - "CO2_EMISS_CURR_PER_FLOOR_AREA", "CO2_EMISSIONS_POTENTIAL", - "LIGHTING_COST_CURRENT", "LIGHTING_COST_POTENTIAL", "HEATING_COST_CURRENT", - "HEATING_COST_POTENTIAL", "HOT_WATER_COST_CURRENT", "HOT_WATER_COST_POTENTIAL", - "TOTAL_FLOOR_AREA", "ENERGY_TARIFF", "MAINS_GAS_FLAG", "FLOOR_LEVEL", - "FLAT_TOP_STOREY", "FLAT_STOREY_COUNT", "MAIN_HEATING_CONTROLS", - "MULTI_GLAZE_PROPORTION", "GLAZED_TYPE", "GLAZED_AREA", "EXTENSION_COUNT", - "NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS", "LOW_ENERGY_LIGHTING", - "NUMBER_OPEN_FIREPLACES", "HOTWATER_DESCRIPTION", "HOT_WATER_ENERGY_EFF", - "HOT_WATER_ENV_EFF", "FLOOR_DESCRIPTION", "FLOOR_ENERGY_EFF", "FLOOR_ENV_EFF", - "WINDOWS_DESCRIPTION", "WINDOWS_ENERGY_EFF", "WINDOWS_ENV_EFF", - "WALLS_DESCRIPTION", "WALLS_ENERGY_EFF", "WALLS_ENV_EFF", - "SECONDHEAT_DESCRIPTION", "SHEATING_ENERGY_EFF", "SHEATING_ENV_EFF", - "ROOF_DESCRIPTION", "ROOF_ENERGY_EFF", "ROOF_ENV_EFF", "MAINHEAT_DESCRIPTION", - "MAINHEAT_ENERGY_EFF", "MAINHEAT_ENV_EFF", "MAINHEATCONT_DESCRIPTION", - "MAINHEATC_ENERGY_EFF", "MAINHEATC_ENV_EFF", "LIGHTING_DESCRIPTION", - "LIGHTING_ENERGY_EFF", "LIGHTING_ENV_EFF", "MAIN_FUEL", "WIND_TURBINE_COUNT", - "HEAT_LOSS_CORRIDOR", "UNHEATED_CORRIDOR_LENGTH", "FLOOR_HEIGHT", - "PHOTO_SUPPLY", "SOLAR_WATER_HEATING_FLAG", 
"MECHANICAL_VENTILATION", - "ADDRESS", "LOCAL_AUTHORITY_LABEL", "CONSTITUENCY_LABEL", "POSTTOWN", - "CONSTRUCTION_AGE_BAND", "LODGEMENT_DATETIME", "TENURE", - "FIXED_LIGHTING_OUTLETS_COUNT", "LOW_ENERGY_FIXED_LIGHT_COUNT", "UPRN", - "UPRN_SOURCE", "REPORT_TYPE", + "LMK_KEY", + "ADDRESS1", + "ADDRESS2", + "ADDRESS3", + "POSTCODE", + "BUILDING_REFERENCE_NUMBER", + "CURRENT_ENERGY_RATING", + "POTENTIAL_ENERGY_RATING", + "CURRENT_ENERGY_EFFICIENCY", + "POTENTIAL_ENERGY_EFFICIENCY", + "PROPERTY_TYPE", + "BUILT_FORM", + "INSPECTION_DATE", + "LOCAL_AUTHORITY", + "CONSTITUENCY", + "COUNTY", + "LODGEMENT_DATE", + "TRANSACTION_TYPE", + "ENVIRONMENT_IMPACT_CURRENT", + "ENVIRONMENT_IMPACT_POTENTIAL", + "ENERGY_CONSUMPTION_CURRENT", + "ENERGY_CONSUMPTION_POTENTIAL", + "CO2_EMISSIONS_CURRENT", + "CO2_EMISS_CURR_PER_FLOOR_AREA", + "CO2_EMISSIONS_POTENTIAL", + "LIGHTING_COST_CURRENT", + "LIGHTING_COST_POTENTIAL", + "HEATING_COST_CURRENT", + "HEATING_COST_POTENTIAL", + "HOT_WATER_COST_CURRENT", + "HOT_WATER_COST_POTENTIAL", + "TOTAL_FLOOR_AREA", + "ENERGY_TARIFF", + "MAINS_GAS_FLAG", + "FLOOR_LEVEL", + "FLAT_TOP_STOREY", + "FLAT_STOREY_COUNT", + "MAIN_HEATING_CONTROLS", + "MULTI_GLAZE_PROPORTION", + "GLAZED_TYPE", + "GLAZED_AREA", + "EXTENSION_COUNT", + "NUMBER_HABITABLE_ROOMS", + "NUMBER_HEATED_ROOMS", + "LOW_ENERGY_LIGHTING", + "NUMBER_OPEN_FIREPLACES", + "HOTWATER_DESCRIPTION", + "HOT_WATER_ENERGY_EFF", + "HOT_WATER_ENV_EFF", + "FLOOR_DESCRIPTION", + "FLOOR_ENERGY_EFF", + "FLOOR_ENV_EFF", + "WINDOWS_DESCRIPTION", + "WINDOWS_ENERGY_EFF", + "WINDOWS_ENV_EFF", + "WALLS_DESCRIPTION", + "WALLS_ENERGY_EFF", + "WALLS_ENV_EFF", + "SECONDHEAT_DESCRIPTION", + "SHEATING_ENERGY_EFF", + "SHEATING_ENV_EFF", + "ROOF_DESCRIPTION", + "ROOF_ENERGY_EFF", + "ROOF_ENV_EFF", + "MAINHEAT_DESCRIPTION", + "MAINHEAT_ENERGY_EFF", + "MAINHEAT_ENV_EFF", + "MAINHEATCONT_DESCRIPTION", + "MAINHEATC_ENERGY_EFF", + "MAINHEATC_ENV_EFF", + "LIGHTING_DESCRIPTION", + "LIGHTING_ENERGY_EFF", + "LIGHTING_ENV_EFF", + 
"MAIN_FUEL", + "WIND_TURBINE_COUNT", + "HEAT_LOSS_CORRIDOR", + "UNHEATED_CORRIDOR_LENGTH", + "FLOOR_HEIGHT", + "PHOTO_SUPPLY", + "SOLAR_WATER_HEATING_FLAG", + "MECHANICAL_VENTILATION", + "ADDRESS", + "LOCAL_AUTHORITY_LABEL", + "CONSTITUENCY_LABEL", + "POSTTOWN", + "CONSTRUCTION_AGE_BAND", + "LODGEMENT_DATETIME", + "TENURE", + "FIXED_LIGHTING_OUTLETS_COUNT", + "LOW_ENERGY_FIXED_LIGHT_COUNT", + "UPRN", + "UPRN_SOURCE", + "REPORT_TYPE", ] @@ -63,7 +127,9 @@ def _build_df(rows: list[dict]) -> pd.DataFrame: @pytest.fixture def patch_postcode_valid(): - with patch.object(matcher_mod.AddressMatch, "is_valid_postcode", return_value=True) as m: + with patch.object( + matcher_mod.AddressMatch, "is_valid_postcode", return_value=True + ) as m: yield m @@ -106,10 +172,12 @@ class TestMatchAddressesForPostcode: self, patch_read, patch_postcode_valid ): # Disjoint number sets => hard zero. Still kept in matches. - patch_read.return_value = _build_df([ - _row("47 GORDON ROAD", "100"), - _row("999 SOMEWHERE ELSE", "200"), - ]) + patch_read.return_value = _build_df( + [ + _row("47 GORDON ROAD", "100"), + _row("999 SOMEWHERE ELSE", "200"), + ] + ) result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL") assert isinstance(result, HistoricEpcMatches) assert len(result.matches) == 2 @@ -117,10 +185,12 @@ class TestMatchAddressesForPostcode: def test_top_has_lexirank_one_and_lexiscore_monotone( self, patch_read, patch_postcode_valid ): - patch_read.return_value = _build_df([ - _row("48 GORDON ROAD", "200"), # near miss - _row("47 GORDON ROAD", "100"), # exact (after normalisation) - ]) + patch_read.return_value = _build_df( + [ + _row("48 GORDON ROAD", "200"), # near miss + _row("47 GORDON ROAD", "100"), # exact (after normalisation) + ] + ) result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL") assert result.top().lexirank == 1 scores = [m.lexiscore for m in result.matches] @@ -173,19 +243,23 @@ class TestMatchAddressesForPostcode: class TestUnambiguousUprn: def 
test_exact_match_returns_uprn(self, patch_read, patch_postcode_valid): - patch_read.return_value = _build_df([ - _row("47 GORDON ROAD", "100"), - _row("48 GORDON ROAD", "200"), - ]) + patch_read.return_value = _build_df( + [ + _row("47 GORDON ROAD", "100"), + _row("48 GORDON ROAD", "200"), + ] + ) result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL") assert result.unambiguous_uprn() == "100" def test_ambiguous_tie_returns_none(self, patch_read, patch_postcode_valid): # Two duplicate addresses with different UPRNs share rank-1. - patch_read.return_value = _build_df([ - _row("47 GORDON ROAD", "100"), - _row("47 GORDON ROAD", "200"), - ]) + patch_read.return_value = _build_df( + [ + _row("47 GORDON ROAD", "100"), + _row("47 GORDON ROAD", "200"), + ] + ) result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL") assert result.unambiguous_uprn() is None @@ -193,10 +267,12 @@ class TestUnambiguousUprn: self, patch_read, patch_postcode_valid ): # User address has building number 47; no row has 47 -> all hard-zero. - patch_read.return_value = _build_df([ - _row("999 ELSEWHERE", "100"), - _row("888 ELSEWHERE", "200"), - ]) + patch_read.return_value = _build_df( + [ + _row("999 ELSEWHERE", "100"), + _row("888 ELSEWHERE", "200"), + ] + ) result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL") assert all(m.lexiscore == 0.0 for m in result.matches) assert result.unambiguous_uprn() is None @@ -205,15 +281,22 @@ class TestUnambiguousUprn: self, patch_read, patch_postcode_valid ): # Use a real NaN in the UPRN cell. 
- patch_read.return_value = _build_df([ - _row("47 GORDON ROAD", np.nan), - _row("48 GORDON ROAD", "200"), - ]) - result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL") - top = result.top() + patch_read.return_value = _build_df( + [ + _row("47 GORDON ROAD", np.nan), + _row("48 GORDON ROAD", "200"), + ] + ) + result: HistoricEpcMatches = match_addresses_for_postcode( + "47 Gordon Road", "AB33 8AL" + ) + top: Optional[ScoredHistoricEpc] = result.top() # pandas_cell_to_str must turn NaN/"nan" into "" (not the literal string "nan"), # so unambiguous_uprn's truthiness check correctly drops the row. - assert top.record.uprn == "" + if top: + assert top.record.uprn == "" + else: + pytest.fail("should have an epc score, no results found :(") # ---------- top / top_n ---------- @@ -222,11 +305,13 @@ class TestUnambiguousUprn: class TestTopHelpers: def test_top_n_returns_first_k(self, patch_read, patch_postcode_valid): - patch_read.return_value = _build_df([ - _row("47 GORDON ROAD", "100"), - _row("48 GORDON ROAD", "200"), - _row("49 GORDON ROAD", "300"), - ]) + patch_read.return_value = _build_df( + [ + _row("47 GORDON ROAD", "100"), + _row("48 GORDON ROAD", "200"), + _row("49 GORDON ROAD", "300"), + ] + ) result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL") top2 = result.top_n(2) assert len(top2) == 2 From 2c5c8337cc907e419277c0ef8e95f6eedb8c99ab Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 10:01:25 +0000 Subject: [PATCH 25/44] added more type hints --- backend/address2UPRN/main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 642733a7..8832e157 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -68,15 +68,14 @@ def get_uprn_from_historic_epc( top: Optional[ScoredHistoricEpc] = result.top() if top is None: return None - - return (uprn, top.record.address, top.lexiscore) + return uprn, 
top.record.address, top.lexiscore def get_uprn_with_epc_df( user_inputed_address: str, epc_df: pd.DataFrame, verbose: bool = False, -): +) -> Optional[str | tuple[str, str, float]]: """ Return uprn (str) using a pre-fetched EPC dataframe. This avoids calling the API multiple times for the same postcode. @@ -137,7 +136,7 @@ def get_uprn( """ df = get_epc_data_with_postcode(postcode=postcode) - result = get_uprn_with_epc_df( + result: Optional[] = get_uprn_with_epc_df( user_inputed_address=user_inputed_address, epc_df=df, verbose=True, @@ -445,7 +444,7 @@ def handler(event, context, local=False): continue # Get UPRN using the pre-fetched EPC data with all return options - result = get_uprn_with_epc_df( + result: Optional[tuple[str, str, float]] = get_uprn_with_epc_df( user_inputed_address=address2uprn_user_input, epc_df=epc_df, verbose=True, @@ -562,3 +561,4 @@ def handler(event, context, local=False): # Don't add results to return messages as its too verbose # capture the exepection as e, into s3, to find the logs go to s3 # Upload results to s3 as well as csv + From 8635e2a1aaf2072d4fc09e7fe7bc0de8984b71ea Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 10:08:00 +0000 Subject: [PATCH 26/44] change file name of epc client service --- backend/address2UPRN/main.py | 2 +- backend/epc_client/__init__.py | 2 +- backend/epc_client/client.py | 99 ------------------------- backend/epc_client/tests/conftest.py | 2 +- backend/epc_client/tests/test_client.py | 30 ++++++-- backend/utils/epc_address_match.py | 2 +- 6 files changed, 28 insertions(+), 109 deletions(-) delete mode 100644 backend/epc_client/client.py diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 8832e157..7e0baeaa 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -23,7 +23,7 @@ from backend.utils.addressMatch import ( from datatypes.epc.domain.historic_epc_matching import ( match_addresses_for_postcode, ) -from backend.epc_client.client 
import EpcClientService +from backend.epc_client.epc_client_service import EpcClientService from datatypes.epc.domain.historic_epc_matching import ScoredHistoricEpc logger = setup_logger() diff --git a/backend/epc_client/__init__.py b/backend/epc_client/__init__.py index ab46a266..84062592 100644 --- a/backend/epc_client/__init__.py +++ b/backend/epc_client/__init__.py @@ -1,3 +1,3 @@ -from backend.epc_client.client import EpcClientService +from backend.epc_client.epc_client_service import EpcClientService __all__ = ["EpcClientService"] diff --git a/backend/epc_client/client.py b/backend/epc_client/client.py deleted file mode 100644 index d00a164f..00000000 --- a/backend/epc_client/client.py +++ /dev/null @@ -1,99 +0,0 @@ -# Spec: https://raw.githubusercontent.com/communitiesuk/epb-data-warehouse/main/api/api.yml -from __future__ import annotations - -from typing import Any, Optional - -import httpx - -from backend.epc_client.exceptions import ( - EpcApiError, - EpcNotFoundError, - EpcRateLimitError, -) -from backend.epc_client._retry import call_with_retry -from datatypes.epc.domain.epc_property_data import EpcPropertyData -from datatypes.epc.domain.mapper import EpcPropertyDataMapper -from datatypes.epc.search import EpcSearchResult - - -class EpcClientService: - BASE_URL = "https://api.get-energy-performance-data.communities.gov.uk" - - def __init__(self, auth_token: str) -> None: - self._headers = { - "Authorization": f"Bearer {auth_token}", - "Accept": "application/json", - } - - def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData: - raw = call_with_retry(lambda: self._fetch_certificate(cert_num)) - return EpcPropertyDataMapper.from_api_response(raw) - - def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]: - results = call_with_retry(lambda: self._search(uprn=uprn)) - if not results: - return None - latest = max(results, key=lambda r: r.registration_date) - return self.get_by_certificate_number(latest.certificate_number) - - def 
search_by_postcode(self, postcode: str) -> list[EpcSearchResult]: - return call_with_retry(lambda: self._search(postcode=postcode)) - - # ------------------------------------------------------------------ - # Private helpers - # ------------------------------------------------------------------ - - def _fetch_certificate(self, cert_num: str) -> dict[str, Any]: - resp = httpx.get( - f"{self.BASE_URL}/api/certificate", - params={"certificate_number": cert_num}, - headers=self._headers, - ) - if resp.status_code == 404: - raise EpcNotFoundError(cert_num) - if resp.status_code == 429: - raise EpcRateLimitError("Rate limited by EPC API") - if not resp.is_success: - raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") - return resp.json()["data"] - - def _search( - self, - postcode: Optional[str] = None, - uprn: Optional[int] = None, - ) -> list[EpcSearchResult]: - params: dict[str, str | int] = {} - if postcode: - params["postcode"] = postcode - if uprn is not None: - params["uprn"] = uprn - - resp = httpx.get( - f"{self.BASE_URL}/api/domestic/search", - params=params, - headers=self._headers, - ) - if resp.status_code == 404: - return [] - if resp.status_code == 429: - raise EpcRateLimitError("Rate limited by EPC API") - if not resp.is_success: - raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") - - rows = resp.json().get("data", []) - return [self._parse_search_result(r) for r in rows] - - @staticmethod - def _parse_search_result(row: dict[str, Any]) -> EpcSearchResult: - return EpcSearchResult( - certificate_number=row["certificateNumber"], - address_line_1=row["addressLine1"], - address_line_2=row.get("addressLine2"), - address_line_3=row.get("addressLine3"), - address_line_4=row.get("addressLine4"), - postcode=row["postcode"], - post_town=row["postTown"], - uprn=row.get("uprn"), - current_energy_efficiency_band=row["currentEnergyEfficiencyBand"], - registration_date=row["registrationDate"], - ) diff --git 
a/backend/epc_client/tests/conftest.py b/backend/epc_client/tests/conftest.py index 2ed444af..2dab138e 100644 --- a/backend/epc_client/tests/conftest.py +++ b/backend/epc_client/tests/conftest.py @@ -2,7 +2,7 @@ import json import pathlib import pytest -from backend.epc_client.client import EpcClientService +from backend.epc_client.epc_client_service import EpcClientService SAMPLES_DIR = pathlib.Path("backend/epc_api/json_samples") diff --git a/backend/epc_client/tests/test_client.py b/backend/epc_client/tests/test_client.py index 7933f21d..849b4a25 100644 --- a/backend/epc_client/tests/test_client.py +++ b/backend/epc_client/tests/test_client.py @@ -1,7 +1,7 @@ from unittest.mock import MagicMock, patch, call import pytest -from backend.epc_client.client import EpcClientService +from backend.epc_client.epc_client_service import EpcClientService from backend.utils.epc_address_match import find_best_epc_match from datatypes.epc.search import EpcSearchResult from backend.epc_client.exceptions import EpcNotFoundError, EpcRateLimitError @@ -22,7 +22,10 @@ def _mock_response(status_code=200, json_data=None): # Test 1: get_by_certificate_number happy path # --------------------------------------------------------------------------- -def test_get_by_certificate_number_returns_epc_property_data(epc_service, rdsap_21_0_1_cert): + +def test_get_by_certificate_number_returns_epc_property_data( + epc_service, rdsap_21_0_1_cert +): cert_response = {"data": rdsap_21_0_1_cert} with patch("httpx.get", return_value=_mock_response(200, cert_response)): result = epc_service.get_by_certificate_number("CERT-001") @@ -34,6 +37,7 @@ def test_get_by_certificate_number_returns_epc_property_data(epc_service, rdsap_ # Test 2: get_by_certificate_number 404 → EpcNotFoundError # --------------------------------------------------------------------------- + def test_get_by_certificate_number_404_raises_not_found(epc_service): with patch("httpx.get", return_value=_mock_response(404)): with 
pytest.raises(EpcNotFoundError): @@ -44,7 +48,10 @@ def test_get_by_certificate_number_404_raises_not_found(epc_service): # Test 3: 429 retried, succeeds on 3rd attempt # --------------------------------------------------------------------------- -def test_get_by_certificate_number_retries_on_429_and_succeeds(epc_service, rdsap_21_0_1_cert): + +def test_get_by_certificate_number_retries_on_429_and_succeeds( + epc_service, rdsap_21_0_1_cert +): cert_response = {"data": rdsap_21_0_1_cert} responses = [ _mock_response(429), @@ -61,6 +68,7 @@ def test_get_by_certificate_number_retries_on_429_and_succeeds(epc_service, rdsa # Test 4: get_by_uprn empty search → None # --------------------------------------------------------------------------- + def test_get_by_uprn_returns_none_when_no_results(epc_service): with patch("httpx.get", return_value=_mock_response(200, {"data": []})): result = epc_service.get_by_uprn(100023336956) @@ -72,6 +80,7 @@ def test_get_by_uprn_returns_none_when_no_results(epc_service): # Test 5: get_by_uprn multiple results → fetches latest by registration_date # --------------------------------------------------------------------------- + def test_get_by_uprn_picks_most_recent_certificate(epc_service, rdsap_21_0_1_cert): search_rows = [ make_search_row(cert_num="CERT-OLD", registration_date="2022-01-01"), @@ -98,6 +107,7 @@ def test_get_by_uprn_picks_most_recent_certificate(epc_service, rdsap_21_0_1_cer # Test 6: search_by_postcode returns list[EpcSearchResult] # --------------------------------------------------------------------------- + def test_search_by_postcode_returns_results(epc_service): rows = [ make_search_row(cert_num="CERT-A", address_line_1="1 High Street"), @@ -116,6 +126,7 @@ def test_search_by_postcode_returns_results(epc_service): # Test 7: search_by_postcode 404 → empty list # --------------------------------------------------------------------------- + def test_search_by_postcode_404_returns_empty_list(epc_service): with 
patch("httpx.get", return_value=_mock_response(404)): results = epc_service.search_by_postcode("ZZ9 9ZZ") @@ -127,6 +138,7 @@ def test_search_by_postcode_404_returns_empty_list(epc_service): # Tests 8-10: find_best_epc_match — real scoring, only HTTP mocked # --------------------------------------------------------------------------- + def test_find_best_match_clear_winner_on_first_pass(epc_service, rdsap_21_0_1_cert): search_rows = [ make_search_row(cert_num="CERT-WIN", address_line_1="1 High Street"), @@ -145,7 +157,9 @@ def test_find_best_match_clear_winner_on_first_pass(epc_service, rdsap_21_0_1_ce assert isinstance(result, EpcPropertyData) -def test_find_best_match_resolves_on_second_pass_using_full_address(epc_service, rdsap_21_0_1_cert): +def test_find_best_match_resolves_on_second_pass_using_full_address( + epc_service, rdsap_21_0_1_cert +): # Both candidates share address_line_1 — round 1 is ambiguous. # Round 2 scores against full_address and picks the correct floor. search_rows = [ @@ -168,7 +182,9 @@ def test_find_best_match_resolves_on_second_pass_using_full_address(epc_service, return _mock_response(200, cert_response) with patch("httpx.get", side_effect=fake_get): - result = find_best_epc_match(epc_service, "SW1A 1AA", "1 High Street Ground Floor") + result = find_best_epc_match( + epc_service, "SW1A 1AA", "1 High Street Ground Floor" + ) assert isinstance(result, EpcPropertyData) @@ -177,6 +193,8 @@ def test_find_best_match_returns_none_when_no_good_match(epc_service): search_rows = [make_search_row(cert_num="CERT-X", address_line_1="99 Nowhere Lane")] with patch("httpx.get", return_value=_mock_response(200, {"data": search_rows})): - result = find_best_epc_match(epc_service, "SW1A 1AA", "1 Completely Different Road") + result = find_best_epc_match( + epc_service, "SW1A 1AA", "1 Completely Different Road" + ) assert result is None diff --git a/backend/utils/epc_address_match.py b/backend/utils/epc_address_match.py index f73d6d1d..0df56eca 100644 --- 
a/backend/utils/epc_address_match.py +++ b/backend/utils/epc_address_match.py @@ -7,7 +7,7 @@ from datatypes.epc.domain.epc_property_data import EpcPropertyData from datatypes.epc.search import EpcSearchResult if TYPE_CHECKING: - from backend.epc_client.client import EpcClientService + from backend.epc_client.epc_client_service import EpcClientService _MIN_MATCH_SCORE = 0.6 From f52fe001cc4b8077ffb8bb16affa3ed0d960482c Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 10:14:16 +0000 Subject: [PATCH 27/44] renamed file --- backend/epc_client/epc_client_service.py | 99 ++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 backend/epc_client/epc_client_service.py diff --git a/backend/epc_client/epc_client_service.py b/backend/epc_client/epc_client_service.py new file mode 100644 index 00000000..d00a164f --- /dev/null +++ b/backend/epc_client/epc_client_service.py @@ -0,0 +1,99 @@ +# Spec: https://raw.githubusercontent.com/communitiesuk/epb-data-warehouse/main/api/api.yml +from __future__ import annotations + +from typing import Any, Optional + +import httpx + +from backend.epc_client.exceptions import ( + EpcApiError, + EpcNotFoundError, + EpcRateLimitError, +) +from backend.epc_client._retry import call_with_retry +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.domain.mapper import EpcPropertyDataMapper +from datatypes.epc.search import EpcSearchResult + + +class EpcClientService: + BASE_URL = "https://api.get-energy-performance-data.communities.gov.uk" + + def __init__(self, auth_token: str) -> None: + self._headers = { + "Authorization": f"Bearer {auth_token}", + "Accept": "application/json", + } + + def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData: + raw = call_with_retry(lambda: self._fetch_certificate(cert_num)) + return EpcPropertyDataMapper.from_api_response(raw) + + def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]: + results = 
call_with_retry(lambda: self._search(uprn=uprn)) + if not results: + return None + latest = max(results, key=lambda r: r.registration_date) + return self.get_by_certificate_number(latest.certificate_number) + + def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]: + return call_with_retry(lambda: self._search(postcode=postcode)) + + # ------------------------------------------------------------------ + # Private helpers + # ------------------------------------------------------------------ + + def _fetch_certificate(self, cert_num: str) -> dict[str, Any]: + resp = httpx.get( + f"{self.BASE_URL}/api/certificate", + params={"certificate_number": cert_num}, + headers=self._headers, + ) + if resp.status_code == 404: + raise EpcNotFoundError(cert_num) + if resp.status_code == 429: + raise EpcRateLimitError("Rate limited by EPC API") + if not resp.is_success: + raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") + return resp.json()["data"] + + def _search( + self, + postcode: Optional[str] = None, + uprn: Optional[int] = None, + ) -> list[EpcSearchResult]: + params: dict[str, str | int] = {} + if postcode: + params["postcode"] = postcode + if uprn is not None: + params["uprn"] = uprn + + resp = httpx.get( + f"{self.BASE_URL}/api/domestic/search", + params=params, + headers=self._headers, + ) + if resp.status_code == 404: + return [] + if resp.status_code == 429: + raise EpcRateLimitError("Rate limited by EPC API") + if not resp.is_success: + raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") + + rows = resp.json().get("data", []) + return [self._parse_search_result(r) for r in rows] + + @staticmethod + def _parse_search_result(row: dict[str, Any]) -> EpcSearchResult: + return EpcSearchResult( + certificate_number=row["certificateNumber"], + address_line_1=row["addressLine1"], + address_line_2=row.get("addressLine2"), + address_line_3=row.get("addressLine3"), + address_line_4=row.get("addressLine4"), + 
postcode=row["postcode"], + post_town=row["postTown"], + uprn=row.get("uprn"), + current_energy_efficiency_band=row["currentEnergyEfficiencyBand"], + registration_date=row["registrationDate"], + ) From b364df89ad9bba94479554aa1c48b75aabb4c811 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 10:31:54 +0000 Subject: [PATCH 28/44] forgot to add tuple typing --- backend/address2UPRN/main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 7e0baeaa..7ac5a54e 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -136,7 +136,7 @@ def get_uprn( """ df = get_epc_data_with_postcode(postcode=postcode) - result: Optional[] = get_uprn_with_epc_df( + result: Optional[tuple[str, str, float]] = get_uprn_with_epc_df( user_inputed_address=user_inputed_address, epc_df=df, verbose=True, @@ -561,4 +561,3 @@ def handler(event, context, local=False): # Don't add results to return messages as its too verbose # capture the exepection as e, into s3, to find the logs go to s3 # Upload results to s3 as well as csv - From bec5c4f3c3bb4f2fc63167c8115188c3cd5a1c62 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 10:51:27 +0000 Subject: [PATCH 29/44] one place to have df_has_single_uprn --- CLAUDE.md | 1 + backend/address2UPRN/main.py | 3 +-- backend/utils/addressMatch.py | 10 ---------- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 23d465a7..f88a59d5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -62,6 +62,7 @@ bash .devcontainer/backend/install-claude-skills.sh ## Type Safety All new code must pass `pyright` with zero errors under `typeCheckingMode = strict`. +Use Optional over | None Annotate all function return types. Use `dict[str, Any]` for untyped external API payloads — never bare `dict`. Add `pandas-stubs` when introducing pandas to a module. 
diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 7ac5a54e..b2cb4d98 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -17,9 +17,8 @@ from datetime import datetime from backend.utils.addressMatch import ( AddressMatch, get_uprn_candidates, - df_has_single_uprn, - score_addresses, ) +from backend.address2UPRN.scoring import df_has_single_uprn from datatypes.epc.domain.historic_epc_matching import ( match_addresses_for_postcode, ) diff --git a/backend/utils/addressMatch.py b/backend/utils/addressMatch.py index ee9d1004..7618e9ac 100644 --- a/backend/utils/addressMatch.py +++ b/backend/utils/addressMatch.py @@ -259,13 +259,3 @@ def get_uprn_candidates( out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True) out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int) return out.sort_values(["lexirank", "lexiscore"], ascending=[True, False]) - - -def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool: - """Returns True if all non-null UPRNs in df match the given uprn.""" - if column not in df.columns: - return False - uprns = df[column].dropna().astype(str).str.strip().unique() - if len(uprns) == 0: - return False - return len(uprns) == 1 and uprns[0] == str(uprn) From 35fea20fc7e2bbdc51f1da2a3218105e518c9b38 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 10:54:45 +0000 Subject: [PATCH 30/44] changed function name --- backend/address2UPRN/main.py | 6 +++--- backend/address2UPRN/scoring.py | 13 ++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index b2cb4d98..6006fec1 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -18,7 +18,7 @@ from backend.utils.addressMatch import ( AddressMatch, get_uprn_candidates, ) -from backend.address2UPRN.scoring import df_has_single_uprn +from 
backend.address2UPRN.scoring import all_uprns_match from datatypes.epc.domain.historic_epc_matching import ( match_addresses_for_postcode, ) @@ -98,7 +98,7 @@ def get_uprn_with_epc_df( top_rank_df = scored_df[scored_df["lexirank"] == 1] # If rank-1 rows do not agree on a single UPRN → ambiguous - if not df_has_single_uprn(top_rank_df, uprn=top_rank_df.iloc[0]["uprn"]): + if not all_uprns_match(top_rank_df, target_uprn=top_rank_df.iloc[0]["uprn"]): return None address = top_rank_df["address"].values[0] @@ -207,7 +207,7 @@ def resolve_uprns_for_postcode_group( top_rank_df = scored_df[scored_df["lexirank"] == 1] - if not df_has_single_uprn(top_rank_df, top_rank_df.iloc[0]["uprn"]): + if not all_uprns_match(top_rank_df, top_rank_df.iloc[0]["uprn"]): results.append( { "found_uprn": None, diff --git a/backend/address2UPRN/scoring.py b/backend/address2UPRN/scoring.py index d31b9aea..bfda2e71 100644 --- a/backend/address2UPRN/scoring.py +++ b/backend/address2UPRN/scoring.py @@ -3,12 +3,11 @@ import pandas as pd from backend.utils.addressMatch import AddressMatch -def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool: - """ - Returns True if all non-null UPRNs in df match the given uprn. - Returns False otherwise. 
- """ - +def all_uprns_match( + df: pd.DataFrame, + target_uprn: str, + column: str = "uprn", +) -> bool: if column not in df.columns: return False @@ -17,7 +16,7 @@ def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> boo if len(uprns) == 0: return False - return len(uprns) == 1 and uprns[0] == str(uprn) + return len(uprns) == 1 and uprns[0] == str(target_uprn) def get_uprn_candidates( From 18ea95b67d3e15c41dbe57fa4228bd21a762719b Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 12:34:17 +0000 Subject: [PATCH 31/44] added env variables for boto --- .../backend/install-claude-skills.sh | 14 ------ .github/workflows/unit_tests.yml | 3 ++ backend/address2UPRN/tests/test_csv.py | 47 ++++--------------- 3 files changed, 13 insertions(+), 51 deletions(-) delete mode 100755 .devcontainer/backend/install-claude-skills.sh diff --git a/.devcontainer/backend/install-claude-skills.sh b/.devcontainer/backend/install-claude-skills.sh deleted file mode 100755 index a54f69e0..00000000 --- a/.devcontainer/backend/install-claude-skills.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash -# Run this in an existing container to install the mattpocock skills -# without rebuilding the image. New containers get them automatically via Dockerfile. -set -euo pipefail - -echo "Installing Claude Code skills (mattpocock/skills)..." - -npx skills@latest add --global --yes mattpocock/skills/grill-me -npx skills@latest add --global --yes mattpocock/skills/to-prd -npx skills@latest add --global --yes mattpocock/skills/ubiquitous-language -npx skills@latest add --global --yes mattpocock/skills/tdd -npx skills@latest add --global --yes mattpocock/skills/improve-codebase-architecture - -echo "Done. 
Available: /grill-me /to-prd /ubiquitous-language /tdd /improve-codebase-architecture" diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index e1f4fb48..fa4fdf2a 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -51,6 +51,9 @@ jobs: -e EPC_AUTH_TOKEN=${{ secrets.DEV_EPC_AUTH_TOKEN }} \ -e OPEN_EPC_API_TOKEN=${{ secrets.DEV_OPEN_EPC_API_TOKEN }} \ -e HUBSPOT_API_KEY=${{ secrets.HUBSPOT_API_KEY }} \ + -e AWS_ACCESS_KEY_ID=${{ secrets.DEV_AWS_ACCESS_KEY_ID }} \ + -e AWS_SECRET_ACCESS_KEY=${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} \ + -e AWS_DEFAULT_REGION=${{ secrets.DEV_AWS_REGION }} \ -e DB_HOST=localhost \ -e DB_NAME=test \ -e DB_USERNAME=test \ diff --git a/backend/address2UPRN/tests/test_csv.py b/backend/address2UPRN/tests/test_csv.py index d8f54c39..70e7a9f9 100644 --- a/backend/address2UPRN/tests/test_csv.py +++ b/backend/address2UPRN/tests/test_csv.py @@ -1,54 +1,25 @@ # tests/test_address_to_uprn_csv.py import csv -import json import pytest -from datetime import date from pathlib import Path from backend.address2UPRN.main import get_uprn FIXTURE_PATH = Path(__file__).parent / "test_data.csv" -SIDECAR_PATH = Path(__file__).parent / "test_lodgement_dates.json" -NEW_API_CUTOFF = date(2012, 1, 1) - - -def _load_sidecar() -> dict: - if SIDECAR_PATH.exists(): - return json.loads(SIDECAR_PATH.read_text()) - return {} def load_test_cases(): - sidecar = _load_sidecar() with open(FIXTURE_PATH, newline="", encoding="utf-8") as f: reader = csv.DictReader(f) - cases = [] - for row in reader: - key = f"{row['User Input']}|{row['Postcode']}" - entry = sidecar.get(key, {}) - lodgement_date = entry.get("lodgement_date") - - marks = [] - if lodgement_date: - parsed = date.fromisoformat(lodgement_date[:10]) - if parsed < NEW_API_CUTOFF: - marks.append( - pytest.mark.xfail( - reason=f"EPC lodged {lodgement_date} — predates new API coverage (Jan 2012)", - strict=False, - ) - ) - - cases.append( - pytest.param( - 
row["User Input"], - row["Postcode"], - row["Manual UPRN Code"], - id=f'{row["User Input"]} [{row["Postcode"]}]', - marks=marks, - ) + return [ + pytest.param( + row["User Input"], + row["Postcode"], + row["Manual UPRN Code"], + id=f'{row["User Input"]} [{row["Postcode"]}]', ) - return cases + for row in reader + ] @pytest.mark.parametrize( @@ -60,6 +31,8 @@ def test_uprn_resolution_matches_manual( postcode: str, expected_uprn: str, ): + from utils.logger import setup_logger + uprn = get_uprn(user_input, postcode) if uprn: assert uprn == expected_uprn From 5cd21d85224ab84a84e2ebbecbcd7a117c15dbc3 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 12:55:50 +0000 Subject: [PATCH 32/44] get rid of khalim's json --- .../tests/test_lodgement_dates.json | 1230 ----------------- 1 file changed, 1230 deletions(-) delete mode 100644 backend/address2UPRN/tests/test_lodgement_dates.json diff --git a/backend/address2UPRN/tests/test_lodgement_dates.json b/backend/address2UPRN/tests/test_lodgement_dates.json deleted file mode 100644 index c58be704..00000000 --- a/backend/address2UPRN/tests/test_lodgement_dates.json +++ /dev/null @@ -1,1230 +0,0 @@ -{ - "47 The Fairway|OX16 0RR": { - "lodgement_date": "2010-03-16", - "found_in_old_api": true - }, - "11 REGENT COURT|SL1 3LG": { - "lodgement_date": "2022-05-04", - "found_in_old_api": true - }, - "3/137a Windmill Road|TW8 9NH": { - "lodgement_date": "2025-01-30", - "found_in_old_api": true - }, - "Flat 33|SW18 4BE": { - "lodgement_date": "2022-04-27", - "found_in_old_api": true - }, - "FLAT 1 Brendon Grove|N2 8JE": { - "lodgement_date": "2011-02-17", - "found_in_old_api": true - }, - "Flat 15|KT8 2NE": { - "lodgement_date": "2018-03-26", - "found_in_old_api": true - }, - "FLAT 5 Stonehill Road|W4 3AH": { - "lodgement_date": "2025-09-22", - "found_in_old_api": true - }, - "Flat 10|W4 3AH": { - "lodgement_date": "2023-06-15", - "found_in_old_api": true - }, - "Flat 11|W4 3AH": { - "lodgement_date": "2023-10-19", - 
"found_in_old_api": true - }, - "Flat 12, Forbes House|W4 3AH": { - "lodgement_date": "2023-10-04", - "found_in_old_api": true - }, - "Flat 13|W4 3AH": { - "lodgement_date": "2012-05-14", - "found_in_old_api": true - }, - "Flat 14|W4 3AH": { - "lodgement_date": "2022-10-15", - "found_in_old_api": true - }, - "Flat 15|W4 3AH": { - "lodgement_date": "2009-08-25", - "found_in_old_api": true - }, - "Flat 16|W4 3AH": { - "lodgement_date": "2012-05-23", - "found_in_old_api": true - }, - "Flat 17|W4 3AH": { - "lodgement_date": "2023-08-31", - "found_in_old_api": true - }, - "Flat 19|W4 3AH": { - "lodgement_date": "2025-07-16", - "found_in_old_api": true - }, - "Flat 20|W4 3AH": { - "lodgement_date": "2024-10-27", - "found_in_old_api": true - }, - "Flat 21|W4 3AH": { - "lodgement_date": "2023-08-08", - "found_in_old_api": true - }, - "Flat 22|W4 3AH": { - "lodgement_date": "2022-10-15", - "found_in_old_api": true - }, - "Flat 23|W4 3AH": { - "lodgement_date": "2022-10-15", - "found_in_old_api": true - }, - "Flat 24|W4 3AH": { - "lodgement_date": "2024-01-12", - "found_in_old_api": true - }, - "10 Douglas Court|SL7 1UQ": { - "lodgement_date": "2018-10-25", - "found_in_old_api": true - }, - "1 Windmill Road|HP17 8JA": { - "lodgement_date": "2009-08-25", - "found_in_old_api": true - }, - "31 Denewood|HP13 7LH": { - "lodgement_date": "2009-03-23", - "found_in_old_api": true - }, - "10, Greenways Drive|TW4 5DD": { - "lodgement_date": "2012-11-29", - "found_in_old_api": true - }, - "Flat 11|TW4 5DD": { - "lodgement_date": "2012-11-29", - "found_in_old_api": true - }, - "12, Greenways Drive|TW4 5DD": { - "lodgement_date": "2012-11-29", - "found_in_old_api": true - }, - "Flat 13|TW4 5DD": { - "lodgement_date": "2012-11-29", - "found_in_old_api": true - }, - "Flat 14|TW4 5DD": { - "lodgement_date": "2012-11-29", - "found_in_old_api": true - }, - "Flat 15|TW4 5DD": { - "lodgement_date": "2012-11-29", - "found_in_old_api": true - }, - "Flat 16|TW4 5DD": { - "lodgement_date": 
"2025-02-26", - "found_in_old_api": true - }, - "Flat 17|TW4 5DD": { - "lodgement_date": "2012-11-29", - "found_in_old_api": true - }, - "Flat 18|TW4 5DD": { - "lodgement_date": "2022-12-28", - "found_in_old_api": true - }, - "FLAT 1 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 2 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 3 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 4 Goodstone Court|HA1 4FL": { - "lodgement_date": "2022-12-14", - "found_in_old_api": true - }, - "FLAT 5 Goodstone Court|HA1 4FL": { - "lodgement_date": "2016-10-04", - "found_in_old_api": true - }, - "FLAT 6 Goodstone Court|HA1 4FL": { - "lodgement_date": "2024-06-05", - "found_in_old_api": true - }, - "FLAT 7 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 8 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 9 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 10 Goodstone Court|HA1 4FL": { - "lodgement_date": "2023-09-21", - "found_in_old_api": true - }, - "FLAT 11 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 12 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 13 Goodstone Court|HA1 4FL": { - "lodgement_date": "2022-12-13", - "found_in_old_api": true - }, - "FLAT 14 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 15 Goodstone Court|HA1 4FL": { - "lodgement_date": "2024-02-09", - "found_in_old_api": true - }, - "FLAT 16 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 17 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, 
- "FLAT 18 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 19 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 20 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 21 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 22 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 23 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 24 Goodstone Court|HA1 4FL": { - "lodgement_date": "2024-10-24", - "found_in_old_api": true - }, - "FLAT 25 Goodstone Court|HA1 4FL": { - "lodgement_date": "2020-01-18", - "found_in_old_api": true - }, - "FLAT 26 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 27 Goodstone Court|HA1 4FL": { - "lodgement_date": "2022-11-04", - "found_in_old_api": true - }, - "FLAT 28 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 29 Goodstone Court|HA1 4FL": { - "lodgement_date": "2023-10-13", - "found_in_old_api": true - }, - "FLAT 30 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 31 Goodstone Court|HA1 4FL": { - "lodgement_date": "2023-04-19", - "found_in_old_api": true - }, - "FLAT 32 Goodstone Court|HA1 4FL": { - "lodgement_date": "2025-11-18", - "found_in_old_api": true - }, - "FLAT 33 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 34 Goodstone Court|HA1 4FL": { - "lodgement_date": "2022-09-19", - "found_in_old_api": true - }, - "FLAT 35 Goodstone Court|HA1 4FL": { - "lodgement_date": "2021-10-13", - "found_in_old_api": true - }, - "FLAT 36 Goodstone Court|HA1 4FL": { - "lodgement_date": "2022-10-12", - "found_in_old_api": true - }, - 
"FLAT 37 Goodstone Court|HA1 4FL": { - "lodgement_date": "2024-08-26", - "found_in_old_api": true - }, - "FLAT 38 Goodstone Court|HA1 4FL": { - "lodgement_date": "2023-05-26", - "found_in_old_api": true - }, - "FLAT 39 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 40 Goodstone Court|HA1 4FL": { - "lodgement_date": "2023-10-05", - "found_in_old_api": true - }, - "FLAT 41 Goodstone Court|HA1 4FL": { - "lodgement_date": "2025-11-24", - "found_in_old_api": true - }, - "FLAT 42 Goodstone Court|HA1 4FL": { - "lodgement_date": "2012-11-06", - "found_in_old_api": true - }, - "FLAT 43 Goodstone Court|HA1 4FL": { - "lodgement_date": "2025-07-08", - "found_in_old_api": true - }, - "30c, Bosanquet Close|UB8 3PE": { - "lodgement_date": "2019-05-27", - "found_in_old_api": true - }, - "30e, Bosanquet Close|UB8 3PE": { - "lodgement_date": "2024-07-30", - "found_in_old_api": true - }, - "13 Stubwick Court, Old Saw Mill Place|HP6 6FF": { - "lodgement_date": "2025-07-05", - "found_in_old_api": true - }, - "14 Stubwick Court, Old Saw Mill Place|HP6 6FF": { - "lodgement_date": "2012-07-18", - "found_in_old_api": true - }, - "15 Stubwick Court, Old Saw Mill Place|HP6 6FF": { - "lodgement_date": "2012-06-11", - "found_in_old_api": true - }, - "16 Stubwick Court, Old Saw Mill Place|HP6 6FF": { - "lodgement_date": "2022-07-01", - "found_in_old_api": true - }, - "17 Stubwick Court, Old Saw Mill Place|HP6 6FF": { - "lodgement_date": "2025-01-07", - "found_in_old_api": true - }, - "18 Stubwick Court, Old Saw Mill Place|HP6 6FF": { - "lodgement_date": "2012-07-18", - "found_in_old_api": true - }, - "19 Stubwick Court, Old Saw Mill Place|HP6 6FF": { - "lodgement_date": "2025-03-22", - "found_in_old_api": true - }, - "20 Stubwick Court, Old Saw Mill Place|HP6 6FF": { - "lodgement_date": "2022-08-15", - "found_in_old_api": true - }, - "21 Stubwick Court, Old Saw Mill Place|HP6 6FF": { - "lodgement_date": "2012-07-18", - "found_in_old_api": 
true - }, - "90a Murray Road|W5 4DA": { - "lodgement_date": "2013-12-12", - "found_in_old_api": true - }, - "Flat 1, 6 Wolverton Gardens|W5 3LJ": { - "lodgement_date": "2017-10-13", - "found_in_old_api": true - }, - "1, Monsted House|UB1 1FG": { - "lodgement_date": "2019-02-08", - "found_in_old_api": true - }, - "10, Monsted House|UB1 1FG": { - "lodgement_date": "2019-02-08", - "found_in_old_api": true - }, - "20, Monsted House|UB1 1FG": { - "lodgement_date": "2019-02-08", - "found_in_old_api": true - }, - "2, Monsted House|UB1 1FG": { - "lodgement_date": "2019-02-08", - "found_in_old_api": true - }, - "3, Monsted House|UB1 1FG": { - "lodgement_date": "2019-02-08", - "found_in_old_api": true - }, - "4, Monsted House|UB1 1FG": { - "lodgement_date": "2019-02-08", - "found_in_old_api": true - }, - "5, Monsted House|UB1 1FG": { - "lodgement_date": "2019-02-08", - "found_in_old_api": true - }, - "6, Monsted House|UB1 1FG": { - "lodgement_date": "2019-02-08", - "found_in_old_api": true - }, - "7, Monsted House|UB1 1FG": { - "lodgement_date": "2019-02-08", - "found_in_old_api": true - }, - "8, Monsted House|UB1 1FG": { - "lodgement_date": "2019-02-08", - "found_in_old_api": true - }, - "9, Monsted House|UB1 1FG": { - "lodgement_date": "2019-02-08", - "found_in_old_api": true - }, - "1 Cullis House, 1, Accolade Avenue|UB1 1FH": { - "lodgement_date": "2018-11-05", - "found_in_old_api": true - }, - "2 Cullis House, 1, Accolade Avenue|UB1 1FH": { - "lodgement_date": "2018-11-05", - "found_in_old_api": true - }, - "3 Cullis House, 1, Accolade Avenue|UB1 1FH": { - "lodgement_date": "2018-11-05", - "found_in_old_api": true - }, - "4 Cullis House, 1, Accolade Avenue|UB1 1FH": { - "lodgement_date": "2018-11-05", - "found_in_old_api": true - }, - "5 Cullis House, 1, Accolade Avenue|UB1 1FH": { - "lodgement_date": "2018-11-05", - "found_in_old_api": true - }, - "6 Cullis House, 1, Accolade Avenue|UB1 1FH": { - "lodgement_date": "2018-11-05", - "found_in_old_api": true - }, - "1 
Genteel House Samara Drive|UB1 1FJ": { - "lodgement_date": "2019-05-10", - "found_in_old_api": true - }, - "2 Genteel House Samara Drive|UB1 1FJ": { - "lodgement_date": "2019-05-10", - "found_in_old_api": true - }, - "3 Genteel House Samara Drive|UB1 1FJ": { - "lodgement_date": "2019-05-13", - "found_in_old_api": true - }, - "4 Genteel House Samara Drive|UB1 1FJ": { - "lodgement_date": "2019-05-13", - "found_in_old_api": true - }, - "5 Genteel House Samara Drive|UB1 1FJ": { - "lodgement_date": "2019-05-13", - "found_in_old_api": true - }, - "6 Genteel House Samara Drive|UB1 1FJ": { - "lodgement_date": "2019-05-13", - "found_in_old_api": true - }, - "7 Genteel House Samara Drive|UB1 1FJ": { - "lodgement_date": "2019-05-13", - "found_in_old_api": true - }, - "8 Genteel House Samara Drive|UB1 1FJ": { - "lodgement_date": "2019-05-13", - "found_in_old_api": true - }, - "9 Genteel House Samara Drive|UB1 1FJ": { - "lodgement_date": "2019-05-13", - "found_in_old_api": true - }, - "10 Genteel House Samara Drive|UB1 1FJ": { - "lodgement_date": "2019-05-13", - "found_in_old_api": true - }, - "Flat 1 Ash Tree House, 2, Thompson Avenue|SE5 0TE": { - "lodgement_date": "2018-09-05", - "found_in_old_api": true - }, - "Flat 3 ASH TREE HOUSE|SE5 0TE": { - "lodgement_date": "2018-09-05", - "found_in_old_api": true - }, - "Flat 5 ASH TREE HOUSE|SE5 0TE": { - "lodgement_date": "2019-09-12", - "found_in_old_api": true - }, - "Flat 8 ASH TREE HOUSE|SE5 0TE": { - "lodgement_date": "2011-10-26", - "found_in_old_api": true - }, - "Flat 12 ASH TREE HOUSE|SE5 0TE": { - "lodgement_date": "2018-09-05", - "found_in_old_api": true - }, - "FLAT 1 599 HARROW ROAD|W10 4RA": { - "lodgement_date": "2017-01-12", - "found_in_old_api": true - }, - "FLAT 2 599 HARROW ROAD|W10 4RA": { - "lodgement_date": "2020-07-28", - "found_in_old_api": true - }, - "FLAT 5 599 HARROW ROAD|W10 4RA": { - "lodgement_date": "2017-01-12", - "found_in_old_api": true - }, - "Flat 1, Ohio Building|SE13 7RX": { - 
"lodgement_date": "2023-08-15", - "found_in_old_api": true - }, - "Flat 2, Ohio Building|SE13 7RX": { - "lodgement_date": "2017-06-09", - "found_in_old_api": true - }, - "Apartment 1 Block B, 105, Benwell Road|N7 7BW": { - "lodgement_date": "2017-01-05", - "found_in_old_api": true - }, - "Apartment 2 Block B, 105, Benwell Road|N7 7BW": { - "lodgement_date": "2014-01-22", - "found_in_old_api": true - }, - "Apartment 3 Block B, 105, Benwell Road|N7 7BW": { - "lodgement_date": "2009-02-25", - "found_in_old_api": true - }, - "Apartment 4 Block B, 105, Benwell Road|N7 7BW": { - "lodgement_date": "2017-01-05", - "found_in_old_api": true - }, - "Apartment 5 Block B, 105, Benwell Road|N7 7BW": { - "lodgement_date": "2009-02-25", - "found_in_old_api": true - }, - "Apartment 6 Block B, 105, Benwell Road|N7 7BW": { - "lodgement_date": "2009-02-25", - "found_in_old_api": true - }, - "Apartment 7 Block B, 105, Benwell Road|N7 7BW": { - "lodgement_date": "2022-10-24", - "found_in_old_api": true - }, - "Apartment 8 Block B, 105, Benwell Road|N7 7BW": { - "lodgement_date": "2009-02-25", - "found_in_old_api": true - }, - "Apartment 9 Block B, 105, Benwell Road|N7 7BW": { - "lodgement_date": "2009-02-25", - "found_in_old_api": true - }, - "Apartment 10 Block B, 105, Benwell Road|N7 7BW": { - "lodgement_date": "2009-02-25", - "found_in_old_api": true - }, - "Apartment 11 Block B, 105, Benwell Road|N7 7BW": { - "lodgement_date": "2011-08-17", - "found_in_old_api": true - }, - "Apartment 12 Block B, 105, Benwell Road|N7 7BW": { - "lodgement_date": "2009-02-25", - "found_in_old_api": true - }, - "Apartment 13 Block B, 105, Benwell Road|N7 7BW": { - "lodgement_date": "2009-02-25", - "found_in_old_api": true - }, - "Apartment 1 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2023-07-19", - "found_in_old_api": true - }, - "Apartment 2 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2022-10-20", - "found_in_old_api": true - }, - "Apartment 3 Block D, 32, Hornsey Road|N7 
7AT": { - "lodgement_date": "2009-05-15", - "found_in_old_api": true - }, - "Apartment 4 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2009-05-15", - "found_in_old_api": true - }, - "Apartment 5 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2009-05-15", - "found_in_old_api": true - }, - "Apartment 6 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2012-11-08", - "found_in_old_api": true - }, - "Apartment 7 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2015-08-30", - "found_in_old_api": true - }, - "Apartment 8 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2020-08-02", - "found_in_old_api": true - }, - "Apartment 9 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2021-10-12", - "found_in_old_api": true - }, - "Apartment 10 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2009-05-15", - "found_in_old_api": true - }, - "Apartment 11 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2009-05-15", - "found_in_old_api": true - }, - "Apartment 12 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2022-02-22", - "found_in_old_api": true - }, - "Apartment 13 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2009-05-15", - "found_in_old_api": true - }, - "Apartment 14 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2009-05-15", - "found_in_old_api": true - }, - "Apartment 15 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2009-05-15", - "found_in_old_api": true - }, - "Apartment 16 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2009-05-15", - "found_in_old_api": true - }, - "Apartment 17Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2019-01-22", - "found_in_old_api": true - }, - "Apartment 18 Block D, 32, Hornsey Road|N7 7AT": { - "lodgement_date": "2013-06-03", - "found_in_old_api": true - }, - "FLAT B 158 LEAHURST ROAD|SE13 5NL": { - "lodgement_date": "2014-01-24", - "found_in_old_api": true - }, - "2 COLLEGE HOUSE|CM7 1JS": { - 
"lodgement_date": "2017-01-12", - "found_in_old_api": true - }, - "3 COLLEGE HOUSE|CM7 1JS": { - "lodgement_date": "2017-01-12", - "found_in_old_api": true - }, - "2 Anita Street|M4 5DU": { - "lodgement_date": "2019-10-18", - "found_in_old_api": true - }, - "5 Anita Street|M4 5DU": { - "lodgement_date": "2012-12-21", - "found_in_old_api": true - }, - "6 Anita Street|M4 5DU": { - "lodgement_date": "2021-02-16", - "found_in_old_api": true - }, - "10 Anita Street|M4 5DU": { - "lodgement_date": "2021-07-01", - "found_in_old_api": true - }, - "12 Anita Street|M4 5DU": { - "lodgement_date": "2025-08-08", - "found_in_old_api": true - }, - "26 Anita Street|M4 5DU": { - "lodgement_date": "2010-06-25", - "found_in_old_api": true - }, - "33 Anita Street|M4 5DU": { - "lodgement_date": "2017-03-10", - "found_in_old_api": true - }, - "35 Anita Street|M4 5DU": { - "lodgement_date": "2015-11-18", - "found_in_old_api": true - }, - "36 Anita Street|M4 5DU": { - "lodgement_date": "2013-09-12", - "found_in_old_api": true - }, - "23 George Leigh Street|M4 5DR": { - "lodgement_date": "2025-03-11", - "found_in_old_api": true - }, - "35 George Leigh Street|M4 5DR": { - "lodgement_date": "2024-05-29", - "found_in_old_api": true - }, - "39 George Leigh Street|M4 5DR": { - "lodgement_date": "2024-05-28", - "found_in_old_api": true - }, - "51 George Leigh Street|M4 5DR": { - "lodgement_date": "2022-02-03", - "found_in_old_api": true - }, - "1a, Victoria Square|M4 5DX": { - "lodgement_date": "2016-01-08", - "found_in_old_api": true - }, - "4a, Victoria Square|M4 5DX": { - "lodgement_date": "2012-09-19", - "found_in_old_api": true - }, - "5a Victoria Square|M4 5DX": { - "lodgement_date": "2012-06-25", - "found_in_old_api": true - }, - " 6a Victoria Square|M4 5DX": { - "lodgement_date": "2023-02-13", - "found_in_old_api": true - }, - "7a Victoria Square|M4 5DX": { - "lodgement_date": "2017-03-15", - "found_in_old_api": true - }, - "8a Victoria Square|M4 5DX": { - "lodgement_date": "2019-11-25", 
- "found_in_old_api": true - }, - "9a Victoria Square|M4 5DX": { - "lodgement_date": "2026-02-24", - "found_in_old_api": true - }, - "10a Victoria Square|M4 5DX": { - "lodgement_date": "2013-10-16", - "found_in_old_api": true - }, - "11a Victoria Square|M4 5DX": { - "lodgement_date": "2015-11-06", - "found_in_old_api": true - }, - "12a Victoria Square|M4 5DX": { - "lodgement_date": "2022-11-08", - "found_in_old_api": true - }, - "13a Victoria Square|M4 5DX": { - "lodgement_date": "2025-04-27", - "found_in_old_api": true - }, - "14a Victoria Square|M4 5DX": { - "lodgement_date": "2010-11-09", - "found_in_old_api": true - }, - "15a Victoria Square|M4 5DX": { - "lodgement_date": "2012-03-26", - "found_in_old_api": true - }, - "16a Victoria Square|M4 5DX": { - "lodgement_date": "2009-05-28", - "found_in_old_api": true - }, - "17a Victoria Square|M4 5DX": { - "lodgement_date": "2012-12-20", - "found_in_old_api": true - }, - "18a Victoria Square|M4 5DX": { - "lodgement_date": "2022-07-21", - "found_in_old_api": true - }, - "19a Victoria Square|M4 5DX": { - "lodgement_date": "2009-08-18", - "found_in_old_api": true - }, - "20a Victoria Square|M4 5DX": { - "lodgement_date": "2014-05-27", - "found_in_old_api": true - }, - "21a Victoria Square|M4 5DY": { - "lodgement_date": "2010-04-08", - "found_in_old_api": true - }, - "23a Victoria Square|M4 5DY": { - "lodgement_date": "2016-04-05", - "found_in_old_api": true - }, - "24a Victoria Square|M4 5DY": { - "lodgement_date": "2022-03-23", - "found_in_old_api": true - }, - "25a Victoria Square|M4 5DY": { - "lodgement_date": "2024-10-13", - "found_in_old_api": true - }, - "26a Victoria Square|M4 5DY": { - "lodgement_date": "2024-03-25", - "found_in_old_api": true - }, - "27a Victoria Square|M4 5DY": { - "lodgement_date": "2009-10-05", - "found_in_old_api": true - }, - "29a Victoria Square|M4 5DY": { - "lodgement_date": "2024-05-27", - "found_in_old_api": true - }, - "30a Victoria Square|M4 5DY": { - "lodgement_date": "2011-09-07", 
- "found_in_old_api": true - }, - "31a Victoria Square|M4 5DY": { - "lodgement_date": "2010-12-09", - "found_in_old_api": true - }, - "32a Victoria Square|M4 5DY": { - "lodgement_date": "2021-02-17", - "found_in_old_api": true - }, - "33a Victoria Square|M4 5DY": { - "lodgement_date": "2011-04-05", - "found_in_old_api": true - }, - "34a Victoria Square|M4 5DY": { - "lodgement_date": "2021-08-13", - "found_in_old_api": true - }, - "36a Victoria Square|M4 5DY": { - "lodgement_date": "2011-04-05", - "found_in_old_api": true - }, - "37a Victoria Square|M4 5DY": { - "lodgement_date": "2018-07-02", - "found_in_old_api": true - }, - "38a Victoria Square|M4 5DY": { - "lodgement_date": "2010-02-02", - "found_in_old_api": true - }, - "39a Victoria Square|M4 5DY": { - "lodgement_date": "2018-01-04", - "found_in_old_api": true - }, - "41a Victoria Square|M4 5DY": { - "lodgement_date": "2011-05-23", - "found_in_old_api": true - }, - "42a Victoria Square|M4 5DY": { - "lodgement_date": "2010-10-14", - "found_in_old_api": true - }, - "43a Victoria Square|M4 5DY": { - "lodgement_date": "2018-10-11", - "found_in_old_api": true - }, - "44a Victoria Square|M4 5DY": { - "lodgement_date": "2010-06-08", - "found_in_old_api": true - }, - "45a Victoria Square|M4 5DY": { - "lodgement_date": "2023-03-08", - "found_in_old_api": true - }, - "46a Victoria Square|M4 5DY": { - "lodgement_date": "2010-12-09", - "found_in_old_api": true - }, - "47a Victoria Square|M4 5DY": { - "lodgement_date": "2010-02-09", - "found_in_old_api": true - }, - "48a Victoria Square|M4 5DY": { - "lodgement_date": "2011-04-12", - "found_in_old_api": true - }, - "49a Victoria Square|M4 5DY": { - "lodgement_date": "2010-11-09", - "found_in_old_api": true - }, - "50a Victoria Square|M4 5DY": { - "lodgement_date": "2025-09-06", - "found_in_old_api": true - }, - "51a Victoria Square|M4 5DY": { - "lodgement_date": "2009-10-05", - "found_in_old_api": true - }, - "52a Victoria Square|M4 5DY": { - "lodgement_date": "2010-12-17", 
- "found_in_old_api": true - }, - "53a Victoria Square|M4 5DY": { - "lodgement_date": "2022-11-10", - "found_in_old_api": true - }, - "54a Victoria Square|M4 5DY": { - "lodgement_date": "2021-01-08", - "found_in_old_api": true - }, - "55a Victoria Square|M4 5DY": { - "lodgement_date": "2009-08-18", - "found_in_old_api": true - }, - "56a Victoria Square|M4 5DZ": { - "lodgement_date": "2019-03-15", - "found_in_old_api": true - }, - "58a Victoria Square|M4 5DZ": { - "lodgement_date": "2018-11-14", - "found_in_old_api": true - }, - "59a Victoria Square|M4 5DZ": { - "lodgement_date": "2013-11-26", - "found_in_old_api": true - }, - "60a Victoria Square|M4 5DZ": { - "lodgement_date": "2024-06-12", - "found_in_old_api": true - }, - "61a Victoria Square|M4 5DZ": { - "lodgement_date": "2024-08-05", - "found_in_old_api": true - }, - "62a Victoria Square|M4 5DZ": { - "lodgement_date": "2013-05-24", - "found_in_old_api": true - }, - "64a Victoria Square|M4 5DZ": { - "lodgement_date": "2021-07-29", - "found_in_old_api": true - }, - "65a Victoria Square|M4 5DZ": { - "lodgement_date": "2011-08-26", - "found_in_old_api": true - }, - "68a Victoria Square|M4 5DZ": { - "lodgement_date": "2022-03-29", - "found_in_old_api": true - }, - "69a Victoria Square|M4 5DZ": { - "lodgement_date": "2011-01-19", - "found_in_old_api": true - }, - "70a Victoria Square|M4 5DZ": { - "lodgement_date": "2011-07-27", - "found_in_old_api": true - }, - "71a Victoria Square|M4 5DZ": { - "lodgement_date": "2016-11-22", - "found_in_old_api": true - }, - "72a Victoria Square|M4 5DZ": { - "lodgement_date": "2019-01-07", - "found_in_old_api": true - }, - "73a Victoria Square|M4 5DZ": { - "lodgement_date": "2014-07-25", - "found_in_old_api": true - }, - "75a Victoria Square|M4 5DZ": { - "lodgement_date": "2016-01-20", - "found_in_old_api": true - }, - "76a Victoria Square|M4 5DZ": { - "lodgement_date": "2018-01-26", - "found_in_old_api": true - }, - "78a Victoria Square|M4 5DZ": { - "lodgement_date": "2011-06-02", 
- "found_in_old_api": true - }, - "79a Victoria Square|M4 5DZ": { - "lodgement_date": "2022-01-26", - "found_in_old_api": true - }, - "80a Victoria Square|M4 5DZ": { - "lodgement_date": "2018-11-05", - "found_in_old_api": true - }, - "81a Victoria Square|M4 5DZ": { - "lodgement_date": "2017-03-05", - "found_in_old_api": true - }, - "83a Victoria Square|M4 5DZ": { - "lodgement_date": "2012-05-01", - "found_in_old_api": true - }, - "85a Victoria Square|M4 5DZ": { - "lodgement_date": "2009-10-21", - "found_in_old_api": true - }, - "86a Victoria Square|M4 5DZ": { - "lodgement_date": "2024-05-29", - "found_in_old_api": true - }, - "87a Victoria Square|M4 5DZ": { - "lodgement_date": "2025-07-13", - "found_in_old_api": true - }, - "89a Victoria Square|M4 5DZ": { - "lodgement_date": "2016-05-12", - "found_in_old_api": true - }, - "90a Victoria Square|M4 5DZ": { - "lodgement_date": "2012-05-09", - "found_in_old_api": true - }, - "91a Victoria Square|M4 5DZ": { - "lodgement_date": "2025-04-30", - "found_in_old_api": true - }, - "92a Victoria Square|M4 5DZ": { - "lodgement_date": "2021-07-29", - "found_in_old_api": true - }, - "93a Victoria Square|M4 5EA": { - "lodgement_date": "2013-02-26", - "found_in_old_api": true - }, - "95a Victoria Square|M4 5EA": { - "lodgement_date": "2020-09-06", - "found_in_old_api": true - }, - "96a Victoria Square|M4 5EA": { - "lodgement_date": "2022-06-30", - "found_in_old_api": true - }, - "97a Victoria Square|M4 5EA": { - "lodgement_date": "2016-09-05", - "found_in_old_api": true - }, - "98a Victoria Square|M4 5EA": { - "lodgement_date": "2019-12-19", - "found_in_old_api": true - }, - "99a Victoria Square|M4 5EA": { - "lodgement_date": "2009-03-05", - "found_in_old_api": true - }, - "100a Victoria Square|M4 5EA": { - "lodgement_date": "2011-03-31", - "found_in_old_api": true - }, - "103a Victoria Square|M4 5EA": { - "lodgement_date": "2009-03-05", - "found_in_old_api": true - }, - "104a Victoria Square|M4 5EA": { - "lodgement_date": 
"2010-01-21", - "found_in_old_api": true - }, - "106a Victoria Square|M4 5EA": { - "lodgement_date": "2015-12-10", - "found_in_old_api": true - }, - "107a Victoria Square|M4 5EA": { - "lodgement_date": "2013-07-01", - "found_in_old_api": true - }, - "108a Victoria Square|M4 5EA": { - "lodgement_date": "2023-03-01", - "found_in_old_api": true - }, - "109a Victoria Square|M4 5EA": { - "lodgement_date": "2010-03-24", - "found_in_old_api": true - }, - "110a Victoria Square|M4 5EA": { - "lodgement_date": "2019-02-25", - "found_in_old_api": true - }, - "111a Victoria Square|M4 5EA": { - "lodgement_date": "2010-02-01", - "found_in_old_api": true - }, - "113a Victoria Square|M4 5EA": { - "lodgement_date": "2012-11-21", - "found_in_old_api": true - }, - "114a Victoria Square|M4 5EA": { - "lodgement_date": "2013-12-06", - "found_in_old_api": true - }, - "115a Victoria Square|M4 5EA": { - "lodgement_date": "2022-08-25", - "found_in_old_api": true - }, - "116a Victoria Square|M4 5EA": { - "lodgement_date": "2011-02-25", - "found_in_old_api": true - }, - "119a Victoria Square|M4 5EA": { - "lodgement_date": "2024-04-12", - "found_in_old_api": true - }, - "120a Victoria Square|M4 5EA": { - "lodgement_date": "2011-04-04", - "found_in_old_api": true - }, - "121a Victoria Square|M4 5EA": { - "lodgement_date": "2010-11-09", - "found_in_old_api": true - }, - "122a Victoria Square|M4 5EA": { - "lodgement_date": "2012-05-01", - "found_in_old_api": true - }, - "123a Victoria Square|M4 5EA": { - "lodgement_date": "2022-01-12", - "found_in_old_api": true - }, - "125a Victoria Square|M4 5EA": { - "lodgement_date": "2023-11-22", - "found_in_old_api": true - }, - "126a Victoria Square|M4 5EA": { - "lodgement_date": "2010-08-24", - "found_in_old_api": true - }, - "127a Victoria Square|M4 5EA": { - "lodgement_date": "2020-03-01", - "found_in_old_api": true - }, - "128a Victoria Square|M4 5EA": { - "lodgement_date": "2015-02-04", - "found_in_old_api": true - }, - "129a Victoria Square|M4 5EA": { 
- "lodgement_date": "2010-07-07", - "found_in_old_api": true - }, - "130a Victoria Square|M4 5FA": { - "lodgement_date": "2026-02-11", - "found_in_old_api": true - }, - "131a Victoria Square|M4 5FA": { - "lodgement_date": "2025-05-29", - "found_in_old_api": true - }, - "132a Victoria Square|M4 5FA": { - "lodgement_date": "2019-12-24", - "found_in_old_api": true - }, - "134a Victoria Square|M4 5FA": { - "lodgement_date": "2011-08-18", - "found_in_old_api": true - }, - "135a Victoria Square|M4 5FA": { - "lodgement_date": "2019-09-05", - "found_in_old_api": true - }, - "136a Victoria Square|M4 5FA": { - "lodgement_date": "2025-02-14", - "found_in_old_api": true - }, - "137a Victoria Square|M4 5FA": { - "lodgement_date": "2024-07-17", - "found_in_old_api": true - }, - "138a Victoria Square|M4 5FA": { - "lodgement_date": "2023-10-11", - "found_in_old_api": true - }, - "139a Victoria Square|M4 5FA": { - "lodgement_date": "2021-06-22", - "found_in_old_api": true - }, - "140a Victoria Square|M4 5FA": { - "lodgement_date": "2020-06-15", - "found_in_old_api": true - }, - "141a Victoria Square|M4 5FA": { - "lodgement_date": "2025-12-22", - "found_in_old_api": true - }, - "142a Victoria Square|M4 5FA": { - "lodgement_date": "2025-12-22", - "found_in_old_api": true - }, - "143a Victoria Square|M4 5FA": { - "lodgement_date": "2023-01-18", - "found_in_old_api": true - }, - "144a Victoria Square|M4 5FA": { - "lodgement_date": "2011-04-04", - "found_in_old_api": true - }, - "146a Victoria Square|M4 5FA": { - "lodgement_date": "2022-09-21", - "found_in_old_api": true - }, - "147a Victoria Square|M4 5FA": { - "lodgement_date": "2011-05-04", - "found_in_old_api": true - }, - "148a Victoria Square|M4 5FA": { - "lodgement_date": "2014-11-18", - "found_in_old_api": true - }, - "149a Victoria Square|M4 5FA": { - "lodgement_date": "2009-12-14", - "found_in_old_api": true - }, - "150a Victoria Square|M4 5FA": { - "lodgement_date": "2009-12-14", - "found_in_old_api": true - }, - "152a 
Victoria Square|M4 5FA": { - "lodgement_date": "2017-06-23", - "found_in_old_api": true - }, - "154a Victoria Square|M4 5FA": { - "lodgement_date": "2025-04-29", - "found_in_old_api": true - }, - "156a Victoria Square|M4 5FA": { - "lodgement_date": "2011-04-05", - "found_in_old_api": true - }, - "157a Victoria Square|M4 5FA": { - "lodgement_date": "2023-09-11", - "found_in_old_api": true - }, - "158a Victoria Square|M4 5FA": { - "lodgement_date": "2021-12-07", - "found_in_old_api": true - }, - "160a Victoria Square|M4 5FA": { - "lodgement_date": "2011-02-04", - "found_in_old_api": true - }, - "163a Victoria Square|M4 5FA": { - "lodgement_date": "2010-02-02", - "found_in_old_api": true - }, - "164a Victoria Square|M4 5FA": { - "lodgement_date": "2020-10-19", - "found_in_old_api": true - }, - "165a Victoria Square|M4 5FA": { - "lodgement_date": "2019-12-13", - "found_in_old_api": true - } -} \ No newline at end of file From 46ec68e5db29d891deef01e130d460f708ff108b Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 13:41:59 +0000 Subject: [PATCH 33/44] save match building number --- backend/epc_client/epc_client_service.py | 6 ++--- backend/utils/addressMatch.py | 30 ++++++++++++++++++------ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/backend/epc_client/epc_client_service.py b/backend/epc_client/epc_client_service.py index d00a164f..777e8d14 100644 --- a/backend/epc_client/epc_client_service.py +++ b/backend/epc_client/epc_client_service.py @@ -40,8 +40,8 @@ class EpcClientService: return call_with_retry(lambda: self._search(postcode=postcode)) # ------------------------------------------------------------------ - # Private helpers - # ------------------------------------------------------------------ + # Private helperEpcRateLimpolarss + # ----------------------EpcRateLimpolarsEpcRateLimpolarsEpcRateLimpolarsEpcRateLimpolarsEpcRateLimpolars-------------------------------------------- def _fetch_certificate(self, cert_num: str) -> 
dict[str, Any]: resp = httpx.get( @@ -52,7 +52,7 @@ class EpcClientService: if resp.status_code == 404: raise EpcNotFoundError(cert_num) if resp.status_code == 429: - raise EpcRateLimitError("Rate limited by EPC API") + raise EpcRateLimpolars vs pandas code examplepolars vs pandas code exampleitError("Rate limited by EPC API") if not resp.is_success: raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") return resp.json()["data"] diff --git a/backend/utils/addressMatch.py b/backend/utils/addressMatch.py index 7618e9ac..69be6f59 100644 --- a/backend/utils/addressMatch.py +++ b/backend/utils/addressMatch.py @@ -101,6 +101,16 @@ class AddressMatch: tokens.append(replacement) return " ".join(tokens) + @staticmethod + def _match_building_number(token: str, next_token: Optional[str]) -> Optional[str]: + if re.fullmatch(r"\d+[a-z]", token): + return token + if re.fullmatch(r"\d+", token): + if next_token is not None and re.fullmatch(r"[a-z]", next_token): + return token + next_token + return token + return None + @staticmethod def levenshtein(a: str, b: str) -> float: """ @@ -146,13 +156,9 @@ class AddressMatch: # first remaining number is building number; recombine with a # single-letter suffix when normalisation has split "82a" → "82 a" for i, t in enumerate(cleaned): - if re.fullmatch(r"\d+[a-z]", t): - return t - if re.fullmatch(r"\d+", t): - nxt = cleaned[i + 1] if i + 1 < len(cleaned) else None - if nxt is not None and re.fullmatch(r"[a-z]", nxt): - return t + nxt - return t + nxt = cleaned[i + 1] if i + 1 < len(cleaned) else None + if (match := AddressMatch._match_building_number(t, nxt)) is not None: + return match return None @@ -259,3 +265,13 @@ def get_uprn_candidates( out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True) out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int) return out.sort_values(["lexirank", "lexiscore"], ascending=[True, False]) + + +def df_has_single_uprn(df: 
pd.DataFrame, uprn: str, column: str = "uprn") -> bool: + """Returns True if all non-null UPRNs in df match the given uprn.""" + if column not in df.columns: + return False + uprns = df[column].dropna().astype(str).str.strip().unique() + if len(uprns) == 0: + return False + return len(uprns) == 1 and uprns[0] == str(uprn) From b0e935d49710137006da287e1f5746f153faed7e Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 13:43:12 +0000 Subject: [PATCH 34/44] make sensible naming for column for address column in df --- backend/utils/addressMatch.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/utils/addressMatch.py b/backend/utils/addressMatch.py index 69be6f59..3a7e7494 100644 --- a/backend/utils/addressMatch.py +++ b/backend/utils/addressMatch.py @@ -235,11 +235,11 @@ class AddressMatch: def score_addresses( df: pd.DataFrame, user_address: str, - column: str = "address", + address_column: str = "address", ) -> pd.Series: - if column not in df.columns: - raise ValueError(f"Missing column: {column}") - return df[column].apply(lambda x: AddressMatch.score(user_address, x)) + if address_column not in df.columns: + raise ValueError(f"Missing column: {address_column}") + return df[address_column].apply(lambda x: AddressMatch.score(user_address, x)) def get_uprn_candidates( From 27f2ef5e8370cc356fa90a7f38abfb2dfcfd2e7d Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 13:46:02 +0000 Subject: [PATCH 35/44] get rid of duplicate function and make better sensible variable name --- backend/address2UPRN/main.py | 11 +++----- backend/address2UPRN/scoring.py | 2 +- backend/utils/addressMatch.py | 25 ------------------- datatypes/epc/domain/historic_epc_matching.py | 4 +-- 4 files changed, 7 insertions(+), 35 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 6006fec1..9c19eca9 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -14,11 +14,8 @@ from 
utils.s3 import ( ) from datetime import datetime -from backend.utils.addressMatch import ( - AddressMatch, - get_uprn_candidates, -) -from backend.address2UPRN.scoring import all_uprns_match +from backend.utils.addressMatch import AddressMatch +from backend.address2UPRN.scoring import all_uprns_match, rank_by_address_similarity from datatypes.epc.domain.historic_epc_matching import ( match_addresses_for_postcode, ) @@ -82,7 +79,7 @@ def get_uprn_with_epc_df( if epc_df.empty: return None - scored_df = get_uprn_candidates( + scored_df = rank_by_address_similarity( epc_df, user_address=user_inputed_address, ) @@ -174,7 +171,7 @@ def resolve_uprns_for_postcode_group( for _, row in group_df.iterrows(): user_address = str(row[address_col]).strip() - scored_df = get_uprn_candidates( + scored_df = rank_by_address_similarity( epc_df, user_address=user_address, ) diff --git a/backend/address2UPRN/scoring.py b/backend/address2UPRN/scoring.py index bfda2e71..2a681ad2 100644 --- a/backend/address2UPRN/scoring.py +++ b/backend/address2UPRN/scoring.py @@ -19,7 +19,7 @@ def all_uprns_match( return len(uprns) == 1 and uprns[0] == str(target_uprn) -def get_uprn_candidates( +def rank_by_address_similarity( df: pd.DataFrame, user_address: str, address_column: str = "address", diff --git a/backend/utils/addressMatch.py b/backend/utils/addressMatch.py index 3a7e7494..81896140 100644 --- a/backend/utils/addressMatch.py +++ b/backend/utils/addressMatch.py @@ -242,31 +242,6 @@ def score_addresses( return df[address_column].apply(lambda x: AddressMatch.score(user_address, x)) -def get_uprn_candidates( - df: pd.DataFrame, - user_address: str, - address_column: str = "address", - uprn_column: str = "uprn", -) -> pd.DataFrame: - """ - Annotate EPC results with lexicographical similarity scores and ranks. - Returns a DataFrame sorted by descending lexiscore. 
- """ - if address_column not in df.columns: - raise ValueError(f"Missing column: {address_column}") - if uprn_column not in df.columns: - raise ValueError(f"Missing column: {uprn_column}") - - out = df.copy() - user_norm = AddressMatch.normalise_address(user_address) - out["lexiscore"] = out[address_column].apply( - lambda x: AddressMatch.levenshtein(user_norm, x) - ) - out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True) - out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int) - return out.sort_values(["lexirank", "lexiscore"], ascending=[True, False]) - - def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool: """Returns True if all non-null UPRNs in df match the given uprn.""" if column not in df.columns: diff --git a/datatypes/epc/domain/historic_epc_matching.py b/datatypes/epc/domain/historic_epc_matching.py index 95ca9d9f..6ea2118b 100644 --- a/datatypes/epc/domain/historic_epc_matching.py +++ b/datatypes/epc/domain/historic_epc_matching.py @@ -4,7 +4,7 @@ from typing import Optional import pandas as pd from botocore.exceptions import ClientError -from backend.address2UPRN.scoring import get_uprn_candidates +from backend.address2UPRN.scoring import rank_by_address_similarity from backend.utils.addressMatch import AddressMatch from datatypes.epc.domain.historic_epc import HistoricEpc from utils.pandas_utils import pandas_cell_to_str @@ -85,7 +85,7 @@ def match_addresses_for_postcode( ) from e raise - scored = get_uprn_candidates( + scored = rank_by_address_similarity( df, user_address=user_address, address_column=address_column, From 8b27a5173bbbe1a348afbe901dd09b2ef6f8a349 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 14:05:30 +0000 Subject: [PATCH 36/44] fix typo for rate limit error --- backend/epc_client/epc_client_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/epc_client/epc_client_service.py 
b/backend/epc_client/epc_client_service.py index 777e8d14..abb5b826 100644 --- a/backend/epc_client/epc_client_service.py +++ b/backend/epc_client/epc_client_service.py @@ -52,7 +52,7 @@ class EpcClientService: if resp.status_code == 404: raise EpcNotFoundError(cert_num) if resp.status_code == 429: - raise EpcRateLimpolars vs pandas code examplepolars vs pandas code exampleitError("Rate limited by EPC API") + raise EpcRateLimitError("Rate limited by EPC API") if not resp.is_success: raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") return resp.json()["data"] From dfc100f78b81d2ef213c3430ffc5467510781a9a Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 12 May 2026 16:02:01 +0000 Subject: [PATCH 37/44] rank address similiarity --- backend/address2UPRN/main.py | 6 +++--- backend/address2UPRN/scoring.py | 10 +++++----- datatypes/epc/domain/historic_epc_matching.py | 4 ++-- etl/hubspot/hubspotClient.py | 17 +++++++++++++---- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 9c19eca9..389816cc 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -15,7 +15,7 @@ from utils.s3 import ( from datetime import datetime from backend.utils.addressMatch import AddressMatch -from backend.address2UPRN.scoring import all_uprns_match, rank_by_address_similarity +from backend.address2UPRN.scoring import all_uprns_match, rank_address_similarity from datatypes.epc.domain.historic_epc_matching import ( match_addresses_for_postcode, ) @@ -79,7 +79,7 @@ def get_uprn_with_epc_df( if epc_df.empty: return None - scored_df = rank_by_address_similarity( + scored_df = rank_address_similarity( epc_df, user_address=user_inputed_address, ) @@ -171,7 +171,7 @@ def resolve_uprns_for_postcode_group( for _, row in group_df.iterrows(): user_address = str(row[address_col]).strip() - scored_df = rank_by_address_similarity( + scored_df = rank_address_similarity( epc_df, 
user_address=user_address, ) diff --git a/backend/address2UPRN/scoring.py b/backend/address2UPRN/scoring.py index 2a681ad2..dcb86d49 100644 --- a/backend/address2UPRN/scoring.py +++ b/backend/address2UPRN/scoring.py @@ -19,8 +19,8 @@ def all_uprns_match( return len(uprns) == 1 and uprns[0] == str(target_uprn) -def rank_by_address_similarity( - df: pd.DataFrame, +def rank_address_similarity( + address_list_df: pd.DataFrame, user_address: str, address_column: str = "address", uprn_column: str = "uprn", @@ -32,13 +32,13 @@ def rank_by_address_similarity( DOES NOT choose or return a UPRN. """ - if address_column not in df.columns: + if address_column not in address_list_df.columns: raise ValueError(f"Missing column: {address_column}") - if uprn_column not in df.columns: + if uprn_column not in address_list_df.columns: raise ValueError(f"Missing column: {uprn_column}") - out = df.copy() + out = address_list_df.copy() user_norm = AddressMatch.normalise_address(user_address) diff --git a/datatypes/epc/domain/historic_epc_matching.py b/datatypes/epc/domain/historic_epc_matching.py index 6ea2118b..86c44b59 100644 --- a/datatypes/epc/domain/historic_epc_matching.py +++ b/datatypes/epc/domain/historic_epc_matching.py @@ -4,7 +4,7 @@ from typing import Optional import pandas as pd from botocore.exceptions import ClientError -from backend.address2UPRN.scoring import rank_by_address_similarity +from backend.address2UPRN.scoring import rank_address_similarity from backend.utils.addressMatch import AddressMatch from datatypes.epc.domain.historic_epc import HistoricEpc from utils.pandas_utils import pandas_cell_to_str @@ -85,7 +85,7 @@ def match_addresses_for_postcode( ) from e raise - scored = rank_by_address_similarity( + scored = rank_address_similarity( df, user_address=user_address, address_column=address_column, diff --git a/etl/hubspot/hubspotClient.py b/etl/hubspot/hubspotClient.py index 92a6c7e1..4c9cb1e6 100644 --- a/etl/hubspot/hubspotClient.py +++ 
b/etl/hubspot/hubspotClient.py @@ -1,6 +1,7 @@ import os import time from enum import Enum +from http import HTTPStatus from typing import Optional, cast, Callable, Any from hubspot.client import Client # type: ignore[reportMissingTypeStubs] @@ -86,19 +87,27 @@ class HubspotClient: def _call_with_retry(self, fn: Callable[[], Any], max_retries: int = 2) -> Any: """ - Call fn(), retrying up to max_retries times on 429 rate-limit errors. + Call fn(), retrying up to max_retries times on 429 rate-limit errors + or transient 5xx server errors. Waits the minimal amount: the remaining interval window reported by HubSpot headers. Falls back to the full interval (10s) if headers are absent. Note: each HubSpot sub-module (deals, companies, etc.) ships its own ApiException - class with no shared base beyond Exception, so we detect 429s via duck-typing. + class with no shared base beyond Exception, so we detect retryable statuses via duck-typing. """ + retryable_statuses = { + HTTPStatus.TOO_MANY_REQUESTS, + HTTPStatus.INTERNAL_SERVER_ERROR, + HTTPStatus.BAD_GATEWAY, + HTTPStatus.SERVICE_UNAVAILABLE, + HTTPStatus.GATEWAY_TIMEOUT, + } for attempt in range(max_retries + 1): try: return fn() except Exception as e: status = getattr(e, "status", None) - if status != 429 or attempt == max_retries: + if status not in retryable_statuses or attempt == max_retries: raise headers = getattr(e, "headers", None) or {} interval_ms = int( @@ -106,7 +115,7 @@ class HubspotClient: ) wait_s = interval_ms / 1000.0 self.logger.warning( - f"HubSpot 429 (attempt {attempt + 1}/{max_retries}), " + f"HubSpot {status} (attempt {attempt + 1}/{max_retries}), " f"waiting {wait_s:.1f}s before retry." 
) time.sleep(wait_s) From 3fd7321337d26dfa80a5e3ad8bf039bf05faeeaa Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 13 May 2026 08:18:43 +0000 Subject: [PATCH 38/44] remove comment --- backend/epc_client/epc_client_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/epc_client/epc_client_service.py b/backend/epc_client/epc_client_service.py index abb5b826..b1ed2017 100644 --- a/backend/epc_client/epc_client_service.py +++ b/backend/epc_client/epc_client_service.py @@ -41,7 +41,7 @@ class EpcClientService: # ------------------------------------------------------------------ # Private helperEpcRateLimpolarss - # ----------------------EpcRateLimpolarsEpcRateLimpolarsEpcRateLimpolarsEpcRateLimpolarsEpcRateLimpolars-------------------------------------------- + # ------------------------------------------------------------------ def _fetch_certificate(self, cert_num: str) -> dict[str, Any]: resp = httpx.get( From 566c70077a9ffd7df5a6d08d918b4496713940eb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 13 May 2026 08:29:22 +0000 Subject: [PATCH 39/44] removing redundant code --- backend/epc_client/tests/test_client.py | 67 ------------------------- backend/utils/epc_address_match.py | 67 ------------------------- 2 files changed, 134 deletions(-) delete mode 100644 backend/utils/epc_address_match.py diff --git a/backend/epc_client/tests/test_client.py b/backend/epc_client/tests/test_client.py index 849b4a25..0e95a844 100644 --- a/backend/epc_client/tests/test_client.py +++ b/backend/epc_client/tests/test_client.py @@ -2,7 +2,6 @@ from unittest.mock import MagicMock, patch, call import pytest from backend.epc_client.epc_client_service import EpcClientService -from backend.utils.epc_address_match import find_best_epc_match from datatypes.epc.search import EpcSearchResult from backend.epc_client.exceptions import EpcNotFoundError, EpcRateLimitError from datatypes.epc.domain.epc_property_data import 
EpcPropertyData @@ -132,69 +131,3 @@ def test_search_by_postcode_404_returns_empty_list(epc_service): results = epc_service.search_by_postcode("ZZ9 9ZZ") assert results == [] - - -# --------------------------------------------------------------------------- -# Tests 8-10: find_best_epc_match — real scoring, only HTTP mocked -# --------------------------------------------------------------------------- - - -def test_find_best_match_clear_winner_on_first_pass(epc_service, rdsap_21_0_1_cert): - search_rows = [ - make_search_row(cert_num="CERT-WIN", address_line_1="1 High Street"), - make_search_row(cert_num="CERT-LOSE", address_line_1="99 Nowhere Lane"), - ] - cert_response = {"data": rdsap_21_0_1_cert} - - def fake_get(url, params=None, **kwargs): - if "search" in url: - return _mock_response(200, {"data": search_rows}) - return _mock_response(200, cert_response) - - with patch("httpx.get", side_effect=fake_get): - result = find_best_epc_match(epc_service, "SW1A 1AA", "1 High Street") - - assert isinstance(result, EpcPropertyData) - - -def test_find_best_match_resolves_on_second_pass_using_full_address( - epc_service, rdsap_21_0_1_cert -): - # Both candidates share address_line_1 — round 1 is ambiguous. - # Round 2 scores against full_address and picks the correct floor. 
- search_rows = [ - make_search_row( - cert_num="CERT-A", - address_line_1="1 High Street", - address_line_2="Ground Floor", - ), - make_search_row( - cert_num="CERT-B", - address_line_1="1 High Street", - address_line_2="First Floor", - ), - ] - cert_response = {"data": rdsap_21_0_1_cert} - - def fake_get(url, params=None, **kwargs): - if "search" in url: - return _mock_response(200, {"data": search_rows}) - return _mock_response(200, cert_response) - - with patch("httpx.get", side_effect=fake_get): - result = find_best_epc_match( - epc_service, "SW1A 1AA", "1 High Street Ground Floor" - ) - - assert isinstance(result, EpcPropertyData) - - -def test_find_best_match_returns_none_when_no_good_match(epc_service): - search_rows = [make_search_row(cert_num="CERT-X", address_line_1="99 Nowhere Lane")] - - with patch("httpx.get", return_value=_mock_response(200, {"data": search_rows})): - result = find_best_epc_match( - epc_service, "SW1A 1AA", "1 Completely Different Road" - ) - - assert result is None diff --git a/backend/utils/epc_address_match.py b/backend/utils/epc_address_match.py deleted file mode 100644 index 0df56eca..00000000 --- a/backend/utils/epc_address_match.py +++ /dev/null @@ -1,67 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Optional - -from backend.utils.addressMatch import AddressMatch -from datatypes.epc.domain.epc_property_data import EpcPropertyData -from datatypes.epc.search import EpcSearchResult - -if TYPE_CHECKING: - from backend.epc_client.epc_client_service import EpcClientService - -_MIN_MATCH_SCORE = 0.6 - - -def find_best_epc_match( - service: EpcClientService, - postcode: str, - address: str, -) -> Optional[EpcPropertyData]: - candidates = service.search_by_postcode(postcode) - if not candidates: - return None - - cert_num = _pick_best_cert(candidates, address, use_full_address=False) - if cert_num: - return _safe_get(service, cert_num) - - cert_num = _pick_best_cert(candidates, address, 
use_full_address=True) - if cert_num: - return _safe_get(service, cert_num) - - return None - - -def _pick_best_cert( - candidates: list[EpcSearchResult], - user_address: str, - use_full_address: bool, -) -> Optional[str]: - scored: list[tuple[float, str]] = [ - ( - AddressMatch.score( - user_address, - r.full_address if use_full_address else r.address_line_1, - ), - r.certificate_number, - ) - for r in candidates - ] - if not scored: - return None - best_score = max(s for s, _ in scored) - if best_score < _MIN_MATCH_SCORE: - return None - top = [cert for s, cert in scored if s == best_score] - if len(top) != 1: - return None - return top[0] - - -def _safe_get(service: EpcClientService, cert_num: str) -> Optional[EpcPropertyData]: - from backend.epc_client.exceptions import EpcNotFoundError - - try: - return service.get_by_certificate_number(cert_num) - except EpcNotFoundError: - return None From c347865b9e056c6ea903a834ab2e695cf4c0ad72 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 13 May 2026 09:34:51 +0000 Subject: [PATCH 40/44] retry --- backend/epc_client/_retry.py | 7 ++++++- backend/epc_client/epc_client_service.py | 23 +++++++++++++++++++++-- backend/epc_client/exceptions.py | 7 +++++++ 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/backend/epc_client/_retry.py b/backend/epc_client/_retry.py index e290e95b..bbdd0cff 100644 --- a/backend/epc_client/_retry.py +++ b/backend/epc_client/_retry.py @@ -11,6 +11,7 @@ def call_with_retry( max_retries: int = 5, backoff_base: float = 1.0, backoff_multiplier: float = 2.0, + max_backoff: float = 60.0, ) -> T: last_exc: EpcRateLimitError | None = None for attempt in range(max_retries + 1): @@ -19,5 +20,9 @@ def call_with_retry( except EpcRateLimitError as exc: last_exc = exc if attempt < max_retries: - time.sleep(backoff_base * (backoff_multiplier ** attempt)) + if exc.retry_after is not None: + delay = exc.retry_after + else: + delay = backoff_base * (backoff_multiplier ** attempt) + 
time.sleep(min(delay, max_backoff)) raise last_exc # type: ignore[misc] diff --git a/backend/epc_client/epc_client_service.py b/backend/epc_client/epc_client_service.py index b1ed2017..86caeea3 100644 --- a/backend/epc_client/epc_client_service.py +++ b/backend/epc_client/epc_client_service.py @@ -18,6 +18,7 @@ from datatypes.epc.search import EpcSearchResult class EpcClientService: BASE_URL = "https://api.get-energy-performance-data.communities.gov.uk" + REQUEST_TIMEOUT = 10.0 def __init__(self, auth_token: str) -> None: self._headers = { @@ -25,6 +26,16 @@ class EpcClientService: "Accept": "application/json", } + @staticmethod + def _parse_retry_after(resp: httpx.Response) -> Optional[float]: + header = resp.headers.get("Retry-After") + if header is None: + return None + try: + return float(header) + except (TypeError, ValueError): + return None + def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData: raw = call_with_retry(lambda: self._fetch_certificate(cert_num)) return EpcPropertyDataMapper.from_api_response(raw) @@ -48,11 +59,15 @@ class EpcClientService: f"{self.BASE_URL}/api/certificate", params={"certificate_number": cert_num}, headers=self._headers, + timeout=self.REQUEST_TIMEOUT, ) if resp.status_code == 404: raise EpcNotFoundError(cert_num) if resp.status_code == 429: - raise EpcRateLimitError("Rate limited by EPC API") + raise EpcRateLimitError( + "Rate limited by EPC API", + retry_after=self._parse_retry_after(resp), + ) if not resp.is_success: raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") return resp.json()["data"] @@ -72,11 +87,15 @@ class EpcClientService: f"{self.BASE_URL}/api/domestic/search", params=params, headers=self._headers, + timeout=self.REQUEST_TIMEOUT, ) if resp.status_code == 404: return [] if resp.status_code == 429: - raise EpcRateLimitError("Rate limited by EPC API") + raise EpcRateLimitError( + "Rate limited by EPC API", + retry_after=self._parse_retry_after(resp), + ) if not resp.is_success: 
raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") diff --git a/backend/epc_client/exceptions.py b/backend/epc_client/exceptions.py index 49f1542a..fb7d96fa 100644 --- a/backend/epc_client/exceptions.py +++ b/backend/epc_client/exceptions.py @@ -1,3 +1,6 @@ +from typing import Optional + + class EpcApiError(Exception): """Base for all EPC client errors.""" @@ -8,3 +11,7 @@ class EpcNotFoundError(EpcApiError): class EpcRateLimitError(EpcApiError): """Raised when the API returns 429 and all retries are exhausted.""" + + def __init__(self, message: str, retry_after: Optional[float] = None) -> None: + super().__init__(message) + self.retry_after = retry_after From ff4ad07a2b719147a96530d6f1ad893230425831 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 13 May 2026 11:41:21 +0000 Subject: [PATCH 41/44] retry --- backend/epc_client/tests/test_client.py | 86 ++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/backend/epc_client/tests/test_client.py b/backend/epc_client/tests/test_client.py index 0e95a844..70425a92 100644 --- a/backend/epc_client/tests/test_client.py +++ b/backend/epc_client/tests/test_client.py @@ -8,12 +8,13 @@ from datatypes.epc.domain.epc_property_data import EpcPropertyData from backend.epc_client.tests.conftest import make_search_row -def _mock_response(status_code=200, json_data=None): +def _mock_response(status_code=200, json_data=None, headers=None): resp = MagicMock() resp.status_code = status_code resp.is_success = 200 <= status_code < 300 resp.json.return_value = json_data or {} resp.text = str(json_data) + resp.headers = headers or {} return resp @@ -63,6 +64,89 @@ def test_get_by_certificate_number_retries_on_429_and_succeeds( assert isinstance(result, EpcPropertyData) +# --------------------------------------------------------------------------- +# Test 3b: 429 with Retry-After header → sleeps for that value +# --------------------------------------------------------------------------- 
+ + +def test_429_retry_after_header_drives_sleep_duration( + epc_service, rdsap_21_0_1_cert +): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429, headers={"Retry-After": "7"}), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch( + "backend.epc_client._retry.time.sleep" + ) as mock_sleep: + epc_service.get_by_certificate_number("CERT-001") + + mock_sleep.assert_called_once_with(7.0) + + +# --------------------------------------------------------------------------- +# Test 3c: 429 without Retry-After → falls back to exponential backoff +# --------------------------------------------------------------------------- + + +def test_429_without_retry_after_uses_exponential_backoff( + epc_service, rdsap_21_0_1_cert +): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429), + _mock_response(429), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch( + "backend.epc_client._retry.time.sleep" + ) as mock_sleep: + epc_service.get_by_certificate_number("CERT-001") + + assert mock_sleep.call_args_list == [call(1.0), call(2.0)] + + +# --------------------------------------------------------------------------- +# Test 3d: malformed Retry-After header → falls back to exponential backoff +# --------------------------------------------------------------------------- + + +def test_429_malformed_retry_after_falls_back_to_backoff( + epc_service, rdsap_21_0_1_cert +): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429, headers={"Retry-After": "Wed, 21 Oct 2026 07:28:00 GMT"}), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch( + "backend.epc_client._retry.time.sleep" + ) as mock_sleep: + epc_service.get_by_certificate_number("CERT-001") + + mock_sleep.assert_called_once_with(1.0) + + +# 
--------------------------------------------------------------------------- +# Test 3e: Retry-After capped by max_backoff to avoid hostile/buggy values +# --------------------------------------------------------------------------- + + +def test_429_retry_after_capped_by_max_backoff(epc_service, rdsap_21_0_1_cert): + cert_response = {"data": rdsap_21_0_1_cert} + responses = [ + _mock_response(429, headers={"Retry-After": "9999"}), + _mock_response(200, cert_response), + ] + with patch("httpx.get", side_effect=responses), patch( + "backend.epc_client._retry.time.sleep" + ) as mock_sleep: + epc_service.get_by_certificate_number("CERT-001") + + mock_sleep.assert_called_once_with(60.0) + + # --------------------------------------------------------------------------- # Test 4: get_by_uprn empty search → None # --------------------------------------------------------------------------- From 2fb6a99956822f48f2d5ec2654c692d55c26ee68 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 13 May 2026 14:02:36 +0000 Subject: [PATCH 42/44] throttle added --- backend/address2UPRN/tests/test_csv.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/backend/address2UPRN/tests/test_csv.py b/backend/address2UPRN/tests/test_csv.py index 70e7a9f9..73d94388 100644 --- a/backend/address2UPRN/tests/test_csv.py +++ b/backend/address2UPRN/tests/test_csv.py @@ -1,12 +1,24 @@ # tests/test_address_to_uprn_csv.py import csv +import time import pytest from pathlib import Path from backend.address2UPRN.main import get_uprn FIXTURE_PATH = Path(__file__).parent / "test_data.csv" +# Delay between live EPC API calls to stay under the (undocumented) rate limit. +# Each parametrized case fires at least one EPC request; without throttling, +# GitHub-hosted runners burst fast enough to hit 429s. 
+EPC_THROTTLE_SECONDS = 1.0 + + +@pytest.fixture(autouse=True) +def _throttle_epc_requests(): + yield + time.sleep(EPC_THROTTLE_SECONDS) + def load_test_cases(): with open(FIXTURE_PATH, newline="", encoding="utf-8") as f: From 51460d1cd341fef7e2c39cecae41163bdca5b287 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 13 May 2026 14:47:24 +0000 Subject: [PATCH 43/44] route at th ebeginnign --- backend/app/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/main.py b/backend/app/main.py index 64152dc0..2904fb97 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -56,10 +56,10 @@ async def health(): return {"status": "ok", "sha": os.getenv("GITHUB_SHA", "unknown")} +app.include_router(tasks_router.router, prefix="/v1") app.include_router(portfolio_router.router, prefix="/v1") app.include_router(plan_router.router, prefix="/v1") app.include_router(whlg_router.router, prefix="/v1") -app.include_router(tasks_router.router, prefix="/v1") app.include_router(bulk_uploads_router.router, prefix="/v1") if get_settings().ENVIRONMENT == "local": From 27b5602608d57d9d09dc1957e16de45a422ac90f Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 13 May 2026 15:08:06 +0000 Subject: [PATCH 44/44] remove pandas --- backend/app/requirements/requirements.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index 80907a79..cd9bd069 100644 --- a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -16,6 +16,3 @@ pytz sqlmodel # HTTP client httpx==0.28.1 -# Data -pandas -pandas-stubs \ No newline at end of file