From 124a34597a5e5a373b757e7b2cc36e6238e6578d Mon Sep 17 00:00:00 2001
From: Jun-te Kim
Date: Mon, 9 Mar 2026 12:15:22 +0000
Subject: [PATCH] save so i can run it from mealcraft

---
Review notes (ignored by git am/apply):
- Line breaks, indentation and blank context lines were reconstructed from a
  whitespace-collapsed copy of this patch; hunk counts were recomputed to match.
- Blob ids on the "index" lines were NOT regenerated after the review fixes.
- Removed the debug override `postcode = "SE22 9AL"` from main.py.
- main.py no longer subscripts the PostcodeResponse model and caches a plain
  dict; a failed lookup returns an empty DataFrame again.
- helpers.py: dropped the duplicated imports/logger, added a request timeout
  and handling for transport and JSON errors.
- types.py: DPA/LPI are both optional on a result; "results" may be absent.

 .devcontainer/backend/post-install.sh | 24 +++++-----
 backend/app/config.py                 |  1 +
 backend/ordnanceSurvey/helpers.py     | 82 +++++++++++++++++++++++++++
 backend/ordnanceSurvey/main.py        | 30 +++++-----
 backend/ordnanceSurvey/types.py       | 64 +++++++++++++++++++++
 backend/utils/ordnance_survey.py      | 44 ------------------
 6 files changed, 174 insertions(+), 71 deletions(-)
 create mode 100644 backend/ordnanceSurvey/helpers.py
 create mode 100644 backend/ordnanceSurvey/types.py
 delete mode 100644 backend/utils/ordnance_survey.py

diff --git a/.devcontainer/backend/post-install.sh b/.devcontainer/backend/post-install.sh
index 48fbfde1..20c699d6 100644
--- a/.devcontainer/backend/post-install.sh
+++ b/.devcontainer/backend/post-install.sh
@@ -1,14 +1,14 @@
-mkdir -p ~/.ipython/profile_default/startup
+# mkdir -p ~/.ipython/profile_default/startup
 
-cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
-from dotenv import load_dotenv
-import os
+# cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
+# from dotenv import load_dotenv
+# import os
 
-# Adjust path as needed
-env_path = "/workspaces/model/backend/.env"
-if os.path.exists(env_path):
-    load_dotenv(env_path)
-    print("✔ Loaded .env into Jupyter kernel")
-else:
-    print("⚠ No .env file found to load")
-EOF
+# # Adjust path as needed
+# env_path = "/workspaces/model/backend/.env"
+# if os.path.exists(env_path):
+#     load_dotenv(env_path)
+#     print("✔ Loaded .env into Jupyter kernel")
+# else:
+#     print("⚠ No .env file found to load")
+# EOF
diff --git a/backend/app/config.py b/backend/app/config.py
index b5b29137..e87f8374 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -64,6 +64,7 @@ class Settings(BaseSettings):
     ENERGY_ASSESSMENTS_BUCKET: str = "changeme"
 
     ORDNANCE_SURVEY_API_KEY: str = "changeme"
+    PLAN_TRIGGER_BUCKET: str = "changeme"
 
     # Optional AWS creds (only required in local)
     AWS_ACCESS_KEY_ID: Optional[str] = None
diff --git a/backend/ordnanceSurvey/helpers.py b/backend/ordnanceSurvey/helpers.py
new file mode 100644
index 00000000..d59060b8
--- /dev/null
+++ b/backend/ordnanceSurvey/helpers.py
@@ -0,0 +1,82 @@
+import urllib.parse
+
+import pandas as pd
+import requests
+from pydantic import ValidationError
+
+from backend.ordnanceSurvey.types import PostcodeResponse
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+# requests applies no timeout by default; without one a stalled call never returns.
+OS_PLACES_TIMEOUT_SECONDS = 10
+
+
+def os_places_results_to_dataframe(data: dict) -> pd.DataFrame:
+    """
+    Flatten the OS Places API response results into a DataFrame.
+    Each result contains either a DPA or LPI record.
+    """
+    results = data.get("results", [])
+    rows = []
+    for r in results:
+        if "DPA" in r:
+            rows.append(r["DPA"])
+        elif "LPI" in r:
+            rows.append(r["LPI"])
+    return pd.DataFrame(rows)
+
+
+def lookup_os_places(postcode: str, api_key: str) -> PostcodeResponse:
+    """
+    Lookup a postcode using the OS Places API.
+
+    Returns a validated PostcodeResponse.
+    Raises ValueError when no API key is supplied, and RuntimeError when the
+    request fails, returns a non-200 status, or the body cannot be validated.
+    """
+    if not api_key:
+        raise ValueError("Ordnance Survey API key not specified")
+
+    encoded_postcode = urllib.parse.quote(postcode)
+
+    url = (
+        f"https://api.os.uk/search/places/v1/postcode?postcode={encoded_postcode}"
+        f"&dataset=DPA,LPI&key={api_key}"
+    )
+
+    try:
+        response = requests.get(url, timeout=OS_PLACES_TIMEOUT_SECONDS)
+    except requests.RequestException as e:
+        # Log only the exception type: its message embeds the URL, which carries
+        # the API key. "from None" keeps it out of the chained traceback too.
+        logger.error(
+            "OS Places request failed for postcode %s: %s",
+            postcode,
+            type(e).__name__,
+        )
+        raise RuntimeError(
+            f"OS Places lookup failed for postcode {postcode}"
+        ) from None
+
+    if response.status_code != 200:
+        logger.error(
+            "OS Places API error for postcode %s: %s",
+            postcode,
+            response.status_code,
+        )
+        raise RuntimeError(f"OS Places lookup failed for postcode {postcode}")
+
+    try:
+        raw = response.json()
+    except ValueError as e:
+        # requests raises a ValueError subclass when the body is not JSON
+        logger.error("OS Places response was not valid JSON: %s", e)
+        raise RuntimeError("Invalid response format from OS Places API") from e
+
+    try:
+        return PostcodeResponse.model_validate(raw)
+    except ValidationError as e:
+        logger.error("OS Places response validation failed: %s", e)
+        raise RuntimeError("Invalid response format from OS Places API") from e
diff --git a/backend/ordnanceSurvey/main.py b/backend/ordnanceSurvey/main.py
index bf8cfdaf..835910f5 100644
--- a/backend/ordnanceSurvey/main.py
+++ b/backend/ordnanceSurvey/main.py
@@ -1,5 +1,6 @@
 from typing import Any
 import json
+from backend.ordnanceSurvey.types import PostcodeResponse
 from utils.logger import setup_logger
 import logging
 from backend.utils.subtasks import subtask_handler
@@ -11,7 +12,7 @@ from utils.s3 import (
 from backend.utils.addressMatch import addressMatch
 from backend.app.db.connection import get_db_session
 from backend.app.db.models.postcode_search import PostcodeSearchModel
-from backend.utils.ordnance_survey import (
+from backend.ordnanceSurvey.helpers import (
     lookup_os_places,
     os_places_results_to_dataframe,
 )
@@ -43,30 +44,29 @@ def check_if_post_code_exists_in_db_cache(postcode):
 
         # Cache miss — fetch from OS Places API
         api_key = get_settings().ORDNANCE_SURVEY_API_KEY
-        response = lookup_os_places(postcode, api_key)
-
-        if response.get("status") != 200 or "data" not in response:
-            logger.error(f"OS Places API failed for {postcode}: {response}")
+        try:
+            response: PostcodeResponse = lookup_os_places(postcode, api_key)
+        except (ValueError, RuntimeError) as e:
+            # Same contract as before the refactor: a failed lookup is logged
+            # and yields an empty frame instead of propagating.
+            logger.error(f"OS Places API failed for {postcode}: {e}")
             return pd.DataFrame()
 
+        # Cache plain dicts (not pydantic objects) so the stored value keeps the
+        # {"results": [...]} shape that os_places_results_to_dataframe reads.
+        result_data = response.model_dump(exclude_none=True)
+
         # Save to cache
         new_record = PostcodeSearchModel(
             postcode=postcode,
-            result_data=response["data"],
+            result_data=result_data,
         )
         session.add(new_record)
         session.commit()
 
-        return os_places_results_to_dataframe(response["data"])
+        return os_places_results_to_dataframe(result_data)
 
 
-def get_ordance_survey_record(row, cache=None):
-    if cache is None:
-        cache = check_if_post_code_exists_in_db_cache(postcode)
-
-    # process cache with row
-
-
 def save_results_to_s3(
     results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
 ) -> bool:
@@ -100,7 +100,7 @@ def save_results_to_s3(
             logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}")
             return True
         else:
-            logger.error(f"Failed to save results to S3")
+            logger.error(f"Failed to save results to S3 {bucket_name}/{file_key}")
             return False
 
     except Exception as e:
diff --git a/backend/ordnanceSurvey/types.py b/backend/ordnanceSurvey/types.py
new file mode 100644
index 00000000..0631ff67
--- /dev/null
+++ b/backend/ordnanceSurvey/types.py
@@ -0,0 +1,64 @@
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel, ConfigDict
+
+
+class OrdnanceSurveyResponse(BaseModel):
+    """A single DPA record from an OS Places API result.
+
+    NOTE(review): the field list looks derived from one sample record, so only
+    UPRN, ADDRESS and POSTCODE stay mandatory. Confirm optionality and numeric
+    types against the OS Places technical specification.
+    """
+
+    # Keep attributes not declared below, so dumping the model does not drop
+    # data the API returned.
+    model_config = ConfigDict(extra="allow")
+
+    RPC: Optional[str] = None
+    UPRN: str
+    MATCH: Optional[float] = None
+    UDPRN: Optional[str] = None
+    STATUS: Optional[str] = None
+    ADDRESS: str
+    LANGUAGE: Optional[str] = None
+    POSTCODE: str
+    POST_TOWN: Optional[str] = None
+    WARD_CODE: Optional[str] = None
+    ENTRY_DATE: Optional[str] = None
+    COUNTRY_CODE: Optional[str] = None
+    X_COORDINATE: Optional[float] = None
+    Y_COORDINATE: Optional[float] = None
+    BUILDING_NAME: Optional[str] = None
+    BLPU_STATE_CODE: Optional[str] = None
+    BLPU_STATE_DATE: Optional[str] = None
+    LAST_UPDATE_DATE: Optional[str] = None
+    MATCH_DESCRIPTION: Optional[str] = None
+    THOROUGHFARE_NAME: Optional[str] = None
+    CLASSIFICATION_CODE: Optional[str] = None
+    LOGICAL_STATUS_CODE: Optional[str] = None
+    POSTAL_ADDRESS_CODE: Optional[str] = None
+    LOCAL_CUSTODIAN_CODE: Optional[int] = None
+    DELIVERY_POINT_SUFFIX: Optional[str] = None
+    TOPOGRAPHY_LAYER_TOID: Optional[str] = None
+    COUNTRY_CODE_DESCRIPTION: Optional[str] = None
+    BLPU_STATE_CODE_DESCRIPTION: Optional[str] = None
+    CLASSIFICATION_CODE_DESCRIPTION: Optional[str] = None
+    POSTAL_ADDRESS_CODE_DESCRIPTION: Optional[str] = None
+    LOCAL_CUSTODIAN_CODE_DESCRIPTION: Optional[str] = None
+
+
+class Result(BaseModel):
+    """One entry of the response's results list: a DPA or an LPI record."""
+
+    DPA: Optional[OrdnanceSurveyResponse] = None
+    # LPI records are kept untyped; presumably their fields differ from DPA.
+    LPI: Optional[Dict[str, Any]] = None
+
+
+class PostcodeResponse(BaseModel):
+    """Top-level body of an OS Places postcode lookup."""
+
+    # NOTE(review): defaulted so a body without "results" still validates —
+    # confirm that is how the API reports a postcode with no matches.
+    results: List[Result] = []
diff --git a/backend/utils/ordnance_survey.py b/backend/utils/ordnance_survey.py
deleted file mode 100644
index 03a0e57b..00000000
--- a/backend/utils/ordnance_survey.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import urllib.parse
-import requests
-import pandas as pd
-from utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-def os_places_results_to_dataframe(data: dict) -> pd.DataFrame:
-    """
-    Flatten the OS Places API response results into a DataFrame.
-    Each result contains either a DPA or LPI record.
-    """
-    results = data.get("results", [])
-    rows = []
-    for r in results:
-        if "DPA" in r:
-            rows.append(r["DPA"])
-        elif "LPI" in r:
-            rows.append(r["LPI"])
-    return pd.DataFrame(rows)
-
-
-def lookup_os_places(postcode: str, api_key: str) -> dict:
-    """
-    Lookup a postcode using the OS Places API.
-    Returns the full API response data or an error dict.
-    """
-    if not api_key:
-        return {"error": "Ordnance Survey API key not specified", "status": 400}
-
-    encoded_postcode = urllib.parse.quote(postcode)
-    url = (
-        f"https://api.os.uk/search/places/v1/postcode?postcode={encoded_postcode}"
-        f"&dataset=DPA,LPI&key={api_key}"
-    )
-
-    response = requests.get(url)
-    if response.status_code != 200:
-        logger.error(f"OS Places API error for postcode {postcode}: {response.status_code}")
-        return {"error": "Failed to fetch address data", "status": response.status_code}
-
-    data = response.json()
-    return {"data": data, "status": 200}