save so i can run it from mealcraft

This commit is contained in:
Jun-te Kim 2026-03-09 12:15:22 +00:00
parent 19ab0ad757
commit 124a34597a
6 changed files with 129 additions and 71 deletions

View file

@ -1,14 +1,14 @@
mkdir -p ~/.ipython/profile_default/startup
# mkdir -p ~/.ipython/profile_default/startup
cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
from dotenv import load_dotenv
import os
# cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
# from dotenv import load_dotenv
# import os
# Adjust path as needed
env_path = "/workspaces/model/backend/.env"
if os.path.exists(env_path):
load_dotenv(env_path)
print("✔ Loaded .env into Jupyter kernel")
else:
print("⚠ No .env file found to load")
EOF
# # Adjust path as needed
# env_path = "/workspaces/model/backend/.env"
# if os.path.exists(env_path):
# load_dotenv(env_path)
# print("✔ Loaded .env into Jupyter kernel")
# else:
# print("⚠ No .env file found to load")
# EOF

View file

@ -64,6 +64,7 @@ class Settings(BaseSettings):
ENERGY_ASSESSMENTS_BUCKET: str = "changeme"
ORDNANCE_SURVEY_API_KEY: str = "changeme"
PLAN_TRIGGER_BUCKET: str = "changeme"
# Optional AWS creds (only required in local)
AWS_ACCESS_KEY_ID: Optional[str] = None

View file

@ -0,0 +1,66 @@
import urllib.parse
from pydantic import ValidationError
import requests
import pandas as pd
from utils.logger import setup_logger
from backend.ordnanceSurvey.types import PostcodeResponse
logger = setup_logger()
def os_places_results_to_dataframe(data: dict) -> pd.DataFrame:
    """
    Flatten the OS Places API response results into a DataFrame.

    Each result contains either a DPA or LPI record; the DPA record is
    preferred when both are present. Entries carrying neither are skipped.

    Args:
        data: The decoded OS Places response (a dict with a "results" list).
            An object exposing ``model_dump()`` (e.g. the validated
            ``PostcodeResponse`` returned by ``lookup_os_places``) is also
            accepted and is converted to a plain dict first.

    Returns:
        A DataFrame with one row per DPA/LPI record; empty when there are
        no results.
    """
    # Validated pydantic models are not dict-like, so normalise them first.
    dump = getattr(data, "model_dump", None)
    if callable(dump):
        data = dump()

    # "results" may be missing or explicitly null; treat both as no results.
    results = data.get("results") or []

    rows = []
    for entry in results:
        record = entry.get("DPA")
        if record is None:
            # Dumped models carry both keys, with the unused one set to None.
            record = entry.get("LPI")
        if record is not None:
            rows.append(record)
    return pd.DataFrame(rows)
import urllib.parse
import requests
import logging
from pydantic import ValidationError
logger = logging.getLogger(__name__)
def lookup_os_places(
    postcode: str, api_key: str, timeout: float = 10.0
) -> PostcodeResponse:
    """
    Lookup a postcode using the OS Places API.

    Args:
        postcode: Postcode to search for; it is URL-encoded before use.
        api_key: Ordnance Survey API key.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A validated PostcodeResponse.

    Raises:
        ValueError: If no API key is supplied.
        RuntimeError: If the API answers with a non-200 status, or the body
            is not valid JSON, or the JSON does not match PostcodeResponse.
        requests.RequestException: On network failure or timeout (propagated
            unchanged from ``requests``).
    """
    if not api_key:
        raise ValueError("Ordnance Survey API key not specified")

    encoded_postcode = urllib.parse.quote(postcode)
    url = (
        f"https://api.os.uk/search/places/v1/postcode?postcode={encoded_postcode}"
        f"&dataset=DPA,LPI&key={api_key}"
    )

    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        logger.error(
            "OS Places API error for postcode %s: %s",
            postcode,
            response.status_code,
        )
        raise RuntimeError(f"OS Places lookup failed for postcode {postcode}")

    try:
        raw = response.json()
    except ValueError as e:
        # requests' JSON decode errors subclass ValueError; report them the
        # same way as any other malformed payload.
        logger.error("OS Places response was not valid JSON: %s", e)
        raise RuntimeError("Invalid response format from OS Places API") from e

    try:
        return PostcodeResponse.model_validate(raw)
    except ValidationError as e:
        logger.error("OS Places response validation failed: %s", e)
        raise RuntimeError("Invalid response format from OS Places API") from e

View file

@ -1,5 +1,6 @@
from typing import Any
import json
from backend.ordnanceSurvey.types import PostcodeResponse
from utils.logger import setup_logger
import logging
from backend.utils.subtasks import subtask_handler
@ -11,7 +12,7 @@ from utils.s3 import (
from backend.utils.addressMatch import addressMatch
from backend.app.db.connection import get_db_session
from backend.app.db.models.postcode_search import PostcodeSearchModel
from backend.utils.ordnance_survey import (
from backend.ordnanceSurvey.helpers import (
lookup_os_places,
os_places_results_to_dataframe,
)
@ -27,6 +28,7 @@ logger: logging.Logger = setup_logger()
def check_if_post_code_exists_in_db_cache(postcode):
postcode = "SE22 9AL"
with get_db_session() as session:
result = (
@ -43,16 +45,12 @@ def check_if_post_code_exists_in_db_cache(postcode):
# Cache miss — fetch from OS Places API
api_key = get_settings().ORDNANCE_SURVEY_API_KEY
response = lookup_os_places(postcode, api_key)
if response.get("status") != 200 or "data" not in response:
logger.error(f"OS Places API failed for {postcode}: {response}")
return pd.DataFrame()
response: PostcodeResponse = lookup_os_places(postcode, api_key)
# Save to cache
new_record = PostcodeSearchModel(
postcode=postcode,
result_data=response["data"],
result_data=response.results,
)
session.add(new_record)
session.commit()
@ -60,13 +58,6 @@ def check_if_post_code_exists_in_db_cache(postcode):
return os_places_results_to_dataframe(response["data"])
def get_ordance_survey_record(row, cache=None):
if cache is None:
cache = check_if_post_code_exists_in_db_cache(postcode)
# process cache with row
def save_results_to_s3(
results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
) -> bool:
@ -100,7 +91,7 @@ def save_results_to_s3(
logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}")
return True
else:
logger.error(f"Failed to save results to S3")
logger.error(f"Failed to save results to S3 {bucket_name}/{file_key}")
return False
except Exception as e:

View file

@ -0,0 +1,44 @@
from typing import List, Optional

from pydantic import BaseModel
class OrdnanceSurveyResponse(BaseModel):
    """
    A single address record from the OS Places API.

    Field names mirror the upper-case JSON keys of the record so the raw
    payload can be validated directly. Keys not declared here are ignored
    under pydantic's default configuration.

    NOTE(review): only the fields below were relaxed; other keys may also be
    absent on some records - confirm against the OS Places output spec.
    """

    RPC: str
    UPRN: str
    # Match score is a float in the API payload (e.g. 1.0), not an integer.
    MATCH: float
    UDPRN: str
    STATUS: str
    ADDRESS: str
    LANGUAGE: str
    POSTCODE: str
    POST_TOWN: str
    WARD_CODE: str
    ENTRY_DATE: str
    COUNTRY_CODE: str
    # Coordinates can carry a fractional part, which an ``int`` field rejects.
    X_COORDINATE: float
    Y_COORDINATE: float
    # Not every address has a building name (numbered houses do not).
    BUILDING_NAME: Optional[str] = None
    BLPU_STATE_CODE: str
    BLPU_STATE_DATE: str
    LAST_UPDATE_DATE: str
    MATCH_DESCRIPTION: str
    # Not every address has a thoroughfare.
    THOROUGHFARE_NAME: Optional[str] = None
    CLASSIFICATION_CODE: str
    LOGICAL_STATUS_CODE: str
    POSTAL_ADDRESS_CODE: str
    LOCAL_CUSTODIAN_CODE: int
    DELIVERY_POINT_SUFFIX: str
    TOPOGRAPHY_LAYER_TOID: str
    COUNTRY_CODE_DESCRIPTION: str
    BLPU_STATE_CODE_DESCRIPTION: str
    CLASSIFICATION_CODE_DESCRIPTION: str
    POSTAL_ADDRESS_CODE_DESCRIPTION: str
    LOCAL_CUSTODIAN_CODE_DESCRIPTION: str
class Result(BaseModel):
    """
    One entry of the OS Places ``results`` array.

    Each entry holds either a DPA or an LPI record, so both fields are
    optional; requiring DPA would reject every LPI-only entry.
    """

    # DPA record, validated against the typed model when present.
    DPA: Optional[OrdnanceSurveyResponse] = None
    # LPI record, kept as a raw mapping because no typed model exists for it.
    LPI: Optional[dict] = None
class PostcodeResponse(BaseModel):
    """
    Top-level payload of an OS Places postcode search.

    Only the ``results`` array is modelled; other top-level keys are ignored
    under pydantic's default configuration.
    """

    # Defaults to an empty list so a payload without a "results" key still
    # validates. Pydantic copies the default per instance, so the list
    # literal is not shared between models.
    results: List[Result] = []

View file

@ -1,44 +0,0 @@
import urllib.parse
import requests
import pandas as pd
from utils.logger import setup_logger
logger = setup_logger()
def os_places_results_to_dataframe(data: dict) -> pd.DataFrame:
    """
    Flatten the OS Places API response results into a DataFrame.

    Each result contains either a DPA or LPI record; the DPA record is
    preferred when both are present. Entries carrying neither are skipped.

    Args:
        data: The decoded OS Places response, a dict with a "results" list.

    Returns:
        A DataFrame with one row per DPA/LPI record; empty when there are
        no results.
    """
    # "results" may be missing or explicitly null; treat both as no results.
    results = data.get("results") or []

    rows = []
    for entry in results:
        record = entry.get("DPA")
        if record is None:
            record = entry.get("LPI")
        if record is not None:
            rows.append(record)
    return pd.DataFrame(rows)
def lookup_os_places(postcode: str, api_key: str, timeout: float = 10.0) -> dict:
    """
    Lookup a postcode using the OS Places API.

    Never raises for request problems: every failure is reported as an
    error dict so callers can branch on "status".

    Args:
        postcode: Postcode to search for; it is URL-encoded before use.
        api_key: Ordnance Survey API key.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        {"data": <decoded JSON>, "status": 200} on success, otherwise
        {"error": <message>, "status": <code>} where the code is 400 for a
        missing API key, the upstream HTTP status for a non-200 reply,
        503 when the request itself fails, and 502 for a non-JSON body.
    """
    if not api_key:
        return {"error": "Ordnance Survey API key not specified", "status": 400}

    encoded_postcode = urllib.parse.quote(postcode)
    url = (
        f"https://api.os.uk/search/places/v1/postcode?postcode={encoded_postcode}"
        f"&dataset=DPA,LPI&key={api_key}"
    )

    try:
        # Without a timeout requests waits indefinitely on a stalled connection.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Log only the exception type: the message can embed the request URL,
        # which contains the API key.
        logger.error(
            f"OS Places API request failed for postcode {postcode}: {type(exc).__name__}"
        )
        return {"error": "Failed to fetch address data", "status": 503}

    if response.status_code != 200:
        logger.error(f"OS Places API error for postcode {postcode}: {response.status_code}")
        return {"error": "Failed to fetch address data", "status": response.status_code}

    try:
        data = response.json()
    except ValueError:
        # requests' JSON decode errors subclass ValueError.
        logger.error(f"OS Places API returned invalid JSON for postcode {postcode}")
        return {"error": "Invalid response format from OS Places API", "status": 502}

    return {"data": data, "status": 200}