This commit is contained in:
Jun-te Kim 2026-05-22 08:15:11 +00:00
parent cf14a4e3aa
commit 5b677dedbe
8 changed files with 149 additions and 0 deletions

View file

@ -0,0 +1,34 @@
# Lambda container image for the landlord_description_overrides handler,
# built on the AWS-provided Python 3.11 Lambda base image.
FROM public.ecr.aws/lambda/python:3.11
# Postgres host/port/database are baked into the image at build time from
# the deploy workflow's --build-arg values (GitHub Actions DEV_DB_* secrets),
# mirroring backend/postcode_splitter/handler/Dockerfile. They map onto the
# POSTGRES_* names PostgresConfig.from_env reads. Username/password are NOT
# baked in -- Terraform injects those as Lambda env vars from Secrets Manager.
# If a build arg is not supplied, the matching ENV below expands to an empty
# string.
ARG DEV_DB_HOST
ARG DEV_DB_PORT
ARG DEV_DB_NAME
ENV POSTGRES_HOST=${DEV_DB_HOST}
ENV POSTGRES_PORT=${DEV_DB_PORT}
ENV POSTGRES_DATABASE=${DEV_DB_NAME}
WORKDIR /var/task
# Install dependencies before copying source so this layer is only rebuilt
# when the requirements file changes.
# NOTE(review): this installs the postcode_splitter requirements file, not one
# under applications/landlord_description_overrides/ -- confirm this is
# intended and not a copy-paste leftover.
COPY applications/postcode_splitter/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy the layered source packages the handler imports from. All paths are
# relative to the build context, so the context must be the directory that
# contains domain/, infrastructure/, orchestration/, repositories/,
# utilities/ and applications/.
# NOTE(review): the earlier wording here described "the new splitter"; it
# looks carried over from the postcode_splitter image -- confirm the same
# "no pandas, no legacy backend/" constraint applies to this handler.
COPY domain/ domain/
COPY infrastructure/ infrastructure/
COPY orchestration/ orchestration/
COPY repositories/ repositories/
COPY utilities/ utilities/
COPY applications/ applications/
# Place the handler at the Lambda task root so the runtime can resolve
# ``main.handler`` without an extra package prefix.
COPY applications/landlord_description_overrides/handler.py /var/task/main.py
CMD ["main.handler"]

View file

@ -0,0 +1,46 @@
from typing import Any
import boto3
from orchestration.landlord_description_overrides_orchestrator import (
SALOrchestrator,
)
from infrastructure.csv_s3_client import CsvS3Client
from repositories.raw_address.raw_address_csv_s3_repository import (
RawAddressCsvS3Repository,
)
from domain.addresses.raw_address import AddressList
def handler(
    body: dict[str, Any],
    context: Any,
) -> dict[str, list[str]]:
    """Lambda entry point: load raw addresses from S3 and group their descriptions.

    Work in progress: the input location is hard-coded, neither ``body`` nor
    ``context`` is read, and the computed column-to-description mapping is
    not yet part of the response.

    Args:
        body: Invocation event passed by the Lambda runtime (currently unused).
        context: Lambda context object (currently unused).

    Returns:
        A fixed placeholder payload, ``{"hello world": ["hello world"]}``.
    """
    bucket = "retrofit-data-dev"
    s3_uri = "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv"

    # boto3.client is overloaded per-service in the installed stubs; route it
    # through an ``Any``-typed alias so the strict-mode checker treats the
    # resulting client as opaque.
    make_client: Any = boto3.client  # noqa
    s3_client: Any = make_client("s3")

    # Wire the dependency chain: S3 CSV client -> repository -> orchestrator.
    orchestrator = SALOrchestrator(
        raw_address_repo=RawAddressCsvS3Repository(
            CsvS3Client(s3_client, bucket),
            bucket,
        ),
    )

    raw_addresses: AddressList = orchestrator.get_raw_addresses(
        input_s3_uri=s3_uri,
    )
    # Computed but not yet consumed; see the remaining steps below.
    col_to_desc_map = orchestrator.get_col_to_description_mappings(
        list_of_raw_address=raw_addresses,
    )

    # Remaining steps (not implemented yet):
    #   1. Read the CSV of user input.
    #   2. Collect each column and the unique variations of its descriptions,
    #      e.g. {walls: "wall variation 1", "wall variation 2"}.
    #   3. Call ChatGPT with the landlord's input and our understanding of
    #      the mapping, and return the landlord-mapped result.
    return {"hello world": ["hello world"]}

View file

@ -0,0 +1,5 @@
# Template for the Postgres connection settings. Blank values are meant to be
# filled in locally; only the port ships with a default.
# NOTE(review): presumably this is the .env.local.example that the run script
# copies to .env.local, which docker compose then loads via env_file -- confirm.
POSTGRES_HOST=
POSTGRES_PORT=5432
POSTGRES_USERNAME=
POSTGRES_PASSWORD=
POSTGRES_DATABASE=

View file

@ -0,0 +1,9 @@
# Local docker compose definition for the landlord_description_overrides
# Lambda container image.
services:
  landlord_overrides:
    build:
      # The Dockerfile COPYs domain/, infrastructure/, orchestration/,
      # repositories/, utilities/ and applications/ by relative path, so the
      # build context must be the directory that contains them.
      # NOTE(review): confirm ../../../ resolves to that directory from this
      # file's location.
      context: ../../../
      dockerfile: applications/landlord_description_overrides/Dockerfile
    ports:
      # Host port 9002 -> container port 8080; the local invoke script posts
      # to localhost:9002.
      - "9002:8080"
    env_file:
      # Created from the template by the run script on first use.
      - .env.local

View file

@ -0,0 +1,16 @@
#!/usr/bin/env python3
"""Invoke the locally running Lambda container once and print its response.

Posts a single event -- a ``Records`` list holding one record whose ``body``
is a JSON-encoded empty object -- to the container's invocation endpoint on
``localhost:9002``, then prints the HTTP status code and raw response text.
"""
import json

import requests

HOST = "localhost"
PORT = "9002"
LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"

# (connect, read) timeouts in seconds. Without a timeout ``requests`` waits
# forever, so the script would hang if the container accepts the connection
# but never answers. Connecting should be near-instant locally; the read
# timeout is generous (15 minutes, the maximum Lambda execution time) so a
# slow invocation is not cut short.
REQUEST_TIMEOUT = (5, 900)

payload = {"Records": [{"body": json.dumps({})}]}

response = requests.post(LAMBDA_URL, json=payload, timeout=REQUEST_TIMEOUT)

print("Status code:", response.status_code)
print("Response:")
print(response.text)

View file

@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Build and start the local container via docker compose.
# On first run, seed .env.local from the template and stop so it can be
# filled in before anything is built.
set -euo pipefail

# Work from the script's own directory so the relative paths below resolve
# regardless of where the script is invoked from.
cd "$(dirname "$0")"

# Guard: only reached past this point when .env.local already exists.
[ -f .env.local ] || {
    cp .env.local.example .env.local
    echo "Created .env.local from the template — fill it in, then re-run." >&2
    exit 1
}

# Rebuild from scratch, then start with freshly created containers.
docker compose build --no-cache
docker compose up --force-recreate

View file

@ -0,0 +1,4 @@
# Python dependencies installed with pip.
# NOTE(review): no versions are pinned, so each install may resolve different
# releases -- consider pinning for reproducible image builds.
boto3
pydantic
sqlmodel
psycopg2-binary

View file

@ -0,0 +1,23 @@
from repositories.raw_address.raw_address_repository import RawAddressRepository
from domain.addresses.raw_address import AddressList
class SALOrchestrator:
    """Load raw addresses and summarise the description variants they contain."""

    def __init__(self, raw_address_repo: RawAddressRepository) -> None:
        """Keep the repository that raw addresses are loaded through."""
        self._raw_address_repo = raw_address_repo

    def get_raw_addresses(
        self,
        input_s3_uri: str,
    ) -> AddressList:
        """Return whatever the repository's ``load_batch`` yields for ``input_s3_uri``."""
        repo = self._raw_address_repo
        return repo.load_batch(input_s3_uri)

    def get_col_to_description_mappings(
        self, list_of_raw_address: AddressList
    ) -> dict[str, set[str]]:
        """Group the distinct lower-cased ``additional_info`` values by key.

        Args:
            list_of_raw_address: Iterable of addresses, each exposing an
                ``additional_info`` mapping whose values support ``.lower()``.

        Returns:
            A dict from each ``additional_info`` key to the set of
            lower-cased values seen for it across all addresses; empty when
            there are no addresses.
        """
        variants_by_column: dict[str, set[str]] = {}
        for address in list_of_raw_address:
            for column, description in address.additional_info.items():
                if column not in variants_by_column:
                    variants_by_column[column] = set()
                # Lower-casing folds values that differ only by case into
                # a single variant.
                variants_by_column[column].add(description.lower())
        return variants_by_column