better smoke tests to check what is failign

This commit is contained in:
Jun-te Kim 2026-06-17 11:53:33 +00:00
parent d08c35ee03
commit 8abd5e8b54
3 changed files with 95 additions and 25 deletions

View file

@ -2,7 +2,7 @@ version: '3.8'
# Unique Compose project name so this repo's devcontainer doesn't collide with
# other model-* clones (which all live in .devcontainer/backend/ and would
# otherwise default to the same project name "backend", clobbering each other).
name: model-backend
name: landlord-backend
services:
model-backend:

View file

@ -1,5 +1,7 @@
FROM public.ecr.aws/lambda/python:3.10
# FROM python:3.11.10-bullseye
# 3.11: domain/modelling/measure_type.py (pulled in transitively via
# backend.app.db.models -> infrastructure.postgres.modelling -> domain) uses
# enum.StrEnum, which only exists in Python 3.11+.
FROM public.ecr.aws/lambda/python:3.11
# Set working directory (Lambda task root)
WORKDIR /var/task
@ -17,6 +19,11 @@ RUN pip install --no-cache-dir -r requirements.txt
COPY backend/ backend/
COPY utils/ utils/
COPY datatypes/ datatypes/
# main -> backend.app.db.models.{epc_property,recommendations} ->
# infrastructure.postgres.{epc_property_table,modelling} -> domain.modelling.
# Without these the lambda fails at init with "No module named 'infrastructure'".
COPY infrastructure/ infrastructure/
COPY domain/ domain/
COPY etl/hubspot etl/hubspot
# Copy the handler

View file

@ -64,23 +64,51 @@ def _is_type_checking(test: ast.expr) -> bool:
return False
def _file_package_parts(path: Path) -> list[str]:
"""The components of ``__package__`` Python assigns when importing ``path``.
For a regular module ``a/b/c.py`` and for a package ``a/b/__init__.py`` alike
this is the containing directory (``["a", "b"]``) i.e. the anchor that
``from . import x`` resolves against."""
return list(path.relative_to(REPO_ROOT).parts)[:-1]
def _import_time_imports(path: Path) -> list[str]:
"""Absolute module names imported when ``path`` is imported (i.e. at Lambda
init). Descends into module-level if/try/with and class bodies, but not into
function bodies (lazy) or ``if TYPE_CHECKING:`` blocks (never executed)."""
function bodies (lazy) or ``if TYPE_CHECKING:`` blocks (never executed).
Relative imports (``from .x import y``) are resolved to their absolute name
against ``path``'s package — the codebase re-exports through package
``__init__.py`` files this way, so dropping them would hide real init-time
dependencies (e.g. ``functions/__init__.py`` -> ``from .portfolio_functions
import *`` -> ... -> ``infrastructure``)."""
try:
tree = ast.parse(path.read_text(encoding="utf-8"), str(path))
except (SyntaxError, UnicodeDecodeError):
return []
pkg_parts = _file_package_parts(path)
out: list[str] = []
def _relative_base(level: int) -> list[str]:
# level 1 anchors on the package itself; each extra level climbs one up.
keep = len(pkg_parts) - (level - 1)
return pkg_parts[:keep] if keep > 0 else []
def visit(stmts: list[ast.stmt]) -> None:
for node in stmts:
if isinstance(node, ast.Import):
out.extend(alias.name for alias in node.names)
elif isinstance(node, ast.ImportFrom):
if not node.level and node.module: # absolute imports only
out.append(node.module)
if not node.level: # absolute import
if node.module:
out.append(node.module)
else: # relative import — resolve against this file's package
base = _relative_base(node.level)
if node.module: # from .pkg.mod import name
out.append(".".join(base + node.module.split(".")))
else: # from . import a, b -> base.a, base.b (submodules)
out.extend(".".join(base + [alias.name]) for alias in node.names)
elif isinstance(node, ast.If):
if _is_type_checking(node.test):
continue
@ -102,17 +130,27 @@ def _import_time_imports(path: Path) -> list[str]:
return out
def _module_to_file(module: str) -> Optional[Path]:
"""Resolve a dotted module to its repo source file (``foo.bar`` ->
``foo/bar.py`` or ``foo/bar/__init__.py``)."""
base = REPO_ROOT.joinpath(*module.split("."))
py = base.with_suffix(".py")
if py.is_file():
return py
init = base / "__init__.py"
if init.is_file():
return init
return None
def _module_files(module: str) -> list[Path]:
"""Every repo file executed when ``module`` is imported: the module's own
file *plus* each ancestor package's ``__init__.py``.
Importing ``a.b.c`` runs ``a/__init__.py``, ``a/b/__init__.py`` and
``a/b/c.py`` (or ``a/b/c/__init__.py``) in turn so an ``__init__.py`` part
way down the path can pull in a whole subtree (and the package it lives in
must be COPYed). ``_module_to_file`` resolves only the leaf, which is why the
closure used to stop short of those intermediate packages."""
parts = module.split(".")
files: list[Path] = []
for depth in range(1, len(parts) + 1):
base = REPO_ROOT.joinpath(*parts[:depth])
init = base / "__init__.py"
if init.is_file():
files.append(init)
if depth == len(parts): # the leaf may be a plain module file
leaf = base.with_suffix(".py")
if leaf.is_file():
files.append(leaf)
return files
def _import_closure(start: Path) -> dict[Path, Optional[Path]]:
@ -128,9 +166,9 @@ def _import_closure(start: Path) -> dict[Path, Optional[Path]]:
for module in _import_time_imports(path):
if module.split(".")[0] not in _TOP:
continue # stdlib / third-party — not our concern here
target = _module_to_file(module)
if target is not None and target not in reached:
stack.append((target, path))
for target in _module_files(module):
if target not in reached:
stack.append((target, path))
return reached
@ -206,6 +244,21 @@ def _is_copied(rel_path: str, copies: list[tuple[list[str], str]]) -> bool:
return False
def _package_dir_present(pkg_rel: str, copies: list[tuple[list[str], str]]) -> bool:
"""Whether the image will contain ``pkg_rel`` as a directory because some
COPY brings in a file beneath it. Used to excuse an un-copied package
``__init__.py``: in Python 3 a directory present without its ``__init__.py``
imports fine as a *namespace package*, so the missing ``__init__`` is not a
cold-start ``ModuleNotFoundError`` (only a wholly-absent package is)."""
pkg_rel = _norm(pkg_rel)
for sources, _dest in copies:
for src in sources:
src_norm = _norm(src)
if src_norm == pkg_rel or src_norm.startswith(pkg_rel + "/"):
return True
return False
def _discover_handler_dockerfiles() -> list[Path]:
found: list[Path] = []
for path in REPO_ROOT.rglob("*Dockerfile*"):
@ -253,11 +306,21 @@ def test_lambda_image_copies_full_import_closure(dockerfile: Path) -> None:
missing: list[str] = []
for reached, importer in _import_closure(handler_file).items():
rel = str(reached.relative_to(REPO_ROOT))
if not _is_copied(rel, copies):
blame = (
str(importer.relative_to(REPO_ROOT)) if importer else "(handler entrypoint)"
)
missing.append(f" - {rel}\n imported by {blame}")
if _is_copied(rel, copies):
continue
# An un-copied package __init__.py is non-fatal when its directory still
# exists in the image (some other file under it is copied): Python falls
# back to a namespace package. We still traverse such __init__ files for
# their imports above; we just don't demand they be copied. A wholly
# absent package (no file under it copied) is a real ModuleNotFoundError.
if reached.name == "__init__.py" and _package_dir_present(
str(reached.parent.relative_to(REPO_ROOT)), copies
):
continue
blame = (
str(importer.relative_to(REPO_ROOT)) if importer else "(handler entrypoint)"
)
missing.append(f" - {rel}\n imported by {blame}")
assert not missing, (
f"{dockerfile.relative_to(REPO_ROOT)} runs `{spec}` but does not COPY "