"""Static packaging linter for Lambda container images. Every Lambda here ships as a Docker image that copies a *subset* of the repo (``COPY utils/ utils/``, ``COPY backend/ backend/``, ...) and then runs a handler via ``CMD [".handler"]``. If the handler's import graph reaches a top-level package the Dockerfile forgot to ``COPY``, the function dies at cold start with ``Runtime.ImportModuleError: No module named ''`` — but only *inside the image*. In the dev/test tree every package is present, so a plain ``import`` test can't see the gap. This is exactly how ``No module named 'domain'`` reached a deployed address2UPRN. The RIE smoke tests (.github/workflows/_smoke_test_lambda.yml) catch this too, but only by building the full image (minutes) and only for hand-listed services. This test catches the same class of bug in milliseconds, locally, for *every* handler Dockerfile — by statically computing each handler's import-time module graph and asserting every repo file it reaches is copied into the image. Scope: import-time (module-level) imports only — the ones that run at Lambda init, which is what ImportModuleError is about. Imports inside function bodies and under ``if TYPE_CHECKING:`` are deliberately ignored. Third-party / stdlib imports are out of scope (that's requirements.txt's job, covered by the RIE smoke test actually installing and importing). """ from __future__ import annotations import ast import json import re from pathlib import Path from typing import Optional import pytest REPO_ROOT = Path(__file__).resolve().parents[1] # Dockerfiles that are not Lambda handlers (test harness, dev containers). _SKIP_DOCKERFILES = {"Dockerfile.test", "Dockerfile.test.dockerignore"} _SKIP_PARTS = {".git", "node_modules", ".devcontainer"} def _toplevel_names() -> set[str]: """Top-level repo packages/modules — the namespace handler imports resolve against (imports are absolute: ``domain.x``, ``backend.y``).""" names: set[str] = set() for p in REPO_ROOT.iterdir(): if p.name.startswith(".") or p.name == "__pycache__": continue if p.is_dir(): names.add(p.name) elif p.suffix == ".py": names.add(p.stem) return names _TOP = _toplevel_names() def _is_type_checking(test: ast.expr) -> bool: if isinstance(test, ast.Name): return test.id == "TYPE_CHECKING" if isinstance(test, ast.Attribute): return test.attr == "TYPE_CHECKING" return False def _import_time_imports(path: Path) -> list[str]: """Absolute module names imported when ``path`` is imported (i.e. at Lambda init). Descends into module-level if/try/with and class bodies, but not into function bodies (lazy) or ``if TYPE_CHECKING:`` blocks (never executed).""" try: tree = ast.parse(path.read_text(encoding="utf-8"), str(path)) except (SyntaxError, UnicodeDecodeError): return [] out: list[str] = [] def visit(stmts: list[ast.stmt]) -> None: for node in stmts: if isinstance(node, ast.Import): out.extend(alias.name for alias in node.names) elif isinstance(node, ast.ImportFrom): if not node.level and node.module: # absolute imports only out.append(node.module) elif isinstance(node, ast.If): if _is_type_checking(node.test): continue visit(node.body) visit(node.orelse) elif isinstance(node, ast.Try): visit(node.body) visit(node.orelse) visit(node.finalbody) for handler in node.handlers: visit(handler.body) elif isinstance(node, ast.With): visit(node.body) elif isinstance(node, ast.ClassDef): visit(node.body) # FunctionDef / AsyncFunctionDef bodies are intentionally skipped. visit(tree.body) return out def _module_to_file(module: str) -> Optional[Path]: """Resolve a dotted module to its repo source file (``foo.bar`` -> ``foo/bar.py`` or ``foo/bar/__init__.py``).""" base = REPO_ROOT.joinpath(*module.split(".")) py = base.with_suffix(".py") if py.is_file(): return py init = base / "__init__.py" if init.is_file(): return init return None def _import_closure(start: Path) -> dict[Path, Optional[Path]]: """Repo files reachable from ``start`` via import-time imports, mapped to the first file that imported each (for blame in failure messages).""" reached: dict[Path, Optional[Path]] = {} stack: list[tuple[Path, Optional[Path]]] = [(start, None)] while stack: path, importer = stack.pop() if path in reached: continue reached[path] = importer for module in _import_time_imports(path): if module.split(".")[0] not in _TOP: continue # stdlib / third-party — not our concern here target = _module_to_file(module) if target is not None and target not in reached: stack.append((target, path)) return reached def _norm(path_token: str) -> str: return path_token.lstrip("./").rstrip("/") def _parse_handler_spec(dockerfile_text: str) -> Optional[str]: """The ``.handler`` string from the ``CMD`` line, or None if this isn't a Lambda handler image.""" match = re.search(r"^CMD\s+(\[.*\]|.+)$", dockerfile_text, re.MULTILINE) if not match: return None raw = match.group(1).strip() try: parsed = json.loads(raw) spec = parsed[0] if isinstance(parsed, list) and parsed else raw except json.JSONDecodeError: spec = raw.strip('"') return spec if isinstance(spec, str) and spec.endswith(".handler") else None def _parse_copies(dockerfile_text: str) -> list[tuple[list[str], str]]: """``COPY`` instructions as (sources, dest), dropping ``--flag`` tokens.""" copies: list[tuple[list[str], str]] = [] for match in re.finditer(r"^COPY\s+(.+)$", dockerfile_text, re.MULTILINE): tokens = [t for t in match.group(1).split() if not t.startswith("--")] if len(tokens) < 2: continue *sources, dest = tokens copies.append((sources, dest)) return copies def _resolve_handler_file( spec: str, copies: list[tuple[list[str], str]] ) -> Optional[Path]: """Map a handler spec to its repo source file. Handles both in-place layouts (``backend.foo.handler`` -> ``backend/foo/ handler.py``, present via ``COPY backend/ backend/``) and root-placed handlers (``main.handler`` where a ``COPY /var/task/main.py`` or ``COPY /main.py .`` puts the file at the image root).""" module_path, _func = spec.rsplit(".", 1) direct = REPO_ROOT / (module_path.replace(".", "/") + ".py") if direct.is_file(): return direct # Root-placed module: find the COPY whose destination basename matches. wanted = module_path.split("/")[-1] + ".py" for sources, dest in copies: dest_norm = _norm(dest) dest_is_named_file = Path(dest_norm).name == wanted dest_is_dir = dest_norm in ("", module_path.split("/")[-1]) or dest.endswith("/") for src in sources: src_path = REPO_ROOT / _norm(src) if not src_path.is_file(): continue if dest_is_named_file or (dest_is_dir and src_path.name == wanted): return src_path return None def _is_copied(rel_path: str, copies: list[tuple[list[str], str]]) -> bool: """Whether a repo-relative file path lands in the image via some COPY.""" rel_path = _norm(rel_path) for sources, _dest in copies: for src in sources: src_norm = _norm(src) if src_norm == "" or src_norm == rel_path or rel_path.startswith(src_norm + "/"): return True return False def _discover_handler_dockerfiles() -> list[Path]: found: list[Path] = [] for path in REPO_ROOT.rglob("*Dockerfile*"): if path.name in _SKIP_DOCKERFILES: continue if any(part in _SKIP_PARTS for part in path.relative_to(REPO_ROOT).parts): continue try: text = path.read_text(encoding="utf-8") except OSError: continue if _parse_handler_spec(text): found.append(path) return sorted(found) _HANDLER_DOCKERFILES = _discover_handler_dockerfiles() def test_handler_dockerfiles_discovered() -> None: """Guard against the discovery silently finding nothing (e.g. a refactor that renames Dockerfiles), which would make every check below vacuous.""" assert _HANDLER_DOCKERFILES, "no Lambda handler Dockerfiles found under repo root" @pytest.mark.parametrize( "dockerfile", _HANDLER_DOCKERFILES, ids=[str(p.relative_to(REPO_ROOT)) for p in _HANDLER_DOCKERFILES], ) def test_lambda_image_copies_full_import_closure(dockerfile: Path) -> None: """Every repo file the handler imports at init must be COPYed into the image.""" text = dockerfile.read_text(encoding="utf-8") spec = _parse_handler_spec(text) assert spec is not None # discovery guaranteed this copies = _parse_copies(text) handler_file = _resolve_handler_file(spec, copies) assert handler_file is not None, ( f"{dockerfile.relative_to(REPO_ROOT)}: could not locate the source file " f"for CMD handler {spec!r}. Update _resolve_handler_file if this is a new " f"handler layout." ) missing: list[str] = [] for reached, importer in _import_closure(handler_file).items(): rel = str(reached.relative_to(REPO_ROOT)) if not _is_copied(rel, copies): blame = ( str(importer.relative_to(REPO_ROOT)) if importer else "(handler entrypoint)" ) missing.append(f" - {rel}\n imported by {blame}") assert not missing, ( f"{dockerfile.relative_to(REPO_ROOT)} runs `{spec}` but does not COPY " f"{len(missing)} file(s) it imports at init. The Lambda will fail at cold " f"start with Runtime.ImportModuleError. Add the missing top-level " f"package(s) as `COPY / /`:\n" + "\n".join(sorted(missing)) )