mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
more tests to ensure we don't deploy something that is brokern
This commit is contained in:
parent
a3c80b6691
commit
4e02eb7c77
6 changed files with 288 additions and 0 deletions
|
|
@ -25,6 +25,9 @@ COPY infrastructure/ infrastructure/
|
|||
COPY orchestration/ orchestration/
|
||||
COPY repositories/ repositories/
|
||||
COPY utilities/ utilities/
|
||||
# domain.property.site_notes -> datatypes.epc.domain.epc_property_data; without
|
||||
# this the lambda fails at init with "No module named 'datatypes'".
|
||||
COPY datatypes/ datatypes/
|
||||
COPY applications/ applications/
|
||||
|
||||
# Place the handler at the Lambda task root so the runtime can resolve
|
||||
|
|
|
|||
|
|
@ -35,6 +35,9 @@ COPY datatypes/ datatypes/
|
|||
# main.py imports infrastructure.epc_client.epc_client_service (EpcClientService);
|
||||
# without this the lambda fails at init with "No module named 'infrastructure'".
|
||||
COPY infrastructure/ infrastructure/
|
||||
# EpcClientService -> datatypes.epc.domain.mapper -> domain.sap10_calculator;
|
||||
# without this the lambda fails at init with "No module named 'domain'".
|
||||
COPY domain/ domain/
|
||||
|
||||
# Copy the handler
|
||||
COPY backend/address2UPRN/main.py .
|
||||
|
|
|
|||
|
|
@ -40,6 +40,9 @@ COPY --from=build-image /usr/local/lib/python3.11/site-packages/ /usr/local/lib/
|
|||
COPY ./backend/ ./backend
|
||||
COPY ./recommendations/ ./recommendations
|
||||
COPY ./utils/ ./utils/
|
||||
# engine.py -> backend.apis.GoogleSolarApi -> infrastructure.solar; without this
|
||||
# the lambda fails at init with "No module named 'infrastructure'".
|
||||
COPY ./infrastructure/ ./infrastructure/
|
||||
COPY ./etl/epc/ ./etl/epc/
|
||||
COPY ./etl/epc_clean/ ./etl/epc_clean/
|
||||
COPY ./etl/bill_savings/ ./etl/bill_savings/
|
||||
|
|
|
|||
|
|
@ -13,6 +13,12 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|||
COPY utils/ utils/
|
||||
COPY backend/ backend/
|
||||
COPY datatypes/ datatypes/
|
||||
# handler -> ecmk_service -> documents_parser.parser -> datatypes.epc.domain.mapper
|
||||
# -> domain.sap10_calculator, and -> db_writer -> epc_property model
|
||||
# -> infrastructure.postgres. Without these the lambda fails at init with
|
||||
# "No module named 'domain'" / "'infrastructure'".
|
||||
COPY domain/ domain/
|
||||
COPY infrastructure/ infrastructure/
|
||||
|
||||
# Local lambda entrypoint
|
||||
# ENTRYPOINT ["/usr/local/bin/aws-lambda-rie", "python", "-m", "awslambdaric"]
|
||||
|
|
|
|||
|
|
@ -10,6 +10,12 @@ WORKDIR /var/task
|
|||
COPY utils/ utils/
|
||||
COPY backend/ backend/
|
||||
COPY datatypes/ datatypes/
|
||||
# handler -> pashub_service -> documents_parser.parser -> datatypes.epc.domain.mapper
|
||||
# -> domain.sap10_calculator, and -> db_writer -> epc_property model
|
||||
# -> infrastructure.postgres. Without these the lambda fails at init with
|
||||
# "No module named 'domain'" / "'infrastructure'".
|
||||
COPY domain/ domain/
|
||||
COPY infrastructure/ infrastructure/
|
||||
|
||||
COPY backend/pashub_fetcher/handler/requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
|
|
|||
267
tests/test_lambda_packaging.py
Normal file
267
tests/test_lambda_packaging.py
Normal file
|
|
@ -0,0 +1,267 @@
|
|||
"""Static packaging linter for Lambda container images.
|
||||
|
||||
Every Lambda here ships as a Docker image that copies a *subset* of the repo
|
||||
(``COPY utils/ utils/``, ``COPY backend/ backend/``, ...) and then runs a
|
||||
handler via ``CMD ["<module>.handler"]``. If the handler's import graph reaches
|
||||
a top-level package the Dockerfile forgot to ``COPY``, the function dies at cold
|
||||
start with ``Runtime.ImportModuleError: No module named '<pkg>'`` — but only
|
||||
*inside the image*. In the dev/test tree every package is present, so a plain
|
||||
``import`` test can't see the gap. This is exactly how ``No module named
|
||||
'domain'`` reached a deployed address2UPRN.
|
||||
|
||||
The RIE smoke tests (.github/workflows/_smoke_test_lambda.yml) catch this too,
|
||||
but only by building the full image (minutes) and only for hand-listed services.
|
||||
This test catches the same class of bug in milliseconds, locally, for *every*
|
||||
handler Dockerfile — by statically computing each handler's import-time module
|
||||
graph and asserting every repo file it reaches is copied into the image.
|
||||
|
||||
Scope: import-time (module-level) imports only — the ones that run at Lambda
|
||||
init, which is what ImportModuleError is about. Imports inside function bodies
|
||||
and under ``if TYPE_CHECKING:`` are deliberately ignored. Third-party / stdlib
|
||||
imports are out of scope (that's requirements.txt's job, covered by the RIE
|
||||
smoke test actually installing and importing).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
|
||||
# Dockerfiles that are not Lambda handlers (test harness, dev containers).
|
||||
_SKIP_DOCKERFILES = {"Dockerfile.test", "Dockerfile.test.dockerignore"}
|
||||
_SKIP_PARTS = {".git", "node_modules", ".devcontainer"}
|
||||
|
||||
|
||||
def _toplevel_names() -> set[str]:
|
||||
"""Top-level repo packages/modules — the namespace handler imports resolve
|
||||
against (imports are absolute: ``domain.x``, ``backend.y``)."""
|
||||
names: set[str] = set()
|
||||
for p in REPO_ROOT.iterdir():
|
||||
if p.name.startswith(".") or p.name == "__pycache__":
|
||||
continue
|
||||
if p.is_dir():
|
||||
names.add(p.name)
|
||||
elif p.suffix == ".py":
|
||||
names.add(p.stem)
|
||||
return names
|
||||
|
||||
|
||||
_TOP = _toplevel_names()
|
||||
|
||||
|
||||
def _is_type_checking(test: ast.expr) -> bool:
|
||||
if isinstance(test, ast.Name):
|
||||
return test.id == "TYPE_CHECKING"
|
||||
if isinstance(test, ast.Attribute):
|
||||
return test.attr == "TYPE_CHECKING"
|
||||
return False
|
||||
|
||||
|
||||
def _import_time_imports(path: Path) -> list[str]:
|
||||
"""Absolute module names imported when ``path`` is imported (i.e. at Lambda
|
||||
init). Descends into module-level if/try/with and class bodies, but not into
|
||||
function bodies (lazy) or ``if TYPE_CHECKING:`` blocks (never executed)."""
|
||||
try:
|
||||
tree = ast.parse(path.read_text(encoding="utf-8"), str(path))
|
||||
except (SyntaxError, UnicodeDecodeError):
|
||||
return []
|
||||
out: list[str] = []
|
||||
|
||||
def visit(stmts: list[ast.stmt]) -> None:
|
||||
for node in stmts:
|
||||
if isinstance(node, ast.Import):
|
||||
out.extend(alias.name for alias in node.names)
|
||||
elif isinstance(node, ast.ImportFrom):
|
||||
if not node.level and node.module: # absolute imports only
|
||||
out.append(node.module)
|
||||
elif isinstance(node, ast.If):
|
||||
if _is_type_checking(node.test):
|
||||
continue
|
||||
visit(node.body)
|
||||
visit(node.orelse)
|
||||
elif isinstance(node, ast.Try):
|
||||
visit(node.body)
|
||||
visit(node.orelse)
|
||||
visit(node.finalbody)
|
||||
for handler in node.handlers:
|
||||
visit(handler.body)
|
||||
elif isinstance(node, ast.With):
|
||||
visit(node.body)
|
||||
elif isinstance(node, ast.ClassDef):
|
||||
visit(node.body)
|
||||
# FunctionDef / AsyncFunctionDef bodies are intentionally skipped.
|
||||
|
||||
visit(tree.body)
|
||||
return out
|
||||
|
||||
|
||||
def _module_to_file(module: str) -> Optional[Path]:
|
||||
"""Resolve a dotted module to its repo source file (``foo.bar`` ->
|
||||
``foo/bar.py`` or ``foo/bar/__init__.py``)."""
|
||||
base = REPO_ROOT.joinpath(*module.split("."))
|
||||
py = base.with_suffix(".py")
|
||||
if py.is_file():
|
||||
return py
|
||||
init = base / "__init__.py"
|
||||
if init.is_file():
|
||||
return init
|
||||
return None
|
||||
|
||||
|
||||
def _import_closure(start: Path) -> dict[Path, Optional[Path]]:
|
||||
"""Repo files reachable from ``start`` via import-time imports, mapped to the
|
||||
first file that imported each (for blame in failure messages)."""
|
||||
reached: dict[Path, Optional[Path]] = {}
|
||||
stack: list[tuple[Path, Optional[Path]]] = [(start, None)]
|
||||
while stack:
|
||||
path, importer = stack.pop()
|
||||
if path in reached:
|
||||
continue
|
||||
reached[path] = importer
|
||||
for module in _import_time_imports(path):
|
||||
if module.split(".")[0] not in _TOP:
|
||||
continue # stdlib / third-party — not our concern here
|
||||
target = _module_to_file(module)
|
||||
if target is not None and target not in reached:
|
||||
stack.append((target, path))
|
||||
return reached
|
||||
|
||||
|
||||
def _norm(path_token: str) -> str:
|
||||
return path_token.lstrip("./").rstrip("/")
|
||||
|
||||
|
||||
def _parse_handler_spec(dockerfile_text: str) -> Optional[str]:
|
||||
"""The ``<module>.handler`` string from the ``CMD`` line, or None if this
|
||||
isn't a Lambda handler image."""
|
||||
match = re.search(r"^CMD\s+(\[.*\]|.+)$", dockerfile_text, re.MULTILINE)
|
||||
if not match:
|
||||
return None
|
||||
raw = match.group(1).strip()
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
spec = parsed[0] if isinstance(parsed, list) and parsed else raw
|
||||
except json.JSONDecodeError:
|
||||
spec = raw.strip('"')
|
||||
return spec if isinstance(spec, str) and spec.endswith(".handler") else None
|
||||
|
||||
|
||||
def _parse_copies(dockerfile_text: str) -> list[tuple[list[str], str]]:
|
||||
"""``COPY`` instructions as (sources, dest), dropping ``--flag`` tokens."""
|
||||
copies: list[tuple[list[str], str]] = []
|
||||
for match in re.finditer(r"^COPY\s+(.+)$", dockerfile_text, re.MULTILINE):
|
||||
tokens = [t for t in match.group(1).split() if not t.startswith("--")]
|
||||
if len(tokens) < 2:
|
||||
continue
|
||||
*sources, dest = tokens
|
||||
copies.append((sources, dest))
|
||||
return copies
|
||||
|
||||
|
||||
def _resolve_handler_file(
|
||||
spec: str, copies: list[tuple[list[str], str]]
|
||||
) -> Optional[Path]:
|
||||
"""Map a handler spec to its repo source file.
|
||||
|
||||
Handles both in-place layouts (``backend.foo.handler`` -> ``backend/foo/
|
||||
handler.py``, present via ``COPY backend/ backend/``) and root-placed
|
||||
handlers (``main.handler`` where a ``COPY <src> /var/task/main.py`` or
|
||||
``COPY <src>/main.py .`` puts the file at the image root)."""
|
||||
module_path, _func = spec.rsplit(".", 1)
|
||||
|
||||
direct = REPO_ROOT / (module_path.replace(".", "/") + ".py")
|
||||
if direct.is_file():
|
||||
return direct
|
||||
|
||||
# Root-placed module: find the COPY whose destination basename matches.
|
||||
wanted = module_path.split("/")[-1] + ".py"
|
||||
for sources, dest in copies:
|
||||
dest_norm = _norm(dest)
|
||||
dest_is_named_file = Path(dest_norm).name == wanted
|
||||
dest_is_dir = dest_norm in ("", module_path.split("/")[-1]) or dest.endswith("/")
|
||||
for src in sources:
|
||||
src_path = REPO_ROOT / _norm(src)
|
||||
if not src_path.is_file():
|
||||
continue
|
||||
if dest_is_named_file or (dest_is_dir and src_path.name == wanted):
|
||||
return src_path
|
||||
return None
|
||||
|
||||
|
||||
def _is_copied(rel_path: str, copies: list[tuple[list[str], str]]) -> bool:
|
||||
"""Whether a repo-relative file path lands in the image via some COPY."""
|
||||
rel_path = _norm(rel_path)
|
||||
for sources, _dest in copies:
|
||||
for src in sources:
|
||||
src_norm = _norm(src)
|
||||
if src_norm == "" or src_norm == rel_path or rel_path.startswith(src_norm + "/"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _discover_handler_dockerfiles() -> list[Path]:
|
||||
found: list[Path] = []
|
||||
for path in REPO_ROOT.rglob("*Dockerfile*"):
|
||||
if path.name in _SKIP_DOCKERFILES:
|
||||
continue
|
||||
if any(part in _SKIP_PARTS for part in path.relative_to(REPO_ROOT).parts):
|
||||
continue
|
||||
try:
|
||||
text = path.read_text(encoding="utf-8")
|
||||
except OSError:
|
||||
continue
|
||||
if _parse_handler_spec(text):
|
||||
found.append(path)
|
||||
return sorted(found)
|
||||
|
||||
|
||||
_HANDLER_DOCKERFILES = _discover_handler_dockerfiles()
|
||||
|
||||
|
||||
def test_handler_dockerfiles_discovered() -> None:
|
||||
"""Guard against the discovery silently finding nothing (e.g. a refactor
|
||||
that renames Dockerfiles), which would make every check below vacuous."""
|
||||
assert _HANDLER_DOCKERFILES, "no Lambda handler Dockerfiles found under repo root"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dockerfile",
|
||||
_HANDLER_DOCKERFILES,
|
||||
ids=[str(p.relative_to(REPO_ROOT)) for p in _HANDLER_DOCKERFILES],
|
||||
)
|
||||
def test_lambda_image_copies_full_import_closure(dockerfile: Path) -> None:
|
||||
"""Every repo file the handler imports at init must be COPYed into the image."""
|
||||
text = dockerfile.read_text(encoding="utf-8")
|
||||
spec = _parse_handler_spec(text)
|
||||
assert spec is not None # discovery guaranteed this
|
||||
copies = _parse_copies(text)
|
||||
|
||||
handler_file = _resolve_handler_file(spec, copies)
|
||||
assert handler_file is not None, (
|
||||
f"{dockerfile.relative_to(REPO_ROOT)}: could not locate the source file "
|
||||
f"for CMD handler {spec!r}. Update _resolve_handler_file if this is a new "
|
||||
f"handler layout."
|
||||
)
|
||||
|
||||
missing: list[str] = []
|
||||
for reached, importer in _import_closure(handler_file).items():
|
||||
rel = str(reached.relative_to(REPO_ROOT))
|
||||
if not _is_copied(rel, copies):
|
||||
blame = (
|
||||
str(importer.relative_to(REPO_ROOT)) if importer else "(handler entrypoint)"
|
||||
)
|
||||
missing.append(f" - {rel}\n imported by {blame}")
|
||||
|
||||
assert not missing, (
|
||||
f"{dockerfile.relative_to(REPO_ROOT)} runs `{spec}` but does not COPY "
|
||||
f"{len(missing)} file(s) it imports at init. The Lambda will fail at cold "
|
||||
f"start with Runtime.ImportModuleError. Add the missing top-level "
|
||||
f"package(s) as `COPY <pkg>/ <pkg>/`:\n" + "\n".join(sorted(missing))
|
||||
)
|
||||
Loading…
Add table
Reference in a new issue