From 1d8a77b29bfa0b9ffee5a7abdf284c9b5fef39e8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 3 May 2026 12:40:26 +0100 Subject: [PATCH] feat: scaffold agentic-toolkit (runner + skills + setup) Initial implementation of Domna's agentic toolkit per PRD #1: - Runner CLI (src/cli.ts) wrapping sandcastle.run() with Docker provider - Pure modules: PhaseScheduler, PromptBuilder, FailureHandler with tests - Project Status v2 GraphQL client + parsers with tests - BranchManager (git/gh wrapper) and LoopOrchestrator (per-tick algorithm) - Variant-aware: per-ticket (one PR per issue, phase-gated, exit between phases) vs single-pr (one PR for the whole DAG, halt on failure) - /to-project skill that creates a repo-level project, configures the Status schema the runner expects, and sets initial issue statuses - setup.sh that installs Matt Pocock skills + Domna skills via npx skills Out of scope at v1: remote runners, Slack notifications, stacked PRs, cross-repo projects, SHA-pinning of upstream skills (tracks HEAD until the skills CLI supports repo#sha). 
Co-Authored-By: Claude Opus 4.7 --- .gitignore | 8 + README.md | 107 +++++++ bin/run-sandcastle.js | 2 + package.json | 33 ++ setup.sh | 73 +++++ skills/engineering/to-project/SKILL.md | 78 +++++ src/cli.ts | 95 ++++++ src/modules/agent-runner.ts | 91 ++++++ src/modules/branch-manager.test.ts | 45 +++ src/modules/branch-manager.ts | 102 ++++++ src/modules/failure-handler.test.ts | 60 ++++ src/modules/failure-handler.ts | 30 ++ src/modules/loop-orchestrator.ts | 389 +++++++++++++++++++++++ src/modules/phase-scheduler.test.ts | 122 +++++++ src/modules/phase-scheduler.ts | 84 +++++ src/modules/project-state-client.test.ts | 180 +++++++++++ src/modules/project-state-client.ts | 315 ++++++++++++++++++ src/modules/prompt-builder.test.ts | 96 ++++++ src/modules/prompt-builder.ts | 53 +++ src/types.ts | 74 +++++ tsconfig.json | 21 ++ vitest.config.ts | 8 + 22 files changed, 2066 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100755 bin/run-sandcastle.js create mode 100644 package.json create mode 100755 setup.sh create mode 100644 skills/engineering/to-project/SKILL.md create mode 100644 src/cli.ts create mode 100644 src/modules/agent-runner.ts create mode 100644 src/modules/branch-manager.test.ts create mode 100644 src/modules/branch-manager.ts create mode 100644 src/modules/failure-handler.test.ts create mode 100644 src/modules/failure-handler.ts create mode 100644 src/modules/loop-orchestrator.ts create mode 100644 src/modules/phase-scheduler.test.ts create mode 100644 src/modules/phase-scheduler.ts create mode 100644 src/modules/project-state-client.test.ts create mode 100644 src/modules/project-state-client.ts create mode 100644 src/modules/prompt-builder.test.ts create mode 100644 src/modules/prompt-builder.ts create mode 100644 src/types.ts create mode 100644 tsconfig.json create mode 100644 vitest.config.ts diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..368543d --- /dev/null +++ b/.gitignore @@ -0,0 
+1,8 @@ +node_modules +dist +.DS_Store +*.log +.env +.env.local +coverage +.vitest-cache diff --git a/README.md b/README.md new file mode 100644 index 0000000..d83aa28 --- /dev/null +++ b/README.md @@ -0,0 +1,107 @@ +# agentic-toolkit + +Domna's agentic toolkit. Two things in one repo: + +1. **A curated, version-pinned skill set** for Claude Code (Matt Pocock's skills + Domna's own), installable into any target repo with one script. +2. **A sandcastle-based runner** that executes a GitHub Project of issues against a target repo, in either per-ticket-PR mode or single-PR mode. + +## Quick start (consume in a target repo) + +From the root of any Domna repo: + +```sh +curl -fsSL https://raw.githubusercontent.com/Hestia-Homes/agentic-toolkit/main/setup.sh | bash +``` + +This installs the curated skills into the repo and writes `skills-lock.json`. Re-run whenever the toolkit bumps its pinned versions. + +After installation, run `/setup-matt-pocock-skills` once per repo to record the issue tracker, triage labels, and domain-doc layout. + +## Running the runner + +The runner is invoked from inside this repo, pointing at a target repo. + +Prerequisites: +- Docker Desktop running on macOS +- `GITHUB_TOKEN` env var with `repo` and `project` scopes +- A GitHub Project (v2) created via `/to-project` (or manually with the same Status schema) + +```sh +git clone https://github.com/Hestia-Homes/agentic-toolkit.git +cd agentic-toolkit +npm install +npm run build + +GITHUB_TOKEN=ghp_xxx \ +GITHUB_VIEWER_LOGIN=KhalimCK \ +node bin/run-sandcastle.js run \ + --project 7 \ + --mode per-ticket \ + --owner Hestia-Homes \ + --repo assessment-model \ + --target-repo ~/Documents/hestia/assessment-model +``` + +Modes: + +- `per-ticket` — one PR per issue, phase-gated. Runner exits between phases; re-run after PRs merge. +- `single-pr` — one PR for the whole DAG. Runner halts on any failure. 
+ +## Workflow + +``` +/grill-me → /to-prd → /to-issues → /to-project → agentic-toolkit run --project N --mode <per-ticket|single-pr> +``` + +`to-project` lives in `skills/engineering/to-project/SKILL.md` and is installed by `setup.sh`. + +## Architecture (modules in `src/modules/`) + +| Module | Role | +|------------------------|------------------------------------------------------------------------------------------------| +| `PhaseScheduler` | Pure: topological sort of `Blocked by` → ordered phases. | +| `PromptBuilder` | Pure: build the per-ticket agent prompt. | +| `FailureHandler` | Pure state machine: retry / skip / halt given variant + retry count. | +| `ProjectStateClient` | GitHub Projects v2 + Issues GraphQL (read state, claim, set status, comment). | +| `BranchManager` | Git + `gh` ops in the target repo (push, open PR). | +| `AgentRunner` | Wraps `sandcastle.run()` with Docker provider and Claude Code agent. | +| `LoopOrchestrator` | Wires the above; runs the per-tick algorithm. | + +### Variant differences + +| Concern | per-ticket | single-pr | +|-----------------------|---------------------------|-----------------------------| +| Branches | one per issue | one per project, reused | +| PRs | one per issue | one for the whole DAG | +| Phase gates | yes (exit between phases) | no (topological order only) | +| HITL mid-run | issue parked; peers continue | runner halts | +| Failure after retry | skip + continue | halt | + +### Project Status field + +`/to-project` configures a single-select `Status` field with these options: + +- `Backlog` — has unmet blockers. +- `Ready` — runner-pickable; AFK with all blockers Done. +- `In progress` — being executed by an agent right now. +- `In review` — PR open, waiting for human merge. +- `Needs human` — failed twice, or HITL. +- `Done` — issue closed (set automatically on PR merge by Projects' built-in workflow). 
+ +## Development + +```sh +npm install +npm test +npm run typecheck +``` + +Pure modules (`PhaseScheduler`, `PromptBuilder`, `FailureHandler`, `ProjectStateClient` parsers) are unit-tested. Integration with sandcastle / git / GraphQL is exercised manually before each release. + +## Out of scope (v1) + +- Remote / parallel runners across machines (local-first). +- Slack / email failure notifications (issue comments only). +- Stacked PRs and phase branches. +- Cross-repo projects. +- Pinning Matt Pocock skills to a specific commit SHA — `setup.sh` tracks HEAD for now; SHA pinning will land when the upstream `skills` CLI supports `repo#sha`. diff --git a/bin/run-sandcastle.js b/bin/run-sandcastle.js new file mode 100755 index 0000000..8fb1272 --- /dev/null +++ b/bin/run-sandcastle.js @@ -0,0 +1,2 @@ +#!/usr/bin/env node +import "../dist/cli.js"; diff --git a/package.json b/package.json new file mode 100644 index 0000000..b51db25 --- /dev/null +++ b/package.json @@ -0,0 +1,33 @@ +{ + "name": "@domna/agentic-toolkit", + "version": "0.0.1", + "private": true, + "description": "Domna's agentic toolkit: curated skills + sandcastle-based runner for executing GitHub Project work", + "type": "module", + "engines": { + "node": ">=20" + }, + "bin": { + "agentic-toolkit": "./bin/run-sandcastle.js" + }, + "scripts": { + "build": "tsc -p tsconfig.json", + "start": "tsx src/cli.ts", + "test": "vitest run", + "test:watch": "vitest", + "typecheck": "tsc -p tsconfig.json --noEmit" + }, + "dependencies": { + "@ai-hero/sandcastle": "^0.5.7", + "@octokit/graphql": "^8.1.1", + "@octokit/rest": "^21.0.2", + "commander": "^12.1.0", + "execa": "^9.5.1" + }, + "devDependencies": { + "@types/node": "^22.9.0", + "tsx": "^4.19.2", + "typescript": "^5.6.3", + "vitest": "^2.1.5" + } +} diff --git a/setup.sh b/setup.sh new file mode 100755 index 0000000..9e35f18 --- /dev/null +++ b/setup.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +# +# Install Domna's curated skill set into the current repo. 
+# +# Run from the root of the target repo: +# curl -fsSL https://raw.githubusercontent.com/Hestia-Homes/agentic-toolkit/main/setup.sh | bash +# Or, if you've cloned agentic-toolkit: +# bash /path/to/agentic-toolkit/setup.sh +# +# What this does: +# 1. Adds Matt Pocock's skills (mattpocock/skills) at the version Domna trusts. +# 2. Adds Domna's own skills (Hestia-Homes/agentic-toolkit). +# 3. Writes/updates skills-lock.json so the install is reproducible. +# +# To upgrade Matt's skills across all Domna repos: +# - Bump MATTPOCOCK_REF in this script (agentic-toolkit repo). +# - Devs re-run setup.sh in their target repos. +# +set -euo pipefail + +# --- pinned versions ----------------------------------------------------------- +# Bump these refs in agentic-toolkit when Domna decides to upgrade. Devs in +# target repos pick up the new pins on their next setup.sh run. +MATTPOCOCK_SOURCE="mattpocock/skills" +MATTPOCOCK_REF="" # leave empty to track HEAD; set to a commit SHA to pin + +DOMNA_SOURCE="Hestia-Homes/agentic-toolkit" +DOMNA_REF="" # leave empty to track HEAD; set to a commit SHA to pin + +AGENT_TARGET="claude" # Claude Code agent install layout + +# --- guards -------------------------------------------------------------------- +if ! command -v npx >/dev/null 2>&1; then + echo "error: npx is required (install Node.js >= 20)." >&2 + exit 1 +fi +if [[ ! -e .git ]]; then # -e, not -d: .git is a plain file in linked worktrees and submodules + echo "error: run this script from the root of a git repository." 
>&2 + exit 1 +fi + +# --- install ------------------------------------------------------------------- +mattpocock_pkg="$MATTPOCOCK_SOURCE${MATTPOCOCK_REF:+#$MATTPOCOCK_REF}" +domna_pkg="$DOMNA_SOURCE${DOMNA_REF:+#$DOMNA_REF}" + +echo "==> Installing Matt Pocock skills from $mattpocock_pkg" +npx --yes skills@latest add "$mattpocock_pkg" \ + --skill '*' \ + --agent "$AGENT_TARGET" \ + --copy \ + --yes + +echo "==> Installing Domna skills from $domna_pkg" +npx --yes skills@latest add "$domna_pkg" \ + --skill '*' \ + --agent "$AGENT_TARGET" \ + --copy \ + --yes + +# --- post-install reminder ----------------------------------------------------- +cat <<'EOF' + +==> Done. Next steps: + + 1. Run /setup-matt-pocock-skills (one-time per repo) to record the issue + tracker, triage labels, and domain-doc layout. + + 2. Commit the skills-lock.json + .claude/skills/ (or whichever directory + the installer wrote to) so teammates can reproduce. + + 3. Re-run setup.sh whenever agentic-toolkit bumps its pinned versions. + +EOF diff --git a/skills/engineering/to-project/SKILL.md b/skills/engineering/to-project/SKILL.md new file mode 100644 index 0000000..fd1a3c6 --- /dev/null +++ b/skills/engineering/to-project/SKILL.md @@ -0,0 +1,78 @@ +--- +name: to-project +description: Create a GitHub Project (v2) for the issues just produced by /to-issues, configure the Status field schema the agentic-toolkit runner expects, and set initial statuses (AFK no-blockers → Ready, AFK with blockers → Backlog, HITL → Needs human). Use after /to-issues to make a project the runner can execute against. +--- + +# To Project + +Create a repo-level GitHub Project (v2) from the issues `/to-issues` just produced, and prepare it so the Domna agentic-toolkit runner can execute against it without further configuration. + +The issue tracker and issue numbers should already be known from the conversation context (the most recent `/to-issues` run). If they aren't, ask the user for the issue numbers explicitly. 
+ +## Process + +### 1. Gather context + +Confirm with the user: + +- **Project name** — defaults to the PRD title if a PRD is in context. +- **Project description** — defaults to a one-line summary of the PRD. +- **Repo** — the repo that owns the project. Defaults to the current `gh` repo. +- **Issue set** — the issue numbers to add. Defaults to issues created in the most recent `/to-issues` run. + +### 2. Create the project + +Use `gh project create` (or the GraphQL `createProjectV2` mutation) at the repo level. Project visibility defaults to whatever the repo's default is. Capture the project number and node id from the response. + +### 3. Configure the Status field + +The runner requires a single-select field named **`Status`** with these options, in this order: + +1. `Backlog` +2. `Ready` +3. `In progress` +4. `In review` +5. `Needs human` +6. `Done` + +If the project came with a default `Status` field, replace its options to match exactly — extra options are fine; missing ones must be added. If no `Status` field exists, create one. + +### 4. Add issues to the project + +For each issue in the issue set, add it to the project as an item. + +### 5. Set initial statuses + +For each issue, decide the starting status based on its labels and `Blocked by` references: + +| Issue properties | Starting status | +|-------------------------------------------------|------------------| +| Has `hitl` or `ready-for-human` label | `Needs human` | +| Has `Blocked by` references to issues *in this set* | `Backlog` | +| Otherwise (AFK, no remaining blockers) | `Ready` | + +Apply the status via the `updateProjectV2ItemFieldValue` mutation. + +### 6. Comment on each issue + +For each issue in the project, post a comment linking back to the project URL so the issue page shows the project context. One sentence is enough; example: + +> Tracked in project: <project-url> + +### 7. Confirm to the user + +Print: + +- Project URL +- Number of issues added, broken down by initial status (e.g. 
"3 Ready, 2 Backlog, 1 Needs human") +- Suggested next command, e.g.: + ``` + cd path/to/agentic-toolkit + GITHUB_TOKEN=... npx . run --project <number> --mode per-ticket --owner <owner> --repo <repo> --target-repo <path> + ``` + +## Notes + +- The project is repo-level, not org-level, by design (smaller blast radius, simpler permissions). If you need a cross-repo project, do it manually for now. +- Projects created by this skill are intended to be short-lived: scoped to one PRD's worth of work, archived or deleted on completion. +- The runner's `setStatus` calls assume the option names listed above. Don't rename or translate them. diff --git a/src/cli.ts b/src/cli.ts new file mode 100644 index 0000000..7dc0c57 --- /dev/null +++ b/src/cli.ts @@ -0,0 +1,95 @@ +import { graphql } from "@octokit/graphql"; +import { Command } from "commander"; +import { resolve } from "node:path"; +import { AgentRunner } from "./modules/agent-runner.js"; +import { BranchManager } from "./modules/branch-manager.js"; +import { LoopOrchestrator } from "./modules/loop-orchestrator.js"; +import { ProjectStateClient } from "./modules/project-state-client.js"; +import type { Variant } from "./types.js"; + +const program = new Command(); + +program + .name("agentic-toolkit") + .description("Domna agentic toolkit — sandcastle-driven runner") + .version("0.0.1"); + +program + .command("run") + .description("Execute a GitHub Project of issues against a target repo.") + .requiredOption("--project <number>", "GitHub Project (v2) number", parseIntStrict) + .requiredOption( + "--mode <mode>", + "Execution mode: per-ticket | single-pr", + parseVariant, + ) + .requiredOption("--owner <login>", "Owner login of the project repo") + .requiredOption("--repo <name>", "Repo name that owns the project") + .option( + "--target-repo <path>", + "Path to the target repo to operate on. Defaults to cwd.", + process.cwd(), + ) + .option( + "--base-branch <branch>", + "Branch to base new work on. 
Defaults to current HEAD of the target repo.", + ) + .action(async (opts) => { + const githubToken = process.env.GITHUB_TOKEN; + if (!githubToken) { + console.error( + "Set GITHUB_TOKEN in the environment (a PAT with repo + project scopes).", + ); + process.exit(2); + } + + const targetRepoPath = resolve(opts.targetRepo); + const authedGraphql = graphql.defaults({ + headers: { authorization: `token ${githubToken}` }, + }); + + const stateClient = new ProjectStateClient( + authedGraphql, + opts.owner, + opts.repo, + ); + const branchManager = new BranchManager({ targetRepoPath }); + const agentRunner = new AgentRunner({ targetRepoPath }); + + const orchestrator = new LoopOrchestrator( + { + projectNumber: opts.project, + variant: opts.mode, + targetRepoPath, + baseBranch: opts.baseBranch, + githubToken, + ownerLogin: opts.owner, + repo: opts.repo, + }, + { stateClient, agentRunner, branchManager }, + ); + + try { + const outcome = await orchestrator.run(); + console.log(outcome.message); + if (outcome.kind === "halted") process.exit(1); + } catch (err) { + console.error(err instanceof Error ? 
err.message : err); + process.exit(1); + } + }); + +function parseIntStrict(value: string): number { + const n = /^[+-]?\d+$/.test(value.trim()) ? Number.parseInt(value, 10) : Number.NaN; // parseInt alone would accept "7abc", "7.5" or "1e3" + if (!Number.isSafeInteger(n)) throw new Error(`Not a number: ${value}`); + return n; +} + +function parseVariant(value: string): Variant { + if (value !== "per-ticket" && value !== "single-pr") { + throw new Error(`Mode must be 'per-ticket' or 'single-pr', got: ${value}`); + } + return value; +} + +program.parseAsync(process.argv); diff --git a/src/modules/agent-runner.ts b/src/modules/agent-runner.ts new file mode 100644 index 0000000..d845a35 --- /dev/null +++ b/src/modules/agent-runner.ts @@ -0,0 +1,91 @@ +import { run as sandcastleRun, type RunResult } from "@ai-hero/sandcastle"; +import { claudeCode } from "@ai-hero/sandcastle"; +import docker from "@ai-hero/sandcastle/sandboxes/docker"; +import type { AgentResult, FailureKind } from "../types.js"; + +export interface AgentRunnerOptions { + targetRepoPath: string; + imageName?: string; + model?: string; + idleTimeoutSeconds?: number; +} + +const DEFAULT_IMAGE = "sandcastle:agentic-toolkit"; +const DEFAULT_MODEL = "claude-opus-4-7"; +const DEFAULT_IDLE_TIMEOUT = 900; + +/** + * Wraps `sandcastle.run()` with Domna's defaults: Docker provider, claudeCode + * agent, named-branch strategy. The branch is created on top of `baseBranch` + * if it doesn't exist; otherwise commits land on top of the existing branch + * (single-pr variant reuses one branch across calls). + */ +export class AgentRunner { + constructor(private readonly opts: AgentRunnerOptions) {} + + async runTicket(args: { + prompt: string; + branch: string; + baseBranch: string; + }): Promise<AgentResult> { + try { + const result = await sandcastleRun({ + agent: claudeCode(this.opts.model ?? DEFAULT_MODEL), + sandbox: docker({ + imageName: this.opts.imageName ?? 
DEFAULT_IMAGE, + }), + cwd: this.opts.targetRepoPath, + prompt: args.prompt, + branchStrategy: { + type: "branch", + branch: args.branch, + baseBranch: args.baseBranch, + }, + idleTimeoutSeconds: + this.opts.idleTimeoutSeconds ?? DEFAULT_IDLE_TIMEOUT, + logging: { type: "stdout" }, + }); + + if (result.commits.length === 0) { + return { + success: false, + failure: { + kind: "agent-error", + logs: "Agent produced no commits.", + }, + result, + }; + } + + return { success: true, result }; + } catch (err) { + const kind = classifyFailure(err); + return { + success: false, + failure: { + kind, + logs: err instanceof Error ? err.stack ?? err.message : String(err), + }, + }; + } + } +} + +function classifyFailure(err: unknown): FailureKind { + if (err instanceof Error) { + const msg = err.message.toLowerCase(); + if (msg.includes("timeout") || msg.includes("timed out")) { + return "sandbox-timeout"; + } + if (msg.includes("test") && (msg.includes("fail") || msg.includes("red"))) { + return "tests-failed"; + } + if (msg.includes("build")) { + return "build-failed"; + } + if (msg.includes("agent")) { + return "agent-error"; + } + } + return "unknown"; +} diff --git a/src/modules/branch-manager.test.ts b/src/modules/branch-manager.test.ts new file mode 100644 index 0000000..e4e9bc7 --- /dev/null +++ b/src/modules/branch-manager.test.ts @@ -0,0 +1,45 @@ +import { describe, expect, it } from "vitest"; +import type { ProjectIssue } from "../types.js"; +import { branchNameFor, slugify } from "./branch-manager.js"; + +const issue = (n: number, title: string): ProjectIssue => ({ + number: n, + nodeId: `node-${n}`, + title, + body: "", + kind: "AFK", + status: "Ready", + blockedBy: [], +}); + +describe("slugify", () => { + it("lowercases and replaces non-alphanumerics with single dashes", () => { + expect(slugify("Wire up Assessment Export!")).toBe( + "wire-up-assessment-export", + ); + }); + + it("trims leading/trailing dashes", () => { + 
expect(slugify("---hello---")).toBe("hello"); + }); + + it("caps length at 50 characters", () => { + const long = "a".repeat(80); + expect(slugify(long).length).toBe(50); + }); +}); + +describe("branchNameFor", () => { + it("returns one branch per project for single-pr", () => { + const a = branchNameFor("single-pr", "auth-rewrite", issue(1, "First")); + const b = branchNameFor("single-pr", "auth-rewrite", issue(2, "Second")); + expect(a).toBe("claude/auth-rewrite"); + expect(b).toBe("claude/auth-rewrite"); + }); + + it("returns per-issue branches for per-ticket", () => { + expect( + branchNameFor("per-ticket", "auth", issue(42, "Wire up Auth Middleware")), + ).toBe("claude/auth/42-wire-up-auth-middleware"); + }); +}); diff --git a/src/modules/branch-manager.ts b/src/modules/branch-manager.ts new file mode 100644 index 0000000..789ab05 --- /dev/null +++ b/src/modules/branch-manager.ts @@ -0,0 +1,102 @@ +import { execa } from "execa"; +import type { ProjectIssue, Variant } from "../types.js"; + +const slugRegex = /[^a-z0-9]+/g; + +export function slugify(title: string): string { + return title + .toLowerCase() + .replace(slugRegex, "-") + .slice(0, 50) // cap first: trimming before the cut could leave a trailing dash at position 50 + .replace(/^-+|-+$/g, ""); +} + +export function branchNameFor( + variant: Variant, + projectSlug: string, + issue: ProjectIssue, +): string { + if (variant === "single-pr") { + return `claude/${projectSlug}`; + } + return `claude/${projectSlug}/${issue.number}-${slugify(issue.title)}`; +} + +export interface BranchManagerOptions { + targetRepoPath: string; +} + +/** + * Thin wrapper around git + gh in the target repo. Sandcastle handles branch + * creation and commit merge-back via its branch strategy, so this module + * exists for the host-side operations the runner needs around it: detect the + * launch HEAD (used as baseBranch), push, and open PRs. 
+ */ +export class BranchManager { + constructor(private readonly opts: BranchManagerOptions) {} + + private async run( + bin: string, + args: string[], + ): Promise<{ stdout: string; stderr: string }> { + return execa(bin, args, { cwd: this.opts.targetRepoPath }); + } + + async detectCurrentBranch(): Promise<string> { + const { stdout } = await this.run("git", [ + "rev-parse", + "--abbrev-ref", + "HEAD", + ]); + return stdout.trim(); + } + + async hasLocalBranch(branch: string): Promise<boolean> { + try { + await this.run("git", [ + "show-ref", + "--verify", + "--quiet", + `refs/heads/${branch}`, + ]); + return true; + } catch { + return false; + } + } + + async push(branch: string): Promise<void> { + await this.run("git", ["push", "-u", "origin", branch]); + } + + async commitsBetween(base: string, branch: string): Promise<number> { + const { stdout } = await this.run("git", [ + "rev-list", + "--count", + `${base}..${branch}`, + ]); + return Number.parseInt(stdout.trim(), 10) || 0; + } + + async openPullRequest(opts: { + title: string; + body: string; + base: string; + head: string; + }): Promise<{ url: string }> { + const { stdout } = await this.run("gh", [ + "pr", + "create", + "--title", + opts.title, + "--body", + opts.body, + "--base", + opts.base, + "--head", + opts.head, + ]); + const url = stdout.trim().split("\n").pop() ?? 
""; + return { url }; + } +} diff --git a/src/modules/failure-handler.test.ts b/src/modules/failure-handler.test.ts new file mode 100644 index 0000000..6ecf8a2 --- /dev/null +++ b/src/modules/failure-handler.test.ts @@ -0,0 +1,60 @@ +import { describe, expect, it } from "vitest"; +import type { FailureKind, Variant } from "../types.js"; +import { decide } from "./failure-handler.js"; + +const FAILURES: FailureKind[] = [ + "agent-error", + "tests-failed", + "build-failed", + "sandbox-timeout", + "unknown", +]; +const VARIANTS: Variant[] = ["per-ticket", "single-pr"]; + +describe("decide", () => { + describe("first failure", () => { + for (const variant of VARIANTS) { + for (const failure of FAILURES) { + it(`retries on first failure (variant=${variant}, kind=${failure})`, () => { + const action = decide(failure, { variant, retryCount: 0 }); + expect(action.kind).toBe("retry"); + }); + } + } + }); + + describe("after retry", () => { + for (const failure of FAILURES) { + it(`per-ticket variant skips after second failure (kind=${failure})`, () => { + const action = decide(failure, { + variant: "per-ticket", + retryCount: 1, + }); + expect(action.kind).toBe("skip"); + if (action.kind === "skip") { + expect(action.reason).toContain(failure); + } + }); + + it(`single-pr variant halts after second failure (kind=${failure})`, () => { + const action = decide(failure, { + variant: "single-pr", + retryCount: 1, + }); + expect(action.kind).toBe("halt"); + if (action.kind === "halt") { + expect(action.reason).toContain(failure); + } + }); + } + }); + + it("halts/skips on third+ failure too (defensive)", () => { + expect( + decide("unknown", { variant: "per-ticket", retryCount: 2 }).kind, + ).toBe("skip"); + expect(decide("unknown", { variant: "single-pr", retryCount: 5 }).kind).toBe( + "halt", + ); + }); +}); diff --git a/src/modules/failure-handler.ts b/src/modules/failure-handler.ts new file mode 100644 index 0000000..9ec96c1 --- /dev/null +++ b/src/modules/failure-handler.ts 
import type { FailureAction, FailureContext, FailureKind } from "../types.js";

/** Default number of retries a ticket gets before its failure is escalated. */
const MAX_RETRIES = 1;

/**
 * Decide what to do when a ticket execution fails. Pure state machine: the
 * result depends only on the arguments.
 *
 * Rules (with the default retry budget of one):
 * - First failure (any kind, any variant): retry once.
 * - Second failure on per-ticket variant: skip this ticket, continue with peers
 *   in the same phase. The phase gate naturally blocks advance until the human
 *   resolves the failed issue.
 * - Second failure on single-pr variant: halt. The shared branch is corrupted
 *   and continuing would compound the damage.
 *
 * @param failure - Classification of the failure that just happened; only used
 *   to build the human-readable reason.
 * @param ctx - Runner variant plus the number of retries already spent on this
 *   ticket (0 on the first failure).
 * @param maxRetries - Retry budget per ticket. Defaults to {@link MAX_RETRIES};
 *   pass 0 to escalate on the first failure.
 * @returns `retry` while budget remains, otherwise `skip` (per-ticket) or
 *   `halt` (any other variant) with a reason naming the attempt count and the
 *   failure kind.
 */
export function decide(
  failure: FailureKind,
  ctx: FailureContext,
  maxRetries: number = MAX_RETRIES,
): FailureAction {
  if (ctx.retryCount < maxRetries) {
    return { kind: "retry" };
  }

  // retryCount counts retries already spent, so the attempt number is one more.
  const reason = `Ticket failed after ${ctx.retryCount + 1} attempt(s) with: ${failure}`;

  if (ctx.variant === "per-ticket") {
    return { kind: "skip", reason };
  }
  // Anything that is not per-ticket shares a branch, so stopping is the safe default.
  return { kind: "halt", reason };
}
"halted-no-ready-issues"; + message: string; + prsOpened: string[]; +} + +const PROJECT_SLUG_FALLBACK = "project"; + +export class LoopOrchestrator { + constructor( + private readonly opts: RunnerOptions, + private readonly deps: LoopDeps, + ) {} + + async run(): Promise { + const baseBranch = + this.opts.baseBranch ?? + (await this.deps.branchManager.detectCurrentBranch()); + + const { state, refs } = + await this.deps.stateClient.readProjectState(this.opts.projectNumber); + + const projectSlug = projectSlugFrom(state) ?? PROJECT_SLUG_FALLBACK; + const repoContext = detectRepoContext(this.opts.targetRepoPath); + + let workingState = await this.autopromoteAfk(state, refs); + + const phases = schedule(workingState.issues); + const phase = currentPhase(phases); + if (!phase) { + return { + kind: "all-done", + message: "All issues are Done. Nothing to do.", + prsOpened: [], + }; + } + + const prsOpened: string[] = []; + + while (true) { + workingState = await this.refresh(refs); + const refreshedPhases = schedule(workingState.issues); + const refreshedPhase = currentPhase(refreshedPhases); + + if (!refreshedPhase) { + return { + kind: "all-done", + message: "All issues are Done.", + prsOpened, + }; + } + if (refreshedPhase.index !== phase.index) { + return { + kind: "phase-complete", + message: phaseCompleteMessage(phase, prsOpened), + prsOpened, + }; + } + + const next = pickNextReady(refreshedPhase); + if (!next) { + if (this.opts.variant === "single-pr") { + return await this.finaliseSinglePr( + workingState, + phase, + projectSlug, + baseBranch, + prsOpened, + ); + } + return { + kind: "phase-complete", + message: phaseCompleteMessage(phase, prsOpened), + prsOpened, + }; + } + + const result = await this.executeTicket({ + issue: next, + refs, + repoContext, + projectSlug, + baseBranch, + }); + + if (result.kind === "halt") { + return { + kind: "halted", + message: result.reason, + prsOpened, + }; + } + if (result.kind === "skipped") { + continue; + } + if 
(result.kind === "merged-into-shared-branch") { + continue; + } + if (result.kind === "pr-opened" && result.prUrl) { + prsOpened.push(result.prUrl); + } + } + } + + private async finaliseSinglePr( + state: ProjectState, + phase: Phase, + projectSlug: string, + baseBranch: string, + prsOpened: string[], + ): Promise { + const branch = branchNameFor("single-pr", projectSlug, phase.issues[0]!); + const commits = await this.deps.branchManager + .commitsBetween(baseBranch, branch) + .catch(() => 0); + if (commits === 0) { + return { + kind: "halted-no-ready-issues", + message: "No commits to PR. Nothing was implemented.", + prsOpened, + }; + } + + await this.deps.branchManager.push(branch); + const issuesInPR = state.issues.filter((i) => i.status !== "Done"); + const pr = await this.deps.branchManager.openPullRequest({ + title: `Implement project #${state.projectNumber}`, + body: singlePrBody(state, issuesInPR), + base: baseBranch, + head: branch, + }); + for (const issue of issuesInPR) { + await this.setStatus(issue, "In review"); + } + return { + kind: "all-done", + message: `Single PR opened: ${pr.url}`, + prsOpened: [...prsOpened, pr.url], + }; + } + + private async executeTicket(args: { + issue: ProjectIssue; + refs: ProjectFieldRefs; + repoContext: RepoContext; + projectSlug: string; + baseBranch: string; + }): Promise< + | { kind: "pr-opened"; prUrl: string } + | { kind: "merged-into-shared-branch" } + | { kind: "skipped" } + | { kind: "halt"; reason: string } + > { + const { issue, refs, repoContext, projectSlug, baseBranch } = args; + + await this.claim(issue); + + const branch = branchNameFor(this.opts.variant, projectSlug, issue); + const prompt = buildPrompt({ issue, repoContext }); + + let retryCount = 0; + while (true) { + const result = await this.deps.agentRunner.runTicket({ + prompt, + branch, + baseBranch, + }); + if (result.success) break; + + const action = decide(result.failure!.kind, { + variant: this.opts.variant, + retryCount, + }); + + if 
(action.kind === "retry") { + retryCount++; + continue; + } + if (action.kind === "skip") { + await this.deps.stateClient.postIssueComment( + issue.nodeId, + failureCommentBody(action.reason, result.failure!.logs), + ); + await this.setStatus(issue, "Needs human", refs); + return { kind: "skipped" }; + } + // halt + await this.deps.stateClient.postIssueComment( + issue.nodeId, + failureCommentBody(action.reason, result.failure!.logs), + ); + await this.setStatus(issue, "Needs human", refs); + return { kind: "halt", reason: action.reason }; + } + + if (this.opts.variant === "single-pr") { + return { kind: "merged-into-shared-branch" }; + } + + await this.deps.branchManager.push(branch); + const pr = await this.deps.branchManager.openPullRequest({ + title: `${issue.title} (#${issue.number})`, + body: perTicketPrBody(issue), + base: baseBranch, + head: branch, + }); + await this.setStatus(issue, "In review", refs); + return { kind: "pr-opened", prUrl: pr.url }; + } + + private async claim(issue: ProjectIssue): Promise { + const refs = await this.deps.stateClient + .readProjectState(this.opts.projectNumber) + .then((r) => r.refs); + await this.setStatus(issue, "In progress", refs); + const me = await this.detectViewer(); + if (me) { + await this.deps.stateClient.assignIssue(issue.nodeId, me).catch(() => { + /* assignment is best-effort */ + }); + } + } + + private async detectViewer(): Promise { + return process.env.GITHUB_VIEWER_LOGIN; + } + + private async refresh(_refs: ProjectFieldRefs): Promise { + const { state, refs } = await this.deps.stateClient.readProjectState( + this.opts.projectNumber, + ); + return await this.autopromoteAfk(state, refs); + } + + private async autopromoteAfk( + state: ProjectState, + refs: ProjectFieldRefs, + ): Promise { + const doneNumbers = new Set( + state.issues.filter((i) => i.status === "Done").map((i) => i.number), + ); + const updates: ProjectIssue[] = []; + for (const issue of state.issues) { + const blockersResolved = 
issue.blockedBy.every( + (b) => + doneNumbers.has(b) || + !state.issues.find((i) => i.number === b), + ); + if ( + issue.kind === "AFK" && + issue.status === "Backlog" && + blockersResolved + ) { + await this.setStatus(issue, "Ready", refs); + updates.push({ ...issue, status: "Ready" }); + } else { + updates.push(issue); + } + } + return { ...state, issues: updates }; + } + + private async setStatus( + issue: ProjectIssue, + status: IssueStatus, + refs?: ProjectFieldRefs, + ): Promise { + const r = + refs ?? + (await this.deps.stateClient.readProjectState(this.opts.projectNumber)) + .refs; + const itemId = await this.findItemIdForIssue(issue); + if (!itemId) return; + const optionId = r.statusOptionIds[status]; + await this.deps.stateClient.setStatus( + r.projectId, + itemId, + r.statusFieldId, + optionId, + ); + } + + private async findItemIdForIssue( + issue: ProjectIssue, + ): Promise { + return issue.itemId; + } +} + +function pickNextReady(phase: Phase): ProjectIssue | undefined { + return phase.issues + .filter((i) => i.status === "Ready" && i.kind === "AFK") + .sort((a, b) => a.number - b.number)[0]; +} + +function projectSlugFrom(state: ProjectState): string | undefined { + return `${state.repo}-p${state.projectNumber}`; +} + +function detectRepoContext(targetRepoPath: string): RepoContext { + const ctx: RepoContext = {}; + if (existsSync(join(targetRepoPath, "CONTEXT.md"))) { + ctx.contextMdPath = "CONTEXT.md"; + } + if (existsSync(join(targetRepoPath, "docs", "adr"))) { + ctx.adrDirPath = "docs/adr"; + } + return ctx; +} + +function failureCommentBody(reason: string, logs: string): string { + const trimmed = logs.length > 6000 ? logs.slice(-6000) : logs; + return [ + `### Automated run failed`, + "", + reason, + "", + "
Logs (last 6k chars)", + "", + "```", + trimmed, + "```", + "", + "
", + ].join("\n"); +} + +function perTicketPrBody(issue: ProjectIssue): string { + return [ + `Closes #${issue.number}`, + "", + "Implemented by Domna agentic-toolkit.", + "", + "Review: confirm acceptance criteria satisfied; rebase on top of `main` if conflicts.", + ].join("\n"); +} + +function singlePrBody(state: ProjectState, issues: ProjectIssue[]): string { + const closes = issues.map((i) => `Closes #${i.number}`).join("\n"); + return [ + `Bundled implementation for project #${state.projectNumber}.`, + "", + closes, + "", + "Implemented by Domna agentic-toolkit (single-pr mode).", + ].join("\n"); +} + +function phaseCompleteMessage(phase: Phase, prs: string[]): string { + const list = prs.length === 0 ? "(no new PRs)" : prs.map((u) => `- ${u}`).join("\n"); + return [ + `Phase ${phase.index} complete. Review and merge the PRs below before re-running.`, + "", + list, + ].join("\n"); +} + +export type { Variant }; diff --git a/src/modules/phase-scheduler.test.ts b/src/modules/phase-scheduler.test.ts new file mode 100644 index 0000000..0acf4f5 --- /dev/null +++ b/src/modules/phase-scheduler.test.ts @@ -0,0 +1,122 @@ +import { describe, expect, it } from "vitest"; +import type { ProjectIssue } from "../types.js"; +import { currentPhase, schedule } from "./phase-scheduler.js"; + +const issue = ( + n: number, + blockedBy: number[] = [], + overrides: Partial = {}, +): ProjectIssue => ({ + number: n, + nodeId: `node-${n}`, + title: `Issue ${n}`, + body: "", + kind: "AFK", + status: "Ready", + blockedBy, + ...overrides, +}); + +describe("schedule", () => { + it("puts unblocked issues in phase 0", () => { + const phases = schedule([issue(1), issue(2), issue(3)]); + expect(phases).toHaveLength(1); + expect(phases[0]?.issues.map((i) => i.number)).toEqual([1, 2, 3]); + }); + + it("orders a linear chain into one phase per node", () => { + const phases = schedule([ + issue(1), + issue(2, [1]), + issue(3, [2]), + ]); + expect(phases.map((p) => p.issues.map((i) => 
i.number))).toEqual([ + [1], + [2], + [3], + ]); + }); + + it("groups fan-out children into the same phase", () => { + const phases = schedule([ + issue(1), + issue(2, [1]), + issue(3, [1]), + issue(4, [1]), + ]); + expect(phases.map((p) => p.issues.map((i) => i.number))).toEqual([ + [1], + [2, 3, 4], + ]); + }); + + it("handles a diamond DAG", () => { + const phases = schedule([ + issue(1), + issue(2, [1]), + issue(3, [1]), + issue(4, [2, 3]), + ]); + expect(phases.map((p) => p.issues.map((i) => i.number))).toEqual([ + [1], + [2, 3], + [4], + ]); + }); + + it("treats blockers outside the input set as satisfied", () => { + const phases = schedule([issue(2, [99]), issue(3, [2])]); + expect(phases.map((p) => p.issues.map((i) => i.number))).toEqual([ + [2], + [3], + ]); + }); + + it("excludes Done issues from phases but keeps their blockers satisfied", () => { + const phases = schedule([ + issue(1, [], { status: "Done" }), + issue(2, [1]), + issue(3, [2]), + ]); + expect(phases.map((p) => p.issues.map((i) => i.number))).toEqual([ + [2], + [3], + ]); + }); + + it("partitions disconnected components in parallel", () => { + const phases = schedule([ + issue(1), + issue(2, [1]), + issue(10), + issue(11, [10]), + ]); + expect(phases.map((p) => p.issues.map((i) => i.number))).toEqual([ + [1, 10], + [2, 11], + ]); + }); + + it("throws on a cycle", () => { + expect(() => schedule([issue(1, [2]), issue(2, [1])])).toThrow(/Cycle/); + }); +}); + +describe("currentPhase", () => { + it("returns the first phase containing any non-Done issue", () => { + const phases = schedule([ + issue(1, [], { status: "Done" }), + issue(2, [1], { status: "In progress" }), + issue(3, [2]), + ]); + expect(currentPhase(phases)?.issues.map((i) => i.number)).toEqual([2]); + }); + + it("returns undefined when all phases are Done", () => { + const phases = schedule([ + issue(1, [], { status: "Done" }), + issue(2, [1], { status: "Done" }), + ]); + expect(currentPhase(phases)).toBeUndefined(); + }); +}); 
import type { Phase, ProjectIssue } from "../types.js";

export interface ScheduleOptions {
  /**
   * If true, issues already `Done` are excluded from phases entirely (their
   * blockers are still considered satisfied for downstream issues). Default true.
   */
  excludeDone?: boolean;
}

/**
 * Topologically partition the issue set into phases.
 *
 * Phase 0 = issues with no remaining blockers (after `Done` issues are
 * resolved). Phase N = issues whose every blocker is in some phase < N.
 *
 * Issues referenced by `blockedBy` that don't exist in the input set are
 * treated as already satisfied (they're outside this project's scope).
 *
 * @param issues - Issues to partition; the array is not modified.
 * @param options - See {@link ScheduleOptions}.
 * @returns Phases in execution order, each sorted by ascending issue number.
 * @throws Error if no remaining issue can be scheduled (a dependency cycle).
 */
export function schedule(
  issues: readonly ProjectIssue[],
  options: ScheduleOptions = {},
): Phase[] {
  const { excludeDone = true } = options;

  // Every issue in the input, used to tell in-scope blockers from outside ones.
  const byNumber = new Map<number, ProjectIssue>();
  // Issues still waiting to be placed in a phase.
  const remaining = new Map<number, ProjectIssue>();
  // Phase each issue landed in; Done issues start at -1 so they always count
  // as resolved for the issues they block.
  const phaseOf = new Map<number, number>();

  for (const issue of issues) {
    byNumber.set(issue.number, issue);
    const isDone = issue.status === "Done";
    if (isDone) phaseOf.set(issue.number, -1);
    if (!(excludeDone && isDone)) remaining.set(issue.number, issue);
  }

  const phases: Phase[] = [];
  let phaseIndex = 0;

  while (remaining.size > 0) {
    const ready: ProjectIssue[] = [];
    for (const issue of remaining.values()) {
      const allBlockersResolved = issue.blockedBy.every((b) => {
        if (!byNumber.has(b)) return true; // outside this project
        const p = phaseOf.get(b);
        return p !== undefined && p < phaseIndex;
      });
      if (allBlockersResolved) ready.push(issue);
    }

    // No progress with work still pending means the remaining issues block each other.
    if (ready.length === 0) {
      const stuck = [...remaining.keys()].join(", ");
      throw new Error(
        `Cycle detected or unresolvable blocker chain among issues: ${stuck}`,
      );
    }

    ready.sort((a, b) => a.number - b.number);
    phases.push({ index: phaseIndex, issues: ready });
    // Record placements only after the scan so same-phase peers cannot unblock each other.
    for (const issue of ready) {
      phaseOf.set(issue.number, phaseIndex);
      remaining.delete(issue.number);
    }
    phaseIndex++;
  }

  return phases;
}

/**
 * The "current" phase is the lowest-indexed phase that still has any
 * non-`Done` work left. Returns undefined if every phase is fully complete.
 */
export function currentPhase(phases: readonly Phase[]): Phase | undefined {
  return phases.find((phase) => phase.issues.some((i) => i.status !== "Done"));
}
expect(parseBlockedByFromBody(body)).toEqual([5, 12]); + }); + + it("dedupes and sorts", () => { + const body = "## Blocked by\n- #7\n- #3\n- #7\n"; + expect(parseBlockedByFromBody(body)).toEqual([3, 7]); + }); + + it("falls back to whole-body scan when no Blocked by header is present", () => { + expect(parseBlockedByFromBody("Refs #42")).toEqual([42]); + }); +}); + +describe("parseProjectResponse", () => { + const baseRaw = (overrides: Partial<{ items: any[] }> = {}) => ({ + repository: { + projectV2: { + id: "PROJECT_ID", + title: "Test", + field: { + id: "STATUS_FIELD_ID", + name: "Status", + options: [ + { id: "OPT_BACKLOG", name: "Backlog" }, + { id: "OPT_READY", name: "Ready" }, + { id: "OPT_IN_PROGRESS", name: "In progress" }, + { id: "OPT_IN_REVIEW", name: "In review" }, + { id: "OPT_NEEDS_HUMAN", name: "Needs human" }, + { id: "OPT_DONE", name: "Done" }, + ], + }, + items: { nodes: overrides.items ?? [] }, + }, + }, + }); + + const issueItem = (n: number, status: string, labels: string[] = []) => ({ + id: `ITEM_${n}`, + fieldValues: { + nodes: [{ name: status, field: { name: "Status" } }], + }, + content: { + id: `ISSUE_${n}`, + number: n, + title: `Issue ${n}`, + body: `body ${n}`, + assignees: { nodes: [] }, + labels: { nodes: labels.map((l) => ({ name: l })) }, + }, + }); + + it("throws when the project is missing", () => { + expect(() => + parseProjectResponse( + { repository: { projectV2: null } } as any, + "Hestia-Homes", + "agentic-toolkit", + 99, + ), + ).toThrow(/not found/); + }); + + it("throws when the Status field is missing", () => { + const raw = baseRaw(); + raw.repository.projectV2.field = null as any; + expect(() => + parseProjectResponse(raw as any, "Hestia-Homes", "agentic-toolkit", 1), + ).toThrow(/missing the required "Status"/); + }); + + it("throws when a required Status option is missing", () => { + const raw = baseRaw(); + raw.repository.projectV2.field!.options = raw.repository.projectV2.field!.options.filter( + (o) => o.name !== 
"Ready", + ); + expect(() => + parseProjectResponse(raw as any, "Hestia-Homes", "agentic-toolkit", 1), + ).toThrow(/Ready/); + }); + + it("maps issues with status, labels, and refs", () => { + const raw = baseRaw({ + items: [ + issueItem(1, "Ready", []), + issueItem(2, "Backlog", ["hitl"]), + ], + }); + const { state, refs } = parseProjectResponse( + raw as any, + "Hestia-Homes", + "agentic-toolkit", + 1, + ); + expect(state.issues).toHaveLength(2); + expect(state.issues[0]).toMatchObject({ + number: 1, + kind: "AFK", + status: "Ready", + }); + expect(state.issues[1]).toMatchObject({ + number: 2, + kind: "HITL", + status: "Backlog", + }); + expect(refs.statusFieldId).toBe("STATUS_FIELD_ID"); + expect(refs.statusOptionIds.Ready).toBe("OPT_READY"); + expect(refs.statusOptionIds.Done).toBe("OPT_DONE"); + }); + + it("defaults to Backlog when an item has no Status field value", () => { + const noStatus = issueItem(1, "Backlog"); + noStatus.fieldValues.nodes = []; + const raw = baseRaw({ items: [noStatus] }); + const { state } = parseProjectResponse( + raw as any, + "Hestia-Homes", + "agentic-toolkit", + 1, + ); + expect(state.issues[0]?.status).toBe("Backlog"); + }); + + it("skips non-Issue items (PRs, draft items)", () => { + const raw = baseRaw({ + items: [ + issueItem(1, "Ready"), + { id: "DRAFT", fieldValues: { nodes: [] }, content: {} }, + ], + }); + const { state } = parseProjectResponse( + raw as any, + "Hestia-Homes", + "agentic-toolkit", + 1, + ); + expect(state.issues).toHaveLength(1); + }); +}); diff --git a/src/modules/project-state-client.ts b/src/modules/project-state-client.ts new file mode 100644 index 0000000..0e51d6e --- /dev/null +++ b/src/modules/project-state-client.ts @@ -0,0 +1,315 @@ +import { graphql as defaultGraphql } from "@octokit/graphql"; +import type { + IssueKind, + IssueStatus, + ProjectIssue, + ProjectState, +} from "../types.js"; + +type GraphqlClient = typeof defaultGraphql; + +const STATUS_FIELD_NAME = "Status"; + +const 
/**
 * Status option names the runner understands, keyed by the name shown in the
 * project. A Map (rather than an object literal) so names such as
 * "constructor" cannot resolve to inherited prototype members.
 */
const STATUS_NAME_TO_TYPE: ReadonlyMap<string, IssueStatus> = new Map<
  string,
  IssueStatus
>([
  ["Backlog", "Backlog"],
  ["Ready", "Ready"],
  ["In progress", "In progress"],
  ["In review", "In review"],
  ["Needs human", "Needs human"],
  ["Done", "Done"],
]);

/**
 * Reads the project, its Status field definition and one page (up to 100) of
 * items. Pass the previous page's `endCursor` as `$after` to continue.
 */
export const PROJECT_QUERY = /* GraphQL */ `
  query ProjectState(
    $owner: String!
    $repo: String!
    $number: Int!
    $after: String
  ) {
    repository(owner: $owner, name: $repo) {
      projectV2(number: $number) {
        id
        title
        field(name: "Status") {
          ... on ProjectV2SingleSelectField {
            id
            name
            options {
              id
              name
            }
          }
        }
        items(first: 100, after: $after) {
          pageInfo {
            hasNextPage
            endCursor
          }
          nodes {
            id
            fieldValues(first: 20) {
              nodes {
                ... on ProjectV2ItemFieldSingleSelectValue {
                  name
                  field {
                    ... on ProjectV2SingleSelectField {
                      name
                    }
                  }
                }
              }
            }
            content {
              ... on Issue {
                id
                number
                title
                body
                assignees(first: 5) {
                  nodes {
                    login
                  }
                }
                labels(first: 20) {
                  nodes {
                    name
                  }
                }
              }
            }
          }
        }
      }
    }
  }
`;

interface RawProjectResponse {
  repository: {
    projectV2: {
      id: string;
      title: string;
      field: {
        id: string;
        name: string;
        options: { id: string; name: string }[];
      } | null;
      items: {
        nodes: RawItem[];
        /** Absent in hand-built fixtures; treated as "no further pages". */
        pageInfo?: { hasNextPage: boolean; endCursor: string | null };
      };
    } | null;
  };
}

interface RawItem {
  id: string;
  fieldValues: {
    nodes: Array<{
      name?: string;
      field?: { name?: string };
    }>;
  };
  /** Issue fields, or an empty object / null for items that are not issues. */
  content:
    | {
        id: string;
        number: number;
        title: string;
        body: string;
        assignees: { nodes: { login: string }[] };
        labels: { nodes: { name: string }[] };
      }
    | Record<string, never>
    | null;
}

/** Node ids needed to write a Status value back to the project. */
export interface ProjectFieldRefs {
  projectId: string;
  statusFieldId: string;
  statusOptionIds: Record<IssueStatus, string>;
}

/**
 * Classify an issue from its labels: `hitl` or `ready-for-human`
 * (case-insensitive) means HITL, anything else AFK.
 */
export function detectKindFromLabels(labels: string[]): IssueKind {
  const lower = labels.map((l) => l.toLowerCase());
  if (lower.includes("hitl")) return "HITL";
  if (lower.includes("ready-for-human")) return "HITL";
  return "AFK";
}

/**
 * Extract the issue numbers referenced as `#N` in the body's "Blocked by"
 * section. When the body has no such heading the whole body is scanned.
 *
 * @returns Unique issue numbers in ascending order.
 */
export function parseBlockedByFromBody(body: string): number[] {
  const out = new Set<number>();
  // Capture everything after the heading up to the next "## " heading or the end.
  const blockedSection = body.match(
    /##\s*Blocked\s*by\s*\n([\s\S]*?)(\n##\s|$)/i,
  );
  const haystack = blockedSection?.[1] ?? body;
  for (const match of haystack.matchAll(/#(\d+)\b/g)) {
    if (match[1]) out.add(Number.parseInt(match[1], 10));
  }
  return [...out].sort((a, b) => a - b);
}

/**
 * Validate a raw `PROJECT_QUERY` response and convert it into the runner's
 * project state plus the ids needed for Status updates.
 *
 * Items whose content is not an issue are skipped. An item without a
 * recognised Status value is reported as "Backlog".
 *
 * @throws Error when the project does not exist, has no Status single-select
 *   field, or that field lacks one of the six required options.
 */
export function parseProjectResponse(
  raw: RawProjectResponse,
  ownerLogin: string,
  repo: string,
  projectNumber: number,
): { state: ProjectState; refs: ProjectFieldRefs } {
  const project = raw.repository.projectV2;
  if (!project) {
    throw new Error(
      `Project #${projectNumber} not found in ${ownerLogin}/${repo}`,
    );
  }

  const statusField = project.field;
  if (!statusField) {
    throw new Error(
      `Project #${projectNumber} is missing the required "${STATUS_FIELD_NAME}" single-select field. Run /to-project to scaffold it.`,
    );
  }

  const statusOptionIds: Partial<Record<IssueStatus, string>> = {};
  for (const opt of statusField.options) {
    const mapped = STATUS_NAME_TO_TYPE.get(opt.name);
    if (mapped) statusOptionIds[mapped] = opt.id;
  }

  const required: IssueStatus[] = [
    "Backlog",
    "Ready",
    "In progress",
    "In review",
    "Needs human",
    "Done",
  ];
  const missing = required.filter((s) => !statusOptionIds[s]);
  if (missing.length > 0) {
    throw new Error(
      `Project #${projectNumber} Status field is missing options: ${missing.join(", ")}. Run /to-project to scaffold them.`,
    );
  }

  const issues: ProjectIssue[] = [];
  for (const item of project.items.nodes) {
    const content = item.content;
    // Only issue content carries a number.
    if (!content || !("number" in content)) continue;

    const labels = content.labels.nodes.map((l) => l.name);
    const statusName = item.fieldValues.nodes.find(
      (n) => n.field?.name === STATUS_FIELD_NAME,
    )?.name;
    const status: IssueStatus =
      (statusName === undefined
        ? undefined
        : STATUS_NAME_TO_TYPE.get(statusName)) ?? "Backlog";

    issues.push({
      number: content.number,
      nodeId: content.id,
      itemId: item.id,
      title: content.title,
      body: content.body ?? "",
      kind: detectKindFromLabels(labels),
      status,
      blockedBy: parseBlockedByFromBody(content.body ?? ""),
      assignee: content.assignees.nodes[0]?.login,
    });
  }

  return {
    state: {
      projectId: project.id,
      projectNumber,
      ownerLogin,
      repo,
      issues,
    },
    refs: {
      projectId: project.id,
      statusFieldId: statusField.id,
      // Safe: the `missing` check above proved every status has an id.
      statusOptionIds: statusOptionIds as Record<IssueStatus, string>,
    },
  };
}

/** Thin GraphQL wrapper around one repository's Projects v2 data. */
export class ProjectStateClient {
  constructor(
    private readonly graphql: GraphqlClient,
    private readonly ownerLogin: string,
    private readonly repo: string,
  ) {}

  /**
   * Read the whole project, following the items connection page by page so
   * projects with more than 100 items are not truncated.
   */
  async readProjectState(
    projectNumber: number,
  ): Promise<{ state: ProjectState; refs: ProjectFieldRefs }> {
    const fetchPage = (after: string | null): Promise<RawProjectResponse> =>
      this.graphql<RawProjectResponse>(PROJECT_QUERY, {
        owner: this.ownerLogin,
        repo: this.repo,
        number: projectNumber,
        after,
      });

    const first = await fetchPage(null);
    const project = first.repository.projectV2;
    let pageInfo = project?.items.pageInfo;
    // Append later pages onto the first response so parsing sees one item list.
    while (project && pageInfo?.hasNextPage && pageInfo.endCursor) {
      const next = (await fetchPage(pageInfo.endCursor)).repository.projectV2;
      if (!next) break;
      project.items.nodes.push(...next.items.nodes);
      pageInfo = next.items.pageInfo;
    }

    return parseProjectResponse(
      first,
      this.ownerLogin,
      this.repo,
      projectNumber,
    );
  }

  /** Set the single-select Status field of a project item to the given option. */
  async setStatus(
    projectId: string,
    itemId: string,
    statusFieldId: string,
    statusOptionId: string,
  ): Promise<void> {
    await this.graphql(
      /* GraphQL */ `
        mutation SetStatus(
          $projectId: ID!
          $itemId: ID!
          $fieldId: ID!
          $optionId: String!
        ) {
          updateProjectV2ItemFieldValue(
            input: {
              projectId: $projectId
              itemId: $itemId
              fieldId: $fieldId
              value: { singleSelectOptionId: $optionId }
            }
          ) {
            projectV2Item {
              id
            }
          }
        }
      `,
      { projectId, itemId, fieldId: statusFieldId, optionId: statusOptionId },
    );
  }

  /**
   * Add the user with the given login as an assignee of the issue.
   *
   * @throws Error when the login does not resolve to a user.
   */
  async assignIssue(issueNodeId: string, userLogin: string): Promise<void> {
    const userQuery = await this.graphql<{ user: { id: string } | null }>(
      /* GraphQL */ `
        query GetUser($login: String!) {
          user(login: $login) {
            id
          }
        }
      `,
      { login: userLogin },
    );
    if (!userQuery.user) {
      throw new Error(`Cannot assign issue: user "${userLogin}" not found`);
    }
    await this.graphql(
      /* GraphQL */ `
        mutation Assign($issueId: ID!, $userId: ID!) {
          addAssigneesToAssignable(
            input: { assignableId: $issueId, assigneeIds: [$userId] }
          ) {
            assignable {
              ... on Issue {
                id
              }
            }
          }
        }
      `,
      { issueId: issueNodeId, userId: userQuery.user.id },
    );
  }

  /** Post a comment with the given markdown body on the issue. */
  async postIssueComment(issueNodeId: string, body: string): Promise<void> {
    await this.graphql(
      /* GraphQL */ `
        mutation Comment($issueId: ID!, $body: String!) {
          addComment(input: { subjectId: $issueId, body: $body }) {
            commentEdge {
              node {
                id
              }
            }
          }
        }
      `,
      { issueId: issueNodeId, body },
    );
  }
}
=> { + const out = build({ issue: sampleIssue, repoContext: {} }); + expect(out).toContain("DO NOT push"); + expect(out).toContain("DO NOT open a pull request"); + }); + + it("renders an empty body placeholder", () => { + const empty: ProjectIssue = { ...sampleIssue, body: "" }; + const out = build({ issue: empty, repoContext: {} }); + expect(out).toContain("_(empty)_"); + }); + + it("matches snapshot", () => { + const out = build({ + issue: sampleIssue, + repoContext: { contextMdPath: "CONTEXT.md", adrDirPath: "docs/adr" }, + }); + expect(out).toMatchInlineSnapshot(` + "# Implement issue #42: Wire up assessment export + + ## Issue body + + ## What to build + + Export an assessment as a PDF. + + ## Acceptance criteria + + - [ ] Endpoint POST /assessments/:id/export returns a PDF + - [ ] PDF includes the customer name and address + + ## Repo context + + - Domain language: see \`CONTEXT.md\` for the project's vocabulary. Use those terms in code and commit messages. + - Architectural decisions: skim \`docs/adr\` for any ADR that touches the area you're changing. Respect existing decisions. + + ## Approach + + If this slice has any test-shaped surface, follow a red-green-refactor TDD loop: write a failing test that captures an acceptance criterion, make it pass with the smallest change, refactor. + + ## Output contract + + - Make as many commits on the current branch as needed to satisfy every acceptance criterion in the issue body. + - DO NOT push the branch. + - DO NOT open a pull request. + - DO NOT modify unrelated files outside the scope of this issue. + - Stop when every acceptance criterion is checked off. 
import type { ProjectIssue, RepoContext } from "../types.js";

/** Everything needed to render the prompt for one ticket. */
export interface PromptInput {
  issue: ProjectIssue;
  repoContext: RepoContext;
}

const TDD_HINT =
  "If this slice has any test-shaped surface, follow a red-green-refactor TDD loop: write a failing test that captures an acceptance criterion, make it pass with the smallest change, refactor.";

/**
 * Render the markdown prompt handed to the agent for a single issue.
 *
 * Deterministic: identical input always yields the identical string. The
 * prompt tells the agent to commit only; pushing and opening the pull request
 * are left to the runner.
 *
 * Sections, in order: title heading, issue body (or an "empty" placeholder),
 * repo context (only when at least one context path is set), approach, and the
 * output contract. Sections are separated by a blank line and the result ends
 * with a single newline.
 */
export function build({ issue, repoContext }: PromptInput): string {
  const { contextMdPath, adrDirPath } = repoContext;

  const trimmedBody = issue.body.trim();
  const issueBody = trimmedBody === "" ? "_(empty)_" : trimmedBody;

  // One bullet per context path that is present; order is fixed.
  const contextBullets: string[] = [];
  if (contextMdPath) {
    contextBullets.push(
      `- Domain language: see \`${contextMdPath}\` for the project's vocabulary. Use those terms in code and commit messages.`,
    );
  }
  if (adrDirPath) {
    contextBullets.push(
      `- Architectural decisions: skim \`${adrDirPath}\` for any ADR that touches the area you're changing. Respect existing decisions.`,
    );
  }

  const outputContract = [
    "## Output contract",
    "",
    "- Make as many commits on the current branch as needed to satisfy every acceptance criterion in the issue body.",
    "- DO NOT push the branch.",
    "- DO NOT open a pull request.",
    "- DO NOT modify unrelated files outside the scope of this issue.",
    "- Stop when every acceptance criterion is checked off.",
  ].join("\n");

  const parts = [
    `# Implement issue #${issue.number}: ${issue.title}`,
    `## Issue body\n\n${issueBody}`,
    // The repo-context section disappears entirely when there is nothing to list.
    ...(contextBullets.length > 0
      ? [`## Repo context\n\n${contextBullets.join("\n")}`]
      : []),
    `## Approach\n\n${TDD_HINT}`,
    outputContract,
  ];

  return `${parts.join("\n\n")}\n`;
}
*/ + itemId: string; + title: string; + body: string; + kind: IssueKind; + status: IssueStatus; + blockedBy: number[]; + assignee?: string; +} +/** A Project (id and number), the owner login and repo it belongs to, and the issues it tracks. */ +export interface ProjectState { + projectId: string; + projectNumber: number; + ownerLogin: string; + repo: string; + issues: ProjectIssue[]; +} +/** A numbered group of issues. NOTE(review): presumably index orders the phases and a phase runs after earlier ones finish; confirm in phase-scheduler. */ +export interface Phase { + index: number; + issues: ProjectIssue[]; +} +/** Failure categories; used as the kind of AgentResult.failure. */ +export type FailureKind = + | "agent-error" + | "tests-failed" + | "build-failed" + | "sandbox-timeout" + | "unknown"; +/** Context for deciding how to react to a failure. NOTE(review): retryCount is presumably the number of retries already made; confirm in failure-handler. */ +export interface FailureContext { + variant: Variant; + retryCount: number; +} +/** Discriminated union on kind: retry, skip with a reason, or halt with a reason. */ +export type FailureAction = + | { kind: "retry" } + | { kind: "skip"; reason: string } + | { kind: "halt"; reason: string }; +/** Optional paths to repo documentation; build() in prompt-builder mentions each one in the prompt only when it is set. */ +export interface RepoContext { + contextMdPath?: string; + adrDirPath?: string; +} +/** Outcome of one agent run. NOTE(review): failure is presumably set only when success is false; confirm in agent-runner. */ +export interface AgentResult { + success: boolean; + failure?: { kind: FailureKind; logs: string }; +} +/** Runner options: the project (number, owner login, repo), the variant, the path of the target repo, an optional base branch and the GitHub token. */ +export interface RunnerOptions { + projectNumber: number; + variant: Variant; + targetRepoPath: string; + baseBranch?: string; + githubToken: string; + ownerLogin: string; + repo: string; +} diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..f311ba6 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,21 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "NodeNext", + "moduleResolution": "NodeNext", + "lib": ["ES2022"], + "outDir": "dist", + "rootDir": "src", + "strict": true, + "noUncheckedIndexedAccess": true, + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "skipLibCheck": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "resolveJsonModule": true + }, + "include": ["src"], + "exclude": ["node_modules", "dist", "**/*.test.ts"] +} diff --git a/vitest.config.ts b/vitest.config.ts new file mode 100644 index 0000000..ce36a74 --- /dev/null +++ b/vitest.config.ts @@ -0,0 +1,8 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + include: ["src/**/*.test.ts"], + 
environment: "node", + }, +});