diff --git a/mist_infra/arc/forgejo/deploy.sh b/mist_infra/arc/forgejo/deploy.sh
index a0ee926..6fc11d2 100755
--- a/mist_infra/arc/forgejo/deploy.sh
+++ b/mist_infra/arc/forgejo/deploy.sh
@@ -2,30 +2,58 @@
 set -euo pipefail
 
 # ==========================================================
-# Deploy Forgejo act_runner to Kubernetes
+# Deploy Forgejo act_runner + KEDA autoscaling to Kubernetes
 # ==========================================================
 
 NAMESPACE="forgejo-runners"
 SCRIPT_DIR="$(dirname "$0")"
 
-echo "=== Deploying Forgejo Runner ==="
+echo "=== Deploying Forgejo Runner with KEDA Autoscaling ==="
 
-# Prompt for token if not set in deployment.yaml
-TOKEN="RPAjk4Jdc42By5vSxnULPPPrjU0goPLQIiKgwOIo"
-echo
+# Runner registration token: must come from the environment, never from the repo
+RUNNER_TOKEN="${RUNNER_TOKEN:?export RUNNER_TOKEN (Forgejo runner registration token) before running}"
+
+# Forgejo API token — create at https://git.juntekim.com/user/settings/applications
+# Needs: read:repository scope (to list repos and query action tasks)
+if [[ -z "${FORGEJO_API_TOKEN:-}" ]]; then
+  read -rsp "Forgejo API token (for metrics exporter): " FORGEJO_API_TOKEN
+  echo
+fi
+
+# Install KEDA if not already installed
+if ! 
kubectl get crd scaledobjects.keda.sh &>/dev/null; then
+  echo "=== Installing KEDA ==="
+  helm repo add kedacore https://kedacore.github.io/charts
+  helm repo update kedacore
+  helm install keda kedacore/keda --namespace keda --create-namespace --wait  # block until KEDA is Ready so the ScaledObject apply below is not racing the install
+  echo "✅ KEDA installed"
+else
+  echo "✅ KEDA already installed"
+fi
 
 kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
 
+# Runner registration token (used by act_runner to register with Forgejo)
 kubectl create secret generic forgejo-runner-secret \
   --namespace "$NAMESPACE" \
-  --from-literal=token="$TOKEN" \
+  --from-literal=token="$RUNNER_TOKEN" \
+  --dry-run=client -o yaml | kubectl apply -f -
+
+# Forgejo API token (used by metrics exporter to query job queue)
+kubectl create secret generic forgejo-api-secret \
+  --namespace "$NAMESPACE" \
+  --from-literal=token="$FORGEJO_API_TOKEN" \
   --dry-run=client -o yaml | kubectl apply -f -
 
 kubectl apply -f "$SCRIPT_DIR/deployment.yaml"
+kubectl apply -f "$SCRIPT_DIR/metrics-exporter.yaml"
+kubectl apply -f "$SCRIPT_DIR/keda-scaledobject.yaml"
 
 echo
-echo "✅ Forgejo runner deployed"
+echo "✅ Forgejo runner deployed with KEDA autoscaling"
 echo
 echo "Next steps:"
-echo "- kubectl get pods -n $NAMESPACE"
-echo "- Check runner appears at: https://git.juntekim.com/-/admin/runners"
+echo " kubectl get pods -n $NAMESPACE"
+echo " kubectl get scaledobject -n $NAMESPACE"
+echo " kubectl logs -n $NAMESPACE deploy/forgejo-metrics-exporter"
+echo " Check runners at: https://git.juntekim.com/-/admin/runners"
diff --git a/mist_infra/arc/forgejo/deployment.yaml b/mist_infra/arc/forgejo/deployment.yaml
index 6e3a80e..56c0546 100644
--- a/mist_infra/arc/forgejo/deployment.yaml
+++ b/mist_infra/arc/forgejo/deployment.yaml
@@ -4,19 +4,6 @@ metadata:
   name: forgejo-runners
 ---
 apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: forgejo-runner-data
-  namespace: forgejo-runners
-spec:
-  accessModes:
-    - ReadWriteOnce
-  storageClassName: rook-ceph-block
-  
resources:
-    requests:
-      storage: 1Gi
 ---
-apiVersion: v1
 kind: Secret
 metadata:
   name: forgejo-runner-secret
@@ -26,12 +13,13 @@ stringData:
   token: "RPAjk4Jdc42By5vSxnULPPPrjU0goPLQIiKgwOIo"
 ---
 apiVersion: apps/v1
-kind: Deployment
+kind: StatefulSet  # each runner pod gets its own runner-data claim via volumeClaimTemplates
 metadata:
   name: forgejo-runner
   namespace: forgejo-runners
 spec:
-  replicas: 1
+  replicas: 3  # NOTE(review): the KEDA ScaledObject also targets this StatefulSet; confirm re-applying this file resetting the count to 3 is intended
+  serviceName: forgejo-runner  # NOTE(review): no Service named forgejo-runner is defined in this change; confirm it exists
   selector:
     matchLabels:
       app: forgejo-runner
@@ -40,6 +28,15 @@ spec:
       labels:
         app: forgejo-runner
     spec:
+      affinity:  # soft preference: spread runner pods across nodes
+        podAntiAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 100
+              podAffinityTerm:
+                labelSelector:
+                  matchLabels:
+                    app: forgejo-runner
+                topologyKey: kubernetes.io/hostname  # one topology domain per node hostname
       initContainers:
         - name: register
           image: gitea/act_runner:latest
@@ -51,7 +48,7 @@ spec:
               act_runner register --no-interactive \
                 --instance https://git.juntekim.com \
                 --token "RPAjk4Jdc42By5vSxnULPPPrjU0goPLQIiKgwOIo" \
-                --name mist-runner \
+                --name "$(hostname)" \
                 --labels "self-hosted,linux,x64"
             else
               echo "Runner already registered, skipping."
@@ -81,7 +78,13 @@ spec:
           volumeMounts:
             - name: runner-data
               mountPath: /data
-      volumes:
-        - name: runner-data
-          persistentVolumeClaim:
-            claimName: forgejo-runner-data
+  volumeClaimTemplates:  # replaces the single shared forgejo-runner-data claim removed above
+    - metadata:
+        name: runner-data  # must match the runner-data volumeMount name
+      spec:
+        accessModes:
+          - ReadWriteOnce
+        storageClassName: rook-ceph-block
+        resources:
+          requests:
+            storage: 1Gi
diff --git a/mist_infra/arc/forgejo/keda-scaledobject.yaml b/mist_infra/arc/forgejo/keda-scaledobject.yaml
new file mode 100644
index 0000000..78f9549
--- /dev/null
+++ b/mist_infra/arc/forgejo/keda-scaledobject.yaml
@@ -0,0 +1,21 @@
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: forgejo-runner-scaler
+  namespace: forgejo-runners
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: StatefulSet
+    name: forgejo-runner
+  minReplicaCount: 1  # keep at least 1 runner registered
+  maxReplicaCount: 100  # tune based on cluster capacity, not node count
+  pollingInterval: 15  # seconds between KEDA polls
+  cooldownPeriod: 60  # seconds KEDA waits before scaling to zero; NOTE(review): with minReplicaCount >= 1 this likely never applies and scale-down follows HPA behavior; confirm against KEDA docs
+  triggers:
+    - type: metrics-api
+      metadata:
+        # 1 replica per pending job (capped at maxReplicaCount)
+        targetValue: "1"
+        url: "http://forgejo-metrics-exporter.forgejo-runners.svc.cluster.local:8080"
+        valueLocation: "value"
diff --git a/mist_infra/arc/forgejo/metrics-exporter.yaml b/mist_infra/arc/forgejo/metrics-exporter.yaml
new file mode 100644
index 0000000..6e20a1b
--- /dev/null
+++ b/mist_infra/arc/forgejo/metrics-exporter.yaml
@@ -0,0 +1,146 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: forgejo-metrics-script
+  namespace: forgejo-runners
+data:
+  server.py: |
+    #!/usr/bin/env python3
+    """
+    Polls the Forgejo API for waiting action tasks and exposes the count as JSON.
+    KEDA metrics-api scaler reads: { "value": <pending job count> }
+    """
+    import http.server
+    import json
+    import os
+    import threading
+    import time
+    import urllib.request
+
+    FORGEJO_URL = os.environ["FORGEJO_URL"].rstrip("/")  # base URL without trailing slash
+    TOKEN = os.environ["FORGEJO_TOKEN"]  # API token injected from the forgejo-api-secret Secret; never hard-code it
+    REFRESH = int(os.environ.get("REFRESH_INTERVAL", "20"))  # seconds between Forgejo polls
+
+    _state = {"value": 0}  # last successfully computed pending-job count
+    _lock = threading.Lock()  # guards _state between the poller thread and HTTP handlers
+
+
+    def fetch(path):  # GET {FORGEJO_URL}/api/v1{path} with token auth (10 s timeout); returns decoded JSON
+        req = urllib.request.Request(
+            f"{FORGEJO_URL}/api/v1{path}",
+            headers={"Authorization": f"token {TOKEN}", "Accept": "application/json"},
+        )
+        with urllib.request.urlopen(req, timeout=10) as r:
+            return json.loads(r.read())
+
+
+    def count_pending_jobs():  # sums total_count of waiting tasks over every repo from /repos/search
+        total = 0
+        page = 1
+        while True:
+            repos = fetch(f"/repos/search?limit=50&page={page}")
+            items = repos.get("data", [])
+            for repo in items:
+                owner = repo["owner"]["login"]
+                name = repo["name"]
+                try:
+                    result = fetch(
+                        f"/repos/{owner}/{name}/actions/tasks?status=waiting&limit=1"  # NOTE(review): confirm the endpoint honors status=waiting
+                    )
+                    total += result.get("total_count", 0)
+                except Exception as e:  # best-effort: one failing repo must not abort the whole count
+                    print(f"skipping {owner}/{name}: {e}", flush=True)
+            if len(items) < 50:  # short page means this was the last page
+                break
+            page += 1
+        return total
+
+
+    def refresh_loop():  # poller thread body: recompute the count every REFRESH seconds, keep the old value on error
+        while True:
+            try:
+                count = count_pending_jobs()
+                with _lock:
+                    _state["value"] = count
+                print(f"pending jobs: {count}", flush=True)
+            except Exception as e:
+                print(f"refresh error: {e}", flush=True)
+            time.sleep(REFRESH)
+
+
+    threading.Thread(target=refresh_loop, daemon=True).start()
+
+
+    class Handler(http.server.BaseHTTPRequestHandler):  # every GET path answers {"value": <count>} as JSON
+        def do_GET(self):
+            with _lock:
+                val = _state["value"]
+            body = json.dumps({"value": val}).encode()
+            self.send_response(200)
+            self.send_header("Content-Type", "application/json")
+            self.end_headers()
+            self.wfile.write(body)
+
+        def log_message(self, *_):  # silence per-request access logging
+            pass
+
+
+    http.server.HTTPServer(("", 8080), Handler).serve_forever()
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: forgejo-metrics-exporter
+  namespace: forgejo-runners
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: forgejo-metrics-exporter
+  template:
+    
metadata:
+      labels:
+        app: forgejo-metrics-exporter  # matched by both the Deployment selector and the Service selector
+    spec:
+      containers:
+        - name: exporter
+          image: python:3.12-alpine
+          command: ["python", "/scripts/server.py"]  # script is mounted from the forgejo-metrics-script ConfigMap
+          env:
+            - name: FORGEJO_URL
+              value: https://git.juntekim.com
+            - name: FORGEJO_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: forgejo-api-secret  # Secret created by deploy.sh
+                  key: token
+            - name: REFRESH_INTERVAL
+              value: "20"  # seconds; same as the script's built-in default
+          ports:
+            - containerPort: 8080  # server.py listens on 8080
+          resources:
+            requests:
+              cpu: 10m
+              memory: 32Mi
+            limits:
+              cpu: 100m
+              memory: 64Mi
+          volumeMounts:
+            - name: scripts
+              mountPath: /scripts
+      volumes:
+        - name: scripts
+          configMap:
+            name: forgejo-metrics-script
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: forgejo-metrics-exporter  # host name used in the KEDA metrics-api trigger URL
+  namespace: forgejo-runners
+spec:
+  selector:
+    app: forgejo-metrics-exporter
+  ports:
+    - port: 8080
+      targetPort: 8080