diff --git a/.github/workflows/weekly-k8s-backup.yml b/.github/workflows/weekly-k8s-backup.yml index 7a7bf20..dad7556 100644 --- a/.github/workflows/weekly-k8s-backup.yml +++ b/.github/workflows/weekly-k8s-backup.yml @@ -48,11 +48,16 @@ jobs: aws sts get-caller-identity - - name: Run backup + - name: Run DEV backup run: | + ENVIRONMENT=dev \ bash mist_infra/scripts/backup_k8s_storage_to_s3.sh - + - name: Run PROD backup + run: | + ENVIRONMENT=prod \ + I_UNDERSTAND_THIS_IS_PROD=true \ + bash mist_infra/scripts/backup_k8s_storage_to_s3.sh # example of restoring a back up # aws s3 cp s3://mist-backups/2025-03-09/k8s_storage_mist_2025-03-09_02-30-01.tar.gz . diff --git a/mist_infra/scripts/backup_k8s_storage_to_s3.sh b/mist_infra/scripts/backup_k8s_storage_to_s3.sh index 4fb95a7..9909ae9 100644 --- a/mist_infra/scripts/backup_k8s_storage_to_s3.sh +++ b/mist_infra/scripts/backup_k8s_storage_to_s3.sh @@ -1,68 +1,103 @@ #!/usr/bin/env bash set -euo pipefail -# ----------------------------- -# Config -# ----------------------------- +# ================================================== +# REQUIRED ENV VARS +# ================================================== +ENVIRONMENT="${ENVIRONMENT:-}" + +if [[ -z "$ENVIRONMENT" ]]; then + echo "❌ ENVIRONMENT must be set to dev or prod" + exit 1 +fi + +# ================================================== +# GLOBAL CONFIG +# ================================================== NAMESPACE="default" - -PG_ENABLED=true -PG_SECRET_NAME="postgres-secret" -PG_POD_SELECTOR="app=postgres" - K8S_STORAGE_ROOT="/k8s_storage" - -# NEVER touch Postgres raw data -TAR_EXCLUDES=( - "$K8S_STORAGE_ROOT/postgres" -) - BACKUP_ROOT="/tmp/k8s-backups" DATE="$(date -u +%Y-%m-%d_%H-%M-%S)" BACKUP_DIR="$BACKUP_ROOT/$DATE" mkdir -p "$BACKUP_DIR" -echo "=== Backup started at $(date -u) ===" +# NEVER touch raw Postgres data +TAR_EXCLUDES=( + "$K8S_STORAGE_ROOT/postgres" +) -# ----------------------------- -# Postgres pg_dump (SAFE) -# ----------------------------- -if [[ "$PG_ENABLED" == "true" ]]; then - echo "=== Postgres pg_dump enabled ===" +# ================================================== +# ENVIRONMENT SWITCH +# ================================================== +case "$ENVIRONMENT" in + dev) + PG_SECRET_NAME="postgres-secret" + PG_POD_SELECTOR="app=postgres-dev" + S3_PREFIX="dev" + ;; + prod) + if [[ "${I_UNDERSTAND_THIS_IS_PROD:-}" != "true" ]]; then + echo "❌ Refusing to run PROD backup without confirmation" + echo " Re-run with: I_UNDERSTAND_THIS_IS_PROD=true" + exit 1 + fi - POSTGRES_POD=$(kubectl get pods \ - -n "$NAMESPACE" \ - -l "$PG_POD_SELECTOR" \ - -o jsonpath='{.items[*].metadata.name}' | awk '{print $1}') - - if [[ -z "$POSTGRES_POD" ]]; then - echo "❌ No Postgres pod found" - kubectl get pods -n "$NAMESPACE" + PG_SECRET_NAME="postgres-prod-secret" + PG_POD_SELECTOR="app=postgres-prod" + S3_PREFIX="prod" + ;; + *) + echo "❌ Invalid ENVIRONMENT: $ENVIRONMENT (must be dev or prod)" exit 1 - fi + ;; +esac - POSTGRES_USER=$(kubectl get secret "$PG_SECRET_NAME" \ - -n "$NAMESPACE" \ - -o jsonpath='{.data.POSTGRES_USER}' | base64 -d) +echo "=== Backup started ($(date -u)) ===" +echo "Environment: $ENVIRONMENT" - POSTGRES_DB=$(kubectl get secret "$PG_SECRET_NAME" \ - -n "$NAMESPACE" \ - -o jsonpath='{.data.POSTGRES_DB}' | base64 -d) +# ================================================== +# POSTGRES DUMP (SAFE) +# ================================================== +POSTGRES_POD=$(kubectl get pods \ + -n "$NAMESPACE" \ + -l "$PG_POD_SELECTOR" \ + -o jsonpath='{.items[*].metadata.name}' | awk '{print $1}') - echo "Dumping database: $POSTGRES_DB" - - kubectl exec -n "$NAMESPACE" "$POSTGRES_POD" -- \ - pg_dump -U "$POSTGRES_USER" "$POSTGRES_DB" \ - > "$BACKUP_DIR/postgres.sql" - - echo "✔ pg_dump complete ($(du -h "$BACKUP_DIR/postgres.sql" | cut -f1))" +if [[ -z "$POSTGRES_POD" ]]; then + echo "❌ No Postgres pod found for selector: $PG_POD_SELECTOR" + kubectl get pods -n "$NAMESPACE" + exit 1 fi -# ----------------------------- -# Normalise permissions (EXCLUDING POSTGRES) -# ----------------------------- -echo "=== Normalising permissions (excluding Postgres) ===" +POSTGRES_USER=$(kubectl get secret "$PG_SECRET_NAME" \ + -n "$NAMESPACE" \ + -o jsonpath='{.data.POSTGRES_USER}' | base64 -d) + +POSTGRES_DB=$(kubectl get secret "$PG_SECRET_NAME" \ + -n "$NAMESPACE" \ + -o jsonpath='{.data.POSTGRES_DB}' 2>/dev/null | base64 -d || true) + +if [[ -z "$POSTGRES_DB" ]]; then + echo "❌ POSTGRES_DB missing in secret $PG_SECRET_NAME" + exit 1 +fi + +echo "Dumping database: $POSTGRES_DB" + +kubectl exec -n "$NAMESPACE" "$POSTGRES_POD" -- \ + pg_dump \ + -h localhost \ + -U "$POSTGRES_USER" \ + "$POSTGRES_DB" \ + > "$BACKUP_DIR/postgres.sql" + +echo "✔ pg_dump complete ($(du -h "$BACKUP_DIR/postgres.sql" | cut -f1))" + +# ================================================== +# NORMALISE PERMISSIONS (EXCLUDING POSTGRES) +# ================================================== +echo "Normalising permissions (excluding Postgres data)..." sudo find "$K8S_STORAGE_ROOT" \ -mindepth 1 \ @@ -70,19 +105,14 @@ sudo find "$K8S_STORAGE_ROOT" \ ! -name postgres \ -exec chmod -R a+rX {} \; || true -# ----------------------------- -# Build tar exclude args -# ----------------------------- +# ================================================== +# ARCHIVE K8S STORAGE (SAFE) +# ================================================== TAR_EXCLUDE_ARGS=() for path in "${TAR_EXCLUDES[@]}"; do TAR_EXCLUDE_ARGS+=(--exclude="$path") done -# ----------------------------- -# Archive k8s storage (SAFE) -# ----------------------------- -echo "=== Archiving k8s storage ===" - tar \ --ignore-failed-read \ --warning=no-file-changed \ @@ -92,21 +122,21 @@ tar \ echo "✔ k8s_storage archived ($(du -h "$BACKUP_DIR/k8s_storage_$DATE.tar.gz" | cut -f1))" -# ----------------------------- -# Upload to S3 -# ----------------------------- -S3_BUCKET="s3://mist-backups/$DATE" +# ================================================== +# UPLOAD TO S3 +# ================================================== +S3_BUCKET="s3://mist-backups/$S3_PREFIX/$DATE" aws s3 cp "$BACKUP_DIR" "$S3_BUCKET" --recursive echo "✔ Uploaded to $S3_BUCKET" -# ----------------------------- -# Restore instructions -# ----------------------------- +# ================================================== +# RESTORE GUIDE +# ================================================== echo "" echo "========================================" -echo "=== RESTORE GUIDE" +echo "=== RESTORE GUIDE ($ENVIRONMENT)" echo "========================================" echo "" echo "Restore volumes:"