diff --git a/deploy/README.md b/deploy/README.md
index 25c1259..fac67b5 100644
--- a/deploy/README.md
+++ b/deploy/README.md
@@ -128,3 +128,15 @@ re-initdb the cluster, use `--encoding=UTF8 --locale=C.UTF-8`.
   chown void: /var/lib/void/icons
   ```
 - **Service registry** — edit `config/services.json` to the real homelab service URLs and CT numbers. The committed seed values are best-guess placeholders and should be updated before the health band is meaningful.
+
+## Deploy safety (push.sh, hardened)
+`./deploy/push.sh` now does an atomic-ish, self-verifying deploy:
+1. **Snapshots** the current remote code (excluding `node_modules`/`.env`) to `/opt/void-server.prev` for rollback.
+2. rsyncs the new code (`--delete`; preserves `node_modules` + `.env`).
+3. Runs **`npm install --omit=dev` + `npm run migrate`** as part of the deploy (no more separate manual migrate step).
+4. Restarts `void-server`.
+5. **Health-gates**: polls `/health` until it reports the expected `package.json` version + `db_ok` (25 attempts, ≈25s; longer if requests hang until curl's 4s timeout).
+6. **Auto-rolls-back** on any failure: restores the `.prev` snapshot, reinstalls, restarts.
+
+Override the health endpoint with `HEALTH_URL=…` if the target IP differs.
+Caveat: forward-only migrations are not auto-reverted on rollback (they're additive by convention, so a code rollback against the new schema is safe; a destructive migration needs manual care).
diff --git a/deploy/push.sh b/deploy/push.sh
index 8d1849e..146f74f 100755
--- a/deploy/push.sh
+++ b/deploy/push.sh
@@ -1,25 +1,108 @@
 #!/usr/bin/env bash
-set -euo pipefail
+set -uo pipefail
+# NOTE: not `-e` — failures are handled explicitly so we can roll back.
 
-# Push dev source to void2-app (CT 311) and restart the service.
-# Run from /project/src/void-v2.
+# Deploy dev source to void2-app (CT 311) with a snapshot + health gate + auto
+# rollback. Run from /project/src/void-v2.
 #
-# Override TARGET / REMOTE_DIR via env if needed:
-#   TARGET=root@192.168.1.216 ./deploy/push.sh
-# NOTE: target the LAN IP, not void2-app.hynesy.com (that resolves to the
-# Cloudflare tunnel, which can't carry SSH). CT 311 moved .13 -> .216 on
-# 2026-06-01 after a post-outage ARP/IP conflict on .13.
+# Override via env: TARGET=root@192.168.1.216 ./deploy/push.sh
+# Target the LAN IP, NOT void2-app.hynesy.com (that's the CF tunnel, no SSH).
+#
+# What it does:
+#   0. Preflight: abort if the version cannot be read from ./package.json
+#   1. Snapshot the current remote code (excl node_modules/.env) -> .prev
+#   2. rsync the new code in (--delete; preserves node_modules + .env)
+#   3. npm install --omit=dev && npm run migrate (migrations are part of deploy)
+#   4. systemctl restart void-server
+#   5. Poll /health until it reports the EXPECTED version + db_ok (25 tries, ~25s)
+#   6. On any failure: restore the snapshot, reinstall, restart -> roll back
+#
+# Caveat: forward-only migrations are NOT auto-reverted on rollback. They're
+# additive/backward-compatible by convention, so rolling the CODE back with the
+# new schema present is safe. A destructive migration would need manual care.
 
 TARGET=${TARGET:-root@192.168.1.216}
 REMOTE_DIR=${REMOTE_DIR:-/opt/void-server}
+PREV_DIR="${REMOTE_DIR}.prev"
+HOST_IP="${TARGET#*@}"
+HEALTH_URL=${HEALTH_URL:-http://${HOST_IP}:3000/health}
+EXPECT_VERSION=$(node -p "require('./package.json').version")
 
-rsync -avz --delete \
-  --exclude node_modules \
-  --exclude .git \
-  --exclude tests \
-  --exclude coverage \
-  --exclude .env \
-  ./ "$TARGET:$REMOTE_DIR/"
+RS_EXCLUDES=(--exclude node_modules --exclude .git --exclude tests --exclude coverage --exclude .env)
 
-ssh "$TARGET" "cd $REMOTE_DIR && npm install --omit=dev && systemctl restart void-server"
-echo "Deployed."
+say()  { printf '\n==> %s\n' "$*"; }
+fail() { printf '\n!! %s\n' "$*" >&2; }
+
+# Poll HEALTH_URL until it reports EXPECT_VERSION and "db_ok":true.
+# Returns 0 on the first matching response, 1 after 25 attempts without one.
+# Each attempt sleeps 1s and gives curl up to 4s, so the window is ~25s when
+# the host answers or refuses quickly, and longer when requests hang.
+# NOTE(review): the match assumes compact JSON (no space after ':') — confirm.
+health_ok() {
+  local attempt body
+  for (( attempt = 1; attempt <= 25; attempt++ )); do
+    sleep 1
+    body=$(curl -fsS -m4 "$HEALTH_URL" 2>/dev/null) || continue
+    # -F: match the version literally; as a regex, the dots in 1.2.3 would
+    # also accept 1x2y3.
+    if grep -qF "\"version\":\"${EXPECT_VERSION}\"" <<<"$body" \
+      && grep -qF '"db_ok":true' <<<"$body"; then
+      return 0
+    fi
+  done
+  return 1
+}
+
+# Restore the .prev snapshot over the live release, reinstall deps, restart,
+# then probe HEALTH_URL once. Best-effort: it reports problems via fail() but
+# never exits; callers exit 1 themselves.
+rollback() {
+  fail "Rolling back to the previous release"
+  # if/else rather than `a && b || c`, which would also run c whenever b fails.
+  if ssh "$TARGET" "test -d '$PREV_DIR' && rsync -a --delete --exclude node_modules --exclude .env '$PREV_DIR/' '$REMOTE_DIR/' && cd '$REMOTE_DIR' && npm install --omit=dev >/dev/null 2>&1 && systemctl restart void-server"; then
+    say "Rollback applied"
+  else
+    fail "Rollback command errored — check the host manually"
+  fi
+  sleep 3
+  if curl -fsS -m4 "$HEALTH_URL" >/dev/null 2>&1; then
+    say "Service is responding after rollback."
+  else
+    fail "Service not responding after rollback — investigate on $TARGET"
+  fi
+}
+
+# 0. Preflight. Without `-e`, a failed `node -p` (wrong cwd, no package.json)
+# would not stop the script, and step 2 would `rsync --delete` the current
+# directory onto the live release. "undefined" means package.json has no version.
+if [[ -z "$EXPECT_VERSION" || "$EXPECT_VERSION" == "undefined" ]]; then
+  fail "Could not read a version from ./package.json — run from the project root"
+  exit 1
+fi
+
+say "Deploying ${EXPECT_VERSION} to ${TARGET}:${REMOTE_DIR}"
+
+# 1. Snapshot current code for rollback (code only — fast; keeps node_modules/.env in place)
+ssh "$TARGET" "mkdir -p '$PREV_DIR' && rsync -a --delete --exclude node_modules --exclude .env '$REMOTE_DIR/' '$PREV_DIR/'" \
+  || { fail "Snapshot failed — aborting before touching the live release"; exit 1; }
+
+# 2. Sync new code
+rsync -az --delete "${RS_EXCLUDES[@]}" ./ "$TARGET:$REMOTE_DIR/" \
+  || { fail "rsync failed"; rollback; exit 1; }
+
+# 3 + 4. Install deps, run migrations, restart
+if ! ssh "$TARGET" "cd '$REMOTE_DIR' && npm install --omit=dev && npm run migrate && systemctl restart void-server"; then
+  fail "install / migrate / restart failed"
+  rollback
+  exit 1
+fi
+
+# 5. Health gate
+say "Health-checking for ${EXPECT_VERSION} ..."
+if health_ok; then
+  say "Deployed ${EXPECT_VERSION} — healthy. ✓"
+else
+  fail "Health check did not confirm ${EXPECT_VERSION} (db_ok) within the window"
+  rollback
+  exit 1
+fi