diff --git a/deploy/README.md b/deploy/README.md
index 25c1259..fac67b5 100644
--- a/deploy/README.md
+++ b/deploy/README.md
@@ -128,3 +128,15 @@ re-initdb the cluster, use `--encoding=UTF8 --locale=C.UTF-8`.
chown void: /var/lib/void/icons
```
- **Service registry** — edit `config/services.json` to the real homelab service URLs and CT numbers. The committed seed values are best-guess placeholders and should be updated before the health band is meaningful.
+
+## Deploy safety (push.sh, hardened)
+`./deploy/push.sh` now does an atomic-ish, self-verifying deploy:
+1. **Snapshots** the current remote code (excl `node_modules`/`.env`) to `/opt/void-server.prev` for rollback.
+2. rsyncs the new code (`--delete`; preserves `node_modules` + `.env`).
+3. Runs **`npm install --omit=dev` + `npm run migrate`** as part of the deploy (no more separate manual migrate step).
+4. Restarts `void-server`.
+5. **Health-gates**: polls `/health` (up to 25 attempts, about 1s apart; each request is capped at 4s) until it reports the expected `package.json` version + `db_ok`.
+6. **Auto-rolls-back** on any failure: restores the `.prev` snapshot, reinstalls, restarts.
+
+Override the health endpoint with `HEALTH_URL=…` if the target IP differs.
+Caveat: forward-only migrations are not auto-reverted on rollback (they're additive by convention, so a code rollback against the new schema is safe; a destructive migration needs manual care).
diff --git a/deploy/push.sh b/deploy/push.sh
index 8d1849e..146f74f 100755
--- a/deploy/push.sh
+++ b/deploy/push.sh
@@ -1,25 +1,82 @@
#!/usr/bin/env bash
-set -euo pipefail
+set -uo pipefail
+# NOTE: not `-e` — failures are handled explicitly so we can roll back.
-# Push dev source to void2-app (CT 311) and restart the service.
-# Run from /project/src/void-v2.
+# Deploy dev source to void2-app (CT 311) with a snapshot + health gate + auto
+# rollback. Run from /project/src/void-v2.
#
-# Override TARGET / REMOTE_DIR via env if needed:
-# TARGET=root@192.168.1.216 ./deploy/push.sh
-# NOTE: target the LAN IP, not void2-app.hynesy.com (that resolves to the
-# Cloudflare tunnel, which can't carry SSH). CT 311 moved .13 -> .216 on
-# 2026-06-01 after a post-outage ARP/IP conflict on .13.
+# Override via env: TARGET=root@192.168.1.216 ./deploy/push.sh
+# Target the LAN IP, NOT void2-app.hynesy.com (that's the CF tunnel, no SSH).
+#
+# What it does:
+# 1. Snapshot the current remote code (excl node_modules/.env) -> $REMOTE_DIR.prev
+# 2. rsync the new code in (--delete; preserves node_modules + .env)
+# 3. npm install --omit=dev && npm run migrate (migrations are part of deploy)
+# 4. systemctl restart void-server
+# 5. Poll /health until it reports the EXPECTED version + db_ok (up to 25 tries, ~1s apart)
+# 6. On any failure: restore the snapshot, reinstall, restart -> roll back
+#
+# Caveat: forward-only migrations are NOT auto-reverted on rollback. They're
+# additive/backward-compatible by convention, so rolling the CODE back with the
+# new schema present is safe. A destructive migration would need manual care.
TARGET=${TARGET:-root@192.168.1.216}
REMOTE_DIR=${REMOTE_DIR:-/opt/void-server}
+PREV_DIR="${REMOTE_DIR}.prev"
+HOST_IP="${TARGET#*@}"
+HEALTH_URL=${HEALTH_URL:-http://${HOST_IP}:3000/health}
+EXPECT_VERSION=$(node -p "require('./package.json').version")
-rsync -avz --delete \
- --exclude node_modules \
- --exclude .git \
- --exclude tests \
- --exclude coverage \
- --exclude .env \
- ./ "$TARGET:$REMOTE_DIR/"
+RS_EXCLUDES=(--exclude node_modules --exclude .git --exclude tests --exclude coverage --exclude .env)
-ssh "$TARGET" "cd $REMOTE_DIR && npm install --omit=dev && systemctl restart void-server"
-echo "Deployed."
+say() { printf '\n==> %s\n' "$*"; }
+fail() { printf '\n!! %s\n' "$*" >&2; }
+
+health_ok() {
+ # Returns 0 if /health reports EXPECT_VERSION and db_ok:true within the window.
+ local i body
+ for i in $(seq 1 25); do
+ sleep 1
+ body=$(curl -fsS -m4 "$HEALTH_URL" 2>/dev/null) || continue
+ if printf '%s' "$body" | grep -q "\"version\":\"${EXPECT_VERSION}\"" \
+ && printf '%s' "$body" | grep -q '"db_ok":true'; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+rollback() {
+ fail "Rolling back to the previous release"
+ ssh "$TARGET" "test -d '$PREV_DIR' && rsync -a --delete --exclude node_modules --exclude .env '$PREV_DIR/' '$REMOTE_DIR/' && cd '$REMOTE_DIR' && npm install --omit=dev >/dev/null 2>&1 && systemctl restart void-server" \
+ && say "Rollback applied" || fail "Rollback command errored — check the host manually"
+ sleep 3
+ curl -fsS -m4 "$HEALTH_URL" >/dev/null 2>&1 && say "Service is responding after rollback." || fail "Service not responding after rollback — investigate on $TARGET"
+}
+
+say "Deploying ${EXPECT_VERSION} to ${TARGET}:${REMOTE_DIR}"
+
+# 1. Snapshot current code for rollback (code only — fast; keeps node_modules/.env in place)
+ssh "$TARGET" "mkdir -p '$PREV_DIR' && rsync -a --delete --exclude node_modules --exclude .env '$REMOTE_DIR/' '$PREV_DIR/'" \
+ || { fail "Snapshot failed — aborting before touching the live release"; exit 1; }
+
+# 2. Sync new code
+rsync -az --delete "${RS_EXCLUDES[@]}" ./ "$TARGET:$REMOTE_DIR/" \
+ || { fail "rsync failed"; rollback; exit 1; }
+
+# 3 + 4. Install deps, run migrations, restart
+if ! ssh "$TARGET" "cd '$REMOTE_DIR' && npm install --omit=dev && npm run migrate && systemctl restart void-server"; then
+ fail "install / migrate / restart failed"
+ rollback
+ exit 1
+fi
+
+# 5. Health gate
+say "Health-checking for ${EXPECT_VERSION} ..."
+if health_ok; then
+ say "Deployed ${EXPECT_VERSION} — healthy. ✓"
+else
+ fail "Health check did not confirm ${EXPECT_VERSION} (db_ok) within the window"
+ rollback
+ exit 1
+fi