diff --git a/CHANGELOG.md b/CHANGELOG.md
index 48cd5c3..be48eb8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,19 @@ Format: [Keep a Changelog](https://keepachangelog.com).
- feat: phuryn usage dashboard now reachable at aiusage.hynesy.com behind CF Access.
- feat: Sacred Valley AI Usage card opens the in-Void #/ai-usage route.
+## 2.0.0-alpha.26 — Topbar cluster-health pill + always-fresh self-heal
+- **Topbar cluster-health indicator** (`public/components/topbar.js`): a themed pill left of Inbox/Chat/Owner that polls `/api/cluster` every 30s and shows **healthy** (green) when quorate + all nodes online + HA clean, **HA issue / node down / no quorum** (amber/red) otherwise. Click → Sacred Valley. Reuses the `--ok/--warn/--bad` dot palette.
+- **Always-fresh self-heal** (`public/index.html`): inline pre-module script unregisters any service worker and clears caches on every load. The legacy Void 1 caching SW (origin-scoped to `void.hynesy.com`) was serving stale assets that survived hard reloads; this removes it on the next load and prevents recurrence on every device. Assets are already served `no-cache`, so with no SW the app is always fresh.
+
+## 2.0.0-alpha.25 — Cluster health Sacred Valley card
+- **`GET /api/cluster`** (`lib/proxmox/cluster.js` + route, 10s-cached): read-only Proxmox cluster health — `quorate`, per-node online state, HA master/fencing, and HA service count + error count. Pure `normalizeCluster()` folds `/cluster/status` + `/cluster/ha/status/current`; unit-tested with injected fetch. Uses a **dedicated read-only PVE token** (`PROXMOX_RO_TOKEN`, user `void-ro@pve` with `PVEAuditor` on `/`) — never the power-action token.
+- **Sacred Valley "Cluster · HZ" card** (`public/views/cards/cluster.js`, registered in `sacred_valley.js`): polls every 30s, shows the quorum badge, node up/down dots, master, and HA-service issues. Reuses the tile status palette (blackflame `--ok`/`--warn`/`--bad`).
+
+## 2.0.0-alpha.24 — Infra sanity check + LAN host/MAC inventory
+- **`network_hosts` inventory table** (`migration 023`, repo `lib/db/repos/network_hosts.js`): authoritative id→ip→MAC map of every cluster guest + PVE host + the Pi QDevice, seeded from a live capture. Source of truth for router DHCP reservations (the LAN pool is the whole `.2–.254`, so each pinned guest needs a static IP + a MAC reservation) and for the audit below. Idempotent seed (`ON CONFLICT DO UPDATE`).
+- **`infra_audit` sanity check** (`lib/infra/audit.js`, `GET /api/infra/audit`, MCP tool `infra_audit` in `blueRegistry`): probes every `192.168.x.y:port` referenced in the Wiki **and** every enabled service URL, reports unreachable endpoints (stale/incorrect IPs or ports) grouped by source, plus inventory hosts missing a MAC. Read-only TCP connects; available to the owner or any authed agent (e.g. Little Blue) so agents can verify the docs/registry match reality.
+- **Service registry IP fixes**: `magicmirror` → `192.168.1.224`, `obd2` → `192.168.1.225` (moved off contested DHCP-range addresses to static).
+
## 2.0.0-alpha.23 — Local/remote-aware service tiles
- **Optional `external` URL per service** (`migration 022`, `config/services.json`, repo + `/api/health/services` payload + `svcBody`): Little Blue health-band tiles previously linked to the single LAN `url`, so they opened dead private IPs when browsing remotely (e.g. Gramps `http://192.168.1.99`). Migration adds the column and **backfills** curated domains by id (the live instance is already seeded, so a column-add alone wouldn't populate them); also normalises `jellyfin`/`chaptarr` (which stored a domain in `url`) to LAN `url` + `external`.
- **Context-based tile target + one-click alt** (`public/views/service_url.js`, `public/components/service_tile.js`, `public/views/health_band.js`): the tile picks its primary URL from `location.hostname` — public host (e.g. `void.hynesy.com`) opens the domain, private IP/localhost/.local opens the LAN address — and always offers a `⇄` alt to the *other* URL (a reliable manual fallback; an auto-probe can't work because an HTTPS dashboard is blocked from probing `http://` LAN IPs by mixed-content). Services with no `external` are dimmed with a "LAN-only" badge when remote. Tile root is now a `div` with a stretched primary `<a>` + sibling alt `<a>` (no nested anchors). Health checker unchanged (still probes LAN `url` from CT 311).
diff --git a/config/services.json b/config/services.json
index 1a2b541..bfa06f9 100644
--- a/config/services.json
+++ b/config/services.json
@@ -10,7 +10,7 @@
{ "id": "gramps", "name": "Gramps Web", "category": "infrastructure", "host": "ct109", "url": "http://192.168.1.99", "external": "https://gramps.hynesy.com", "icon": "gramps" },
{ "id": "scanopy", "name": "Scanopy", "category": "infrastructure", "host": "ct100", "url": "http://192.168.1.230:60072", "icon": "scanopy" },
{ "id": "homelab", "name": "Homelable", "category": "infrastructure", "host": "ct100", "url": "http://192.168.1.230:3000", "icon": "" },
- { "id": "obd2", "name": "OBD2", "category": "infrastructure", "host": "ct .28", "url": "http://192.168.1.28:8384", "icon": "" },
+ { "id": "obd2", "name": "OBD2", "category": "infrastructure", "host": "ct112 · .225", "url": "http://192.168.1.225:8384", "icon": "" },
{ "id": "pterodactyl", "name": "Pterodactyl", "category": "infrastructure", "host": "192.168.1.247", "url": "http://192.168.1.247", "icon": "pterodactyl" },
{ "id": "pve-z", "name": "Proxmox · z", "category": "infrastructure", "host": "z", "url": "https://192.168.1.124:8006", "icon": "proxmox", "check": { "type": "tcp" } },
{ "id": "pve-z3", "name": "Proxmox · Z3", "category": "infrastructure", "host": "z3", "url": "https://192.168.1.125:8006", "icon": "proxmox", "check": { "type": "tcp" } },
@@ -25,6 +25,6 @@
{ "id": "void1", "name": "The Void 1.x", "category": "other", "host": "ct301", "url": "http://192.168.1.11:2424", "icon": "void" },
{ "id": "farm-timelapse", "name": "Farm Timelapse", "category": "other", "host": "192.168.1.108", "url": "http://192.168.1.108:8000", "icon": "" },
- { "id": "magicmirror", "name": "MagicMirror", "category": "other", "host": "192.168.1.27", "url": "http://192.168.1.27:8080", "icon": "magicmirror" },
+ { "id": "magicmirror", "name": "MagicMirror", "category": "other", "host": "ct111 · .224", "url": "http://192.168.1.224:8080", "icon": "magicmirror" },
{ "id": "claude-usage", "name": "Claude Usage", "category": "other", "host": "ct300", "url": "http://192.168.1.212:8080", "icon": "claude" }
]
diff --git a/lib/ai/agent/tools/blue/index.js b/lib/ai/agent/tools/blue/index.js
index 73b0b4f..15f6994 100644
--- a/lib/ai/agent/tools/blue/index.js
+++ b/lib/ai/agent/tools/blue/index.js
@@ -1,9 +1,12 @@
import { createRegistry } from '../../registry.js';
import { searchTool } from '../search.js';
import { listActionsTool, proposeActionTool } from './actions.js';
+import { infraAuditTool } from './infra_audit.js';
-// read (search) + her action tools. No propose_change (she fixes infra, not content).
+// read (search) + her action tools + infra sanity check. No propose_change
+// (she fixes infra, not content).
export const blueRegistry = createRegistry();
blueRegistry.registerTool(searchTool);
blueRegistry.registerTool(listActionsTool);
blueRegistry.registerTool(proposeActionTool);
+blueRegistry.registerTool(infraAuditTool);
diff --git a/lib/ai/agent/tools/blue/infra_audit.js b/lib/ai/agent/tools/blue/infra_audit.js
new file mode 100644
index 0000000..5aafcc9
--- /dev/null
+++ b/lib/ai/agent/tools/blue/infra_audit.js
@@ -0,0 +1,17 @@
+// Little Blue's infra sanity check. Runs in the MCP child (no infra creds) — it
+// calls the main server's read-only /api/infra/audit, which probes wiki-referenced
+// endpoints + registered service URLs and reports anything unreachable (e.g. a
+// doc/registry pointing at a stale IP) plus inventory hosts missing a MAC.
+// Picks the main server's base URL and the agent bearer token out of `env`
+// (the process environment unless a caller passes something else).
+function api(env = process.env) {
+  const { VOID_API_URL: base, VOID_AGENT_TOKEN: token } = env;
+  return { base, token };
+}
+
+/**
+ * MCP tool definition: asks the main server to run its read-only infra audit
+ * (GET {VOID_API_URL}/api/infra/audit, bearer-authenticated) and relays the
+ * JSON report.
+ *
+ * The handler resolves to `{ error }` rather than throwing when the base URL is
+ * not configured, when the request or JSON decoding fails, or when the server
+ * answers with a non-2xx status — so every failure reaches the agent in the
+ * same shape.
+ *
+ * handler(_args, _ctx, { fetchImpl }) — `fetchImpl` defaults to the global
+ * fetch and exists so the request can be stubbed.
+ */
+export const infraAuditTool = {
+  name: 'infra_audit',
+  description: 'Run a homelab sanity check: probe every IP:port the wiki references and every monitored service, and report unreachable endpoints (stale/incorrect IPs or ports) plus inventory hosts missing a MAC. Read-only — use to verify the docs/registry match reality.',
+  input_schema: { type: 'object', properties: {} },
+  async handler(_args, _ctx, { fetchImpl = fetch } = {}) {
+    const { base, token } = api();
+    // Without a base URL the request would go to "undefined/api/infra/audit".
+    if (!base) return { error: 'infra_audit not_configured' };
+    try {
+      const res = await fetchImpl(`${base}/api/infra/audit`, { headers: { Authorization: `Bearer ${token}` } });
+      if (!res.ok) return { error: `infra_audit ${res.status}` };
+      return await res.json();
+    } catch (err) {
+      // Network failure or an undecodable body: report it like an HTTP failure.
+      return { error: `infra_audit ${err?.message ?? err}` };
+    }
+  }
+};
diff --git a/lib/api/index.js b/lib/api/index.js
index f7e3e90..2999840 100644
--- a/lib/api/index.js
+++ b/lib/api/index.js
@@ -32,6 +32,8 @@ import { router as securityRouter } from './routes/security.js';
import { router as actionsRouter } from './routes/actions.js';
import { router as littleblueRouter } from './routes/littleblue.js';
import { router as aiUsageRouter } from './routes/ai_usage.js';
+import { router as infraRouter } from './routes/infra.js';
+import { router as clusterRouter } from './routes/cluster.js';
export function mountApi(app) {
const api = Router();
@@ -45,6 +47,8 @@ export function mountApi(app) {
api.use('/spaces/:space_id/companion', companionRouter);
api.use('/security', securityRouter);
api.use('/actions', actionsRouter);
+ api.use('/infra', infraRouter);
+ api.use('/cluster', clusterRouter);
api.use('/little-blue', littleblueRouter);
api.use('/ai-usage', aiUsageRouter);
api.use('/projects', projectsRouter);
diff --git a/lib/api/routes/cluster.js b/lib/api/routes/cluster.js
new file mode 100644
index 0000000..297074d
--- /dev/null
+++ b/lib/api/routes/cluster.js
@@ -0,0 +1,17 @@
+import { Router } from 'express';
+import { asyncWrap } from '../errors.js';
+import { clusterHealth } from '../../proxmox/cluster.js';
+
+// Read-only cluster health for the Sacred Valley card. Cached briefly so multiple
+// polling clients coalesce into one PVE call. Owner or any authed agent.
+export const router = Router();
+
+// Last payload and the time it was stored; served as-is while younger than TTL.
+let cache = { at: 0, data: null };
+// Promise of the refresh currently in progress, if any. Requests that arrive
+// while the cache is cold await this same promise instead of each starting
+// their own clusterHealth() call.
+let inflight = null;
+const TTL = 10_000; // ms
+
+// GET / — cached cluster health. Error payloads returned by clusterHealth() are
+// cached like any other result, exactly as before.
+router.get('/', asyncWrap(async (_req, res) => {
+  if (cache.data && Date.now() - cache.at < TTL) return res.json(cache.data);
+  inflight ??= clusterHealth()
+    .then((data) => {
+      cache = { at: Date.now(), data };
+      return data;
+    })
+    // Clear the slot on success and on failure so the next request can retry.
+    .finally(() => { inflight = null; });
+  res.json(await inflight);
+}));
diff --git a/lib/api/routes/infra.js b/lib/api/routes/infra.js
new file mode 100644
index 0000000..db0fe2d
--- /dev/null
+++ b/lib/api/routes/infra.js
@@ -0,0 +1,26 @@
+import { Router } from 'express';
+import { asyncWrap } from '../errors.js';
+import { pool } from '../../db/pool.js';
+import * as monitored from '../../db/repos/monitored_services.js';
+import * as networkHosts from '../../db/repos/network_hosts.js';
+import { runAudit, tcpProbe } from '../../infra/audit.js';
+
+// Read-only infra sanity check: probe every IP:port referenced in the wiki and
+// every enabled service URL, and surface hosts missing a recorded MAC. Available
+// to the owner or any authed agent (no mutations, just TCP connects).
+export const router = Router();
+
+// Probe used for the whole sweep. 1500 is the third tcpProbe argument —
+// presumably the connect timeout in ms; confirm against tcpProbe's signature.
+const probe = (host, port) => tcpProbe(host, port, 1500);
+
+// GET /audit — audit report plus the ids of inventory hosts that have no MAC.
+router.get('/audit', asyncWrap(async (_req, res) => {
+  const wikiPages = await pool.query(
+    `SELECT p.title, p.body_md FROM pages p JOIN spaces s ON s.id = p.space_id WHERE s.slug = 'wiki'`);
+  const enabled = await monitored.listEnabled();
+  // Only services whose url starts with http:// or https:// are audited.
+  const services = enabled.filter((svc) => /^https?:\/\//.test(svc.url || ''));
+  const report = await runAudit({ pages: wikiPages.rows, services, probe });
+  const hostsWithoutMac = await networkHosts.missingMac();
+  const missing_mac = hostsWithoutMac.map((host) => host.id);
+  res.json({ ...report, inventory: { missing_mac } });
+}));
+
+// GET /hosts — every row of the network_hosts inventory.
+router.get('/hosts', asyncWrap(async (_req, res) => {
+  const hosts = await networkHosts.all();
+  res.json({ hosts });
+}));
diff --git a/lib/db/migrations/023_network_hosts.sql b/lib/db/migrations/023_network_hosts.sql
new file mode 100644
index 0000000..0d8c04f
--- /dev/null
+++ b/lib/db/migrations/023_network_hosts.sql
@@ -0,0 +1,45 @@
+-- 023_network_hosts.sql
+-- Authoritative LAN inventory of cluster guests + hosts: id -> ip -> MAC.
+-- Source of truth for router DHCP reservations and the infra_audit sanity check.
+-- Pool is the whole .2-.254, so every pinned guest needs a static IP + a router
+-- reservation on its MAC; this table is where we record the MAC<->IP mapping.
+-- One row per inventoried host; `id` is the stable key the seed below upserts on.
+CREATE TABLE IF NOT EXISTS network_hosts (
+ id text PRIMARY KEY, -- e.g. ct100, vm200, pve-z, qdevice-pi
+ kind text NOT NULL, -- lxc | vm | pve-host | qdevice
+ name text NOT NULL,
+ node text, -- z | Z3 | won | -
+ ip text,
+ mac text, -- NULL when not yet captured (host down)
+ note text,
+ created_at timestamptz NOT NULL DEFAULT now(),
+ updated_at timestamptz NOT NULL DEFAULT now() -- no trigger is created here; writers set it explicitly (the upsert below does)
+);
+-- Non-unique index on ip: nothing in this migration prevents two rows sharing an address.
+CREATE INDEX IF NOT EXISTS idx_network_hosts_ip ON network_hosts(ip);
+
+-- Seed the current inventory (captured 2026-06-08). Idempotent: re-running keeps
+-- the row but refreshes ip/mac/note so a later edit-and-migrate stays correct.
+INSERT INTO network_hosts (id, kind, name, node, ip, mac, note) VALUES
+ ('ct100','lxc','mediastack','z','192.168.1.230','BC:24:11:D8:2B:7F','Docker media host'),
+ ('ct102','lxc','ollama','z','192.168.1.185','BC:24:11:06:89:40','Ollama (GPU)'),
+ ('ct103','lxc','openwebui','z','192.168.1.231','BC:24:11:98:28:A1','Open WebUI'),
+ ('ct104','lxc','bookstack','z','192.168.1.213','BC:24:11:C3:F4:0A','BookStack mirror'),
+ ('ct105','lxc','gitea','z','192.168.1.223','BC:24:11:AA:2B:4E','Gitea (static, was DHCP)'),
+ ('ct106','lxc','pihole','z','192.168.1.140','BC:24:11:DB:2A:39','Pi-hole DNS adblock'),
+ ('ct107','lxc','iventoy','z','192.168.1.150','BC:24:11:9B:01:10','PXE (parked, donatello-vm rootfs)'),
+ ('ct108','lxc','tlcapture','z','192.168.1.108','BC:24:11:6D:97:27','Farm Timelapse'),
+ ('ct109','lxc','gramps','z','192.168.1.99','BC:24:11:8E:D3:58','Gramps Web'),
+ ('ct110','lxc','n8n','z','192.168.1.235','BC:24:11:28:70:30','n8n'),
+ ('ct111','lxc','magicmirror','z','192.168.1.224','BC:24:11:6C:D4:E6','MagicMirror (static, was DHCP .27)'),
+ ('ct112','lxc','obd2','z','192.168.1.225','BC:24:11:E7:D8:BF','OBD2 telemetry (static, was DHCP .28)'),
+ ('ct300','lxc','claude','z','192.168.1.212','BC:24:11:9E:AA:73','Claude Code workspace'),
+ ('ct301','lxc','void1','z','192.168.1.11','BC:24:11:4D:B7:CC','Void 1.x legacy'),
+ ('ct310','lxc','void2-db','z','192.168.1.215','BC:24:11:49:C6:29','Void 2.0 Postgres'),
+ ('ct311','lxc','void2-app','z','192.168.1.216','BC:24:11:9B:B7:3A','Void 2.0 app'),
+ ('vm117','vm','Pterodactyl-Deb','z','192.168.1.247','BC:24:11:37:C1:F7','Game panel (static, in-guest)'),
+ ('vm200','vm','OpenClaw','z','192.168.1.183','BC:24:11:29:84:B9','OpenClaw agent (static, in-guest)'),
+ ('pve-z','pve-host','z','z','192.168.1.124','00:E0:4C:0F:36:00','Cluster node 1 (GPU)'),
+ ('pve-z3','pve-host','Z3','Z3','192.168.1.125','6C:0B:5E:78:1C:93','Cluster node 2 (HA target)'),
+ ('qdevice-pi','qdevice','retropie','-','192.168.1.254','D8:3A:DD:22:C4:21','QDevice corosync-qnetd — reserve this MAC to .254')
+ON CONFLICT (id) DO UPDATE SET
+ kind = EXCLUDED.kind, name = EXCLUDED.name, node = EXCLUDED.node,
+ ip = EXCLUDED.ip, mac = EXCLUDED.mac, note = EXCLUDED.note, updated_at = now();
diff --git a/lib/db/repos/network_hosts.js b/lib/db/repos/network_hosts.js
new file mode 100644
index 0000000..f14316b
--- /dev/null
+++ b/lib/db/repos/network_hosts.js
@@ -0,0 +1,28 @@
+import { pool } from '../pool.js';
+
+const COLS = 'id, kind, name, node, ip, mac, note, updated_at';
+
+// Authoritative guest/host LAN inventory (id -> ip -> mac). The canonical seed
+// lives in migration 023; the only write in this module is setMac(). Used by the
+// infra_audit sanity check and as the source for router DHCP reservations.
+/** Resolves to every network_hosts row (the COLS columns), ordered by id. */
+export async function all() {
+  const result = await pool.query(`SELECT ${COLS} FROM network_hosts ORDER BY id`);
+  return result.rows;
+}
+
+/** Resolves to the row with the given id, or null when no row matches. */
+export async function get(id) {
+  const result = await pool.query(`SELECT ${COLS} FROM network_hosts WHERE id=$1`, [id]);
+  const [row] = result.rows;
+  return row || null;
+}
+
+/** Resolves to the rows whose mac column is NULL (no MAC recorded yet), ordered by id. */
+export async function missingMac() {
+  const result = await pool.query(`SELECT ${COLS} FROM network_hosts WHERE mac IS NULL ORDER BY id`);
+  return result.rows;
+}
+
+/**
+ * Stores `mac` on the host with the given id and bumps updated_at.
+ * Resolves to the updated row, or null when the id does not exist.
+ * The value is written as given — no format validation happens here.
+ */
+export async function setMac(id, mac) {
+  const sql = `UPDATE network_hosts SET mac=$2, updated_at=now() WHERE id=$1 RETURNING ${COLS}`;
+  const result = await pool.query(sql, [id, mac]);
+  const [updated] = result.rows;
+  return updated || null;
+}
diff --git a/lib/infra/audit.js b/lib/infra/audit.js
new file mode 100644
index 0000000..1ab13a8
--- /dev/null
+++ b/lib/infra/audit.js
@@ -0,0 +1,86 @@
+import net from 'node:net';
+
+// Doc/infra sanity check. Pure functions with an injected `probe(host, port) ->
+// Promise` so they're testable offline; the default tcpProbe is used in prod.
+
+const LAN_RE = /(? {
+ const sock = new net.Socket();
+ let done = false;
+ const finish = (ok) => { if (done) return; done = true; sock.destroy(); resolve(ok); };
+ sock.setTimeout(timeoutMs);
+ sock.once('connect', () => finish(true));
+ sock.once('timeout', () => finish(false));
+ sock.once('error', () => finish(false));
+ sock.connect(port, host);
+ });
+}
+
+// Cross-check every IP:port referenced in the wiki against live reachability.
+// Flags stale references (e.g. a CT that moved off an old IP) grouped by page.
+//
+// pages: [{ title, body_md }] (null/undefined is treated as empty)
+// probe: (host, port) -> Promise of a truthy value when the endpoint answers
+// Resolves to { ok, summary, unreachable, unprobed }; endpoints found without a
+// port are never probed and are listed under `unprobed`.
+export async function auditDocs({ pages, probe }) {
+  const map = new Map(); // host:port -> { host, port, pages:Set }
+  for (const p of pages || []) {
+    for (const ep of extractEndpoints(p.body_md)) {
+      const key = `${ep.host}:${ep.port ?? ''}`;
+      if (!map.has(key)) map.set(key, { host: ep.host, port: ep.port, pages: new Set() });
+      map.get(key).pages.add(p.title);
+    }
+  }
+  const all = [...map.values()];
+  const probable = all.filter(e => e.port != null);
+  const unprobed = all.filter(e => e.port == null).map(e => ({ host: e.host, port: null, pages: [...e.pages] }));
+  // The probes do not depend on each other, so start them together: a sweep
+  // over many dead endpoints then takes about one probe timeout instead of one
+  // per endpoint. Promise.all keeps results in input order, so `unreachable`
+  // is ordered exactly as before.
+  const alive = await Promise.all(probable.map(e => probe(e.host, e.port)));
+  const unreachable = probable
+    .filter((_, i) => !alive[i])
+    .map(e => ({ host: e.host, port: e.port, pages: [...e.pages] }));
+  return {
+    ok: unreachable.length === 0,
+    summary: { endpoints: all.length, probed: probable.length, reachable: probable.length - unreachable.length, unreachable: unreachable.length },
+    unreachable,
+    unprobed
+  };
+}
+
+// Probe each registered service's LAN url; flag any that don't answer.
+//
+// services: [{ id, url }] (null/undefined is treated as empty); entries whose
+//           url parseUrl() rejects are skipped and not counted as probed.
+// probe:    (host, port) -> Promise of a truthy value when the endpoint answers
+// Resolves to { ok, summary: { probed, unreachable }, unreachable }.
+export async function auditServices({ services, probe }) {
+  const targets = [];
+  for (const s of services || []) {
+    const hp = parseUrl(s.url);
+    if (hp) targets.push({ id: s.id, url: s.url, host: hp.host, port: hp.port });
+  }
+  // Independent probes run together so the sweep is bounded by roughly one
+  // probe timeout; input order is preserved in the result.
+  const alive = await Promise.all(targets.map(t => probe(t.host, t.port)));
+  const unreachable = targets.filter((_, i) => !alive[i]);
+  return { ok: unreachable.length === 0, summary: { probed: targets.length, unreachable: unreachable.length }, unreachable };
+}
+
+// Entry point for the API route / MCP tool: audits the wiki pages first, then
+// the registered services, and reports `ok` only when both halves are clean.
+export async function runAudit({ pages = [], services = [], probe = tcpProbe }) {
+  const docs = await auditDocs({ pages, probe });
+  const serviceReport = await auditServices({ services, probe });
+  const ok = docs.ok && serviceReport.ok;
+  return { ok, docs, services: serviceReport };
+}
diff --git a/lib/proxmox/cluster.js b/lib/proxmox/cluster.js
new file mode 100644
index 0000000..53873f4
--- /dev/null
+++ b/lib/proxmox/cluster.js
@@ -0,0 +1,76 @@
+import { Agent } from 'undici';
+
+// Read-only Proxmox cluster health for the Sacred Valley card. Uses a dedicated
+// PVEAuditor token (PROXMOX_RO_TOKEN) — never the power-action token. PVE's REST
+// API has no vote-count endpoint, so "quorum" here = the corosync `quorate` flag
+// (from /cluster/status) plus the HA-manager quorum status (/cluster/ha/status).
+
+// Shared undici Agent with certificate verification turned off. Created on
+// first use and only when PROXMOX_INSECURE_TLS is exactly '1'.
+let insecure;
+function tlsDispatcher() {
+  const optedIn = process.env.PROXMOX_INSECURE_TLS === '1';
+  if (!optedIn) return undefined;
+  if (insecure == null) {
+    insecure = new Agent({ connect: { rejectUnauthorized: false } });
+  }
+  return insecure;
+}
+
+/**
+ * GET `${apiUrl}/api2/json${path}` with a PVE API token and return the `data`
+ * member of the JSON response (an empty array when it is absent).
+ *
+ * @param {string} path  API path starting with '/', e.g. '/cluster/status'
+ * @param {object} cfg
+ * @param {string} cfg.apiUrl     base URL of the PVE API host
+ * @param {string} cfg.token      value sent after `PVEAPIToken=`
+ * @param {Function} [cfg.fetchImpl=fetch]  injectable fetch
+ * @param {number} [cfg.timeoutMs=8000]     abort the request after this long
+ * @throws {Error} on a non-2xx status; fetch/abort errors propagate unchanged
+ */
+async function pveGet(path, { apiUrl, token, fetchImpl = fetch, timeoutMs = 8000 }) {
+  const res = await fetchImpl(`${apiUrl}/api2/json${path}`, {
+    headers: { Authorization: `PVEAPIToken=${token}` },
+    dispatcher: tlsDispatcher(),
+    // A health check must not hang on an unresponsive node — that is exactly
+    // the situation it exists to report.
+    signal: AbortSignal.timeout(timeoutMs)
+  });
+  if (!res.ok) throw new Error(`pve ${path} -> ${res.status}`);
+  return (await res.json())?.data ?? [];
+}
+
+// HA service states treated as settled; a service in any other state is
+// counted in `ha.services_error`.
+const SETTLED_STATES = new Set(['started', 'stopped', 'ignored', 'disabled']);
+
+/**
+ * Pure: fold /cluster/status + /cluster/ha/status/current into the card shape.
+ *
+ * @param {Array<object>} [statusData]  entries typed 'cluster' / 'node'
+ * @param {Array<object>} [haData]      entries typed 'quorum' / 'master' /
+ *                                      'fencing' / 'service'
+ * @returns {{ name, quorate, nodes_total, nodes_online, nodes, ha }}
+ *   `nodes` is sorted by name and `ha.services` by sid.
+ *
+ * Anything that is not an array (including null, which default parameters do
+ * not cover) is treated as an empty list, so a malformed payload yields an
+ * "everything unknown" result instead of a TypeError.
+ */
+export function normalizeCluster(statusData = [], haData = []) {
+  const status = Array.isArray(statusData) ? statusData : [];
+  const ha = Array.isArray(haData) ? haData : [];
+  const firstOfType = (list, type) => list.find(e => e?.type === type) || {};
+
+  const cluster = firstOfType(status, 'cluster');
+  const nodes = status
+    .filter(e => e?.type === 'node')
+    .map(n => ({ name: n.name, online: n.online === 1 || n.online === true, local: !!n.local, ip: n.ip || null }))
+    // String(...) keeps the comparison safe when an entry carries no name.
+    .sort((a, b) => String(a.name ?? '').localeCompare(String(b.name ?? '')));
+
+  const quorum = firstOfType(ha, 'quorum');
+  const master = firstOfType(ha, 'master');
+  const fencing = firstOfType(ha, 'fencing');
+  const services = ha
+    .filter(e => e?.type === 'service')
+    // sid falls back to the id with its "service:" prefix removed.
+    .map(s => ({ sid: s.sid || (s.id || '').replace(/^service:/, ''), state: s.state || s.crm_state || 'unknown', node: s.node || null }))
+    .sort((a, b) => a.sid.localeCompare(b.sid));
+  const servicesError = services.filter(s => !SETTLED_STATES.has(s.state));
+
+  return {
+    name: cluster.name || null,
+    quorate: cluster.quorate === 1 || cluster.quorate === true,
+    // Prefer the count reported on the cluster entry; otherwise count node entries.
+    nodes_total: cluster.nodes ?? nodes.length,
+    nodes_online: nodes.filter(n => n.online).length,
+    nodes,
+    ha: {
+      quorum_ok: quorum.quorate === 1 || quorum.status === 'OK',
+      master: master.node || null,
+      fencing: fencing['armed-state'] || (fencing.status ? 'armed' : null),
+      services_total: services.length,
+      services_error: servicesError.length,
+      services
+    }
+  };
+}
+
+/**
+ * Fetches both cluster endpoints and resolves to the normalized card payload
+ * stamped with `at` (ms since epoch). Never rejects: a missing URL/token or a
+ * failed status request resolves to `{ error, at }` instead.
+ *
+ * opts.apiUrl / opts.token / opts.fetchImpl override the environment and the
+ * global fetch.
+ *
+ * NOTE(review): when PROXMOX_RO_TOKEN is unset this falls back to
+ * PROXMOX_API_TOKEN — confirm that is not the power-action token, since the
+ * file header says that token is never used here.
+ */
+export async function clusterHealth(opts = {}) {
+  const apiUrl = opts.apiUrl || process.env.PROXMOX_API_URL;
+  const token = opts.token || process.env.PROXMOX_RO_TOKEN || process.env.PROXMOX_API_TOKEN;
+  const fetchImpl = opts.fetchImpl || fetch;
+  if (!(apiUrl && token)) return { error: 'proxmox_not_configured', at: Date.now() };
+
+  const cfg = { apiUrl, token, fetchImpl };
+  try {
+    const statusReq = pveGet('/cluster/status', cfg);
+    // HA may be absent on a bare cluster, so a failure there means "no HA data".
+    const haReq = pveGet('/cluster/ha/status/current', cfg).catch(() => []);
+    const [status, ha] = await Promise.all([statusReq, haReq]);
+    return { ...normalizeCluster(status, ha), at: Date.now() };
+  } catch (err) {
+    return { error: String(err.message || err), at: Date.now() };
+  }
+}
diff --git a/public/components/topbar.js b/public/components/topbar.js
index ca252f8..5281070 100644
--- a/public/components/topbar.js
+++ b/public/components/topbar.js
@@ -5,6 +5,29 @@ import { el, mount, clear } from '../dom.js';
import { navigate } from '../router.js';
import { on } from '../state.js';
import { toggleSidebar, toggleRail } from './chrome.js';
+import { api } from '../api.js';
+
+// Maps an /api/cluster payload to the pill's [status, label, title] triple.
+// Checks run from most to least severe: unavailable, lost quorum, node down,
+// HA service trouble, otherwise healthy.
+function classifyCluster(c) {
+  const unavailable = !c || c.error;
+  if (unavailable) {
+    return ['unknown', 'cluster ?', 'Cluster status unavailable'];
+  }
+  if (!c.quorate) {
+    return ['down', 'no quorum', 'Cluster has LOST quorum'];
+  }
+  const online = c.nodes_online ?? 0;
+  const total = c.nodes_total ?? 0;
+  if (online < total) {
+    return ['down', 'node down', `${c.nodes_online}/${c.nodes_total} nodes online`];
+  }
+  const haErrors = c.ha ? c.ha.services_error : 0;
+  if (haErrors > 0) {
+    return ['warn', 'HA issue', `${haErrors} HA service(s) in error`];
+  }
+  return ['ok', 'healthy', `Quorate · ${c.nodes_online}/${c.nodes_total} nodes · HA ok`];
+}
+
+/**
+ * Keeps the topbar pill in sync with GET /api/cluster: one refresh right away,
+ * then one every 30s.
+ *
+ * @param {HTMLElement} pill     receives the status class and the tooltip
+ * @param {HTMLElement} labelEl  receives the short label text
+ * @returns {() => void} call to stop polling (existing callers may ignore it)
+ */
+function startClusterHealth(pill, labelEl) {
+  let busy = false;
+  async function tick() {
+    // A response slower than the poll interval must not stack up requests.
+    if (busy) return;
+    busy = true;
+    let c = null;
+    try { c = await api.get('/api/cluster'); } catch { c = { error: 'fetch' }; }
+    busy = false;
+    const [status, label, title] = classifyCluster(c);
+    pill.className = 'icon-btn cluster-health status-' + status;
+    pill.title = title;
+    labelEl.textContent = label;
+  }
+  const timer = setInterval(() => {
+    // If the topbar is ever re-rendered the old pill is left detached; stop
+    // polling for it instead of leaking one interval per render.
+    if (!pill.isConnected) { clearInterval(timer); return; }
+    tick();
+  }, 30000);
+  tick();
+  return () => clearInterval(timer);
+}
function captureModal() {
const root = document.getElementById('modal-root');
@@ -37,17 +60,24 @@ export function renderTopbar(root) {
const bell = el('button', { class: 'icon-btn', onclick: () => navigate('/inbox') }, 'Inbox');
+ const chLabel = el('span', { class: 'ch-label' }, '…');
+ const clusterPill = el('button', { class: 'icon-btn cluster-health status-unknown', title: 'Cluster health', onclick: () => navigate('/sacred-valley') },
+ el('span', { class: 'dot' }), chLabel);
+
mount(root,
el('button', { class: 'chrome-toggle', title: 'Toggle menu', onclick: toggleSidebar }, '☰'),
el('div', { class: 'brand' }, 'VOID'),
el('button', { class: 'icon-btn', onclick: captureModal }, '+ Capture'),
el('div', { class: 'topbar-search' }, searchInput),
el('div', { class: 'topbar-spacer' }),
+ clusterPill,
bell,
el('button', { class: 'chrome-toggle', title: 'Toggle companion chat', onclick: toggleRail }, '◆'),
el('button', { class: 'icon-btn', onclick: () => alert('Agent-switching ships post-Plan-2.') }, 'Owner')
);
+ startClusterHealth(clusterPill, chLabel);
+
on('pending-count', (n) => {
const old = bell.querySelector('.badge');
if (old) old.remove();
diff --git a/public/index.html b/public/index.html
index 3725dea..a029a7d 100644
--- a/public/index.html
+++ b/public/index.html
@@ -4,6 +4,26 @@
Void
+