Read-only Proxmox storage health (same PROXMOX_RO_TOKEN as the cluster card): ZFS pool health+usage, dropped zfspool storages (the donatello/leonardo SATA signal), and per-LXC rootfs fill, with a HEALTHY/WATCH/ATTENTION roll-up. Closes the monitoring gap from the 2026-06-09 audit (C1 + H2 were invisible). Pure normalizeStorage() unit-tested (4 tests). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
95 lines
4.1 KiB
JavaScript
95 lines
4.1 KiB
JavaScript
import { Agent } from 'undici';
|
|
|
|
// Read-only Proxmox storage + capacity health for the Sacred Valley card. Same
|
|
// PVEAuditor token as the cluster card (PROXMOX_RO_TOKEN). Surfaces the two things
|
|
// that have actually bitten this homelab and were previously invisible:
|
|
// 1. a ZFS pool dropping out (the donatello/leonardo SATA-bus incident) — seen as
|
|
// a zfspool storage whose status is no longer 'available'.
|
|
// 2. a container rootfs filling up (mediastack hitting 95%) — per-LXC disk/maxdisk.
|
|
|
|
// Cached Agent with certificate verification disabled; created on first use only.
let insecure;

/**
 * Choose the fetch dispatcher for Proxmox requests.
 *
 * @returns {Agent|undefined} The shared Agent configured with
 *   `rejectUnauthorized: false` when PROXMOX_INSECURE_TLS is exactly '1';
 *   otherwise undefined.
 */
function tlsDispatcher() {
  const optedIn = process.env.PROXMOX_INSECURE_TLS === '1';
  if (optedIn) {
    // Build the Agent once and reuse it on every later call.
    if (insecure == null) {
      insecure = new Agent({ connect: { rejectUnauthorized: false } });
    }
    return insecure;
  }
  return undefined;
}
|
|
|
|
/**
 * GET a Proxmox API path and unwrap the `data` field of the JSON reply.
 *
 * @param {string} path - API path appended after `/api2/json` (leading slash,
 *   may carry a query string).
 * @param {object} cfg
 * @param {string} cfg.apiUrl - Base URL of the Proxmox host; trailing slashes
 *   are tolerated.
 * @param {string} cfg.token - API token sent as `PVEAPIToken=<token>`.
 * @param {Function} [cfg.fetchImpl=fetch] - fetch-compatible function.
 * @returns {Promise<*>} The reply's `data`, or `[]` when it is null/undefined.
 * @throws {Error} `pve <path> -> <status>` when the response is not ok.
 */
async function pveGet(path, { apiUrl, token, fetchImpl = fetch }) {
  // A base URL configured with a trailing slash would otherwise produce
  // `//api2/json...`.
  let base = String(apiUrl);
  while (base.endsWith('/')) base = base.slice(0, -1);

  const res = await fetchImpl(`${base}/api2/json${path}`, {
    headers: { Authorization: `PVEAPIToken=${token}` },
    dispatcher: tlsDispatcher(),
  });

  if (!res.ok) {
    // Release the unread body so the connection is not held until GC.
    try {
      await res.body?.cancel();
    } catch {
      // Best-effort cleanup; the HTTP status below is the error that matters.
    }
    throw new Error(`pve ${path} -> ${res.status}`);
  }
  return (await res.json())?.data ?? [];
}
|
|
|
|
// Fill-percentage thresholds: at or above WARN is 'warn', at or above CRIT is 'crit'.
export const WARN = 80;
export const CRIT = 90;
|
|
// Whole-number percentage of `total` taken by `used`; null when `total` is not a positive number.
const pct = (used, total) => {
  if (!(total > 0)) return null;
  const ratio = used / total;
  return Math.round(ratio * 100);
};
|
|
// Severity bucket for a fill percentage: 'crit' at/above CRIT, 'warn' at/above WARN,
// otherwise 'ok'. A null/undefined percentage is treated as 'ok'.
const sev = p => {
  if (p == null) return 'ok';
  if (p >= CRIT) return 'crit';
  if (p >= WARN) return 'warn';
  return 'ok';
};
|
|
// Roll a list of `{ status }` items up to the most severe status present:
// 'crit' beats 'warn' beats 'ok'. An empty list, or only other statuses, yields 'ok'.
const worstOf = items => {
  let worst = 'ok';
  for (const item of items) {
    if (item.status === 'crit') {
      worst = 'crit';
    } else if (item.status === 'warn' && worst !== 'crit') {
      worst = 'warn';
    }
  }
  return worst;
};
|
|
|
|
/**
 * Pure: fold /nodes/<node>/disks/zfs and /cluster/resources (storage, vm) rows
 * into the card shape.
 *
 * @param {Array<object>} [storageRes=[]] - Storage rows; reads `plugintype`,
 *   `status`, `storage`, `node`.
 * @param {Array<object>} [vmRes=[]] - Guest rows; reads `type`, `disk`,
 *   `maxdisk`, `vmid`, `name`, `node`.
 * @param {Object<string, Array<object>>} [zfsByNode={}] - Node name -> ZFS pool
 *   rows; reads `name`, `health`, `alloc`, `size`. A null list counts as empty.
 * @returns {{worst: string, pools: object[], down: object[], guests: object[], alerts: string[]}}
 *   `worst` is the most severe status across pools, down storages and guests;
 *   `alerts` holds one human-readable line per item that is not 'ok'.
 */
export function normalizeStorage(storageRes = [], vmRes = [], zfsByNode = {}) {
  // Order by name, then node; String() keeps a row with a missing field from throwing.
  const byNameThenNode = (a, b) =>
    String(a.name).localeCompare(String(b.name)) || String(a.node).localeCompare(String(b.node));

  // Imported ZFS pools (health + usage). Any health other than ONLINE is
  // critical regardless of how full the pool is.
  const pools = Object.entries(zfsByNode ?? {})
    .flatMap(([node, list]) => (list || []).map(z => {
      const p = pct(z.alloc, z.size);
      return {
        name: z.name,
        node,
        health: z.health,
        used: z.alloc,
        total: z.size,
        pct: p,
        status: z.health !== 'ONLINE' ? 'crit' : sev(p),
      };
    }))
    .sort(byNameThenNode);

  // zfspool storages that are configured but NOT available = a pool that has
  // dropped out (or never imported). This is the donatello/leonardo signal.
  const down = (storageRes ?? [])
    .filter(s => s.plugintype === 'zfspool' && s.status !== 'available')
    .map(s => ({ name: s.storage, node: s.node, state: s.status || 'unavailable', status: 'crit' }))
    .sort(byNameThenNode);

  // Per-guest rootfs fill, fullest first. Only LXC rows with a non-zero
  // disk/maxdisk are kept; QEMU usually report disk=0 (no agent), so they are
  // skipped rather than shown as 0%.
  const guests = (vmRes ?? [])
    .filter(v => v.type === 'lxc' && v.maxdisk > 0 && v.disk > 0)
    .map(v => {
      const p = pct(v.disk, v.maxdisk);
      return { vmid: v.vmid, name: v.name, node: v.node, used: v.disk, total: v.maxdisk, pct: p, status: sev(p) };
    })
    .sort((a, b) => b.pct - a.pct);

  const alerts = [
    ...down.map(d => `${d.name} (${d.node}) ${d.state}`),
    ...pools.filter(p => p.health !== 'ONLINE').map(p => `pool ${p.name} ${p.health}`),
    // A healthy but nearly full pool also raises `worst`, so it must explain
    // itself here; otherwise the roll-up can be non-ok with no alert line.
    ...pools.filter(p => p.health === 'ONLINE' && p.status !== 'ok').map(p => `pool ${p.name} ${p.pct}%`),
    ...guests.filter(g => g.status !== 'ok').map(g => `CT ${g.vmid} ${g.name} ${g.pct}%`),
  ];

  return { worst: worstOf([...pools, ...down, ...guests]), pools, down, guests, alerts };
}
|
|
|
|
/**
 * Fetch storage, guest and per-node ZFS data from Proxmox and return the
 * normalized storage-health card.
 *
 * @param {object} [opts]
 * @param {string} [opts.apiUrl] - Falls back to PROXMOX_API_URL.
 * @param {string} [opts.token] - Falls back to PROXMOX_RO_TOKEN, then
 *   PROXMOX_API_TOKEN.
 * @param {Function} [opts.fetchImpl] - fetch-compatible function; defaults to
 *   the global fetch.
 * @returns {Promise<object>} On success: the normalizeStorage() result plus
 *   `zfsErrors` (one `{ node, error }` per online node whose ZFS query failed)
 *   and `at` (Date.now()). On a missing URL/token or a failed cluster-level
 *   request: `{ error, at }`. Never rejects for request failures.
 */
export async function storageHealth(opts = {}) {
  const cfg = {
    apiUrl: opts.apiUrl || process.env.PROXMOX_API_URL,
    token: opts.token || process.env.PROXMOX_RO_TOKEN || process.env.PROXMOX_API_TOKEN,
    fetchImpl: opts.fetchImpl || fetch,
  };
  if (!cfg.apiUrl || !cfg.token) return { error: 'proxmox_not_configured', at: Date.now() };

  try {
    const [storageRes, vmRes, nodes] = await Promise.all([
      pveGet('/cluster/resources?type=storage', cfg),
      pveGet('/cluster/resources?type=vm', cfg),
      pveGet('/nodes', cfg),
    ]);

    // Query each online node's ZFS pools in parallel. Each task returns its own
    // outcome instead of writing into a shared object, so the resulting map
    // follows node order rather than completion order.
    const perNode = await Promise.all(
      (nodes || [])
        .filter(n => n.status === 'online')
        .map(async n => {
          try {
            const list = await pveGet(`/nodes/${encodeURIComponent(n.node)}/disks/zfs`, cfg);
            return { node: n.node, list, error: null };
          } catch (err) {
            // Stay best-effort (one failing node must not blank the card), but
            // record the failure so it is not mistaken for "no pools".
            return { node: n.node, list: [], error: String(err?.message || err) };
          }
        })
    );

    const zfsByNode = Object.fromEntries(perNode.map(r => [r.node, r.list]));
    const zfsErrors = perNode
      .filter(r => r.error != null)
      .map(({ node, error }) => ({ node, error }));

    return { ...normalizeStorage(storageRes, vmRes, zfsByNode), zfsErrors, at: Date.now() };
  } catch (e) {
    return { error: String(e.message || e), at: Date.now() };
  }
}
|