diff --git a/lib/cron/index.js b/lib/cron/index.js
index 1dcc6ed..5334e0c 100644
--- a/lib/cron/index.js
+++ b/lib/cron/index.js
@@ -2,6 +2,9 @@ import cron from 'node-cron';
 import { runSync } from './sync_source_docs.js';
 import { log } from '../log.js';
 import { enqueue } from '../jobs/queue.js';
+import { load } from '../health/registry.js';
+import { checkAll } from '../health/checker.js';
+import * as statusRepo from '../db/repos/service_status.js';
 
 export function startCron() {
   // Daily at 03:00 local time
@@ -20,5 +23,20 @@ export function startCron() {
     catch (e) { log.error({ err: e }, 'cron speedtest failed'); }
   });
 
+  // Every minute: probe each registered service and persist its status.
+  cron.schedule('*/1 * * * *', async () => {
+    try {
+      const results = await checkAll(load());
+      let failed = 0;
+      for (const r of results) {
+        // Persist rows one by one so a single failed write does not leave
+        // every later service with a stale status.
+        try { await statusRepo.upsert(r); }
+        catch (e) { failed += 1; log.error({ err: e, service_id: r.service_id }, 'health status upsert failed'); }
+      }
+      log.info({ n: results.length, failed }, 'health check complete');
+    } catch (e) { log.error({ err: e }, 'health check failed'); }
+  });
+
   log.info('cron started');
 }
diff --git a/lib/health/checker.js b/lib/health/checker.js
new file mode 100644
index 0000000..df11e41
--- /dev/null
+++ b/lib/health/checker.js
@@ -0,0 +1,97 @@
+import net from 'node:net';
+
+const SLOW_MS = 3000;
+const TCP_TIMEOUT_MS = 5000;
+const HTTP_TIMEOUT_MS = 6000;
+// URL.port is '' when the port is omitted or is the scheme default, so the
+// well-known ports have to be supplied here for TCP checks.
+const DEFAULT_PORTS = { 'http:': 80, 'https:': 443 };
+
+/**
+ * Map a raw probe outcome onto the status row shape.
+ *
+ * @param {{ ok?: boolean, reachable?: boolean, latency?: number, error?: string }} outcome
+ * @returns {{ status: string, latency_ms: number|null, detail: string }}
+ *   status is 'ok', 'warn' (slower than SLOW_MS, or reachable but failing)
+ *   or 'down'.
+ */
+export function classify({ ok, reachable, latency, error }) {
+  if (ok) return { status: latency > SLOW_MS ? 'warn' : 'ok', latency_ms: latency, detail: `${latency}ms` };
+  if (reachable) return { status: 'warn', latency_ms: latency ?? null, detail: 'degraded' };
+  return { status: 'down', latency_ms: null, detail: error || 'unreachable' };
+}
+
+// Reduce a probe failure to a short string. fetch() reports socket errors as
+// `cause`, and a timed-out AbortSignal rejects with a DOMException whose
+// legacy `code` is the number 23, so `e.code || e.message` is not enough.
+function describeError(e) {
+  const code = e?.cause?.code ?? e?.code;
+  if (typeof code === 'string' && code !== '') return code;
+  if (e?.name === 'TimeoutError') return 'timeout';
+  return e?.message || String(e);
+}
+
+// Open, then immediately close, a TCP connection to the host and port in rawUrl.
+async function tcpConnect(rawUrl) {
+  const u = new URL(rawUrl);
+  const port = u.port !== '' ? Number(u.port) : DEFAULT_PORTS[u.protocol];
+  if (!port) throw new Error(`no port in ${rawUrl}`);
+  // URL keeps the brackets of IPv6 literals; net.connect() needs the bare address.
+  const host = u.hostname.replace(/^\[|\]$/g, '');
+  await new Promise((resolve, reject) => {
+    const sock = net.connect({ host, port }, () => { sock.end(); resolve(); });
+    sock.setTimeout(TCP_TIMEOUT_MS);
+    sock.on('timeout', () => { sock.destroy(); reject(new Error('timeout')); });
+    sock.on('error', reject);
+  });
+}
+
+/**
+ * Default probe: HTTP (status 2xx/3xx) or TCP connect. Only called with
+ * operator-configured URLs from the registry — never user input.
+ *
+ * @param {{ url: string, check?: { type?: string, path?: string } }} svc
+ * @returns {Promise<{ ok: boolean, reachable?: boolean, latency: number, error?: string }>}
+ *   Errors raised by the check itself are reported as `ok: false`, not thrown.
+ */
+export async function probe(svc) {
+  const started = Date.now();
+  const type = svc.check?.type || 'http';
+  try {
+    if (type === 'tcp') {
+      await tcpConnect(svc.url);
+      return { ok: true, latency: Date.now() - started };
+    }
+    const base = svc.url.replace(/\/$/, '');
+    const url = base + (svc.check?.path || '');
+    const res = await fetch(url, { redirect: 'manual', signal: AbortSignal.timeout(HTTP_TIMEOUT_MS) });
+    const latency = Date.now() - started;
+    // Only the status is needed; cancel the unread body so the connection is
+    // released promptly. A failed cancel must not turn a good probe into 'down'.
+    await res.body?.cancel().catch(() => {});
+    const ok = res.status >= 200 && res.status < 400;
+    return { ok, reachable: true, latency };
+  } catch (e) {
+    return { ok: false, reachable: false, latency: Date.now() - started, error: describeError(e) };
+  }
+}
+
+/**
+ * Probe every service concurrently and classify each outcome.
+ *
+ * @param {Array<object>} services - registry entries; `id` is copied to `service_id`
+ * @param {Function} [probeFn=probe] - injectable probe, used by the tests
+ * @returns {Promise<Array<object>>} one `{ service_id, status, latency_ms, detail }` per service
+ */
+export async function checkAll(services, probeFn = probe) {
+  return Promise.all(services.map(async (svc) => {
+    let outcome;
+    try {
+      outcome = await probeFn(svc);
+    } catch (e) {
+      // A throwing probe marks only its own service down instead of rejecting the batch.
+      outcome = { ok: false, reachable: false, error: describeError(e) };
+    }
+    return { service_id: svc.id, ...classify(outcome) };
+  }));
+}
diff --git a/tests/health/checker.test.js b/tests/health/checker.test.js
new file mode 100644
index 0000000..1fb3f5a
--- /dev/null
+++ b/tests/health/checker.test.js
@@ -0,0 +1,31 @@
+import { describe, it, expect, vi } from 'vitest';
+import { classify, checkAll } from '../../lib/health/checker.js';
+
+describe('health classify', () => {
+  it('ok when reachable and fast', () => expect(classify({ ok: true, latency: 120 }).status).toBe('ok'));
+  it('warn when reachable but slow', () => expect(classify({ ok: true, latency: 4000 }).status).toBe('warn'));
+  it('warn on non-2xx/3xx reachable', () => expect(classify({ ok: false, reachable: true, latency: 50 }).status).toBe('warn'));
+  it('down when unreachable', () => expect(classify({ ok: false, reachable: false, error: 'ECONN' }).status).toBe('down'));
+});
+
+describe('checkAll', () => {
+  it('probes each service and returns a status per id', async () => {
+    const probe = vi.fn().mockResolvedValue({ ok: true, latency: 30 });
+    const svcs = [{ id: 'a', url: 'http://x' }, { id: 'b', url: 'http://y' }];
+    const out = await checkAll(svcs, probe);
+    expect(out.map(o => o.service_id).sort()).toEqual(['a', 'b']);
+    expect(out.every(o => o.status === 'ok')).toBe(true);
+  });
+
+  it('marks only the failing service down when its probe throws', async () => {
+    const probe = vi.fn(async (svc) => {
+      if (svc.id === 'b') throw new Error('boom');
+      return { ok: true, latency: 30 };
+    });
+    const out = await checkAll([{ id: 'a', url: 'http://x' }, { id: 'b', url: 'http://y' }], probe);
+    expect(out).toEqual([
+      { service_id: 'a', status: 'ok', latency_ms: 30, detail: '30ms' },
+      { service_id: 'b', status: 'down', latency_ms: null, detail: 'boom' },
+    ]);
+  });
+});