Files
Void-Homelab/tests/api/capture.test.js
root afc20712cb feat(api): capture POST + upload + SSRF-safe URL fetch
safe_fetch.js validates URLs before fetching: it rejects non-http(s)
schemes and any literal or DNS-resolved loopback / RFC1918 / link-local /
CGNAT / metadata address, and it follows redirects manually, applying the
same checks on each hop. Tests gate the check behind
VOID_INGEST_ALLOW_PRIVATE so that offline fixtures can hit 127.0.0.1.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-01 03:42:54 +10:00

73 lines
3.1 KiB
JavaScript

import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import fs from 'node:fs/promises';
import path from 'node:path';
import os from 'node:os';
import request from 'supertest';
import { setup } from './helpers.js';
import { stopBoss, waitForJob } from '../helpers/boss.js';
import * as queue from '../../lib/jobs/queue.js';
import { registerWorkers } from '../../lib/jobs/index.js';
import * as spaces from '../../lib/db/repos/spaces.js';
import * as refs from '../../lib/db/repos/refs.js';
// Shared per-test state. Each binding starts out undefined and is assigned
// by the beforeEach hook before any test body runs.
let app;
let ownerHeaders;
let sp;

// Canned HTML document returned by the stubbed fetch() for every request.
// It holds a title and an <article> with two paragraphs of body text.
const HTML = `<html><head><title>X</title></head><body><article>
<p>An article body with enough text for readability to choose it as the main content.</p>
<p>Another paragraph to satisfy the readability heuristic.</p>
</article></body></html>`;
// Bookkeeping for the per-test blob directory so afterEach can undo it.
let blobRoot;
let previousBlobRoot;

/**
 * Per-test setup: builds the app and owner headers, creates the 'cap' space,
 * points BLOB_ROOT at a fresh temp directory, starts the queue and workers,
 * and stubs the global fetch() so every request resolves to the HTML fixture.
 */
beforeEach(async () => {
  ({ app, ownerHeaders } = await setup());
  sp = await spaces.create({ slug: 'cap', name: 'Cap' }, { kind: 'user', id: null });

  // Remember the prior value so teardown can put the environment back.
  previousBlobRoot = process.env.BLOB_ROOT;
  blobRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'void-blobs-'));
  process.env.BLOB_ROOT = blobRoot;

  await queue.start();
  await registerWorkers();

  // stubGlobal (rather than assigning global.fetch directly) records the
  // original so vi.unstubAllGlobals() can reinstate it in afterEach;
  // vi.restoreAllMocks() alone does not undo a plain assignment.
  vi.stubGlobal(
    'fetch',
    vi.fn(async () => new Response(HTML, {
      status: 200,
      headers: { 'content-type': 'text/html' },
    })),
  );
});

/**
 * Per-test teardown: stops the queue, then — even if stopping fails —
 * reinstates the real fetch(), restores mocks, restores BLOB_ROOT and
 * deletes the temp blob directory created by beforeEach.
 */
afterEach(async () => {
  try {
    await stopBoss();
  } finally {
    vi.unstubAllGlobals();
    vi.restoreAllMocks();

    // Only undo the environment change if beforeEach got far enough to make it.
    if (blobRoot !== undefined) {
      if (previousBlobRoot === undefined) {
        delete process.env.BLOB_ROOT;
      } else {
        process.env.BLOB_ROOT = previousBlobRoot;
      }
      const dir = blobRoot;
      blobRoot = undefined;
      // force: tolerate the directory already being gone.
      await fs.rm(dir, { recursive: true, force: true });
    }
  }
});
/**
 * HTTP-level tests for the capture endpoints: URL capture, duplicate
 * detection, file upload, input validation and authentication.
 */
describe('capture api', () => {
  it('POST /api/capture enqueues ingest.url and returns 202', async () => {
    const res = await request(app)
      .post('/api/capture')
      .set(ownerHeaders)
      .send({ space_id: sp.id, url: 'https://example.com/a' });

    expect(res.status).toBe(202);
    expect(res.body.job_id).toBeTruthy();
    // The key is expected to be 64 lowercase hex characters.
    expect(res.body.idempotency_key).toMatch(/^[0-9a-f]{64}$/);
  });

  it('POST /api/capture returns existing ref_id on duplicate', async () => {
    const payload = { space_id: sp.id, url: 'https://example.com/dup' };

    const r1 = await request(app).post('/api/capture').set(ownerHeaders).send(payload);
    // Fail fast with a clear message instead of a 10 s waitForJob timeout
    // if the first capture was not accepted.
    expect(r1.status).toBe(202);
    expect(r1.body.job_id).toBeTruthy();
    await waitForJob('ingest.url', r1.body.job_id, { timeoutMs: 10_000 });

    // Same space + URL after the first job finished: no new job, existing ref.
    const r2 = await request(app).post('/api/capture').set(ownerHeaders).send(payload);
    expect(r2.status).toBe(202);
    expect(r2.body.job_id).toBeNull();
    expect(r2.body.ref_id).toBeTruthy();
  });

  it('POST /api/capture/upload enqueues ingest.blob', async () => {
    const res = await request(app)
      .post('/api/capture/upload')
      .set(ownerHeaders)
      .field('space_id', sp.id)
      .attach('file', Buffer.from('hi'), { filename: 'a.txt', contentType: 'text/plain' });

    expect(res.status).toBe(202);
    expect(res.body.job_id).toBeTruthy();
    await waitForJob('ingest.blob', res.body.job_id, { timeoutMs: 10_000 });

    const rows = await refs.list({ space_id: sp.id });
    // Guard so a missing ref is reported as an assertion failure rather
    // than a TypeError from reading a property of undefined.
    expect(rows.length).toBeGreaterThan(0);
    expect(rows[0].kind).toBe('file');
  });

  it('POST /api/capture rejects missing url', async () => {
    const res = await request(app)
      .post('/api/capture')
      .set(ownerHeaders)
      .send({ space_id: sp.id });

    expect(res.status).toBe(400);
  });

  it('unauthenticated → 401', async () => {
    // No owner headers are set on this request.
    const res = await request(app)
      .post('/api/capture')
      .send({ space_id: sp.id, url: 'https://example.com/a' });

    expect(res.status).toBe(401);
  });
});