feat(api): capture POST + upload + SSRF-safe URL fetch
safe_fetch.js validates URLs before fetching: it rejects non-http(s) schemes and any host that is, or DNS-resolves to, a loopback / RFC1918 / link-local / CGNAT / metadata address, and it follows redirects manually, applying the same checks on each hop. The check is gated by VOID_INGEST_ALLOW_PRIVATE so that offline test fixtures can hit 127.0.0.1. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
72
tests/api/capture.test.js
Normal file
72
tests/api/capture.test.js
Normal file
@@ -0,0 +1,72 @@
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import os from 'node:os';
|
||||
import request from 'supertest';
|
||||
import { setup } from './helpers.js';
|
||||
import { stopBoss, waitForJob } from '../helpers/boss.js';
|
||||
import * as queue from '../../lib/jobs/queue.js';
|
||||
import { registerWorkers } from '../../lib/jobs/index.js';
|
||||
import * as spaces from '../../lib/db/repos/spaces.js';
|
||||
import * as refs from '../../lib/db/repos/refs.js';
|
||||
|
||||
// Shared per-test state, reassigned in beforeEach: `app` is handed to supertest's request(),
// `ownerHeaders` is applied to authenticated requests, and `sp` is the space whose id is
// sent as `space_id` in the capture requests.
let app, ownerHeaders, sp;
|
||||
// HTML body returned by the fetch stub installed in beforeEach. Per the wording of its own
// paragraphs, the text is padded so a readability-style extractor selects the <article>
// as the main content (NOTE(review): the extractor itself is not visible in this file).
const HTML = `<html><head><title>X</title></head><body><article>
|
||||
<p>An article body with enough text for readability to choose it as the main content.</p>
|
||||
<p>Another paragraph to satisfy the readability heuristic.</p>
|
||||
</article></body></html>`;
|
||||
|
||||
/**
 * Per-test setup: builds the app and owner auth headers, creates the space the
 * captures target, points BLOB_ROOT at a fresh temp directory, starts the job
 * queue with its workers, and stubs the global fetch.
 */
beforeEach(async () => {
  ({ app, ownerHeaders } = await setup());
  sp = await spaces.create({ slug: 'cap', name: 'Cap' }, { kind: 'user', id: null });

  // Each test gets its own blob directory so stored files never collide.
  process.env.BLOB_ROOT = await fs.mkdtemp(path.join(os.tmpdir(), 'void-blobs-'));

  await queue.start();
  await registerWorkers();

  // Install the stub with vi.spyOn rather than assigning to global.fetch:
  // vi.restoreAllMocks() in afterEach only puts back spies, so a plain
  // assignment would leave the stub in place after the test finished.
  vi.spyOn(globalThis, 'fetch').mockImplementation(async () => new Response(HTML, {
    status: 200,
    headers: { 'content-type': 'text/html' },
  }));
});
|
||||
/**
 * Per-test teardown: stops the job queue, then restores every mock.
 * The restore sits in `finally` so a failing stopBoss() cannot leave spies
 * installed for the tests that follow.
 */
afterEach(async () => {
  try {
    await stopBoss();
  } finally {
    vi.restoreAllMocks();
  }
});
|
||||
|
||||
// End-to-end checks of the capture endpoints: URL capture, duplicate handling,
// file upload, request validation and authentication.
describe('capture api', () => {
  it('POST /api/capture enqueues ingest.url and returns 202', async () => {
    const res = await request(app)
      .post('/api/capture')
      .set(ownerHeaders)
      .send({ space_id: sp.id, url: 'https://example.com/a' });

    expect(res.status).toBe(202);
    expect(res.body.job_id).toBeTruthy();
    // The idempotency key is expected to be 64 lowercase hex characters.
    expect(res.body.idempotency_key).toMatch(/^[0-9a-f]{64}$/);
  });

  it('POST /api/capture returns existing ref_id on duplicate', async () => {
    const r1 = await request(app)
      .post('/api/capture')
      .set(ownerHeaders)
      .send({ space_id: sp.id, url: 'https://example.com/dup' });

    // Fail fast on a broken first request instead of waiting out the
    // job timeout below with an undefined job id.
    expect(r1.status).toBe(202);
    expect(r1.body.job_id).toBeTruthy();

    // Let the first ingest finish so the second request sees the stored ref.
    await waitForJob('ingest.url', r1.body.job_id, { timeoutMs: 10_000 });

    const r2 = await request(app)
      .post('/api/capture')
      .set(ownerHeaders)
      .send({ space_id: sp.id, url: 'https://example.com/dup' });

    expect(r2.status).toBe(202);
    expect(r2.body.job_id).toBeNull();
    expect(r2.body.ref_id).toBeTruthy();
  });

  it('POST /api/capture/upload enqueues ingest.blob', async () => {
    const res = await request(app)
      .post('/api/capture/upload')
      .set(ownerHeaders)
      .field('space_id', sp.id)
      .attach('file', Buffer.from('hi'), { filename: 'a.txt', contentType: 'text/plain' });

    expect(res.status).toBe(202);
    expect(res.body.job_id).toBeTruthy();

    await waitForJob('ingest.blob', res.body.job_id, { timeoutMs: 10_000 });

    const rows = await refs.list({ space_id: sp.id });
    // Assert a ref exists before indexing, so an empty list reports as a
    // failed expectation rather than a TypeError on rows[0].
    expect(rows.length).toBeGreaterThan(0);
    expect(rows[0].kind).toBe('file');
  });

  it('POST /api/capture rejects missing url', async () => {
    const res = await request(app)
      .post('/api/capture')
      .set(ownerHeaders)
      .send({ space_id: sp.id });

    expect(res.status).toBe(400);
  });

  it('unauthenticated → 401', async () => {
    // No owner headers are set on this request.
    const res = await request(app)
      .post('/api/capture')
      .send({ space_id: sp.id, url: 'https://example.com/a' });

    expect(res.status).toBe(401);
  });
});
|
||||
Reference in New Issue
Block a user