feat(api): capture POST + upload + SSRF-safe URL fetch
safe_fetch.js validates URLs before fetch: rejects non-http(s), literal or DNS-resolved loopback / RFC1918 / link-local / CGNAT / metadata addresses; follows redirects manually with the same checks on each hop. Test fixtures gate the check with VOID_INGEST_ALLOW_PRIVATE for offline fixtures that hit 127.0.0.1. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -21,6 +21,7 @@ import { router as pendingChangesRouter } from './routes/pending_changes.js';
|
|||||||
import { router as auditRouter } from './routes/audit.js';
|
import { router as auditRouter } from './routes/audit.js';
|
||||||
import { router as searchRouter } from './routes/search.js';
|
import { router as searchRouter } from './routes/search.js';
|
||||||
import { router as jobsRouter } from './routes/jobs.js';
|
import { router as jobsRouter } from './routes/jobs.js';
|
||||||
|
import { router as captureRouter } from './routes/capture.js';
|
||||||
|
|
||||||
export function mountApi(app) {
|
export function mountApi(app) {
|
||||||
const api = Router();
|
const api = Router();
|
||||||
@@ -49,6 +50,7 @@ export function mountApi(app) {
|
|||||||
api.use('/audit', auditRouter);
|
api.use('/audit', auditRouter);
|
||||||
api.use('/search', searchRouter);
|
api.use('/search', searchRouter);
|
||||||
api.use('/jobs', jobsRouter);
|
api.use('/jobs', jobsRouter);
|
||||||
|
api.use('/capture', captureRouter);
|
||||||
api.use('/:entity_type/:entity_id/tags', tagsByEntityRouter);
|
api.use('/:entity_type/:entity_id/tags', tagsByEntityRouter);
|
||||||
|
|
||||||
api.use((_req, _res, next) => next(new NotFoundError('route not found')));
|
api.use((_req, _res, next) => next(new NotFoundError('route not found')));
|
||||||
|
|||||||
79
lib/api/routes/capture.js
Normal file
79
lib/api/routes/capture.js
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
import { Router } from 'express';
|
||||||
|
import { z } from 'zod';
|
||||||
|
import crypto from 'node:crypto';
|
||||||
|
import fs from 'node:fs';
|
||||||
|
import path from 'node:path';
|
||||||
|
import os from 'node:os';
|
||||||
|
import multer from 'multer';
|
||||||
|
import * as queue from '../../jobs/queue.js';
|
||||||
|
import { pool } from '../../db/pool.js';
|
||||||
|
import { validate } from '../validate.js';
|
||||||
|
import { requireWrite } from '../cap.js';
|
||||||
|
import { asyncWrap } from '../errors.js';
|
||||||
|
|
||||||
|
// Optional caller-supplied hints that may accompany a capture request.
// Every field is optional, and so is the hint object itself.
const captureHint = z.object({
  project_id: z.string().uuid().optional(),
  title: z.string().optional(),
  tags: z.array(z.string()).optional()
});

// Body schema for POST /capture: the target space, the URL to ingest, and
// an optional hint object.
const captureBody = z.object({
  space_id: z.string().uuid(),
  url: z.string().url(),
  hint: captureHint.optional()
});
|
||||||
|
|
||||||
|
// Directory where multer spools incoming uploads. Overridable through the
// UPLOAD_TMP environment variable; an unset or empty value falls back to a
// folder under the OS temp dir.
const UPLOAD_TMP = process.env.UPLOAD_TMP || path.join(os.tmpdir(), 'void-uploads');

// Create the spool directory at module load so multer can write to it.
fs.mkdirSync(UPLOAD_TMP, { recursive: true });

// Per-file upload size cap: 100 MiB.
const MAX_UPLOAD_BYTES = 100 * 1024 * 1024;

const upload = multer({
  dest: UPLOAD_TMP,
  limits: { fileSize: MAX_UPLOAD_BYTES }
});
|
||||||
|
|
||||||
|
/**
 * Derive the idempotency key for capturing `url` into `space_id`.
 *
 * The two values are joined with a NUL byte before hashing so that different
 * (space_id, url) pairs cannot concatenate to the same input.
 *
 * @param {string} space_id - Space the capture belongs to.
 * @param {string} url - URL being captured.
 * @returns {string} Hex-encoded SHA-256 digest (64 lowercase hex characters).
 */
function key(space_id, url) {
  const hasher = crypto.createHash('sha256');
  hasher.update(`${space_id}\x00${url}`);
  return hasher.digest('hex');
}
|
||||||
|
|
||||||
|
export const router = Router();

/**
 * POST / — request capture of a URL into a space.
 *
 * Requires write capability on 'ref' and a body matching `captureBody`.
 * Always answers 202:
 *   - if a ref with source_kind='url' and the same idempotency key already
 *     exists, responds with `job_id: null` and that ref's id;
 *   - otherwise enqueues an 'ingest.url' job and responds with its id.
 *
 * NOTE(review): `hint` is validated by the schema but is not forwarded to
 * the job payload here — confirm whether that is intentional.
 */
router.post(
  '/',
  requireWrite('ref'),
  validate({ body: captureBody }),
  asyncWrap(async (req, res) => {
    const { space_id, url } = req.body;
    const idempotency_key = key(space_id, url);

    // Look for a ref already ingested under this idempotency key.
    const lookupResult = await pool.query(
      `SELECT id FROM refs WHERE source_kind='url' AND external_id=$1 LIMIT 1`,
      [idempotency_key]
    );
    const [existing] = lookupResult.rows;

    if (!existing) {
      const job_id = await queue.enqueue('ingest.url', { space_id, url });
      res.status(202).json({ job_id, idempotency_key });
      return;
    }

    return res.status(202).json({
      job_id: null,
      idempotency_key,
      ref_id: existing.id
    });
  })
);
|
||||||
|
|
||||||
|
/**
 * POST /upload — accept a multipart file upload and enqueue an
 * 'ingest.blob' job for it.
 *
 * Multipart fields:
 *   - file     (required) the uploaded file, spooled to disk by multer
 *   - space_id (required) UUID of the target space
 *   - meta     (optional) JSON-encoded object; malformed or non-object
 *              values are ignored and an empty object is used instead
 *
 * Responds 202 with `{ job_id }`, or 400 with a `validation_failed` error.
 *
 * multer has already written the file to disk by the time this handler
 * runs, so every exit that does NOT hand the file over to a job removes
 * the spooled file; otherwise rejected requests accumulate temp files.
 */
router.post(
  '/upload',
  requireWrite('ref'),
  upload.single('file'),
  asyncWrap(async (req, res) => {
    if (!req.file) {
      return res.status(400).json({ error: { code: 'validation_failed', message: 'file required' } });
    }

    const tmp_path = req.file.path;
    // Best-effort cleanup: a failure to delete must not mask the response
    // (or the original error) that triggered the cleanup.
    const discardUpload = () => fs.promises.rm(tmp_path, { force: true }).catch(() => {});

    const space_id = req.body.space_id;
    if (!space_id) {
      await discardUpload();
      return res.status(400).json({ error: { code: 'validation_failed', message: 'space_id required' } });
    }
    // Same shape check the JSON capture route applies; this also rejects a
    // repeated multipart field, which would arrive as an array.
    if (!z.string().uuid().safeParse(space_id).success) {
      await discardUpload();
      return res.status(400).json({ error: { code: 'validation_failed', message: 'space_id must be a uuid' } });
    }

    let meta = {};
    if (req.body.meta) {
      try {
        const parsed = JSON.parse(req.body.meta);
        // Only a plain JSON object is meaningful as metadata.
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
          meta = parsed;
        }
      } catch {
        /* malformed meta is deliberately ignored; keep the empty object */
      }
    }

    let job_id;
    try {
      job_id = await queue.enqueue('ingest.blob', {
        space_id,
        tmp_path,
        filename: req.file.originalname,
        content_type: req.file.mimetype,
        meta
      });
    } catch (err) {
      // No job owns the file, so nothing else will ever remove it.
      await discardUpload();
      throw err;
    }

    res.status(202).json({ job_id });
  })
);
|
||||||
89
lib/ingest/safe_fetch.js
Normal file
89
lib/ingest/safe_fetch.js
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
// Wraps fetch with SSRF mitigations:
|
||||||
|
// - http/https only
|
||||||
|
// - DNS-resolve host and reject loopback/RFC1918/link-local/CGNAT/zero
|
||||||
|
// - NOTE: the resolved IP is not yet pinned into the request; fetch() resolves
|
||||||
|
// the host again, so a DNS rebind between check and connect is still possible
|
||||||
|
// - Follow redirects manually with the same validation on each hop
|
||||||
|
//
|
||||||
|
// Defaults can be loosened via VOID_INGEST_ALLOW_PRIVATE=true (for dev/test
|
||||||
|
// against fixtures that hit 127.0.0.1).
|
||||||
|
|
||||||
|
import { lookup } from 'node:dns/promises';
|
||||||
|
import net from 'node:net';
|
||||||
|
import { Agent } from 'node:https';
|
||||||
|
|
||||||
|
// IPv4 ranges that must never be fetched, as [network address, prefix bits].
const BLOCK_V4 = [
  ['0.0.0.0', 8],        // "this network" / unspecified
  ['127.0.0.0', 8],      // loopback
  ['10.0.0.0', 8],       // RFC 1918 private
  ['172.16.0.0', 12],    // RFC 1918 private
  ['192.168.0.0', 16],   // RFC 1918 private
  ['169.254.0.0', 16],   // link-local (includes cloud metadata 169.254.169.254)
  ['100.64.0.0', 10],    // CGNAT shared address space
  ['192.0.0.0', 24],     // IETF protocol assignments
  ['224.0.0.0', 4],      // multicast
  ['240.0.0.0', 4]       // reserved, includes limited broadcast 255.255.255.255
];
|
||||||
|
|
||||||
|
/**
 * Pack a dotted-quad IPv4 string into an unsigned 32-bit integer.
 *
 * @param {string} ip - Address such as "192.168.0.1".
 * @returns {number} Unsigned 32-bit value, first octet most significant.
 */
function ipv4ToInt(ip) {
  let packed = 0;
  for (const octet of ip.split('.')) {
    packed = (packed << 8) + Number(octet);
  }
  // `<<` works on signed 32-bit values; `>>> 0` reinterprets as unsigned.
  return packed >>> 0;
}
|
||||||
|
|
||||||
|
/**
 * Test whether an IPv4 address lies inside a CIDR block.
 *
 * @param {string} ip - Dotted-quad address to test.
 * @param {[string, number]} cidr - Pair of [network address, prefix bits].
 * @returns {boolean} True when `ip` shares the block's network prefix.
 */
function inV4Cidr(ip, [cidrIp, bits]) {
  // A /0 prefix matches everything; it is special-cased because shifting a
  // 32-bit value by 32 is a no-op in JavaScript.
  let mask = 0;
  if (bits !== 0) {
    mask = (~0 << (32 - bits)) >>> 0;
  }
  const addrNetwork = ipv4ToInt(ip) & mask;
  const blockNetwork = ipv4ToInt(cidrIp) & mask;
  return addrNetwork === blockNetwork;
}
|
||||||
|
|
||||||
|
/**
 * Expand a textual IPv6 address into its eight 16-bit groups.
 *
 * Handles "::" compression, a trailing dotted-quad (e.g. "::ffff:1.2.3.4")
 * and a "%zone" suffix. Intended for input that already passed
 * `net.isIPv6`.
 *
 * @param {string} addr - IPv6 address text.
 * @returns {number[] | null} Eight integers in 0..0xffff, or null when the
 *   text cannot be expanded.
 */
function parseIpv6Groups(addr) {
  let text = addr.toLowerCase();
  const zoneAt = text.indexOf('%');
  if (zoneAt !== -1) text = text.slice(0, zoneAt);

  // Rewrite a trailing dotted-quad as two hex groups.
  const lastColon = text.lastIndexOf(':');
  const tail = text.slice(lastColon + 1);
  if (tail.includes('.')) {
    const octets = tail.split('.').map(Number);
    if (octets.length !== 4 || octets.some((o) => !Number.isInteger(o) || o < 0 || o > 255)) {
      return null;
    }
    const hi = ((octets[0] << 8) | octets[1]).toString(16);
    const lo = ((octets[2] << 8) | octets[3]).toString(16);
    text = `${text.slice(0, lastColon + 1)}${hi}:${lo}`;
  }

  const halves = text.split('::');
  if (halves.length > 2) return null;
  const head = halves[0] ? halves[0].split(':') : [];
  let groups = head;
  if (halves.length === 2) {
    const rest = halves[1] ? halves[1].split(':') : [];
    const fill = 8 - head.length - rest.length;
    if (fill < 0) return null;
    groups = [...head, ...Array(fill).fill('0'), ...rest];
  }
  if (groups.length !== 8) return null;

  const values = groups.map((g) => Number.parseInt(g, 16));
  if (values.some((v) => Number.isNaN(v) || v < 0 || v > 0xffff)) return null;
  return values;
}

/**
 * Decide whether an IP address must not be fetched.
 *
 * IPv4 addresses are checked against BLOCK_V4. IPv6 addresses are parsed
 * numerically (not by string prefix), so non-canonical spellings are
 * treated the same as canonical ones. Blocked IPv6 ranges: unspecified
 * (::), loopback (::1), unique-local (fc00::/7), link-local (fe80::/10),
 * plus IPv4-mapped (::ffff:0:0/96) and NAT64 (64:ff9b::/96) addresses
 * whose embedded IPv4 address is itself blocked.
 *
 * Setting VOID_INGEST_ALLOW_PRIVATE=true disables all blocking.
 *
 * @param {string} addr - IPv4 or IPv6 literal; surrounding [brackets] are
 *   tolerated because WHATWG URL hostnames keep them.
 * @returns {boolean} True when the address is blocked. Input that is not
 *   an IP literal returns false.
 */
function isBlockedAddr(addr) {
  if (process.env.VOID_INGEST_ALLOW_PRIVATE === 'true') return false;

  const bracketed = typeof addr === 'string' && addr.startsWith('[') && addr.endsWith(']');
  const host = bracketed ? addr.slice(1, -1) : addr;

  if (net.isIPv4(host)) return BLOCK_V4.some((cidr) => inV4Cidr(host, cidr));
  if (!net.isIPv6(host)) return false;

  const groups = parseIpv6Groups(host);
  // Fail closed: an address we cannot expand is not one we should fetch.
  if (groups === null) return true;

  const [g0, g1, g2, g3, g4, g5, g6, g7] = groups;
  const upperZero = g0 === 0 && g1 === 0 && g2 === 0 && g3 === 0 && g4 === 0;

  if (upperZero && g5 === 0 && g6 === 0 && g7 <= 1) return true; // :: and ::1
  if ((g0 & 0xfe00) === 0xfc00) return true; // fc00::/7 unique-local
  if ((g0 & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local

  const isV4Mapped = upperZero && g5 === 0xffff;
  const isNat64 = g0 === 0x64 && g1 === 0xff9b && g2 === 0 && g3 === 0 && g4 === 0 && g5 === 0;
  if (isV4Mapped || isNat64) {
    // The low 32 bits carry an IPv4 address; apply the IPv4 rules to it.
    const embedded = `${g6 >> 8}.${g6 & 0xff}.${g7 >> 8}.${g7 & 0xff}`;
    return isBlockedAddr(embedded);
  }
  return false;
}
|
||||||
|
|
||||||
|
/**
 * Error raised when safeFetch refuses or fails to fetch a URL.
 *
 * `code` is a short machine-readable reason (for example 'scheme',
 * 'blocked_addr', 'no_dns', 'bad_redirect', 'too_many_redirects').
 */
export class SafeFetchError extends Error {
  /**
   * @param {string} message - Human-readable description.
   * @param {string} code - Machine-readable reason code.
   * @param {{ cause?: unknown }} [options] - Optional underlying cause.
   */
  constructor(message, code, options) {
    super(message, options);
    // Without this, logs and stack traces label the error as plain "Error".
    this.name = 'SafeFetchError';
    this.code = code;
  }
}
|
||||||
|
|
||||||
|
/**
 * Resolve `host` through DNS and reject it if ANY returned address is
 * blocked.
 *
 * @param {string} host - Hostname to resolve (not an IP literal).
 * @returns {Promise<{address: string, family: number}>} The first resolved
 *   record.
 * @throws {SafeFetchError} code 'no_dns' when resolution fails or yields no
 *   records; code 'blocked_addr' when any record is a blocked address.
 */
async function resolveAndCheck(host) {
  let records;
  try {
    records = await lookup(host, { all: true });
  } catch (cause) {
    // lookup() rejects on resolution failure; surface that as the same
    // typed error callers already receive for every other refusal.
    const err = new SafeFetchError(`no DNS for ${host}`, 'no_dns');
    err.cause = cause;
    throw err;
  }
  if (!records.length) throw new SafeFetchError(`no DNS for ${host}`, 'no_dns');

  // One blocked record is enough to refuse: the connection could land on it.
  const blocked = records.find((r) => isBlockedAddr(r.address));
  if (blocked) {
    throw new SafeFetchError(`${host} resolves to blocked address ${blocked.address}`, 'blocked_addr');
  }
  return records[0];
}
|
||||||
|
|
||||||
|
// HTTP statuses that safeFetch follows as redirects.
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);

/**
 * fetch() wrapper that validates the target of every hop before requesting
 * it and follows redirects manually.
 *
 * For each hop: the scheme must be http or https; an IP-literal host must
 * not be a blocked address; any other host is DNS-resolved and rejected if
 * it resolves to a blocked address.
 *
 * NOTE(review): the address validated here is not pinned into the request.
 * fetch() performs its own resolution, so a DNS answer that changes between
 * the check and the connection is not caught.
 *
 * @param {string} url - URL to fetch.
 * @param {object} [options] - Passed through to fetch(); `redirect` is
 *   always forced to 'manual'.
 * @param {{ maxHops?: number }} [limits] - `maxHops` is the maximum number
 *   of redirects to follow (default 5).
 * @returns {Promise<Response>} The first non-redirect response.
 * @throws {SafeFetchError} codes 'scheme', 'blocked_addr', 'bad_redirect',
 *   'too_many_redirects', plus whatever resolveAndCheck throws.
 */
export async function safeFetch(url, options = {}, { maxHops = 5 } = {}) {
  let current = url;
  for (let hop = 0; hop <= maxHops; hop++) {
    const u = new URL(current);
    if (u.protocol !== 'http:' && u.protocol !== 'https:') {
      throw new SafeFetchError(`unsupported scheme ${u.protocol}`, 'scheme');
    }

    // URL.hostname keeps the [brackets] around IPv6 literals, which
    // net.isIP does not accept; strip them so literals take the literal path
    // instead of being sent to DNS.
    const host = u.hostname.startsWith('[') && u.hostname.endsWith(']')
      ? u.hostname.slice(1, -1)
      : u.hostname;

    if (net.isIP(host)) {
      if (isBlockedAddr(host)) throw new SafeFetchError(`blocked literal IP ${u.hostname}`, 'blocked_addr');
    } else {
      await resolveAndCheck(host);
    }

    const res = await fetch(current, { ...options, redirect: 'manual' });
    if (!REDIRECT_STATUSES.has(res.status)) return res;

    const loc = res.headers.get('location');
    // The redirect body is never read; cancel it so the underlying
    // connection is released instead of held until garbage collection.
    await res.body?.cancel().catch(() => {});
    if (!loc) throw new SafeFetchError('redirect without Location', 'bad_redirect');

    try {
      current = new URL(loc, current).toString();
    } catch (cause) {
      const err = new SafeFetchError(`invalid redirect Location ${loc}`, 'bad_redirect');
      err.cause = cause;
      throw err;
    }
  }
  throw new SafeFetchError(`too many redirects (max ${maxHops})`, 'too_many_redirects');
}
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
import crypto from 'node:crypto';
|
import crypto from 'node:crypto';
|
||||||
import { extract } from '../../ingest/readability.js';
|
import { extract } from '../../ingest/readability.js';
|
||||||
|
import { safeFetch } from '../../ingest/safe_fetch.js';
|
||||||
import * as refs from '../../db/repos/refs.js';
|
import * as refs from '../../db/repos/refs.js';
|
||||||
import { pool } from '../../db/pool.js';
|
import { pool } from '../../db/pool.js';
|
||||||
|
|
||||||
@@ -19,7 +20,7 @@ export async function handler(job) {
|
|||||||
);
|
);
|
||||||
if (existing) return { ref_id: existing.id, idempotent: true };
|
if (existing) return { ref_id: existing.id, idempotent: true };
|
||||||
|
|
||||||
const res = await fetch(url, {
|
const res = await safeFetch(url, {
|
||||||
headers: { 'User-Agent': 'void-ingest/2.0' },
|
headers: { 'User-Agent': 'void-ingest/2.0' },
|
||||||
signal: AbortSignal.timeout(15_000)
|
signal: AbortSignal.timeout(15_000)
|
||||||
});
|
});
|
||||||
|
|||||||
72
tests/api/capture.test.js
Normal file
72
tests/api/capture.test.js
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||||
|
import fs from 'node:fs/promises';
|
||||||
|
import path from 'node:path';
|
||||||
|
import os from 'node:os';
|
||||||
|
import request from 'supertest';
|
||||||
|
import { setup } from './helpers.js';
|
||||||
|
import { stopBoss, waitForJob } from '../helpers/boss.js';
|
||||||
|
import * as queue from '../../lib/jobs/queue.js';
|
||||||
|
import { registerWorkers } from '../../lib/jobs/index.js';
|
||||||
|
import * as spaces from '../../lib/db/repos/spaces.js';
|
||||||
|
import * as refs from '../../lib/db/repos/refs.js';
|
||||||
|
|
||||||
|
let app, ownerHeaders, sp, blobRoot;

// Minimal article-shaped page returned by the stubbed fetch.
const HTML = `<html><head><title>X</title></head><body><article>
<p>An article body with enough text for readability to choose it as the main content.</p>
<p>Another paragraph to satisfy the readability heuristic.</p>
</article></body></html>`;

beforeEach(async () => {
  ({ app, ownerHeaders } = await setup());
  sp = await spaces.create({ slug: 'cap', name: 'Cap' }, { kind: 'user', id: null });

  // Fresh blob directory per test, remembered so afterEach can remove it.
  blobRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'void-blobs-'));
  process.env.BLOB_ROOT = blobRoot;

  await queue.start();
  await registerWorkers();

  // stubGlobal (rather than assigning global.fetch) so the real fetch is
  // put back by vi.unstubAllGlobals() after each test.
  vi.stubGlobal('fetch', vi.fn(async () => new Response(HTML, {
    status: 200, headers: { 'content-type': 'text/html' }
  })));
});

afterEach(async () => {
  await stopBoss();
  vi.unstubAllGlobals();
  vi.restoreAllMocks();
  if (blobRoot) {
    await fs.rm(blobRoot, { recursive: true, force: true });
    blobRoot = undefined;
  }
});
|
||||||
|
|
||||||
|
// End-to-end tests for the /api/capture routes, run against the real app
// with the job workers registered and fetch stubbed.
describe('capture api', () => {
  it('POST /api/capture enqueues ingest.url and returns 202', async () => {
    const res = await request(app).post('/api/capture').set(ownerHeaders)
      .send({ space_id: sp.id, url: 'https://example.com/a' });
    expect(res.status).toBe(202);
    expect(res.body.job_id).toBeTruthy();
    expect(res.body.idempotency_key).toMatch(/^[0-9a-f]{64}$/);
  });

  it('POST /api/capture returns existing ref_id on duplicate', async () => {
    const r1 = await request(app).post('/api/capture').set(ownerHeaders)
      .send({ space_id: sp.id, url: 'https://example.com/dup' });
    // The first capture must finish ingesting before the duplicate is sent.
    await waitForJob('ingest.url', r1.body.job_id, { timeoutMs: 10_000 });
    const r2 = await request(app).post('/api/capture').set(ownerHeaders)
      .send({ space_id: sp.id, url: 'https://example.com/dup' });
    expect(r2.status).toBe(202);
    expect(r2.body.job_id).toBeNull();
    expect(r2.body.ref_id).toBeTruthy();
  });

  it('POST /api/capture/upload enqueues ingest.blob', async () => {
    const res = await request(app).post('/api/capture/upload').set(ownerHeaders)
      .field('space_id', sp.id)
      .attach('file', Buffer.from('hi'), { filename: 'a.txt', contentType: 'text/plain' });
    expect(res.status).toBe(202);
    expect(res.body.job_id).toBeTruthy();
    await waitForJob('ingest.blob', res.body.job_id, { timeoutMs: 10_000 });
    const rows = await refs.list({ space_id: sp.id });
    // Fail with a clear message instead of a TypeError when nothing was stored.
    expect(rows.length).toBeGreaterThan(0);
    expect(rows[0].kind).toBe('file');
  });

  it('POST /api/capture/upload rejects a request without a file', async () => {
    const res = await request(app).post('/api/capture/upload').set(ownerHeaders)
      .field('space_id', sp.id);
    expect(res.status).toBe(400);
    expect(res.body.error.code).toBe('validation_failed');
  });

  it('POST /api/capture/upload rejects a request without space_id', async () => {
    const res = await request(app).post('/api/capture/upload').set(ownerHeaders)
      .attach('file', Buffer.from('hi'), { filename: 'a.txt', contentType: 'text/plain' });
    expect(res.status).toBe(400);
    expect(res.body.error.code).toBe('validation_failed');
  });

  it('POST /api/capture rejects missing url', async () => {
    const res = await request(app).post('/api/capture').set(ownerHeaders)
      .send({ space_id: sp.id });
    expect(res.status).toBe(400);
  });

  it('unauthenticated → 401', async () => {
    const res = await request(app).post('/api/capture')
      .send({ space_id: sp.id, url: 'https://example.com/a' });
    expect(res.status).toBe(401);
  });
});
|
||||||
34
tests/ingest/safe_fetch.test.js
Normal file
34
tests/ingest/safe_fetch.test.js
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||||
|
import { safeFetch, SafeFetchError } from '../../lib/ingest/safe_fetch.js';
|
||||||
|
|
||||||
|
// Every test starts with private-address blocking enabled.
beforeEach(() => { delete process.env.VOID_INGEST_ALLOW_PRIVATE; });

// Undo everything a test may have changed, so neither the env flag nor a
// stubbed fetch outlives the test that set it.
afterEach(() => {
  delete process.env.VOID_INGEST_ALLOW_PRIVATE;
  vi.unstubAllGlobals();
  vi.restoreAllMocks();
});

describe('safeFetch', () => {
  it('rejects file:// scheme', async () => {
    await expect(safeFetch('file:///etc/passwd')).rejects.toThrow(SafeFetchError);
  });

  it('rejects literal loopback IP', async () => {
    await expect(safeFetch('http://127.0.0.1/x')).rejects.toThrow(/blocked/);
  });

  it('rejects literal RFC1918 IP', async () => {
    await expect(safeFetch('http://192.168.1.1/x')).rejects.toThrow(/blocked/);
  });

  it('rejects literal CGNAT IP', async () => {
    await expect(safeFetch('http://100.64.0.1/x')).rejects.toThrow(/blocked/);
  });

  it('rejects AWS metadata literal IP', async () => {
    await expect(safeFetch('http://169.254.169.254/latest/meta-data/')).rejects.toThrow(/blocked/);
  });

  it('allows literal IPs when VOID_INGEST_ALLOW_PRIVATE=true (test fixtures)', async () => {
    process.env.VOID_INGEST_ALLOW_PRIVATE = 'true';
    vi.stubGlobal('fetch', vi.fn(async () => new Response('ok', { status: 200 })));
    const res = await safeFetch('http://127.0.0.1:65535/');
    expect(res.status).toBe(200);
  });

  it('re-validates the target of a redirect', async () => {
    // A public literal IP avoids DNS; it redirects to loopback.
    const fetchMock = vi.fn(async () => new Response(null, {
      status: 302, headers: { location: 'http://127.0.0.1/admin' }
    }));
    vi.stubGlobal('fetch', fetchMock);
    await expect(safeFetch('http://93.184.216.34/')).rejects.toThrow(/blocked/);
    // Only the first hop is requested; the blocked hop is never fetched.
    expect(fetchMock).toHaveBeenCalledTimes(1);
  });

  it('gives up after maxHops redirects', async () => {
    const fetchMock = vi.fn(async () => new Response(null, {
      status: 302, headers: { location: 'http://93.184.216.34/next' }
    }));
    vi.stubGlobal('fetch', fetchMock);
    await expect(safeFetch('http://93.184.216.34/', {}, { maxHops: 2 }))
      .rejects.toThrow(/too many redirects/);
    // The initial request plus two followed redirects.
    expect(fetchMock).toHaveBeenCalledTimes(3);
  });
});
|
||||||
Reference in New Issue
Block a user