feat(api): capture POST + upload + SSRF-safe URL fetch
safe_fetch.js validates URLs before fetching: it rejects non-http(s) schemes and literal or DNS-resolved loopback / RFC1918 / link-local / CGNAT / metadata addresses, and follows redirects manually with the same checks on each hop. The address check can be disabled by setting VOID_INGEST_ALLOW_PRIVATE=true, for offline test fixtures that hit 127.0.0.1. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -21,6 +21,7 @@ import { router as pendingChangesRouter } from './routes/pending_changes.js';
|
||||
import { router as auditRouter } from './routes/audit.js';
|
||||
import { router as searchRouter } from './routes/search.js';
|
||||
import { router as jobsRouter } from './routes/jobs.js';
|
||||
import { router as captureRouter } from './routes/capture.js';
|
||||
|
||||
export function mountApi(app) {
|
||||
const api = Router();
|
||||
@@ -49,6 +50,7 @@ export function mountApi(app) {
|
||||
api.use('/audit', auditRouter);
|
||||
api.use('/search', searchRouter);
|
||||
api.use('/jobs', jobsRouter);
|
||||
api.use('/capture', captureRouter);
|
||||
api.use('/:entity_type/:entity_id/tags', tagsByEntityRouter);
|
||||
|
||||
api.use((_req, _res, next) => next(new NotFoundError('route not found')));
|
||||
|
||||
79
lib/api/routes/capture.js
Normal file
79
lib/api/routes/capture.js
Normal file
@@ -0,0 +1,79 @@
|
||||
import { Router } from 'express';
|
||||
import { z } from 'zod';
|
||||
import crypto from 'node:crypto';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import os from 'node:os';
|
||||
import multer from 'multer';
|
||||
import * as queue from '../../jobs/queue.js';
|
||||
import { pool } from '../../db/pool.js';
|
||||
import { validate } from '../validate.js';
|
||||
import { requireWrite } from '../cap.js';
|
||||
import { asyncWrap } from '../errors.js';
|
||||
|
||||
// Request body schema for POST /capture.
//   space_id - UUID of the space the capture belongs to
//   url      - absolute http(s) URL to ingest
//   hint     - optional caller-supplied metadata (project_id, title, tags)
const captureBody = z.object({
  space_id: z.string().uuid(),
  // z.string().url() on its own accepts any scheme (file:, ftp:, javascript:, ...).
  // The ingest fetcher only accepts http(s), so reject everything else here with
  // a validation error instead of enqueueing a job that can only fail.
  url: z.string().url().refine((value) => /^https?:\/\//i.test(value), {
    message: 'url must use http or https',
  }),
  hint: z
    .object({
      project_id: z.string().uuid().optional(),
      title: z.string().optional(),
      tags: z.array(z.string()).optional(),
    })
    .optional(),
});
|
||||
|
||||
// Scratch directory multer writes incoming uploads to. Overridable through the
// UPLOAD_TMP environment variable; defaults to <os tmpdir>/void-uploads.
const UPLOAD_TMP = process.env.UPLOAD_TMP || path.join(os.tmpdir(), 'void-uploads');

// Create the directory (and any missing parents) once, at module load.
fs.mkdirSync(UPLOAD_TMP, { recursive: true });

// Per-file upload size cap: 100 MiB.
const MAX_UPLOAD_BYTES = 100 * 1024 * 1024;

const upload = multer({
  dest: UPLOAD_TMP,
  limits: { fileSize: MAX_UPLOAD_BYTES },
});
|
||||
|
||||
/**
 * Derives the idempotency key for a (space, URL) pair.
 *
 * The two parts are joined with a NUL byte before hashing so that different
 * pairs cannot collapse into the same input string.
 *
 * @param {string} space_id - space identifier
 * @param {string} url - URL being captured
 * @returns {string} lowercase hex SHA-256 digest (64 characters)
 */
function key(space_id, url) {
  const hasher = crypto.createHash('sha256');
  hasher.update(space_id + '\x00' + url);
  return hasher.digest('hex');
}
|
||||
|
||||
export const router = Router();
|
||||
|
||||
// POST /capture - request ingestion of a URL into a space.
//
// Always answers 202. If a ref with the same idempotency key already exists,
// no job is queued and the response carries that ref's id with job_id: null;
// otherwise an 'ingest.url' job is enqueued and its id is returned.
//
// NOTE(review): `hint` is accepted by the body schema but is not forwarded to
// the job payload here - confirm whether that is intentional.
router.post(
  '/',
  requireWrite('ref'),
  validate({ body: captureBody }),
  asyncWrap(async (req, res) => {
    const { space_id, url } = req.body;
    const idempotency_key = key(space_id, url);

    // Look for a ref already created from this (space, url) pair.
    const found = await pool.query(
      `SELECT id FROM refs WHERE source_kind='url' AND external_id=$1 LIMIT 1`,
      [idempotency_key]
    );
    const priorRef = found.rows[0];

    if (priorRef) {
      return res.status(202).json({
        job_id: null,
        idempotency_key,
        ref_id: priorRef.id,
      });
    }

    const job_id = await queue.enqueue('ingest.url', { space_id, url });
    res.status(202).json({ job_id, idempotency_key });
  })
);
|
||||
|
||||
// Best-effort removal of a multer temp file that will not be handed to a job.
// Failures are ignored: the request outcome must not depend on the cleanup.
function discardUpload(file) {
  if (file?.path) fs.rm(file.path, { force: true }, () => {});
}

// POST /capture/upload - accept one multipart file (field "file") plus a
// `space_id` field and an optional JSON-encoded `meta` field, then enqueue an
// 'ingest.blob' job pointing at the temp file multer wrote.
//
// Responds 202 { job_id } on success, or 400 with a validation_failed error
// when the file or a valid space_id is missing. The temp file is removed on
// every path where it is not handed to a job.
router.post(
  '/upload',
  requireWrite('ref'),
  upload.single('file'),
  asyncWrap(async (req, res) => {
    // Shared 400 path: by this point multer may already have written the file
    // to disk, so drop it before answering.
    const reject = (message) => {
      discardUpload(req.file);
      return res.status(400).json({ error: { code: 'validation_failed', message } });
    };

    if (!req.file) return reject('file required');

    const space_id = req.body.space_id;
    if (!space_id) return reject('space_id required');
    // Same constraint the JSON capture endpoint applies to space_id.
    if (!z.string().uuid().safeParse(space_id).success) {
      return reject('space_id must be a uuid');
    }

    let meta = {};
    if (req.body.meta) {
      try {
        const parsed = JSON.parse(req.body.meta);
        // Only a plain JSON object is meaningful as metadata; null, arrays and
        // scalars fall back to the empty default.
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
          meta = parsed;
        }
      } catch {
        // Malformed meta is deliberately ignored; the upload itself is still valid.
      }
    }

    let job_id;
    try {
      job_id = await queue.enqueue('ingest.blob', {
        space_id,
        tmp_path: req.file.path,
        filename: req.file.originalname,
        content_type: req.file.mimetype,
        meta,
      });
    } catch (err) {
      // No job will ever pick the file up, so do not leave it behind.
      discardUpload(req.file);
      throw err;
    }

    res.status(202).json({ job_id });
  })
);
|
||||
89
lib/ingest/safe_fetch.js
Normal file
89
lib/ingest/safe_fetch.js
Normal file
@@ -0,0 +1,89 @@
|
||||
// Wraps fetch with SSRF mitigations:
|
||||
// - http/https only
|
||||
// - DNS-resolve host and reject loopback/RFC1918/link-local/CGNAT/zero
|
||||
// - NOTE: the resolved IP is not yet pinned into the request; fetch() resolves
//   the host again on connect, so a DNS rebind between the check and the
//   connect is not prevented
|
||||
// - Follow redirects manually with the same validation on each hop
|
||||
//
|
||||
// Defaults can be loosened via VOID_INGEST_ALLOW_PRIVATE=true (for dev/test
|
||||
// against fixtures that hit 127.0.0.1).
|
||||
|
||||
import { lookup } from 'node:dns/promises';
|
||||
import net from 'node:net';
|
||||
import { Agent } from 'node:https';
|
||||
|
||||
// IPv4 ranges that must never be fetched, as [network, prefixLength] pairs.
const BLOCK_V4 = [
  ['0.0.0.0', 8],      // "this network" / unspecified
  ['127.0.0.0', 8],    // loopback
  ['10.0.0.0', 8],     // RFC 1918 private
  ['172.16.0.0', 12],  // RFC 1918 private
  ['192.168.0.0', 16], // RFC 1918 private
  ['169.254.0.0', 16], // link-local (covers 169.254.169.254)
  ['100.64.0.0', 10],  // CGNAT shared address space
];

/**
 * Converts a dotted-quad IPv4 string to an unsigned 32-bit integer.
 * @param {string} ip - e.g. "192.168.1.1"
 * @returns {number}
 */
function ipv4ToInt(ip) {
  // `>>> 0` turns the possibly negative int32 accumulator into uint32.
  return ip.split('.').reduce((acc, oct) => (acc << 8) + Number(oct), 0) >>> 0;
}

/**
 * Tests whether an IPv4 address lies inside a CIDR range.
 * @param {string} ip - dotted-quad address
 * @param {[string, number]} cidr - [network address, prefix length]
 * @returns {boolean}
 */
function inV4Cidr(ip, [cidrIp, bits]) {
  const ipi = ipv4ToInt(ip);
  const cidri = ipv4ToInt(cidrIp);
  // A 32-bit shift is a no-op in JS, so /0 needs an explicit empty mask.
  const mask = bits === 0 ? 0 : (~0 << (32 - bits)) >>> 0;
  return (ipi & mask) === (cidri & mask);
}

// True when the dotted-quad address falls in any BLOCK_V4 range.
function isBlockedV4(ip) {
  return BLOCK_V4.some((cidr) => inV4Cidr(ip, cidr));
}

// Expands a syntactically valid IPv6 address into its eight 16-bit groups.
// Handles "::" compression, a trailing dotted-quad, and a "%zone" suffix.
function ipv6ToHextets(addr) {
  let text = addr.split('%')[0].toLowerCase();

  // Rewrite an embedded dotted-quad tail ("::ffff:1.2.3.4") as two hex groups.
  const lastColon = text.lastIndexOf(':');
  const tail = text.slice(lastColon + 1);
  if (tail.includes('.')) {
    const v4 = ipv4ToInt(tail);
    text = `${text.slice(0, lastColon + 1)}${(v4 >>> 16).toString(16)}:${(v4 & 0xffff).toString(16)}`;
  }

  const toGroups = (part) => (part ? part.split(':').map((h) => Number.parseInt(h, 16)) : []);
  const [head, rest] = text.split('::');
  const left = toGroups(head);
  if (rest === undefined) return left; // no "::", all eight groups are explicit
  const right = toGroups(rest);
  const zeros = new Array(Math.max(0, 8 - left.length - right.length)).fill(0);
  return [...left, ...zeros, ...right];
}

/**
 * Decides whether an IP address literal points at a destination the ingest
 * fetcher must not reach.
 *
 * Blocked: every BLOCK_V4 range; IPv6 unspecified (::) and loopback (::1);
 * unique-local fc00::/7; link-local fe80::/10; and IPv4-mapped addresses
 * (::ffff:0:0/96, in dotted or hex form) whose embedded IPv4 address is blocked.
 *
 * The addresses are parsed numerically rather than matched by string prefix, so
 * hex-form mapped addresses such as "::ffff:7f00:1" and link-local addresses
 * outside fe80::/16 (e.g. "febf::1") are covered.
 *
 * Setting VOID_INGEST_ALLOW_PRIVATE=true disables the check entirely
 * (dev/test fixtures that hit 127.0.0.1).
 *
 * @param {string} addr - IPv4 or IPv6 literal, without URL brackets
 * @returns {boolean} true when the address must be rejected; false for
 *   allowed addresses and for strings that are not IP literals
 */
function isBlockedAddr(addr) {
  if (process.env.VOID_INGEST_ALLOW_PRIVATE === 'true') return false;
  if (net.isIPv4(addr)) return isBlockedV4(addr);
  if (!net.isIPv6(addr)) return false;

  const h = ipv6ToHextets(addr);
  const highZero = h.slice(0, 5).every((group) => group === 0);

  // IPv4-mapped (::ffff:a.b.c.d): judge by the embedded IPv4 address.
  if (highZero && h[5] === 0xffff) {
    const embedded = [h[6] >> 8, h[6] & 0xff, h[7] >> 8, h[7] & 0xff].join('.');
    return isBlockedV4(embedded);
  }
  // Unspecified (::) and loopback (::1).
  if (highZero && h[5] === 0 && h[6] === 0 && (h[7] === 0 || h[7] === 1)) return true;
  if ((h[0] & 0xfe00) === 0xfc00) return true; // unique-local fc00::/7
  if ((h[0] & 0xffc0) === 0xfe80) return true; // link-local fe80::/10
  return false;
}
|
||||
|
||||
/**
 * Error thrown when a fetch is refused or cannot be completed safely.
 *
 * `code` is a short machine-readable reason. Values used in this file:
 * 'scheme', 'blocked_addr', 'no_dns', 'bad_redirect', 'too_many_redirects'.
 */
export class SafeFetchError extends Error {
  /**
   * @param {string} message - human-readable description
   * @param {string} code - machine-readable reason
   */
  constructor(message, code) {
    super(message);
    // Without this, logs and stack traces label the error as plain "Error".
    this.name = 'SafeFetchError';
    this.code = code;
  }
}
|
||||
|
||||
/**
 * Resolves a hostname and refuses it if any of its addresses is blocked.
 *
 * @param {string} host - hostname to resolve
 * @returns {Promise<{address: string, family: number}>} the first resolved record
 * @throws {SafeFetchError} code 'no_dns' when the lookup yields no records,
 *   code 'blocked_addr' when any resolved address is blocked
 */
async function resolveAndCheck(host) {
  const addresses = await lookup(host, { all: true });
  if (addresses.length === 0) {
    throw new SafeFetchError(`no DNS for ${host}`, 'no_dns');
  }

  // A single blocked record is enough to refuse the whole host.
  const offending = addresses.find(({ address }) => isBlockedAddr(address));
  if (offending) {
    throw new SafeFetchError(
      `${host} resolves to blocked address ${offending.address}`,
      'blocked_addr'
    );
  }

  return addresses[0];
}
|
||||
|
||||
/**
 * fetch() wrapper with SSRF checks applied to the initial URL and to every
 * redirect hop.
 *
 * Per hop: the scheme must be http or https; an IP-literal host is checked
 * directly, any other host is resolved and each resolved address is checked.
 * Redirects (301/302/303/307/308) are followed manually so the same checks
 * run on each Location target.
 *
 * Limitation: the address that was checked is not pinned into the request.
 * fetch() resolves the hostname again when connecting, so a DNS answer that
 * changes between the check and the connect is not caught here.
 *
 * @param {string} url - absolute URL to fetch
 * @param {object} [options] - passed through to fetch(); `redirect` is forced to 'manual'
 * @param {{maxHops?: number}} [limits] - maximum number of redirects to follow (default 5)
 * @returns {Promise<Response>} the first non-redirect response
 * @throws {SafeFetchError} on unsupported scheme, blocked address, redirect
 *   without Location, or too many redirects
 */
export async function safeFetch(url, options = {}, { maxHops = 5 } = {}) {
  let current = url;
  for (let hop = 0; hop <= maxHops; hop++) {
    const u = new URL(current);
    if (u.protocol !== 'http:' && u.protocol !== 'https:') {
      throw new SafeFetchError(`unsupported scheme ${u.protocol}`, 'scheme');
    }

    // URL.hostname keeps the square brackets on IPv6 literals ("[::1]"), which
    // net.isIP does not recognise. Strip them so IPv6 literals are checked as
    // literals instead of being handed to the DNS resolver.
    const host = u.hostname.startsWith('[') && u.hostname.endsWith(']')
      ? u.hostname.slice(1, -1)
      : u.hostname;

    if (net.isIP(host)) {
      if (isBlockedAddr(host)) {
        throw new SafeFetchError(`blocked literal IP ${host}`, 'blocked_addr');
      }
    } else {
      await resolveAndCheck(host);
    }

    const res = await fetch(current, { ...options, redirect: 'manual' });
    if ([301, 302, 303, 307, 308].includes(res.status)) {
      const loc = res.headers.get('location');
      // The redirect body is never read; cancel it so the underlying
      // connection is released promptly.
      await res.body?.cancel();
      if (!loc) throw new SafeFetchError('redirect without Location', 'bad_redirect');
      // Location may be relative; resolve it against the URL just fetched.
      current = new URL(loc, current).toString();
      continue;
    }
    return res;
  }
  throw new SafeFetchError(`too many redirects (max ${maxHops})`, 'too_many_redirects');
}
|
||||
@@ -1,5 +1,6 @@
|
||||
import crypto from 'node:crypto';
|
||||
import { extract } from '../../ingest/readability.js';
|
||||
import { safeFetch } from '../../ingest/safe_fetch.js';
|
||||
import * as refs from '../../db/repos/refs.js';
|
||||
import { pool } from '../../db/pool.js';
|
||||
|
||||
@@ -19,7 +20,7 @@ export async function handler(job) {
|
||||
);
|
||||
if (existing) return { ref_id: existing.id, idempotent: true };
|
||||
|
||||
const res = await fetch(url, {
|
||||
const res = await safeFetch(url, {
|
||||
headers: { 'User-Agent': 'void-ingest/2.0' },
|
||||
signal: AbortSignal.timeout(15_000)
|
||||
});
|
||||
|
||||
72
tests/api/capture.test.js
Normal file
72
tests/api/capture.test.js
Normal file
@@ -0,0 +1,72 @@
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import os from 'node:os';
|
||||
import request from 'supertest';
|
||||
import { setup } from './helpers.js';
|
||||
import { stopBoss, waitForJob } from '../helpers/boss.js';
|
||||
import * as queue from '../../lib/jobs/queue.js';
|
||||
import { registerWorkers } from '../../lib/jobs/index.js';
|
||||
import * as spaces from '../../lib/db/repos/spaces.js';
|
||||
import * as refs from '../../lib/db/repos/refs.js';
|
||||
|
||||
let app, ownerHeaders, sp;
|
||||
const HTML = `<html><head><title>X</title></head><body><article>
|
||||
<p>An article body with enough text for readability to choose it as the main content.</p>
|
||||
<p>Another paragraph to satisfy the readability heuristic.</p>
|
||||
</article></body></html>`;
|
||||
|
||||
// global.fetch is replaced by plain assignment below, which
// vi.restoreAllMocks() does not undo, so keep the real one and put it back.
const realFetch = globalThis.fetch;

// Fresh app, space, blob directory, job queue and stubbed fetch for every test.
beforeEach(async () => {
  ({ app, ownerHeaders } = await setup());
  sp = await spaces.create({ slug: 'cap', name: 'Cap' }, { kind: 'user', id: null });
  process.env.BLOB_ROOT = await fs.mkdtemp(path.join(os.tmpdir(), 'void-blobs-'));
  await queue.start();
  await registerWorkers();
  // Every outbound fetch answers with the canned article HTML.
  global.fetch = vi.fn(async () => new Response(HTML, {
    status: 200,
    headers: { 'content-type': 'text/html' },
  }));
});

afterEach(async () => {
  await stopBoss();
  vi.restoreAllMocks();
  globalThis.fetch = realFetch;
});
|
||||
|
||||
// End-to-end checks for the /api/capture routes.
describe('capture api', () => {
  // Authenticated JSON POST to /api/capture.
  const postCapture = (body) =>
    request(app).post('/api/capture').set(ownerHeaders).send(body);

  it('POST /api/capture enqueues ingest.url and returns 202', async () => {
    const res = await postCapture({ space_id: sp.id, url: 'https://example.com/a' });

    expect(res.status).toBe(202);
    expect(res.body.job_id).toBeTruthy();
    expect(res.body.idempotency_key).toMatch(/^[0-9a-f]{64}$/);
  });

  it('POST /api/capture returns existing ref_id on duplicate', async () => {
    const payload = { space_id: sp.id, url: 'https://example.com/dup' };

    const r1 = await postCapture(payload);
    // Let the first ingest finish so the ref exists before the second request.
    await waitForJob('ingest.url', r1.body.job_id, { timeoutMs: 10_000 });
    const r2 = await postCapture(payload);

    expect(r2.status).toBe(202);
    expect(r2.body.job_id).toBeNull();
    expect(r2.body.ref_id).toBeTruthy();
  });

  it('POST /api/capture/upload enqueues ingest.blob', async () => {
    const res = await request(app)
      .post('/api/capture/upload')
      .set(ownerHeaders)
      .field('space_id', sp.id)
      .attach('file', Buffer.from('hi'), { filename: 'a.txt', contentType: 'text/plain' });

    expect(res.status).toBe(202);
    expect(res.body.job_id).toBeTruthy();

    await waitForJob('ingest.blob', res.body.job_id, { timeoutMs: 10_000 });
    const rows = await refs.list({ space_id: sp.id });
    expect(rows[0].kind).toBe('file');
  });

  it('POST /api/capture rejects missing url', async () => {
    const res = await postCapture({ space_id: sp.id });

    expect(res.status).toBe(400);
  });

  it('unauthenticated → 401', async () => {
    // Deliberately sent without the owner headers.
    const res = await request(app)
      .post('/api/capture')
      .send({ space_id: sp.id, url: 'https://example.com/a' });

    expect(res.status).toBe(401);
  });
});
|
||||
34
tests/ingest/safe_fetch.test.js
Normal file
34
tests/ingest/safe_fetch.test.js
Normal file
@@ -0,0 +1,34 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { safeFetch, SafeFetchError } from '../../lib/ingest/safe_fetch.js';
|
||||
|
||||
// Tests below replace global.fetch by plain assignment and set the
// VOID_INGEST_ALLOW_PRIVATE flag; vi.restoreAllMocks() undoes neither, so both
// are reset explicitly to keep them from leaking past this file.
const realFetch = globalThis.fetch;

beforeEach(() => {
  delete process.env.VOID_INGEST_ALLOW_PRIVATE;
});

afterEach(() => {
  vi.restoreAllMocks();
  globalThis.fetch = realFetch;
  delete process.env.VOID_INGEST_ALLOW_PRIVATE;
});
|
||||
|
||||
// Unit checks for the SSRF guards in safeFetch.
describe('safeFetch', () => {
  it('rejects file:// scheme', async () => {
    await expect(safeFetch('file:///etc/passwd')).rejects.toThrow(SafeFetchError);
  });

  // Each literal address below must be refused with a "blocked" error.
  it.each([
    ['rejects literal loopback IP', 'http://127.0.0.1/x'],
    ['rejects literal RFC1918 IP', 'http://192.168.1.1/x'],
    ['rejects literal CGNAT IP', 'http://100.64.0.1/x'],
    ['rejects AWS metadata literal IP', 'http://169.254.169.254/latest/meta-data/'],
  ])('%s', async (_title, target) => {
    await expect(safeFetch(target)).rejects.toThrow(/blocked/);
  });

  it('allows literal IPs when VOID_INGEST_ALLOW_PRIVATE=true (test fixtures)', async () => {
    process.env.VOID_INGEST_ALLOW_PRIVATE = 'true';
    global.fetch = vi.fn(async () => new Response('ok', { status: 200 }));

    const res = await safeFetch('http://127.0.0.1:65535/');

    expect(res.status).toBe(200);
  });
});
|
||||
Reference in New Issue
Block a user