safe_fetch.js validates URLs before fetching: it rejects non-http(s) schemes and any literal or DNS-resolved loopback, RFC 1918, link-local, CGNAT, or metadata address, and it follows redirects manually, applying the same checks on each hop. The test setup gates the check behind VOID_INGEST_ALLOW_PRIVATE so that offline fixtures can hit 127.0.0.1. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
73 lines
3.1 KiB
JavaScript
73 lines
3.1 KiB
JavaScript
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
|
import fs from 'node:fs/promises';
|
|
import path from 'node:path';
|
|
import os from 'node:os';
|
|
import request from 'supertest';
|
|
import { setup } from './helpers.js';
|
|
import { stopBoss, waitForJob } from '../helpers/boss.js';
|
|
import * as queue from '../../lib/jobs/queue.js';
|
|
import { registerWorkers } from '../../lib/jobs/index.js';
|
|
import * as spaces from '../../lib/db/repos/spaces.js';
|
|
import * as refs from '../../lib/db/repos/refs.js';
|
|
|
|
// Shared per-test state. All three are (re)assigned in the beforeEach hook:
// `app` and `ownerHeaders` come from setup(), `sp` from spaces.create().
let app;
let ownerHeaders;
let sp;

// Canned HTML page served by the stubbed fetch for every captured URL.
// The two paragraphs are there so the readability extraction has an
// <article> body to pick as the main content (per the fixture's own text).
const HTML = `<html><head><title>X</title></head><body><article>
<p>An article body with enough text for readability to choose it as the main content.</p>
<p>Another paragraph to satisfy the readability heuristic.</p>
</article></body></html>`;
|
|
|
|
// Scratch state owned by the two hooks below so teardown can undo setup.
let blobRoot;
let previousBlobRoot;

/**
 * Per-test setup.
 *
 * - Obtains `app` / `ownerHeaders` from setup() and creates the 'cap' space.
 * - Points BLOB_ROOT at a fresh temp directory (removed again in afterEach).
 * - Starts the queue and registers the workers.
 * - Stubs the global `fetch` so every request resolves to the HTML fixture
 *   with status 200 and a text/html content type.
 */
beforeEach(async () => {
  ({ app, ownerHeaders } = await setup());
  sp = await spaces.create({ slug: 'cap', name: 'Cap' }, { kind: 'user', id: null });

  // Remember the prior value so the environment can be put back afterwards.
  previousBlobRoot = process.env.BLOB_ROOT;
  blobRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'void-blobs-'));
  process.env.BLOB_ROOT = blobRoot;

  await queue.start();
  await registerWorkers();

  // stubGlobal (rather than assigning global.fetch directly) lets
  // vi.unstubAllGlobals() restore the real fetch in teardown; a plain
  // assignment is not undone by vi.restoreAllMocks().
  // A new Response is built per call because a Response body is single-use.
  vi.stubGlobal('fetch', vi.fn(async () => new Response(HTML, {
    status: 200,
    headers: { 'content-type': 'text/html' },
  })));
});

/**
 * Per-test teardown: stops the job runner, then always restores the fetch
 * stub, the mocks, the BLOB_ROOT environment variable and the temp directory,
 * even when stopBoss() rejects.
 */
afterEach(async () => {
  try {
    await stopBoss();
  } finally {
    vi.unstubAllGlobals();
    vi.restoreAllMocks();

    // Only undo the environment change if this test actually made it
    // (beforeEach may have failed before the temp directory was created).
    if (blobRoot !== undefined) {
      if (previousBlobRoot === undefined) {
        delete process.env.BLOB_ROOT;
      } else {
        process.env.BLOB_ROOT = previousBlobRoot;
      }
      const staleRoot = blobRoot;
      blobRoot = undefined;
      await fs.rm(staleRoot, { recursive: true, force: true });
    }
  }
});
|
|
|
|
/**
 * HTTP-level tests for the capture endpoints (`POST /api/capture` and
 * `POST /api/capture/upload`), driven through supertest against `app`.
 * Relies on the beforeEach hook for `app`, `ownerHeaders`, `sp` and the
 * stubbed `fetch`.
 */
describe('capture api', () => {
  // Owner-authenticated JSON POST to /api/capture. `app` and `ownerHeaders`
  // are read at call time because beforeEach reassigns them for every test.
  const postCapture = (body) =>
    request(app).post('/api/capture').set(ownerHeaders).send(body);

  it('POST /api/capture enqueues ingest.url and returns 202', async () => {
    const res = await postCapture({ space_id: sp.id, url: 'https://example.com/a' });

    expect(res.status).toBe(202);
    expect(res.body.job_id).toBeTruthy();
    // The key is expected to be 64 lowercase hex characters.
    expect(res.body.idempotency_key).toMatch(/^[0-9a-f]{64}$/);
  });

  it('POST /api/capture returns existing ref_id on duplicate', async () => {
    const payload = { space_id: sp.id, url: 'https://example.com/dup' };

    const r1 = await postCapture(payload);
    // Fail fast with a clear message if the first capture was not accepted,
    // instead of waiting on an undefined job id until the timeout expires.
    expect(r1.status).toBe(202);
    expect(r1.body.job_id).toBeTruthy();

    // NOTE(review): this waits up to 10 s; confirm the configured Vitest
    // testTimeout is at least that long.
    await waitForJob('ingest.url', r1.body.job_id, { timeoutMs: 10_000 });

    const r2 = await postCapture(payload);
    expect(r2.status).toBe(202);
    // The second submission of the same URL must not enqueue a new job and
    // must report a ref id instead.
    expect(r2.body.job_id).toBeNull();
    expect(r2.body.ref_id).toBeTruthy();
  });

  it('POST /api/capture/upload enqueues ingest.blob', async () => {
    const res = await request(app).post('/api/capture/upload').set(ownerHeaders)
      .field('space_id', sp.id)
      .attach('file', Buffer.from('hi'), { filename: 'a.txt', contentType: 'text/plain' });

    expect(res.status).toBe(202);
    expect(res.body.job_id).toBeTruthy();

    await waitForJob('ingest.blob', res.body.job_id, { timeoutMs: 10_000 });

    const rows = await refs.list({ space_id: sp.id });
    // Guard the index below so an empty result is reported as an assertion
    // failure rather than a TypeError.
    expect(rows.length).toBeGreaterThan(0);
    expect(rows[0].kind).toBe('file');
  });

  it('POST /api/capture rejects missing url', async () => {
    const res = await postCapture({ space_id: sp.id });

    expect(res.status).toBe(400);
  });

  it('unauthenticated → 401', async () => {
    // Deliberately sent without ownerHeaders.
    const res = await request(app).post('/api/capture')
      .send({ space_id: sp.id, url: 'https://example.com/a' });

    expect(res.status).toBe(401);
  });
});
|