feat(jobs): ingest.url worker (fetch + readability + idempotent ref)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
root
2026-06-01 03:35:44 +10:00
parent 6e973404e9
commit 3ccfd20b5f
3 changed files with 92 additions and 1 deletions

View File

@@ -0,0 +1,47 @@
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { resetDb } from '../../helpers/db.js';
import { migrateUp } from '../../../lib/db/migrate.js';
import { stopBoss, waitForJob } from '../../helpers/boss.js';
import * as queue from '../../../lib/jobs/queue.js';
import { registerWorkers } from '../../../lib/jobs/index.js';
import * as spaces from '../../../lib/db/repos/spaces.js';
import * as refs from '../../../lib/db/repos/refs.js';
// Fixture page handed back by the mocked fetch in this file: a titled document
// with an <article> holding a heading and two paragraphs of body text.
const HTML = [
  '<html><head><title>Blackflame</title></head><body>',
  '<article>',
  '<h1>Blackflame</h1>',
  '<p>An essay on the Cradle aesthetic and the blackflame motif. Long enough for readability to consider this main content. Lorem ipsum dolor sit amet.</p>',
  '<p>Another paragraph that pads out the article for readability detection.</p>',
  '</article></body></html>',
].join('\n');
// Per-test setup: reset and migrate the database, start the queue, register the
// job workers, then replace the global fetch with a mock that always answers
// with the HTML fixture so no test performs a real network request.
beforeEach(async () => {
  await resetDb();
  await migrateUp();
  await queue.start();
  await registerWorkers();
  // vi.stubGlobal remembers the previous value so vi.unstubAllGlobals() can put
  // the real fetch back; a bare `global.fetch = ...` assignment is not undone by
  // vi.restoreAllMocks() and would leave the mock installed after the test.
  // A new Response is created on every call because a Response body can only be
  // consumed once.
  vi.stubGlobal('fetch', vi.fn(async () => new Response(HTML, {
    status: 200,
    headers: { 'content-type': 'text/html' },
  })));
});

// Per-test teardown: stop the queue, then undo the fetch stub and any spies.
afterEach(async () => {
  try {
    await stopBoss();
  } finally {
    // Runs even when stopBoss() rejects, so a teardown failure cannot leave the
    // mocked fetch in place for whatever runs next.
    vi.unstubAllGlobals();
    vi.restoreAllMocks();
  }
});
// Tests for the `ingest.url` job: each test enqueues a job against the
// mocked fetch, waits for it to finish, and inspects the refs stored for the space.
describe('ingest.url worker', () => {
  it('creates a ref from a URL', async () => {
    const sp = await spaces.create({ slug: 'u', name: 'U' }, { kind: 'user', id: null });
    const id = await queue.enqueue('ingest.url', { space_id: sp.id, url: 'https://example.com/a' });
    const j = await waitForJob('ingest.url', id, { timeoutMs: 10_000 });
    expect(j.state).toBe('completed');

    const rows = await refs.list({ space_id: sp.id });
    // Check the row count before indexing so an empty result fails as a clear
    // assertion rather than a TypeError on rows[0].
    expect(rows).toHaveLength(1);
    const [ref] = rows;
    expect(ref.title).toMatch(/Blackflame/);
    expect(ref.external_id).toBeTruthy();
    expect(ref.source_kind).toBe('url');
  });

  it('idempotent on repeat enqueue (same space_id + url)', async () => {
    const sp = await spaces.create({ slug: 'u2', name: 'U2' }, { kind: 'user', id: null });
    const payload = { space_id: sp.id, url: 'https://example.com/b' };

    const id1 = await queue.enqueue('ingest.url', payload);
    const j1 = await waitForJob('ingest.url', id1, { timeoutMs: 10_000 });
    // The first run must have succeeded, otherwise the second run is not a
    // repeat of anything and the checks below would not prove idempotence.
    expect(j1.state).toBe('completed');

    const id2 = await queue.enqueue('ingest.url', payload);
    const j2 = await waitForJob('ingest.url', id2, { timeoutMs: 10_000 });
    expect(j2.state).toBe('completed');
    // Optional chaining turns a missing output into a failed expectation
    // instead of a thrown TypeError.
    expect(j2.output?.idempotent).toBe(true);

    const rows = await refs.list({ space_id: sp.id });
    expect(rows).toHaveLength(1);
  });
});