feat(jobs): ingest.karakeep worker

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
root
2026-06-01 03:55:03 +10:00
parent de1d7e3476
commit d1e986bc9c
3 changed files with 104 additions and 1 deletions

View File

@@ -3,8 +3,9 @@ import * as echo from './workers/echo.js';
import * as url from './workers/url.js';
import * as blob from './workers/blob.js';
import * as embed from './workers/embed.js';
import * as karakeep from './workers/karakeep.js';
const WORKERS = [echo, url, blob, embed];
const WORKERS = [echo, url, blob, embed, karakeep];
export async function registerWorkers() {
for (const w of WORKERS) {

View File

@@ -0,0 +1,50 @@
import crypto from 'node:crypto';
import { getBookmark } from '../../karakeep/client.js';
import { safeFetch } from '../../ingest/safe_fetch.js';
import { extract } from '../../ingest/readability.js';
import * as refs from '../../db/repos/refs.js';
import { pool } from '../../db/pool.js';
// Exported name string for this worker module.
// NOTE(review): presumably the job/queue name the worker registry binds
// `handler` to — the registration code is not visible here; confirm.
export const NAME = 'ingest.karakeep';
/**
 * Derives the value this worker stores in and looks up by `external_id`.
 *
 * The digest input is `space_id`, a NUL character, the literal `karakeep:`
 * and `bookmark_id`, concatenated in that order.
 *
 * @param {*} space_id - Space identifier; coerced by string concatenation.
 * @param {*} bookmark_id - Bookmark identifier; coerced by string concatenation.
 * @returns {string} Hex-encoded SHA-256 digest of the concatenated input.
 */
function key(space_id, bookmark_id) {
  const material = space_id + '\x00karakeep:' + bookmark_id;
  const hasher = crypto.createHash('sha256');
  hasher.update(material);
  return hasher.digest('hex');
}
/**
 * Job handler: turns one Karakeep bookmark into a `refs` row.
 *
 * Steps, in order:
 *  1. Load the bookmark with `getBookmark`; a falsy result ends the job with
 *     `{ skipped: 'gone' }`.
 *  2. Look for a `refs` row with `source_kind='karakeep'` and an `external_id`
 *     equal to `key(space_id, bookmark_id)`; if one exists, return its id with
 *     `idempotent: true` and create nothing.
 *  3. Use the bookmark's `html_content`; when that is falsy and the bookmark
 *     has a `url`, fetch the URL through `safeFetch` (15 s abort timeout) and
 *     keep the response text only when `res.ok` is truthy.
 *  4. Run `extract` over the HTML (or fall back to an empty parse result) and
 *     create the ref through `refs.create`.
 *
 * NOTE(review): steps 2 and 4 are a separate SELECT and create, so two jobs
 * for the same bookmark running concurrently could both pass the check —
 * whether a unique constraint on `external_id` guards this is not visible
 * here; confirm.
 *
 * @param {{ data: { bookmark_id: *, space_id: * } }} job - Queue job; only
 *   `job.data.bookmark_id` and `job.data.space_id` are read.
 * @returns {Promise<{skipped: string} | {ref_id: *, idempotent: boolean} | {ref_id: *}>}
 *   Outcome of the job, as described above.
 */
export async function handler(job) {
  const { bookmark_id, space_id } = job.data;

  const bookmark = await getBookmark(bookmark_id);
  if (!bookmark) {
    return { skipped: 'gone' };
  }

  // Skip the work entirely when this bookmark was already ingested for the space.
  const externalId = key(space_id, bookmark_id);
  const lookup = await pool.query(
    `SELECT id FROM refs WHERE source_kind='karakeep' AND external_id=$1 LIMIT 1`,
    [externalId]
  );
  const { rows: [existing] } = lookup;
  if (existing) {
    return { ref_id: existing.id, idempotent: true };
  }

  // Prefer HTML already stored on the bookmark; otherwise try fetching the page.
  let html = bookmark.html_content;
  if (!html && bookmark.url) {
    const fetchOptions = {
      headers: { 'User-Agent': 'void-ingest/2.0' },
      signal: AbortSignal.timeout(15_000)
    };
    const response = await safeFetch(bookmark.url, fetchOptions);
    if (response.ok) {
      html = await response.text();
    }
  }

  // With no HTML at all the ref is still created, just without extracted content.
  let parsed;
  if (html) {
    parsed = extract(html, bookmark.url);
  } else {
    parsed = { title: null, textContent: '', excerpt: null };
  }

  const title = bookmark.title || parsed.title || bookmark.url;
  // Cap the stored text at 200,000 string units.
  const bodyText = (parsed.textContent || '').slice(0, 200_000);
  const tagNames = (bookmark.tags || []).map((tag) => tag.name);

  const fields = {
    space_id,
    kind: 'url',
    source_url: bookmark.url,
    title,
    summary: parsed.excerpt,
    body_text: bodyText,
    source_kind: 'karakeep',
    external_id: externalId,
    metadata: { karakeep_id: bookmark_id, tags: tagNames }
  };
  // Second argument is passed through to refs.create unchanged
  // (presumably the acting principal — confirm against refs.create).
  const created = await refs.create(fields, { kind: 'system', id: null });

  return { ref_id: created.id };
}