import crypto from 'node:crypto';
import { getBookmark } from '../../karakeep/client.js';
import { safeFetch } from '../../ingest/safe_fetch.js';
import { extract } from '../../ingest/readability.js';
import * as refs from '../../db/repos/refs.js';
import { pool } from '../../db/pool.js';

/** Job name under which this handler is exported. */
export const NAME = 'ingest.karakeep';

// User-Agent header sent when the bookmark's page has to be downloaded.
const USER_AGENT = 'void-ingest/2.0';

// Abort the page download after this many milliseconds.
const FETCH_TIMEOUT_MS = 15_000;

// Upper bound on the number of characters of extracted text passed to refs.create.
const MAX_BODY_CHARS = 200_000;

// Looks up a ref previously created by this handler via its external_id.
const FIND_EXISTING_SQL = `SELECT id FROM refs WHERE source_kind='karakeep' AND external_id=$1 LIMIT 1`;

// Stand-in for the extraction result when there is no HTML to process.
const EMPTY_EXTRACTION = { title: null, textContent: '', excerpt: null };

/**
 * Derive the idempotency key for a (space, bookmark) pair.
 *
 * The digest is written to and matched against `refs.external_id`, so the
 * hashed string must stay byte-for-byte stable: the two ids joined by a NUL
 * byte followed by the literal `karakeep:`.
 *
 * @param {*} space_id - Space identifier from the job payload.
 * @param {*} bookmark_id - Bookmark identifier from the job payload.
 * @returns {string} Hex-encoded SHA-256 digest.
 */
function key(space_id, bookmark_id) {
  const hasher = crypto.createHash('sha256');
  hasher.update(space_id + '\x00karakeep:' + bookmark_id);
  return hasher.digest('hex');
}

/**
 * Pick the HTML to extract from: the bookmark's own `html_content` when it is
 * truthy, otherwise the body downloaded from `bookmark.url` via `safeFetch`.
 *
 * A response whose `ok` flag is false leaves the result at the original
 * (falsy) `html_content`. Anything `safeFetch` or `text()` throws propagates
 * to the caller.
 *
 * @param {object} bookmark - Bookmark as returned by `getBookmark`.
 * @returns {Promise<*>} HTML string, or a falsy value when none is available.
 */
async function resolveHtml(bookmark) {
  const stored = bookmark.html_content;
  if (stored || !bookmark.url) {
    return stored;
  }

  const response = await safeFetch(bookmark.url, {
    headers: { 'User-Agent': USER_AGENT },
    signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
  });
  return response.ok ? await response.text() : stored;
}

/**
 * Ingest one Karakeep bookmark as a `url` ref.
 *
 * Steps, in order:
 *   1. Load the bookmark; a falsy result ends the job with `{ skipped: 'gone' }`.
 *   2. Look for an existing ref carrying the same idempotency key; if one is
 *      found, return its id with `idempotent: true` and do nothing else.
 *   3. Resolve HTML (stored content, else a download of the bookmark URL).
 *   4. Run `extract` on the HTML, or fall back to an empty extraction result.
 *   5. Create the ref as the `system` actor and return its id.
 *
 * @param {{ data: { bookmark_id: *, space_id: * } }} job - Queue job payload.
 * @returns {Promise<{skipped: string} | {ref_id: *, idempotent: boolean} | {ref_id: *}>}
 */
export async function handler(job) {
  const { bookmark_id, space_id } = job.data;

  const bookmark = await getBookmark(bookmark_id);
  if (!bookmark) {
    return { skipped: 'gone' };
  }

  const externalId = key(space_id, bookmark_id);
  const lookup = await pool.query(FIND_EXISTING_SQL, [externalId]);
  const [existing] = lookup.rows;
  if (existing) {
    return { ref_id: existing.id, idempotent: true };
  }

  const html = await resolveHtml(bookmark);
  const parsed = html ? extract(html, bookmark.url) : EMPTY_EXTRACTION;

  const tagNames = (bookmark.tags || []).map((tag) => tag.name);
  const bodyText = (parsed.textContent || '').slice(0, MAX_BODY_CHARS);

  const attributes = {
    space_id,
    kind: 'url',
    source_url: bookmark.url,
    // Title preference: the bookmark's own title, then the extracted one, then the URL.
    title: bookmark.title || parsed.title || bookmark.url,
    summary: parsed.excerpt,
    body_text: bodyText,
    source_kind: 'karakeep',
    external_id: externalId,
    metadata: { karakeep_id: bookmark_id, tags: tagNames },
  };
  const actor = { kind: 'system', id: null };

  const created = await refs.create(attributes, actor);
  return { ref_id: created.id };
}