import crypto from 'node:crypto';
import { extract } from '../../ingest/readability.js';
import * as refs from '../../db/repos/refs.js';
import { pool } from '../../db/pool.js';

/** Job name under which this handler is exported. */
export const NAME = 'ingest.url';

/** Abort the page fetch if it has not completed within this many milliseconds. */
const FETCH_TIMEOUT_MS = 15_000;

/** User-Agent header sent with the page fetch. */
const USER_AGENT = 'void-ingest/2.0';

/** Upper bound (in UTF-16 code units) on the extracted text stored as `body_text`. */
const MAX_BODY_CHARS = 200_000;

/**
 * Derive the idempotency key for a (space, url) pair.
 *
 * The NUL separator keeps distinct pairs from colliding by concatenation
 * (e.g. `"ab" + "c"` vs `"a" + "bc"`).
 *
 * @param {*} space_id - Identifier of the space; stringified into the hash input.
 * @param {*} url - URL being ingested; stringified into the hash input.
 * @returns {string} Hex-encoded SHA-256 digest.
 */
function key(space_id, url) {
  return crypto
    .createHash('sha256')
    .update(`${space_id}\x00${url}`)
    .digest('hex');
}

/**
 * Ingest a URL into a space as a ref.
 *
 * Looks up an existing `refs` row with `source_kind='url'` and the computed
 * idempotency key; if found, returns it without fetching. Otherwise fetches
 * the page, runs it through `extract`, and stores a new ref via `refs.create`.
 *
 * NOTE(review): the lookup and the insert are separate statements, so two
 * concurrent jobs for the same (space, url) can both pass the lookup. Confirm
 * whether a unique constraint on `external_id` backs this up.
 * NOTE(review): `url` is fetched as given; if job data can come from untrusted
 * callers, consider restricting schemes/hosts before fetching.
 *
 * @param {{ data: { space_id: *, url: * } }} job - Job whose `data` carries the target space and URL.
 * @returns {Promise<{ ref_id: *, idempotent?: boolean }>} The id of the existing or newly created
 *   ref; `idempotent: true` is present only when an existing row was reused.
 * @throws {TypeError} If `space_id` is missing or `url` is missing/empty.
 * @throws {Error} If the fetch fails, times out, or responds with a non-OK status.
 */
export async function handler(job) {
  const { space_id, url } = job.data;

  // Reject malformed jobs up front: otherwise `undefined` would be stringified
  // into the hash input and unrelated bad jobs would share one idempotency key.
  if (space_id == null) {
    throw new TypeError(`${NAME}: job.data.space_id is required`);
  }
  if (url == null || url === '') {
    throw new TypeError(`${NAME}: job.data.url is required`);
  }

  const idem = key(space_id, url);

  const {
    rows: [existing],
  } = await pool.query(
    `SELECT id FROM refs WHERE source_kind='url' AND external_id=$1 LIMIT 1`,
    [idem]
  );
  if (existing) {
    return { ref_id: existing.id, idempotent: true };
  }

  const res = await fetch(url, {
    headers: { 'User-Agent': USER_AGENT },
    signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
  });
  if (!res.ok) {
    // Release the connection: an unread body can keep it held until GC.
    // Best-effort only, so a cancel failure never masks the status error below.
    await res.body?.cancel().catch(() => {});
    throw new Error(`fetch ${url} → ${res.status}`);
  }

  const html = await res.text();
  const parsed = extract(html, url);

  const row = await refs.create(
    {
      space_id,
      kind: 'url',
      source_url: url,
      title: parsed.title || url, // fall back to the URL when no title was extracted
      summary: parsed.excerpt,
      body_text: (parsed.textContent || '').slice(0, MAX_BODY_CHARS),
      source_kind: 'url',
      external_id: idem,
      metadata: { site_name: parsed.siteName, byline: parsed.byline },
    },
    { kind: 'system', id: null }
  );

  return { ref_id: row.id };
}