import fs from 'node:fs/promises';
import * as refs from '../../db/repos/refs.js';
import { defaultStore } from '../../ingest/blob_store.js';
import * as queue from '../queue.js';

/** Job name under which this handler is registered. */
export const NAME = 'ingest.blob';

/**
 * Classify an uploaded blob as 'image', 'pdf' or 'file'.
 *
 * The media type is compared case-insensitively and without parameters
 * (e.g. "Application/PDF; charset=binary" counts as a PDF). A ".pdf" filename
 * extension is accepted as a fallback when the content type does not say so.
 * Non-string arguments are treated as absent instead of throwing.
 *
 * @param {string|null|undefined} content_type - Declared MIME type of the upload.
 * @param {string|null|undefined} filename - Original filename, if any.
 * @returns {'image'|'pdf'|'file'} The ref kind for the blob.
 */
function kindFor(content_type, filename) {
  // Keep only the "type/subtype" essence: drop parameters, whitespace and case.
  const mime =
    typeof content_type === 'string'
      ? content_type.split(';', 1)[0].trim().toLowerCase()
      : '';
  if (mime.startsWith('image/')) return 'image';

  const hasPdfExtension =
    typeof filename === 'string' && filename.toLowerCase().endsWith('.pdf');
  if (mime === 'application/pdf' || hasPdfExtension) return 'pdf';

  return 'file';
}

/**
 * Ingest an uploaded temp file: write it to the blob store, create a ref row
 * pointing at the stored blob, and enqueue an extraction job for PDFs/images.
 *
 * @param {{ data: { space_id: *, tmp_path: string, filename?: string,
 *   content_type?: string, meta?: { title?: string, metadata?: object } | null } }} job
 *   Queue job whose `data` describes the upload.
 * @returns {Promise<{ ref_id: *, sha: string }>} Id of the created ref and the
 *   `sha` reported by the blob store.
 * @throws Propagates errors from reading the temp file, writing the blob,
 *   creating the ref, or enqueueing the follow-up job.
 */
export async function handler(job) {
  const { space_id, tmp_path, filename, content_type } = job.data;
  // `?? {}` rather than a destructuring default: a default only applies to
  // `undefined`, and an explicit `meta: null` would otherwise crash below.
  const meta = job.data.meta ?? {};

  const buf = await fs.readFile(tmp_path);
  const { sha, path } = await defaultStore().write(buf);

  // Temp-file cleanup is best-effort and must never fail the job, but failures
  // other than "already gone" are reported so leaked temp files are visible.
  try {
    await fs.unlink(tmp_path);
  } catch (err) {
    if (err?.code !== 'ENOENT') {
      console.warn(
        `${NAME}: could not remove temp file ${tmp_path}: ${err?.message ?? err}`,
      );
    }
  }

  const kind = kindFor(content_type, filename);
  const row = await refs.create(
    {
      space_id,
      kind,
      source_url: null,
      // Title falls back to the filename, then to a short prefix of the sha.
      title: meta.title || filename || sha.slice(0, 12),
      summary: null,
      body_text: null,
      blob_path: path,
      // NOTE(review): caller-supplied metadata is spread last, so it can
      // override sha/content_type/filename/size — confirm this is intended.
      metadata: {
        sha,
        content_type,
        filename,
        size: buf.length,
        ...(meta.metadata || {}),
      },
    },
    // Second argument presumably identifies the acting principal — TODO confirm.
    { kind: 'system', id: null },
  );

  // Plan 4: hand off to the Python void-workers for OCR / extraction.
  if (kind === 'pdf') {
    await queue.enqueue('extract.pdf', { ref_id: row.id, blob_path: path });
  } else if (kind === 'image') {
    await queue.enqueue('extract.image', { ref_id: row.id, blob_path: path });
  }

  return { ref_id: row.id, sha };
}