After creating a ref, the Node-side ingest.blob worker enqueues a follow-up job for the Python void-workers (Plan 4) to OCR / extract text. Other kinds (file) get no follow-up. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
40 lines
1.4 KiB
JavaScript
40 lines
1.4 KiB
JavaScript
import fs from 'node:fs/promises';
|
|
import * as refs from '../../db/repos/refs.js';
|
|
import { defaultStore } from '../../ingest/blob_store.js';
|
|
import * as queue from '../queue.js';
|
|
|
|
// Exported identifier for this worker module.
// NOTE(review): presumably the queue/job name `handler` is registered under — confirm against the worker registry.
export const NAME = 'ingest.blob';
|
|
|
|
/**
 * Classify an uploaded blob into the ref kind it is stored under.
 *
 * Media types are matched case-insensitively and without any trailing
 * parameters, so `IMAGE/PNG` and `application/pdf; charset=binary` are
 * recognised. Non-string inputs are treated as absent instead of throwing.
 *
 * @param {string|null|undefined} content_type - Reported media type of the upload.
 * @param {string|null|undefined} filename - Original file name; a `.pdf`
 *   extension (any case) marks the blob as a PDF even if the media type does not.
 * @returns {'image'|'pdf'|'file'} The ref kind.
 */
function kindFor(content_type, filename) {
  // Keep only the bare `type/subtype`: drop `; param=value` suffixes and
  // normalise case before comparing.
  const mime = typeof content_type === 'string'
    ? content_type.split(';', 1)[0].trim().toLowerCase()
    : '';
  const name = typeof filename === 'string' ? filename.toLowerCase() : '';

  if (mime.startsWith('image/')) return 'image';
  if (mime === 'application/pdf' || name.endsWith('.pdf')) return 'pdf';
  return 'file';
}
|
|
|
|
/**
 * Process one blob-ingest job: read the uploaded temp file, write it to the
 * default blob store, create a ref row describing it, and enqueue a follow-up
 * extraction job for PDFs and images. Other kinds get no follow-up.
 *
 * @param {object} job - Queue job. `job.data` carries `space_id`, `tmp_path`,
 *   `filename`, `content_type` and an optional `meta` object (`title`,
 *   `metadata`).
 * @returns {Promise<{ref_id: *, sha: *}>} Id of the created ref and the sha
 *   returned by the blob store.
 */
export async function handler(job) {
  const { space_id, tmp_path, filename, content_type } = job.data;
  // `meta` may be omitted or explicitly null; treat both as "no extras".
  const meta = job.data.meta ?? {};

  const buf = await fs.readFile(tmp_path);
  const { sha, path } = await defaultStore().write(buf);

  const kind = kindFor(content_type, filename);
  const row = await refs.create({
    space_id,
    kind,
    source_url: null,
    // Fall back from explicit title to file name to a short sha prefix.
    title: meta.title || filename || sha.slice(0, 12),
    summary: null,
    body_text: null,
    blob_path: path,
    // Caller-supplied metadata is spread last, so its keys win on conflict.
    metadata: { sha, content_type, filename, size: buf.length, ...(meta.metadata || {}) },
  }, { kind: 'system', id: null });

  // Delete the temp upload only once the ref row exists. Removing it earlier
  // means a failure in refs.create leaves nothing to re-read on a later
  // attempt. Cleanup is best-effort: a failed unlink must not fail the job.
  // NOTE(review): assumes failed jobs may be retried and that re-writing the
  // same bytes to the blob store is harmless — confirm.
  try {
    await fs.unlink(tmp_path);
  } catch {
    /* best-effort cleanup; ignore */
  }

  // Plan 4: hand off to the Python void-workers for OCR / extraction.
  if (kind === 'pdf') {
    await queue.enqueue('extract.pdf', { ref_id: row.id, blob_path: path });
  } else if (kind === 'image') {
    await queue.enqueue('extract.image', { ref_id: row.id, blob_path: path });
  }

  return { ref_id: row.id, sha };
}
|