feat(jobs): embed.text worker (Ollama → vector(1024))
Zero-pads nomic-embed-text's 768-dim vectors to 1024 dims so that a later swap to a 1024-dim model is a re-embed, not a migration (per master spec). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -2,8 +2,9 @@ import * as queue from './queue.js';
|
||||
import * as echo from './workers/echo.js';
|
||||
import * as url from './workers/url.js';
|
||||
import * as blob from './workers/blob.js';
|
||||
import * as embed from './workers/embed.js';
|
||||
|
||||
const WORKERS = [echo, url, blob];
|
||||
const WORKERS = [echo, url, blob, embed];
|
||||
|
||||
export async function registerWorkers() {
|
||||
for (const w of WORKERS) {
|
||||
|
||||
29
lib/jobs/workers/embed.js
Normal file
29
lib/jobs/workers/embed.js
Normal file
@@ -0,0 +1,29 @@
|
||||
import { embedText, padTo } from '../../ai/ollama.js';
|
||||
import { pool } from '../../db/pool.js';
|
||||
import { recordAudit } from '../../db/repos/audit.js';
|
||||
|
||||
// Identifier exported by this worker module ('embed.text', matching the commit title).
// NOTE(review): presumably registerWorkers() uses NAME as the queue job name — confirm against lib/jobs.
export const NAME = 'embed.text';
|
||||
|
||||
/**
 * Per-entity-type functions that flatten a database row into the single
 * string that `handler` passes to the embedding model.
 *
 * Every interpolated column falls back to '' so that a NULL/missing value
 * never leaks the literal text "null" or "undefined" into the embedded string.
 */
const STRING_BUILDERS = {
  page: (row) => `${row.title || ''}\n\n${row.body_md || ''}`,
  ref: (row) => `${row.title || ''}\n${row.summary || ''}\n${row.body_text || ''}`,
  source_doc: (row) => `${row.name || ''}\n${row.body_text || ''}`,
  conversation: (row) => `${row.title || ''}\n${row.summary || ''}`,
};
|
||||
|
||||
// Maps each supported job `entity_type` to the database table holding its rows.
const TABLE = {
  page: 'pages',
  ref: 'refs',
  source_doc: 'source_docs',
  conversation: 'conversations',
};
|
||||
|
||||
/** Upper bound, in UTF-16 code units, on the text handed to the embedding model. */
const MAX_EMBED_CHARS = 6_000;

/** Number of dimensions the embedding is padded to before it is stored. */
const EMBED_DIMS = 1024;

/**
 * Cuts `text` to at most `maxUnits` UTF-16 code units without splitting a
 * surrogate pair (a plain `slice` could leave a lone high surrogate at the end).
 *
 * @param {string} text
 * @param {number} maxUnits
 * @returns {string}
 */
function truncateForEmbedding(text, maxUnits) {
  if (text.length <= maxUnits) return text;
  const lastUnit = text.charCodeAt(maxUnits - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return text.slice(0, endsOnHighSurrogate ? maxUnits - 1 : maxUnits);
}

/**
 * Job handler: loads one entity row, embeds its text, stores the padded vector
 * in the row's `embedding` column and records an audit entry.
 *
 * @param {{ data: { entity_type: string, entity_id: * } }} job
 *   `entity_type` must be an own key of `TABLE`; `entity_id` is matched against the table's `id` column.
 * @returns {Promise<{ entity_id: * } | { skipped: 'gone' }>}
 *   `{ skipped: 'gone' }` when no row with that id exists, otherwise `{ entity_id }`.
 * @throws {Error} `unknown entity_type: …` when `entity_type` is not a key of `TABLE`.
 */
export async function handler(job) {
  const { entity_type, entity_id } = job.data;

  // Own-key check: a bare `TABLE[entity_type]` would also resolve inherited keys
  // such as 'constructor' or '__proto__' to truthy values, which would then be
  // interpolated into the SQL text below.
  if (!Object.hasOwn(TABLE, entity_type)) {
    throw new Error(`unknown entity_type: ${entity_type}`);
  }
  const table = TABLE[entity_type];

  // `table` comes from the fixed TABLE map, never from job input, so interpolating it is safe.
  const { rows: [row] } = await pool.query(`SELECT * FROM ${table} WHERE id=$1`, [entity_id]);
  if (!row) return { skipped: 'gone' };

  const text = truncateForEmbedding(STRING_BUILDERS[entity_type](row), MAX_EMBED_CHARS);
  const vector = padTo(await embedText(text), EMBED_DIMS);

  // Textual vector literal ("[v1,v2,…]") cast to `vector` by the UPDATE statement.
  const literal = `[${vector.join(',')}]`;
  await pool.query(`UPDATE ${table} SET embedding=$1::vector WHERE id=$2`, [literal, entity_id]);

  await recordAudit({ kind: 'worker', id: null }, 'update', entity_type, entity_id, null, { embedding: 'updated' });
  return { entity_id };
}
|
||||
Reference in New Issue
Block a user