feat(search): hybrid FTS + vector with RRF + graceful Ollama fallback

Replaces FTS-only /api/search in place. RRF (k=60) fuses ts_rank and
pgvector cosine distance rankings. Vector branch silently skipped when
Ollama times out / errors, keeping search snappy and resilient.

Messages have no embeddings in Plan 3, so they participate in the FTS
branch only.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
root
2026-06-01 03:50:33 +10:00
parent 99ab1ffb70
commit f116811dda
2 changed files with 208 additions and 67 deletions

View File

@@ -1,83 +1,169 @@
import { pool } from '../pool.js'; import { pool } from '../pool.js';
import { embedText, padTo } from '../../ai/ollama.js';
import { log } from '../../log.js';
// FTS-only search across pages / refs / source_docs / messages, unioned // Hybrid FTS + vector search with reciprocal-rank fusion.
// with a `kind` discriminator and ranked by ts_rank. Each branch's
// to_tsvector expression matches the GIN index on its table so the
// indexes are used. Vector / hybrid RRF search lands in Plan 3.
// //
// Notes: // - FTS branch: tsvector @@ plainto_tsquery on pages / refs / source_docs /
// - messages have no space_id → the messages branch is dropped when a // messages, unioned with a `kind` discriminator. Each expression matches
// space_id filter is present. // the corresponding GIN index in migrations 002 / 003 / 004.
// - source_docs inherit space_id from their owning resource via join. // - Vector branch: pgvector cosine distance against the entity's embedding
// column. The query is embedded via Ollama (nomic-embed-text, padded
// 1024-d). Messages are not embedded in Plan 3 — skipped in the vector
// branch.
// - Graceful fallback: if Ollama times out or errors, the vector branch is
// dropped silently and search continues as FTS-only.
// - RRF fuses the two rank lists with k=60 (Cormack et al. canonical).
const PAGES_TSV = `to_tsvector('english', p.title || ' ' || coalesce(p.body_md,''))`; const PAGES_TSV = `to_tsvector('english', p.title || ' ' || coalesce(p.body_md,''))`;
const REFS_TSV = `to_tsvector('english', coalesce(r.title,'') || ' ' || coalesce(r.summary,'') || ' ' || coalesce(r.body_text,''))`; const REFS_TSV = `to_tsvector('english', coalesce(r.title,'') || ' ' || coalesce(r.summary,'') || ' ' || coalesce(r.body_text,''))`;
const SD_TSV = `to_tsvector('english', sd.name || ' ' || coalesce(sd.body_text,''))`; const SD_TSV = `to_tsvector('english', sd.name || ' ' || coalesce(sd.body_text,''))`;
const MSG_TSV = `to_tsvector('english', m.body)`; const MSG_TSV = `to_tsvector('english', m.body)`;
function buildBranches({ kinds, spaceFilterPresent }) { // ----- FTS branch (returns flat row list ordered by ts_rank) ----------------
const branches = [];
const wantPage = !kinds || kinds.includes('page');
const wantRef = !kinds || kinds.includes('ref');
const wantSD = !kinds || kinds.includes('source_doc');
const wantMsg = (!kinds || kinds.includes('message')) && !spaceFilterPresent;
if (wantPage) { function ftsBranches({ kinds, spaceFilterPresent }) {
branches.push(` const want = k => !kinds || kinds.includes(k);
SELECT 'page'::text AS kind, p.id, p.space_id, p.title AS title_or_snippet, const out = [];
ts_rank(${PAGES_TSV}, q.tsq) AS rank if (want('page')) out.push(`
FROM pages p, q SELECT 'page'::text AS kind, p.id::text, p.space_id, p.title AS title_or_snippet,
WHERE ${PAGES_TSV} @@ q.tsq ts_rank(${PAGES_TSV}, q.tsq) AS rank
AND ($2::uuid IS NULL OR p.space_id = $2) FROM pages p, q
`); WHERE ${PAGES_TSV} @@ q.tsq
} AND ($2::uuid IS NULL OR p.space_id = $2)`);
if (wantRef) { if (want('ref')) out.push(`
branches.push(` SELECT 'ref'::text, r.id::text, r.space_id,
SELECT 'ref'::text AS kind, r.id, r.space_id, coalesce(r.title, r.source_url, '(untitled)'),
coalesce(r.title, r.source_url, '(untitled)') AS title_or_snippet, ts_rank(${REFS_TSV}, q.tsq)
ts_rank(${REFS_TSV}, q.tsq) AS rank FROM refs r, q
FROM refs r, q WHERE ${REFS_TSV} @@ q.tsq
WHERE ${REFS_TSV} @@ q.tsq AND ($2::uuid IS NULL OR r.space_id = $2)`);
AND ($2::uuid IS NULL OR r.space_id = $2) if (want('source_doc')) out.push(`
`); SELECT 'source_doc'::text, sd.id::text, res.space_id, sd.name,
} ts_rank(${SD_TSV}, q.tsq)
if (wantSD) { FROM source_docs sd JOIN resources res ON res.id = sd.resource_id, q
branches.push(` WHERE ${SD_TSV} @@ q.tsq
SELECT 'source_doc'::text AS kind, sd.id, res.space_id, sd.name AS title_or_snippet, AND ($2::uuid IS NULL OR res.space_id = $2)`);
ts_rank(${SD_TSV}, q.tsq) AS rank if (want('message') && !spaceFilterPresent) out.push(`
FROM source_docs sd SELECT 'message'::text, m.id::text, NULL::uuid, substring(m.body, 1, 200),
JOIN resources res ON res.id = sd.resource_id, q ts_rank(${MSG_TSV}, q.tsq)
WHERE ${SD_TSV} @@ q.tsq FROM messages m, q
AND ($2::uuid IS NULL OR res.space_id = $2) WHERE ${MSG_TSV} @@ q.tsq`);
`); return out;
}
if (wantMsg) {
branches.push(`
SELECT 'message'::text AS kind, m.id, NULL::uuid AS space_id,
substring(m.body, 1, 200) AS title_or_snippet,
ts_rank(${MSG_TSV}, q.tsq) AS rank
FROM messages m, q
WHERE ${MSG_TSV} @@ q.tsq
`);
}
return branches;
} }
async function ftsRows({ q, space_id, kinds, perBranchLimit }) {
const branches = ftsBranches({ kinds, spaceFilterPresent: space_id != null });
if (!branches.length) return [];
const sql = `WITH q AS (SELECT plainto_tsquery('english', $1) AS tsq)
SELECT * FROM (${branches.join('\n UNION ALL ')}) u
ORDER BY rank DESC LIMIT $3`;
const { rows } = await pool.query(sql, [q, space_id, perBranchLimit]);
return rows;
}
// ----- Vector branch (runs one ANN query per kind, concats results) --------
async function vectorRows({ qvec, space_id, kinds, perBranchLimit }) {
const want = k => !kinds || kinds.includes(k);
const literal = '[' + qvec.join(',') + ']';
const all = [];
if (want('page')) {
const { rows } = await pool.query(`
SELECT 'page'::text AS kind, p.id::text, p.space_id, p.title AS title_or_snippet,
(p.embedding <=> $1::vector) AS dist
FROM pages p
WHERE p.embedding IS NOT NULL
AND ($2::uuid IS NULL OR p.space_id = $2)
ORDER BY p.embedding <=> $1::vector LIMIT $3
`, [literal, space_id, perBranchLimit]);
all.push(...rows);
}
if (want('ref')) {
const { rows } = await pool.query(`
SELECT 'ref'::text AS kind, r.id::text, r.space_id,
coalesce(r.title, r.source_url, '(untitled)') AS title_or_snippet,
(r.embedding <=> $1::vector) AS dist
FROM refs r
WHERE r.embedding IS NOT NULL
AND ($2::uuid IS NULL OR r.space_id = $2)
ORDER BY r.embedding <=> $1::vector LIMIT $3
`, [literal, space_id, perBranchLimit]);
all.push(...rows);
}
if (want('source_doc')) {
const { rows } = await pool.query(`
SELECT 'source_doc'::text AS kind, sd.id::text, res.space_id, sd.name AS title_or_snippet,
(sd.embedding <=> $1::vector) AS dist
FROM source_docs sd JOIN resources res ON res.id = sd.resource_id
WHERE sd.embedding IS NOT NULL
AND ($2::uuid IS NULL OR res.space_id = $2)
ORDER BY sd.embedding <=> $1::vector LIMIT $3
`, [literal, space_id, perBranchLimit]);
all.push(...rows);
}
// Messages aren't embedded in Plan 3.
return all.sort((a, b) => a.dist - b.dist);
}
// ----- RRF fusion ----------------------------------------------------------
function fuse(fts, vec, limit, offset) {
const K = 60;
const score = new Map();
function add(rows) {
rows.forEach((r, idx) => {
const key = `${r.kind}:${r.id}`;
const cur = score.get(key) || {
kind: r.kind,
id: r.id,
space_id: r.space_id,
title_or_snippet: r.title_or_snippet,
rrf: 0
};
cur.rrf += 1 / (K + idx + 1);
score.set(key, cur);
});
}
add(fts);
add(vec);
const sorted = [...score.values()].sort((a, b) => b.rrf - a.rrf);
return sorted.slice(offset, offset + limit).map(r => ({
kind: r.kind,
id: r.id,
space_id: r.space_id,
title_or_snippet: r.title_or_snippet,
rank: r.rrf
}));
}
// ----- Public API ----------------------------------------------------------
export async function fts({ q, space_id = null, kinds = null, limit = 50, offset = 0 } = {}) { export async function fts({ q, space_id = null, kinds = null, limit = 50, offset = 0 } = {}) {
if (!q || typeof q !== 'string') return []; if (!q || typeof q !== 'string') return [];
const normalizedKinds = Array.isArray(kinds) && kinds.length ? kinds : null; const normalizedKinds = Array.isArray(kinds) && kinds.length ? kinds : null;
const spaceFilterPresent = space_id !== null && space_id !== undefined; const perBranchLimit = limit * 3;
const branches = buildBranches({ kinds: normalizedKinds, spaceFilterPresent });
if (branches.length === 0) return [];
const sql = ` const ftsP = ftsRows({ q, space_id, kinds: normalizedKinds, perBranchLimit });
WITH q AS (SELECT plainto_tsquery('english', $1) AS tsq)
SELECT * FROM ( let vec = [];
${branches.join('\n UNION ALL\n ')} try {
) u const raw = await embedText(q, { timeoutMs: 5_000 });
ORDER BY rank DESC vec = await vectorRows({
LIMIT $3 OFFSET $4 qvec: padTo(raw, 1024),
`; space_id,
const { rows } = await pool.query(sql, [q, space_id, limit, offset]); kinds: normalizedKinds,
return rows; perBranchLimit
});
} catch (e) {
log.debug({ err: e }, 'search vector branch skipped — falling back to FTS-only');
}
const ftsResult = await ftsP;
return fuse(ftsResult, vec, limit, offset);
} }
// Alias for clarity at callsites that want the hybrid name.
export async function hybrid(args) { return fts(args); }

View File

@@ -1,6 +1,7 @@
import { describe, it, expect, beforeAll, beforeEach } from 'vitest'; import { describe, it, expect, beforeAll, beforeEach, afterEach, vi } from 'vitest';
import { resetDb } from '../helpers/db.js'; import { resetDb } from '../helpers/db.js';
import { migrateUp } from '../../lib/db/migrate.js'; import { migrateUp } from '../../lib/db/migrate.js';
import { pool } from '../../lib/db/pool.js';
import * as search from '../../lib/db/repos/search.js'; import * as search from '../../lib/db/repos/search.js';
import * as spacesRepo from '../../lib/db/repos/spaces.js'; import * as spacesRepo from '../../lib/db/repos/spaces.js';
import * as pagesRepo from '../../lib/db/repos/pages.js'; import * as pagesRepo from '../../lib/db/repos/pages.js';
@@ -19,7 +20,11 @@ beforeEach(async () => {
await migrateUp(); await migrateUp();
space = await spacesRepo.create({ slug: 's-main', name: 'Main' }, owner); space = await spacesRepo.create({ slug: 's-main', name: 'Main' }, owner);
otherSpace = await spacesRepo.create({ slug: 's-other', name: 'Other' }, owner); otherSpace = await spacesRepo.create({ slug: 's-other', name: 'Other' }, owner);
// Default: pretend Ollama is down so the vector branch is skipped and
// existing FTS-only assertions still hold deterministically.
global.fetch = vi.fn(async () => { throw new Error('Ollama unreachable (test default)'); });
}); });
afterEach(() => { vi.restoreAllMocks(); });
async function seedAll(word) { async function seedAll(word) {
await pagesRepo.create( await pagesRepo.create(
@@ -97,4 +102,54 @@ describe('search repo', () => {
const hits = await search.fts({ q: 'whitelight' }); const hits = await search.fts({ q: 'whitelight' });
expect(hits).toEqual([]); expect(hits).toEqual([]);
}); });
it('vector branch surfaces an FTS-miss when embedding is close to the query', async () => {
// Page text does not include "blackflame", but its hand-crafted vector
// is close to the query vector, so the vector branch should surface it.
const page = await pagesRepo.create(
{ space_id: space.id, slug: 'vec-only', title: 'Unrelated', body_md: 'nothing about it' },
owner
);
const v = '[' + new Array(1024).fill(0.5).join(',') + ']';
await pool.query('UPDATE pages SET embedding=$1::vector WHERE id=$2', [v, page.id]);
global.fetch = vi.fn(async () => new Response(
JSON.stringify({ embedding: new Array(768).fill(0.5) }),
{ status: 200, headers: { 'content-type': 'application/json' } }
));
const hits = await search.fts({ q: 'whatever' });
expect(hits.find(h => h.id === page.id)).toBeTruthy();
});
it('Ollama down → FTS-only fallback still returns FTS hits', async () => {
await pagesRepo.create(
{ space_id: space.id, slug: 'fb', title: 'blackflame palette', body_md: '' },
owner
);
// Default mock already throws — that simulates Ollama being unreachable.
const hits = await search.fts({ q: 'blackflame' });
expect(hits.length).toBeGreaterThan(0);
});
it('RRF fuses FTS and vector for the same row (higher rank than FTS alone)', async () => {
const page = await pagesRepo.create(
{ space_id: space.id, slug: 'rrf', title: 'cradle blackflame motif', body_md: 'blackflame essay' },
owner
);
const v = '[' + new Array(1024).fill(0.5).join(',') + ']';
await pool.query('UPDATE pages SET embedding=$1::vector WHERE id=$2', [v, page.id]);
// FTS-only run (vector branch errors)
const ftsOnly = await search.fts({ q: 'blackflame' });
const ftsRank = ftsOnly.find(h => h.id === page.id)?.rank;
expect(ftsRank).toBeGreaterThan(0);
// FTS + vector (query embedding matches the row's vector)
global.fetch = vi.fn(async () => new Response(
JSON.stringify({ embedding: new Array(768).fill(0.5) }),
{ status: 200, headers: { 'content-type': 'application/json' } }
));
const hybrid = await search.fts({ q: 'blackflame' });
const hybridRank = hybrid.find(h => h.id === page.id)?.rank;
expect(hybridRank).toBeGreaterThan(ftsRank);
});
}); });