Replaces FTS-only /api/search in place. RRF (k=60) fuses ts_rank and pgvector cosine distance rankings. Vector branch silently skipped when Ollama times out / errors, keeping search snappy and resilient. Messages have no embeddings in Plan 3, so they participate in the FTS branch only. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
156 lines
6.6 KiB
JavaScript
156 lines
6.6 KiB
JavaScript
import { describe, it, expect, beforeAll, beforeEach, afterEach, vi } from 'vitest';
|
|
import { resetDb } from '../helpers/db.js';
|
|
import { migrateUp } from '../../lib/db/migrate.js';
|
|
import { pool } from '../../lib/db/pool.js';
|
|
import * as search from '../../lib/db/repos/search.js';
|
|
import * as spacesRepo from '../../lib/db/repos/spaces.js';
|
|
import * as pagesRepo from '../../lib/db/repos/pages.js';
|
|
import * as refsRepo from '../../lib/db/repos/refs.js';
|
|
import * as resourcesRepo from '../../lib/db/repos/resources.js';
|
|
import * as sourceDocsRepo from '../../lib/db/repos/source_docs.js';
|
|
import * as conversationsRepo from '../../lib/db/repos/conversations.js';
|
|
import * as messagesRepo from '../../lib/db/repos/messages.js';
|
|
|
|
// Owner argument handed to every repo call in this file.
const owner = { kind: 'user', id: null };

// Recreated before each test. `space` holds the seeded rows; `otherSpace`
// exists so the space_id filter has something to exclude.
let space;
let otherSpace;

// Captured once so afterEach can put the real fetch back:
// vi.restoreAllMocks() does not reassign globals that were overwritten by
// plain assignment, so without this the throwing stub would stay installed
// after the suite finishes.
const originalFetch = global.fetch;

beforeAll(async () => {
  await resetDb();
  await migrateUp();
});

beforeEach(async () => {
  await resetDb();
  await migrateUp();
  space = await spacesRepo.create({ slug: 's-main', name: 'Main' }, owner);
  otherSpace = await spacesRepo.create({ slug: 's-other', name: 'Other' }, owner);
  // Default: pretend Ollama is down so the vector branch is skipped and
  // existing FTS-only assertions still hold deterministically.
  global.fetch = vi.fn(async () => { throw new Error('Ollama unreachable (test default)'); });
});

afterEach(() => {
  vi.restoreAllMocks();
  // Undo the direct assignment made in beforeEach (or inside a test).
  global.fetch = originalFetch;
});
|
|
|
|
/**
 * Seeds one row of each searchable kind, every one mentioning `word`:
 * a page and a ref in the main `space`, a source doc attached to a freshly
 * created resource in that space, and a user message in a new conversation.
 *
 * Inserts run one after another, in the order listed above.
 *
 * @param {string} word - term interpolated into each row's title/name and body.
 * @returns {Promise<void>}
 */
async function seedAll(word) {
  const pageFields = {
    space_id: space.id,
    slug: 'pg-search',
    title: `${word} page`,
    body_md: `body about ${word}`,
  };
  await pagesRepo.create(pageFields, owner);

  const refFields = {
    space_id: space.id,
    kind: 'url',
    source_url: 'https://example.com/x',
    title: `${word} reference`,
    body_text: `text mentioning ${word}`,
  };
  await refsRepo.create(refFields, owner);

  // The source doc hangs off a resource rather than directly off the space.
  const resourceFields = {
    space_id: space.id,
    slug: 'r-search',
    name: 'Res',
    runtime_type: 'lxc',
  };
  const resource = await resourcesRepo.create(resourceFields, owner);

  const sourceDocFields = {
    resource_id: resource.id,
    name: `${word} source doc`,
    upstream_url: 'https://example.com/sd',
    body_text: `doc body about ${word}`,
  };
  await sourceDocsRepo.create(sourceDocFields, owner);

  const conversation = await conversationsRepo.create({ title: 'chat' }, owner);
  const message = { role: 'user', body: `let's talk about ${word}` };
  await messagesRepo.append(conversation.id, message);
}
|
|
|
|
describe('search repo', () => {
  // Single source of truth for the hand-crafted vector size. The stored page
  // embedding and the mocked query embedding must have the same dimension,
  // because pgvector distance operators reject operands of different sizes.
  // 1024 is the size these tests write into pages.embedding — presumably the
  // column's declared dimension; confirm against the migration.
  const EMBEDDING_DIMS = 1024;

  /** Builds a fresh all-0.5 vector of EMBEDDING_DIMS elements. */
  const flatVector = () => Array.from({ length: EMBEDDING_DIMS }, () => 0.5);

  /** Writes the all-0.5 vector into pages.embedding for the given page id. */
  async function storePageEmbedding(pageId) {
    const literal = `[${flatVector().join(',')}]`;
    await pool.query('UPDATE pages SET embedding=$1::vector WHERE id=$2', [literal, pageId]);
  }

  /**
   * Replaces the default throwing fetch with one that answers 200 and a JSON
   * body `{ embedding: [...] }` matching the stored vector. A new Response is
   * built per call because a Response body can only be read once.
   */
  function mockOllamaEmbedding() {
    global.fetch = vi.fn(async () => new Response(
      JSON.stringify({ embedding: flatVector() }),
      { status: 200, headers: { 'content-type': 'application/json' } }
    ));
  }

  it('fts returns 4 hits across all kinds for the query word', async () => {
    await seedAll('blackflame');
    const hits = await search.fts({ q: 'blackflame' });
    expect(hits.length).toBe(4);
    const kinds = new Set(hits.map((h) => h.kind));
    expect(kinds).toEqual(new Set(['page', 'ref', 'source_doc', 'message']));
    // Every hit exposes the same minimal shape regardless of kind.
    for (const h of hits) {
      expect(typeof h.id).toBe('string');
      expect(typeof h.title_or_snippet).toBe('string');
      expect(typeof h.rank).toBe('number');
    }
  });

  it('kinds filter narrows to requested branches', async () => {
    await seedAll('blackflame');
    const hits = await search.fts({ q: 'blackflame', kinds: ['page', 'ref'] });
    expect(hits.length).toBe(2);
    expect(new Set(hits.map((h) => h.kind))).toEqual(new Set(['page', 'ref']));
  });

  it('space_id filter scopes pages/refs/source_docs (messages excluded)', async () => {
    await seedAll('blackflame');
    await pagesRepo.create(
      { space_id: otherSpace.id, slug: 'pg-other', title: 'blackflame other', body_md: '' },
      owner
    );
    const hits = await search.fts({ q: 'blackflame', space_id: space.id });
    // page+ref+source_doc from main space; no messages (no space); no other-space page
    expect(hits.length).toBe(3);
    expect(hits.every((h) => h.kind !== 'message')).toBe(true);
    expect(hits.every((h) => h.space_id === space.id)).toBe(true);
  });

  it('orders by rank desc and respects limit + offset', async () => {
    await seedAll('blackflame');
    const all = await search.fts({ q: 'blackflame', limit: 100 });
    const limited = await search.fts({ q: 'blackflame', limit: 2 });
    expect(limited.length).toBe(2);
    const ranks = all.map((h) => h.rank);
    // Sort a copy: Array.prototype.sort mutates in place.
    const sorted = [...ranks].sort((a, b) => b - a);
    expect(ranks).toEqual(sorted);
    const off = await search.fts({ q: 'blackflame', limit: 2, offset: 2 });
    expect(off.length).toBe(2);
    expect(off[0].id).not.toBe(limited[0].id);
  });

  it('returns empty when nothing matches', async () => {
    await seedAll('blackflame');
    const hits = await search.fts({ q: 'whitelight' });
    expect(hits).toEqual([]);
  });

  it('vector branch surfaces an FTS-miss when embedding is close to the query', async () => {
    // Page text does not include the query word, but its hand-crafted vector
    // equals the mocked query vector, so the vector branch should surface it.
    const page = await pagesRepo.create(
      { space_id: space.id, slug: 'vec-only', title: 'Unrelated', body_md: 'nothing about it' },
      owner
    );
    await storePageEmbedding(page.id);
    mockOllamaEmbedding();
    const hits = await search.fts({ q: 'whatever' });
    expect(hits.find((h) => h.id === page.id)).toBeTruthy();
  });

  it('Ollama down → FTS-only fallback still returns FTS hits', async () => {
    await pagesRepo.create(
      { space_id: space.id, slug: 'fb', title: 'blackflame palette', body_md: '' },
      owner
    );
    // Default mock already throws — that simulates Ollama being unreachable.
    const hits = await search.fts({ q: 'blackflame' });
    expect(hits.length).toBeGreaterThan(0);
  });

  it('RRF fuses FTS and vector for the same row (higher rank than FTS alone)', async () => {
    const page = await pagesRepo.create(
      { space_id: space.id, slug: 'rrf', title: 'cradle blackflame motif', body_md: 'blackflame essay' },
      owner
    );
    await storePageEmbedding(page.id);

    // FTS-only run (vector branch errors because the default fetch throws)
    const ftsOnly = await search.fts({ q: 'blackflame' });
    const ftsRank = ftsOnly.find((h) => h.id === page.id)?.rank;
    expect(ftsRank).toBeGreaterThan(0);

    // FTS + vector (query embedding matches the row's vector)
    mockOllamaEmbedding();
    const hybrid = await search.fts({ q: 'blackflame' });
    const hybridRank = hybrid.find((h) => h.id === page.id)?.rank;
    expect(hybridRank).toBeGreaterThan(ftsRank);
  });
});
|