feat(api): unified FTS search
Single GET /api/search?q=&space_id=&kinds=&limit=&offset= unions FTS hits across pages / refs / source_docs / messages with a `kind` discriminator and ts_rank ordering. Each branch's to_tsvector matches the GIN index expression on its source table so indexes are used. Messages have no space_id and are excluded when a space filter is set. Hybrid vector / RRF lands in Plan 3. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,7 @@ import { router as tagsRouter, entityScopedRouter as tagsByEntityRouter } from '
|
|||||||
import { router as linksRouter } from './routes/links.js';
|
import { router as linksRouter } from './routes/links.js';
|
||||||
import { router as pendingChangesRouter } from './routes/pending_changes.js';
|
import { router as pendingChangesRouter } from './routes/pending_changes.js';
|
||||||
import { router as auditRouter } from './routes/audit.js';
|
import { router as auditRouter } from './routes/audit.js';
|
||||||
|
import { router as searchRouter } from './routes/search.js';
|
||||||
|
|
||||||
export function mountApi(app) {
|
export function mountApi(app) {
|
||||||
const api = Router();
|
const api = Router();
|
||||||
@@ -45,6 +46,7 @@ export function mountApi(app) {
|
|||||||
api.use('/links', linksRouter);
|
api.use('/links', linksRouter);
|
||||||
api.use('/pending-changes', pendingChangesRouter);
|
api.use('/pending-changes', pendingChangesRouter);
|
||||||
api.use('/audit', auditRouter);
|
api.use('/audit', auditRouter);
|
||||||
|
api.use('/search', searchRouter);
|
||||||
api.use('/:entity_type/:entity_id/tags', tagsByEntityRouter);
|
api.use('/:entity_type/:entity_id/tags', tagsByEntityRouter);
|
||||||
|
|
||||||
api.use((_req, _res, next) => next(new NotFoundError('route not found')));
|
api.use((_req, _res, next) => next(new NotFoundError('route not found')));
|
||||||
|
|||||||
37
lib/api/routes/search.js
Normal file
37
lib/api/routes/search.js
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
import { Router } from 'express';
|
||||||
|
import { z } from 'zod';
|
||||||
|
import * as repo from '../../db/repos/search.js';
|
||||||
|
import { parsePagination } from '../pagination.js';
|
||||||
|
import { validate } from '../validate.js';
|
||||||
|
import { asyncWrap } from '../errors.js';
|
||||||
|
|
||||||
|
// Result kinds the search endpoint knows about. Names outside this list that
// appear in the `kinds` query parameter are ignored.
const KINDS = ['page','ref','source_doc','message'];

// Shape of the query string. Every field is validated as a string; `limit`
// and `offset` are not converted here (parsePagination is given the request
// below and presumably derives the numeric values — confirm in ../pagination.js).
const querySchema = z.object({
  q: z.string().min(1),
  space_id: z.string().uuid().optional(),
  kinds: z.string().optional(),
  limit: z.string().optional(),
  offset: z.string().optional()
});

export const router = Router();

/**
 * Turn the raw comma-separated `kinds` parameter into a kind filter.
 *
 * @param {string|undefined} rawKinds - value of `?kinds=` as received.
 * @returns {string[]|null} `null` when the caller named no kinds at all
 *   (parameter missing, empty, or only separators) meaning "do not filter";
 *   otherwise the requested names that appear in KINDS, which is an empty
 *   array when the caller named only unknown kinds.
 */
function parseKinds(rawKinds) {
  if (!rawKinds) return null;
  const requested = rawKinds.split(',').map(s => s.trim()).filter(Boolean);
  if (requested.length === 0) return null;
  return requested.filter(k => KINDS.includes(k));
}

// Hybrid vector + reciprocal-rank-fusion search lands in Plan 3 (see
// docs/superpowers/specs/2026-05-31-void-v2-design.md §search).
//
// GET / — full-text search. Responds with the JSON array returned by
// repo.fts for the given q / space_id / kinds / limit / offset.
router.get('/',
  validate({ query: querySchema }),
  asyncWrap(async (req, res) => {
    const { limit, offset } = parsePagination(req);
    const kinds = parseKinds(req.validatedQuery.kinds);

    // The caller asked for specific kinds but none of them exist. Passing the
    // empty array on would make repo.fts fall back to "all kinds" and return
    // results the caller explicitly filtered away, so answer with no hits.
    if (kinds !== null && kinds.length === 0) {
      res.json([]);
      return;
    }

    res.json(await repo.fts({
      q: req.validatedQuery.q,
      space_id: req.validatedQuery.space_id ?? null,
      kinds,
      limit,
      offset
    }));
  })
);
|
||||||
83
lib/db/repos/search.js
Normal file
83
lib/db/repos/search.js
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
import { pool } from '../pool.js';
|
||||||
|
|
||||||
|
// FTS-only search across pages / refs / source_docs / messages, unioned
|
||||||
|
// with a `kind` discriminator and ranked by ts_rank. Each branch's
|
||||||
|
// to_tsvector expression matches the GIN index on its table so the
|
||||||
|
// indexes are used. Vector / hybrid RRF search lands in Plan 3.
|
||||||
|
//
|
||||||
|
// Notes:
|
||||||
|
// - messages have no space_id → the messages branch is dropped when a
|
||||||
|
// space_id filter is present.
|
||||||
|
// - source_docs inherit space_id from their owning resource via join.
|
||||||
|
|
||||||
|
// tsvector expressions, one per searchable table. Per the note at the top of
// this file they are meant to match the GIN index expression on each table,
// so keep them textually in sync with the index definitions.
const PAGES_TSV = `to_tsvector('english', p.title || ' ' || coalesce(p.body_md,''))`;
const REFS_TSV  = `to_tsvector('english', coalesce(r.title,'') || ' ' || coalesce(r.summary,'') || ' ' || coalesce(r.body_text,''))`;
const SD_TSV    = `to_tsvector('english', sd.name || ' ' || coalesce(sd.body_text,''))`;
const MSG_TSV   = `to_tsvector('english', m.body)`;

/**
 * Build the SELECT fragments that get UNION ALL-ed into the search query.
 *
 * Every fragment yields the columns (kind, id, space_id, title_or_snippet,
 * rank), reads the tsquery from a CTE named `q` (column `tsq`), and refers
 * to the optional space filter as parameter `$2`.
 *
 * @param {object} opts
 * @param {string[]|null} opts.kinds - kinds to include; a falsy value means
 *   all kinds.
 * @param {boolean} opts.spaceFilterPresent - true when a space filter will be
 *   bound to `$2`; the messages branch is then left out because messages
 *   carry no space_id.
 * @returns {string[]} SQL fragments, possibly empty.
 */
function buildBranches({ kinds, spaceFilterPresent }) {
  const wants = (kind) => !kinds || kinds.includes(kind);
  const branches = [];

  if (wants('page')) {
    branches.push(`
      SELECT 'page'::text AS kind, p.id, p.space_id, p.title AS title_or_snippet,
             ts_rank(${PAGES_TSV}, q.tsq) AS rank
        FROM pages p, q
       WHERE ${PAGES_TSV} @@ q.tsq
         AND ($2::uuid IS NULL OR p.space_id = $2)
    `);
  }

  if (wants('ref')) {
    branches.push(`
      SELECT 'ref'::text AS kind, r.id, r.space_id,
             coalesce(r.title, r.source_url, '(untitled)') AS title_or_snippet,
             ts_rank(${REFS_TSV}, q.tsq) AS rank
        FROM refs r, q
       WHERE ${REFS_TSV} @@ q.tsq
         AND ($2::uuid IS NULL OR r.space_id = $2)
    `);
  }

  if (wants('source_doc')) {
    // space_id comes from the owning resource row via the join.
    branches.push(`
      SELECT 'source_doc'::text AS kind, sd.id, res.space_id, sd.name AS title_or_snippet,
             ts_rank(${SD_TSV}, q.tsq) AS rank
        FROM source_docs sd
        JOIN resources res ON res.id = sd.resource_id, q
       WHERE ${SD_TSV} @@ q.tsq
         AND ($2::uuid IS NULL OR res.space_id = $2)
    `);
  }

  if (wants('message') && !spaceFilterPresent) {
    // The `$2::uuid IS NULL` predicate is always true here (the branch is only
    // emitted without a space filter) but it must stay: the caller binds $2
    // unconditionally, and a statement that uses $3/$4 without ever
    // mentioning $2 is rejected by PostgreSQL with "could not determine data
    // type of parameter $2". Without it a messages-only search fails.
    branches.push(`
      SELECT 'message'::text AS kind, m.id, NULL::uuid AS space_id,
             substring(m.body, 1, 200) AS title_or_snippet,
             ts_rank(${MSG_TSV}, q.tsq) AS rank
        FROM messages m, q
       WHERE ${MSG_TSV} @@ q.tsq
         AND $2::uuid IS NULL
    `);
  }

  return branches;
}
|
||||||
|
|
||||||
|
/**
 * Full-text search across the requested kinds, best match first.
 *
 * @param {object} [opts]
 * @param {string} opts.q - free-text query, parsed with plainto_tsquery.
 * @param {string|null} [opts.space_id=null] - when set, restricts results to
 *   that space (and leaves messages out, since they have no space).
 * @param {string[]|null} [opts.kinds=null] - kinds to search; null or an
 *   empty array means all kinds.
 * @param {number} [opts.limit=50] - maximum number of rows.
 * @param {number} [opts.offset=0] - rows to skip.
 * @returns {Promise<object[]>} rows with kind, id, space_id,
 *   title_or_snippet and rank; empty when `q` is unusable or no branch
 *   applies.
 */
export async function fts({ q, space_id = null, kinds = null, limit = 50, offset = 0 } = {}) {
  // A blank query has no lexemes and cannot match anything; skip the round trip.
  if (typeof q !== 'string' || q.trim() === '') return [];

  const normalizedKinds = Array.isArray(kinds) && kinds.length ? kinds : null;
  const spaceFilterPresent = space_id !== null && space_id !== undefined;
  const branches = buildBranches({ kinds: normalizedKinds, spaceFilterPresent });
  if (branches.length === 0) return [];

  // rank alone is not unique (short documents tie easily), and without a
  // unique sort key LIMIT/OFFSET pages may overlap or skip rows between
  // calls. kind + id make the order total and therefore repeatable.
  const sql = `
    WITH q AS (SELECT plainto_tsquery('english', $1) AS tsq)
    SELECT * FROM (
      ${branches.join('\n      UNION ALL\n      ')}
    ) u
    ORDER BY rank DESC, kind, id
    LIMIT $3 OFFSET $4
  `;

  const { rows } = await pool.query(sql, [q, space_id, limit, offset]);
  return rows;
}
|
||||||
67
tests/api/search.test.js
Normal file
67
tests/api/search.test.js
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
import { describe, it, expect, beforeAll, beforeEach } from 'vitest';
|
||||||
|
import request from 'supertest';
|
||||||
|
import { setup } from './helpers.js';
|
||||||
|
import * as spacesRepo from '../../lib/db/repos/spaces.js';
|
||||||
|
import * as pagesRepo from '../../lib/db/repos/pages.js';
|
||||||
|
import * as refsRepo from '../../lib/db/repos/refs.js';
|
||||||
|
|
||||||
|
// Shared fixtures for the search API tests: the app under test, auth headers
// for the owner, and the space created for the current test.
let app;
let ownerHeaders;
let space;
const owner = { kind: 'user', id: null };

beforeAll(async () => {
  const ctx = await setup();
  app = ctx.app;
  ownerHeaders = ctx.ownerHeaders;
});

// Each test gets a fresh, uniquely-slugged space holding one page and one ref
// that both mention "blackflame".
beforeEach(async () => {
  const slug = `s-${Date.now()}-${Math.random().toString(36).slice(2,5)}`;
  space = await spacesRepo.create({ slug, name: 'S' }, owner);

  const page = {
    space_id: space.id,
    slug: 'pg',
    title: 'blackflame page',
    body_md: 'body about blackflame'
  };
  await pagesRepo.create(page, owner);

  const ref = {
    space_id: space.id,
    kind: 'url',
    source_url: 'https://x',
    title: 'blackflame ref',
    body_text: 'a ref about blackflame'
  };
  await refsRepo.create(ref, owner);
});
|
||||||
|
|
||||||
|
// HTTP-level checks for GET /api/search: auth, validation, and the
// kinds / space_id / limit parameters.
describe('search api', () => {
  // Authenticated GET as the owner.
  const authedGet = (path) => request(app).get(path).set(ownerHeaders);

  it('401 without auth', async () => {
    const response = await request(app).get('/api/search?q=blackflame');
    expect(response.status).toBe(401);
  });

  it('200 with hits across kinds', async () => {
    const response = await authedGet('/api/search?q=blackflame');
    expect(response.status).toBe(200);
    expect(response.body.length).toBeGreaterThanOrEqual(2);
    const seenKinds = new Set(response.body.map(hit => hit.kind));
    expect(seenKinds.has('page')).toBe(true);
    expect(seenKinds.has('ref')).toBe(true);
  });

  it('kinds filter narrows', async () => {
    const response = await authedGet('/api/search?q=blackflame&kinds=page');
    expect(response.status).toBe(200);
    expect(response.body.every(hit => hit.kind === 'page')).toBe(true);
  });

  it('space_id filter scopes results', async () => {
    const response = await authedGet(`/api/search?q=blackflame&space_id=${space.id}`);
    expect(response.status).toBe(200);
    expect(response.body.every(hit => hit.space_id === space.id)).toBe(true);
  });

  it('missing q → 400', async () => {
    const response = await authedGet('/api/search');
    expect(response.status).toBe(400);
  });

  it('limit respected', async () => {
    const response = await authedGet('/api/search?q=blackflame&limit=1');
    expect(response.body.length).toBe(1);
  });
});
|
||||||
100
tests/repos/search.test.js
Normal file
100
tests/repos/search.test.js
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
import { describe, it, expect, beforeAll, beforeEach } from 'vitest';
|
||||||
|
import { resetDb } from '../helpers/db.js';
|
||||||
|
import { migrateUp } from '../../lib/db/migrate.js';
|
||||||
|
import * as search from '../../lib/db/repos/search.js';
|
||||||
|
import * as spacesRepo from '../../lib/db/repos/spaces.js';
|
||||||
|
import * as pagesRepo from '../../lib/db/repos/pages.js';
|
||||||
|
import * as refsRepo from '../../lib/db/repos/refs.js';
|
||||||
|
import * as resourcesRepo from '../../lib/db/repos/resources.js';
|
||||||
|
import * as sourceDocsRepo from '../../lib/db/repos/source_docs.js';
|
||||||
|
import * as conversationsRepo from '../../lib/db/repos/conversations.js';
|
||||||
|
import * as messagesRepo from '../../lib/db/repos/messages.js';
|
||||||
|
|
||||||
|
// Actor passed to every repo call in this file.
const owner = { kind: 'user', id: null };
let space;
let otherSpace;

// Wipe the database and re-apply all migrations.
async function rebuildDb() {
  await resetDb();
  await migrateUp();
}

beforeAll(async () => {
  await rebuildDb();
});

// Every test starts from an empty schema with two spaces: the one being
// searched and a second one used to prove space scoping.
beforeEach(async () => {
  await rebuildDb();
  space = await spacesRepo.create({ slug: 's-main', name: 'Main' }, owner);
  otherSpace = await spacesRepo.create({ slug: 's-other', name: 'Other' }, owner);
});
|
||||||
|
|
||||||
|
/**
 * Insert one row of each searchable kind (page, ref, source doc, message),
 * each mentioning `word`. The page, ref and resource live in the main
 * `space`; the message is appended to a newly created conversation.
 *
 * @param {string} word - term embedded in every seeded row.
 */
async function seedAll(word) {
  const pageInput = {
    space_id: space.id,
    slug: 'pg-search',
    title: `${word} page`,
    body_md: `body about ${word}`
  };
  await pagesRepo.create(pageInput, owner);

  const refInput = {
    space_id: space.id,
    kind: 'url',
    source_url: 'https://example.com/x',
    title: `${word} reference`,
    body_text: `text mentioning ${word}`
  };
  await refsRepo.create(refInput, owner);

  // Source docs hang off a resource, so create that first.
  const resource = await resourcesRepo.create(
    { space_id: space.id, slug: 'r-search', name: 'Res', runtime_type: 'lxc' },
    owner
  );
  const sourceDocInput = {
    resource_id: resource.id,
    name: `${word} source doc`,
    upstream_url: 'https://example.com/sd',
    body_text: `doc body about ${word}`
  };
  await sourceDocsRepo.create(sourceDocInput, owner);

  const conversation = await conversationsRepo.create({ title: 'chat' }, owner);
  await messagesRepo.append(conversation.id, { role: 'user', body: `let's talk about ${word}` });
}
|
||||||
|
|
||||||
|
// Repo-level checks for search.fts against a freshly migrated database.
describe('search repo', () => {
  const kindsOf = (rows) => new Set(rows.map(row => row.kind));

  it('fts returns 4 hits across all kinds for the query word', async () => {
    await seedAll('blackflame');
    const results = await search.fts({ q: 'blackflame' });

    expect(results.length).toBe(4);
    expect(kindsOf(results)).toEqual(new Set(['page','ref','source_doc','message']));
    results.forEach((row) => {
      expect(typeof row.id).toBe('string');
      expect(typeof row.title_or_snippet).toBe('string');
      expect(typeof row.rank).toBe('number');
    });
  });

  it('kinds filter narrows to requested branches', async () => {
    await seedAll('blackflame');
    const results = await search.fts({ q: 'blackflame', kinds: ['page','ref'] });

    expect(results.length).toBe(2);
    expect(kindsOf(results)).toEqual(new Set(['page','ref']));
  });

  it('space_id filter scopes pages/refs/source_docs (messages excluded)', async () => {
    await seedAll('blackflame');
    // A matching page in a different space that must not show up.
    await pagesRepo.create(
      { space_id: otherSpace.id, slug: 'pg-other', title: 'blackflame other', body_md: '' },
      owner
    );

    const results = await search.fts({ q: 'blackflame', space_id: space.id });

    // page+ref+source_doc from main space; no messages (no space); no other-space page
    expect(results.length).toBe(3);
    expect(results.every(row => !row.kind.includes('message'))).toBe(true);
    expect(results.every(row => row.space_id === space.id)).toBe(true);
  });

  it('orders by rank desc and respects limit + offset', async () => {
    await seedAll('blackflame');
    const everything = await search.fts({ q: 'blackflame', limit: 100 });
    const firstPage = await search.fts({ q: 'blackflame', limit: 2 });
    expect(firstPage.length).toBe(2);

    const observedRanks = everything.map(row => row.rank);
    const descending = [...observedRanks].sort((a, b) => b - a);
    expect(observedRanks).toEqual(descending);

    const secondPage = await search.fts({ q: 'blackflame', limit: 2, offset: 2 });
    expect(secondPage.length).toBe(2);
    expect(secondPage[0].id).not.toBe(firstPage[0].id);
  });

  it('returns empty when nothing matches', async () => {
    await seedAll('blackflame');
    const results = await search.fts({ q: 'whitelight' });
    expect(results).toEqual([]);
  });
});
|
||||||
Reference in New Issue
Block a user