From 69e26ada98aa74d70327b485cbb0ab77ce91402a Mon Sep 17 00:00:00 2001
From: root
Date: Mon, 1 Jun 2026 02:04:57 +1000
Subject: [PATCH] feat(api): unified FTS search

Single GET /api/search?q=&space_id=&kinds=&limit=&offset= unions FTS hits
across pages / refs / source_docs / messages with a `kind` discriminator
and ts_rank ordering. Each branch's to_tsvector matches the GIN index
expression on its source table so indexes are used. Messages have no
space_id and are excluded when a space filter is set. A `kinds` filter
that names no recognised kind returns an empty result instead of
falling back to every kind. Hybrid vector / RRF lands in Plan 3.

Co-Authored-By: Claude Opus 4.7
---
 lib/api/index.js           |   2 +
 lib/api/routes/search.js   |  49 ++++++++++++++++++
 lib/db/repos/search.js     |  83 ++++++++++++++++++++++++++++++
 tests/api/search.test.js   |  67 +++++++++++++++++++++++++
 tests/repos/search.test.js | 100 +++++++++++++++++++++++++++++++++++++
 5 files changed, 301 insertions(+)
 create mode 100644 lib/api/routes/search.js
 create mode 100644 lib/db/repos/search.js
 create mode 100644 tests/api/search.test.js
 create mode 100644 tests/repos/search.test.js

diff --git a/lib/api/index.js b/lib/api/index.js
index 0770afd..b2f4688 100644
--- a/lib/api/index.js
+++ b/lib/api/index.js
@@ -19,6 +19,7 @@ import { router as tagsRouter, entityScopedRouter as tagsByEntityRouter } from '
 import { router as linksRouter } from './routes/links.js';
 import { router as pendingChangesRouter } from './routes/pending_changes.js';
 import { router as auditRouter } from './routes/audit.js';
+import { router as searchRouter } from './routes/search.js';
 
 export function mountApi(app) {
   const api = Router();
@@ -45,6 +46,7 @@ export function mountApi(app) {
   api.use('/links', linksRouter);
   api.use('/pending-changes', pendingChangesRouter);
   api.use('/audit', auditRouter);
+  api.use('/search', searchRouter);
   api.use('/:entity_type/:entity_id/tags', tagsByEntityRouter);
 
   api.use((_req, _res, next) => next(new NotFoundError('route not found')));
diff --git a/lib/api/routes/search.js b/lib/api/routes/search.js
new file mode 100644
index 0000000..b0b153c
--- /dev/null
+++ b/lib/api/routes/search.js
@@ -0,0 +1,49 @@
+import { Router } from 'express';
+import { z } from 'zod';
+import * as repo from '../../db/repos/search.js';
+import { parsePagination } from '../pagination.js';
+import { validate } from '../validate.js';
+import { asyncWrap } from '../errors.js';
+
+// Result kinds a caller may request through the `kinds` query parameter.
+const KINDS = ['page','ref','source_doc','message'];
+
+// Query-string contract for GET /api/search; only `q` is required.
+const querySchema = z.object({
+  q: z.string().min(1),
+  space_id: z.string().uuid().optional(),
+  kinds: z.string().optional(),
+  limit: z.string().optional(),
+  offset: z.string().optional()
+});
+
+export const router = Router();
+
+// GET / — full-text search; responds with the rows returned by repo.fts.
+// `kinds` is a comma-separated list; unrecognised entries are dropped.
+//
+// Hybrid vector + reciprocal-rank-fusion search lands in Plan 3 (see
+// docs/superpowers/specs/2026-05-31-void-v2-design.md §search).
+router.get('/',
+  validate({ query: querySchema }),
+  asyncWrap(async (req, res) => {
+    const { limit, offset } = parsePagination(req);
+    const rawKinds = req.validatedQuery.kinds;
+    const kinds = rawKinds
+      ? rawKinds.split(',').map(s => s.trim()).filter(k => KINDS.includes(k))
+      : null;
+    // A filter that names no recognised kind matches nothing. Handing the
+    // empty list to repo.fts would widen the search to every kind, because
+    // fts treats an empty `kinds` array the same as "no filter".
+    if (kinds && kinds.length === 0) {
+      res.json([]);
+      return;
+    }
+    res.json(await repo.fts({
+      q: req.validatedQuery.q,
+      space_id: req.validatedQuery.space_id ?? null,
+      kinds,
+      limit, offset
+    }));
+  })
+);
diff --git a/lib/db/repos/search.js b/lib/db/repos/search.js
new file mode 100644
index 0000000..491faf7
--- /dev/null
+++ b/lib/db/repos/search.js
@@ -0,0 +1,83 @@
+import { pool } from '../pool.js';
+
+// FTS-only search across pages / refs / source_docs / messages, unioned
+// with a `kind` discriminator and ranked by ts_rank. Each branch's
+// to_tsvector expression matches the GIN index on its table so the
+// indexes are used. Vector / hybrid RRF search lands in Plan 3.
+//
+// Notes:
+// - messages have no space_id → the messages branch is dropped when a
+//   space_id filter is present.
+// - source_docs inherit space_id from their owning resource via join.
+// - every branch references $2, so the parameter is typed even for a messages-only query.
+const PAGES_TSV = `to_tsvector('english', p.title || ' ' || coalesce(p.body_md,''))`;
+const REFS_TSV = `to_tsvector('english', coalesce(r.title,'') || ' ' || coalesce(r.summary,'') || ' ' || coalesce(r.body_text,''))`;
+const SD_TSV = `to_tsvector('english', sd.name || ' ' || coalesce(sd.body_text,''))`;
+const MSG_TSV = `to_tsvector('english', m.body)`;
+// Builds one SELECT per requested kind; all share (kind, id, space_id, title_or_snippet, rank).
+function buildBranches({ kinds, spaceFilterPresent }) {
+  const branches = [];
+  const wantPage = !kinds || kinds.includes('page');
+  const wantRef = !kinds || kinds.includes('ref');
+  const wantSD = !kinds || kinds.includes('source_doc');
+  const wantMsg = (!kinds || kinds.includes('message')) && !spaceFilterPresent;
+
+  if (wantPage) {
+    branches.push(`
+      SELECT 'page'::text AS kind, p.id, p.space_id, p.title AS title_or_snippet,
+             ts_rank(${PAGES_TSV}, q.tsq) AS rank
+      FROM pages p, q
+      WHERE ${PAGES_TSV} @@ q.tsq
+        AND ($2::uuid IS NULL OR p.space_id = $2)
+    `);
+  }
+  if (wantRef) {
+    branches.push(`
+      SELECT 'ref'::text AS kind, r.id, r.space_id,
+             coalesce(r.title, r.source_url, '(untitled)') AS title_or_snippet,
+             ts_rank(${REFS_TSV}, q.tsq) AS rank
+      FROM refs r, q
+      WHERE ${REFS_TSV} @@ q.tsq
+        AND ($2::uuid IS NULL OR r.space_id = $2)
+    `);
+  }
+  if (wantSD) {
+    branches.push(`
+      SELECT 'source_doc'::text AS kind, sd.id, res.space_id, sd.name AS title_or_snippet,
+             ts_rank(${SD_TSV}, q.tsq) AS rank
+      FROM source_docs sd
+      JOIN resources res ON res.id = sd.resource_id, q
+      WHERE ${SD_TSV} @@ q.tsq
+        AND ($2::uuid IS NULL OR res.space_id = $2)
+    `);
+  }
+  if (wantMsg) {
+    branches.push(`
+      SELECT 'message'::text AS kind, m.id, NULL::uuid AS space_id,
+             substring(m.body, 1, 200) AS title_or_snippet,
+             ts_rank(${MSG_TSV}, q.tsq) AS rank
+      FROM messages m, q
+      WHERE ${MSG_TSV} @@ q.tsq AND $2::uuid IS NULL
+    `);
+  }
+  return branches;
+}
+// Runs the unioned query; resolves to [] for an empty/non-string q, else rows by rank (ties: kind, id).
+export async function fts({ q, space_id = null, kinds = null, limit = 50, offset = 0 } = {}) {
+  if (!q || typeof q !== 'string') return [];
+  const normalizedKinds = Array.isArray(kinds) && kinds.length ? kinds : null; // [] means "all kinds"
+  const spaceFilterPresent = space_id !== null && space_id !== undefined;
+  const branches = buildBranches({ kinds: normalizedKinds, spaceFilterPresent });
+  if (branches.length === 0) return [];
+  // kind + id break rank ties so LIMIT/OFFSET paging is deterministic.
+  const sql = `
+    WITH q AS (SELECT plainto_tsquery('english', $1) AS tsq)
+    SELECT * FROM (
+      ${branches.join('\n      UNION ALL\n      ')}
+    ) u
+    ORDER BY rank DESC, kind, id
+    LIMIT $3 OFFSET $4
+  `;
+  const { rows } = await pool.query(sql, [q, space_id, limit, offset]);
+  return rows;
+}
diff --git a/tests/api/search.test.js b/tests/api/search.test.js
new file mode 100644
index 0000000..f5605f6
--- /dev/null
+++ b/tests/api/search.test.js
@@ -0,0 +1,67 @@
+import { describe, it, expect, beforeAll, beforeEach } from 'vitest';
+import request from 'supertest';
+import { setup } from './helpers.js';
+import * as spacesRepo from '../../lib/db/repos/spaces.js';
+import * as pagesRepo from '../../lib/db/repos/pages.js';
+import * as refsRepo from '../../lib/db/repos/refs.js';
+
+let app, ownerHeaders, space;
+const owner = { kind: 'user', id: null };
+
+beforeAll(async () => { ({ app, ownerHeaders } = await setup()); });
+beforeEach(async () => {
+  space = await spacesRepo.create(
+    { slug: `s-${Date.now()}-${Math.random().toString(36).slice(2,5)}`, name: 'S' }, owner
+  );
+  await pagesRepo.create(
+    { space_id: space.id, slug: 'pg', title: 'blackflame page', body_md: 'body about blackflame' }, owner
+  );
+  await refsRepo.create(
+    {
+      space_id: space.id, kind: 'url', source_url: 'https://x',
+      title: 'blackflame ref', body_text: 'a ref about blackflame'
+    },
+    owner
+  );
+});
+
+describe('search api', () => {
+  it('401 without auth', async () => {
+    const res = await request(app).get('/api/search?q=blackflame');
+    expect(res.status).toBe(401);
+  });
+
+  it('200 with hits across kinds', async () => {
+    const res = await request(app).get('/api/search?q=blackflame').set(ownerHeaders);
+    expect(res.status).toBe(200);
+    expect(res.body.length).toBeGreaterThanOrEqual(2);
+    const kinds = new Set(res.body.map(h => h.kind));
+    expect(kinds.has('page')).toBe(true);
+    expect(kinds.has('ref')).toBe(true);
+  });
+
+  it('kinds filter narrows', async () => {
+    const res = await request(app)
+      .get('/api/search?q=blackflame&kinds=page').set(ownerHeaders);
+    expect(res.status).toBe(200);
+    expect(res.body.every(h => h.kind === 'page')).toBe(true);
+  });
+
+  it('space_id filter scopes results', async () => {
+    const res = await request(app)
+      .get(`/api/search?q=blackflame&space_id=${space.id}`).set(ownerHeaders);
+    expect(res.status).toBe(200);
+    expect(res.body.every(h => h.space_id === space.id)).toBe(true);
+  });
+
+  it('missing q → 400', async () => {
+    const res = await request(app).get('/api/search').set(ownerHeaders);
+    expect(res.status).toBe(400);
+  });
+
+  it('limit respected', async () => {
+    const res = await request(app)
+      .get('/api/search?q=blackflame&limit=1').set(ownerHeaders);
+    expect(res.body.length).toBe(1);
+  });
+});
diff --git a/tests/repos/search.test.js b/tests/repos/search.test.js
new file mode 100644
index 0000000..236fb0a
--- /dev/null
+++ b/tests/repos/search.test.js
@@ -0,0 +1,100 @@
+import { describe, it, expect, beforeAll, beforeEach } from 'vitest';
+import { resetDb } from '../helpers/db.js';
+import { migrateUp } from '../../lib/db/migrate.js';
+import * as search from '../../lib/db/repos/search.js';
+import * as spacesRepo from '../../lib/db/repos/spaces.js';
+import * as pagesRepo from '../../lib/db/repos/pages.js';
+import * as refsRepo from '../../lib/db/repos/refs.js';
+import * as resourcesRepo from '../../lib/db/repos/resources.js';
+import * as sourceDocsRepo from '../../lib/db/repos/source_docs.js';
+import * as conversationsRepo from '../../lib/db/repos/conversations.js';
+import * as messagesRepo from '../../lib/db/repos/messages.js';
+
+const owner = { kind: 'user', id: null };
+let space, otherSpace;
+
+beforeAll(async () => { await resetDb(); await migrateUp(); });
+beforeEach(async () => {
+  await resetDb();
+  await migrateUp();
+  space = await spacesRepo.create({ slug: 's-main', name: 'Main' }, owner);
+  otherSpace = await spacesRepo.create({ slug: 's-other', name: 'Other' }, owner);
+});
+
+async function seedAll(word) {
+  await pagesRepo.create(
+    { space_id: space.id, slug: 'pg-search', title: `${word} page`, body_md: `body about ${word}` }, owner
+  );
+  await refsRepo.create(
+    {
+      space_id: space.id, kind: 'url', source_url: 'https://example.com/x',
+      title: `${word} reference`, body_text: `text mentioning ${word}`
+    },
+    owner
+  );
+  const res = await resourcesRepo.create(
+    { space_id: space.id, slug: 'r-search', name: 'Res', runtime_type: 'lxc' }, owner
+  );
+  await sourceDocsRepo.create(
+    {
+      resource_id: res.id, name: `${word} source doc`,
+      upstream_url: 'https://example.com/sd', body_text: `doc body about ${word}`
+    },
+    owner
+  );
+  const conv = await conversationsRepo.create({ title: 'chat' }, owner);
+  await messagesRepo.append(conv.id, { role: 'user', body: `let's talk about ${word}` });
+}
+
+describe('search repo', () => {
+  it('fts returns 4 hits across all kinds for the query word', async () => {
+    await seedAll('blackflame');
+    const hits = await search.fts({ q: 'blackflame' });
+    expect(hits.length).toBe(4);
+    const kinds = new Set(hits.map(h => h.kind));
+    expect(kinds).toEqual(new Set(['page','ref','source_doc','message']));
+    for (const h of hits) {
+      expect(typeof h.id).toBe('string');
+      expect(typeof h.title_or_snippet).toBe('string');
+      expect(typeof h.rank).toBe('number');
+    }
+  });
+
+  it('kinds filter narrows to requested branches', async () => {
+    await seedAll('blackflame');
+    const hits = await search.fts({ q: 'blackflame', kinds: ['page','ref'] });
+    expect(hits.length).toBe(2);
+    expect(new Set(hits.map(h => h.kind))).toEqual(new Set(['page','ref']));
+  });
+
+  it('space_id filter scopes pages/refs/source_docs (messages excluded)', async () => {
+    await seedAll('blackflame');
+    await pagesRepo.create(
+      { space_id: otherSpace.id, slug: 'pg-other', title: 'blackflame other', body_md: '' }, owner
+    );
+    const hits = await search.fts({ q: 'blackflame', space_id: space.id });
+    // page+ref+source_doc from main space; no messages (no space); no other-space page
+    expect(hits.length).toBe(3);
+    expect(hits.every(h => !h.kind.includes('message'))).toBe(true);
+    expect(hits.every(h => h.space_id === space.id)).toBe(true);
+  });
+
+  it('orders by rank desc and respects limit + offset', async () => {
+    await seedAll('blackflame');
+    const all = await search.fts({ q: 'blackflame', limit: 100 });
+    const limited = await search.fts({ q: 'blackflame', limit: 2 });
+    expect(limited.length).toBe(2);
+    const ranks = all.map(h => h.rank);
+    const sorted = [...ranks].sort((a, b) => b - a);
+    expect(ranks).toEqual(sorted);
+    const off = await search.fts({ q: 'blackflame', limit: 2, offset: 2 });
+    expect(off.length).toBe(2);
+    expect(off.every(o => limited.every(l => l.id !== o.id))).toBe(true);
+  });
+
+  it('returns empty when nothing matches', async () => {
+    await seedAll('blackflame');
+    const hits = await search.fts({ q: 'whitelight' });
+    expect(hits).toEqual([]);
+  });
+});