feat(api): unified FTS search

Single GET /api/search?q=&space_id=&kinds=&limit=&offset= unions FTS
hits across pages / refs / source_docs / messages with a `kind`
discriminator and ts_rank ordering. Each branch's to_tsvector matches
the GIN index expression on its source table so indexes are used.
Messages have no space_id and are excluded when a space filter is set.
Hybrid vector / RRF lands in Plan 3.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
root
2026-06-01 02:04:57 +10:00
parent ec96e4e2e3
commit 69e26ada98
5 changed files with 289 additions and 0 deletions

View File

@@ -19,6 +19,7 @@ import { router as tagsRouter, entityScopedRouter as tagsByEntityRouter } from '
import { router as linksRouter } from './routes/links.js';
import { router as pendingChangesRouter } from './routes/pending_changes.js';
import { router as auditRouter } from './routes/audit.js';
import { router as searchRouter } from './routes/search.js';
export function mountApi(app) {
const api = Router();
@@ -45,6 +46,7 @@ export function mountApi(app) {
api.use('/links', linksRouter);
api.use('/pending-changes', pendingChangesRouter);
api.use('/audit', auditRouter);
api.use('/search', searchRouter);
api.use('/:entity_type/:entity_id/tags', tagsByEntityRouter);
api.use((_req, _res, next) => next(new NotFoundError('route not found')));

37
lib/api/routes/search.js Normal file
View File

@@ -0,0 +1,37 @@
import { Router } from 'express';
import { z } from 'zod';
import * as repo from '../../db/repos/search.js';
import { parsePagination } from '../pagination.js';
import { validate } from '../validate.js';
import { asyncWrap } from '../errors.js';
// Result kinds the search endpoint recognises; any other token in the
// `kinds` query parameter is dropped.
const KINDS = ['page', 'ref', 'source_doc', 'message'];

// Query-string contract for GET /search. Only `q` is required. `kinds` is a
// comma-separated list; `limit` / `offset` are only shape-checked here, the
// values actually used come from parsePagination(req).
const querySchema = z.object({
  q: z.string().min(1),
  space_id: z.string().uuid().optional(),
  kinds: z.string().optional(),
  limit: z.string().optional(),
  offset: z.string().optional(),
});

export const router = Router();

// Hybrid vector + reciprocal-rank-fusion search lands in Plan 3 (see
// docs/superpowers/specs/2026-05-31-void-v2-design.md §search).
router.get('/',
  validate({ query: querySchema }),
  asyncWrap(async (req, res) => {
    const { limit, offset } = parsePagination(req);
    const { q, space_id: spaceId, kinds: rawKinds } = req.validatedQuery;

    // Absent / empty `kinds` means "no kind filter" (null). Otherwise keep
    // only the recognised kinds from the comma-separated list.
    const kinds = rawKinds
      ? rawKinds.split(',').map((s) => s.trim()).filter((k) => KINDS.includes(k))
      : null;

    // A filter that names no recognised kind matches nothing. Forwarding the
    // empty list would be read by the repo as "no filter" and return hits of
    // every kind, so answer with an empty result here instead.
    if (kinds !== null && kinds.length === 0) {
      res.json([]);
      return;
    }

    res.json(await repo.fts({
      q,
      space_id: spaceId ?? null,
      kinds,
      limit,
      offset,
    }));
  })
);

83
lib/db/repos/search.js Normal file
View File

@@ -0,0 +1,83 @@
import { pool } from '../pool.js';
// FTS-only search across pages / refs / source_docs / messages, unioned
// with a `kind` discriminator and ranked by ts_rank. Each branch's
// to_tsvector expression matches the GIN index on its table so the
// indexes are used. Vector / hybrid RRF search lands in Plan 3.
//
// Notes:
// - messages have no space_id → the messages branch is dropped when a
// space_id filter is present.
// - source_docs inherit space_id from their owning resource via join.
// Per-table to_tsvector expressions. Each is used both in the WHERE match and
// in ts_rank, so the text must stay identical in both places within a branch.
const PAGES_TSV = `to_tsvector('english', p.title || ' ' || coalesce(p.body_md,''))`;
const REFS_TSV = `to_tsvector('english', coalesce(r.title,'') || ' ' || coalesce(r.summary,'') || ' ' || coalesce(r.body_text,''))`;
const SD_TSV = `to_tsvector('english', sd.name || ' ' || coalesce(sd.body_text,''))`;
const MSG_TSV = `to_tsvector('english', m.body)`;

/**
 * Build the SELECT branches that get UNION ALL-ed into the search query.
 *
 * Every branch yields the same five columns (kind, id, space_id,
 * title_or_snippet, rank), reads the tsquery from a CTE named `q`, and
 * references `$2` as the optional space_id filter.
 *
 * @param {object} opts
 * @param {string[]|null} opts.kinds - kinds to include; null/empty-ish means all.
 * @param {boolean} opts.spaceFilterPresent - true when a space_id filter is
 *   set; the messages branch is dropped in that case (messages have no space).
 * @returns {string[]} SQL fragments, one per included kind (possibly empty).
 */
function buildBranches({ kinds, spaceFilterPresent }) {
  const wants = (kind) => !kinds || kinds.includes(kind);
  const branches = [];

  if (wants('page')) {
    branches.push(`
      SELECT 'page'::text AS kind, p.id, p.space_id, p.title AS title_or_snippet,
             ts_rank(${PAGES_TSV}, q.tsq) AS rank
        FROM pages p, q
       WHERE ${PAGES_TSV} @@ q.tsq
         AND ($2::uuid IS NULL OR p.space_id = $2)
    `);
  }

  if (wants('ref')) {
    branches.push(`
      SELECT 'ref'::text AS kind, r.id, r.space_id,
             coalesce(r.title, r.source_url, '(untitled)') AS title_or_snippet,
             ts_rank(${REFS_TSV}, q.tsq) AS rank
        FROM refs r, q
       WHERE ${REFS_TSV} @@ q.tsq
         AND ($2::uuid IS NULL OR r.space_id = $2)
    `);
  }

  if (wants('source_doc')) {
    // source_docs carry no space_id of their own; it comes from the joined
    // resources row.
    branches.push(`
      SELECT 'source_doc'::text AS kind, sd.id, res.space_id, sd.name AS title_or_snippet,
             ts_rank(${SD_TSV}, q.tsq) AS rank
        FROM source_docs sd
        JOIN resources res ON res.id = sd.resource_id, q
       WHERE ${SD_TSV} @@ q.tsq
         AND ($2::uuid IS NULL OR res.space_id = $2)
    `);
  }

  if (wants('message') && !spaceFilterPresent) {
    // The `$2::uuid IS NULL` predicate is deliberate: the caller always binds
    // $2, and when this is the only branch the statement would otherwise
    // never mention $2 while still using $3/$4, which PostgreSQL rejects
    // because it cannot infer the parameter's type. It also enforces the
    // "no messages when a space filter is set" rule in SQL itself.
    branches.push(`
      SELECT 'message'::text AS kind, m.id, NULL::uuid AS space_id,
             substring(m.body, 1, 200) AS title_or_snippet,
             ts_rank(${MSG_TSV}, q.tsq) AS rank
        FROM messages m, q
       WHERE ${MSG_TSV} @@ q.tsq
         AND $2::uuid IS NULL
    `);
  }

  return branches;
}
/**
 * Full-text search across the selected kinds, best rank first.
 *
 * @param {object} [opts]
 * @param {string} opts.q - search text, passed to plainto_tsquery.
 * @param {string|null} [opts.space_id=null] - optional space filter, bound as $2.
 * @param {string[]|null} [opts.kinds=null] - kinds to search; null or an empty
 *   array means all kinds.
 * @param {number} [opts.limit=50] - page size, bound as $3.
 * @param {number} [opts.offset=0] - rows to skip, bound as $4.
 * @returns {Promise<object[]>} rows with kind, id, space_id, title_or_snippet
 *   and rank; an empty array when `q` is missing / not a string or when no
 *   branch applies.
 */
export async function fts({ q, space_id = null, kinds = null, limit = 50, offset = 0 } = {}) {
  if (!q || typeof q !== 'string') return [];

  const normalizedKinds = Array.isArray(kinds) && kinds.length ? kinds : null;
  const spaceFilterPresent = space_id !== null && space_id !== undefined;
  const branches = buildBranches({ kinds: normalizedKinds, spaceFilterPresent });
  if (branches.length === 0) return [];

  // rank alone is not a unique sort key, so LIMIT/OFFSET paging could repeat
  // or skip rows whose ranks tie. kind + id make the order deterministic.
  const sql = `
    WITH q AS (SELECT plainto_tsquery('english', $1) AS tsq)
    SELECT * FROM (
      ${branches.join('\n      UNION ALL\n      ')}
    ) u
    ORDER BY rank DESC, kind, id
    LIMIT $3 OFFSET $4
  `;
  const { rows } = await pool.query(sql, [q, space_id, limit, offset]);
  return rows;
}