feat(api): unified FTS search
Single GET /api/search?q=&space_id=&kinds=&limit=&offset= unions FTS hits across pages / refs / source_docs / messages with a `kind` discriminator and ts_rank ordering. Each branch's to_tsvector matches the GIN index expression on its source table so indexes are used. Messages have no space_id and are excluded when a space filter is set. Hybrid vector / RRF lands in Plan 3. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
83
lib/db/repos/search.js
Normal file
83
lib/db/repos/search.js
Normal file
@@ -0,0 +1,83 @@
|
||||
import { pool } from '../pool.js';
|
||||
|
||||
// FTS-only search across pages / refs / source_docs / messages, unioned
|
||||
// with a `kind` discriminator and ranked by ts_rank. Each branch's
|
||||
// to_tsvector expression matches the GIN index on its table so the
|
||||
// indexes are used. Vector / hybrid RRF search lands in Plan 3.
|
||||
//
|
||||
// Notes:
|
||||
// - messages have no space_id → the messages branch is dropped when a
|
||||
// space_id filter is present.
|
||||
// - source_docs inherit space_id from their owning resource via join.
|
||||
|
||||
// SQL tsvector expression for the pages branch (table alias `p`): English
// config over title + body_md, with a NULL body_md treated as ''.
// Per the header comment this text is meant to match the GIN index expression
// on the table, so change it only together with the index (DDL not visible here).
// NOTE(review): title is not coalesced, so a NULL title would make the whole
// vector NULL — presumably pages.title is NOT NULL; confirm against the schema.
const PAGES_TSV = `to_tsvector('english', p.title || ' ' || coalesce(p.body_md,''))`;
|
||||
// SQL tsvector expression for the refs branch (table alias `r`): English
// config over title, summary and body_text, each coalesced to '' so a NULL
// column never nulls out the whole vector. Per the header comment it is meant
// to match the GIN index expression on the table (DDL not visible here).
const REFS_TSV = `to_tsvector('english', coalesce(r.title,'') || ' ' || coalesce(r.summary,'') || ' ' || coalesce(r.body_text,''))`;
|
||||
// SQL tsvector expression for the source_docs branch (table alias `sd`):
// English config over name + body_text, with a NULL body_text treated as ''.
// Per the header comment it is meant to match the GIN index expression on the
// table (DDL not visible here).
// NOTE(review): name is not coalesced — presumably source_docs.name is
// NOT NULL; confirm against the schema.
const SD_TSV = `to_tsvector('english', sd.name || ' ' || coalesce(sd.body_text,''))`;
|
||||
// SQL tsvector expression for the messages branch (table alias `m`): English
// config over the message body only. Per the header comment it is meant to
// match the GIN index expression on the table (DDL not visible here).
const MSG_TSV = `to_tsvector('english', m.body)`;
|
||||
|
||||
/**
 * Build the per-table SELECT fragments that `fts` glues together with
 * UNION ALL.
 *
 * Every fragment produces the same column list — kind, id, space_id,
 * title_or_snippet, rank — and expects the enclosing statement to provide a
 * CTE named `q` exposing `tsq` (the parsed query) plus the bind parameter `$2`
 * (a space id, or NULL for "no space filter").
 *
 * @param {object} opts
 * @param {string[]|null} opts.kinds - Kinds to include ('page', 'ref',
 *   'source_doc', 'message'). A falsy value selects every kind.
 * @param {boolean} opts.spaceFilterPresent - True when the caller filters by
 *   space; the messages branch is then omitted because messages carry no
 *   space_id.
 * @returns {string[]} SQL fragments in fixed order: page, ref, source_doc,
 *   message. Empty when nothing was selected.
 */
function buildBranches({ kinds, spaceFilterPresent }) {
  const wants = (kind) => !kinds || kinds.includes(kind);
  const branches = [];

  if (wants('page')) {
    branches.push(`
      SELECT 'page'::text AS kind, p.id, p.space_id, p.title AS title_or_snippet,
             ts_rank(${PAGES_TSV}, q.tsq) AS rank
      FROM pages p, q
      WHERE ${PAGES_TSV} @@ q.tsq
        AND ($2::uuid IS NULL OR p.space_id = $2)
    `);
  }

  if (wants('ref')) {
    branches.push(`
      SELECT 'ref'::text AS kind, r.id, r.space_id,
             coalesce(r.title, r.source_url, '(untitled)') AS title_or_snippet,
             ts_rank(${REFS_TSV}, q.tsq) AS rank
      FROM refs r, q
      WHERE ${REFS_TSV} @@ q.tsq
        AND ($2::uuid IS NULL OR r.space_id = $2)
    `);
  }

  if (wants('source_doc')) {
    // source_docs have no space_id of their own; it comes from the owning
    // resource row.
    branches.push(`
      SELECT 'source_doc'::text AS kind, sd.id, res.space_id, sd.name AS title_or_snippet,
             ts_rank(${SD_TSV}, q.tsq) AS rank
      FROM source_docs sd
      JOIN resources res ON res.id = sd.resource_id, q
      WHERE ${SD_TSV} @@ q.tsq
        AND ($2::uuid IS NULL OR res.space_id = $2)
    `);
  }

  if (wants('message') && !spaceFilterPresent) {
    // `$2` has to appear in this branch too: `fts` always binds it, and
    // PostgreSQL rejects a statement carrying a bound parameter whose type it
    // cannot infer ("could not determine data type of parameter $2"). Without
    // this predicate a messages-only search produced exactly that statement.
    // The predicate also enforces the "no messages under a space filter" rule
    // at the SQL level.
    branches.push(`
      SELECT 'message'::text AS kind, m.id, NULL::uuid AS space_id,
             substring(m.body, 1, 200) AS title_or_snippet,
             ts_rank(${MSG_TSV}, q.tsq) AS rank
      FROM messages m, q
      WHERE ${MSG_TSV} @@ q.tsq
        AND $2::uuid IS NULL
    `);
  }

  return branches;
}
|
||||
|
||||
/**
 * Full-text search across pages, refs, source_docs and messages.
 *
 * Runs one statement: a CTE parses the query with plainto_tsquery, the
 * selected per-kind branches are combined with UNION ALL, and the combined
 * rows are ordered by ts_rank and paginated.
 *
 * @param {object} [opts]
 * @param {string} opts.q - Free-text query. Non-strings and blank strings
 *   return `[]` without touching the database.
 * @param {string|null} [opts.space_id=null] - Restrict hits to one space.
 *   When set, messages are excluded because they have no space_id.
 * @param {string[]|null} [opts.kinds=null] - Kinds to search; null, a
 *   non-array or an empty array means all kinds.
 * @param {number} [opts.limit=50] - Bound as LIMIT.
 * @param {number} [opts.offset=0] - Bound as OFFSET.
 * @returns {Promise<object[]>} Rows with kind, id, space_id,
 *   title_or_snippet and rank, best rank first.
 */
export async function fts({ q, space_id = null, kinds = null, limit = 50, offset = 0 } = {}) {
  // A blank query has no lexemes and can never match; skip the round trip.
  if (typeof q !== 'string' || q.trim() === '') return [];

  const normalizedKinds = Array.isArray(kinds) && kinds.length > 0 ? kinds : null;
  const spaceFilterPresent = space_id !== null && space_id !== undefined;

  const branches = buildBranches({ kinds: normalizedKinds, spaceFilterPresent });
  if (branches.length === 0) return [];

  // - `$2::uuid` is anchored in the CTE so the parameter always has a
  //   resolvable type, whichever branches are present. PostgreSQL rejects a
  //   bound parameter it cannot type ("could not determine data type of
  //   parameter $2").
  // - `kind, id` breaks rank ties so LIMIT/OFFSET pages are stable. With rank
  //   alone, tied rows may repeat or vanish between pages.
  //   NOTE(review): assumes (kind, id) is unique, i.e. id is each table's key.
  const sql = `
    WITH q AS (
      SELECT plainto_tsquery('english', $1) AS tsq,
             $2::uuid AS space_filter
    )
    SELECT * FROM (
      ${branches.join('\n      UNION ALL\n      ')}
    ) u
    ORDER BY rank DESC, kind, id
    LIMIT $3 OFFSET $4
  `;

  const { rows } = await pool.query(sql, [q, space_id, limit, offset]);
  return rows;
}
|
||||
Reference in New Issue
Block a user