/**
 * Split text into chunks of at most `size` UTF-16 code units.
 *
 * The trimmed text is walked line by line. Lines are accumulated (re-joined
 * with '\n') until adding the next one would push the current chunk past
 * `size`; the chunk is then flushed and the line starts a new chunk.
 * A single line longer than `size` is hard-split into consecutive pieces.
 * A hard split never lands between the two halves of a surrogate pair (the
 * cut is moved back one unit instead), so no chunk contains a lone surrogate
 * that the original text did not. The only exception is `size === 1`, where
 * a pair cannot fit in one chunk.
 *
 * @param {string} text - Text to split; falsy values are treated as empty.
 * @param {number} [size=1500] - Maximum chunk length; must be a positive integer.
 * @returns {string[]} Chunks in document order; `[]` for empty/whitespace-only text.
 * @throws {RangeError} If `size` is not a positive integer. A zero, negative
 *   or null size would otherwise make the hard-split loop spin forever, and a
 *   fractional size would produce overlapping slices.
 */
export function chunkText(text, size = 1500) {
  if (!Number.isInteger(size) || size < 1) {
    throw new RangeError('chunkText: size must be a positive integer, got ' + String(size));
  }
  const s = (text || '').trim();
  if (!s) return [];

  const isHighSurrogate = (code) => code >= 0xd800 && code <= 0xdbff;
  const isLowSurrogate = (code) => code >= 0xdc00 && code <= 0xdfff;

  const chunks = [];
  let cur = '';
  for (const line of s.split('\n')) {
    if (line.length > size) {
      // Over-long line: flush what we have, then hard-split the line itself.
      if (cur) { chunks.push(cur); cur = ''; }
      let start = 0;
      while (start < line.length) {
        let end = Math.min(start + size, line.length);
        // Back off one unit rather than cutting a surrogate pair in half.
        // `end - start > 1` guarantees the loop still makes progress.
        if (
          end < line.length &&
          end - start > 1 &&
          isHighSurrogate(line.charCodeAt(end - 1)) &&
          isLowSurrogate(line.charCodeAt(end))
        ) {
          end -= 1;
        }
        chunks.push(line.slice(start, end));
        start = end;
      }
      continue;
    }
    // The +1 accounts for the '\n' that would join `cur` and `line`.
    if (cur.length + line.length + 1 > size) {
      if (cur) chunks.push(cur);
      cur = line;
    } else {
      cur = cur ? cur + '\n' + line : line;
    }
  }
  if (cur) chunks.push(cur);
  return chunks;
}
/**
 * Embed possibly-long text as a single vector.
 *
 * The text is split with `chunkText`, at most `maxChunks` leading chunks are
 * embedded one after another via `embedText`, and the resulting vectors are
 * averaged element-wise (every chunk has equal weight). Empty text is embedded
 * as the empty string, so exactly one `embedText` call is still made. With a
 * single chunk the result equals that chunk's embedding.
 *
 * @param {string} text - Text to embed.
 * @param {object} [options]
 * @param {string} [options.model='nomic-embed-text'] - Passed through to `embedText`.
 * @param {number} [options.timeoutMs=60000] - Passed through to `embedText` for each chunk.
 * @param {number} [options.maxChunks=64] - Upper bound on embedded chunks; must be >= 1.
 *   Chunks beyond the cap are dropped.
 * @param {number} [options.chunkSize=1500] - Passed to `chunkText` as the chunk size.
 * @returns {Promise<number[]>} The mean-pooled embedding vector.
 * @throws {RangeError} If `maxChunks` is not >= 1 (previously this either
 *   crashed on an empty vector list or silently dropped trailing chunks).
 * @throws {TypeError} If `embedText` resolves to something without a length.
 * @throws {Error} If chunks come back with different dimensions; averaging
 *   them would silently produce a meaningless vector.
 */
export async function embedTextPooled(text, { model = 'nomic-embed-text', timeoutMs = 60_000, maxChunks = 64, chunkSize = 1500 } = {}) {
  // Written as a negated >= so that NaN is rejected too; Infinity stays valid.
  if (!(maxChunks >= 1)) {
    throw new RangeError('embedTextPooled: maxChunks must be >= 1, got ' + String(maxChunks));
  }

  const chunks = chunkText(text, chunkSize).slice(0, maxChunks);
  if (chunks.length === 0) chunks.push('');

  // Keep a running sum instead of holding every chunk vector in memory.
  let sums = null;
  for (const chunk of chunks) {
    // Sequential on purpose: one request in flight at a time.
    const vec = await embedText(chunk, { model, timeoutMs });
    if (vec == null || typeof vec.length !== 'number') {
      throw new TypeError('embedTextPooled: embedText returned no embedding vector');
    }
    if (sums === null) {
      sums = new Array(vec.length).fill(0);
    } else if (vec.length !== sums.length) {
      throw new Error(
        'embedTextPooled: embedding dimension mismatch (' + vec.length + ' vs ' + sums.length + ')'
      );
    }
    for (let i = 0; i < sums.length; i++) sums[i] += (vec[i] || 0);
  }
  return sums.map((total) => total / chunks.length);
}
}); + +describe('chunkText', () => { + it('returns [] for empty', () => { expect(chunkText('')).toEqual([]); }); + it('keeps short text as one chunk', () => { expect(chunkText('hello\nworld', 1500)).toEqual(['hello\nworld']); }); + it('splits long text into <=size chunks covering all chars', () => { + const text = Array.from({length: 50}, (_,i)=>`line ${i} ${'x'.repeat(40)}`).join('\n'); + const chunks = chunkText(text, 200); + expect(chunks.length).toBeGreaterThan(1); + for (const c of chunks) expect(c.length).toBeLessThanOrEqual(200); + }); + it('hard-splits a single over-long line', () => { + const chunks = chunkText('y'.repeat(500), 100); + expect(chunks.length).toBe(5); + expect(chunks.every(c => c.length <= 100)).toBe(true); + }); +}); + +describe('embedTextPooled', () => { + it('mean-pools chunk vectors', async () => { + // two chunks (size 5 forces split), fetch returns embedding = [callCount, callCount] + let n = 0; + vi.stubGlobal('fetch', vi.fn(async () => { n++; return { ok: true, json: async () => ({ embedding: [n, n] }) }; })); + const v = await embedTextPooled('aaaaa\nbbbbb', { chunkSize: 5 }); + // chunks: ['aaaaa','bbbbb'] -> vectors [1,1],[2,2] -> mean [1.5,1.5] + expect(v).toEqual([1.5, 1.5]); + }); + it('single chunk equals single embed', async () => { + vi.stubGlobal('fetch', vi.fn(async () => ({ ok: true, json: async () => ({ embedding: [7, 8, 9] }) }))); + const v = await embedTextPooled('short', { chunkSize: 1500 }); + expect(v).toEqual([7, 8, 9]); + }); +}); From 43bfa23a008b9d2c38719c5d10f677a6576fe0e8 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 5 Jun 2026 23:41:46 +1000 Subject: [PATCH 2/4] feat(spaces): docs-kind spaces render as pure documentation repos Adds a `kind` column to spaces ('project' default, 'docs' for Wiki). Docs spaces skip projects/tasks fetches and render only the page tree. Sidebar caret for docs spaces expands to top-level pages (#/page/:id). 
// Fetch the pages of a space and keep only the root-level ones, i.e. those
// whose `parent_id` is null or undefined. Best-effort: if the request fails
// or the response cannot be filtered, an empty list is returned.
async function loadTopPages(space_id) {
  const hasNoParent = (page) => page.parent_id == null;
  try {
    const allPages = await api.get(`/api/spaces/${space_id}/pages`);
    const topLevel = allPages.filter(hasNoParent);
    return topLevel;
  } catch {
    return [];
  }
}
spaces; try { spaces = await api.get('/api/spaces'); } @@ -52,11 +59,20 @@ async function renderSpaceTree(container) { if (expanded.has(s.id)) { expanded.delete(s.id); clear(childWrap); } else { expanded.add(s.id); - const projects = await loadProjects(s.id); - clear(childWrap); - if (!projects.length) childWrap.appendChild(el('div', { class: 'sb-item muted' }, '(no projects)')); - for (const p of projects) { - childWrap.appendChild(el('a', { class: 'sb-item', href: '#/project/' + p.id }, p.name)); + if (s.kind === 'docs') { + const pages = await loadTopPages(s.id); + clear(childWrap); + if (!pages.length) childWrap.appendChild(el('div', { class: 'sb-item muted' }, '(no pages)')); + for (const p of pages) { + childWrap.appendChild(el('a', { class: 'sb-item', href: '#/page/' + p.id }, p.title || '(untitled)')); + } + } else { + const projects = await loadProjects(s.id); + clear(childWrap); + if (!projects.length) childWrap.appendChild(el('div', { class: 'sb-item muted' }, '(no projects)')); + for (const p of projects) { + childWrap.appendChild(el('a', { class: 'sb-item', href: '#/project/' + p.id }, p.name)); + } } } } @@ -67,13 +83,23 @@ async function renderSpaceTree(container) { ); container.appendChild(header); if (isOpen) { - loadProjects(s.id).then(projects => { - clear(childWrap); - if (!projects.length) childWrap.appendChild(el('div', { class: 'sb-item muted' }, '(no projects)')); - for (const p of projects) { - childWrap.appendChild(el('a', { class: 'sb-item', href: '#/project/' + p.id }, p.name)); - } - }); + if (s.kind === 'docs') { + loadTopPages(s.id).then(pages => { + clear(childWrap); + if (!pages.length) childWrap.appendChild(el('div', { class: 'sb-item muted' }, '(no pages)')); + for (const p of pages) { + childWrap.appendChild(el('a', { class: 'sb-item', href: '#/page/' + p.id }, p.title || '(untitled)')); + } + }); + } else { + loadProjects(s.id).then(projects => { + clear(childWrap); + if (!projects.length) childWrap.appendChild(el('div', { 
class: 'sb-item muted' }, '(no projects)')); + for (const p of projects) { + childWrap.appendChild(el('a', { class: 'sb-item', href: '#/project/' + p.id }, p.name)); + } + }); + } } container.appendChild(childWrap); } diff --git a/public/views/space.js b/public/views/space.js index 6820467..0a1d7ee 100644 --- a/public/views/space.js +++ b/public/views/space.js @@ -56,6 +56,39 @@ export async function render(main, ctx) { try { space = await api.get('/api/spaces/' + id); } catch (e) { mount(main, el('h1', { class: 'view-h1' }, 'Space not found'), el('p', { class: 'view-sub muted' }, e.message)); return; } + const docHead = el('div', { class: 'doc-head' }, + el('h1', { class: 'view-h1', style: { margin: '0' } }, space.name), + exportMenu({ + filenameBase: 'space-' + (space.slug || space.name), + getContent: async () => { + const allPages = await api.get(`/api/spaces/${id}/pages`).catch(() => []); + const full = await Promise.all(allPages.map(p => api.get('/api/pages/' + p.id).catch(() => null))); + const md = full.filter(Boolean).map(p => `# ${p.title}\n\n${p.body_md || ''}`).join('\n\n---\n\n'); + return { title: space.name, md }; + } + }) + ); + const descEl = el('p', { class: 'view-sub' }, space.description || el('span', { class: 'muted' }, 'No description.')); + + if (space.kind === 'docs') { + // Docs-mode: pure documentation repo — no projects or tasks + const [pages, refs] = await Promise.all([ + api.get(`/api/spaces/${id}/pages`).catch(() => []), + api.get(`/api/refs?space_id=${id}&limit=200`).catch(() => []) + ]); + mount(main, + docHead, + descEl, + el('div', { class: 'card' }, + el('h3', {}, space.name), + (pages.length + refs.length) > 0 + ? 
el('div', {}, renderPageTree(pages, refs)) + : el('p', { class: 'muted' }, 'Nothing here yet.')) + ); + return; + } + + // Project-mode: full workspace with projects, tasks, and pages let projects = []; const [tasks, pages, refs] = await Promise.all([ api.get(`/api/spaces/${id}/tasks?status=todo`).catch(() => []), diff --git a/tests/repos/space_kind.test.js b/tests/repos/space_kind.test.js new file mode 100644 index 0000000..d30dd85 --- /dev/null +++ b/tests/repos/space_kind.test.js @@ -0,0 +1,42 @@ +import { describe, it, expect, beforeEach } from 'vitest'; +import { resetDb } from '../helpers/db.js'; +import { migrateUp } from '../../lib/db/migrate.js'; +import * as spaces from '../../lib/db/repos/spaces.js'; + +const actor = { kind: 'user', id: null }; + +beforeEach(async () => { await resetDb(); await migrateUp(); }); + +describe('spaces kind', () => { + it('defaults kind to project', async () => { + const s = await spaces.create({ slug: 'myspace', name: 'My Space' }, actor); + expect(s.kind).toBe('project'); + }); + + it('update can set kind to docs', async () => { + const s = await spaces.create({ slug: 'wiki', name: 'Wiki' }, actor); + const updated = await spaces.update(s.id, { kind: 'docs' }, actor); + expect(updated.kind).toBe('docs'); + }); + + it('reads back kind after update', async () => { + const s = await spaces.create({ slug: 'docs-space', name: 'Docs' }, actor); + await spaces.update(s.id, { kind: 'docs' }, actor); + const fetched = await spaces.getById(s.id); + expect(fetched.kind).toBe('docs'); + }); + + it('migration sets wiki slug to docs kind', async () => { + // Create a space with slug 'wiki' before migration to test seed behaviour + // (migration UPDATE runs after ALTER; here we create after migration so just verify constraint works) + const s = await spaces.create({ slug: 'wiki-2', name: 'Wiki 2' }, actor); + expect(s.kind).toBe('project'); // default + const updated = await spaces.update(s.id, { kind: 'docs' }, actor); + 
expect(updated.kind).toBe('docs'); + }); + + it('rejects invalid kind values', async () => { + const s = await spaces.create({ slug: 'test', name: 'Test' }, actor); + await expect(spaces.update(s.id, { kind: 'invalid' }, actor)).rejects.toThrow(); + }); +}); From 4b3945f9043dee695125d8a7065bfd69b6a5d7b3 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 5 Jun 2026 23:49:26 +1000 Subject: [PATCH 3/4] refactor(space-view): reuse docHead/descEl in project mode (no double exportMenu) --- public/views/space.js | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/public/views/space.js b/public/views/space.js index 0a1d7ee..5b54c87 100644 --- a/public/views/space.js +++ b/public/views/space.js @@ -110,18 +110,8 @@ export async function render(main, ctx) { renderProjects(); mount(main, - el('div', { class: 'doc-head' }, - el('h1', { class: 'view-h1', style: { margin: '0' } }, space.name), - exportMenu({ - filenameBase: 'space-' + (space.slug || space.name), - getContent: async () => { - const full = await Promise.all(pages.map(p => api.get('/api/pages/' + p.id).catch(() => null))); - const md = full.filter(Boolean).map(p => `# ${p.title}\n\n${p.body_md || ''}`).join('\n\n---\n\n'); - return { title: space.name, md }; - } - }) - ), - el('p', { class: 'view-sub' }, space.description || el('span', { class: 'muted' }, 'No description.')), + docHead, + descEl, el('div', { class: 'card' }, el('div', { class: 'card-head' }, projHead, From 358b826247916659c16616ea18d59d6b605168bf Mon Sep 17 00:00:00 2001 From: root Date: Fri, 5 Jun 2026 23:49:48 +1000 Subject: [PATCH 4/4] chore: release 2.0.0-alpha.21 (docs-kind spaces + long-page embedding) --- CHANGELOG.md | 4 ++++ package.json | 2 +- server.js | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 585b3c0..ea2be60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to Void 2.0 are documented here. 
Format: [Keep a Changelog](https://keepachangelog.com). +## 2.0.0-alpha.21 — Docs-kind spaces + long-page embedding +- **`spaces.kind` (`'project'` | `'docs'`)** (`migration 021`): `'docs'` spaces render as a pure documentation repository — `public/views/space.js` shows only the sectioned page tree (no Projects/Tasks/"+ New"), and the sidebar expands a docs space to its top-level pages (`#/page/:id`) instead of projects. The **Wiki** is seeded to `'docs'`. Project spaces unchanged. +- **Chunk + mean-pool embeddings** (`lib/ai/ollama.js` `chunkText`/`embedTextPooled`, used by the embed worker): long pages are split into ≤1500-char chunks, each embedded, then element-wise mean-pooled into one vector — replacing the old `slice(0,6000)` truncation that made dense/long docs fail with Ollama "input length exceeds context length". Single-chunk docs are unchanged. + ## 2.0.0-alpha.20 — Page ordering + sectioned space view - **Explicit page ordering** (`migration 020`, `lib/db/repos/pages.js`): pages gain a `position integer` column; `listBySpace` now orders `position, title` instead of alphabetical-only, with a covering index `(space_id, position, title)`. `position` is patchable via `PUT /api/pages/:id`. Backfills all rows to `0` (preserves prior title order until positions are set). - **Sectioned page tree** (`public/views/space.js`): the flat pages table is replaced by a `parent_id`-grouped tree — top-level pages render as section headers with their children/grandchildren nested. Backward-compatible with flat (un-nested) spaces. Enables the Wiki to read as ordered, sectioned documentation rather than an alphabetical dump. 
diff --git a/package.json b/package.json index 72270ff..7e482a5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "void-server", - "version": "2.0.0-alpha.20", + "version": "2.0.0-alpha.21", "type": "module", "private": true, "scripts": { diff --git a/server.js b/server.js index fcc1b66..6dda8d6 100644 --- a/server.js +++ b/server.js @@ -13,7 +13,7 @@ import { mcpAuth } from './lib/api/middleware/mcp_auth.js'; import { handleMcp } from './lib/mcp/http.js'; import httpProxy from 'http-proxy'; -const VERSION = '2.0.0-alpha.20'; +const VERSION = '2.0.0-alpha.21'; // Proxy /terminal (+ its WebSocket) to ttyd on CT 300, so the embedded terminal // works whether the Void is reached via Traefik (void2-app.hynesy.com) OR the