Merge wiki-repo-and-chunking: docs-kind spaces + long-page embedding (alpha-21)
This commit is contained in:
@@ -3,6 +3,10 @@
|
|||||||
All notable changes to Void 2.0 are documented here.
|
All notable changes to Void 2.0 are documented here.
|
||||||
Format: [Keep a Changelog](https://keepachangelog.com).
|
Format: [Keep a Changelog](https://keepachangelog.com).
|
||||||
|
|
||||||
|
## 2.0.0-alpha.21 — Docs-kind spaces + long-page embedding
|
||||||
|
- **`spaces.kind` (`'project'` | `'docs'`)** (`migration 021`): `'docs'` spaces render as a pure documentation repository — `public/views/space.js` shows only the sectioned page tree (no Projects/Tasks/"+ New"), and the sidebar expands a docs space to its top-level pages (`#/page/:id`) instead of projects. The **Wiki** is seeded to `'docs'`. Project spaces unchanged.
|
||||||
|
- **Chunk + mean-pool embeddings** (`lib/ai/ollama.js` `chunkText`/`embedTextPooled`, used by the embed worker): long pages are split into ≤1500-char chunks (at most 64 chunks per page by default; text beyond that cap is not embedded), each chunk is embedded, and the resulting vectors are element-wise mean-pooled into one vector — replacing the old `slice(0,6000)` truncation that made dense/long docs fail with Ollama "input length exceeds context length". Single-chunk docs are unchanged.
|
||||||
|
|
||||||
## 2.0.0-alpha.20 — Page ordering + sectioned space view
|
## 2.0.0-alpha.20 — Page ordering + sectioned space view
|
||||||
- **Explicit page ordering** (`migration 020`, `lib/db/repos/pages.js`): pages gain a `position integer` column; `listBySpace` now orders `position, title` instead of alphabetical-only, with a covering index `(space_id, position, title)`. `position` is patchable via `PUT /api/pages/:id`. Backfills all rows to `0` (preserves prior title order until positions are set).
|
- **Explicit page ordering** (`migration 020`, `lib/db/repos/pages.js`): pages gain a `position integer` column; `listBySpace` now orders `position, title` instead of alphabetical-only, with a covering index `(space_id, position, title)`. `position` is patchable via `PUT /api/pages/:id`. Backfills all rows to `0` (preserves prior title order until positions are set).
|
||||||
- **Sectioned page tree** (`public/views/space.js`): the flat pages table is replaced by a `parent_id`-grouped tree — top-level pages render as section headers with their children/grandchildren nested. Backward-compatible with flat (un-nested) spaces. Enables the Wiki to read as ordered, sectioned documentation rather than an alphabetical dump.
|
- **Sectioned page tree** (`public/views/space.js`): the flat pages table is replaced by a `parent_id`-grouped tree — top-level pages render as section headers with their children/grandchildren nested. Backward-compatible with flat (un-nested) spaces. Enables the Wiki to read as ordered, sectioned documentation rather than an alphabetical dump.
|
||||||
|
|||||||
@@ -22,3 +22,40 @@ export function padTo(vector, dim) {
|
|||||||
while (out.length < dim) out.push(0);
|
while (out.length < dim) out.push(0);
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Cut one over-long line into pieces of at most `limit` UTF-16 code units.
 *
 * A cut that would land between the two halves of a surrogate pair is moved
 * back by one unit, so no piece ends with a lone high surrogate. The only
 * exception is `limit === 1`, where a pair cannot fit into a single piece.
 *
 * @param {string} line  Line longer than `limit`.
 * @param {number} limit Integer >= 1.
 * @returns {string[]} Pieces that concatenate back to `line`.
 */
function splitLongLine(line, limit) {
  const pieces = [];
  let start = 0;
  while (start < line.length) {
    let end = Math.min(start + limit, line.length);
    const cutsSurrogatePair =
      end < line.length &&
      end - start > 1 &&
      (line.charCodeAt(end - 1) & 0xfc00) === 0xd800 &&
      (line.charCodeAt(end) & 0xfc00) === 0xdc00;
    if (cutsSurrogatePair) end -= 1;
    pieces.push(line.slice(start, end));
    start = end; // always advances: end > start because limit >= 1
  }
  return pieces;
}

/**
 * Split text into chunks of at most `size` characters (UTF-16 code units).
 *
 * The text is trimmed, then lines are accumulated (joined with '\n') until
 * adding the next line would exceed `size`; the chunk is then closed and the
 * next one starts with that line. A single line longer than `size` is
 * hard-split into `size`-sized pieces. The newline between two chunks is not
 * kept, and empty lines at the start of a chunk are dropped.
 *
 * @param {string} text        Text to split; falsy values are treated as ''.
 * @param {number} [size=1500] Maximum chunk length. Fractions are floored.
 * @returns {string[]} Chunks in order; [] when the trimmed text is empty.
 * @throws {RangeError} If `size` is not a number >= 1. A zero or negative size
 *   would otherwise never advance the hard-split loop.
 */
export function chunkText(text, size = 1500) {
  const limit = Math.floor(size);
  if (!(limit >= 1)) {
    throw new RangeError(`chunkText: size must be a number >= 1 (got ${size})`);
  }

  const s = (text || '').trim();
  if (!s) return [];

  const chunks = [];
  let cur = '';
  for (const line of s.split('\n')) {
    if (line.length > limit) {
      // Flush what we have so chunk order follows document order.
      if (cur) {
        chunks.push(cur);
        cur = '';
      }
      for (const piece of splitLongLine(line, limit)) chunks.push(piece);
      continue;
    }
    // +1 accounts for the '\n' that joins `line` onto the current chunk.
    if (cur.length + line.length + 1 > limit) {
      if (cur) chunks.push(cur);
      cur = line;
    } else {
      cur = cur ? `${cur}\n${line}` : line;
    }
  }
  if (cur) chunks.push(cur);
  return chunks;
}
|
||||||
|
|
||||||
|
/**
 * Embed possibly-long text as one vector: split it with `chunkText`, embed each
 * chunk with `embedText`, and average the chunk vectors element-wise.
 *
 * Empty text is embedded as a single '' chunk. Only the first `maxChunks`
 * chunks are embedded; anything after them does not contribute to the result.
 * Every chunk carries the same weight in the mean, whatever its length.
 *
 * @param {string} text Text to embed.
 * @param {object} [options]
 * @param {string} [options.model='nomic-embed-text'] Passed through to `embedText`.
 * @param {number} [options.timeoutMs=60000] Passed through to `embedText` for each chunk.
 * @param {number} [options.maxChunks=64] Upper bound on embedded chunks (bounds cost).
 * @param {number} [options.chunkSize=1500] Maximum characters per chunk.
 * @returns {Promise<number[]>} Pooled vector, as long as the first chunk's vector.
 * @throws {RangeError} If `maxChunks` or `chunkSize` is not a number >= 1.
 */
export async function embedTextPooled(text, { model = 'nomic-embed-text', timeoutMs = 60_000, maxChunks = 64, chunkSize = 1500 } = {}) {
  // maxChunks of 0 used to empty the chunk list and crash on vecs[0].length;
  // a negative value silently dropped the last chunk via slice(0, -n).
  if (!(maxChunks >= 1)) {
    throw new RangeError(`embedTextPooled: maxChunks must be a number >= 1 (got ${maxChunks})`);
  }
  if (!(chunkSize >= 1)) {
    throw new RangeError(`embedTextPooled: chunkSize must be a number >= 1 (got ${chunkSize})`);
  }

  let chunks = chunkText(text, chunkSize);
  if (chunks.length === 0) chunks = [''];
  if (chunks.length > maxChunks) chunks = chunks.slice(0, maxChunks);

  // One request at a time, in document order, as before.
  // NOTE(review): presumably to avoid flooding the embedding backend — confirm
  // before parallelizing.
  const vecs = [];
  for (const chunk of chunks) {
    vecs.push(await embedText(chunk, { model, timeoutMs }));
  }

  const dim = vecs[0].length;
  const pooled = new Array(dim).fill(0);
  for (const vec of vecs) {
    // Missing or NaN components count as 0, so a shorter vector cannot poison the mean.
    for (let i = 0; i < dim; i++) pooled[i] += (vec[i] || 0);
  }
  for (let i = 0; i < dim; i++) pooled[i] /= vecs.length;
  return pooled;
}
|
||||||
|
|||||||
5
lib/db/migrations/021_space_kind.sql
Normal file
5
lib/db/migrations/021_space_kind.sql
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
-- 021: space kind — 'project' (workspace w/ projects+tasks) or 'docs' (pure documentation repo)
-- Every statement below is written so the file can be applied more than once.
|
||||||
|
-- New column; NOT NULL with DEFAULT 'project', so spaces that already exist become 'project'.
ALTER TABLE spaces ADD COLUMN IF NOT EXISTS kind text NOT NULL DEFAULT 'project';
|
||||||
|
-- Drop before add: a second run would otherwise fail on the duplicate constraint name.
ALTER TABLE spaces DROP CONSTRAINT IF EXISTS spaces_kind_check;
|
||||||
|
-- Restrict kind to the two supported values.
ALTER TABLE spaces ADD CONSTRAINT spaces_kind_check CHECK (kind IN ('project','docs'));
|
||||||
|
-- Seed: the space with slug 'wiki' becomes a docs space (matches no rows if it does not exist).
UPDATE spaces SET kind='docs' WHERE slug='wiki';
|
||||||
@@ -30,7 +30,7 @@ export async function list() {
|
|||||||
|
|
||||||
export async function update(id, patch, actor) {
|
export async function update(id, patch, actor) {
|
||||||
const before = await getById(id);
|
const before = await getById(id);
|
||||||
const fields = ['name','description','theme','slug'];
|
const fields = ['name','description','theme','slug','kind'];
|
||||||
const sets = [], vals = [];
|
const sets = [], vals = [];
|
||||||
let i = 1;
|
let i = 1;
|
||||||
for (const f of fields) {
|
for (const f of fields) {
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { embedText, padTo } from '../../ai/ollama.js';
|
import { embedTextPooled, padTo } from '../../ai/ollama.js';
|
||||||
import { pool } from '../../db/pool.js';
|
import { pool } from '../../db/pool.js';
|
||||||
import { recordAudit } from '../../db/repos/audit.js';
|
import { recordAudit } from '../../db/repos/audit.js';
|
||||||
|
|
||||||
@@ -19,8 +19,8 @@ export async function handler(job) {
|
|||||||
if (!table) throw new Error(`unknown entity_type: ${entity_type}`);
|
if (!table) throw new Error(`unknown entity_type: ${entity_type}`);
|
||||||
const { rows: [row] } = await pool.query(`SELECT * FROM ${table} WHERE id=$1`, [entity_id]);
|
const { rows: [row] } = await pool.query(`SELECT * FROM ${table} WHERE id=$1`, [entity_id]);
|
||||||
if (!row) return { skipped: 'gone' };
|
if (!row) return { skipped: 'gone' };
|
||||||
const text = STRING_BUILDERS[entity_type](row).slice(0, 6_000);
|
const text = STRING_BUILDERS[entity_type](row);
|
||||||
const v = await embedText(text);
|
const v = await embedTextPooled(text);
|
||||||
const padded = padTo(v, 1024);
|
const padded = padTo(v, 1024);
|
||||||
const literal = '[' + padded.join(',') + ']';
|
const literal = '[' + padded.join(',') + ']';
|
||||||
await pool.query(`UPDATE ${table} SET embedding=$1::vector WHERE id=$2`, [literal, entity_id]);
|
await pool.query(`UPDATE ${table} SET embedding=$1::vector WHERE id=$2`, [literal, entity_id]);
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "void-server",
|
"name": "void-server",
|
||||||
"version": "2.0.0-alpha.20",
|
"version": "2.0.0-alpha.21",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"private": true,
|
"private": true,
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|||||||
@@ -30,6 +30,13 @@ async function loadProjects(space_id) {
|
|||||||
} catch { return []; }
|
} catch { return []; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetch the pages of a space and keep only the top-level ones, i.e. those
 * whose `parent_id` is null or undefined.
 *
 * Best-effort: any failure (the request rejecting, or the response not being
 * filterable) resolves to an empty list instead of throwing.
 *
 * @param {string|number} space_id Space whose pages are requested.
 * @returns {Promise<object[]>} Top-level pages, or [] on any error.
 */
async function loadTopPages(space_id) {
  const isTopLevel = (page) => page.parent_id === null || page.parent_id === undefined;
  try {
    const allPages = await api.get(`/api/spaces/${space_id}/pages`);
    return allPages.filter(isTopLevel);
  } catch {
    return [];
  }
}
|
||||||
|
|
||||||
async function renderSpaceTree(container) {
|
async function renderSpaceTree(container) {
|
||||||
let spaces;
|
let spaces;
|
||||||
try { spaces = await api.get('/api/spaces'); }
|
try { spaces = await api.get('/api/spaces'); }
|
||||||
@@ -52,11 +59,20 @@ async function renderSpaceTree(container) {
|
|||||||
if (expanded.has(s.id)) { expanded.delete(s.id); clear(childWrap); }
|
if (expanded.has(s.id)) { expanded.delete(s.id); clear(childWrap); }
|
||||||
else {
|
else {
|
||||||
expanded.add(s.id);
|
expanded.add(s.id);
|
||||||
const projects = await loadProjects(s.id);
|
if (s.kind === 'docs') {
|
||||||
clear(childWrap);
|
const pages = await loadTopPages(s.id);
|
||||||
if (!projects.length) childWrap.appendChild(el('div', { class: 'sb-item muted' }, '(no projects)'));
|
clear(childWrap);
|
||||||
for (const p of projects) {
|
if (!pages.length) childWrap.appendChild(el('div', { class: 'sb-item muted' }, '(no pages)'));
|
||||||
childWrap.appendChild(el('a', { class: 'sb-item', href: '#/project/' + p.id }, p.name));
|
for (const p of pages) {
|
||||||
|
childWrap.appendChild(el('a', { class: 'sb-item', href: '#/page/' + p.id }, p.title || '(untitled)'));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
const projects = await loadProjects(s.id);
|
||||||
|
clear(childWrap);
|
||||||
|
if (!projects.length) childWrap.appendChild(el('div', { class: 'sb-item muted' }, '(no projects)'));
|
||||||
|
for (const p of projects) {
|
||||||
|
childWrap.appendChild(el('a', { class: 'sb-item', href: '#/project/' + p.id }, p.name));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -67,13 +83,23 @@ async function renderSpaceTree(container) {
|
|||||||
);
|
);
|
||||||
container.appendChild(header);
|
container.appendChild(header);
|
||||||
if (isOpen) {
|
if (isOpen) {
|
||||||
loadProjects(s.id).then(projects => {
|
if (s.kind === 'docs') {
|
||||||
clear(childWrap);
|
loadTopPages(s.id).then(pages => {
|
||||||
if (!projects.length) childWrap.appendChild(el('div', { class: 'sb-item muted' }, '(no projects)'));
|
clear(childWrap);
|
||||||
for (const p of projects) {
|
if (!pages.length) childWrap.appendChild(el('div', { class: 'sb-item muted' }, '(no pages)'));
|
||||||
childWrap.appendChild(el('a', { class: 'sb-item', href: '#/project/' + p.id }, p.name));
|
for (const p of pages) {
|
||||||
}
|
childWrap.appendChild(el('a', { class: 'sb-item', href: '#/page/' + p.id }, p.title || '(untitled)'));
|
||||||
});
|
}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
loadProjects(s.id).then(projects => {
|
||||||
|
clear(childWrap);
|
||||||
|
if (!projects.length) childWrap.appendChild(el('div', { class: 'sb-item muted' }, '(no projects)'));
|
||||||
|
for (const p of projects) {
|
||||||
|
childWrap.appendChild(el('a', { class: 'sb-item', href: '#/project/' + p.id }, p.name));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
container.appendChild(childWrap);
|
container.appendChild(childWrap);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -56,6 +56,39 @@ export async function render(main, ctx) {
|
|||||||
try { space = await api.get('/api/spaces/' + id); }
|
try { space = await api.get('/api/spaces/' + id); }
|
||||||
catch (e) { mount(main, el('h1', { class: 'view-h1' }, 'Space not found'), el('p', { class: 'view-sub muted' }, e.message)); return; }
|
catch (e) { mount(main, el('h1', { class: 'view-h1' }, 'Space not found'), el('p', { class: 'view-sub muted' }, e.message)); return; }
|
||||||
|
|
||||||
|
const docHead = el('div', { class: 'doc-head' },
|
||||||
|
el('h1', { class: 'view-h1', style: { margin: '0' } }, space.name),
|
||||||
|
exportMenu({
|
||||||
|
filenameBase: 'space-' + (space.slug || space.name),
|
||||||
|
getContent: async () => {
|
||||||
|
const allPages = await api.get(`/api/spaces/${id}/pages`).catch(() => []);
|
||||||
|
const full = await Promise.all(allPages.map(p => api.get('/api/pages/' + p.id).catch(() => null)));
|
||||||
|
const md = full.filter(Boolean).map(p => `# ${p.title}\n\n${p.body_md || ''}`).join('\n\n---\n\n');
|
||||||
|
return { title: space.name, md };
|
||||||
|
}
|
||||||
|
})
|
||||||
|
);
|
||||||
|
const descEl = el('p', { class: 'view-sub' }, space.description || el('span', { class: 'muted' }, 'No description.'));
|
||||||
|
|
||||||
|
if (space.kind === 'docs') {
|
||||||
|
// Docs-mode: pure documentation repo — no projects or tasks
|
||||||
|
const [pages, refs] = await Promise.all([
|
||||||
|
api.get(`/api/spaces/${id}/pages`).catch(() => []),
|
||||||
|
api.get(`/api/refs?space_id=${id}&limit=200`).catch(() => [])
|
||||||
|
]);
|
||||||
|
mount(main,
|
||||||
|
docHead,
|
||||||
|
descEl,
|
||||||
|
el('div', { class: 'card' },
|
||||||
|
el('h3', {}, space.name),
|
||||||
|
(pages.length + refs.length) > 0
|
||||||
|
? el('div', {}, renderPageTree(pages, refs))
|
||||||
|
: el('p', { class: 'muted' }, 'Nothing here yet.'))
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Project-mode: full workspace with projects, tasks, and pages
|
||||||
let projects = [];
|
let projects = [];
|
||||||
const [tasks, pages, refs] = await Promise.all([
|
const [tasks, pages, refs] = await Promise.all([
|
||||||
api.get(`/api/spaces/${id}/tasks?status=todo`).catch(() => []),
|
api.get(`/api/spaces/${id}/tasks?status=todo`).catch(() => []),
|
||||||
@@ -77,18 +110,8 @@ export async function render(main, ctx) {
|
|||||||
renderProjects();
|
renderProjects();
|
||||||
|
|
||||||
mount(main,
|
mount(main,
|
||||||
el('div', { class: 'doc-head' },
|
docHead,
|
||||||
el('h1', { class: 'view-h1', style: { margin: '0' } }, space.name),
|
descEl,
|
||||||
exportMenu({
|
|
||||||
filenameBase: 'space-' + (space.slug || space.name),
|
|
||||||
getContent: async () => {
|
|
||||||
const full = await Promise.all(pages.map(p => api.get('/api/pages/' + p.id).catch(() => null)));
|
|
||||||
const md = full.filter(Boolean).map(p => `# ${p.title}\n\n${p.body_md || ''}`).join('\n\n---\n\n');
|
|
||||||
return { title: space.name, md };
|
|
||||||
}
|
|
||||||
})
|
|
||||||
),
|
|
||||||
el('p', { class: 'view-sub' }, space.description || el('span', { class: 'muted' }, 'No description.')),
|
|
||||||
|
|
||||||
el('div', { class: 'card' },
|
el('div', { class: 'card' },
|
||||||
el('div', { class: 'card-head' }, projHead,
|
el('div', { class: 'card-head' }, projHead,
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import { mcpAuth } from './lib/api/middleware/mcp_auth.js';
|
|||||||
import { handleMcp } from './lib/mcp/http.js';
|
import { handleMcp } from './lib/mcp/http.js';
|
||||||
import httpProxy from 'http-proxy';
|
import httpProxy from 'http-proxy';
|
||||||
|
|
||||||
const VERSION = '2.0.0-alpha.20';
|
const VERSION = '2.0.0-alpha.21';
|
||||||
|
|
||||||
// Proxy /terminal (+ its WebSocket) to ttyd on CT 300, so the embedded terminal
|
// Proxy /terminal (+ its WebSocket) to ttyd on CT 300, so the embedded terminal
|
||||||
// works whether the Void is reached via Traefik (void2-app.hynesy.com) OR the
|
// works whether the Void is reached via Traefik (void2-app.hynesy.com) OR the
|
||||||
|
|||||||
36
tests/ai/embed_chunking.test.js
Normal file
36
tests/ai/embed_chunking.test.js
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
import { describe, it, expect, vi, afterEach } from 'vitest';
|
||||||
|
import { chunkText, embedTextPooled } from '../../lib/ai/ollama.js';
|
||||||
|
|
||||||
|
afterEach(() => { vi.unstubAllGlobals(); });
|
||||||
|
|
||||||
|
// Unit tests for chunkText: empty input, single chunk, line-boundary splitting,
// and hard-splitting of a line longer than the chunk size.
describe('chunkText', () => {
  it('returns [] for empty', () => {
    expect(chunkText('')).toEqual([]);
  });

  it('keeps short text as one chunk', () => {
    expect(chunkText('hello\nworld', 1500)).toEqual(['hello\nworld']);
  });

  it('splits long text into <=size chunks covering all chars', () => {
    const text = Array.from({ length: 50 }, (_, i) => `line ${i} ${'x'.repeat(40)}`).join('\n');
    const chunks = chunkText(text, 200);
    expect(chunks.length).toBeGreaterThan(1);
    for (const c of chunks) expect(c.length).toBeLessThanOrEqual(200);
    // Every line here is non-empty and shorter than 200, so chunks are cut only
    // at line breaks: putting the dropped newlines back must restore the input.
    expect(chunks.join('\n')).toBe(text);
  });

  it('hard-splits a single over-long line', () => {
    const chunks = chunkText('y'.repeat(500), 100);
    expect(chunks.length).toBe(5);
    expect(chunks.every(c => c.length <= 100)).toBe(true);
    // A hard split must not lose or duplicate characters.
    expect(chunks.join('')).toBe('y'.repeat(500));
  });
});
|
||||||
|
|
||||||
|
// Unit tests for embedTextPooled with the global fetch stubbed out, so no
// embedding server is contacted.
describe('embedTextPooled', () => {
  it('mean-pools chunk vectors', async () => {
    // chunkSize 5 forces 'aaaaa' and 'bbbbb' into separate chunks. Each stubbed
    // response carries [k, k], where k is the number of fetch calls made so far.
    let calls = 0;
    const fakeFetch = vi.fn(async () => {
      calls++;
      return { ok: true, json: async () => ({ embedding: [calls, calls] }) };
    });
    vi.stubGlobal('fetch', fakeFetch);

    const pooled = await embedTextPooled('aaaaa\nbbbbb', { chunkSize: 5 });

    // vectors [1,1] and [2,2] average to [1.5,1.5]
    expect(pooled).toEqual([1.5, 1.5]);
  });

  it('single chunk equals single embed', async () => {
    const fakeFetch = vi.fn(async () => ({
      ok: true,
      json: async () => ({ embedding: [7, 8, 9] }),
    }));
    vi.stubGlobal('fetch', fakeFetch);

    const pooled = await embedTextPooled('short', { chunkSize: 1500 });

    expect(pooled).toEqual([7, 8, 9]);
  });
});
|
||||||
42
tests/repos/space_kind.test.js
Normal file
42
tests/repos/space_kind.test.js
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
import { describe, it, expect, beforeEach } from 'vitest';
|
||||||
|
import { resetDb } from '../helpers/db.js';
|
||||||
|
import { migrateUp } from '../../lib/db/migrate.js';
|
||||||
|
import * as spaces from '../../lib/db/repos/spaces.js';
|
||||||
|
|
||||||
|
const actor = { kind: 'user', id: null };
|
||||||
|
|
||||||
|
beforeEach(async () => { await resetDb(); await migrateUp(); });
|
||||||
|
|
||||||
|
// Repository-level tests for the spaces.kind column: default value, updates
// through spaces.update, read-back through getById, and rejection of values
// outside the allowed set.
describe('spaces kind', () => {
  it('defaults kind to project', async () => {
    const s = await spaces.create({ slug: 'myspace', name: 'My Space' }, actor);
    expect(s.kind).toBe('project');
  });

  it('update can set kind to docs', async () => {
    const s = await spaces.create({ slug: 'wiki', name: 'Wiki' }, actor);
    const updated = await spaces.update(s.id, { kind: 'docs' }, actor);
    expect(updated.kind).toBe('docs');
  });

  it('reads back kind after update', async () => {
    const s = await spaces.create({ slug: 'docs-space', name: 'Docs' }, actor);
    await spaces.update(s.id, { kind: 'docs' }, actor);
    const fetched = await spaces.getById(s.id);
    expect(fetched.kind).toBe('docs');
  });

  // Retitled: this case never exercised the migration's seeding of slug 'wiki'.
  // NOTE(review): that seed is still untested here — beforeEach runs migrateUp()
  // before any space is created. Covering it needs a 'wiki' row inserted before
  // the migration is applied.
  it('a wiki-like slug still defaults to project and can be switched to docs', async () => {
    const s = await spaces.create({ slug: 'wiki-2', name: 'Wiki 2' }, actor);
    expect(s.kind).toBe('project'); // default
    const updated = await spaces.update(s.id, { kind: 'docs' }, actor);
    expect(updated.kind).toBe('docs');
  });

  it('rejects invalid kind values', async () => {
    const s = await spaces.create({ slug: 'test', name: 'Test' }, actor);
    await expect(spaces.update(s.id, { kind: 'invalid' }, actor)).rejects.toThrow();
  });
});
|
||||||
Reference in New Issue
Block a user