Split long page text into 1500-char chunks before calling Ollama, then mean-pool the per-chunk vectors into one page vector. Removes the hard 6000-char slice that still caused 500s on dense markdown/table pages. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
37 lines
1.7 KiB
JavaScript
37 lines
1.7 KiB
JavaScript
import { describe, it, expect, vi, afterEach } from 'vitest';
|
|
import { chunkText, embedTextPooled } from '../../lib/ai/ollama.js';
|
|
|
|
// Restore any globals replaced via vi.stubGlobal() so stubs never leak between tests.
afterEach(() => {
  vi.unstubAllGlobals();
});
|
|
|
|
// Tests for chunkText(text, size): every returned chunk must be at most `size`
// characters long and, taken together, the chunks must not lose any input text.
describe('chunkText', () => {
  it('returns [] for empty', () => {
    expect(chunkText('')).toEqual([]);
  });

  it('keeps short text as one chunk', () => {
    expect(chunkText('hello\nworld', 1500)).toEqual(['hello\nworld']);
  });

  it('splits long text into <=size chunks covering all chars', () => {
    // 50 newline-separated lines, each well under the 200-char limit,
    // whose combined length is far above it.
    const text = Array.from({ length: 50 }, (_, i) => `line ${i} ${'x'.repeat(40)}`).join('\n');
    const chunks = chunkText(text, 200);

    expect(chunks.length).toBeGreaterThan(1);
    for (const chunk of chunks) {
      expect(chunk.length).toBeLessThanOrEqual(200);
    }

    // Coverage check promised by the test title: compare with newlines removed
    // so the assertion does not depend on how line separators are treated at
    // chunk boundaries, only on no visible text being dropped or duplicated.
    const withoutNewlines = (s) => s.replace(/\n/g, '');
    expect(withoutNewlines(chunks.join(''))).toBe(withoutNewlines(text));
  });

  it('hard-splits a single over-long line', () => {
    // No newline to break on, so the only way to respect the limit is to cut mid-line.
    const longLine = 'y'.repeat(500);
    const chunks = chunkText(longLine, 100);

    expect(chunks.length).toBe(5);
    expect(chunks.every((chunk) => chunk.length <= 100)).toBe(true);
    // The pieces must reassemble into the original line.
    expect(chunks.join('')).toBe(longLine);
  });
});
|
|
|
|
// Tests for embedTextPooled(text, { chunkSize }): the global fetch is stubbed so
// each request yields a known embedding, and the result is expected to be the
// element-wise mean of the per-chunk vectors.
describe('embedTextPooled', () => {
  it('mean-pools chunk vectors', async () => {
    // Two chunks (size 5 forces a split); the k-th fetch call returns embedding [k, k].
    let callCount = 0;
    const fetchMock = vi.fn(async () => {
      // Capture the call number now rather than reading the shared counter
      // inside json(): if requests are ever issued concurrently, a lazy read
      // would make every response report the final count.
      callCount += 1;
      const k = callCount;
      return { ok: true, json: async () => ({ embedding: [k, k] }) };
    });
    vi.stubGlobal('fetch', fetchMock);

    const v = await embedTextPooled('aaaaa\nbbbbb', { chunkSize: 5 });

    // chunks: ['aaaaa','bbbbb'] -> vectors [1,1],[2,2] -> mean [1.5,1.5]
    expect(fetchMock).toHaveBeenCalledTimes(2);
    expect(v).toEqual([1.5, 1.5]);
  });

  it('single chunk equals single embed', async () => {
    const fetchMock = vi.fn(async () => ({
      ok: true,
      json: async () => ({ embedding: [7, 8, 9] }),
    }));
    vi.stubGlobal('fetch', fetchMock);

    const v = await embedTextPooled('short', { chunkSize: 1500 });

    // Text shorter than chunkSize must produce exactly one request; without
    // this check, needless extra identical requests would average to the
    // same vector and go unnoticed.
    expect(fetchMock).toHaveBeenCalledTimes(1);
    expect(v).toEqual([7, 8, 9]);
  });
});
|