From 207ea906ee4578e933414f5b25795a20df59ca37 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 9 Jun 2026 08:35:00 +1000 Subject: [PATCH] =?UTF-8?q?feat(icons):=20ingest=20=E2=80=94=20file=20proc?= =?UTF-8?q?essor,=20zip=20unpack,=20URL=20fetch=20(guards)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- /* NOTE(review): in this copy, the text between the svg probe in magicOk and the MAX_FILE size check appears to be missing (the end of magicOk and the opening of processFile are not visible), and the SVG fixture in the sanitizes-svg test is an empty string. Confirm both against the original commit before applying. */ lib/icons/ingest.js | 75 ++++++++++++++++++++++++++++++++++++++ package-lock.json | 14 ++++++- package.json | 1 + tests/icons/ingest.test.js | 67 ++++++++++++++++++++++++++++++++++ 4 files changed, 155 insertions(+), 2 deletions(-) create mode 100644 lib/icons/ingest.js create mode 100644 tests/icons/ingest.test.js diff --git a/lib/icons/ingest.js b/lib/icons/ingest.js new file mode 100644 index 0000000..ccd055b --- /dev/null +++ b/lib/icons/ingest.js @@ -0,0 +1,75 @@ +// lib/icons/ingest.js +import path from 'node:path'; +import AdmZip from 'adm-zip'; +import { sanitizeSvg } from './sanitize.js'; + +export const MAX_FILE = 256 * 1024; // 256 KB per icon +export const MAX_ZIP_ENTRIES = 200; +export const MAX_ZIP_TOTAL = 5 * 1024 * 1024; // 5 MB uncompressed +export const MAX_URL_BYTES = 5 * 1024 * 1024; + +const EXT = { '.svg': 'image/svg+xml', '.png': 'image/png', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg' }; +const PNG_SIG = [0x89,0x50,0x4e,0x47]; +const JPG_SIG = [0xff,0xd8,0xff]; + +function slugBase(name) { /* Base name without its extension, lowercased; each run of characters outside a-z and 0-9 becomes a single hyphen and one leading or trailing hyphen is trimmed. The result can be an empty string. */ + return path.basename(name, path.extname(name)).toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/(^-|-$)/g, ''); +} +function magicOk(ext, buf) { /* Cheap content sniff keyed by extension: png and jpg/jpeg compare the leading bytes with PNG_SIG and JPG_SIG; svg decodes the first 400 bytes as UTF-8 and does a substring probe. */ + if (ext === '.png') return PNG_SIG.every((b, i) => buf[i] === b); + if (ext === '.jpg' || ext === '.jpeg') return JPG_SIG.every((b, i) => buf[i] === b); + if (ext === '.svg') return buf.toString('utf8', 0, 400).includes(' MAX_FILE) throw new Error('too_large'); /* NOTE(review): the preceding statement is incomplete in this copy; text is missing between the includes( call and MAX_FILE. */ + if (!magicOk(ext, buffer)) throw new Error('bad_magic'); + const base = slugBase(name); + if (!base) throw new Error('bad_name'); /* slugBase returned an empty string: the name had no a-z or 0-9 characters */ + const out = ext === '.svg' ? 
Buffer.from(sanitizeSvg(buffer)) : buffer; /* only .svg input is passed through sanitizeSvg and re-wrapped in a Buffer; other types keep the buffer object that was passed in */ + return { name: `${base}${ext}`, buffer: out, ext, contentType: EXT[ext] }; +} + +// Extract image entries from a zip buffer; flatten basenames, skip traversal/junk. Returns the processFile results for the entries that pass; throws too_many_entries or zip_too_big. +export function unpackZip(buffer) { + const zip = new AdmZip(buffer); + const entries = zip.getEntries(); + if (entries.length > MAX_ZIP_ENTRIES) throw new Error('too_many_entries'); /* counted before any filtering, so directory and non-image entries count toward the limit */ + const out = []; let total = 0; + for (const e of entries) { + if (e.isDirectory) continue; + const ext = path.extname(e.entryName).toLowerCase(); + if (!EXT[ext]) continue; // skip non-images + if (/(^|[\\/])\.\.([\\/]|$)/.test(e.entryName)) continue; // skip traversal + const data = e.getData(); /* NOTE(review): the entry is read in full here, before the running total is compared with MAX_ZIP_TOTAL below, so the cap does not bound the size of a single entry; consider checking the declared entry size first (adm-zip entry header field, confirm). */ + total += data.length; + if (total > MAX_ZIP_TOTAL) throw new Error('zip_too_big'); + try { out.push(processFile({ name: path.basename(e.entryName), buffer: data })); } /* only the basename is kept, so entries from different folders can produce the same output name; nothing here de-duplicates */ + catch { /* skip individually-invalid entries: any error thrown by processFile is discarded here */ } + } + return out; +} + +const PRIVATE_HOST = /^(localhost|127\.|0\.0\.0\.0|10\.|192\.168\.|169\.254\.|172\.(1[6-9]|2\d|3[01])\.|\[?::1\]?)/i; /* Tested against the URL hostname text only; anchored at the start with no end anchor, so any hostname that begins with one of these prefixes is blocked. NOTE(review): no DNS lookup happens in this file and ::1 is the only IPv6 form listed; confirm whether other private or link-local ranges must be rejected. */ + +// Fetch a remote icon or zip. SSRF guard: http/https only, no localhost/private, +// size + timeout caps. `fetcher` injectable for tests. Rejects with bad_url, bad_scheme, blocked_host, fetch_failed or too_large; resolves to { buffer, contentType }. +export async function fetchUrl(url, { fetcher = fetch } = {}) { + let u; + try { u = new URL(url); } catch { throw new Error('bad_url'); } + if (u.protocol !== 'http:' && u.protocol !== 'https:') throw new Error('bad_scheme'); + if (PRIVATE_HOST.test(u.hostname)) throw new Error('blocked_host'); + const res = await fetcher(url, { signal: AbortSignal.timeout(8000), redirect: 'error' }); /* 8000 ms abort timeout; the redirect option set to error makes fetch reject on a redirect instead of following it */ + if (!res.ok) throw new Error('fetch_failed'); + const ab = await res.arrayBuffer(); /* NOTE(review): the whole body is buffered before the MAX_URL_BYTES check that follows, so the cap limits what is returned, not what is downloaded. */ + if (ab.byteLength > MAX_URL_BYTES) throw new Error('too_large'); + const ct = (res.headers.get ? 
res.headers.get('content-type') : res.headers.get?.('content-type')) || ''; /* both arms of the ternary call headers.get; when get is absent the value falls back to an empty string */ + return { buffer: Buffer.from(ab), contentType: ct }; +} + +export function isZip(buf) { /* Checks only the first two bytes, 0x50 0x4b (PK), and requires length greater than 4; for a falsy buf the chain returns buf itself rather than false. */ return buf && buf.length > 4 && buf[0] === 0x50 && buf[1] === 0x4b; } diff --git a/package-lock.json b/package-lock.json index b854eb0..60c68d8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,15 +1,16 @@ { "name": "void-server", - "version": "2.0.0-alpha.16", + "version": "2.4.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "void-server", - "version": "2.0.0-alpha.16", + "version": "2.4.0", "dependencies": { "@modelcontextprotocol/sdk": "^1.29.0", "@mozilla/readability": "^0.6.0", + "adm-zip": "^0.5.17", "bcrypt": "^6.0.0", "dompurify": "^3.4.7", "dotenv": "^17.4.2", @@ -965,6 +966,15 @@ "node": ">= 0.6" } }, + "node_modules/adm-zip": { + "version": "0.5.17", + "resolved": "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.17.tgz", + "integrity": "sha512-+Ut8d9LLqwEvHHJl1+PIHqoyDxFgVN847JTVM3Izi3xHDWPE4UtzzXysMZQs64DMcrJfBeS/uoEP4AD3HQHnQQ==", + "license": "MIT", + "engines": { + "node": ">=12.0" + } + }, "node_modules/ajv": { "version": "8.20.0", "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.20.0.tgz", diff --git a/package.json b/package.json index 4d2770b..169015e 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,7 @@ "dependencies": { "@modelcontextprotocol/sdk": "^1.29.0", "@mozilla/readability": "^0.6.0", + "adm-zip": "^0.5.17", "bcrypt": "^6.0.0", "dompurify": "^3.4.7", "dotenv": "^17.4.2", diff --git a/tests/icons/ingest.test.js b/tests/icons/ingest.test.js new file mode 100644 index 0000000..004566b --- /dev/null +++ b/tests/icons/ingest.test.js @@ -0,0 +1,67 @@ +import { describe, it, expect } from 'vitest'; +import AdmZip from 'adm-zip'; +import { processFile, unpackZip, fetchUrl, MAX_FILE } from '../../lib/icons/ingest.js'; + +const PNG = Buffer.from([0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a, 0,0,0,0]); + +describe('processFile', () => { + 
it('slugifies name, keeps png', () => { + const r = processFile({ name: 'My Router.png', buffer: PNG }); + expect(r.name).toBe('my-router.png'); + expect(r.buffer).toBe(PNG); + }); + it('sanitizes svg', () => { + const r = processFile({ name: 'x.svg', buffer: Buffer.from('') }); /* NOTE(review): the fixture string is empty in this copy; presumably the original held SVG markup containing a script element, confirm against the commit. */ + expect(r.buffer.toString()).not.toMatch(/script/i); + }); + it('rejects non-image extension', () => { + expect(() => processFile({ name: 'x.exe', buffer: PNG })).toThrow(); + }); + it('rejects oversize', () => { + expect(() => processFile({ name: 'x.png', buffer: Buffer.alloc(MAX_FILE + 1, 1) })).toThrow(); /* the all-0x01 buffer also lacks the PNG signature and toThrow() is unqualified, so this does not pin the too_large path specifically */ + }); + it('rejects png with bad magic', () => { + expect(() => processFile({ name: 'x.png', buffer: Buffer.from('not a png') })).toThrow(); + }); +}); + +describe('unpackZip', () => { + it('extracts images, skips non-image junk', () => { + const z = new AdmZip(); + z.addFile('a.png', PNG); + z.addFile('notes.txt', Buffer.from('hi')); + const out = unpackZip(z.toBuffer()); + expect(out.map(f => f.name)).toEqual(['a.png']); + }); + + it('skips path-traversal entries', () => { + // adm-zip's addFile() sanitizes '../' at write time (zipnamefix), so it + // can't produce a real traversal entry. Build one by mutating the entry + // name at the raw level *after* addFile — this survives serialization and + // stores '../evil.png' verbatim in the zip bytes. + const z = new AdmZip(); + z.addFile('a.png', PNG); + z.addFile('placeholder.png', PNG); + const entries = z.getEntries(); + entries[1].entryName = '../evil.png'; + const buf = z.toBuffer(); + // Sanity check: the traversal entry name really is in the serialized bytes. 
+ expect(buf.includes(Buffer.from('../evil.png'))).toBe(true); + const out = unpackZip(buf); + expect(out.map(f => f.name)).toEqual(['a.png']); + }); +}); + +describe('fetchUrl', () => { + it('rejects non-http schemes', async () => { + await expect(fetchUrl('file:///etc/passwd')).rejects.toThrow(); + }); + it('rejects localhost/private hosts', async () => { + await expect(fetchUrl('http://127.0.0.1/x.png')).rejects.toThrow(); + }); + it('fetches via injected fetcher', async () => { + const fake = async () => ({ ok: true, arrayBuffer: async () => PNG.buffer.slice(PNG.byteOffset, PNG.byteOffset + PNG.length), headers: new Map([['content-type','image/png']]) }); + const r = await fetchUrl('https://example.com/x.png', { fetcher: fake }); + expect(Buffer.isBuffer(r.buffer)).toBe(true); + }); +});