feat(ingest): readability wrapper

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
root
2026-06-01 03:34:51 +10:00
parent 8d2afcd040
commit c6e72e93d5
2 changed files with 41 additions and 0 deletions

16
lib/ingest/readability.js Normal file
View File

@@ -0,0 +1,16 @@
import { JSDOM } from 'jsdom';
import { Readability } from '@mozilla/readability';
/**
 * Extract the main readable article from an HTML document.
 *
 * Parses `html` into a jsdom document, runs Mozilla Readability over it and
 * returns a plain-object summary. The jsdom window is always closed before
 * returning (or throwing), so repeated calls do not leave windows open.
 *
 * @param {string} html - Raw HTML markup to parse.
 * @param {string} [url] - Passed to jsdom as the `url` option for the document.
 * @returns {{title: (string|null), textContent: string, excerpt: (string|null),
 *   byline: (string|null), siteName: (string|null)}} Article fields. Missing or
 *   empty string fields become `null`; `textContent` is trimmed and is `''`
 *   when Readability finds no article.
 */
export function extract(html, url) {
  const dom = new JSDOM(html, { url });
  try {
    const article = new Readability(dom.window.document).parse();
    if (!article) {
      return { title: null, textContent: '', excerpt: null, byline: null, siteName: null };
    }
    // `||` (not `??`) is deliberate: empty strings are normalised to null too.
    return {
      title: article.title || null,
      textContent: (article.textContent || '').trim(),
      excerpt: article.excerpt || null,
      byline: article.byline || null,
      siteName: article.siteName || null,
    };
  } finally {
    // Only plain strings leave this function, so the window can be released
    // on every path, including when parsing throws.
    dom.window.close();
  }
}