Blackflame Notes
+An essay on the Cradle aesthetic and the blackflame motif. This is a longer paragraph that gives readability enough text to consider this the main content of the page.
+A second paragraph also part of the article.
+diff --git a/lib/ingest/readability.js b/lib/ingest/readability.js new file mode 100644 index 0000000..4720e31 --- /dev/null +++ b/lib/ingest/readability.js @@ -0,0 +1,16 @@ +import { JSDOM } from 'jsdom'; +import { Readability } from '@mozilla/readability'; + +/** Extract the main article from an HTML string: builds a JSDOM document from html (passing url as the JSDOM url option), runs Readability parse() on that document, and returns an object with title, textContent, excerpt, byline and siteName. When parse() returns a falsy value, textContent is an empty string and every other field is null; otherwise textContent is trimmed and any falsy field is normalized to null. */ export function extract(html, url) { + const dom = new JSDOM(html, { url }); + const reader = new Readability(dom.window.document); + const a = reader.parse(); + if (!a) return { title: null, textContent: '', excerpt: null, byline: null, siteName: null }; + return { + title: a.title || null, + textContent: (a.textContent || '').trim(), + excerpt: a.excerpt || null, + byline: a.byline || null, + siteName: a.siteName || null + }; +} diff --git a/tests/ingest/readability.test.js b/tests/ingest/readability.test.js new file mode 100644 index 0000000..2b8e063 --- /dev/null +++ b/tests/ingest/readability.test.js @@ -0,0 +1,25 @@ +import { describe, it, expect } from 'vitest'; +import { extract } from '../../lib/ingest/readability.js'; + +const HTML = ` +
An essay on the Cradle aesthetic and the blackflame motif. This is a longer paragraph that gives readability enough text to consider this the main content of the page.
+A second paragraph also part of the article.
+