17 lines
528 B
JavaScript
17 lines
528 B
JavaScript
import { JSDOM } from 'jsdom';
|
|
import { Readability } from '@mozilla/readability';
|
|
|
|
/**
 * Extract the main article fields from an HTML document using Readability.
 *
 * The HTML is loaded into a throwaway JSDOM document, handed to Readability,
 * and the resulting article is reduced to a plain object. Falsy metadata
 * fields (missing or empty) are normalised to `null`; the text content is
 * normalised to a trimmed string (empty string when absent).
 *
 * @param {string} html - Markup to parse.
 * @param {string} url - Passed to JSDOM as the document's `url` option.
 * @returns {{
 *   title: string|null,
 *   textContent: string,
 *   excerpt: string|null,
 *   byline: string|null,
 *   siteName: string|null
 * }} Extracted fields; every field is empty/null when Readability yields no article.
 */
export function extract(html, url) {
  const dom = new JSDOM(html, { url });
  try {
    const reader = new Readability(dom.window.document);
    const article = reader.parse();

    // Readability produced nothing usable: return the same shape, all empty.
    if (!article) {
      return { title: null, textContent: '', excerpt: null, byline: null, siteName: null };
    }

    return {
      title: article.title || null,
      textContent: (article.textContent || '').trim(),
      excerpt: article.excerpt || null,
      byline: article.byline || null,
      siteName: article.siteName || null,
    };
  } finally {
    // Release the jsdom window on every path (including a throwing parse) so
    // repeated calls do not keep each parsed document alive. Only plain
    // values copied out of the article are returned, so nothing handed back
    // to the caller depends on the window staying open.
    dom.window.close();
  }
}
|