var Extrator = require("html-extractor");
// Single extractor instance reused by every extract() call in this script.
var myExtrator = new Extrator();
// Sample HTML document passed as input to the extract() calls below.
// Parts the examples rely on:
//   - <head>: a <title> plus keywords / description / generator <meta> tags
//   - <div id="content">: two <h1>/<p> articles and an inline <script>
//   - three <section> elements: two with class "abc", one with class "xyz"
var html = `
<html>
<head>
<title>Super page</title>
<meta content="X, Y, Z" name="keywords">
<meta content="Look at this super page" name="description">
<meta content="Super pageCMS" name="generator">
</head>
<body>
<div id="head">
<h1>My super page<sup>2</sup></h1>
</div>
<ul id="menu">
<li>Home</li>
<li>First</li>
<li>Second</li>
</ul>
<div id="content">
<h1>First article</h1>
<p>Lorem ipsum dolor sit amet ... </p>
<h1>Second article</h1>
<p>Aenean commodo ligula eget dolor.</p>
<script>
var superVar = [ 3,2,1 ]
</script>
</div>
<section class="abc">
<h3>ABC 1</h3>
<p>Lorem ipsum dolor sit amet ... </p>
</section>
<section class="xyz">
<h3>XYZ 1</h3>
<p>Lorem ipsum dolor sit amet ... </p>
</section>
<section class="abc">
<h3>ABC 2</h3>
<p>Lorem ipsum dolor sit amet ... </p>
</section>
<div id="footer">
Copyright 2013
</div>
</body>
</html>
`
// Example 1 — string mode.
// The selector below (tag + attribute + value) points at <div id="content">
// in the sample document and is handed to extract() as its second argument.
const reduceTo = { tag: "div", attr: "id", val: "content" };

myExtrator.extract(html, reduceTo, (err, data) => {
  // Any extraction error is rethrown so the script fails loudly.
  if (err) {
    throw err;
  }

  console.log("String", data);
  // Sample output for this call; `body` is a single string:
  // {
  //   meta: {
  //     title: 'Super page',
  //     description: 'Look at this super page',
  //     keywords: ['X', 'Y', 'Z'],
  //     generator: 'Super pageCMS'
  //   },
  //   body: 'First article Lorem ipsum dolor sit amet ... Second article Aenean commodo ligula eget dolor. ',
  //   h1: ['My super page2', 'First article', 'Second article']
  // }
});
// Example 2 — list mode.
// The selector targets every <section class="abc"> in the sample document
// (there are two), and `list: true` is set so that, per the sample output
// below, `body` comes back as an array with one entry per matching element.
//
// NOTE: this selector previously duplicated example 1's `div#content`
// selector, which cannot produce the 'ABC 1' / 'ABC 2' entries documented
// below; it now matches the sections that output is taken from.
const reduceToList = {
  tag: "section",
  attr: "class",
  val: "abc",
  list: true,
};

myExtrator.extract(html, reduceToList, (err, data) => {
  // Any extraction error is rethrown so the script fails loudly.
  if (err) {
    throw err;
  }

  console.log("List", data);
  // Sample output for this call; `body` is an array of strings:
  // {
  //   meta: {
  //     title: 'Super page',
  //     description: 'Look at this super page',
  //     keywords: ['X', 'Y', 'Z'],
  //     generator: 'Super pageCMS'
  //   },
  //   body: [
  //     'ABC 1 Lorem ipsum dolor sit amet ... ',
  //     'ABC 2 Lorem ipsum dolor sit amet ... '
  //   ],
  //   h1: ['My super page2', 'First article', 'Second article']
  // }
});