html-extractor advanced

node v6.17.1
version: master
endpoint · share · tweet
/**
 * html-extractor "advanced" example.
 *
 * Runs the same HTML document through the extractor twice, each time with a
 * different reducer object (tag / attr / val, optionally `list: true`), and
 * logs the result that the callback receives.
 */
const Extrator = require("html-extractor");

const myExtrator = new Extrator();

// Sample document. Kept as a single-line literal so the markup handed to the
// extractor is character-for-character what the example originally used.
const html = ` <html> <head> <title>Super page</title> <meta content="X, Y, Z" name="keywords"> <meta content="Look at this super page" name="description"> <meta content="Super pageCMS" name="generator"> </head> <body> <div id="head"> <h1>My super page<sup>2</sup></h1> </div> <ul id="menu"> <li>Home</li> <li>First</li> <li>Second</li> </ul> <div id="content"> <h1>First article</h1> <p>Lorem ipsum dolor sit amet ... </p> <h1>Second article</h1> <p>Aenean commodo ligula eget dolor.</p> <script> var superVar = [ 3,2,1 ] </script> </div> <section class="abc"> <h3>ABC 1</h3> <p>Lorem ipsum dolor sit amet ... </p> </section> <section class="xyz"> <h3>XYZ 1</h3> <p>Lorem ipsum dolor sit amet ... </p> </section> <section class="abc"> <h3>ABC 2</h3> <p>Lorem ipsum dolor sit amet ... </p> </section> <div id="footer"> Copyright 2013 </div> </body> </html> `;

// --- Example 1: reduce the body to a single element ------------------------

// Selects the <div id="content"> element of the sample document.
const reduceTo = {
  tag: "div",
  attr: "id",
  val: "content",
};

myExtrator.extract(html, reduceTo, function (err, data) {
  if (err) {
    throw err;
  }

  console.log("String", data);
  // Result documented by the example:
  // {
  //   meta: {
  //     title: 'Super page',
  //     description: 'Look at this super page',
  //     keywords: ['X', 'Y', 'Z'],
  //     generator: 'Super pageCMS'
  //   },
  //   body: 'First article Lorem ipsum dolor sit amet ... Second article Aenean commodo ligula eget dolor. ',
  //   h1: ['My super page2', 'First article', 'Second article']
  // }
});

// --- Example 2: reduce the body to a list of elements ----------------------

// Selects every <section class="abc">. The example previously reused the
// div#content selector here, which contradicts the documented result below
// (it lists the two "ABC" sections), so the selector now targets them.
// NOTE(review): confirm against the html-extractor docs that `list: true`
// yields one `body` entry per matching element.
const reduceToList = {
  tag: "section",
  attr: "class",
  val: "abc",
  list: true,
};

myExtrator.extract(html, reduceToList, function (err, data) {
  if (err) {
    throw err;
  }

  console.log("List", data);
  // Result documented by the example:
  // {
  //   meta: {
  //     title: 'Super page',
  //     description: 'Look at this super page',
  //     keywords: ['X', 'Y', 'Z'],
  //     generator: 'Super pageCMS'
  //   },
  //   body: [
  //     'ABC 1 Lorem ipsum dolor sit amet ... ',
  //     'ABC 2 Lorem ipsum dolor sit amet ... '
  //   ],
  //   h1: ['My super page2', 'First article', 'Second article']
  // }
});
Loading…

no comments

    sign in to comment