node-crawler

node-crawler 介绍

node-crawler 是一个比较好用的 Node.js 爬虫框架，我们可以使用熟悉的 jQuery 语法来解析响应返回的页面。

node-crawler安装

npm install crawler

node-crawler使用

var Crawler = require("crawler");

// Shared crawler instance. Tasks queued without their own `callback`
// are handled by the default callback defined here.
var c = new Crawler({
  maxConnections: 10,

  // Default handler, invoked once for each crawled page.
  //   error - set when the crawl of this page failed
  //   res   - response object; `res.$` is the page's selector function
  //   done  - must be called when handling of this page is finished
  callback: function (error, res, done) {
    if (error) {
      console.log(error);
      done();
      return;
    }

    // By default `$` is Cheerio: a lean, server-side implementation of
    // core jQuery, so familiar jQuery selectors work here.
    var $ = res.$;
    console.log($("title").text());

    done();
  }
});

// Enqueue a single URL; it is handled by the default callback.
c.queue('http://www.amazon.com');

// Enqueue several URLs in one call by passing them as an array.
var urlBatch = ['http://www.google.com/', 'http://www.yahoo.com'];
c.queue(urlBatch);

// Queue a URL with its own options and a per-task callback.
c.queue([{
  uri: 'http://parishackers.org/',

  // NOTE(review): per the node-crawler docs, `jQuery: false` disables
  // Cheerio injection, so only the raw response is used below.
  jQuery: false,

  // This task-level callback replaces the global one for this request.
  // The signature must be (error, res, done): the response is the second
  // argument and the completion function the third. Declaring only
  // (error, done) leaves `res` undefined and binds the response object
  // to `done`, so both the log line and `done()` would throw.
  callback: function (error, res, done) {
    if (error) {
      console.log(error);
    } else {
      console.log('Grabbed', res.body.length, 'bytes');
    }

    // Always signal completion, on both the error and success paths.
    done();
  }
}]);

// Hand a literal HTML snippet to the crawler directly, without grabbing
// anything from a URL (mostly useful for tests).
var inlineHtmlTask = {
  html: '<p>This is a <strong>test</strong></p>'
};
c.queue([inlineHtmlTask]);

网站地址:http://nodecrawler.org

GitHub:https://github.com/bda-research/node-crawler

网站描述:一款最好的node.js爬虫工具

官方网站：http://nodecrawler.org

node-crawler

node-crawler 介绍

node-crawler安装

node-crawler使用

node-crawler

相关推荐