logger/server/simplecrawler.js

53 lines
1.3 KiB
JavaScript
Raw Normal View History

2016-03-08 21:10:12 +00:00
var Crawler = require("simplecrawler"),fs = require('fs');
// Crawl target, given as separate host / initial path / port arguments.
// (An earlier variant passed the single URL
// "http://www.bbc.co.uk/food/recipes/chicken_piperade_with_23608" instead.)
var myCrawler = new Crawler(
    "www.bbc.co.uk",
    "/food/recipes/chicken_piperade_with_23608",
    80
);

// Output file for fetched content, located next to this script.
var htmlfile = __dirname + '/test.html';

// Crawl limits: depth 1 and a single request at a time.
// NOTE(review): exact maxDepth semantics depend on the installed simplecrawler version - confirm.
myCrawler.maxDepth = 1;
// A ten-second request interval (interval = 10000) was tried previously and is currently disabled.
myCrawler.maxConcurrency = 1;
// Log once when the crawler reports that crawling has begun.
myCrawler.on('crawlstart', function onCrawlStart() {
    console.log('Crawling started...');
});
// Log both arguments of every 'fetchstart' event.
// The event name must match exactly: the previous 'fetchstart ' (trailing
// space) never matched anything the crawler emits, so this listener was dead.
// NOTE(review): argument names follow the simplecrawler README
// (queue item, request options) - confirm for the installed version.
myCrawler.on('fetchstart', function(queueItem, requestOptions) {
    console.log('fetchstart ...');
    console.log(queueItem);
    console.log(requestOptions);
});
// Log both arguments of every 'fetcherror' event.
// The event name must match exactly: the previous 'fetcherror ' (trailing
// space) never matched, so fetch errors went unreported.
// NOTE(review): argument names follow the simplecrawler README
// (queue item, response) - confirm for the installed version.
myCrawler.on('fetcherror', function(queueItem, response) {
    console.log('Crawling error...');
    console.log(queueItem);
    console.log(response);
});
// Log both arguments of every 'fetchclienterror' event.
// The event name must match exactly: the previous 'fetchclienterror '
// (trailing space) never matched, so client errors went unreported.
// NOTE(review): argument names follow the simplecrawler README
// (queue item, error data) - confirm for the installed version.
myCrawler.on('fetchclienterror', function(queueItem, errorData) {
    console.log('fetchclienterror error...');
    console.log(queueItem);
    console.log(errorData);
});
// Log the first argument of every 'queueadd' event.
// Two fixes: the event name no longer carries a trailing space (which kept
// the listener from ever firing), and the log label now names this event
// instead of the copy-pasted 'fetchclienterror error...'.
// NOTE(review): per the simplecrawler README the first argument is the newly
// queued item - confirm for the installed version.
myCrawler.on('queueadd', function(queueItem) {
    console.log('queueadd ...');
    console.log(queueItem);
});
// On every completed fetch: report the URL, body size and content type,
// then save the response body to disk.
myCrawler.on("fetchcomplete", function(queueItem, responseBuffer, response) {
    var fetchedUrl = queueItem.url;
    var byteCount = responseBuffer.length;
    console.log("I just received %s (%d bytes)", fetchedUrl, byteCount);

    var contentType = response.headers['content-type'];
    console.log("It was a resource of type %s", contentType);

    // Synchronous write to the fixed path in htmlfile; every completed fetch
    // writes to that same file.
    fs.writeFileSync(htmlfile, responseBuffer);
});
// Begin crawling now that the listeners above have been registered.
myCrawler.start();