mirror of
https://gitlab.silvrtree.co.uk/martind2000/old-silvrgit.git
synced 2025-01-10 23:45:07 +00:00
53 lines
1.3 KiB
JavaScript
53 lines
1.3 KiB
JavaScript
var Crawler = require("simplecrawler"),fs = require('fs');
|
|
|
|
|
|
// Output location for the fetched body: test.html next to this script.
var htmlfile = [__dirname, 'test.html'].join('/');

// The crawl target is handed to the constructor as (host, initial path, port).
// A single-URL form was tried earlier and is kept here for reference:
//   new Crawler("http://www.bbc.co.uk/food/recipes/chicken_piperade_with_23608");
var myCrawler = new Crawler(
  "www.bbc.co.uk",
  "/food/recipes/chicken_piperade_with_23608",
  80
);

// Crawl limits: depth and concurrency are both set to 1.
myCrawler.maxDepth = 1;
// A request interval is currently not set; the disabled setting was:
//   myCrawler.interval = 10000; // Ten seconds
myCrawler.maxConcurrency = 1;
|
|
|
|
|
|
// Print a console notice when the crawler emits 'crawlstart'.
myCrawler.on('crawlstart', function onCrawlStart() {
  console.log('Crawling started...');
});
|
|
|
|
// Log each 'fetchstart' event along with both arguments passed to the listener.
// Event names are matched exactly by the emitter, so the name must carry no
// leading or trailing whitespace or the listener is never invoked.
myCrawler.on('fetchstart', function(a, b) {
  console.log('fetchstart ...');
  console.log(a);
  console.log(b);
});
|
|
|
|
// Log each 'fetcherror' event along with both arguments passed to the listener.
// Event names are matched exactly by the emitter, so the name must carry no
// leading or trailing whitespace or the listener is never invoked.
myCrawler.on('fetcherror', function(a, b) {
  console.log('Crawling error...');
  console.log(a);
  console.log(b);
});
|
|
|
|
// Log each 'fetchclienterror' event along with both arguments passed to the
// listener.
// Event names are matched exactly by the emitter, so the name must carry no
// leading or trailing whitespace or the listener is never invoked.
myCrawler.on('fetchclienterror', function(a, b) {
  console.log('fetchclienterror error...');
  console.log(a);
  console.log(b);
});
|
|
|
|
// Log each 'queueadd' event along with the first argument passed to the
// listener.
// Event names are matched exactly by the emitter, so the name must carry no
// leading or trailing whitespace or the listener is never invoked.
myCrawler.on('queueadd', function(a) {
  // Label the output with this event's own name so it cannot be confused with
  // the 'fetchclienterror' listener's output.
  console.log('queueadd ...');
  console.log(a);
});
|
|
|
|
// On each 'fetchcomplete' event: report the fetched URL, the body size and the
// content-type header, then save the raw body to `htmlfile`.
myCrawler.on("fetchcomplete", function onFetchComplete(queueItem, responseBuffer, response) {
  var fetchedUrl = queueItem.url;
  var byteCount = responseBuffer.length;
  console.log("I just received %s (%d bytes)", fetchedUrl, byteCount);

  var contentType = response.headers['content-type'];
  console.log("It was a resource of type %s", contentType);

  // Synchronous write to a fixed path: every completed fetch replaces the
  // file's previous contents.
  fs.writeFileSync(htmlfile, responseBuffer);
});
|
|
|
|
// Start the crawler now that the configuration and listeners are set on myCrawler.
myCrawler.start();
|