recipes/server/grab.js
martind2000 4ee49fe2ac ll
2016-02-23 00:28:47 +00:00

61 lines
1.8 KiB
JavaScript

/**
* Created by Martin on 22/02/2016.
*/
var http = require('http'), request = require('request'), cheerio = require('cheerio'), util = require('util');
var jsonfile = require('jsonfile'), fs = require('fs'), STRING = require('string');
var log4js = require('log4js');
var logger = log4js.getLogger();
var bodyfile = __dirname + '/' + 'body.html';
var htmlfile = __dirname + '/' + 'testoutput.html';
var generics = ['ARTICLE', 'div.content_column', 'div.post'];
function cleaner(b) {
var _b = b;
var unwanted = ['div#disqus_thread', 'SCRIPT', 'FOOTER', 'div.ssba', '.shareaholic-canvas', '.yarpp-related', 'div.dfad', 'div.postFooterShare', 'div#nextPrevLinks','.post-comments'];
for (var i = 0; i < unwanted.length; i++) {
_b.find(unwanted[i]).remove();
}
return _b;
}
module.exports = {
generic: function (url) {
logger.info(url);
request(url, function (err, resp, body) {
if (err)
throw err;
$ = cheerio.load(body);
var title = $('TITLE').text();
// try to find a body to grab
var i = 0;
while (($(generics[i]).length == 0) && (i < generics.length)) {
i++;
}
logger.debug(i);
if (i < generics.length) {
var tdihbody = $(generics[i]);
logger.debug(tdihbody.length);
tdihbody = cleaner(tdihbody);
logger.debug(title);
fs.writeFileSync(htmlfile, tdihbody.html());
}
fs.writeFileSync(bodyfile, $.html());
});
}
};
//module.exports.grabMarksDailyApple('http://www.marksdailyapple.com/spiced-pork-and-butternut-squash-with-sage');
module.exports.generic('http://www.health-bent.com/soups/paleo-mediterranean-beef-stew');