/**
 * Created by Martin on 22/02/2016.
 *
 * Express router that stores scraped web pages in a SQLite `recipes` table.
 *
 * Routes:
 *   GET  /list       - id and title of every stored row that has a title.
 *   GET  /entry/:id  - the stored row with the given id.
 *   POST /add        - fetch the page at the posted URL and store it.
 *
 * The database is opened as a side effect of requiring this module
 * (see the createDB() call at the bottom of the file).
 */
var express = require('express');
var http = require('http'),
    request = require('request'),
    cheerio = require('cheerio'),
    util = require('util');
var jsonfile = require('jsonfile'),
    fs = require('fs'),
    STRING = require('string');
var log4js = require('log4js');
var logger = log4js.getLogger();
var router = express.Router();
var sqlite3 = require('sqlite3').verbose();
var EventEmitter = require('events');

var busEmitter = new EventEmitter();

var dbfile = process.env.DB_HOME + '/' + "recipes.db";
// Only referenced by debugging code that has since been removed; kept so the
// module-level names stay available.
var bodyfile = __dirname + '/' + 'body.html';
var htmlfile = __dirname + '/' + 'testoutput.html';

// Candidate selectors for the main content of a page, tried in order.
var generics = ['ARTICLE', 'div.content_column', 'div.post'];

// Shared sqlite3 handle, assigned by connectDB().
var db;

/**
 * Ensures the database file exists, opens the shared connection and makes
 * sure the `recipes` table is present.
 *
 * The connection is deliberately left open: the route handlers below use the
 * shared `db` handle for the lifetime of the process.
 */
function createDB() {
    logger.debug('Creating recipes db...');
    logger.debug(dbfile);
    if (fs.existsSync(dbfile)) {
        logger.info('Database already created.');
    } else {
        logger.debug('creating db file');
        // Close the descriptor straight away; it is only used to create the file.
        fs.closeSync(fs.openSync(dbfile, 'w'));
    }
    connectDB();
    // Safe to run on every start because the statement uses IF NOT EXISTS.
    createTable();
}

/**
 * Opens the SQLite database at `dbfile` and stores the handle in `db`.
 *
 * @returns {object} The sqlite3 Database handle.
 */
function connectDB() {
    "use strict";
    logger.debug('Connect db.');
    db = new sqlite3.Database(dbfile, function (err) {
        if (err) {
            logger.error('Unable to open db: ' + err.message);
        }
    });
    return db;
}

/**
 * Creates the `recipes` table when it does not exist yet.
 *
 * The `title` column is part of the schema because insertRecipe() writes it
 * and the /list and /entry routes read it.
 */
function createTable() {
    logger.debug('Creating temp table...');
    db.run(
        'CREATE TABLE IF NOT EXISTS `recipes` (`id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE, `url` TEXT, `html` TEXT, `reduced` TEXT, `title` TEXT );',
        function (err) {
            if (err) {
                logger.error('Unable to create recipes table: ' + err.message);
            }
        }
    );
}

/**
 * Closes the shared database handle, if one has been opened.
 */
function closeDB() {
    "use strict";
    logger.debug('Closing db.');
    if (db) {
        db.close();
    }
}

/**
 * Removes known non-content elements (comments, share widgets, scripts,
 * footers, ...) from a cheerio selection.
 *
 * @param {object} b - cheerio selection; it is modified in place.
 * @returns {object} The same selection, for convenience.
 */
function cleaner(b) {
    var unwanted = ['div#disqus_thread', 'SCRIPT', 'FOOTER', 'div.ssba', '.shareaholic-canvas', '.yarpp-related',
        'div.dfad', 'div.postFooterShare', 'div#nextPrevLinks', '.post-comments'];
    unwanted.forEach(function (selector) {
        b.find(selector).remove();
    });
    return b;
}

/**
 * Inserts one row into `recipes`.
 *
 * A single INSERT is atomic on its own, so no explicit transaction is opened.
 *
 * @param {Array} obj - Positional values: [url, html, reduced, title].
 */
function insertRecipe(obj) {
    db.run('INSERT INTO `recipes`(`url`,`html`,`reduced`,`title`) VALUES (?,?,?,?);', obj, function (err) {
        if (err) {
            logger.error('Insert failed: ' + err.message);
            return;
        }
        logger.debug('Insert done..');
    });
}

// Event-bus listeners: thin adapters from bus events to the functions that do the work.
var doInsertRecipe = (obj) => {
    insertRecipe(obj);
};

var doGetRecipe = (url) => {
    genericGrab(url);
};

busEmitter.on('saveRecipeData', doInsertRecipe);
busEmitter.on('getRecipe', doGetRecipe);

/**
 * Downloads `url`, picks the first selector in `generics` that matches the
 * page, strips unwanted elements from that selection and emits
 * `saveRecipeData` with [url, full html, reduced html, title].
 *
 * Nothing is stored when the download fails or no selector matches.
 *
 * @param {string} url - Address of the page to fetch.
 */
function genericGrab(url) {
    logger.info(url);
    request(url, function (err, resp, body) {
        if (err) {
            // Throwing here would be an uncaught exception in an async callback
            // and would take the whole process down.
            logger.error('Unable to fetch ' + url + ': ' + err.message);
            return;
        }
        var $ = cheerio.load(body);
        var title = $('TITLE').text();

        // Try to find a body to grab; check the bound before indexing.
        var i = 0;
        while (i < generics.length && $(generics[i]).length === 0) {
            i++;
        }
        logger.debug(i);
        if (i >= generics.length) {
            return;
        }

        var tdihbody = $(generics[i]);
        logger.debug(tdihbody.length);
        tdihbody = cleaner(tdihbody);
        logger.debug(title);

        // $.html() is taken after cleaner() has run, exactly as before.
        var obj = [url, $.html(), tdihbody.html(), title];
        busEmitter.emit("saveRecipeData", obj);
    });
}

/**
 * Writes `payload` as a JSON response.
 *
 * @param {object} res - Express response.
 * @param {number} status - HTTP status code.
 * @param {*} payload - Value to serialise with JSON.stringify.
 */
function sendJson(res, status, payload) {
    res.writeHead(status, {"Content-Type": "application/json"});
    res.end(JSON.stringify(payload));
}

router.get('/list', function (req, res) {
    logger.debug('list..');
    db.all('select id, title from `recipes` where title is not null;', function (err, rows) {
        if (err) {
            logger.error('list query failed: ' + err.message);
            sendJson(res, 500, {error: 'Database error'});
            return;
        }
        var out = (rows || []).map(function (row) {
            return {"id": row.id, "title": row.title};
        });
        sendJson(res, 200, {list: out});
    });
});

router.get('/entry/:id', function (req, res) {
    logger.debug('entry..');
    logger.debug(req.params.id);
    // The id is bound as a parameter instead of being concatenated into the SQL text.
    db.all('select * from `recipes` where id = ?;', [req.params.id], function (err, rows) {
        if (err) {
            logger.error('entry query failed: ' + err.message);
            sendJson(res, 500, {error: 'Database error'});
            return;
        }
        var out = (rows || []).map(function (row) {
            // Prefer the reduced html; fall back to the full page when it is NULL or empty.
            return {"id": row.id, "title": row.title, "body": row.reduced ? row.reduced : row.html};
        });
        sendJson(res, 200, {list: out});
    });
});

router.post('/add', function (req, res) {
    logger.debug('add entry..');
    var t = req.body;
    var url;
    // req.body is undefined when no body parser is mounted ahead of this router.
    if (t != null && Object.prototype.hasOwnProperty.call(t, 'url')) {
        try {
            // The url field is expected to carry a JSON-encoded string.
            url = JSON.parse(String(t.url));
        } catch (parseErr) {
            logger.error('Malformed url: ' + parseErr.message);
            sendJson(res, 400, {error: 'Malformed url'});
            return;
        }
        if (typeof url !== 'string') {
            logger.error('Malformed url: not a string');
            sendJson(res, 400, {error: 'Malformed url'});
            return;
        }
        logger.debug(url);
        busEmitter.emit("getRecipe", url);
    } else {
        logger.error('No data block!');
    }
    sendJson(res, 200, {adding: url});
});

module.exports = router;

createDB();

//module.exports.grabMarksDailyApple('http://www.marksdailyapple.com/spiced-pork-and-butternut-squash-with-sage');
//module.exports.generic('http://www.health-bent.com/soups/paleo-mediterranean-beef-stew');