"use strict"; /** * Created by Martin on 22/02/2016. */ var express = require('express'); var http = require('http'), request = require('request'), cheerio = require( 'cheerio'), util = require('util'); var jsonfile = require('jsonfile'), fs = require('fs'), STRING = require( 'string'); var zlib = require("zlib"); var log4js = require('log4js'); var logger = log4js.getLogger(); var URL = require('url'); var router = express.Router(); var EventEmitter = require('events'); var nano = require('nano')('http://localhost:5984'); var busEmitter = new EventEmitter(); var db_name = 'keeper'; var dbCouch = nano.use(db_name); var bodyfile = __dirname + '/' + 'body.html'; var htmlfile = __dirname + '/' + 'testoutput.html'; var generics = [ 'ARTICLE', 'div.content_column', 'div.post', 'div.page', '#recipe-single' ]; function cleaner(b) { var _b = b; var unwanted = [ 'div#disqus_thread', 'SCRIPT', 'FOOTER', 'div.ssba', '.shareaholic-canvas', '.yarpp-related', 'div.dfad', 'div.postFooterShare', 'div#nextPrevLinks', '.post-comments', 'HEADER', '.post-title', '#side-menu', '.footer-container', '#pre-footer', '#cakephp-global-navigation', '.masthead', '.breadcrumb-header', '.single-recipe-sidebar', '#recipe-related-videos' ]; for (var i = 0; i < unwanted.length; i++) { _b.find(unwanted[i]).remove(); } return _b; } function insertBookmark(obj) { logger.debug('Inserting into couch...'); logger.info(util.inspect(obj)); dbCouch.insert(obj, function(err, body, header) { if (err) { logger.error('Error inserting into couch'); return; } }); logger.debug('Insert done..'); } function updateBookmark(obj, _id, _rev) { logger.debug('Updating couch...'); var _obj = obj; _obj._id = _id; _obj._rev = _rev; dbCouch.insert(_obj, function(err, body, header) { if (err) { logger.error('Error updating into couch'); return; } }); logger.debug('Update done..'); } var doInsertBookmark = (obj) => { // logger.info('sendSocket: ' + JSON.stringify(obj)); insertBookmark(obj); }; var doUpdateBookmark = (obj, _id, _rev) => { // logger.info('sendSocket: ' + JSON.stringify(obj)); updateBookmark(obj, _id, _rev); }; var doGetBookmark = (obj) => { // logger.info('sendSocket: ' + JSON.stringify(obj)); genericGrab(obj); }; var doGetBookmarkRedo = (obj) => { // logger.info('sendSocket: ' + JSON.stringify(obj)); genericGrab(obj); }; var doGetBookmarkRes = (url, res) => { logger.debug('doGetBookmarkRes'); // logger.info('sendSocket: ' + JSON.stringify(obj)); genericGrab(url, res); }; // Events busEmitter.on('saveBookmarkData', doInsertBookmark); busEmitter.on('updateBookmarkData', doUpdateBookmark); busEmitter.on('getBookmark', doGetBookmark); busEmitter.on('getBookmarkRes', doGetBookmarkRes); busEmitter.on('getBookmarkRedo', doGetBookmarkRedo); function processBody(body, url, _id, _rev) { var obj, tdihbody, i, urlObj, urlPrefix; var $ = cheerio.load(body); var title = $('TITLE').text(); // try to find a body to grab i = 0; while (($(generics[i]).length == 0) && (i < generics.length)) { i++; } logger.debug(i); obj = {}; if (i < generics.length) { tdihbody = $(generics[i]); logger.debug(tdihbody.length); tdihbody = cleaner(tdihbody); logger.debug(title); } else { // bah. nothing to reduce so just grab the body, tidy it and use that tdihbody = $('BODY'); logger.debug(tdihbody.length); tdihbody = cleaner(tdihbody); logger.debug(title); } urlObj = URL.parse(url); urlPrefix = urlObj.protocol + '//' + urlObj.host + '/'; try { tdihbody.find('IMG').each(function(i, elem) { let s, src = $(this).attr("src"); if (!STRING(src).startsWith('http')) { src = urlPrefix + STRING(src).stripLeft('/').s; } s = 'http://image.silvrtree.co.uk/900,fit/' + src; $(this).attr("src", s); }); } catch (e) { logger.error(e); } obj.url = url; obj.html = $.html(); obj.reduced = STRING(tdihbody.html()).trim().s; obj.title = STRING(title).collapseWhitespace().s; if (_id !== null) { busEmitter.emit("updateBookmarkData", obj, _id, _rev); } else { busEmitter.emit("saveBookmarkData", obj); } return obj; } function genericGrab(obj, res) { var url, _id = null, _ver = null; if (typeof obj === 'string') { logger.info(obj); url = obj; } else { url = obj.url; _id = obj._id || null; _ver = obj._rev || null; } logger.warn(typeof obj); logger.info(url); logger.info(_id); logger.info(_ver); request(url, function(err, resp, body) { if (err) throw err; if (resp.headers.hasOwnProperty('content-encoding')) { logger.warn('content-encoding'); if (resp.headers['content-encoding'] == 'gzip') { // to test http://chaosinthekitchen.com/2009/07/lime-and-coconut-chicken/ var gunzip = zlib.createGunzip(); var jsonString = ''; resp.pipe(gunzip); gunzip.on('data', function(chunk) { jsonString += chunk; }); gunzip.on('end', function() { console.log((jsonString)); callback(JSON.stringify(jsonString)); }); gunzip.on('error', function(e) { console.log(e); }); } else { var b = processBody(body, url, _id, _ver); if (res != null) { res.render('grabbed'); } } } else { var b = processBody(body, url, _id, _ver); if (res != null) { res.render('grabbed', {data: b}); } } }); } router.get('/list', function(req, res) { logger.debug('list..'); dbCouch.view('titles', 'titles', function(err, body) { if (!err) { var outJSON = []; body.rows.forEach(function(doc) { outJSON.push({id: doc.id, title: doc.value}) }); //logger.debug(util.inspect(body)); res.writeHead(200, {"ContentType": "application/json"}); res.end(JSON.stringify({list: outJSON})); } else { res.writeHead(500, {"ContentType": "application/json"}); res.end(JSON.stringify({})); } }); }); router.get('/entry/:id', function(req, res) { logger.debug('entry..'); logger.debug(req.params.id); dbCouch.get(req.params.id, function(err, body) { if (!err) { var outJSON = {}; outJSON._id = body._id; outJSON._rev = body._rev; outJSON.title = body.title; outJSON.reduced = body.reduced; outJSON.url = body.url; //logger.debug(util.inspect(body)); res.writeHead(200, {"ContentType": "application/json"}); res.end(JSON.stringify(outJSON)); } else { res.writeHead(500, {"ContentType": "application/json"}); res.end(JSON.stringify({})); } }); }); router.post('/add', function(req, res) { logger.debug('add entry..'); var t = req.body; if (t.hasOwnProperty('url')) { var url = JSON.parse(t.url.toString()); logger.debug(url); busEmitter.emit("getBookmark", t); } else { logger.error('No data block!'); } res.writeHead(200, {"ContentType": "application/json"}); res.end(JSON.stringify({adding: url})); }); router.post('/redo', function(req, res) { logger.debug('redoing entry..'); var t = req.body; console.log(t); if (t.hasOwnProperty('url')) { var url = t.url.toString(); logger.debug(url); busEmitter.emit("getBookmark", t); } else { logger.error('No data block!'); } res.writeHead(200, {"ContentType": "application/json"}); res.end(JSON.stringify({adding: url})); }); router.get('/new', function(req, res) { logger.debug('Save new'); busEmitter.emit("getBookmarkRes", req.query.url, res); }); module.exports = router;