"use strict";
/**
 * Created by Martin on 22/02/2016.
 *
 * Express router for the bookmark keeper.
 *
 * Routes:
 *   GET  /list       - list the id/title pairs returned by the CouchDB view "titles/titles"
 *   GET  /entry/:id  - return the stored title and reduced HTML of one document
 *   POST /add        - fetch the page named by the JSON-encoded `url` body field and store it
 *   GET  /new?url=   - fetch the page, store it, and render the "grabbed" view
 */
const express = require('express');
const http = require('http');
const request = require('request');
const cheerio = require('cheerio');
const util = require('util');
const jsonfile = require('jsonfile');
const fs = require('fs');
const STRING = require('string');
const zlib = require('zlib');
const log4js = require('log4js');
const EventEmitter = require('events');
const nano = require('nano')('http://localhost:5984');

const logger = log4js.getLogger();
const router = express.Router();
const busEmitter = new EventEmitter();

const db_name = 'keeper';
const dbCouch = nano.use(db_name);

// Scratch output paths; no live code in this file writes to them.
const bodyfile = __dirname + '/' + 'body.html';
const htmlfile = __dirname + '/' + 'testoutput.html';

// Candidate selectors for the main content of a page, tried in order.
const generics = ['ARTICLE', 'div.content_column', 'div.post'];

// Selectors of elements that are stripped from the grabbed content.
const unwanted = [
    'div#disqus_thread',
    'SCRIPT',
    'FOOTER',
    'div.ssba',
    '.shareaholic-canvas',
    '.yarpp-related',
    'div.dfad',
    'div.postFooterShare',
    'div#nextPrevLinks',
    '.post-comments'
];

/**
 * Write a JSON response with the given status code.
 *
 * @param {object} res - HTTP response object.
 * @param {number} status - HTTP status code.
 * @param {*} payload - Value serialised with JSON.stringify as the body.
 */
function sendJson(res, status, payload) {
    res.writeHead(status, {"Content-Type": "application/json"});
    res.end(JSON.stringify(payload));
}

/**
 * Remove every element matching one of the `unwanted` selectors from the
 * given cheerio selection. The selection is modified in place.
 *
 * @param {object} b - cheerio selection to clean.
 * @returns {object} The same selection, for chaining.
 */
function cleaner(b) {
    unwanted.forEach(function (selector) {
        b.find(selector).remove();
    });
    return b;
}

/**
 * Insert a bookmark document into CouchDB. Failures are logged, not thrown.
 *
 * @param {object} obj - Document to store.
 */
function insertBookmark(obj) {
    logger.debug('Inserting into couch...');
    logger.info(util.inspect(obj));
    dbCouch.insert(obj, function (err) {
        if (err) {
            logger.error('Error inserting into couch', err);
            return;
        }
        // Logged from the callback so it is only reported once the insert has finished.
        logger.debug('Insert done..');
    });
}

const doInsertBookmark = (obj) => {
    insertBookmark(obj);
};

const doGetBookmark = (url) => {
    genericGrab(url);
};

const doGetBookmarkRes = (url, res) => {
    logger.debug('doGetBookmarkRes');
    genericGrab(url, res);
};

// Events
busEmitter.on('saveBookmarkData', doInsertBookmark);
busEmitter.on('getBookmark', doGetBookmark);
busEmitter.on('getBookmarkRes', doGetBookmarkRes);

/**
 * Return the first selector in `generics` that matches at least one element.
 *
 * @param {function} $ - cheerio document function.
 * @returns {?string} The matching selector, or null when none matches.
 */
function findContentSelector($) {
    for (let i = 0; i < generics.length; i++) {
        if ($(generics[i]).length > 0) {
            return generics[i];
        }
    }
    return null;
}

/**
 * Extract the title and main content from an HTML page and emit a
 * "saveBookmarkData" event carrying the resulting document.
 *
 * @param {string} body - HTML source of the page.
 * @param {string} url - Address the page was fetched from.
 * @returns {object|undefined} The document {url, html, reduced, title}, or
 *   undefined when no selector in `generics` matches the page.
 */
function processBody(body, url) {
    const $ = cheerio.load(body);
    const title = $('TITLE').text();

    const selector = findContentSelector($);
    logger.debug(selector);
    if (selector === null) {
        logger.warn('No content block found for ' + url);
        return undefined;
    }

    // cleaner() removes nodes from the parsed document, so it must run before
    // $.html() is read: the stored full HTML is the cleaned page as well.
    const content = cleaner($(selector));
    logger.debug(content.length);
    logger.debug(title);

    const obj = {};
    obj.url = url;
    obj.html = $.html();
    obj.reduced = STRING(content.html()).collapseWhitespace().s;
    obj.title = STRING(title).collapseWhitespace().s;
    busEmitter.emit("saveBookmarkData", obj);
    return obj;
}

/**
 * Fetch a page, process it with processBody and, when a response object is
 * supplied, render the "grabbed" view with the result.
 *
 * @param {string} url - Address of the page to fetch.
 * @param {object} [res] - Optional Express response to answer on.
 */
function genericGrab(url, res) {
    logger.info(url);
    // gzip: true lets request decode compressed bodies itself.
    // A page known to answer gzipped: http://chaosinthekitchen.com/2009/07/lime-and-coconut-chicken/
    request({url: url, gzip: true}, function (err, resp, body) {
        if (err) {
            // Throwing here would escape the callback and take the process down.
            logger.error('Error fetching ' + url, err);
            if (res != null) {
                sendJson(res, 500, {error: 'Unable to fetch url'});
            }
            return;
        }

        logger.debug('headers: ' + util.inspect(resp.headers));
        logger.debug('status: ' + resp.statusCode);

        let b;
        try {
            logger.info('Processing body...');
            b = processBody(body, url);
        } catch (e) {
            logger.error('Error processing ' + url, e);
            if (res != null) {
                sendJson(res, 500, {error: 'Unable to process page'});
            }
            return;
        }

        if (res != null) {
            res.render('grabbed', {data: b});
        }
        logger.info('END');
    });
}

router.get('/list', function (req, res) {
    logger.debug('list..');
    dbCouch.view('titles', 'titles', function (err, body) {
        if (err) {
            logger.error('Error reading titles view', err);
            sendJson(res, 500, {});
            return;
        }
        const outJSON = body.rows.map(function (doc) {
            logger.info(doc);
            return {id: doc.id, title: doc.value};
        });
        sendJson(res, 200, {list: outJSON});
    });
});

router.get('/entry/:id', function (req, res) {
    logger.debug('entry..');
    logger.debug(req.params.id);
    dbCouch.get(req.params.id, function (err, body) {
        if (err) {
            logger.error('Error reading entry ' + req.params.id, err);
            sendJson(res, 500, {});
            return;
        }
        logger.debug(body);
        sendJson(res, 200, {title: body.title, reduced: body.reduced});
    });
});

router.post('/add', function (req, res) {
    logger.debug('add entry..');
    const t = req.body;
    // req.body may be undefined (no body parser) or lack Object.prototype,
    // so hasOwnProperty is not called on it directly.
    if (t == null || !Object.prototype.hasOwnProperty.call(t, 'url')) {
        logger.error('No data block!');
        sendJson(res, 400, {error: 'No data block!'});
        return;
    }

    // The url field is expected to hold a JSON-encoded string.
    let url;
    try {
        url = JSON.parse(String(t.url));
    } catch (e) {
        logger.error('Invalid url field', e);
        sendJson(res, 400, {error: 'Invalid url'});
        return;
    }
    if (typeof url !== 'string' || url.length === 0) {
        logger.error('Invalid url field');
        sendJson(res, 400, {error: 'Invalid url'});
        return;
    }

    logger.debug(url);
    busEmitter.emit("getBookmark", url);
    sendJson(res, 200, {adding: url});
});

router.get('/new', function (req, res) {
    logger.debug('Save new');
    const url = req.query.url;
    if (typeof url !== 'string' || url.length === 0) {
        logger.error('No url supplied');
        sendJson(res, 400, {error: 'No url supplied'});
        return;
    }
    busEmitter.emit("getBookmarkRes", url, res);
});

module.exports = router;