keeper/server/keeper.js
2016-03-02 13:53:28 +00:00

242 lines
6.1 KiB
JavaScript

"use strict";
/**
* Created by Martin on 22/02/2016.
*/
var express = require('express');
var http = require('http'), request = require('request'), cheerio = require('cheerio'), util = require('util');
var jsonfile = require('jsonfile'), fs = require('fs'), STRING = require('string');
var zlib = require("zlib");
var log4js = require('log4js');
var logger = log4js.getLogger();
var router = express.Router();
var EventEmitter = require('events');
var nano = require('nano')('http://localhost:5984');
// In-process event bus; the 'saveBookmarkData', 'getBookmark' and
// 'getBookmarkRes' events are emitted and handled within this module.
var busEmitter = new EventEmitter();
// Name of the CouchDB database that bookmarks are read from and written to.
var db_name = 'keeper';
// nano handle scoped to that database.
var dbCouch = nano.use(db_name);
// Paths next to this module for HTML dump files. No live code in the visible
// part of this file uses them -- presumably debugging leftovers; confirm
// before removing.
var bodyfile = __dirname + '/' + 'body.html';
var htmlfile = __dirname + '/' + 'testoutput.html';
// Selectors tried in order by processBody to locate a page's content container.
var generics = ['ARTICLE', 'div.content_column', 'div.post'];
/**
 * Removes every descendant of the given selection that matches one of a fixed
 * list of unwanted selectors.
 *
 * @param {Object} b - selection exposing `find(selector)`, whose result exposes `remove()`.
 * @returns {Object} the very same selection object that was passed in.
 */
function cleaner(b) {
    [
        'div#disqus_thread',
        'SCRIPT',
        'FOOTER',
        'div.ssba',
        '.shareaholic-canvas',
        '.yarpp-related',
        'div.dfad',
        'div.postFooterShare',
        'div#nextPrevLinks',
        '.post-comments'
    ].forEach(function (selector) {
        b.find(selector).remove();
    });
    return b;
}
/**
 * Stores a bookmark document in the CouchDB database behind `dbCouch`.
 *
 * The insert is asynchronous and its outcome is reported through the logger
 * only; nothing is returned to the caller.
 *
 * @param {Object} obj - document to insert; it is also dumped to the log.
 */
function insertBookmark(obj) {
    logger.debug('Inserting into couch...');
    logger.info(util.inspect(obj));
    dbCouch.insert(obj, function (err) {
        if (err) {
            // Log the error itself so a failed insert can actually be diagnosed.
            logger.error('Error inserting into couch', err);
            return;
        }
        // Logged from the callback so the message reflects real completion
        // rather than the moment the request was merely issued.
        logger.debug('Insert done..');
    });
}
/**
 * Bus listener that hands the emitted object straight to insertBookmark.
 *
 * @param {Object} obj - payload forwarded unchanged.
 */
var doInsertBookmark = function (obj) {
    insertBookmark(obj);
};
/**
 * Bus listener that starts a grab of the given address without an HTTP
 * response to report back to.
 *
 * @param {string} url - address passed to genericGrab as its only argument.
 */
var doGetBookmark = function (url) {
    genericGrab(url);
};
/**
 * Bus listener that starts a grab of the given address and passes along the
 * response object the result should be reported to.
 *
 * @param {string} url - address passed to genericGrab.
 * @param {Object} res - response object forwarded to genericGrab unchanged.
 */
var doGetBookmarkRes = function (url, res) {
    logger.debug('doGetBookmarkRes');
    genericGrab(url, res);
};
// Event wiring: each bus event is routed to its handler, registered in this order.
busEmitter
    .on('saveBookmarkData', doInsertBookmark)
    .on('getBookmark', doGetBookmark)
    .on('getBookmarkRes', doGetBookmarkRes);
/**
 * Parses a fetched page, picks its content container and emits the extracted
 * bookmark on the bus as "saveBookmarkData".
 *
 * The selectors in `generics` are tried in order and the first one that
 * matches anything is used.
 *
 * @param {string} body - HTML text of the page.
 * @param {string} url - address the page came from; stored on the result.
 * @returns {Object|undefined} `{url, html, reduced, title}` when a container
 *     was found (the same object that is emitted), otherwise undefined.
 */
function processBody(body, url) {
    const $ = cheerio.load(body);
    const title = $('TITLE').text();

    // The index is bounds-checked before each lookup, so $() is never invoked
    // with an out-of-range (undefined) selector when nothing matches.
    let container = null;
    for (let i = 0; i < generics.length && container === null; i++) {
        const candidate = $(generics[i]);
        if (candidate.length > 0) {
            logger.debug(i);
            container = candidate;
        }
    }
    if (container === null) {
        logger.warn('No content container found for ' + url);
        return undefined;
    }

    logger.debug(container.length);
    const cleaned = cleaner(container);
    logger.debug(title);

    const obj = {
        url: url,
        // NOTE(review): serialized after cleaner() ran, exactly as before.
        // Presumably the removed elements are therefore missing from this
        // "full" html as well -- confirm whether the untouched page was meant.
        html: $.html(),
        reduced: STRING(cleaned.html()).collapseWhitespace().s,
        title: STRING(title).collapseWhitespace().s
    };
    busEmitter.emit("saveBookmarkData", obj);
    return obj;
}
/**
 * Downloads a page, runs it through processBody and, when a response object
 * is supplied, renders the 'grabbed' view with the extracted data.
 *
 * Failures are logged and, if a response object was supplied, answered with a
 * JSON error instead of being thrown from the asynchronous callback.
 *
 * NOTE(review): `url` is fetched as given; no scheme or host restriction is
 * applied here even though the routes pass client-supplied values.
 *
 * @param {string} url - address of the page to download.
 * @param {Object} [res] - optional Express-style response (`render`,
 *     `writeHead`, `end`); when omitted nothing is sent to a client.
 */
function genericGrab(url, res) {
    // Logs a failure and answers the waiting client, if any, so that its
    // HTTP request does not hang.
    function fail(status, message, err) {
        if (err) {
            logger.error(message, err);
        } else {
            logger.error(message);
        }
        if (res != null) {
            res.writeHead(status, {"Content-Type": "application/json"});
            res.end(JSON.stringify({error: message}));
        }
    }

    if (typeof url !== 'string' || url.length === 0) {
        fail(400, 'No url supplied');
        return;
    }

    logger.info(url);
    // gzip: true makes request decode compressed responses itself, so `body`
    // is always the decoded page text and needs no manual gunzip step.
    request({url: url, gzip: true}, function (err, resp, body) {
        if (err) {
            // Throwing from this callback would surface as an uncaught exception.
            fail(500, 'Unable to fetch ' + url, err);
            return;
        }
        logger.debug('headers: ', resp.headers);
        logger.debug(resp.statusCode);
        logger.info('Processing body...');

        let data;
        try {
            data = processBody(body, url);
        } catch (e) {
            fail(500, 'Unable to process ' + url, e);
            return;
        }
        if (res != null) {
            res.render('grabbed', {data: data});
        }
        logger.info('END');
    });
}
/**
 * GET /list -- answers with `{list: [{id, title}, ...]}` built from the rows
 * of the 'titles' view in the 'titles' design document.
 * Answers 500 with an empty JSON object when the view query fails.
 */
router.get('/list', function (req, res) {
    logger.debug('list..');
    dbCouch.view('titles', 'titles', function (err, body) {
        if (err) {
            logger.error('Error reading titles view', err);
            res.writeHead(500, {"Content-Type": "application/json"});
            res.end(JSON.stringify({}));
            return;
        }
        const list = body.rows.map(function (row) {
            logger.info(row);
            return {id: row.id, title: row.value};
        });
        // The header must be spelled "Content-Type"; "ContentType" is just an
        // unknown custom header and leaves the response untyped.
        res.writeHead(200, {"Content-Type": "application/json"});
        res.end(JSON.stringify({list: list}));
    });
});
/**
 * GET /entry/:id -- answers with the `title` and `reduced` fields of the
 * document whose id is given in the path.
 * Answers 500 with an empty JSON object when the lookup fails.
 */
router.get('/entry/:id', function (req, res) {
    logger.debug('entry..');
    logger.debug(req.params.id);
    dbCouch.get(req.params.id, function (err, body) {
        if (err) {
            logger.error('Error reading entry ' + req.params.id, err);
            res.writeHead(500, {"Content-Type": "application/json"});
            res.end(JSON.stringify({}));
            return;
        }
        logger.debug(body);
        // The header must be spelled "Content-Type"; "ContentType" is just an
        // unknown custom header and leaves the response untyped.
        res.writeHead(200, {"Content-Type": "application/json"});
        res.end(JSON.stringify({title: body.title, reduced: body.reduced}));
    });
});
/**
 * POST /add -- queues a grab of the address given in the request body.
 *
 * The body must carry a `url` field holding a JSON-encoded string. On success
 * a "getBookmark" event is emitted and `{adding: url}` is returned; a missing
 * or malformed `url` is answered with 400 instead of throwing.
 */
router.post('/add', function (req, res) {
    logger.debug('add entry..');
    const payload = req.body;
    let url;
    // req.body is undefined when no body parser ran, so guard before probing it.
    if (payload != null && Object.prototype.hasOwnProperty.call(payload, 'url')) {
        try {
            url = JSON.parse(String(payload.url));
        } catch (e) {
            logger.error('Malformed url value', e);
        }
    } else {
        logger.error('No data block!');
    }

    if (typeof url !== 'string' || url.length === 0) {
        res.writeHead(400, {"Content-Type": "application/json"});
        res.end(JSON.stringify({error: 'A JSON-encoded "url" string is required'}));
        return;
    }

    logger.debug(url);
    busEmitter.emit("getBookmark", url);
    res.writeHead(200, {"Content-Type": "application/json"});
    res.end(JSON.stringify({adding: url}));
});
/**
 * GET /new?url=... -- emits "getBookmarkRes" with the address from the query
 * string and this response object, leaving the handler of that event to
 * answer the request. A missing `url` parameter is answered with 400 here.
 */
router.get('/new', function (req, res) {
    logger.debug('Save new');
    const url = req.query.url;
    // Reject early: without an address there is nothing to fetch.
    if (typeof url !== 'string' || url.length === 0) {
        logger.error('No url query parameter');
        res.writeHead(400, {"Content-Type": "application/json"});
        res.end(JSON.stringify({error: 'Missing "url" query parameter'}));
        return;
    }
    busEmitter.emit("getBookmarkRes", url, res);
});
module.exports = router;