228 lines
5.7 KiB
JavaScript
228 lines
5.7 KiB
JavaScript
"use strict";
|
|
/**
|
|
* Created by Martin on 22/02/2016.
|
|
*/
|
|
var express = require('express');
|
|
var http = require('http'), request = require('request'), cheerio = require('cheerio'), util = require('util');
|
|
var jsonfile = require('jsonfile'), fs = require('fs'), STRING = require('string');
|
|
var zlib = require("zlib");
|
|
var log4js = require('log4js');
|
|
var logger = log4js.getLogger();
|
|
|
|
// Router exported by this module; the bookmark routes below are attached to it.
const router = express.Router();

const EventEmitter = require('events');

// CouchDB client pointed at the local server.
const nano = require('nano')('http://localhost:5984');
// In-process bus used by the routes to trigger grabbing and saving.
const busEmitter = new EventEmitter();

// Database that holds the saved bookmarks.
const db_name = 'keeper';
const dbCouch = nano.use(db_name);

// Output paths next to this module; not referenced by the code visible in this file.
const bodyfile = `${__dirname}/body.html`;
const htmlfile = `${__dirname}/testoutput.html`;
// Selectors probed in order by processBody to locate the main content of a page.
const generics = ['ARTICLE', 'div.content_column', 'div.post'];
|
|
|
|
|
|
/**
 * Removes known boilerplate (comment widgets, scripts, footers, share bars,
 * related-post lists, ad blocks, prev/next links) from a selection.
 *
 * The selection is modified in place; the same object is returned.
 *
 * @param {Object} b - selection exposing `find(selector)` whose result exposes
 *   `remove()` (processBody passes a cheerio selection).
 * @returns {Object} the same `b` that was passed in.
 */
function cleaner(b) {
    var junkSelectors = [
        'div#disqus_thread',
        'SCRIPT',
        'FOOTER',
        'div.ssba',
        '.shareaholic-canvas',
        '.yarpp-related',
        'div.dfad',
        'div.postFooterShare',
        'div#nextPrevLinks',
        '.post-comments'
    ];

    junkSelectors.forEach(function (selector) {
        b.find(selector).remove();
    });

    return b;
}
|
|
|
|
/**
 * Stores a bookmark document in CouchDB.
 *
 * The insert is asynchronous and this function returns immediately; the
 * outcome is only reported through the logger (nothing is returned or thrown).
 *
 * @param {Object} obj - document handed to `dbCouch.insert`.
 */
function insertBookmark(obj) {
    logger.debug('Inserting into couch...');
    logger.info(util.inspect(obj));

    dbCouch.insert(obj, function (err) {
        if (err) {
            // Keep the underlying error so a failed insert can be diagnosed.
            logger.error('Error inserting into couch', err);
            return;
        }
        // Logged from the callback so it is only emitted once the insert has actually completed.
        logger.debug('Insert done..');
    });
}
|
|
|
|
/**
 * Bus listener that forwards a bookmark object to insertBookmark.
 *
 * @param {Object} obj - bookmark document to store.
 */
var doInsertBookmark = function (obj) {
    insertBookmark(obj);
};
|
|
|
|
/**
 * Bus listener that grabs a page without an HTTP response to answer.
 *
 * @param {*} url - value passed straight to genericGrab.
 */
var doGetBookmark = function (url) {
    genericGrab(url);
};
|
|
|
|
/**
 * Bus listener that grabs a page and hands the response object on to
 * genericGrab.
 *
 * @param {*} url - value passed straight to genericGrab.
 * @param {Object} res - response object passed straight to genericGrab.
 */
var doGetBookmarkRes = function (url, res) {
    logger.debug('doGetBookmarkRes');
    genericGrab(url, res);
};
|
|
|
|
// Events: wire the bus topics to their handlers.
//   saveBookmarkData -> store a processed bookmark
//   getBookmark      -> grab a URL, no HTTP response involved
//   getBookmarkRes   -> grab a URL and pass the response object along
busEmitter
    .on('saveBookmarkData', doInsertBookmark)
    .on('getBookmark', doGetBookmark)
    .on('getBookmarkRes', doGetBookmarkRes);
|
|
|
|
/**
 * Extracts a bookmark from a fetched HTML page and queues it for saving.
 *
 * The selectors in `generics` are probed in order; the first one that matches
 * is treated as the page's main content and passed through `cleaner`. On
 * success a 'saveBookmarkData' event carrying the bookmark is emitted on
 * `busEmitter`.
 *
 * @param {string} body - HTML source of the page.
 * @param {string} url - address the page was fetched from; stored as-is.
 * @returns {Object|undefined} `{url, html, reduced, title}`, or `undefined`
 *   when none of the `generics` selectors matches (nothing is emitted then).
 *   `html` is the document serialised after `cleaner` has run; `reduced` and
 *   `title` are whitespace-collapsed.
 */
function processBody(body, url) {
    var $ = cheerio.load(body);
    var title = $('TITLE').text();

    // try to find a body to grab
    // The bounds check comes first so cheerio is never asked to select `undefined`.
    var i = 0;
    while (i < generics.length && $(generics[i]).length === 0) {
        i++;
    }
    logger.debug(i);

    if (i >= generics.length) {
        logger.warn('No known content container found for ' + url);
        return undefined;
    }

    var tdihbody = $(generics[i]);
    logger.debug(tdihbody.length);
    tdihbody = cleaner(tdihbody);
    logger.debug(title);

    var obj = {};
    obj.url = url;
    obj.html = $.html();
    obj.reduced = STRING(tdihbody.html()).collapseWhitespace().s;
    obj.title = STRING(title).collapseWhitespace().s;

    busEmitter.emit("saveBookmarkData", obj);

    return obj;
}
|
|
/**
 * Downloads a page, extracts the bookmark with processBody and, when a
 * response object is supplied, renders the 'grabbed' view with the result.
 *
 * Compressed responses are decoded by `request` itself (`gzip: true`), so the
 * body handed to processBody is always the decoded page text.
 * Page that used to arrive gzip-encoded, handy for manual testing:
 * http://chaosinthekitchen.com/2009/07/lime-and-coconut-chicken/
 *
 * A failed download is logged and, when `res` is supplied, answered with a 500
 * and an empty JSON object; it is never thrown, because throwing from this
 * asynchronous callback would take the whole process down.
 *
 * @param {string} url - address of the page to grab.
 * @param {Object} [res] - response object exposing `render`, `writeHead` and
 *   `end`; when omitted the page is only processed.
 */
function genericGrab(url, res) {
    logger.info(url);

    request({url: url, gzip: true}, function (err, resp, body) {
        if (err) {
            logger.error('Error fetching ' + url, err);
            if (res != null) {
                res.writeHead(500, {"Content-Type": "application/json"});
                res.end(JSON.stringify({}));
            }
            return;
        }

        // `b` is undefined when processBody found no known content container.
        var b = processBody(body, url);
        if (res != null) {
            res.render('grabbed', {data: b});
        }
    });
}
|
|
|
|
/**
 * GET /list — lists the saved bookmarks.
 *
 * Reads the 'titles' view of the 'titles' design document and answers with
 * `{list: [{id, title}, ...]}`. A failed view query is logged and answered
 * with a 500 and an empty JSON object.
 */
router.get('/list', function (req, res) {
    logger.debug('list..');

    dbCouch.view('titles', 'titles', function (err, body) {
        if (err) {
            logger.error('Error reading titles view', err);
            res.writeHead(500, {"Content-Type": "application/json"});
            res.end(JSON.stringify({}));
            return;
        }

        // Each view row contributes its document id and its emitted value as the title.
        var outJSON = body.rows.map(function (doc) {
            return {id: doc.id, title: doc.value};
        });

        res.writeHead(200, {"Content-Type": "application/json"});
        res.end(JSON.stringify({list: outJSON}));
    });
});
|
|
|
|
/**
 * GET /entry/:id — returns one saved bookmark.
 *
 * Looks the document up by the `id` route parameter and answers with
 * `{title, reduced}`. A failed lookup is logged and answered with a 500 and an
 * empty JSON object.
 */
router.get('/entry/:id', function (req, res) {
    logger.debug('entry..');
    logger.debug(req.params.id);

    dbCouch.get(req.params.id, function (err, body) {
        if (err) {
            logger.error('Error reading entry ' + req.params.id, err);
            res.writeHead(500, {"Content-Type": "application/json"});
            res.end(JSON.stringify({}));
            return;
        }

        // Only the title and the reduced content are exposed, not the stored full page.
        var outJSON = {
            title: body.title,
            reduced: body.reduced
        };

        res.writeHead(200, {"Content-Type": "application/json"});
        res.end(JSON.stringify(outJSON));
    });
});
|
|
|
|
/**
 * POST /add — queues a page for grabbing.
 *
 * Expects a request body with a `url` property holding a JSON-encoded string
 * (it is run through JSON.parse). On success a 'getBookmark' event is emitted
 * and the route answers 200 with `{adding: url}` without waiting for the grab.
 * A missing body, a missing `url`, malformed JSON or a non-string value is
 * logged and answered with a 400.
 */
router.post('/add', function (req, res) {
    logger.debug('add entry..');

    var t = req.body;
    var url;

    // req.body may be undefined (no body parser) or lack Object.prototype, so avoid t.hasOwnProperty.
    if (t != null && Object.prototype.hasOwnProperty.call(t, 'url')) {
        try {
            url = JSON.parse(String(t.url));
        } catch (e) {
            logger.error('Malformed url in request body', e);
        }
    } else {
        logger.error('No data block!');
    }

    // Only plain strings are forwarded: the value ends up as the target of an outgoing request.
    if (typeof url !== 'string' || url === '') {
        res.writeHead(400, {"Content-Type": "application/json"});
        res.end(JSON.stringify({error: 'Missing or invalid url'}));
        return;
    }

    logger.debug(url);
    busEmitter.emit("getBookmark", url);

    res.writeHead(200, {"Content-Type": "application/json"});
    res.end(JSON.stringify({adding: url}));
});
|
|
|
|
/**
 * GET /new — grabs the page named by the `url` query parameter.
 *
 * Emits 'getBookmarkRes' with the URL and the response object; the listener is
 * responsible for answering the request. A missing, empty or repeated `url`
 * parameter is logged and answered with a 400 instead of being forwarded.
 */
router.get('/new', function (req, res) {
    logger.debug('Save new');

    var url = req.query.url;
    // A repeated ?url= parameter arrives as an array, which is rejected as well.
    if (typeof url !== 'string' || url === '') {
        logger.error('No url supplied');
        res.writeHead(400, {"Content-Type": "application/json"});
        res.end(JSON.stringify({error: 'Missing or invalid url'}));
        return;
    }

    busEmitter.emit("getBookmarkRes", url, res);
});
|
|
|
|
module.exports = router;
|