319 lines
7.6 KiB
JavaScript
319 lines
7.6 KiB
JavaScript
"use strict";
|
|
/**
|
|
* Created by Martin on 22/02/2016.
|
|
*/
|
|
var express = require('express');
|
|
var http = require('http'), request = require('request'), cheerio = require('cheerio'), util = require('util');
|
|
var jsonfile = require('jsonfile'), fs = require('fs'), STRING = require('string');
|
|
var zlib = require("zlib");
|
|
var log4js = require('log4js');
|
|
var logger = log4js.getLogger();
|
|
var URL = require('url');
|
|
|
|
var router = express.Router();
|
|
|
|
var EventEmitter = require('events');
|
|
|
|
var nano = require('nano')('http://localhost:5984');
|
|
// In-process event bus: the routes below emit on it and the do* handlers
// listen on it.
const busEmitter = new EventEmitter();

// Name of the CouchDB database, and the nano handle scoped to it.
const db_name = 'keeper';
const dbCouch = nano.use(db_name);

// File paths located next to this module.
// NOTE(review): neither path is referenced in the visible code - presumably
// left over from debugging; confirm before removing.
const bodyfile = [__dirname, 'body.html'].join('/');
const htmlfile = [__dirname, 'testoutput.html'].join('/');

// Selectors tried in order by processBody to find the main content of a page.
const generics = [
  'ARTICLE',
  'div.content_column',
  'div.post',
  'div.page'
];
|
|
|
|
|
|
/**
 * Removes unwanted elements (comment threads, scripts, share widgets,
 * related-post blocks, headers and footers) from a selection.
 *
 * The selection is modified in place; the same object is handed back so the
 * call can be used in an assignment.
 *
 * @param {Object} b - selection exposing `find(selector).remove()`
 *   (a cheerio selection in this module).
 * @returns {Object} the selection that was passed in.
 */
function cleaner(b) {
  var junkSelectors = [
    'div#disqus_thread',
    'SCRIPT',
    'FOOTER',
    'div.ssba',
    '.shareaholic-canvas',
    '.yarpp-related',
    'div.dfad',
    'div.postFooterShare',
    'div#nextPrevLinks',
    '.post-comments',
    'HEADER'
  ];

  junkSelectors.forEach(function (selector) {
    b.find(selector).remove();
  });

  return b;
}
|
|
|
|
/**
 * Stores a new bookmark document in CouchDB.
 *
 * The insert is asynchronous: the outcome is only known inside the callback,
 * so the completion message is logged there (and only on success) instead of
 * straight after the call was issued. Failures are logged together with the
 * error object so the cause is not lost.
 *
 * @param {Object} obj - document to insert.
 */
function insertBookmark(obj) {
  logger.debug('Inserting into couch...');
  logger.info(util.inspect(obj));

  dbCouch.insert(obj, function (err) {
    if (err) {
      logger.error('Error inserting into couch', err);
      return;
    }
    logger.debug('Insert done..');
  });
}
|
|
|
|
/**
 * Updates an existing bookmark document in CouchDB.
 *
 * `_id` and `_rev` are written onto `obj` itself (the caller's object is
 * modified) before it is sent to `dbCouch.insert`.
 *
 * The write is asynchronous: the completion message is logged from the
 * callback, and only on success. Failures are logged together with the error
 * object.
 *
 * @param {Object} obj - document body; receives `_id` and `_rev`.
 * @param {string} _id - id of the document to update.
 * @param {string} _rev - revision of the document being replaced.
 */
function updateBookmark(obj, _id, _rev) {
  logger.debug('Updating couch...');

  obj._id = _id;
  obj._rev = _rev;
  logger.info(util.inspect(obj));

  dbCouch.insert(obj, function (err) {
    if (err) {
      logger.error('Error updating into couch', err);
      return;
    }
    logger.debug('Update done..');
  });
}
|
|
/**
 * Listener for the 'saveBookmarkData' event: hands the document straight to
 * insertBookmark.
 *
 * @param {Object} obj - document to store.
 */
const doInsertBookmark = (obj) => {
  insertBookmark(obj);
};
|
|
|
|
/**
 * Listener for the 'updateBookmarkData' event: forwards its arguments
 * unchanged to updateBookmark.
 *
 * @param {Object} obj - document body.
 * @param {string} _id - id of the document to update.
 * @param {string} _rev - revision being replaced.
 */
const doUpdateBookmark = (obj, _id, _rev) => {
  updateBookmark(obj, _id, _rev);
};
|
|
|
|
/**
 * Listener for the 'getBookmark' event: starts a fetch of the bookmark via
 * genericGrab, without an HTTP response to render into.
 *
 * @param {Object} obj - bookmark request passed on to genericGrab.
 */
const doGetBookmark = (obj) => {
  genericGrab(obj);
};
|
|
|
|
/**
 * Listener for the 'getBookmarkRedo' event: re-fetches a bookmark via
 * genericGrab, without an HTTP response to render into.
 *
 * @param {Object} obj - bookmark request passed on to genericGrab.
 */
const doGetBookmarkRedo = (obj) => {
  genericGrab(obj);
};
|
|
|
|
/**
 * Listener for the 'getBookmarkRes' event: fetches a page and renders the
 * result into the supplied HTTP response.
 *
 * The event is emitted with a plain URL string, whereas genericGrab reads the
 * address from the `url` property of its first argument. A string is therefore
 * wrapped as `{url: ...}` before it is forwarded; an object is passed through
 * untouched.
 *
 * @param {string|Object} url - URL to fetch, or an object carrying `url`.
 * @param {Object} res - HTTP response handed on to genericGrab.
 */
var doGetBookmarkRes = (url, res) => {
  logger.debug('doGetBookmarkRes');

  var target = (typeof url === 'string') ? { url: url } : url;
  genericGrab(target, res);
};
|
|
|
|
// Events: attach each bus event to its handler. `on` returns the emitter, so
// the registrations are chained in the same order as before.
busEmitter
  .on('saveBookmarkData', doInsertBookmark)
  .on('updateBookmarkData', doUpdateBookmark)
  .on('getBookmark', doGetBookmark)
  .on('getBookmarkRes', doGetBookmarkRes)
  .on('getBookmarkRedo', doGetBookmarkRedo);
|
|
|
|
/**
 * Reduces a fetched HTML page to its main content and queues it for storage.
 *
 * Steps:
 *  1. Load the markup with cheerio and read the page title.
 *  2. Use the first selector in `generics` that matches anything; if none
 *     matches, fall back to the whole BODY.
 *  3. Strip unwanted elements from that selection with `cleaner`.
 *  4. Point every image in the selection at the image proxy, resolving its
 *     `src` against the page URL first.
 *  5. Emit 'updateBookmarkData' when an id was supplied, otherwise
 *     'saveBookmarkData'.
 *
 * @param {string} body - HTML of the fetched page.
 * @param {string} url - address the page was fetched from.
 * @param {?string} _id - id of an existing document, or null/undefined for a
 *   new bookmark.
 * @param {?string} _rev - revision of the existing document, if any.
 * @returns {Object} `{url, html, reduced, title}`: `html` is the whole
 *   (modified) document, `reduced` the trimmed inner HTML of the selection,
 *   `title` the whitespace-collapsed page title.
 */
function processBody(body, url, _id, _rev) {
  var imageProxy = 'http://image.silvrtree.co.uk/900,fit/';
  var $ = cheerio.load(body);
  var title = $('TITLE').text();

  // Try to find a body to grab. The bounds check lives in the loop header so
  // that no lookup is made with an out-of-range selector.
  var selector = 'BODY';
  var i;
  for (i = 0; i < generics.length; i++) {
    if ($(generics[i]).length > 0) {
      selector = generics[i];
      break;
    }
  }
  // i === generics.length here means nothing matched and BODY is used.
  logger.debug(i);

  var tdihbody = $(selector);
  logger.debug(tdihbody.length);
  tdihbody = cleaner(tdihbody);
  logger.debug(title);

  try {
    tdihbody.find('IMG').each(function () {
      var img = $(this);
      var src = img.attr('src');

      // Nothing to proxy when there is no source or the image is inline data.
      if (!src || /^data:/i.test(src)) {
        return;
      }

      // Absolute http(s) sources are used as they are. Anything else
      // (root-relative, document-relative or protocol-relative) is resolved
      // against the page URL.
      if (!/^https?:\/\//i.test(src)) {
        src = URL.resolve(url, src);
      }

      img.attr('src', imageProxy + src);
    });
  } catch (e) {
    logger.error(e);
  }

  var obj = {};
  obj.url = url;
  obj.html = $.html();
  obj.reduced = STRING(tdihbody.html()).trim().s;
  obj.title = STRING(title).collapseWhitespace().s;

  // `!= null` also treats an omitted (undefined) id as "new bookmark".
  if (_id != null) {
    busEmitter.emit("updateBookmarkData", obj, _id, _rev);
  } else {
    busEmitter.emit("saveBookmarkData", obj);
  }

  return obj;
}
|
|
/**
 * Fetches a page, runs it through processBody and, when a response object is
 * supplied, renders the 'grabbed' view with the result.
 *
 * Compressed responses are decoded by `request` itself (`gzip: true`), so the
 * body handed to the callback is always plain text.
 *
 * A failed fetch is logged and answered with a 500 rather than thrown: an
 * exception raised inside the request callback cannot be caught by the
 * caller.
 *
 * @param {Object} obj - bookmark request; `url` is the page to fetch,
 *   optional `_id` / `_rev` identify an existing document to update.
 * @param {Object} [res] - HTTP response to render into; omit for
 *   fire-and-forget fetches.
 */
function genericGrab(obj, res) {
  var url = obj.url;
  var _id = obj._id || null;
  var _ver = obj._rev || null;

  logger.info(url);
  logger.info(_id);
  logger.info(_ver);

  request({ url: url, gzip: true }, function (err, resp, body) {
    if (err) {
      logger.error('Error fetching ' + url, err);
      if (res != null) {
        res.writeHead(500, {"Content-Type": "application/json"});
        res.end(JSON.stringify({}));
      }
      return;
    }

    var bookmark = processBody(body, url, _id, _ver);
    if (res != null) {
      res.render('grabbed', {data: bookmark});
    }
  });
}
|
|
|
|
/**
 * GET /list - returns the id and title of every stored bookmark.
 *
 * Reads the `titles` view of the `titles` design document and answers with
 * `{list: [{id, title}, ...]}`. A failed view query is logged and answered
 * with a 500 and an empty JSON object.
 */
router.get('/list', function (req, res) {
  logger.debug('list..');

  dbCouch.view('titles', 'titles', function (err, body) {
    if (err) {
      logger.error('Error reading titles view', err);
      res.writeHead(500, {"Content-Type": "application/json"});
      res.end(JSON.stringify({}));
      return;
    }

    var outJSON = body.rows.map(function (doc) {
      return {id: doc.id, title: doc.value};
    });

    res.writeHead(200, {"Content-Type": "application/json"});
    res.end(JSON.stringify({list: outJSON}));
  });
});
|
|
|
|
/**
 * GET /entry/:id - returns one stored bookmark.
 *
 * Answers with the document's `_id`, `_rev`, `title`, `reduced` and `url`
 * fields (the full `html` is deliberately left out of the response). A failed
 * lookup is logged and answered with a 500 and an empty JSON object.
 */
router.get('/entry/:id', function (req, res) {
  logger.debug('entry..');
  logger.debug(req.params.id);

  dbCouch.get(req.params.id, function (err, body) {
    if (err) {
      logger.error('Error reading entry ' + req.params.id, err);
      res.writeHead(500, {"Content-Type": "application/json"});
      res.end(JSON.stringify({}));
      return;
    }

    var outJSON = {
      _id: body._id,
      _rev: body._rev,
      title: body.title,
      reduced: body.reduced,
      url: body.url
    };

    res.writeHead(200, {"Content-Type": "application/json"});
    res.end(JSON.stringify(outJSON));
  });
});
|
|
|
|
/**
 * POST /add - queues a new bookmark for fetching.
 *
 * The body must carry a `url` field. The field is JSON-decoded when it holds
 * a JSON string (the form this route has always parsed); a plain URL is
 * accepted as-is. The decoded URL - not the still-encoded field - is what gets
 * emitted on 'getBookmark', together with any other body fields.
 *
 * Answers `{adding: <url>}` with 200, or an empty object with 400 when no
 * url was supplied.
 */
router.post('/add', function (req, res) {
  logger.debug('add entry..');

  var t = req.body;
  var hasUrl = t != null &&
    Object.prototype.hasOwnProperty.call(t, 'url') &&
    t.url != null;

  if (!hasUrl) {
    logger.error('No data block!');
    res.writeHead(400, {"Content-Type": "application/json"});
    res.end(JSON.stringify({}));
    return;
  }

  var raw = t.url.toString();
  var url = raw;
  try {
    var parsed = JSON.parse(raw);
    if (typeof parsed === 'string') {
      url = parsed;
    }
  } catch (e) {
    // Not JSON: a plain URL never parses, so keep the raw value.
  }
  logger.debug(url);

  // Copy the body so the caller's object is left alone, replacing the encoded
  // url with the decoded one.
  busEmitter.emit("getBookmark", Object.assign({}, t, {url: url}));

  res.writeHead(200, {"Content-Type": "application/json"});
  res.end(JSON.stringify({adding: url}));
});
|
|
|
|
/**
 * POST /redo - queues an existing bookmark for re-fetching.
 *
 * The body must carry a `url` field; it is emitted unchanged on
 * 'getBookmarkRedo' (the event registered for this purpose, whose handler
 * forwards to genericGrab exactly like 'getBookmark' does).
 *
 * Answers `{adding: <url>}` with 200, or an empty object with 400 when no
 * url was supplied.
 */
router.post('/redo', function (req, res) {
  logger.debug('redoing entry..');

  var t = req.body;
  logger.debug(util.inspect(t));

  var hasUrl = t != null &&
    Object.prototype.hasOwnProperty.call(t, 'url') &&
    t.url != null;

  if (!hasUrl) {
    logger.error('No data block!');
    res.writeHead(400, {"Content-Type": "application/json"});
    res.end(JSON.stringify({}));
    return;
  }

  var url = t.url.toString();
  logger.debug(url);
  busEmitter.emit("getBookmarkRedo", t);

  res.writeHead(200, {"Content-Type": "application/json"});
  res.end(JSON.stringify({adding: url}));
});
|
|
|
|
|
|
|
|
/**
 * GET /new?url=... - fetches a page and renders the grabbed result.
 *
 * The `url` query parameter must be a non-empty string; otherwise the request
 * is answered with a 400 and an empty JSON object instead of being passed on.
 * A valid URL is emitted on 'getBookmarkRes' together with the response
 * object, which the handler for that event is expected to complete.
 */
router.get('/new', function (req, res) {
  logger.debug('Save new');

  var url = req.query.url;
  if (typeof url !== 'string' || url.length === 0) {
    logger.error('No url supplied');
    res.writeHead(400, {"Content-Type": "application/json"});
    res.end(JSON.stringify({}));
    return;
  }

  busEmitter.emit("getBookmarkRes", url, res);
});
|
|
|
|
module.exports = router;
|