keeper/server/keeper.js
2016-12-22 09:31:08 +00:00

820 lines
19 KiB
JavaScript

'use strict';
/**
* Created by Martin on 22/02/2016.
*/
var express = require('express');
var http = require('http'), request = require('request'), cheerio = require(
'cheerio'), util = require('util');
var jsonfile = require('jsonfile'), fs = require('fs'), STRING = require(
'string');
var converter = require('html-to-markdown');
var zlib = require('zlib');
var log4js = require('log4js');
var logger = log4js.getLogger();
var URL = require('url');
var router = express.Router();
var EventEmitter = require('events');
//var nano = require('nano')('http://martind2000:1V3D4m526i@localhost:5984');
// In-process event bus: the route handlers below emit events on it and the
// bookmark/tag helpers are registered as its listeners.
var busEmitter = new EventEmitter();
// Database name; in this section it is only referenced by the commented-out
// nano/CouchDB line below.
var db_name = 'keeper';
//var dbCouch = nano.use(db_name);
/*
We've moved to cloudant through IBM Bluemix for the database
https://25f854ee-1b51-49ff-acd9-5b0ff478d944-bluemix.cloudant.com/dashboard.html#usage
*/
// NOTE(review): these are live-looking service credentials committed to source
// control. They should be rotated and loaded from the environment or a secrets
// store instead - confirm with whoever owns the Cloudant account.
// Only username, password and database are read below; host, port and url are
// not referenced in this section.
var credentials = {
"username": "25f854ee-1b51-49ff-acd9-5b0ff478d944-bluemix",
"password": "8e417af1b0462ca55726848846cc6b8696fc76defe9d1864cbc334be59549e0c",
"host": "25f854ee-1b51-49ff-acd9-5b0ff478d944-bluemix.cloudant.com",
"port": 443,
"url": "https://25f854ee-1b51-49ff-acd9-5b0ff478d944-bluemix:8e417af1b0462ca55726848846cc6b8696fc76defe9d1864cbc334be59549e0c@25f854ee-1b51-49ff-acd9-5b0ff478d944-bluemix.cloudant.com",
"database" : "keeper"
};
var Cloudant = require('cloudant');
// Cloudant client authenticated with the account name and password above.
var cloudant = Cloudant({account:credentials.username, password:credentials.password});
// Handle on the 'keeper' database; every insert/get/view in this file goes through it.
var dbCloudant = cloudant.db.use(credentials.database);
// Paths next to this file. In this section they are only mentioned in
// commented-out code (jsonFile) or not at all - presumably debugging leftovers.
var jsonFile = __dirname + '/' + 'output.json';
var bodyfile = __dirname + '/' + 'body.html';
var htmlfile = __dirname + '/' + 'testoutput.html';
// Selectors that usually wrap the main article content. genericProcessor tries
// them in this order and uses the first one that matches anything.
var generics = [
  'ARTICLE', 'div.content_column', 'div.post',
  'div.page', '#recipe-single', 'div.content.body'
];
// Host-specific processors. processBody compares the host of the grabbed URL
// against `url` and, on a match, calls `fn(body, url)` instead of the generic
// processor. The wrappers resolve the target function only when invoked.
var specialHandlers = [
  {url: 'www.reddit.com', fn: (body, url) => doReddit(body, url)},
  {url: 'developer.android.com', fn: (body, url) => doAndroidDeveloper(body, url)},
  {url: 'www.engadget.com', fn: (body, url) => doEngadget(body, url)}
];
/**
 * Removes page furniture (head tags, scripts, headers/footers, share widgets,
 * comment sections, navigation) from a selection, in place.
 *
 * @param b selection object exposing `find(selector)` whose result has `remove()`
 * @returns the same selection object that was passed in
 */
function cleaner(b) {
  var junkSelectors = [
    // document plumbing
    'LINK', 'META', 'TITLE',
    'div#disqus_thread',
    'SCRIPT', 'FOOTER',
    // sharing / related-content widgets
    'div.ssba', '.shareaholic-canvas', '.yarpp-related', 'div.dfad',
    'div.postFooterShare', 'div#nextPrevLinks', '.post-comments',
    // headers, menus and footers
    'HEADER', '.post-title', '#side-menu', '.footer-container', '#pre-footer',
    '#cakephp-global-navigation', '.masthead', '.breadcrumb-header',
    '.single-recipe-sidebar', '#recipe-related-videos', '#tnav', '.footer',
    '#tb-wrapper', '#comments', '#menu'
  ];
  junkSelectors.forEach(function(selector) {
    b.find(selector).remove();
  });
  return b;
}
/**
 * Stores a new bookmark document in the database.
 *
 * The insert is asynchronous and fire-and-forget: failures are logged (with the
 * underlying error) but not reported to the caller.
 *
 * @param obj bookmark document to insert
 */
function insertBookmark(obj) {
  logger.debug('Inserting into couch...');
  logger.info(util.inspect(obj));
  dbCloudant.insert(obj, function(err) {
    if (err) {
      // Include the error itself; the bare message gave no clue what went wrong.
      logger.error('Error inserting into couch', err);
      return;
    }
    // Logged only once the database has actually acknowledged the insert.
    logger.debug('Insert done..');
  });
}
/**
 * Writes a new revision of an existing bookmark document.
 *
 * `_id` and `_rev` are stamped onto `obj` itself (the caller's object is
 * modified) before it is inserted. On success an 'updateTagsDB' event is
 * emitted so the tag list is rebuilt. Failures are logged with the underlying
 * error and otherwise ignored.
 *
 * @param obj  document fields to store
 * @param _id  id of the document being replaced
 * @param _rev revision the update is based on
 */
function updateBookmark(obj, _id, _rev) {
  logger.debug('Updating couch...');
  var _obj = obj;
  _obj._id = _id;
  _obj._rev = _rev;
  dbCloudant.insert(_obj, function(err) {
    if (err) {
      // Include the error itself (e.g. a revision conflict) in the log.
      logger.error('Error updating into couch', err);
      return;
    }
    logger.info('I think we updated ok...');
    busEmitter.emit('updateTagsDB');
    // Logged only once the database has actually acknowledged the update.
    logger.debug('Update done..');
  });
}
// Bus listener for 'saveBookmarkData': hands the document to insertBookmark.
var doInsertBookmark = function(obj) {
  insertBookmark(obj);
};
// Bus listener for 'updateBookmarkData': forwards document, id and revision to
// updateBookmark.
var doUpdateBookmark = function(obj, _id, _rev) {
  updateBookmark(obj, _id, _rev);
};
// Bus listener for 'getBookmark': starts a grab for the given url/descriptor.
var doGetBookmark = function(obj) {
  genericGrab(obj);
};
// Bus listener for 'getBookmarkRedo': re-grabs the given bookmark via genericGrab.
var doGetBookmarkRedo = function(obj) {
  genericGrab(obj);
};
// Bus listener for 'getBookmarkRes': grabs `url` and lets genericGrab answer
// on the supplied response object.
var doGetBookmarkRes = function(url, res) {
  logger.debug('doGetBookmarkRes');
  genericGrab(url, res);
};
/**
 * Bus listener for 'updateTagsDB': rebuilds the stored tag list.
 *
 * Reads every row of the 'getAllTags' view, collects the distinct tags in
 * first-seen order, then hands that list to doSaveTagsDB for each document
 * returned by the 'taglist' view. Errors from either view are logged; nothing
 * is returned or thrown.
 */
var doUpdateTagsDB = () => {
  logger.debug('Update the tags database...');
  dbCloudant.view('getAllTags', 'getAllTags', function(err, body) {
    if (err) {
      // Previously this failure was silently ignored.
      logger.error('Unable to read the getAllTags view', err);
      return;
    }
    // A Set keeps first-seen order and de-duplicates in a single pass.
    var distinct = new Set();
    body.rows.forEach(function(row) {
      // row.value may be a single tag or an array of tags; concat flattens both.
      [].concat(row.value).forEach(function(tag) {
        distinct.add(tag);
      });
    });
    var masterList = Array.from(distinct);
    dbCloudant.view('taglist', 'taglist', function(err, body) {
      if (err) {
        logger.error('NO TAG LIST EXISTS');
        return;
      }
      body.rows.forEach(function(row) {
        doSaveTagsDB(row.value, masterList);
      });
    });
  });
};
/**
 * Bus listener for 'saveTagsDB' (also called directly by doUpdateTagsDB):
 * replaces `taglist` on the given tag document and writes it back.
 *
 * @param orig    stored tag document; it is modified in place
 * @param newList tag list to store on it
 */
var doSaveTagsDB = function(orig, newList) {
  logger.debug('doSaveTagsDB');
  orig.taglist = newList;
  dbCloudant.insert(orig, function(failure) {
    if (!failure) {
      logger.info('Updated the tags list...');
      return;
    }
    logger.error('Error updating into couch');
  });
};
// Events: wire every bus event to its handler (on() returns the emitter, so
// the registrations can be chained).
busEmitter
  .on('saveBookmarkData', doInsertBookmark)
  .on('updateBookmarkData', doUpdateBookmark)
  .on('getBookmark', doGetBookmark)
  .on('getBookmarkRes', doGetBookmarkRes)
  .on('getBookmarkRedo', doGetBookmarkRedo)
  .on('updateTagsDB', doUpdateTagsDB)
  .on('saveTagsDB', doSaveTagsDB);
/**
 * Special handler for www.engadget.com pages.
 *
 * Takes the `DIV#page_body` element as the article, strips page furniture with
 * cleaner(), makes image URLs absolute and routes them through the
 * image.silvrtree.co.uk resizing proxy, then builds the bookmark record.
 *
 * @param body raw HTML of the page
 * @param url  address the page was fetched from
 * @returns object with url, html (whole document after rewriting), reduced
 *          (article HTML), nib (first 300 chars of article text), title,
 *          markdown and, when an image with a src was found, thumbnail
 */
function doEngadget(body, url) {
  logger.info('GRABBING Engadget');
  var obj = {};
  var $ = cheerio.load(body);
  var title = $('TITLE').text();
  var tdihbody = $('DIV#page_body');
  logger.debug('Length:' , tdihbody.length);
  tdihbody = cleaner(tdihbody);
  logger.debug('Title: ', title);
  var urlObj = URL.parse(url);
  var urlPrefix = urlObj.protocol + '//' + urlObj.host + '/';
  try {
    tdihbody.find('IMG').each(function() {
      var src = $(this).attr('src');
      // An IMG without a src yields undefined, not null. Skip it; otherwise the
      // string helper throws and the remaining images are never rewritten.
      if (src === null || typeof src === 'undefined') {
        return;
      }
      if (!STRING(src).startsWith('http')) {
        logger.debug('Stripping:' + src);
        src = urlPrefix + STRING(src).stripLeft('/').trim().s;
      }
      // The first usable image becomes the bookmark thumbnail.
      if (typeof obj.thumbnail === 'undefined') {
        obj.thumbnail = src;
      }
      $(this).attr('src', 'http://image.silvrtree.co.uk/900,fit/' + src);
    });
  }
  catch (e) {
    // Image rewriting is best-effort; the bookmark is still built.
    logger.error(e);
  }
  obj.url = STRING(url).trim().s;
  obj.html = $.html();
  obj.reduced = STRING(tdihbody.html()).trim().s;
  obj.nib = STRING(tdihbody.text()).collapseWhitespace().trim().left(300).s;
  obj.title = STRING(title).collapseWhitespace().s;
  obj.markdown = converter.convert(obj.reduced);
  return obj;
}
/**
 * Special handler for developer.android.com pages.
 *
 * Takes the `DIV.jd-descr` element as the article, strips page furniture with
 * cleaner(), makes image URLs absolute and routes them through the
 * image.silvrtree.co.uk resizing proxy, then builds the bookmark record.
 *
 * @param body raw HTML of the page
 * @param url  address the page was fetched from
 * @returns object with url, html (whole document after rewriting), reduced
 *          (article HTML), nib (first 300 chars of article text), title,
 *          markdown and, when an image with a src was found, thumbnail
 */
function doAndroidDeveloper(body, url) {
  logger.info('GRABBING AndroidDeveloper');
  var obj = {};
  var $ = cheerio.load(body);
  var title = $('TITLE').text();
  var tdihbody = $('DIV.jd-descr');
  logger.debug(tdihbody.length);
  tdihbody = cleaner(tdihbody);
  logger.debug(title);
  var urlObj = URL.parse(url);
  var urlPrefix = urlObj.protocol + '//' + urlObj.host + '/';
  try {
    tdihbody.find('IMG').each(function() {
      var src = $(this).attr('src');
      // An IMG without a src yields undefined, not null. Skip it; otherwise the
      // string helper throws and the remaining images are never rewritten.
      if (src === null || typeof src === 'undefined') {
        return;
      }
      if (!STRING(src).startsWith('http')) {
        logger.debug('Stripping:' + src);
        src = urlPrefix + STRING(src).stripLeft('/').trim().s;
      }
      // The first usable image becomes the bookmark thumbnail.
      if (typeof obj.thumbnail === 'undefined') {
        obj.thumbnail = src;
      }
      $(this).attr('src', 'http://image.silvrtree.co.uk/900,fit/' + src);
    });
  }
  catch (e) {
    // Image rewriting is best-effort; the bookmark is still built.
    logger.error(e);
  }
  obj.url = STRING(url).trim().s;
  obj.html = $.html();
  obj.reduced = STRING(tdihbody.html()).trim().s;
  obj.nib = STRING(tdihbody.text()).collapseWhitespace().trim().left(300).s;
  obj.title = STRING(title).collapseWhitespace().s;
  obj.markdown = converter.convert(obj.reduced);
  return obj;
}
/**
 * Special handler for www.reddit.com pages.
 *
 * Uses the `DIV.entry` elements as the content, logs any thumbnail links found
 * in them, strips page furniture with cleaner() and builds the bookmark record.
 * Images are not rewritten here.
 *
 * @param body raw HTML of the page
 * @param url  address the page was fetched from
 * @returns object with url, html, reduced, nib, title and markdown
 */
function doReddit(body, url) {
  logger.info('GRABBING REDDIT');
  var $ = cheerio.load(body);
  var pageTitle = $('TITLE').text();
  var entry = $('DIV.entry');
  entry.find('A.thumbnail').each(function(index, elem) {
    logger.warn($(elem));
  });
  logger.info('++++++');
  logger.debug(entry.length);
  entry = cleaner(entry);
  logger.debug(pageTitle);
  var result = {
    url: STRING(url).trim().s,
    html: $.html(),
    reduced: STRING(entry.html()).trim().s,
    nib: STRING(entry.text()).collapseWhitespace().trim().left(300).s,
    title: STRING(pageTitle).collapseWhitespace().s
  };
  result.markdown = converter.convert(result.reduced);
  return result;
}
/**
 * Default page processor, used when no host-specific handler applies.
 *
 * Tries each selector in `generics` in order and uses the first that matches
 * as the article. If none match it falls back to BODY, then to the document
 * root. The chosen content is stripped with cleaner(), image URLs are made
 * absolute and routed through the image.silvrtree.co.uk resizing proxy, and
 * the bookmark record is built.
 *
 * @param body raw HTML of the page
 * @param url  address the page was fetched from
 * @returns object with url, html (whole document after rewriting), reduced
 *          (article HTML), nib (first 300 chars of article text), title,
 *          markdown and, when an image with a src was found, thumbnail
 */
function genericProcessor(body, url) {
  logger.info('USING DEFAULT PROCESSOR');
  var obj = {};
  var $ = cheerio.load(body);
  var title = $('TITLE').text();
  var tdihbody;
  var i = 0;
  // Test the bound first so we never run a query with generics[generics.length]
  // (undefined) once every selector has been tried.
  while (i < generics.length && $(generics[i]).length === 0) {
    i++;
  }
  logger.debug(i);
  if (i < generics.length) {
    logger.warn('Used a generic');
    tdihbody = $(generics[i]);
  } else {
    logger.warn('Using whole body');
    // Nothing to reduce, so grab the body, tidy it and use that.
    tdihbody = $('BODY');
    if (tdihbody.length === 0) {
      tdihbody = $(':root');
    }
  }
  logger.debug(tdihbody.length);
  tdihbody = cleaner(tdihbody);
  logger.debug(title);
  var urlObj = URL.parse(url);
  var urlPrefix = urlObj.protocol + '//' + urlObj.host + '/';
  try {
    tdihbody.find('IMG').each(function() {
      var src = $(this).attr('src');
      // Skip images that have no src attribute at all.
      if (src === null || typeof src === 'undefined') {
        return;
      }
      if (!STRING(src).startsWith('http')) {
        logger.debug('Stripping:' + src);
        src = urlPrefix + STRING(src).stripLeft('/').trim().s;
      }
      // The first usable image becomes the bookmark thumbnail.
      if (typeof obj.thumbnail === 'undefined') {
        obj.thumbnail = src;
      }
      $(this).attr('src', 'http://image.silvrtree.co.uk/900,fit/' + src);
    });
  }
  catch (e) {
    // Image rewriting is best-effort; the bookmark is still built.
    logger.error(e);
  }
  obj.url = STRING(url).trim().s;
  obj.html = $.html();
  obj.reduced = STRING(tdihbody.html()).trim().s;
  obj.nib = STRING(tdihbody.text()).collapseWhitespace().trim().left(300).s;
  obj.title = STRING(title).collapseWhitespace().s;
  obj.markdown = converter.convert(obj.reduced);
  return obj;
}
/**
 * Turns fetched HTML into a bookmark record and queues it for storage.
 *
 * Any entry in specialHandlers whose `url` equals the page's host processes the
 * page; when none matches, genericProcessor is used. The host is recorded on
 * the result, then 'saveBookmarkData' is emitted when `_id` is null and
 * 'updateBookmarkData' (with `_id` and `_rev`) otherwise.
 *
 * @param body raw HTML of the page
 * @param url  address the page was fetched from
 * @param _id  id of an existing document, or null for a new bookmark
 * @param _rev revision of the existing document (passed through on update)
 * @returns the bookmark record
 */
function processBody(body, url, _id, _rev) {
  var parsed = URL.parse(url);
  logger.debug('host:', parsed.host);
  var record = {};
  var handled = false;
  specialHandlers.forEach(function(handler) {
    if (handler.url !== parsed.host) {
      return;
    }
    handled = true;
    record = handler.fn(body, url);
  });
  if (!handled) {
    // No host-specific handler: do generic processing.
    record = genericProcessor(body, url);
  }
  record.host = parsed.host;
  if (_id === null) {
    busEmitter.emit('saveBookmarkData', record);
  } else {
    busEmitter.emit('updateBookmarkData', record, _id, _rev);
  }
  return record;
}
/**
 * Fetches a page and runs it through processBody.
 *
 * @param obj either the URL as a string (new bookmark) or an object with `url`
 *            and optionally `_id` / `_rev` (re-grab of a stored bookmark)
 * @param res optional Express response. When given, the 'grabbed' view is
 *            rendered with the processed record on success, 422 is sent when
 *            no URL was supplied and 500 when fetching or processing fails.
 *
 * Failures are logged and reported on `res`, never thrown: this runs inside an
 * asynchronous callback, where a throw would take the whole process down.
 */
function genericGrab(obj, res) {
  var url, _id = null, _ver = null;
  if (typeof obj === 'string') {
    logger.info(obj);
    url = obj;
  } else if (obj != null) {
    url = obj.url;
    _id = obj._id || null;
    _ver = obj._rev || null;
  }
  logger.warn(typeof obj);
  logger.info(url);
  logger.info(_id);
  logger.info(_ver);
  if (url == null) {
    logger.error('genericGrab called without a url');
    if (res != null) {
      res.status(422).end();
    }
    return;
  }
  var options = {
    url: url,
    headers: {
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36'
    },
    jar: true,
    followRedirect: true,
    followAllRedirects: true,
    // Let request decode compressed responses so `body` is always plain text.
    // The hand-rolled gunzip path piped an already-consumed response and then
    // called an undefined `callback`.
    gzip: true
  };
  request(options, function(err, resp, body) {
    if (err) {
      logger.error(err);
      if (res != null) {
        res.status(500).end();
      }
      return;
    }
    var b;
    try {
      b = processBody(body, url, _id, _ver);
    } catch (e) {
      logger.error(e);
      if (res != null) {
        res.status(500).end();
      }
      return;
    }
    if (res != null) {
      res.render('grabbed', {data: b});
    }
  });
}
/**
 * GET /pocket - renders the 'pocket' view with every row of the 'pocketList'
 * database view. Each row becomes {id, entry}; a string `entry.tn` is routed
 * through the image proxy, anything else is replaced by a placeholder image.
 * On a database error the error is logged and an empty JSON object is sent
 * with status 500.
 */
router.get('/pocket', function(req, res) {
  logger.debug('list..');
  dbCloudant.view('pocketList', 'pocketList', function(err, body) {
    if (err) {
      logger.error(err);
      // The header name needs the hyphen; 'ContentType' is not a real header.
      res.writeHead(500, {'Content-Type': 'application/json'});
      res.end(JSON.stringify({}));
      return;
    }
    var outJSON = body.rows.map(function(doc) {
      var obj = {id: doc.id, entry: doc.value};
      if (typeof obj.entry.tn === 'string') {
        obj.entry.tn = 'http://image.silvrtree.co.uk/100,fit,q80/' + obj.entry.tn;
      } else {
        obj.entry.tn = 'gfx/fm.png';
      }
      return obj;
    });
    logger.debug(util.inspect(body));
    logger.info(util.inspect(outJSON));
    res.render('pocket', {data: outJSON});
  });
});
/**
 * GET /list - responds with {list: [{id, title}, ...]} built from the 'titles'
 * database view, or with an empty JSON object and status 500 when the view
 * cannot be read.
 */
router.get('/list', function(req, res) {
  logger.debug('list..');
  dbCloudant.view('titles', 'titles', function(err, body) {
    if (err) {
      logger.error(err);
      // The header name needs the hyphen; 'ContentType' is not a real header.
      res.writeHead(500, {'Content-Type': 'application/json'});
      res.end(JSON.stringify({}));
      return;
    }
    var outJSON = body.rows.map(function(doc) {
      return {id: doc.id, title: doc.value};
    });
    res.writeHead(200, {'Content-Type': 'application/json'});
    res.end(JSON.stringify({list: outJSON}));
  });
});
/**
 * GET /entry/:id - responds with the stored bookmark's _id, _rev, title,
 * reduced HTML, url and tags (an empty tag structure when none are stored).
 * A failed lookup is logged and answered with an empty JSON object and
 * status 500.
 */
router.get('/entry/:id', function(req, res) {
  logger.debug('entry..');
  logger.debug(req.params.id);
  dbCloudant.get(req.params.id, function(err, body) {
    if (err) {
      logger.error(err);
      // The header name needs the hyphen; 'ContentType' is not a real header.
      res.writeHead(500, {'Content-Type': 'application/json'});
      res.end(JSON.stringify({}));
      return;
    }
    var outJSON = {
      _id: body._id,
      _rev: body._rev,
      title: body.title,
      reduced: body.reduced,
      url: body.url,
      tags: body.tags || {solid: '', list: []}
    };
    res.writeHead(200, {'Content-Type': 'application/json'});
    res.end(JSON.stringify(outJSON));
  });
});
/**
 * GET /tags  - responds with {list: [...]}: the sorted tag list held by the
 *              'taglist' view (the last row carrying a taglist array wins).
 * POST /tags - body {_id, tags}: re-reads the bookmark, queues an update that
 *              stores the new tags, and echoes the updated fields back.
 * Database errors are answered with an empty JSON object and status 500.
 */
router.route('/tags')
  .get(function(req, res, next) {
    logger.debug('tag list..');
    dbCloudant.view('taglist', 'taglist', function(err, body) {
      if (err) {
        logger.error(err);
        // The header name needs the hyphen; 'ContentType' is not a real header.
        res.writeHead(500, {'Content-Type': 'application/json'});
        res.end(JSON.stringify({}));
        return;
      }
      logger.debug(body);
      var outJSON = [];
      body.rows.forEach(function(doc) {
        logger.info(doc.value.taglist);
        // This route has no :id parameter. The old comparison against
        // req.params.id only passed because both sides were undefined, so
        // every row holding a tag list is accepted explicitly instead.
        if (Array.isArray(doc.value.taglist)) {
          outJSON = doc.value.taglist.slice().sort();
        }
      });
      res.writeHead(200, {'Content-Type': 'application/json'});
      res.end(JSON.stringify({list: outJSON}));
    });
  }).post(function(req, res, next) {
    logger.info('regetting:' + req.body._id);
    dbCloudant.get(req.body._id, function(err, body) {
      if (err) {
        logger.error(err);
        res.writeHead(500, {'Content-Type': 'application/json'});
        res.end(JSON.stringify({}));
        return;
      }
      // NOTE(review): only url, html, reduced and title are copied from the
      // stored document, so any other stored fields are absent from the new
      // revision - confirm this is intended.
      var obj = {
        url: body.url,
        html: body.html,
        reduced: body.reduced,
        title: body.title,
        tags: req.body.tags
      };
      logger.info('Updating...');
      busEmitter.emit('updateBookmarkData', obj, body._id, body._rev);
      // The reply is sent straight away; it does not wait for the update and
      // therefore still carries the pre-update revision.
      var outJSON = {
        _id: body._id,
        _rev: body._rev,
        title: body.title,
        reduced: body.reduced,
        url: body.url,
        tags: req.body.tags
      };
      res.writeHead(200, {'Content-Type': 'application/json'});
      res.end(JSON.stringify(outJSON));
    });
  });
/**
 * GET /tags/:id - responds with {list: [{id, title}, ...]} for every row of
 * the 'getTagByKey' view whose first value element equals :id (the second
 * element is used as the title). View errors are logged and answered with an
 * empty JSON object and status 500.
 */
router.get('/tags/:id', function(req, res) {
  logger.debug('entry..');
  logger.debug(req.params.id);
  dbCloudant.view('getTagByKey', 'getTagByKey', function(err, body) {
    if (err) {
      logger.error(err);
      // The header name needs the hyphen; 'ContentType' is not a real header.
      res.writeHead(500, {'Content-Type': 'application/json'});
      res.end(JSON.stringify({}));
      return;
    }
    var outJSON = [];
    body.rows.forEach(function(doc) {
      // Loose equality is deliberate: :id is always a string, while the stored
      // key may not be.
      if (doc.value[0] == req.params.id) {
        outJSON.push({id: doc.id, title: doc.value[1]});
      }
    });
    res.writeHead(200, {'Content-Type': 'application/json'});
    res.end(JSON.stringify({list: outJSON}));
  });
});
/**
 * POST /add - body {url}, where url is a JSON-encoded string. Queues a grab of
 * the bookmark and answers {adding: <decoded url>} immediately, without
 * waiting for the grab. A url that is not valid JSON is answered with 400. A
 * body without a url is logged and still answered with 200, as before.
 */
router.post('/add', function(req, res) {
  logger.debug('add entry..');
  var t = req.body || {};
  var url;
  // Body parsers may hand over objects without Object.prototype, so do not
  // call hasOwnProperty on the body itself.
  if (Object.prototype.hasOwnProperty.call(t, 'url')) {
    try {
      url = JSON.parse(t.url.toString());
    } catch (e) {
      logger.error('Unparseable url:', e);
      res.writeHead(400, {'Content-Type': 'application/json'});
      res.end(JSON.stringify({}));
      return;
    }
    logger.debug(url);
    // NOTE(review): the listener receives the original body, whose url is
    // still JSON-encoded - confirm whether the decoded value should be passed.
    busEmitter.emit('getBookmark', t);
  } else {
    logger.error('No data block!');
  }
  // The header name needs the hyphen; 'ContentType' is not a real header.
  res.writeHead(200, {'Content-Type': 'application/json'});
  res.end(JSON.stringify({adding: url}));
});
/**
 * POST /redo - body {url, ...}: queues a re-grab of the bookmark described by
 * the body (the whole body is handed to the 'getBookmark' listener) and
 * answers {adding: <url>} immediately. A body without a url is logged and
 * still answered with 200, as before.
 */
router.post('/redo', function(req, res) {
  logger.debug('redoing entry..');
  var t = req.body || {};
  var url;
  // Body parsers may hand over objects without Object.prototype, so do not
  // call hasOwnProperty on the body itself.
  if (Object.prototype.hasOwnProperty.call(t, 'url')) {
    url = t.url.toString();
    logger.debug(url);
    busEmitter.emit('getBookmark', t);
  } else {
    logger.error('No data block!');
  }
  // The header name needs the hyphen; 'ContentType' is not a real header.
  res.writeHead(200, {'Content-Type': 'application/json'});
  res.end(JSON.stringify({adding: url}));
});
/**
 * GET  /new?url=... - grabs the page; the 'getBookmarkRes' listener answers on
 *                     `res`.
 * POST /new         - same, with the url taken from the request body.
 * Both answer 422 when no url was supplied, so a missing url never reaches
 * the grabber.
 */
router.route('/new')
  .get(function(req, res, next) {
    logger.debug('Save new');
    if (!req.query.url) {
      res.status(422).end();
      return;
    }
    busEmitter.emit('getBookmarkRes', req.query.url, res);
  }).post(function(req, res, next) {
    logger.debug('Posted Save new');
    logger.info(req.body);
    // Covers a missing body, an empty body and a body without a url.
    if (req.body && req.body.url) {
      busEmitter.emit('getBookmarkRes', req.body.url, res);
    } else {
      res.status(422).end();
    }
  });
// Kick off one rebuild of the stored tag list as soon as the module is loaded
// (handled by the 'updateTagsDB' listener registered above).
busEmitter.emit('updateTagsDB');
// Expose the configured router to whichever module requires this file.
module.exports = router;