347 lines
7.7 KiB
JavaScript
347 lines
7.7 KiB
JavaScript
"use strict";
|
|
/**
|
|
* Created by Martin on 22/02/2016.
|
|
*/
|
|
var express = require('express');
|
|
var http = require('http'), request = require('request'), cheerio = require(
|
|
'cheerio'), util = require('util');
|
|
var jsonfile = require('jsonfile'), fs = require('fs'), STRING = require(
|
|
'string');
|
|
var zlib = require("zlib");
|
|
var log4js = require('log4js');
|
|
var logger = log4js.getLogger();
|
|
var URL = require('url');
|
|
|
|
// Router on which every endpoint in this file is registered.
const router = express.Router();

const EventEmitter = require('events');

// CouchDB client bound to the local server.
const nano = require('nano')('http://localhost:5984');

// In-process event bus connecting the routes, the grabber and the persistence helpers.
const busEmitter = new EventEmitter();

// Database that holds the stored bookmarks.
const db_name = 'keeper';
const dbCouch = nano.use(db_name);

// File paths inside this module's directory (not referenced elsewhere in this file).
const bodyfile = `${__dirname}/body.html`;
const htmlfile = `${__dirname}/testoutput.html`;
|
|
// Selectors probed in order by processBody to find the main content container
// of a fetched page; the first selector with at least one match is used.
const generics = ['ARTICLE', 'div.content_column', 'div.post', 'div.page', '#recipe-single'];
|
|
|
|
/**
 * Remove page furniture (scripts, headers, footers, share widgets, comment
 * threads, sidebars, ...) from a selection.
 *
 * The selection is modified in place: for every unwanted selector the
 * matching descendants are looked up with find() and removed.
 *
 * @param {Object} b - selection exposing find(selector).remove(); the callers
 *   in this file pass a cheerio selection.
 * @returns {Object} the same object that was passed in, after cleaning. A
 *   null/undefined argument is returned unchanged instead of throwing.
 */
function cleaner(b) {
  // Nothing to clean; avoid a TypeError on b.find below.
  if (b == null) {
    return b;
  }

  const unwanted = [
    'div#disqus_thread',
    'SCRIPT',
    'FOOTER',
    'div.ssba',
    '.shareaholic-canvas',
    '.yarpp-related',
    'div.dfad',
    'div.postFooterShare',
    'div#nextPrevLinks',
    '.post-comments',
    'HEADER',
    '.post-title',
    '#side-menu',
    '.footer-container',
    '#pre-footer',
    '#cakephp-global-navigation',
    '.masthead',
    '.breadcrumb-header',
    '.single-recipe-sidebar',
    '#recipe-related-videos'
  ];

  for (const selector of unwanted) {
    b.find(selector).remove();
  }

  return b;
}
|
|
|
|
/**
 * Store a new bookmark document in CouchDB.
 *
 * The insert is asynchronous; the outcome is only logged, nothing is
 * returned to the caller.
 *
 * @param {Object} obj - document to insert.
 */
function insertBookmark(obj) {
  logger.debug('Inserting into couch...');
  logger.info(util.inspect(obj));

  dbCouch.insert(obj, function(err, body, header) {
    if (err) {
      // Keep the underlying error so the failure can be diagnosed.
      logger.error('Error inserting into couch', err);
      return;
    }
    // Only report completion once CouchDB has actually answered.
    logger.debug('Insert done..');
  });
}
|
|
|
|
/**
 * Write a new revision of an existing bookmark document to CouchDB.
 *
 * NOTE: obj itself is stamped with _id and _rev (no copy is made), so the
 * caller's object carries those fields afterwards.
 *
 * The insert is asynchronous; the outcome is only logged, nothing is
 * returned to the caller.
 *
 * @param {Object} obj - document body to store.
 * @param {string} _id - id of the document being replaced.
 * @param {string} _rev - revision the update is based on.
 */
function updateBookmark(obj, _id, _rev) {
  logger.debug('Updating couch...');

  obj._id = _id;
  obj._rev = _rev;

  dbCouch.insert(obj, function(err, body, header) {
    if (err) {
      // Keep the underlying error so the failure can be diagnosed.
      logger.error('Error updating into couch', err);
      return;
    }
    // Only report completion once CouchDB has actually answered.
    logger.debug('Update done..');
  });
}
|
|
// Listener for 'saveBookmarkData': forwards the payload to insertBookmark.
const doInsertBookmark = function(obj) {
  insertBookmark(obj);
};
|
|
|
|
// Listener for 'updateBookmarkData': forwards document, id and revision to updateBookmark.
const doUpdateBookmark = function(obj, _id, _rev) {
  updateBookmark(obj, _id, _rev);
};
|
|
|
|
// Listener for 'getBookmark': starts a grab for the given request object.
const doGetBookmark = function(obj) {
  genericGrab(obj);
};
|
|
|
|
// Listener for 'getBookmarkRedo': starts a grab for the given request object.
const doGetBookmarkRedo = function(obj) {
  genericGrab(obj);
};
|
|
|
|
// Listener for 'getBookmarkRes': starts a grab and passes along the HTTP
// response object so genericGrab can answer the request itself.
const doGetBookmarkRes = function(url, res) {
  logger.debug('doGetBookmarkRes');
  genericGrab(url, res);
};
|
|
|
|
// Events: wire each bus event name to its listener.
[
  ['saveBookmarkData', doInsertBookmark],
  ['updateBookmarkData', doUpdateBookmark],
  ['getBookmark', doGetBookmark],
  ['getBookmarkRes', doGetBookmarkRes],
  ['getBookmarkRedo', doGetBookmarkRedo]
].forEach(function(binding) {
  busEmitter.on(binding[0], binding[1]);
});
|
|
|
|
/**
 * Reduce a fetched HTML page to its main content and hand it to the bus for
 * persistence.
 *
 * Steps:
 *  1. Parse the markup with cheerio and read the page title.
 *  2. Pick the first selector in `generics` that matches; fall back to BODY.
 *  3. Strip unwanted elements from that selection via cleaner().
 *  4. Point every http(s) image inside it at the image.silvrtree.co.uk proxy.
 *  5. Emit 'updateBookmarkData' when an _id is supplied, otherwise
 *     'saveBookmarkData'.
 *
 * @param {string} body - raw HTML of the page.
 * @param {string} url - address the page was fetched from; used to resolve
 *   relative image sources.
 * @param {?string} _id - id of an existing document, or null/undefined for a
 *   new bookmark.
 * @param {?string} _rev - revision of the existing document, if any.
 * @returns {{url: string, html: string, reduced: string, title: string}} the
 *   bookmark object that was emitted.
 */
function processBody(body, url, _id, _rev) {
  const $ = cheerio.load(body);
  const title = $('TITLE').text();

  // Find the first generic container present in the page. The bounds check
  // lives in the loop header so generics[i] is never read past the end.
  let selector = 'BODY';
  let i;
  for (i = 0; i < generics.length; i++) {
    if ($(generics[i]).length > 0) {
      selector = generics[i];
      break;
    }
  }
  logger.debug(i);

  let tdihbody = $(selector);
  logger.debug(tdihbody.length);
  tdihbody = cleaner(tdihbody);
  logger.debug(title);

  try {
    tdihbody.find('IMG').each(function(index, elem) {
      const src = $(this).attr('src');

      // An <img> without a src has nothing to rewrite.
      if (!src) {
        return;
      }

      // Resolve against the page address so root-relative, page-relative and
      // protocol-relative sources all become absolute URLs.
      const absolute = URL.resolve(url, src);

      // Leave non-http sources (e.g. data: URIs) untouched; the proxy
      // prefix below is only meaningful for fetchable http(s) URLs.
      if (!/^https?:\/\//i.test(absolute)) {
        return;
      }

      $(this).attr('src', 'http://image.silvrtree.co.uk/900,fit/' + absolute);
    });
  }
  catch (e) {
    // A bad image attribute must not prevent the bookmark from being saved.
    logger.error(e);
  }

  const obj = {
    url: url,
    html: $.html(),
    reduced: STRING(tdihbody.html()).trim().s,
    title: STRING(title).collapseWhitespace().s
  };

  // `!= null` treats both null and undefined as "no existing document".
  if (_id != null) {
    busEmitter.emit('updateBookmarkData', obj, _id, _rev);
  }
  else {
    busEmitter.emit('saveBookmarkData', obj);
  }

  return obj;
}
|
|
/**
 * Fetch a page and run it through processBody().
 *
 * @param {(string|Object)} obj - either the URL itself, or an object with a
 *   `url` property and optional `_id` / `_rev` identifying an existing
 *   document to update.
 * @param {Object} [res] - HTTP response object. When given, the 'grabbed'
 *   view is rendered with the processed bookmark on success, and an empty
 *   JSON body with an error status is sent on failure. When omitted,
 *   failures are only logged.
 */
function genericGrab(obj, res) {
  let url;
  let _id = null;
  let _ver = null;

  // Answer the waiting HTTP request (if any) with an error status.
  const fail = function(status) {
    if (res != null) {
      res.writeHead(status, {'Content-Type': 'application/json'});
      res.end(JSON.stringify({}));
    }
  };

  if (typeof obj === 'string') {
    logger.info(obj);
    url = obj;
  }
  else if (obj != null) {
    url = obj.url;
    _id = obj._id || null;
    _ver = obj._rev || null;
  }

  logger.warn(typeof obj);

  logger.info(url);
  logger.info(_id);
  logger.info(_ver);

  if (typeof url !== 'string' || url === '') {
    logger.error('genericGrab: no url to fetch');
    fail(400);
    return;
  }

  // gzip: true lets `request` negotiate and transparently decode compressed
  // responses, so `body` is always the decoded markup and no manual gunzip
  // handling is needed.
  request({url: url, gzip: true}, function(err, resp, body) {
    if (err) {
      // Throwing here would be an uncaught exception and take the whole
      // process down; report the failure instead.
      logger.error('Error fetching ' + url, err);
      fail(502);
      return;
    }

    let b;
    try {
      b = processBody(body, url, _id, _ver);
    }
    catch (e) {
      logger.error('Error processing ' + url, e);
      fail(500);
      return;
    }

    if (res != null) {
      res.render('grabbed', {data: b});
    }
  });
}
|
|
|
|
/**
 * GET /list
 *
 * Responds with {list: [{id, title}, ...]} built from the rows of the
 * 'titles' view in the 'titles' design document, or with 500 and an empty
 * JSON object when the view cannot be read.
 */
router.get('/list', function(req, res) {
  logger.debug('list..');

  dbCouch.view('titles', 'titles', function(err, body) {
    if (err) {
      logger.error('Error reading titles view', err);
      res.writeHead(500, {'Content-Type': 'application/json'});
      res.end(JSON.stringify({}));
      return;
    }

    const list = body.rows.map(function(doc) {
      return {id: doc.id, title: doc.value};
    });

    // The header name is 'Content-Type'; 'ContentType' is not a header
    // clients recognise.
    res.writeHead(200, {'Content-Type': 'application/json'});
    res.end(JSON.stringify({list: list}));
  });
});
|
|
|
|
/**
 * GET /entry/:id
 *
 * Responds with the stored bookmark's _id, _rev, title, reduced markup and
 * url as JSON, or with 500 and an empty JSON object when the document
 * cannot be loaded.
 */
router.get('/entry/:id', function(req, res) {
  logger.debug('entry..');
  logger.debug(req.params.id);

  dbCouch.get(req.params.id, function(err, body) {
    if (err) {
      logger.error('Error loading entry ' + req.params.id, err);
      res.writeHead(500, {'Content-Type': 'application/json'});
      res.end(JSON.stringify({}));
      return;
    }

    // Expose only the fields the client needs; the full page HTML stays in
    // the database.
    const outJSON = {
      _id: body._id,
      _rev: body._rev,
      title: body.title,
      reduced: body.reduced,
      url: body.url
    };

    // The header name is 'Content-Type'; 'ContentType' is not a header
    // clients recognise.
    res.writeHead(200, {'Content-Type': 'application/json'});
    res.end(JSON.stringify(outJSON));
  });
});
|
|
|
|
/**
 * POST /add
 *
 * Expects a request body with a `url` field and queues that page for
 * grabbing via the 'getBookmark' event. The value may be a plain URL or a
 * JSON-encoded string (e.g. "\"http://example.com\""); both are accepted.
 *
 * Responds 200 with {adding: url} once the grab has been queued, or 400
 * with an empty JSON object when no url was supplied.
 */
router.post('/add', function(req, res) {
  logger.debug('add entry..');

  const t = req.body;

  // Use the prototype's hasOwnProperty: parsed bodies may be objects without
  // a prototype, and the body may be missing altogether.
  if (t == null || !Object.prototype.hasOwnProperty.call(t, 'url')) {
    logger.error('No data block!');
    res.writeHead(400, {'Content-Type': 'application/json'});
    res.end(JSON.stringify({}));
    return;
  }

  const raw = String(t.url);
  let url = raw;
  try {
    const parsed = JSON.parse(raw);
    if (typeof parsed === 'string') {
      url = parsed;
    }
  }
  catch (e) {
    // Not JSON-encoded: the value is already a plain URL string.
  }
  logger.debug(url);

  // Pass the decoded url on, so the grabber fetches the same address that
  // is reported back to the client.
  busEmitter.emit('getBookmark', Object.assign({}, t, {url: url}));

  res.writeHead(200, {'Content-Type': 'application/json'});
  res.end(JSON.stringify({adding: url}));
});
|
|
|
|
/**
 * POST /redo
 *
 * Expects a request body with a `url` field (and, for an existing entry,
 * `_id` / `_rev`) and queues the page for re-grabbing via the
 * 'getBookmarkRedo' event.
 *
 * Responds 200 with {adding: url} once the grab has been queued, or 400
 * with an empty JSON object when no url was supplied.
 */
router.post('/redo', function(req, res) {
  logger.debug('redoing entry..');

  const t = req.body;
  logger.debug(util.inspect(t));

  // Use the prototype's hasOwnProperty: parsed bodies may be objects without
  // a prototype, and the body may be missing altogether.
  if (t == null || !Object.prototype.hasOwnProperty.call(t, 'url')) {
    logger.error('No data block!');
    res.writeHead(400, {'Content-Type': 'application/json'});
    res.end(JSON.stringify({}));
    return;
  }

  const url = String(t.url);
  logger.debug(url);

  // 'getBookmarkRedo' is the event registered for this route; its listener
  // performs the same grab as 'getBookmark'.
  busEmitter.emit('getBookmarkRedo', t);

  res.writeHead(200, {'Content-Type': 'application/json'});
  res.end(JSON.stringify({adding: url}));
});
|
|
|
|
/**
 * GET /new?url=...
 *
 * Grabs the page named by the `url` query parameter. The response object is
 * handed to the 'getBookmarkRes' listener, which answers the request once
 * the grab has run. Responds 400 with an empty JSON object when `url` is
 * missing or not a single string value.
 */
router.get('/new', function(req, res) {
  logger.debug('Save new');

  const url = req.query.url;

  // Without this guard an absent parameter reaches the grabber as undefined.
  if (typeof url !== 'string' || url === '') {
    logger.error('No url supplied');
    res.writeHead(400, {'Content-Type': 'application/json'});
    res.end(JSON.stringify({}));
    return;
  }

  busEmitter.emit('getBookmarkRes', url, res);
});
|
|
|
|
// Export the configured router as this module's public interface.
module.exports = router;
|