mirror of
https://gitlab.silvrtree.co.uk/martind2000/recipes.git
synced 2025-01-25 17:56:17 +00:00
599 lines
13 KiB
JavaScript
599 lines
13 KiB
JavaScript
'use strict';
/**
 * Created by Martin on 22/02/2016.
 */

// Third-party and core modules, one declaration per statement.
const express = require('express');
const http = require('http');
const request = require('request');
const cheerio = require('cheerio');
const util = require('util');
const jsonfile = require('jsonfile');
const fs = require('fs');
const STRING = require('string');
const converter = require('html-to-markdown');
const markdown = require('markdown').markdown;
const zlib = require('zlib');
const log4js = require('log4js');

// Default log4js logger used throughout this module.
const logger = log4js.getLogger();

// Node's `url` module (note: this shadows the global URL class in this file).
const URL = require('url');

// Router exported at the bottom of the file.
const router = express.Router();

const EventEmitter = require('events');
|
|
|
|
// CouchDB connection.
// The connection URL can be overridden with the COUCHDB_URL environment
// variable so that credentials do not have to live in the source tree.
// FIXME: the fallback below still embeds a username and password; rotate that
// password and remove the fallback once every deployment sets COUCHDB_URL.
var nano = require('nano')(
  process.env.COUCHDB_URL || 'http://martind2000:1V3D4m526i@localhost:5984'
);

// In-process event bus connecting the routes, the grabber and the DB writers.
var busEmitter = new EventEmitter();

var db_name = 'recipes';
var dbCouch = nano.use(db_name);
|
|
|
|
// Service object built by ./keeperService; it is handed the shared event bus.
var Keeper = require('./keeperService')(busEmitter);
console.log(Keeper);

// Site-specific grabbers referenced by the specialHandlers table.
var grabbers = require('./grabbers');
console.log(grabbers);

// Paths of scratch files that live next to this module.
var jsonFile = [__dirname, 'output.json'].join('/');
var bodyfile = [__dirname, 'body.html'].join('/');
var htmlfile = [__dirname, 'testoutput.html'].join('/');
|
|
/**
 * Selectors tried, in order, by genericProcessor to locate the main content
 * container of a page. The first selector that matches anything wins.
 */
var generics = [
  'ARTICLE',
  'div.content_column',
  'div.post',
  'div.page',
  '#recipe-single',
  'div.content.body',
  'div.container'
];
|
|
|
|
/**
 * Host-specific processors. processBody compares each `url` entry with the
 * host of the page being grabbed and, on a match, calls `fn(body, url)`
 * instead of the generic processor.
 */
var specialHandlers = [
  {
    url: 'www.reddit.com',
    fn: (body, url) => grabbers.doReddit(body, url)
  },
  {
    url: 'developer.android.com',
    fn: (body, url) => grabbers.doAndroidDeveloper(body, url)
  },
  {
    url: 'www.engadget.com',
    fn: (body, url) => grabbers.doEngadget(body, url)
  },
  {
    url: 'www.bbcgoodfood.com',
    fn: (body, url) => grabbers.doBBCGoodFood(body, url)
  }
];
|
|
|
|
|
|
|
|
/**
 * Strips page furniture (scripts, headers, footers, share widgets, comment
 * sections, ads, ...) from a selection.
 *
 * For every selector in the list below, descendants of `b` that match are
 * removed via `b.find(selector).remove()`. The selection is modified in place.
 *
 * @param {Object} b - selection exposing `find(selector)` whose result has `remove()`.
 * @returns {Object} the same selection that was passed in.
 */
function cleaner(b) {
  var selectorsToDrop = [
    // Document-level tags
    'LINK', 'META', 'TITLE', 'div#disqus_thread', 'SCRIPT', 'FOOTER',
    // Sharing widgets, related posts, ad slots
    'div.ssba', '.shareaholic-canvas', '.yarpp-related', 'div.dfad',
    'div.postFooterShare', 'div#nextPrevLinks', '.post-comments',
    // Headers, menus and other navigation
    'HEADER', '.post-title', '#side-menu', '.footer-container', '#pre-footer',
    '#cakephp-global-navigation', '.masthead', '.breadcrumb-header',
    '.single-recipe-sidebar', '#recipe-related-videos', '#tnav', '.footer',
    '#tb-wrapper', '#comments', '#menu', 'aside',
    '#ad-mpu-premium-1-mobile', '#recipetools', '.adsense-ads-separator',
    '.comments', '.related-content', '.tip-wrapper',
    '#recipe-related-video-mobile', '.float-wrapper', '.source-jamie',
    '.ad.mobile', '.foodity-wrapper', '#ad-most-watched-mobile', '#sticky',
    '.nutrition-expand', '.grid-list-wrapper', '#recipe-finder__box',
    '.browser-upgrade-alert-message', '.main-menu', '.recipe-media',
    '.method-mobile-prompt-wrapper', '.sharebox-wrapper', '.jumbotron',
    '.slideshow', '.top-cat-recipe', '.analytic-box', '.recipe-reviews',
    '.recipe-tools', '.promo-module', '.widgettitle', '.post_related'
  ];

  selectorsToDrop.forEach(function(selector) {
    b.find(selector).remove();
  });

  return b;
}
|
|
|
|
/**
 * Stores a new bookmark document in CouchDB.
 *
 * The insert is asynchronous; the outcome is only logged, nothing is returned
 * or emitted.
 *
 * @param {Object} obj - document to insert.
 */
function insertBookmark(obj) {
  logger.debug('Inserting into couch...');
  logger.info(util.inspect(obj));

  dbCouch.insert(obj, function(err) {
    if (err) {
      // Include the error itself so a failed insert can be diagnosed.
      logger.error('Error inserting into couch', err);
      return;
    }
    // Logged from the callback so it really means the insert has finished.
    logger.debug('Insert done..');
  });
}
|
|
|
|
/**
 * Writes a new revision of an existing bookmark document.
 *
 * `_id` and `_rev` are set on `obj` itself (the caller's object is modified)
 * before it is sent to CouchDB. On success an 'updateTagsDB' event is emitted
 * on the bus; on failure the error is logged.
 *
 * @param {Object} obj - document body.
 * @param {string} _id - id of the document being replaced.
 * @param {string} _rev - revision the update is based on.
 */
function updateBookmark(obj, _id, _rev) {
  logger.debug('Updating couch...');

  obj._id = _id;
  obj._rev = _rev;

  dbCouch.insert(obj, function(err) {
    if (err) {
      // Include the error itself so a failed update can be diagnosed.
      logger.error('Error updating into couch', err);
      return;
    }
    logger.info('I think we updated ok...');
    busEmitter.emit('updateTagsDB');
    // Logged from the callback so it really means the update has finished.
    logger.debug('Update done..');
  });
}
|
|
/**
 * Bus listener for 'saveBookmarkData': forwards the document to insertBookmark.
 *
 * @param {Object} obj - document to store.
 */
function doInsertBookmark(obj) {
  insertBookmark(obj);
}
|
|
|
|
/**
 * Bus listener for 'updateBookmarkData': forwards its arguments to
 * updateBookmark.
 *
 * @param {Object} obj - document body.
 * @param {string} _id - document id.
 * @param {string} _rev - document revision.
 */
function doUpdateBookmark(obj, _id, _rev) {
  updateBookmark(obj, _id, _rev);
}
|
|
|
|
/**
 * Bus listener for 'getBookmark': starts a grab for the given entry.
 *
 * @param {(string|Object)} obj - value passed straight to genericGrab.
 */
function doGetBookmark(obj) {
  genericGrab(obj);
}
|
|
|
|
/**
 * Bus listener for 'getBookmarkRedo': starts a grab for the given entry.
 *
 * @param {(string|Object)} obj - value passed straight to genericGrab.
 */
function doGetBookmarkRedo(obj) {
  genericGrab(obj);
}
|
|
|
|
/**
 * Bus listener for 'getBookmarkRes': starts a grab and hands the HTTP response
 * object along so genericGrab can answer the request.
 *
 * @param {string} url - value passed to genericGrab as its first argument.
 * @param {Object} res - response object passed to genericGrab.
 */
function doGetBookmarkRes(url, res) {
  logger.debug('doGetBookmarkRes');
  genericGrab(url, res);
}
|
|
|
|
/**
 * Bus listener for 'saveNew': forwards the submitted entry to saveNew.
 *
 * @param {Object} obj - entry as posted by the client.
 */
var doSaveNew = (obj) => {
  // The previous message said 'doGetBookmarkRes' (copy-paste slip), which made
  // the debug log point at the wrong handler.
  logger.debug('doSaveNew');
  saveNew(obj);
};
|
|
|
|
/**
 * Bus listener for 'updateTagsDB': rebuilds the stored tag list.
 *
 * Reads the 'getAllTags' view, merges the values of every row into one
 * de-duplicated list (first occurrence order is kept), then reads the
 * 'taglist' view and passes each row's value together with the merged list to
 * doSaveTagsDB.
 */
var doUpdateTagsDB = () => {
  logger.debug('Update the tags database...');

  dbCouch.view('getAllTags', 'getAllTags', function(err, body) {
    if (err) {
      // This failure used to be silently ignored.
      logger.error('Could not read the getAllTags view', err);
      return;
    }

    var collected = [];
    body.rows.forEach(function(doc) {
      // concat accepts both a single value and an array of values.
      collected = collected.concat(doc.value);
    });

    // A Set keeps the first occurrence of each tag, in insertion order.
    var masterList = Array.from(new Set(collected));

    dbCouch.view('taglist', 'taglist', function(err, body) {
      if (err) {
        logger.error('NO TAG LIST EXISTS');
        return;
      }

      body.rows.forEach(function(doc) {
        doSaveTagsDB(doc.value, masterList);
      });
    });
  });
};
|
|
|
|
/**
 * Bus listener for 'saveTagsDB' (also called directly by doUpdateTagsDB):
 * replaces the `taglist` property of `orig` and writes the document back to
 * CouchDB. `orig` itself is modified. The outcome is only logged.
 *
 * @param {Object} orig - the stored tag-list document.
 * @param {Array} newList - value to store as `taglist`.
 */
var doSaveTagsDB = function(orig, newList) {
  logger.debug('doSaveTagsDB');

  orig.taglist = newList;

  dbCouch.insert(orig, function(err, body, header) {
    if (!err) {
      logger.info('Updated the tags list...');
      return;
    }
    logger.error('Error updating into couch');
  });
};
|
|
|
|
// Events: wire every bus event name to its listener, in a fixed order.
[
  ['saveBookmarkData', doInsertBookmark],
  ['updateBookmarkData', doUpdateBookmark],
  ['getBookmark', doGetBookmark],
  ['getBookmarkRes', doGetBookmarkRes],
  ['getBookmarkRedo', doGetBookmarkRedo],
  ['updateTagsDB', doUpdateTagsDB],
  ['saveTagsDB', doSaveTagsDB],
  ['saveNew', doSaveNew]
].forEach(function(binding) {
  busEmitter.on(binding[0], binding[1]);
});
|
|
|
|
|
|
/**
 * Turns a hand-written entry into a bookmark document and queues it for
 * saving.
 *
 * `obj.body` is rendered with `markdown.toHTML`; the result fills `html`,
 * `reduced` (trimmed) and `nib` (whitespace-collapsed, first 300 characters).
 * `url` is set to the empty string, `title` is whitespace-collapsed and the
 * original body is kept as `markdown`. `obj` is modified in place and then
 * emitted as 'saveBookmarkData'.
 *
 * @param {Object} obj - entry with at least `title` and `body`.
 */
function saveNew(obj) {
  logger.info('Saving new page');

  var rendered = markdown.toHTML(obj.body);

  Object.assign(obj, {
    url: '',
    html: rendered,
    reduced: STRING(rendered).trim().s,
    nib: STRING(rendered).collapseWhitespace().trim().left(300).s,
    title: STRING(obj.title).collapseWhitespace().s,
    markdown: obj.body
  });

  busEmitter.emit('saveBookmarkData', obj);
}
|
|
|
|
|
|
|
|
/**
 * Default page processor, used when no site-specific handler matches.
 *
 * Picks the first selector in `generics` that matches something in the page
 * (falling back to BODY, then to the document root), strips unwanted elements
 * with `cleaner`, rewrites every image `src` to go through the image proxy and
 * builds the bookmark object.
 *
 * @param {string} body - raw HTML of the page.
 * @param {string} url - address the page was fetched from; used as the base
 *   for relative image URLs.
 * @returns {Object} object with `url`, `html` (the whole document after
 *   cleaning and image rewriting), `reduced` (HTML of the chosen container),
 *   `nib` (first 300 characters of its text), `title`, `markdown` and, when
 *   the container has at least one image with a `src`, `thumbnail`.
 */
function genericProcessor(body, url) {
  logger.info('USING DEFAULT PROCESSOR');

  var obj = {};
  var $ = cheerio.load(body);
  var title = $('TITLE').text();
  var pageUrl = STRING(url).trim().s;

  // Check the bound BEFORE indexing so generics[generics.length] is never
  // looked up.
  var i = 0;
  while (i < generics.length && $(generics[i]).length === 0) {
    i++;
  }
  logger.debug(i);

  var tdihbody;
  if (i < generics.length) {
    logger.warn('Used a generic');
    tdihbody = $(generics[i]);
  } else {
    // Nothing to reduce, so take the body (or the root for fragments).
    logger.warn('Using whole body');
    tdihbody = $('BODY');
    if (tdihbody.length === 0) {
      tdihbody = $(':root');
    }
  }

  logger.debug(tdihbody.length);
  tdihbody = cleaner(tdihbody);
  logger.debug(title);

  try {
    tdihbody.find('IMG').each(function() {
      var src = $(this).attr('src');

      if (src === null || typeof src === 'undefined') {
        return;
      }

      if (!STRING(src).startsWith('http')) {
        logger.debug('Stripping:' + src);
        // Resolve against the page address so that path-relative
        // ('img/a.png'), root-relative ('/img/a.png') and protocol-relative
        // ('//cdn.example/a.png') sources all end up as correct absolute URLs.
        src = URL.resolve(pageUrl, STRING(src).trim().s);
      }

      // The first usable image doubles as the bookmark thumbnail.
      if (typeof obj.thumbnail === 'undefined') {
        obj.thumbnail = src;
      }

      $(this).attr('src', 'http://image.silvrtree.co.uk/900,fit/' + src);
    });
  }
  catch (e) {
    logger.error(e);
  }

  obj.url = pageUrl;
  obj.html = $.html();
  obj.reduced = STRING(tdihbody.html()).trim().s;
  obj.nib = STRING(tdihbody.text()).collapseWhitespace().trim().left(300).s;
  obj.title = STRING(title).collapseWhitespace().s;
  obj.markdown = converter.convert(obj.reduced);

  return obj;
}
|
|
/**
 * Converts a fetched page into a bookmark object and queues it for storage.
 *
 * The first entry of `specialHandlers` whose `url` equals the page's host is
 * used to process the page; otherwise `genericProcessor` is used. The host is
 * stored on the result, which is then emitted as 'updateBookmarkData' (when an
 * id is supplied) or 'saveBookmarkData' (when it is not).
 *
 * @param {string} body - raw HTML of the page.
 * @param {string} url - address the page was fetched from.
 * @param {?string} [_id] - id of an existing document to update; null or
 *   omitted means a new document.
 * @param {?string} [_rev] - revision of the existing document.
 * @returns {Object} the bookmark object that was emitted.
 */
function processBody(body, url, _id, _rev) {
  var urlObj = URL.parse(url);

  logger.debug('host:', urlObj.host);

  // Stop at the first matching handler instead of scanning the whole table.
  var handler = specialHandlers.find(function(candidate) {
    return candidate.url === urlObj.host;
  });

  var obj = handler ? handler.fn(body, url) : genericProcessor(body, url);

  obj.host = urlObj.host;

  // `!= null` also covers an omitted (undefined) id, which would otherwise be
  // sent down the update path without a valid id.
  if (_id != null) {
    busEmitter.emit('updateBookmarkData', obj, _id, _rev);
  } else {
    busEmitter.emit('saveBookmarkData', obj);
  }

  return obj;
}
|
|
/**
 * Fetches a page and runs it through processBody.
 *
 * @param {(string|Object)} obj - either the address to fetch, or an object
 *   with `url` and optionally `_id` / `_rev` of an existing document to
 *   refresh.
 * @param {Object} [res] - response object. When given, it is answered with
 *   the 'grabbed' view on success, 422 when no address was supplied, 502 when
 *   the fetch fails and 500 when processing the page fails.
 */
function genericGrab(obj, res) {
  var url, _id = null, _ver = null;

  if (typeof obj === 'string') {
    url = obj;
  } else if (obj != null) {
    url = obj.url;
    _id = obj._id || null;
    _ver = obj._rev || null;
  }

  logger.warn(typeof obj);
  logger.info(url);
  logger.info(_id);
  logger.info(_ver);

  // Without an address there is nothing to fetch; answer instead of throwing.
  if (typeof url !== 'string' || url.trim() === '') {
    logger.error('genericGrab called without a url');
    if (res != null) {
      res.status(422).end();
    }
    return;
  }

  var options = {
    url: url,
    headers: {
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36'
    },
    jar: true,
    followRedirect: true,
    followAllRedirects: true,
    // Let `request` decode compressed responses. The hand-rolled gunzip branch
    // piped an already-consumed response and called an undefined `callback`.
    gzip: true
  };

  request(options, function(err, resp, body) {
    if (err) {
      // Throwing from this callback would take the whole process down.
      logger.error('Failed to fetch ' + url, err);
      if (res != null) {
        res.status(502).end();
      }
      return;
    }

    var b;
    try {
      b = processBody(body, url, _id, _ver);
    } catch (e) {
      logger.error('Failed to process ' + url, e);
      if (res != null) {
        res.status(500).end();
      }
      return;
    }

    if (res != null) {
      res.render('grabbed', {data: b});
    }
  });
}
|
|
|
|
/**
 * GET /pocket - renders the 'pocket' view with every row of the 'pocketList'
 * view. A string thumbnail (`tn`) is routed through the image proxy; any other
 * value is replaced with a placeholder image. Answers 500 with an empty JSON
 * object when the view cannot be read.
 */
router.get('/pocket', function(req, res) {
  logger.debug('list..');

  dbCouch.view('pocketList', 'pocketList', function(err, body) {
    if (err) {
      logger.error('Could not read the pocketList view', err);
      // The header name must be 'Content-Type'; 'ContentType' is sent as an
      // unrelated header and leaves the response without a content type.
      res.writeHead(500, {'Content-Type': 'application/json'});
      res.end(JSON.stringify({}));
      return;
    }

    var outJSON = body.rows.map(function(doc) {
      var obj = {id: doc.id, entry: doc.value};

      if (typeof obj.entry.tn === 'string') {
        obj.entry.tn = 'http://image.silvrtree.co.uk/100,fit,q80/' + obj.entry.tn;
      } else {
        obj.entry.tn = 'gfx/fm.png';
      }

      return obj;
    });

    logger.debug(util.inspect(body));
    logger.info(util.inspect(outJSON));
    res.render('pocket', {data: outJSON});
  });
});
|
|
|
|
|
|
|
|
// Routes that simply hand the request over to the Keeper service, which is
// given the response object to answer with.

// GET /list
router.get('/list', (req, res) => {
  Keeper.listGET({}, res);
});

// GET /entry/:id
router.get('/entry/:id', (req, res) => {
  Keeper.entryIdGET(req.params, res);
});

// GET and POST /tags
router.route('/tags')
  .get((req, res, next) => {
    Keeper.tagsGET(req.params, res);
  })
  .post((req, res, next) => {
    Keeper.tagsPOST(req.body, res);
  });

// GET /tags/:id
router.get('/tags/:id', (req, res) => {
  Keeper.tagsIDGET(req.params, res);
});

// POST /add
router.post('/add', (req, res) => {
  Keeper.addPOST(req.body, res);
});
|
|
|
|
/**
 * POST /savenew - queues a hand-written entry for saving.
 *
 * When the posted body has a `title` property the body is emitted as
 * 'saveNew'; otherwise an error is logged. The response is always 200 with an
 * empty JSON object.
 */
router.post('/savenew', function(req, res) {
  logger.debug('save new entry..');

  var t = req.body;
  logger.debug(t);

  // Go through Object.prototype so a missing body, or one created without a
  // prototype, cannot make the check itself throw.
  if (t != null && Object.prototype.hasOwnProperty.call(t, 'title')) {
    logger.debug(String(t.title));
    busEmitter.emit('saveNew', t);
  } else {
    logger.error('No data block!');
  }

  // 'Content-Type' is the real header name; 'ContentType' was sent as an
  // unrelated header.
  res.writeHead(200, {'Content-Type': 'application/json'});
  res.end(JSON.stringify({}));
});
|
|
|
|
/**
 * POST /redo - re-grabs an existing entry.
 *
 * When the posted body has a `url` property the body is emitted as
 * 'getBookmark'; otherwise an error is logged. The response is always 200 with
 * `{adding: <url>}` (the property is absent when no url was posted).
 */
router.post('/redo', function(req, res) {
  logger.debug('redoing entry..');

  var t = req.body;
  logger.debug(t);

  var url;
  // Go through Object.prototype so a missing body, or one created without a
  // prototype, cannot make the check itself throw.
  if (t != null && Object.prototype.hasOwnProperty.call(t, 'url')) {
    url = String(t.url);
    logger.debug(url);
    busEmitter.emit('getBookmark', t);
  } else {
    logger.error('No data block!');
  }

  // 'Content-Type' is the real header name; 'ContentType' was sent as an
  // unrelated header.
  res.writeHead(200, {'Content-Type': 'application/json'});
  res.end(JSON.stringify({adding: url}));
});
|
|
|
|
/**
 * GET /new?url=... and POST /new {url: ...} - grab a page and answer through
 * the grabber (emitted as 'getBookmarkRes' together with the response object).
 * Both verbs answer 422 when no non-empty url string is supplied, instead of
 * handing an unusable value to the grabber.
 */
router.route('/new')
  .get(function(req, res, next) {
    logger.debug('Save new');

    var url = req.query.url;
    if (typeof url !== 'string' || url.trim() === '') {
      res.status(422).end();
      return;
    }

    busEmitter.emit('getBookmarkRes', url, res);
  })
  .post(function(req, res, next) {
    logger.debug('Posted Save new');
    logger.info(req.body);

    // req.body may be missing entirely when no body parser handled the request.
    var url = req.body != null ? req.body.url : undefined;
    if (typeof url !== 'string' || url.trim() === '') {
      res.status(422).end();
      return;
    }

    busEmitter.emit('getBookmarkRes', url, res);
  });
|
|
|
|
// Kick off one tag-list rebuild as soon as the module has been loaded.
busEmitter.emit('updateTagsDB');

// The configured router is this module's only export.
module.exports = router;
|