This commit is contained in:
Martin Donnelly 2016-03-21 10:41:03 +00:00
parent cf1efc6b17
commit 5e6d3d0a43
2 changed files with 169 additions and 145 deletions

View File

@ -1,32 +1,41 @@
{
"predef": [
"Promise"
],
"node": true,
"esnext": true,
"bitwise": true,
"camelcase": true,
"browser": false,
"boss": true,
"curly": true,
"debug": false,
"devel": true,
"eqeqeq": true,
"immed": true,
"indent": 2,
"latedef": true,
"evil": true,
"forin": false,
"immed": false,
"laxbreak": false,
"newcap": true,
"noarg": true,
"quotmark": "single",
"regexp": true,
"noempty": false,
"nonew": false,
"nomen": false,
"onevar": false,
"plusplus": false,
"regexp": false,
"undef": true,
"sub": true,
"strict": false,
"white": false,
"eqnull": true,
"esnext": true,
"unused": true,
"strict": true,
"trailing": true,
"smarttabs": true,
"white": true,
"validthis": true,
"browser" : true,
"jquery":true,
"supernew": true,
"globals": {
"$": false,
"EJS": false,
"MicroEvent": false
"MicroEvent": false,
"express": false,
"http": false,
"request": false,
"cheerio": false
}
}

View File

@ -3,8 +3,10 @@
* Created by Martin on 22/02/2016.
*/
var express = require('express');
var http = require('http'), request = require('request'), cheerio = require('cheerio'), util = require('util');
var jsonfile = require('jsonfile'), fs = require('fs'), STRING = require('string');
var http = require('http'), request = require('request'), cheerio = require(
'cheerio'), util = require('util');
var jsonfile = require('jsonfile'), fs = require('fs'), STRING = require(
'string');
var zlib = require("zlib");
var log4js = require('log4js');
var logger = log4js.getLogger();
@ -20,16 +22,41 @@ var busEmitter = new EventEmitter();
var db_name = 'keeper';
var dbCouch = nano.use(db_name);
var bodyfile = __dirname + '/' + 'body.html';
var htmlfile = __dirname + '/' + 'testoutput.html';
var generics = ['ARTICLE', 'div.content_column', 'div.post','div.page'];
// Selectors naming generic page containers (article/post/page/recipe wrappers).
// NOTE(review): the code that consumes this list is not visible in this hunk —
// presumably each selector is tried against the fetched document to locate the
// main content before falling back to the whole BODY; confirm against processBody.
var generics = [
'ARTICLE',
'div.content_column',
'div.post',
'div.page',
'#recipe-single'
];
function cleaner(b) {
var _b = b;
var unwanted = ['div#disqus_thread', 'SCRIPT', 'FOOTER', 'div.ssba', '.shareaholic-canvas', '.yarpp-related', 'div.dfad', 'div.postFooterShare', 'div#nextPrevLinks', '.post-comments','HEADER', '.post-title','#side-menu','.footer-container','#pre-footer','#cakephp-global-navigation', '.masthead','.breadcrumb-header'];
var unwanted = [
'div#disqus_thread',
'SCRIPT',
'FOOTER',
'div.ssba',
'.shareaholic-canvas',
'.yarpp-related',
'div.dfad',
'div.postFooterShare',
'div#nextPrevLinks',
'.post-comments',
'HEADER',
'.post-title',
'#side-menu',
'.footer-container',
'#pre-footer',
'#cakephp-global-navigation',
'.masthead',
'.breadcrumb-header',
'.single-recipe-sidebar',
'#recipe-related-videos'
];
for (var i = 0; i < unwanted.length; i++) {
_b.find(unwanted[i]).remove();
@ -120,7 +147,8 @@ function processBody(body, url, _id, _rev) {
tdihbody = cleaner(tdihbody);
logger.debug(title);
} else {
}
else {
// bah. nothing to reduce so just grab the body, tidy it and use that
tdihbody = $('BODY');
logger.debug(tdihbody.length);
@ -152,16 +180,13 @@ function processBody(body, url, _id, _rev) {
obj.reduced = STRING(tdihbody.html()).trim().s;
obj.title = STRING(title).collapseWhitespace().s;
if (_id !== null )
{
if (_id !== null) {
busEmitter.emit("updateBookmarkData", obj, _id, _rev);
}
else
{
else {
busEmitter.emit("saveBookmarkData", obj);
}
return obj;
}
@ -169,22 +194,18 @@ function genericGrab(obj,res) {
var url, _id = null, _ver = null;
if (typeof obj === 'string')
{
if (typeof obj === 'string') {
logger.info(obj);
url = obj;
}
else
{
else {
url = obj.url;
_id = obj._id || null;
_ver = obj._rev || null;
}
logger.warn(typeof obj);
logger.info(url);
logger.info(_id);
logger.info(_ver);
@ -199,7 +220,6 @@ var url, _id=null, _ver=null;
// to test http://chaosinthekitchen.com/2009/07/lime-and-coconut-chicken/
var gunzip = zlib.createGunzip();
var jsonString = '';
resp.pipe(gunzip);
@ -214,20 +234,17 @@ var url, _id=null, _ver=null;
console.log(e);
});
}
else
{
else {
var b = processBody(body, url, _id, _ver);
if (res != null)
{
if (res != null) {
res.render('grabbed');
}
}
} else
{
}
else {
var b = processBody(body, url, _id, _ver);
if (res != null)
{
if (res != null) {
res.render('grabbed', {data: b});
}
}
@ -250,8 +267,8 @@ router.get('/list', function (req, res) {
res.writeHead(200, {"ContentType": "application/json"});
res.end(JSON.stringify({list: outJSON}));
} else
{
}
else {
res.writeHead(500, {"ContentType": "application/json"});
res.end(JSON.stringify({}));
}
@ -276,8 +293,8 @@ router.get('/entry/:id', function (req, res) {
res.writeHead(200, {"ContentType": "application/json"});
res.end(JSON.stringify(outJSON));
} else
{
}
else {
res.writeHead(500, {"ContentType": "application/json"});
res.end(JSON.stringify({}));
}
@ -320,8 +337,6 @@ router.post('/redo', function (req, res) {
});
router.get('/new', function(req, res) {
logger.debug('Save new');
busEmitter.emit("getBookmarkRes", req.query.url, res);