diff --git a/.gitignore b/.gitignore
index a02038c..c8f8389 100644
--- a/.gitignore
+++ b/.gitignore
@@ -145,3 +145,4 @@ fabric.properties
/src/bundle.js
/src/bundle.js.map
/live/
+!/output/
diff --git a/package-lock.json b/package-lock.json
index 3da7161..fcca00f 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -54,7 +54,7 @@
},
"@sinonjs/formatio": {
"version": "2.0.0",
- "resolved": "http://registry.npmjs.org/@sinonjs/formatio/-/formatio-2.0.0.tgz",
+ "resolved": "https://registry.npmjs.org/@sinonjs/formatio/-/formatio-2.0.0.tgz",
"integrity": "sha512-ls6CAMA6/5gG+O/IdsBcblvnd8qcO/l1TYoNeAzp3wcISOxlPXQEus0mLcdwazEkWjaBdaJ3TaxmNgCLWwvWzg==",
"dev": true,
"requires": {
@@ -1093,7 +1093,7 @@
},
"browserify-rsa": {
"version": "4.0.1",
- "resolved": "http://registry.npmjs.org/browserify-rsa/-/browserify-rsa-4.0.1.tgz",
+ "resolved": "https://registry.npmjs.org/browserify-rsa/-/browserify-rsa-4.0.1.tgz",
"integrity": "sha1-IeCr+vbyApzy+vsTNWenAdQTVSQ=",
"requires": {
"bn.js": "^4.1.0",
@@ -4270,7 +4270,7 @@
},
"readable-stream": {
"version": "1.0.34",
- "resolved": "http://registry.npmjs.org/readable-stream/-/readable-stream-1.0.34.tgz",
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.0.34.tgz",
"integrity": "sha1-Elgg40vIQtLyqq+v5MKRbuMsFXw=",
"dev": true,
"requires": {
@@ -4282,13 +4282,13 @@
},
"string_decoder": {
"version": "0.10.31",
- "resolved": "http://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz",
+ "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz",
"integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ=",
"dev": true
},
"through2": {
"version": "0.6.5",
- "resolved": "http://registry.npmjs.org/through2/-/through2-0.6.5.tgz",
+ "resolved": "https://registry.npmjs.org/through2/-/through2-0.6.5.tgz",
"integrity": "sha1-QaucZ7KdVyCQcUEOHXp6lozTrUg=",
"dev": true,
"requires": {
@@ -4393,7 +4393,7 @@
},
"lodash": {
"version": "1.0.2",
- "resolved": "http://registry.npmjs.org/lodash/-/lodash-1.0.2.tgz",
+ "resolved": "https://registry.npmjs.org/lodash/-/lodash-1.0.2.tgz",
"integrity": "sha1-j1dWDIO1n8JwvT1WG2kAQ0MOJVE=",
"dev": true
},
@@ -4524,7 +4524,7 @@
"dependencies": {
"semver": {
"version": "4.3.6",
- "resolved": "http://registry.npmjs.org/semver/-/semver-4.3.6.tgz",
+ "resolved": "https://registry.npmjs.org/semver/-/semver-4.3.6.tgz",
"integrity": "sha1-MAvG4OhjdPe6YQaLWx7NV/xlMto=",
"dev": true
}
@@ -5395,7 +5395,7 @@
},
"htmlescape": {
"version": "1.1.1",
- "resolved": "http://registry.npmjs.org/htmlescape/-/htmlescape-1.1.1.tgz",
+ "resolved": "https://registry.npmjs.org/htmlescape/-/htmlescape-1.1.1.tgz",
"integrity": "sha1-OgPtwiFLyjtmQko+eVk0lQnLA1E="
},
"htmlparser2": {
@@ -9143,7 +9143,7 @@
},
"pretty-hrtime": {
"version": "1.0.3",
- "resolved": "http://registry.npmjs.org/pretty-hrtime/-/pretty-hrtime-1.0.3.tgz",
+ "resolved": "https://registry.npmjs.org/pretty-hrtime/-/pretty-hrtime-1.0.3.tgz",
"integrity": "sha1-t+PqQkNaTJsnWdmeDyAesZWALuE=",
"dev": true
},
@@ -9977,7 +9977,7 @@
},
"safe-regex": {
"version": "1.1.0",
- "resolved": "http://registry.npmjs.org/safe-regex/-/safe-regex-1.1.0.tgz",
+ "resolved": "https://registry.npmjs.org/safe-regex/-/safe-regex-1.1.0.tgz",
"integrity": "sha1-QKNmnzsHfR6UPURinhV91IAjvy4=",
"dev": true,
"requires": {
@@ -10147,7 +10147,7 @@
},
"shasum": {
"version": "1.0.2",
- "resolved": "http://registry.npmjs.org/shasum/-/shasum-1.0.2.tgz",
+ "resolved": "https://registry.npmjs.org/shasum/-/shasum-1.0.2.tgz",
"integrity": "sha1-5wEjENj0F/TetXEhUOVni4euVl8=",
"requires": {
"json-stable-stringify": "~0.0.0",
@@ -10572,7 +10572,7 @@
},
"stream-browserify": {
"version": "2.0.1",
- "resolved": "http://registry.npmjs.org/stream-browserify/-/stream-browserify-2.0.1.tgz",
+ "resolved": "https://registry.npmjs.org/stream-browserify/-/stream-browserify-2.0.1.tgz",
"integrity": "sha1-ZiZu5fm9uZQKTkUUyvtDu3Hlyds=",
"requires": {
"inherits": "~2.0.1",
@@ -11340,7 +11340,7 @@
},
"unique-stream": {
"version": "1.0.0",
- "resolved": "http://registry.npmjs.org/unique-stream/-/unique-stream-1.0.0.tgz",
+ "resolved": "https://registry.npmjs.org/unique-stream/-/unique-stream-1.0.0.tgz",
"integrity": "sha1-1ZpKdUJ0R9mqbJHnAmP40mpLEEs=",
"dev": true
},
@@ -11574,7 +11574,7 @@
},
"readable-stream": {
"version": "1.0.34",
- "resolved": "http://registry.npmjs.org/readable-stream/-/readable-stream-1.0.34.tgz",
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.0.34.tgz",
"integrity": "sha1-Elgg40vIQtLyqq+v5MKRbuMsFXw=",
"dev": true,
"requires": {
@@ -11586,13 +11586,13 @@
},
"string_decoder": {
"version": "0.10.31",
- "resolved": "http://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz",
+ "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz",
"integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ=",
"dev": true
},
"through2": {
"version": "0.6.5",
- "resolved": "http://registry.npmjs.org/through2/-/through2-0.6.5.tgz",
+ "resolved": "https://registry.npmjs.org/through2/-/through2-0.6.5.tgz",
"integrity": "sha1-QaucZ7KdVyCQcUEOHXp6lozTrUg=",
"dev": true,
"requires": {
diff --git a/package.json b/package.json
index 7cf4a87..60b0f41 100644
--- a/package.json
+++ b/package.json
@@ -63,7 +63,7 @@
},
"devDependencies": {
"expect.js": "^0.3.1",
- "gulp": "^3.9.1",
+ "gulp": "3.9.1",
"gulp-google-webfonts": "0.0.14",
"gulp-rename": "^1.4.0",
"gulp-sass": "^3.2.1",
diff --git a/server/RightByMe.js b/server/RightByMe.js
index 037ca58..84c75a9 100644
--- a/server/RightByMe.js
+++ b/server/RightByMe.js
@@ -20,6 +20,10 @@ const twitterClient = new Twitter({
logger.level = 'debug';
+// Google API key: never commit it. Load it from the environment (e.g. process.env.GOOGLE_API_KEY) and rotate any key that was pushed.
+
+
+
function nowTS() {
const now = new Date();
@@ -322,7 +326,7 @@ function doGetMoreDetail(id) {
.then((d) => {
logger.info('Final', d.name, d.id);
jsonfile.writeFileSync(`output/${d.id}-doGetMoreDetail.json`, d);
-
+
return resolve(d);
});
});
diff --git a/server/euronews.js b/server/euronews.js
index 4b89439..975d0dd 100644
--- a/server/euronews.js
+++ b/server/euronews.js
@@ -3,7 +3,7 @@ const fecha = require('fecha');
const request = require('request');
const http = require('http');
-const { reduceArticle } = require('./reducers/euronews');
+const { reduceArticle, reduceArticleV2 } = require('./reducers/euronews');
const logger = require('log4js').getLogger('Euronews');
logger.level = 'debug';
@@ -40,7 +40,7 @@ class Template {
function doGetEuroNews() {
return new Promise((resolve, reject) => {
- logger.info('Retrieving Euronews Headlines..');
+ logger.info('doGetEuroNews:Retrieving Euronews Headlines..');
// http://feeds.feedburner.com/euronews/en/home/
// http://feeds.feedburner.com/euronews/en/news/
@@ -114,7 +114,7 @@ function doGetArticle(guid = '') {
return reject(err);
// Throw err;
- const output = reduceArticle(body);
+ const output = reduceArticleV2(body);
logger.debug(JSON.stringify(output));
diff --git a/server/lib/readability.js b/server/lib/readability.js
new file mode 100644
index 0000000..375d22b
--- /dev/null
+++ b/server/lib/readability.js
@@ -0,0 +1,358 @@
+var readabilityVersion = '2'; // NOTE(review): not read anywhere in the visible portion of this file
+var readStyle = 'style-ebook'; // className applied to document.body and to the #readOverlay div
+var readSize = 'size-medium'; // second class placed on the #readInner div
+var readMargin = 'margin-wide'; // first class placed on the #readInner div
+(function() {
+  // Bootstrap: strips page scripts, loads jQuery, then replaces the page body with an overlay holding the article.
+  // Badly broken HTML can leave the document without a <body>; create one before anything below touches it.
+  if (document.body == null) document.body = document.createElement('body');
+
+  // Remove existing scripts so they cannot conflict. Walk backwards: the collection is live and shrinks on removal.
+  var docscripts = document.getElementsByTagName('script');
+  for (var k = docscripts.length - 1; k >= 0; k--)
+    if (docscripts[k].src != null && ! docscripts[k].src.match(/readability|[Cc]lippability/))
+      docscripts[k].parentNode.removeChild(docscripts[k]);
+
+  // Load jQuery over HTTPS (plain http is blocked as mixed content on https pages); attach the handler before inserting.
+  var gjs = document.createElement('SCRIPT');
+  gjs.type = 'text/javascript';
+  gjs.src = 'https://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js';
+  gjs.onload = gjs.onreadystatechange = function() {
+    $('script').each(function() {
+      // jQuery gets scripts inside of conditional comments far more easily than I could figure out
+      if (! this.src.match(/readability|[Cc]lippability|jquery\.min\.js$/)) $(this).remove();
+    });
+  };
+  document.getElementsByTagName('head')[0].appendChild(gjs);
+
+  var objOverlay = document.createElement('div');
+  var objinnerDiv = document.createElement('div');
+
+  objOverlay.id = 'readOverlay';
+  objinnerDiv.id = 'readInner';
+
+  // Apply user-selected styling:
+  document.body.className = readStyle;
+  objOverlay.className = readStyle;
+  objinnerDiv.className = `${readMargin } ${ readSize}`;
+
+  // Extract the article while the original page is still intact, then nest it: overlay > inner div > article.
+  objinnerDiv.appendChild(grabArticle());
+  objOverlay.appendChild(objinnerDiv);
+
+  // Replace the whole page content with the overlay.
+  document.body.innerHTML = '';
+  document.body.insertBefore(objOverlay, document.body.firstChild);
+
+  // NOTE(review): nothing in this file captures the IIFE's return value; presumably the host that
+  // evaluates this script reads it - confirm against the caller.
+  var o = document.body.firstChild;
+
+  return o.innerHTML;
+})();
+
+// Returns every descendant of `node` (default: the first <body>) whose className matches \b<classname>\b.
+function getElementsByClassName(classname, node) {
+  var root = node ? node : document.getElementsByTagName('body')[0];
+  var wordPattern = new RegExp(`\\b${ classname }\\b`);
+  var candidates = root.getElementsByTagName('*');
+  var matches = [];
+  for (var idx = 0, total = candidates.length; idx < total; idx++)
+    if (wordPattern.test(candidates[idx].className)) matches.push(candidates[idx]);
+  return matches;
+}
+
+function grabArticle() {
+ var allParagraphs = document.getElementsByTagName('p');
+ var topDivCount = 0;
+ var topDiv = null;
+ var topDivParas;
+
+ var articleContent = document.createElement('DIV');
+ var articleTitle = document.createElement('H1');
+ var articleFooter = document.createElement('DIV');
+
+ // Replace all doubled-up
tags with
tags, and remove fonts.
+ var pattern = new RegExp ('
[ \r\n\s]*
', 'g');
+ document.body.innerHTML = document.body.innerHTML.replace(pattern, '
').replace(/<\/?font[^>]*>/g, ''); + + // Grab the title from the
's, commas, special classes, etc. + for (var j = 0; j < allParagraphs.length; j++) { + parentNode = allParagraphs[j].parentNode; + + // Initialize readability data + if(typeof parentNode.readability === 'undefined') { + parentNode.readability = { 'contentScore': 0 }; + + // Look for a special classname + if(parentNode.className.match(/(comment|meta|footer|footnote)/)) + parentNode.readability.contentScore -= 50; + else if(parentNode.className.match(/((^|\\s)(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)(\\s|$))/)) + parentNode.readability.contentScore += 25; + + // Look for a special ID + if(parentNode.id.match(/(comment|meta|footer|footnote)/)) + parentNode.readability.contentScore -= 50; + else if(parentNode.id.match(/^(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)$/)) + parentNode.readability.contentScore += 25; + } + + // Add a point for the paragraph found + if(getInnerText(allParagraphs[j]).length > 10) + parentNode.readability.contentScore++; + + // Add points for any commas within this paragraph + parentNode.readability.contentScore += getCharCount(allParagraphs[j]); + } + + // Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5 + for(nodeIndex = 0; (node = document.getElementsByTagName('*')[nodeIndex]); nodeIndex++) + if(typeof node.readability !== 'undefined' && (topDiv == null || node.readability.contentScore > topDiv.readability.contentScore)) + topDiv = node; + + if(topDiv == null) { + topDiv = document.createElement('div'); + topDiv.innerHTML = 'Sorry, clippable was unable to parse this page for content. If you feel like it should have been able to, please let us know.'; + } + + // REMOVES ALL STYLESHEETS ... 
+ for (var k = 0;k < document.styleSheets.length; k++) + if (document.styleSheets[k].href != null && document.styleSheets[k].href.lastIndexOf('readability') == -1) + document.styleSheets[k].disabled = true; + + var sh = getElementsByClassName('syntaxhighlighter'); + for (var i = 0;i < sh.length;i++) { + var bar = getElementsByClassName('toolbar', sh[i]); + if (bar.length > 0) + for (var bn = 0;bn < bar.length;bn++) + bar[bn].parentNode.removeChild(bar[bn]); + + var numbers = getElementsByClassName('number', sh[i]); + if (numbers.length > 0) + for (var num = 0;num < numbers.length;num++) + numbers[num].parentNode.removeChild(numbers[num]); + } + + var dp = getElementsByClassName('dp-highlighter'); + for (var d = 0;d < dp.length;d++) + dp[d].parentNode.removeChild(dp[d]); + + var sth = getElementsByClassName('standardLighter'); + for (d = 0;d < sth.length;d++) + sth[d].parentNode.removeChild(sth[d]); + + // Remove all style tags in head (not doing this on IE) : + var styleTags = document.getElementsByTagName('style'); + for (var l = 0;l < styleTags.length; l++) + if (navigator.appName != 'Microsoft Internet Explorer') + styleTags[l].textContent = ''; + + topDiv = killCodeSpans(topDiv); // removes span tags + cleanStyles(topDiv); // Removes all style attributes + topDiv = killDivs(topDiv); // Goes in and removes DIV's that have more non
stuff than
stuff
+ topDiv = killBreaks(topDiv); // Removes any consecutive
's into just one
+
+ // Cleans out junk from the topDiv just in case:
+ topDiv = clean(topDiv, 'form');
+ // topDiv = clean(topDiv, "object");
+ topDiv = clean(topDiv, 'table', 8);
+ topDiv = clean(topDiv, 'h1');
+ // topDiv = clean(topDiv, "h2");
+ topDiv = clean(topDiv, 'iframe');
+
+ // Add the footer and contents:
+ articleFooter.id = 'readFooter';
+ articleFooter.innerHTML = `\
+ \
+ \
+ Follow us on Twitter »\
+