diff --git a/examples/filefeed.js b/examples/filefeed.js index e76f408..5175c37 100644 --- a/examples/filefeed.js +++ b/examples/filefeed.js @@ -2,6 +2,7 @@ var feed = { "feed_name" : "test file feed", "default_count" : 1, "no_cdata_fields" : ['description'], + "plugins" : ['kqed', 'content_encoded', 'wfw_slash_comments', 'add_media_thumbnail'], "meta" : { "title": "Test File Feed", "description": "This feed comes from a file", diff --git a/lib/RssBraider.js b/lib/RssBraider.js index 96573c5..5183bdd 100644 --- a/lib/RssBraider.js +++ b/lib/RssBraider.js @@ -6,43 +6,63 @@ var FeedParser = require('feedparser'), async = require('async'), request = require('request'), RSS = require('rss'), - fs = require('fs'), - util = require('util'); // DEBUG - + fs = require('fs'); var logger; var RssBraider = function (options) { - this.feeds = options.feeds || null; // TOOD validate feed configs + this.feeds = options.feeds || null; this.logger = logger = options.logger || bunyan.createLogger({name: 'rss-braider'}); - this.indent = options.indent || " "; - this.dedupe_fields = options.dedupe_fields || []; + this.indent = options.indent || "\t"; + this.dedupe_fields = options.dedupe_fields || []; // The fields to use to identify duplicate articles this.date_sort_order = options.date_sort_order || "desc"; + + // load plugins from plugins folder + // TODO, specify plugins location + this.plugins = {}; + this.loadPlugins(); + }; -RssBraider.prototype.init = function() { - // Validate the feeds? +// loadup self.plugins with the plugin functions +RssBraider.prototype.loadPlugins = function () { + var self = this, + path = __dirname + '/plugins', + filenames = fs.readdirSync(path); + + // load up each file and assign it to the plugins + filenames.forEach(function(filename){ + var plugin_name = filename.replace(/.js$/, ''); + self.plugins[plugin_name] = require(path + '/' + plugin_name); + // logger.info("plugin loaded:", plugin_name); + }); }; -RssBraider.prototype.feedExists = function (feed_name) { - if (this.feeds && this.feeds[feed_name]) { - return true; - } else { - return false; - } -}; +// RssBraider.prototype.feedExists = function (feed_name) { +// if (this.feeds && this.feeds[feed_name]) { +// return true; +// } else { +// return false; +// } +// }; +// Gather data from all feed sources, process each article/item through plugins, +// trim down to desired count, dedupe and sort RssBraider.prototype.processFeed = function(feed_name, format, callback) { - // DEBUG - console.time("process"); - if (!format) { - format = 'json'; + format = 'rss'; } var self = this, feed = this.feeds[feed_name], feed_articles = []; + // set these for the request + self.feed_name = feed_name; + self.format = format; + self.feed = feed; + + // logger.info("DEBUG processFeed: feed is set to " + feed_name); + if (!feed || !feed.sources || feed.sources.length < 1) { return callback("No definition for feed name: " + feed_name); } @@ -153,6 +173,8 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback) // Accepts a feed-parser item and builds a node-rss itemOptions object RssBraider.prototype.processItem = function (item) { + var self = this; + if (!item) { logger.error("processItem: no item passed in"); return null; @@ -170,96 +192,33 @@ RssBraider.prototype.processItem = function (item) { custom_elements : [] }; - - ////////////////// - // Custom elements Move to PLUGINS - ////////////////// - - // kqed source - if (item.source_url) { - itemOptions.custom_elements.push( - { 'kqed:source': item.source_url } - ); - } - - // content:encoded (i.e. description) - if (item["content:encoded"] && item["content:encoded"]["#"]){ - var content_encoded = item["content:encoded"]["#"]; - itemOptions.custom_elements.push( - { "content:encoded": - { - _cdata: content_encoded - } - } - ); - } - - // // wfw - if (item["wfw:commentrss"] && item["wfw:commentrss"]["#"]){ - itemOptions.custom_elements.push({ "wfw:commentRss": item["wfw:commentrss"]["#"]}); - } - - // // // slash comments - if (item["slash:comments"] && item["slash:comments"]["#"]){ - itemOptions.custom_elements.push({ "slash:comments": item["slash:comments"]["#"]}); - } - - // Images - // Take 'media:thumbnail', - // else - // 'media:content'[0]'media:thumbnail' - // else - // 'media:thumbnail' - var thumbnail; - if (item['media:thumbnail'] && item['media:thumbnail']['#']) { - thumbnail = { - 'media:thumbnail': item['media:thumbnail']['#'] - }; - itemOptions.custom_elements.push(thumbnail); - } else { - if (item["media:content"]) { - var media_contents; - if (! _.isArray(item['media:content'])) { - media_contents = [item['media:content']]; - } else { - media_contents = item['media:content']; - } - - if ( media_contents[0] && - media_contents[0]['media:thumbnail'] && - media_contents[0]['media:thumbnail']['@'] && - media_contents[0]['media:thumbnail']['@'].url) { - - thumbnail = { - 'media:thumbnail' : [{ - _attr: { - url: media_contents[0]['media:thumbnail']['@'].url - } - }] - }; - // itemOptions.custom_elements.push({'media:thumbnail' : { url: media_contents[0]['media:thumbnail']['@'].url}} ); - itemOptions.custom_elements.push(thumbnail); - } else { - thumbnail = { - 'media:thumbnail' : [{ - _attr: { - url: media_contents[0]['@'].url - } - }] - }; - itemOptions.custom_elements.push(thumbnail); - } - } - } + // Run the plugins specified by the "plugins" section of the + // feed config file to build out any custom elements or + // do transforms + self.runPlugins(item, itemOptions); return itemOptions; }; +RssBraider.prototype.runPlugins = function (item, itemOptions) { + var self = this, + feed_plugins = self.feed.plugins || []; + + // Process the item through the desired feed plugins + feed_plugins.forEach(function(plugin_name){ + if (self.plugins[plugin_name]) { + // logger.info("DEBUG runPlugins running " + plugin_name); + self.plugins[plugin_name](item, itemOptions); + } else { + logger.error("A plugin named '" + plugin_name + "' hasn't been registered"); + } + }); +}; + // Dedupe articles in node-rss itemOptions format // Accepts an array of fields to dedupe on, or does a basic uniq // operation on the articles array RssBraider.prototype.dedupe = function(articles_arr, fields){ - // logger.info("dedupe fields", fields); if ( !fields || fields.length < 1 ) { return _.uniq(articles_arr); } else { diff --git a/lib/plugins/add_media_thumbnail.js b/lib/plugins/add_media_thumbnail.js new file mode 100644 index 0000000..f24d916 --- /dev/null +++ b/lib/plugins/add_media_thumbnail.js @@ -0,0 +1,54 @@ +// Add a media:thumbnail element +// Take 'media:thumbnail', +// else +// 'media:content'[0]'media:thumbnail' +// else +// 'media:thumbnail' +var _ = require('lodash'); +module.exports = function (item, itemOptions) { + if (!item || !itemOptions) { + return; + } + + var thumbnail; + if (item['media:thumbnail'] && item['media:thumbnail']['#']) { + thumbnail = { + 'media:thumbnail': item['media:thumbnail']['#'] + }; + itemOptions.custom_elements.push(thumbnail); + } else { + if (item["media:content"]) { + var media_contents; + if (! _.isArray(item['media:content'])) { + media_contents = [item['media:content']]; + } else { + media_contents = item['media:content']; + } + + if ( media_contents[0] && + media_contents[0]['media:thumbnail'] && + media_contents[0]['media:thumbnail']['@'] && + media_contents[0]['media:thumbnail']['@'].url) { + + thumbnail = { + 'media:thumbnail' : [{ + _attr: { + url: media_contents[0]['media:thumbnail']['@'].url + } + }] + }; + // itemOptions.custom_elements.push({'media:thumbnail' : { url: media_contents[0]['media:thumbnail']['@'].url}} ); + itemOptions.custom_elements.push(thumbnail); + } else { + thumbnail = { + 'media:thumbnail' : [{ + _attr: { + url: media_contents[0]['@'].url + } + }] + }; + itemOptions.custom_elements.push(thumbnail); + } + } + } +}; \ No newline at end of file diff --git a/lib/plugins/content_encoded.js b/lib/plugins/content_encoded.js new file mode 100644 index 0000000..ea1efa4 --- /dev/null +++ b/lib/plugins/content_encoded.js @@ -0,0 +1,16 @@ +module.exports = function (item, itemOptions) { + if (!item || !itemOptions) { + return; + } + // content:encoded (i.e. description) + if (item["content:encoded"] && item["content:encoded"]["#"]){ + var content_encoded = item["content:encoded"]["#"]; + itemOptions.custom_elements.push( + { "content:encoded": + { + _cdata: content_encoded + } + } + ); + } +}; \ No newline at end of file diff --git a/lib/plugins/kqed.js b/lib/plugins/kqed.js new file mode 100644 index 0000000..d25e6c8 --- /dev/null +++ b/lib/plugins/kqed.js @@ -0,0 +1,11 @@ +// define kqed source +module.exports = function (item, itemOptions) { + if (!item || !itemOptions) { + return; + } + if (item.source_url) { + itemOptions.custom_elements.push( + { 'kqed:source': item.source_url } + ); + } +}; \ No newline at end of file diff --git a/lib/plugins/wfw_slash_comments.js b/lib/plugins/wfw_slash_comments.js new file mode 100644 index 0000000..bc7e2f8 --- /dev/null +++ b/lib/plugins/wfw_slash_comments.js @@ -0,0 +1,14 @@ +module.exports = function (item, itemOptions) { + if (!item || !itemOptions) { + return; + } + // wfw + if (item["wfw:commentrss"] && item["wfw:commentrss"]["#"]){ + itemOptions.custom_elements.push({ "wfw:commentRss": item["wfw:commentrss"]["#"]}); + } + + // // // slash comments + if (item["slash:comments"] && item["slash:comments"]["#"]){ + itemOptions.custom_elements.push({ "slash:comments": item["slash:comments"]["#"]}); + } +}; \ No newline at end of file diff --git a/test/input_files/date_sort.js b/test/input_files/date_sort.js index e17be6e..6cd2efa 100644 --- a/test/input_files/date_sort.js +++ b/test/input_files/date_sort.js @@ -2,6 +2,7 @@ var feed = { "feed_name" : "test file feed", "default_count" : 1, "no_cdata_fields" : ['description'], + "plugins" : ['kqed', 'content_encoded', 'wfw_slash_comments', 'add_media_thumbnail'], "meta" : { "title": "Test File Feed", "description": "This feed comes from a file", diff --git a/test/input_files/sample_feed.js b/test/input_files/sample_feed.js index b3382cb..b8ce504 100644 --- a/test/input_files/sample_feed.js +++ b/test/input_files/sample_feed.js @@ -1,7 +1,8 @@ var feed = { "feed_name" : "test file feed", "default_count" : 1, - "no_cdata_fields" : ['description'], + "no_cdata_fields" : ['description'], + "plugins" : ['kqed', 'content_encoded', 'wfw_slash_comments', 'add_media_thumbnail'], "meta" : { "title": "Test File Feed", "description": "This feed comes from a file", diff --git a/test/input_files/sample_feed_duplicates.js b/test/input_files/sample_feed_duplicates.js index 027b18e..ca4b2e4 100644 --- a/test/input_files/sample_feed_duplicates.js +++ b/test/input_files/sample_feed_duplicates.js @@ -2,6 +2,7 @@ var feed = { "feed_name" : "test file feed", "default_count" : 1, "no_cdata_fields" : ['description'], + "plugins" : ['kqed', 'content_encoded', 'wfw_slash_comments', 'add_media_thumbnail'], "meta" : { "title": "Test File Feed", "description": "This feed comes from a file",