diff --git a/lib/RssBraider.js b/lib/RssBraider.js index 2359231..eb8c2c6 100644 --- a/lib/RssBraider.js +++ b/lib/RssBraider.js @@ -1,19 +1,20 @@ // process feed-reader item into node-rss item -var FeedParser = require('feedparser'), - bunyan = require('bunyan'), - _ = require('lodash'), - async = require('async'), - request = require('request'), - RSS = require('rss'), - fs = require('fs'); +var FeedParser = require('feedparser'), + bunyan = require('bunyan'), + _ = require('lodash'), + async = require('async'), + request = require('request'), + RSS = require('rss'), + fs = require('fs'), + package_json = require('../package.json'), + logger; -var logger; var RssBraider = function (options) { if (!options) { options = {}; } this.feeds = options.feeds || null; - this.logger = options.logger || bunyan.createLogger({name: 'rss-braider'}); + this.logger = options.logger || bunyan.createLogger({name: package_json.name}); if (options.log_level) { this.logger.level(options.log_level); @@ -33,7 +34,7 @@ RssBraider.prototype.loadPlugins = function () { var self = this; if (self.plugins_directories.length < 1) { - // self.logger.info("No plugins_directories specified. No plugins loaded."); + self.logger.debug("No plugins_directories specified. 
No plugins loaded."); } self.plugins_directories.forEach(function(path){ // load up each file and assign it to the plugins @@ -44,7 +45,7 @@ RssBraider.prototype.loadPlugins = function () { self.logger.warn("Duplicate plugin name: ", plugin_name, "Overwriting with newer plugin"); } self.plugins[plugin_name] = require(path + '/' + plugin_name); - // self.logger.info("plugin registered:", plugin_name); + self.logger.debug("plugin registered:", plugin_name); }); }); }; @@ -61,33 +62,30 @@ RssBraider.prototype.feedExists = function (feed_name) { // trim down to desired count, dedupe and sort RssBraider.prototype.processFeed = function(feed_name, format, callback) { - if (!format) { - format = 'rss'; - } var self = this, feed = self.feeds[feed_name], feed_articles = []; - // self.logger.info("DEBUG processFeed: feed is set to " + feed_name); + if (!format) { + format = 'rss'; + } if (!feed || !feed.sources || feed.sources.length < 1) { return callback("No definition for feed name: " + feed_name); } + // Process each feed source through Feedparser to get articles. 
+ // Then process each item/article through rss-braider and any plugins async.each(feed.sources, function(source, callback) { - var count = source.count || feed.default_count || 10, // Number of articles + var count = source.count || feed.default_count || 10, // Number of articles per source url = source.feed_url || null, file_path = source.file_path || null, source_articles = []; - // self.logger.debug("Requesting source:" + source.name + " at " + url + " for feed:" + feed_name); - // todo: Check if source.file is set and set up a fs stream read var feedparser = new FeedParser(); if (url) { var req = request(url); - // self.logger.info("request to", url); - req.on('error', function (error) { self.logger.error(error); }); @@ -123,6 +121,8 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback) } // Process Item/Article var article = self.processItem(item, source, feed_name); + + // plugins may filter items and return null if (article) { source_articles.push(article); } @@ -130,7 +130,7 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback) }); feedparser.on("end", function(){ - // sort and de-dupe this feed's articles and push them into array + // de-dupe, date sort, and trim this feed's articles and push them into array source_articles = self.dedupe(source_articles, self.dedupe_fields); source_articles = self.date_sort(source_articles); source_articles = source_articles.slice(0, count); @@ -147,7 +147,7 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback) feed_articles = self.dedupe(feed_articles, self.dedupe_fields); feed_articles = self.date_sort(feed_articles); - // Create new feed with these articles + // Create new feed with these articles. 
Follows node-rss spec var options = { title : feed.meta.title, description : feed.meta.description, @@ -230,7 +230,6 @@ RssBraider.prototype.runPlugins = function (item, itemOptions, source, feed_name // A plugin returning -1 means skip this item if (filteredItemOptions === -1) { - var short_title = item.title.substring(0,25); self.logger.debug("Plugin '" + plugin_name + "' filtered item from feed '" + feed.meta.title + "'", item.guid); itemOptions = null; break; @@ -252,6 +251,7 @@ RssBraider.prototype.runPlugins = function (item, itemOptions, source, feed_name // operation on the articles array // TODO, make this a plugin? RssBraider.prototype.dedupe = function(articles_arr, fields){ + var self = this; if ( !fields || fields.length < 1 ) { return _.uniq(articles_arr); } else { @@ -273,8 +273,9 @@ RssBraider.prototype.dedupe = function(articles_arr, fields){ // it's unique deduped_articles.push(article); } else { - // The article matched all of another article's fields - // Do nothing + // The article matched all of another article's "dedupe" fields + // so filter it out (i.e. do nothing) + self.logger.debug("skipping duplicate", '"' + article.title + '"', article.guid); } }); return deduped_articles; diff --git a/test/index.js b/test/index.js index 199c696..21c7ab5 100644 --- a/test/index.js +++ b/test/index.js @@ -59,6 +59,7 @@ test('de-duplicate feed', function(t) { plugins_directories : [__dirname + '/../examples/plugins/'] }; var rss_braider = RssBraider.createClient(braider_options); + rss_braider.logger.level('info'); rss_braider.processFeed('sample_feed', 'rss', function(err, data){ if (err) { @@ -133,7 +134,6 @@ test('filter all articles out using plugin', function(t) { }); }); - test("Don't break when a filter fails and returns null", function(t) { t.plan(1); var feeds = {};