RSS-26: Cleanup, logging, commenting

This commit is contained in:
Kip Gebhardt 2015-06-12 12:52:55 -07:00
parent 5080b8cb64
commit a82a06c0bb
2 changed files with 27 additions and 26 deletions

View File

@ -1,19 +1,20 @@
// process feed-reader item into node-rss item // process feed-reader item into node-rss item
var FeedParser = require('feedparser'), var FeedParser = require('feedparser'),
bunyan = require('bunyan'), bunyan = require('bunyan'),
_ = require('lodash'), _ = require('lodash'),
async = require('async'), async = require('async'),
request = require('request'), request = require('request'),
RSS = require('rss'), RSS = require('rss'),
fs = require('fs'); fs = require('fs'),
package_json = require('../package.json'),
logger;
var logger;
var RssBraider = function (options) { var RssBraider = function (options) {
if (!options) { if (!options) {
options = {}; options = {};
} }
this.feeds = options.feeds || null; this.feeds = options.feeds || null;
this.logger = options.logger || bunyan.createLogger({name: 'rss-braider'}); this.logger = options.logger || bunyan.createLogger({name: package_json.name});
if (options.log_level) { if (options.log_level) {
this.logger.level(options.log_level); this.logger.level(options.log_level);
@ -33,7 +34,7 @@ RssBraider.prototype.loadPlugins = function () {
var self = this; var self = this;
if (self.plugins_directories.length < 1) { if (self.plugins_directories.length < 1) {
// self.logger.info("No plugins_directories specified. No plugins loaded."); self.logger.debug("No plugins_directories specified. No plugins loaded.");
} }
self.plugins_directories.forEach(function(path){ self.plugins_directories.forEach(function(path){
// load up each file and assign it to the plugins // load up each file and assign it to the plugins
@ -44,7 +45,7 @@ RssBraider.prototype.loadPlugins = function () {
self.logger.warn("Duplicate plugin name: ", plugin_name, "Overwriting with newer plugin"); self.logger.warn("Duplicate plugin name: ", plugin_name, "Overwriting with newer plugin");
} }
self.plugins[plugin_name] = require(path + '/' + plugin_name); self.plugins[plugin_name] = require(path + '/' + plugin_name);
// self.logger.info("plugin registered:", plugin_name); self.logger.debug("plugin registered:", plugin_name);
}); });
}); });
}; };
@ -61,33 +62,30 @@ RssBraider.prototype.feedExists = function (feed_name) {
// trim down to desired count, dedupe and sort // trim down to desired count, dedupe and sort
RssBraider.prototype.processFeed = function(feed_name, format, callback) RssBraider.prototype.processFeed = function(feed_name, format, callback)
{ {
if (!format) {
format = 'rss';
}
var self = this, var self = this,
feed = self.feeds[feed_name], feed = self.feeds[feed_name],
feed_articles = []; feed_articles = [];
// self.logger.info("DEBUG processFeed: feed is set to " + feed_name); if (!format) {
format = 'rss';
}
if (!feed || !feed.sources || feed.sources.length < 1) { if (!feed || !feed.sources || feed.sources.length < 1) {
return callback("No definition for feed name: " + feed_name); return callback("No definition for feed name: " + feed_name);
} }
// Process each feed source through Feedparser to get articles.
// Then process each item/article through rss-braider and any plugins
async.each(feed.sources, function(source, callback) { async.each(feed.sources, function(source, callback) {
var count = source.count || feed.default_count || 10, // Number of articles var count = source.count || feed.default_count || 10, // Number of articles per source
url = source.feed_url || null, url = source.feed_url || null,
file_path = source.file_path || null, file_path = source.file_path || null,
source_articles = []; source_articles = [];
// self.logger.debug("Requesting source:" + source.name + " at " + url + " for feed:" + feed_name);
// todo: Check if source.file is set and set up a fs stream read
var feedparser = new FeedParser(); var feedparser = new FeedParser();
if (url) { if (url) {
var req = request(url); var req = request(url);
// self.logger.info("request to", url);
req.on('error', function (error) { req.on('error', function (error) {
self.logger.error(error); self.logger.error(error);
}); });
@ -123,6 +121,8 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback)
} }
// Process Item/Article // Process Item/Article
var article = self.processItem(item, source, feed_name); var article = self.processItem(item, source, feed_name);
// plugins may filter items and return null
if (article) { if (article) {
source_articles.push(article); source_articles.push(article);
} }
@ -130,7 +130,7 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback)
}); });
feedparser.on("end", function(){ feedparser.on("end", function(){
// sort and de-dupe this feed's articles and push them into array // de-dupe, date sort, and trim this feed's articles and push them into array
source_articles = self.dedupe(source_articles, self.dedupe_fields); source_articles = self.dedupe(source_articles, self.dedupe_fields);
source_articles = self.date_sort(source_articles); source_articles = self.date_sort(source_articles);
source_articles = source_articles.slice(0, count); source_articles = source_articles.slice(0, count);
@ -147,7 +147,7 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback)
feed_articles = self.dedupe(feed_articles, self.dedupe_fields); feed_articles = self.dedupe(feed_articles, self.dedupe_fields);
feed_articles = self.date_sort(feed_articles); feed_articles = self.date_sort(feed_articles);
// Create new feed with these articles // Create new feed with these articles. Follows node-rss spec
var options = { var options = {
title : feed.meta.title, title : feed.meta.title,
description : feed.meta.description, description : feed.meta.description,
@ -230,7 +230,6 @@ RssBraider.prototype.runPlugins = function (item, itemOptions, source, feed_name
// A plugin returning -1 means skip this item // A plugin returning -1 means skip this item
if (filteredItemOptions === -1) { if (filteredItemOptions === -1) {
var short_title = item.title.substring(0,25);
self.logger.debug("Plugin '" + plugin_name + "' filtered item from feed '" + feed.meta.title + "'", item.guid); self.logger.debug("Plugin '" + plugin_name + "' filtered item from feed '" + feed.meta.title + "'", item.guid);
itemOptions = null; itemOptions = null;
break; break;
@ -252,6 +251,7 @@ RssBraider.prototype.runPlugins = function (item, itemOptions, source, feed_name
// operation on the articles array // operation on the articles array
// TODO, make this a plugin? // TODO, make this a plugin?
RssBraider.prototype.dedupe = function(articles_arr, fields){ RssBraider.prototype.dedupe = function(articles_arr, fields){
var self = this;
if ( !fields || fields.length < 1 ) { if ( !fields || fields.length < 1 ) {
return _.uniq(articles_arr); return _.uniq(articles_arr);
} else { } else {
@ -273,8 +273,9 @@ RssBraider.prototype.dedupe = function(articles_arr, fields){
// it's unique // it's unique
deduped_articles.push(article); deduped_articles.push(article);
} else { } else {
// The article matched all of another article's fields // The article matched all of another article's "dedupe" fields
// Do nothing // so filter it out (i.e. do nothing)
self.logger.debug("skipping duplicate", '"' + article.title + '"', article.guid);
} }
}); });
return deduped_articles; return deduped_articles;

View File

@ -59,6 +59,7 @@ test('de-duplicate feed', function(t) {
plugins_directories : [__dirname + '/../examples/plugins/'] plugins_directories : [__dirname + '/../examples/plugins/']
}; };
var rss_braider = RssBraider.createClient(braider_options); var rss_braider = RssBraider.createClient(braider_options);
rss_braider.logger.level('info');
rss_braider.processFeed('sample_feed', 'rss', function(err, data){ rss_braider.processFeed('sample_feed', 'rss', function(err, data){
if (err) { if (err) {
@ -133,7 +134,6 @@ test('filter all articles out using plugin', function(t) {
}); });
}); });
test("Don't break when a filter fails and returns null", function(t) { test("Don't break when a filter fails and returns null", function(t) {
t.plan(1); t.plan(1);
var feeds = {}; var feeds = {};