RSS-26: Cleanup, logging, commenting

This commit is contained in:
Kip Gebhardt 2015-06-12 12:52:55 -07:00
parent 5080b8cb64
commit a82a06c0bb
2 changed files with 27 additions and 26 deletions

View File

@ -1,19 +1,20 @@
// process feed-reader item into node-rss item
var FeedParser = require('feedparser'),
bunyan = require('bunyan'),
_ = require('lodash'),
async = require('async'),
request = require('request'),
RSS = require('rss'),
fs = require('fs');
var FeedParser = require('feedparser'),
bunyan = require('bunyan'),
_ = require('lodash'),
async = require('async'),
request = require('request'),
RSS = require('rss'),
fs = require('fs'),
package_json = require('../package.json'),
logger;
var logger;
var RssBraider = function (options) {
if (!options) {
options = {};
}
this.feeds = options.feeds || null;
this.logger = options.logger || bunyan.createLogger({name: 'rss-braider'});
this.logger = options.logger || bunyan.createLogger({name: package_json.name});
if (options.log_level) {
this.logger.level(options.log_level);
@ -33,7 +34,7 @@ RssBraider.prototype.loadPlugins = function () {
var self = this;
if (self.plugins_directories.length < 1) {
// self.logger.info("No plugins_directories specified. No plugins loaded.");
self.logger.debug("No plugins_directories specified. No plugins loaded.");
}
self.plugins_directories.forEach(function(path){
// load up each file and assign it to the plugins
@ -44,7 +45,7 @@ RssBraider.prototype.loadPlugins = function () {
self.logger.warn("Duplicate plugin name: ", plugin_name, "Overwriting with newer plugin");
}
self.plugins[plugin_name] = require(path + '/' + plugin_name);
// self.logger.info("plugin registered:", plugin_name);
self.logger.debug("plugin registered:", plugin_name);
});
});
};
@ -61,33 +62,30 @@ RssBraider.prototype.feedExists = function (feed_name) {
// trim down to desired count, dedupe and sort
RssBraider.prototype.processFeed = function(feed_name, format, callback)
{
if (!format) {
format = 'rss';
}
var self = this,
feed = self.feeds[feed_name],
feed_articles = [];
// self.logger.info("DEBUG processFeed: feed is set to " + feed_name);
if (!format) {
format = 'rss';
}
if (!feed || !feed.sources || feed.sources.length < 1) {
return callback("No definition for feed name: " + feed_name);
}
// Process each feed source through Feedparser to get articles.
// Then process each item/article through rss-braider and any plugins
async.each(feed.sources, function(source, callback) {
var count = source.count || feed.default_count || 10, // Number of articles
var count = source.count || feed.default_count || 10, // Number of articles per source
url = source.feed_url || null,
file_path = source.file_path || null,
source_articles = [];
// self.logger.debug("Requesting source:" + source.name + " at " + url + " for feed:" + feed_name);
// todo: Check if source.file is set and set up a fs stream read
var feedparser = new FeedParser();
if (url) {
var req = request(url);
// self.logger.info("request to", url);
req.on('error', function (error) {
self.logger.error(error);
});
@ -123,6 +121,8 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback)
}
// Process Item/Article
var article = self.processItem(item, source, feed_name);
// plugins may filter items and return null
if (article) {
source_articles.push(article);
}
@ -130,7 +130,7 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback)
});
feedparser.on("end", function(){
// sort and de-dupe this feed's articles and push them into array
// de-dupe, date sort, and trim this feed's articles and push them into array
source_articles = self.dedupe(source_articles, self.dedupe_fields);
source_articles = self.date_sort(source_articles);
source_articles = source_articles.slice(0, count);
@ -147,7 +147,7 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback)
feed_articles = self.dedupe(feed_articles, self.dedupe_fields);
feed_articles = self.date_sort(feed_articles);
// Create new feed with these articles
// Create new feed with these articles. Follows node-rss spec
var options = {
title : feed.meta.title,
description : feed.meta.description,
@ -230,7 +230,6 @@ RssBraider.prototype.runPlugins = function (item, itemOptions, source, feed_name
// A plugin returning -1 means skip this item
if (filteredItemOptions === -1) {
var short_title = item.title.substring(0,25);
self.logger.debug("Plugin '" + plugin_name + "' filtered item from feed '" + feed.meta.title + "'", item.guid);
itemOptions = null;
break;
@ -252,6 +251,7 @@ RssBraider.prototype.runPlugins = function (item, itemOptions, source, feed_name
// operation on the articles array
// TODO, make this a plugin?
RssBraider.prototype.dedupe = function(articles_arr, fields){
var self = this;
if ( !fields || fields.length < 1 ) {
return _.uniq(articles_arr);
} else {
@ -273,8 +273,9 @@ RssBraider.prototype.dedupe = function(articles_arr, fields){
// it's unique
deduped_articles.push(article);
} else {
// The article matched all of another article's fields
// Do nothing
// The article matched all of another article's "dedupe" fields
// so filter it out (i.e. do nothing)
self.logger.debug("skipping duplicate", '"' + article.title + '"', article.guid);
}
});
return deduped_articles;

View File

@ -59,6 +59,7 @@ test('de-duplicate feed', function(t) {
plugins_directories : [__dirname + '/../examples/plugins/']
};
var rss_braider = RssBraider.createClient(braider_options);
rss_braider.logger.level('info');
rss_braider.processFeed('sample_feed', 'rss', function(err, data){
if (err) {
@ -133,7 +134,6 @@ test('filter all articles out using plugin', function(t) {
});
});
test("Don't break when a filter fails and returns null", function(t) {
t.plan(1);
var feeds = {};