mirror of
https://gitlab.silvrtree.co.uk/martind2000/rss-braider.git
synced 2025-02-11 06:19:15 +00:00
RSS-26: Cleanup, logging, commenting
This commit is contained in:
parent
5080b8cb64
commit
a82a06c0bb
@ -1,19 +1,20 @@
|
|||||||
// process feed-reader item into node-rss item
|
// process feed-reader item into node-rss item
|
||||||
var FeedParser = require('feedparser'),
|
var FeedParser = require('feedparser'),
|
||||||
bunyan = require('bunyan'),
|
bunyan = require('bunyan'),
|
||||||
_ = require('lodash'),
|
_ = require('lodash'),
|
||||||
async = require('async'),
|
async = require('async'),
|
||||||
request = require('request'),
|
request = require('request'),
|
||||||
RSS = require('rss'),
|
RSS = require('rss'),
|
||||||
fs = require('fs');
|
fs = require('fs'),
|
||||||
|
package_json = require('../package.json'),
|
||||||
|
logger;
|
||||||
|
|
||||||
var logger;
|
|
||||||
var RssBraider = function (options) {
|
var RssBraider = function (options) {
|
||||||
if (!options) {
|
if (!options) {
|
||||||
options = {};
|
options = {};
|
||||||
}
|
}
|
||||||
this.feeds = options.feeds || null;
|
this.feeds = options.feeds || null;
|
||||||
this.logger = options.logger || bunyan.createLogger({name: 'rss-braider'});
|
this.logger = options.logger || bunyan.createLogger({name: package_json.name});
|
||||||
|
|
||||||
if (options.log_level) {
|
if (options.log_level) {
|
||||||
this.logger.level(options.log_level);
|
this.logger.level(options.log_level);
|
||||||
@ -33,7 +34,7 @@ RssBraider.prototype.loadPlugins = function () {
|
|||||||
var self = this;
|
var self = this;
|
||||||
|
|
||||||
if (self.plugins_directories.length < 1) {
|
if (self.plugins_directories.length < 1) {
|
||||||
// self.logger.info("No plugins_directories specified. No plugins loaded.");
|
self.logger.debug("No plugins_directories specified. No plugins loaded.");
|
||||||
}
|
}
|
||||||
self.plugins_directories.forEach(function(path){
|
self.plugins_directories.forEach(function(path){
|
||||||
// load up each file and assign it to the plugins
|
// load up each file and assign it to the plugins
|
||||||
@ -44,7 +45,7 @@ RssBraider.prototype.loadPlugins = function () {
|
|||||||
self.logger.warn("Duplicate plugin name: ", plugin_name, "Overwriting with newer plugin");
|
self.logger.warn("Duplicate plugin name: ", plugin_name, "Overwriting with newer plugin");
|
||||||
}
|
}
|
||||||
self.plugins[plugin_name] = require(path + '/' + plugin_name);
|
self.plugins[plugin_name] = require(path + '/' + plugin_name);
|
||||||
// self.logger.info("plugin registered:", plugin_name);
|
self.logger.debug("plugin registered:", plugin_name);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
@ -61,33 +62,30 @@ RssBraider.prototype.feedExists = function (feed_name) {
|
|||||||
// trim down to desired count, dedupe and sort
|
// trim down to desired count, dedupe and sort
|
||||||
RssBraider.prototype.processFeed = function(feed_name, format, callback)
|
RssBraider.prototype.processFeed = function(feed_name, format, callback)
|
||||||
{
|
{
|
||||||
if (!format) {
|
|
||||||
format = 'rss';
|
|
||||||
}
|
|
||||||
var self = this,
|
var self = this,
|
||||||
feed = self.feeds[feed_name],
|
feed = self.feeds[feed_name],
|
||||||
feed_articles = [];
|
feed_articles = [];
|
||||||
|
|
||||||
// self.logger.info("DEBUG processFeed: feed is set to " + feed_name);
|
if (!format) {
|
||||||
|
format = 'rss';
|
||||||
|
}
|
||||||
|
|
||||||
if (!feed || !feed.sources || feed.sources.length < 1) {
|
if (!feed || !feed.sources || feed.sources.length < 1) {
|
||||||
return callback("No definition for feed name: " + feed_name);
|
return callback("No definition for feed name: " + feed_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Process each feed source through Feedparser to get articles.
|
||||||
|
// Then process each item/article through rss-braider and any plugins
|
||||||
async.each(feed.sources, function(source, callback) {
|
async.each(feed.sources, function(source, callback) {
|
||||||
var count = source.count || feed.default_count || 10, // Number of articles
|
var count = source.count || feed.default_count || 10, // Number of articles per source
|
||||||
url = source.feed_url || null,
|
url = source.feed_url || null,
|
||||||
file_path = source.file_path || null,
|
file_path = source.file_path || null,
|
||||||
source_articles = [];
|
source_articles = [];
|
||||||
|
|
||||||
// self.logger.debug("Requesting source:" + source.name + " at " + url + " for feed:" + feed_name);
|
|
||||||
// todo: Check if source.file is set and set up a fs stream read
|
|
||||||
var feedparser = new FeedParser();
|
var feedparser = new FeedParser();
|
||||||
if (url) {
|
if (url) {
|
||||||
var req = request(url);
|
var req = request(url);
|
||||||
|
|
||||||
// self.logger.info("request to", url);
|
|
||||||
|
|
||||||
req.on('error', function (error) {
|
req.on('error', function (error) {
|
||||||
self.logger.error(error);
|
self.logger.error(error);
|
||||||
});
|
});
|
||||||
@ -123,6 +121,8 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback)
|
|||||||
}
|
}
|
||||||
// Process Item/Article
|
// Process Item/Article
|
||||||
var article = self.processItem(item, source, feed_name);
|
var article = self.processItem(item, source, feed_name);
|
||||||
|
|
||||||
|
// plugins may filter items and return null
|
||||||
if (article) {
|
if (article) {
|
||||||
source_articles.push(article);
|
source_articles.push(article);
|
||||||
}
|
}
|
||||||
@ -130,7 +130,7 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback)
|
|||||||
});
|
});
|
||||||
|
|
||||||
feedparser.on("end", function(){
|
feedparser.on("end", function(){
|
||||||
// sort and de-dupe this feed's articles and push them into array
|
// de-dupe , date sort, and trim this feed's articles and push them into array
|
||||||
source_articles = self.dedupe(source_articles, self.dedupe_fields);
|
source_articles = self.dedupe(source_articles, self.dedupe_fields);
|
||||||
source_articles = self.date_sort(source_articles);
|
source_articles = self.date_sort(source_articles);
|
||||||
source_articles = source_articles.slice(0, count);
|
source_articles = source_articles.slice(0, count);
|
||||||
@ -147,7 +147,7 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback)
|
|||||||
feed_articles = self.dedupe(feed_articles, self.dedupe_fields);
|
feed_articles = self.dedupe(feed_articles, self.dedupe_fields);
|
||||||
feed_articles = self.date_sort(feed_articles);
|
feed_articles = self.date_sort(feed_articles);
|
||||||
|
|
||||||
// Create new feed with these articles
|
// Create new feed with these articles. Follows node-rss spec
|
||||||
var options = {
|
var options = {
|
||||||
title : feed.meta.title,
|
title : feed.meta.title,
|
||||||
description : feed.meta.description,
|
description : feed.meta.description,
|
||||||
@ -230,7 +230,6 @@ RssBraider.prototype.runPlugins = function (item, itemOptions, source, feed_name
|
|||||||
|
|
||||||
// A plugin returning -1 means skip this item
|
// A plugin returning -1 means skip this item
|
||||||
if (filteredItemOptions === -1) {
|
if (filteredItemOptions === -1) {
|
||||||
var short_title = item.title.substring(0,25);
|
|
||||||
self.logger.debug("Plugin '" + plugin_name + "' filtered item from feed '" + feed.meta.title + "'", item.guid);
|
self.logger.debug("Plugin '" + plugin_name + "' filtered item from feed '" + feed.meta.title + "'", item.guid);
|
||||||
itemOptions = null;
|
itemOptions = null;
|
||||||
break;
|
break;
|
||||||
@ -252,6 +251,7 @@ RssBraider.prototype.runPlugins = function (item, itemOptions, source, feed_name
|
|||||||
// operation on the articles array
|
// operation on the articles array
|
||||||
// TODO, make this a plugin?
|
// TODO, make this a plugin?
|
||||||
RssBraider.prototype.dedupe = function(articles_arr, fields){
|
RssBraider.prototype.dedupe = function(articles_arr, fields){
|
||||||
|
var self = this;
|
||||||
if ( !fields || fields.length < 1 ) {
|
if ( !fields || fields.length < 1 ) {
|
||||||
return _.uniq(articles_arr);
|
return _.uniq(articles_arr);
|
||||||
} else {
|
} else {
|
||||||
@ -273,8 +273,9 @@ RssBraider.prototype.dedupe = function(articles_arr, fields){
|
|||||||
// it's unique
|
// it's unique
|
||||||
deduped_articles.push(article);
|
deduped_articles.push(article);
|
||||||
} else {
|
} else {
|
||||||
// The article matched all of another article's fields
|
// The article matched all of another article's "dedupe" fields
|
||||||
// Do nothing
|
// so filter it out (i.e. do nothing)
|
||||||
|
self.logger.debug("skipping duplicate", '"' + article.title + '"', article.guid);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
return deduped_articles;
|
return deduped_articles;
|
||||||
|
@ -59,6 +59,7 @@ test('de-duplicate feed', function(t) {
|
|||||||
plugins_directories : [__dirname + '/../examples/plugins/']
|
plugins_directories : [__dirname + '/../examples/plugins/']
|
||||||
};
|
};
|
||||||
var rss_braider = RssBraider.createClient(braider_options);
|
var rss_braider = RssBraider.createClient(braider_options);
|
||||||
|
rss_braider.logger.level('info');
|
||||||
|
|
||||||
rss_braider.processFeed('sample_feed', 'rss', function(err, data){
|
rss_braider.processFeed('sample_feed', 'rss', function(err, data){
|
||||||
if (err) {
|
if (err) {
|
||||||
@ -133,7 +134,6 @@ test('filter all articles out using plugin', function(t) {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
test("Don't break when a filter fails and returns null", function(t) {
|
test("Don't break when a filter fails and returns null", function(t) {
|
||||||
t.plan(1);
|
t.plan(1);
|
||||||
var feeds = {};
|
var feeds = {};
|
||||||
|
Loading…
Reference in New Issue
Block a user