Pulled out custom processing into a plugin system. Register plugins on the feed config object and they get run automatically.

This commit is contained in:
Kip Gebhardt 2015-01-30 16:07:48 -08:00
parent cd3d3fd94d
commit bc9c889992
9 changed files with 160 additions and 102 deletions

View File

@ -2,6 +2,7 @@ var feed = {
"feed_name" : "test file feed",
"default_count" : 1,
"no_cdata_fields" : ['description'],
"plugins" : ['kqed', 'content_encoded', 'wfw_slash_comments', 'add_media_thumbnail'],
"meta" : {
"title": "Test File Feed",
"description": "This feed comes from a file",

View File

@ -6,43 +6,63 @@ var FeedParser = require('feedparser'),
async = require('async'),
request = require('request'),
RSS = require('rss'),
fs = require('fs'),
util = require('util'); // DEBUG
fs = require('fs');
var logger;
// RssBraider constructor.
//
// options (all optional; the whole argument may be omitted):
//   feeds           - map of feed name -> feed config (default: null)
//   logger          - logger object; when absent a bunyan logger named
//                     'rss-braider' is created. The chosen logger is also stored
//                     in the module-level `logger` variable used by the methods.
//   indent          - indent string (default: "\t")
//   dedupe_fields   - the fields to use to identify duplicate articles (default: [])
//   date_sort_order - sort direction for article dates (default: "desc")
//
// Side effect: calls this.loadPlugins() to fill this.plugins.
var RssBraider = function (options) {
    // Tolerate `new RssBraider()`: fall back to the defaults instead of throwing
    // on the first property read.
    var opts = options || {};

    this.feeds = opts.feeds || null; // TODO validate feed configs
    this.logger = logger = opts.logger || bunyan.createLogger({name: 'rss-braider'});
    this.indent = opts.indent || "\t";
    this.dedupe_fields = opts.dedupe_fields || [];
    this.date_sort_order = opts.date_sort_order || "desc";

    // load plugins from plugins folder
    // TODO, specify plugins location
    this.plugins = {};
    this.loadPlugins();
};
RssBraider.prototype.init = function() {
// Validate the feeds?
// loadup self.plugins with the plugin functions
RssBraider.prototype.loadPlugins = function () {
var self = this,
path = __dirname + '/plugins',
filenames = fs.readdirSync(path);
// load up each file and assign it to the plugins
filenames.forEach(function(filename){
var plugin_name = filename.replace(/.js$/, '');
self.plugins[plugin_name] = require(path + '/' + plugin_name);
// logger.info("plugin loaded:", plugin_name);
});
};
// Report whether a feed called `feed_name` has been configured.
//
// Returns true only when this.feeds has its own, truthy entry under that name.
// Inherited Object.prototype keys such as "constructor" or "toString" are not
// feeds and yield false, as does a missing this.feeds.
RssBraider.prototype.feedExists = function (feed_name) {
    var feeds = this.feeds;
    if (!feeds || !Object.prototype.hasOwnProperty.call(feeds, feed_name)) {
        return false;
    }
    return Boolean(feeds[feed_name]);
};
// RssBraider.prototype.feedExists = function (feed_name) {
// if (this.feeds && this.feeds[feed_name]) {
// return true;
// } else {
// return false;
// }
// };
// Gather data from all feed sources, process each article/item through plugins,
// trim down to desired count, dedupe and sort
RssBraider.prototype.processFeed = function(feed_name, format, callback)
{
// DEBUG
console.time("process");
if (!format) {
format = 'json';
format = 'rss';
}
var self = this,
feed = this.feeds[feed_name],
feed_articles = [];
// set these for the request
self.feed_name = feed_name;
self.format = format;
self.feed = feed;
// logger.info("DEBUG processFeed: feed is set to " + feed_name);
if (!feed || !feed.sources || feed.sources.length < 1) {
return callback("No definition for feed name: " + feed_name);
}
@ -153,6 +173,8 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback)
// Accepts a feed-parser item and builds a node-rss itemOptions object
RssBraider.prototype.processItem = function (item) {
var self = this;
if (!item) {
logger.error("processItem: no item passed in");
return null;
@ -170,96 +192,33 @@ RssBraider.prototype.processItem = function (item) {
custom_elements : []
};
//////////////////
// Custom elements Move to PLUGINS
//////////////////
// kqed source
if (item.source_url) {
itemOptions.custom_elements.push(
{ 'kqed:source': item.source_url }
);
}
// content:encoded (i.e. description)
if (item["content:encoded"] && item["content:encoded"]["#"]){
var content_encoded = item["content:encoded"]["#"];
itemOptions.custom_elements.push(
{ "content:encoded":
{
_cdata: content_encoded
}
}
);
}
// // wfw
if (item["wfw:commentrss"] && item["wfw:commentrss"]["#"]){
itemOptions.custom_elements.push({ "wfw:commentRss": item["wfw:commentrss"]["#"]});
}
// // // slash comments
if (item["slash:comments"] && item["slash:comments"]["#"]){
itemOptions.custom_elements.push({ "slash:comments": item["slash:comments"]["#"]});
}
// Images
// Take 'media:thumbnail',
// else
// 'media:content'[0]'media:thumbnail'
// else
// 'media:content'[0] url attribute
var thumbnail;
if (item['media:thumbnail'] && item['media:thumbnail']['#']) {
thumbnail = {
'media:thumbnail': item['media:thumbnail']['#']
};
itemOptions.custom_elements.push(thumbnail);
} else {
if (item["media:content"]) {
var media_contents;
if (! _.isArray(item['media:content'])) {
media_contents = [item['media:content']];
} else {
media_contents = item['media:content'];
}
if ( media_contents[0] &&
media_contents[0]['media:thumbnail'] &&
media_contents[0]['media:thumbnail']['@'] &&
media_contents[0]['media:thumbnail']['@'].url) {
thumbnail = {
'media:thumbnail' : [{
_attr: {
url: media_contents[0]['media:thumbnail']['@'].url
}
}]
};
// itemOptions.custom_elements.push({'media:thumbnail' : { url: media_contents[0]['media:thumbnail']['@'].url}} );
itemOptions.custom_elements.push(thumbnail);
} else {
thumbnail = {
'media:thumbnail' : [{
_attr: {
url: media_contents[0]['@'].url
}
}]
};
itemOptions.custom_elements.push(thumbnail);
}
}
}
// Run the plugins specified by the "plugins" section of the
// feed config file to build out any custom elements or
// do transforms
self.runPlugins(item, itemOptions);
return itemOptions;
};
// Run one article through every plugin named in the current feed's "plugins"
// list, in list order.
//
//   item        - the parsed source item handed to each plugin
//   itemOptions - the item options object under construction; plugins modify
//                 it in place
//
// The current feed is self.feed (set by processFeed). When no feed has been
// selected yet, or the feed lists no plugins, nothing runs. A listed name that
// is not a registered plugin function is reported through logger.error and
// skipped; the remaining plugins still run.
RssBraider.prototype.runPlugins = function (item, itemOptions) {
    var self = this,
        registry = self.plugins || {},
        feed_plugins = (self.feed && self.feed.plugins) || [];

    feed_plugins.forEach(function (plugin_name) {
        // Own-property + function check: keeps inherited keys such as
        // "constructor" and non-callable module exports from being invoked.
        var is_registered = Object.prototype.hasOwnProperty.call(registry, plugin_name) &&
            typeof registry[plugin_name] === 'function';

        if (is_registered) {
            registry[plugin_name](item, itemOptions);
        } else {
            logger.error("A plugin named '" + plugin_name + "' hasn't been registered");
        }
    });
};
// Dedupe articles in node-rss itemOptions format
// Accepts an array of fields to dedupe on, or does a basic uniq
// operation on the articles array
RssBraider.prototype.dedupe = function(articles_arr, fields){
// logger.info("dedupe fields", fields);
if ( !fields || fields.length < 1 ) {
return _.uniq(articles_arr);
} else {

View File

@ -0,0 +1,54 @@
// Add a media:thumbnail element
// Take the item's own 'media:thumbnail',
// else
// 'media:content'[0]'media:thumbnail' (its url attribute)
// else
// 'media:content'[0] url attribute
var _ = require('lodash');
module.exports = function (item, itemOptions) {
if (!item || !itemOptions) {
return;
}
var thumbnail;
if (item['media:thumbnail'] && item['media:thumbnail']['#']) {
thumbnail = {
'media:thumbnail': item['media:thumbnail']['#']
};
itemOptions.custom_elements.push(thumbnail);
} else {
if (item["media:content"]) {
var media_contents;
if (! _.isArray(item['media:content'])) {
media_contents = [item['media:content']];
} else {
media_contents = item['media:content'];
}
if ( media_contents[0] &&
media_contents[0]['media:thumbnail'] &&
media_contents[0]['media:thumbnail']['@'] &&
media_contents[0]['media:thumbnail']['@'].url) {
thumbnail = {
'media:thumbnail' : [{
_attr: {
url: media_contents[0]['media:thumbnail']['@'].url
}
}]
};
// itemOptions.custom_elements.push({'media:thumbnail' : { url: media_contents[0]['media:thumbnail']['@'].url}} );
itemOptions.custom_elements.push(thumbnail);
} else {
thumbnail = {
'media:thumbnail' : [{
_attr: {
url: media_contents[0]['@'].url
}
}]
};
itemOptions.custom_elements.push(thumbnail);
}
}
}
};

View File

@ -0,0 +1,16 @@
module.exports = function (item, itemOptions) {
if (!item || !itemOptions) {
return;
}
// content:encoded (i.e. description)
if (item["content:encoded"] && item["content:encoded"]["#"]){
var content_encoded = item["content:encoded"]["#"];
itemOptions.custom_elements.push(
{ "content:encoded":
{
_cdata: content_encoded
}
}
);
}
};

11
lib/plugins/kqed.js Normal file
View File

@ -0,0 +1,11 @@
// define kqed source
module.exports = function (item, itemOptions) {
if (!item || !itemOptions) {
return;
}
if (item.source_url) {
itemOptions.custom_elements.push(
{ 'kqed:source': item.source_url }
);
}
};

View File

@ -0,0 +1,14 @@
module.exports = function (item, itemOptions) {
if (!item || !itemOptions) {
return;
}
// wfw
if (item["wfw:commentrss"] && item["wfw:commentrss"]["#"]){
itemOptions.custom_elements.push({ "wfw:commentRss": item["wfw:commentrss"]["#"]});
}
// // // slash comments
if (item["slash:comments"] && item["slash:comments"]["#"]){
itemOptions.custom_elements.push({ "slash:comments": item["slash:comments"]["#"]});
}
};

View File

@ -2,6 +2,7 @@ var feed = {
"feed_name" : "test file feed",
"default_count" : 1,
"no_cdata_fields" : ['description'],
"plugins" : ['kqed', 'content_encoded', 'wfw_slash_comments', 'add_media_thumbnail'],
"meta" : {
"title": "Test File Feed",
"description": "This feed comes from a file",

View File

@ -1,7 +1,8 @@
var feed = {
"feed_name" : "test file feed",
"default_count" : 1,
"no_cdata_fields" : ['description'],
"no_cdata_fields" : ['description'],
"plugins" : ['kqed', 'content_encoded', 'wfw_slash_comments', 'add_media_thumbnail'],
"meta" : {
"title": "Test File Feed",
"description": "This feed comes from a file",

View File

@ -2,6 +2,7 @@ var feed = {
"feed_name" : "test file feed",
"default_count" : 1,
"no_cdata_fields" : ['description'],
"plugins" : ['kqed', 'content_encoded', 'wfw_slash_comments', 'add_media_thumbnail'],
"meta" : {
"title": "Test File Feed",
"description": "This feed comes from a file",