Better filtering

This commit is contained in:
unknown 2015-07-23 11:37:04 +01:00
parent 1b1364e71f
commit 98901a09c5
9 changed files with 1472 additions and 650 deletions

6
.gitignore vendored
View File

@ -1 +1,7 @@
/node_modules/
/html/jobs-local.html
/html/jobs-local.xml
/html/lifestyle.xml
/html/paleo.html
/html/paleo.json
/html/paleo.xml

File diff suppressed because one or more lines are too long

View File

@ -14,7 +14,7 @@ feeds.simple_test_feed = {
"feed_name" : "feed",
"default_count" : 1,
"no_cdata_fields" : [], // Don't wrap these fields in CDATA tags
"plugins" : [ ],
"plugins" : ['filter_today_only' ],
"meta" : {
"title": "Jobs",
"description": "Combined Jobs Feed",
@ -204,7 +204,8 @@ var braider_options = {
indent: " ",
date_sort_order: "desc", // Newest first
log_level: "warn",
dedupe_fields: ['link', 'guid']
dedupe_fields: ['link', 'guid'],
plugins_directories : [__dirname + "/plugins/"]
};
var rss_braider = RssBraider.createClient(braider_options);

View File

@ -19,6 +19,7 @@ feeds.simple_test_feed = {
"description": "Combined Lifestyle Feed",
'site_url': 'http://pipes.silvrtree.co.uk/lifestyle.xml'
},
"plugins" : ['filter_last_week' ],
"sources" : [
{
@ -195,7 +196,8 @@ var braider_options = {
indent: " ",
date_sort_order: "desc", // Newest first
log_level: "warn",
dedupe_fields: ['link', 'guid']
dedupe_fields: ['link', 'guid'],
plugins_directories : [__dirname + "/plugins/"]
};
var rss_braider = RssBraider.createClient(braider_options);

View File

@ -21,47 +21,13 @@ feeds.simple_test_feed = {
"description": "Combined Paleo Feed",
'site_url':'http://pipes.silvrtree.co.uk/paleo.xml'
},
"plugins" : ['capitalize_title','filter_last_week'],
"sources" : [
{
/* "name" : "JobServe",*/
"count": 100,
"feed_url": "http://feeds.feedburner.com/PaleoPlan"
},
{
/* "name" : "JobServe",*/
"count": 100,
"feed_url": "http://nomnompaleo.com/rss"
},
{
/* "name" : "JobServe",*/
"count": 100,
"feed_url": "http://thepaleodiet.com/feed/"
},
{
/* "name" : "JobServe",*/
"count": 100,
"feed_url": "http://paleoleap.com/feed/"
},
{
/* "name" : "JobServe",*/
"count": 100,
"feed_url": "http://ultimatepaleoguide.com/feed/"
},
{
/* "name" : "JobServe",*/
"count": 100,
"feed_url": "http://elanaspantry.com/feed/"
}
,
{
/* "name" : "JobServe",*/
"count": 100,
"feed_url": "http://feeds.feedburner.com/Paleomg-PaleoRecipes"
},
{
/* "name" : "JobServe",*/
"count": 100,
"feed_url": "http://civilizedcavemancooking.com/feed/"
}
]
};
@ -69,13 +35,15 @@ var braider_options = {
feeds : feeds,
indent : " ",
date_sort_order : "desc", // Newest first
log_level : "warn",
dedupe_fields : ['link','guid']
log_level : "debug",
dedupe_fields : ['link','guid'],
plugins_directories : [__dirname + "/plugins/"],
};
var rss_braider = RssBraider.createClient(braider_options);
// Override logging level (debug, info, warn, err, off)
rss_braider.logger.level('off');
rss_braider.logger.level('debug');
// Output braided feed as rss. use 'json' for JSON output.
//rss_braider.processFeed('simple_test_feed', 'rss', function(err, data){
@ -87,7 +55,7 @@ rss_braider.logger.level('off');
var j = JSON.parse(data);
j.items.forEach(function (obj, index) {
/* j.items.forEach(function (obj, index) {
// console.log(obj); // logs "3", "5", "7"
// console.log(index); // logs "0", "1", "2"
@ -104,7 +72,7 @@ rss_braider.logger.level('off');
console.log('- - -');
}
});
*/
// var ejsOutput = ejs.compile(str)(j);
//console.log(j);

View File

@ -20,6 +20,7 @@ feeds.simple_test_feed = {
"description": "Combined Paleo Feed",
'site_url': 'http://pipes.silvrtree.co.uk/paleo.xml'
},
"plugins" : ['filter_last_week' ],
"sources": [
{
/* "name" : "JobServe",*/
@ -69,7 +70,8 @@ var braider_options = {
indent: " ",
date_sort_order: "desc", // Newest first
log_level: "warn",
dedupe_fields: ['link', 'guid']
dedupe_fields: ['link', 'guid'],
plugins_directories : [__dirname + "/plugins/"]
};
var rss_braider = RssBraider.createClient(braider_options);

View File

@ -0,0 +1,19 @@
module.exports = function (item, itemOptions, source) {
function inDays(d1, d2) {
var t2 = d2.getTime();
var t1 = d1.getTime();
return parseInt((t2-t1)/(24*3600*1000));
}
console.log(itemOptions.date);
var now = new Date();
var then = new Date(itemOptions.date);
var d = inDays(then,now);
console.log(d);
return itemOptions;
};

View File

@ -0,0 +1,18 @@
module.exports = function (item, itemOptions, source) {
function inDays(d1, d2) {
var t2 = d2.getTime();
var t1 = d1.getTime();
return parseInt((t2-t1)/(24*3600*1000));
}
var now = new Date();
var then = new Date(itemOptions.date);
var d = inDays(then,now);
if (d === 0)
return itemOptions;
else
return -1;
};

136
test.js Normal file
View File

@ -0,0 +1,136 @@
// Module dependencies and script-level state for this test script.
var RssBraider = require('rss-braider'),
fs = require('fs'),
ejs = require('ejs'),
// NOTE(review): `fs` is already bound above; this second require('fs') only extracts readFileSync.
read = require('fs').readFileSync,
join = require('path').join,
// Contents of templates/rss.ejs, read synchronously as UTF-8 while the script loads.
str = read(join(__dirname, '/templates/rss.ejs'), 'utf8'),
sqlite3 = require('sqlite3').verbose(),
// Feed definitions keyed by feed id; populated further down.
feeds = {};
// Database handle for the images.db file that sits next to this script.
// NOTE(review): `db` is not referenced again in the visible part of this file — confirm it is still needed.
var db = new sqlite3.Database(__dirname + '/images.db');
// Pull feeds from config files: ,
// feeds.simple_test_feed = require("./config/feed").feed;
// Or define in-line
// Definition of the one feed this script braids, registered as "simple_test_feed".
feeds.simple_test_feed = {
    feed_name: 'test',
    default_count: 1,
    // Fields listed here are not wrapped in CDATA tags
    no_cdata_fields: [],
    meta: {
        title: 'test',
        description: 'test',
        site_url: 'http://pipes.silvrtree.co.uk/test.xml'
    },
    // Plugin names applied to this feed
    plugins: ['capitalize_title', 'filter_last_week'],
    sources: [
        { count: 5, feed_url: 'http://feeds.bbci.co.uk/news/uk/rss.xml' }
    ]
};
// Options object passed to RssBraider.createClient().
var braider_options = {
    "feeds": feeds,
    "indent": " ",
    "date_sort_order": "desc", // newest items first
    "log_level": "debug",
    "dedupe_fields": ["link", "guid"],
    "plugins_directories": [__dirname + "/plugins/"]
};
// Create the braider client from the options object.
var rss_braider = RssBraider.createClient(braider_options);

// Set the logger level at runtime (debug, info, warn, err, off)
rss_braider.logger.level('debug');

// Braid the feed as JSON; pass 'rss' instead to get RSS output.
//rss_braider.processFeed('simple_test_feed', 'rss', function(err, data){
rss_braider.processFeed('simple_test_feed', 'json', function onFeedBraided(err, data) {
    if (err) {
        console.log(err);
        return;
    }

    var braided = JSON.parse(data);

    /* Disabled: pull the first <img src="..."> out of each item description.
    braided.items.forEach(function (obj, index) {
        // console.log(obj); // logs "3", "5", "7"
        // console.log(index); // logs "0", "1", "2"
        var desc = obj.description;
        var imgRegEx = /<img[^>]+src="([^">]+)"/;
        var myArray = imgRegEx.exec(desc) || [];
        if (myArray.length > 0)
        {
            console.log('Length: ' + myArray.length);
            console.log(myArray[0]);
            console.log(myArray[1]);
            console.log('- - -');
        }
    });
    */

    // var ejsOutput = ejs.compile(str)(braided);
    //console.log(braided);
    // console.log(ejsOutput);
});
/*
// grabbing images
<img[^>]+src="([^">]+)"
var fs = require('fs');
var request = require('request');
// Or with cookies
// var request = require('request').defaults({jar: true});
request.get({url: 'https://someurl/somefile.torrent', encoding: 'binary'}, function (err, response, body) {
fs.writeFile("/tmp/test.torrent", body, 'binary', function(err) {
if(err)
console.log(err);
else
console.log("The file was saved!");
});
});
*/
/**
 * Helpers for measuring the distance between two Date objects.
 *
 * Every method takes (d1, d2) and returns how far d2 lies after d1; the
 * result is negative when d2 is earlier than d1.
 */
var DateDiff = (function () {
    var MS_PER_DAY = 24 * 3600 * 1000;
    var MS_PER_WEEK = MS_PER_DAY * 7;

    /**
     * Elapsed time from d1 to d2 in whole units of `unitMs` milliseconds,
     * truncated toward zero. Yields NaN when either date is invalid.
     */
    function wholeUnits(d1, d2, unitMs) {
        var units = (d2.getTime() - d1.getTime()) / unitMs;
        // Truncate numerically. parseInt() would stringify the number first,
        // so a tiny quotient such as 1.157e-8 would be parsed as 1.
        return units < 0 ? Math.ceil(units) : Math.floor(units);
    }

    return {
        /** Whole 24-hour periods elapsed between d1 and d2. */
        inDays: function (d1, d2) {
            return wholeUnits(d1, d2, MS_PER_DAY);
        },

        /** Whole 7-day periods elapsed between d1 and d2. */
        inWeeks: function (d1, d2) {
            return wholeUnits(d1, d2, MS_PER_WEEK);
        },

        /**
         * Difference in calendar months, using only the local year and month
         * of each date; the day of the month is ignored.
         */
        inMonths: function (d1, d2) {
            var months1 = d1.getMonth() + 12 * d1.getFullYear();
            var months2 = d2.getMonth() + 12 * d2.getFullYear();
            return months2 - months1;
        },

        /** Difference between the local calendar years of the two dates. */
        inYears: function (d1, d2) {
            return d2.getFullYear() - d1.getFullYear();
        }
    };
}());