RSS-13: Dedupe code and better sorting

This commit is contained in:
Kip Gebhardt 2015-01-28 16:53:54 -08:00
parent 9066644250
commit ed547b947b
4 changed files with 111 additions and 26 deletions

View File

@ -6,7 +6,8 @@ var FeedParser = require('feedparser'),
async = require('async'),
request = require('request'),
RSS = require('rss'),
fs = require('fs');
fs = require('fs'),
util = require('util'); // DEBUG
var logger;
@ -14,6 +15,8 @@ var RssBraider = function (options) {
this.feeds = options.feeds || null; // TOOD validate feed configs
this.logger = logger = options.logger || bunyan.createLogger({name: 'rss-braider'});
this.indent = options.indent || " ";
this.dedupe_fields = options.dedupe_fields || [];
this.date_sort_order = options.date_sort_order || "desc";
};
RssBraider.prototype.init = function() {
@ -99,7 +102,7 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback)
feedparser.on("end", function(){
// sort and de-dupe this feed's articles and push them into array
source_articles = self.dedupe(source_articles);
source_articles = self.dedupe(source_articles, self.dedupe_fields);
source_articles = self.date_sort(source_articles);
source_articles = source_articles.slice(0, count);
feed_articles = feed_articles.concat(source_articles);
@ -111,10 +114,9 @@ RssBraider.prototype.processFeed = function(feed_name, format, callback)
logger.error(err);
return callback(err);
} else {
// Sort the stories for the source by date descending
feed_articles = self.dedupe(feed_articles);
// Final Dedupe step and resort
feed_articles = self.dedupe(feed_articles, self.dedupe_fields);
feed_articles = self.date_sort(feed_articles);
feed_articles.reverse();
// Create new feed with these articles
var options = {
@ -254,16 +256,48 @@ RssBraider.prototype.processItem = function (item) {
};
// Dedupe articles in node-rss itemOptions format
RssBraider.prototype.dedupe = function(articles_arr){
// TODO: sort by guid, url, etc
return _.uniq(articles_arr);
// Accepts an array of fields to dedupe on, or does a basic uniq
// operation on the articles array
RssBraider.prototype.dedupe = function(articles_arr, fields){
    // Remove duplicate articles from articles_arr.
    //
    // articles_arr: array of article objects.
    // fields:       optional array of property names. When given, an article
    //               is a duplicate only if ALL of those properties strictly
    //               equal (===) the same properties of ONE article that was
    //               already kept. When missing or empty, falls back to a
    //               basic _.uniq() on the array.
    //
    // Returns a new array; the first occurrence of each article is kept and
    // the input order is preserved.
    if ( !fields || fields.length < 1 ) {
        return _.uniq(articles_arr);
    }

    // Field-value tuples of the articles kept so far. Whole tuples are
    // compared (rather than tracking each field's values separately) so an
    // article that shares its title with one article and its guid with a
    // different one is NOT mistaken for a duplicate.
    var kept_tuples = [];

    return articles_arr.filter(function(article){
        var values = fields.map(function(field){
            return article[field];
        });

        var is_duplicate = kept_tuples.some(function(kept){
            return kept.every(function(value, idx){
                return value === values[idx];
            });
        });

        if (!is_duplicate) {
            kept_tuples.push(values);
        }
        return !is_duplicate;
    });
};
// Could be a plugin
// TODO: Could be a plugin
// Sort articles by date
RssBraider.prototype.date_sort = function(articles_arr) {
    // Sort articles by their `date` property and return a new array.
    //
    // Order is ascending unless this.date_sort_order === "desc", in which
    // case the ascending result is reversed (newest first).
    //
    // Date objects are compared by getTime(). Other values (e.g. date
    // strings or timestamps) are passed through new Date(). Articles whose
    // date is missing or unparseable are treated as oldest instead of
    // throwing, so one bad item cannot break the whole feed.
    var sorted_articles = _.sortBy(articles_arr, function(article) {
        var date = article ? article.date : null;
        if (date === null || date === undefined) {
            return -Infinity;
        }
        var time = (date instanceof Date) ? date.getTime() : new Date(date).getTime();
        // NaN would make the comparison-based sort inconsistent
        return isNaN(time) ? -Infinity : time;
    });
    if (this.date_sort_order === "desc") {
        sorted_articles.reverse();
    }
    return sorted_articles;
};

View File

@ -7,20 +7,49 @@
<generator>rss-braider</generator>
<lastBuildDate>Wed, 31 Dec 2014 00:00:01 GMT</lastBuildDate>
<item>
<title><![CDATA[Rent Hike For Dance Mission Theater Has Artists Worried About Uncertain Future]]></title>
<description>&lt;p&gt;Stepping out of BART at 24th and Mission at most hours of the day, one is likely to hear the pulse of African drums, hip-hop or salsa emanating from the second-floor studios of Dance Brigade&apos;s Dance Mission Theater. But that music may not continue forever.&lt;/p&gt;
&lt;p&gt;The performance space and dance school &lt;a href=&quot;http://ww2.kqed.org/news/2014/12/20/dance-mission-theater-rent-increase-worries-artists/&quot; target=&quot;_self&quot; id=&quot;rssmi_more&quot;&gt; ...read more&lt;/a&gt;</description>
<link>http://ww2.kqed.org/news/2014/12/20/dance-mission-theater-rent-increase-worries-artists/</link>
<guid isPermaLink="false">http://ww2.kqed.org/arts/2014/12/20/rent-hike-for-dance-mission-theater-has-artists-worried-about-uncertain-future/</guid>
<dc:creator><![CDATA[KQED Arts]]></dc:creator>
<pubDate>Sat, 20 Dec 2014 09:00:22 GMT</pubDate>
<content:encoded><![CDATA[<p>Stepping out of BART at 24th and Mission at most hours of the day, one is likely to hear the pulse of African drums, hip-hop or salsa emanating from the second-floor studios of Dance Brigade&#8217;s Dance Mission Theater. But that music may not continue forever.</p>
<p>The performance space and dance school <a href="http://ww2.kqed.org/news/2014/12/20/dance-mission-theater-rent-increase-worries-artists/" target="_self" id="rssmi_more"> &#8230;read more</a>
<p>Source:: <a href="http://ww2.kqed.org/news/2014/12/20/dance-mission-theater-rent-increase-worries-artists/" target="_self" title="Rent Hike For Dance Mission Theater Has Artists Worried About Uncertain Future">Arts News</a></p>
<title><![CDATA[Top 10 Movie Moments of 2014]]></title>
<description>A handful of scenes resonate from a mediocre year for film.</description>
<link>http://ww2.kqed.org/arts/2014/12/26/top-10-movie-moments-of-2014/</link>
<guid isPermaLink="false">http://ww2.kqed.org/arts/?p=10222283</guid>
<dc:creator><![CDATA[Michael Fox]]></dc:creator>
<pubDate>Fri, 26 Dec 2014 14:00:18 GMT</pubDate>
<content:encoded><![CDATA[<p>The year just ending wasnt a terrible one for movies, but it will be remembered as depressingly uninspired. Earnest craftsmanship is the mantra of the moment, particularly in the risk-phobic American cinema. Consider the siege weve endured (especially since Labor Day) of serious, solid movies &#8212; including <em>Gone Girl</em>, <em>Foxcatcher</em>, <em>Exodus: Gods and Kings</em>, <em>American Sniper,</em> <em>The Imitation Game</em>, <em>Unbroken</em>, <em>A Most Violent Year</em> and <em>Still Alice &#8212; </em>that demanded our attention for long hours and repaid us with the briefest flashes of transcendent joy, insight or pathos.</p>
<p>It was no easy task summoning memorable moments from the years morass, yet here in chronological order are the sequences that, for me, best captured the vitality and intelligent power that movies are capable of expressing.</p>
<p><a href="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/ida2.jpg"><img src="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/ida2.jpg" alt="ida2" width="640" height="420" class="alignright size-full wp-image-10238620" /></a></p>
<h3>Ida</h3>
<p>Polish-born, English-based writer-director Pawel Pawlikowski returned to his birthplace to make a stark black-and-white moral tale that was not only set in the early 1960s, but designed to look and feel like a movie from that period. Pawlikowski cast a non-professional as a blank-faced, convent-raised young woman on the verge of taking her vows who is first sent to meet the aunt she didnt know she had. The women consequently embark on a road trip to a nightmare past and potentially freeing future. Among countless haunting sequences in this profound, stripped-down movie, I see the aunt &#8212; brilliantly depicted by Agata Kulesz as a ruthlessly idealistic and savagely disappointed Communist long mired in 100 proof cynicism &#8212; lighting a cigarette in a bare-bones restaurant and scoping out a nearby male with all the warmth and empathy of a Siberian wolf.</p>
<p><a href="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/leweekend.jpg"><img src="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/leweekend.jpg" alt="leweekend" width="640" height="399" class="alignright size-full wp-image-10238621" /></a></p>
<h3>Le Week-End</h3>
<p>Leading roles for women were in short supply (so whats new?), especially for actresses of a certain age. Hanif Kureishis screenplay about a long-married and palpably frustrated British couple channeling happier days and looking for lost magic in Paris paired the astonishing Lindsay Duncan with national treasure Jim Broadbent. Duncan is a delicious revelation, by turns scathing and rambunctious, flirty and brutally direct. Her playing of a restaurant scene with Broadbent, especially after the shockingly large check arrives, was one of the years high points.</p>
<p><a href="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/strangelove.jpg"><img src="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/strangelove.jpg" alt="strangelove" width="640" height="397" class="alignright size-full wp-image-10238636" /></a></p>
<h3>Love Is Strange</h3>
<p>Another older couple, the newly married gay men portrayed by Alfred Molina and John Lithgow, supplies the heart and soulfulness of Ira Sachs endearing yet rigorously unsentimental family drama. You may relish conflict in movies; I savor unexpected moments of connection and tenderness. <em>Love is Strange</em> gives us a precious handful, notably a late-night conversation in which Lithgows usually oblivious character offers encouragement &#8212; and conveys some understanding &#8212; to the justifiably resentful teenager compelled by circumstances to share his personal space (i.e., his bunk bed) with an much older gay relative.</p>
<p><a href="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/boyhood.jpg"><img src="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/boyhood.jpg" alt="boyhood" width="640" height="449" class="alignright size-full wp-image-10238742" /></a></p>
<h3>Boyhood</h3>
<p>My first three choices suggest that I identify more with older characters each passing year. In my defense, how could you connect with the bland, blank slate that Richard Linklater chose as the focus of his lengthy, superficial opus? Consequently, the moments I most vividly recall involve Ethan Hawke. Linklaters decision to use the same actors over a decade-plus of filming produce some unique results &#8212; hence <em>Boyhood</em>s inclusion on this list &#8212; but the film has surprisingly little to say about the way this childs passions and values were influenced by his family and society. For a coming-of-age story with exceptional character insight that also punches you in the gut, revisit Francois Truffauts <em>The 400 Blows</em>.</p>
<p><a href="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/calvary2.jpg"><img src="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/calvary2.jpg" alt="calvary2" width="640" height="449" class="alignright size-full wp-image-10238750" /></a></p>
<h3>Calvary</h3>
<p>The gripping opening scene of John Michael McDonaghs existential Irish mortality play comprises a single, static shot of a priests face in the confession booth as he listens to an unidentified parishioner promise to kill him the following Sunday. Brendan Gleesons hulking yet ambivalent portrayal &#8212; in complete partnership with McDonaghs literate, grown-up script &#8212; carries the scene and the movie into dark, rich places.</p>
<p><a href="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/chelseagirls.jpg"><img src="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/chelseagirls.jpg" alt="chelseagirls" width="640" height="231" class="alignright size-full wp-image-10238749" /></a></p>
<h3>Chelsea Girls</h3>
<p>Revived by San Francisco Cinematheque at the Castro in November, Andy Warhols 1966 double projection, three-and-a half-hour quasi-fictional portrait of denizens of New Yorks Chelsea Hotel was one of the weirder pleasures of the year. The parade of fanatically long takes was quintessentially Warholian in that the interminable moments were as central to Warhols conception as the compelling ones. I shall long remember Nico standing in a kitchen endlessly trimming her bangs in a hand mirror (file under Innocence), beloved cult figure and in-person guest Mary Woronov skulking and glowering onscreen (No-method Acting) and Pope Ondine shooting speed and going off on some poor woman (Mania).</p>
<p><a href="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/listenup2.jpg"><img src="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/listenup2.jpg" alt="listenup2" width="640" height="450" class="alignright size-full wp-image-10238747" /></a></p>
<h3>Listen Up Philip </h3>
<p>Alex Ross Perrys frenetic tale of a self-obsessed young novelist features a relentless performance by Jason Schwartzman as the most insufferable subspecies of educated urban schmuck &#8212; the kind who thinks that being self-aware and owning it somehow redeems his schmuckiness. Perry, wisely recognizing that audiences need a break from this egomaniac, dispatches Philip for a good, long while to follow his erstwhile girlfriend.</p>
<p>Elisabeth Moss (<em>Mad Men</em>) delivers the best pure, concentrated acting to grace a screen this year, most memorably in a sequence where she wordlessly glides through a sequence of four or five emotions in response to a piece of news.</p>
<p><a href="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/overnighters.jpg"><img src="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/overnighters.jpg" alt="overnighters" width="640" height="360" class="alignright size-full wp-image-10238746" /></a></p>
<h3>The Overnighters</h3>
<p>A North Dakota pastor risks alienating his congregation by providing shelter and assistance to the horde of homeless men whove come from all over seeking oil-related jobs in this riveting profile by Bay Area documentary filmmaker Jesse Moss. In a strong year for documentaries (so whats new?), <em>The Overnighters</em> exposed the post-Depression dislocation and desperation that is pervasive yet somehow invisible (at least on television). I have questions about the docs structure and ethics, but theres no denying the unsettling effectiveness of an awkward dinner-table scene with the ministers family.</p>
<p><a href="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/selma2.jpg"><img src="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/selma2.jpg" alt="selma2" width="640" height="360" class="alignright size-full wp-image-10238745" /></a></p>
<h3>Selma</h3>
<p>I am ticked off, to tell you the truth, that Ava DuVernays impeccably mounted and frequently moving reenactment of a pivotal chain of events in the Civil Rights Movement wasnt booked into theaters a month before Election Day. Its all about money, of course: Opening on Christmas Day when children are out of school (and will be for the next week or two) will likely result in better box office than an October run. OK, but if the films goals include <em>making a difference</em> &#8212; well, you get my point. I suppose Ill embrace the silver lining, namely that <em>Annie</em> wont be the only screen representation of black people that white people will see this holiday season.</p>
<p><a href="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/mrturner.jpg"><img src="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/mrturner.jpg" alt="mrturner" width="640" height="384" class="alignright size-full wp-image-10238744" /></a></p>
<h3>Mr. Turner</h3>
<p>A lovingly rendered period piece thats all nuance, shadow and light, Mike Leighs portrait of British painter J.M.W. Turner consists almost entirely of small moments. There are no heart-wrenching revelations or confessions, no knockdown, drag-out fights. So how to choose a defining image from Leighs compositions or Timothy Spalls fully inhabited performance? I cant, except to cite any of the many instances of the rotund, top-heavy Turner walking &#8212; navigating whatever terrain with supreme self-confidence, accepting the labor required without hesitation, oblivious to other people and seeing what only he can see. We feel we have the experience of being privy to a man living his life, not an actor playing a role or following a script. That may or may not be a kind of magic or miracle, but it is transcendent.
<div id='ctx-module' class='ctx-module-container ctx-clearfix'></div>]]></content:encoded>
<wfw:commentRss>http://ww2.kqed.org/arts/2014/12/20/rent-hike-for-dance-mission-theater-has-artists-worried-about-uncertain-future/feed/</wfw:commentRss>
<wfw:commentRss>http://ww2.kqed.org/arts/2014/12/26/top-10-movie-moments-of-2014/feed/</wfw:commentRss>
<slash:comments>0</slash:comments>
<media:thumbnail url="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/10232900-thumb.jpg">
<media:thumbnail url="http://ww2.kqed.org/arts/wp-content/uploads/sites/2/2014/12/ida1-400x299.jpg">
</media:thumbnail>
</item>
</channel>

View File

@ -1,5 +1,3 @@
// prova is a wrapper for tape
// use npm run test:browser to run tests in a browser
var test = require('tape'),
RssBraider = require('../index'),
includeFolder = require('include-folder'),
@ -13,8 +11,9 @@ test('braid feed from file', function(t) {
var feeds = {};
feeds.sample_feed = require("./input_files/sample_feed").feed;
var braider_options = {
feeds : feeds,
indent : " "
feeds : feeds,
indent : " ",
date_sort_order : "desc"
};
var rss_braider = RssBraider.createClient(braider_options);
@ -22,6 +21,28 @@ test('braid feed from file', function(t) {
if (err) {
return t.fail(err);
}
// console.log(data);
t.equal(data, expectedOutput.fileFeedOutput);
});
});
test('deduplicate feed from file', function(t) {
    // Exactly one assertion is expected: the output comparison below.
    t.plan(1);

    // Braid the duplicates fixture (presumably sample_feed with repeated
    // items - verify against the fixture), deduping on title + guid.
    var rss_braider = RssBraider.createClient({
        feeds : {
            sample_feed : require("./input_files/sample_feed_duplicates").feed
        },
        indent : " ",
        dedupe_fields : ["title", "guid"]
    });

    rss_braider.processFeed('sample_feed', 'rss', function(err, data){
        if (err) {
            return t.fail(err);
        }
        // Compared against the same expected output that the
        // 'braid feed from file' test uses.
        t.equal(data, expectedOutput.fileFeedOutput);
    });
});

View File

@ -20,7 +20,8 @@
<sy:updatePeriod>hourly</sy:updatePeriod>
<sy:updateFrequency>1</sy:updateFrequency>
<generator>http://wordpress.org/?v=4.0.1</generator>
<atom:link rel="hub" href="http://pubsubhubbub.appspot.com"/><atom:link rel="hub" href="http://pubsubhubbub.superfeedr.com"/> <item>
<atom:link rel="hub" href="http://pubsubhubbub.appspot.com"/><atom:link rel="hub" href="http://pubsubhubbub.superfeedr.com"/>
<item>
<title>Top 10 Movie Moments of 2014</title>
<link>http://ww2.kqed.org/arts/2014/12/26/top-10-movie-moments-of-2014/</link>
<comments>http://ww2.kqed.org/arts/2014/12/26/top-10-movie-moments-of-2014/#comments</comments>