/*
 * Braids a set of job-search RSS feeds into one feed, scores every entry
 * against a persisted word corpus, and writes HTML / JSON / XML renderings of
 * the result into ./dist.
 */
const outputFile = 'jobs-special';
const RssBraider = require('rss-braider');
const fs = require('fs');
const ejs = require('ejs');
const read = require('fs').readFileSync;
const join = require('path').join;
const str = read(join(__dirname, '/templates/rss.ejs'), 'utf8');
const feeds = {};
const RSS = require('rss');
const log4js = require('log4js');
const logger = log4js.getLogger();
const stopwords = require('stopwords-en');
const striptags = require('striptags');
const jsonfile = require('jsonfile');
let strippedArray = []; // NOTE(review): not referenced anywhere in this file.
let corpus = {}; // term -> { score, common }; replaced by loadCorpus().

// Pull feeds from config files:
// feeds.simple_test_feed = require("./config/feed").feed;
// Or define in-line
feeds.simple_test_feed = {
  "feed_name": "feed",
  "default_count": 1,
  "no_cdata_fields": [], // Don't wrap these fields in CDATA tags
  "plugins": [
    'filter_location',
    'filter_reject',
    'filter_md_jobs',
    'filter_today_only'
  ],
  "meta": {
    "title": "Jobs",
    "description": "Combined Jobs Feed",
    'site_url': 'http://pipes.silvrtree.co.uk/jobs-special.xml'
  },
  "sources": [
    {
      "count": 100,
      "feed_url": "http://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss",
      "page": "https://www.jobserve.com/gb/en/JobSearch.aspx?shid=A7ACEE7915E274717C"
    },
    { "count": 100, "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=47820652" },
    { "count": 100, "feed_url": "http://www.jobserve.com/MySearch/6DA9769BA89834AA.rss" },
    { "count": 100, "feed_url": "http://www.JobServe.com/MySearch/EDF47BEA6B31EF.rss" },
    { "count": 100, "feed_url": "http://www.JobServe.com/MySearch/3CAD044BEF2BFA.rss" },
    { "count": 100, "feed_url": "http://www.JobServe.com/MySearch/C7B25D86D0844A.rss" },
    { "count": 100, "feed_url": "http://www.JobServe.com/MySearch/64A3EEF615FA4C.rss" },
    { "count": 100, "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21564698" },
    { "count": 100, "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21564712" },
    { "count": 100, "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21942123" },
    { "count": 100, "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33166238" },
    { "count": 100, "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34888173" },
    { "count": 100, "feed_url": "http://www.s1jobs.com/xml/m7dp711z2r.xml" },
    { "count": 100, "feed_url": "http://www.s1jobs.com/xml/pfvf7o7z2r.xml" },
    { "count": 100, "feed_url": "http://www.s1jobs.com/xml/lluqnt8z2r.xml" },
    { "count": 100, "feed_url": "http://www.s1jobs.com/xml/tu33qt8z2r.xml" },
    { "count": 100, "feed_url": "http://www.s1jobs.com/xml/u3btnz8z2r.xml" },
    { "count": 100, "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33256062" },
    { "count": 100, "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33450169" },
    { "count": 100, "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34517029" },
    { "count": 100, "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34888105" },
    { "count": 100, "feed_url": "http://www.technojobs.co.uk/rss.php/glasgow/searchtypeand/locationScotland/sortbyrelevant/jobtypeall" },
    { "count": 100, "feed_url": "http://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss" },
    { "count": 100, "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=London&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=CO_LONDON&compare_search=London&search_emp_mkt_cd=ALL" },
    { "count": 100, "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=Glasgow&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=TO_G1_GLASGOW&compare_search=Glasgow&search_emp_mkt_cd=ALL" },
    { "count": 100, "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&search_emp_mkt_cd=ALL" },
    { "count": 100, "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=Germany&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=CY_GERMANY&compare_search=Germany&search_emp_mkt_cd=ALL" },
    { "count": 100, "feed_url": "http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml" },
    // NOTE(review): same URL as the previous source; kept as in the original list.
    { "count": 100, "feed_url": "http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml" },
    { "count": 100, "feed_url": "http://www.jobserve.com/MySearch/CA49421A86CA3F74.rss" },
    { "count": 100, "feed_url": "http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml" },
    { "count": 100, "feed_url": "http://www.s1jobs.com/xml/3eafc1ea20f1ca02z3r.xml" }
  ]
};

const braider_options = {
  feeds: feeds,
  indent: " ",
  date_sort_order: "desc", // Newest first
  log_level: "warn",
  dedupe_fields: ['link', 'guid'],
  plugins_directories: [__dirname + "/plugins/"]
};

console.log('Working..');

const rss_braider = RssBraider.createClient(braider_options);

// Override logging level (debug, info, warn, err, off)
rss_braider.logger.level('error');

/**
 * Builds the path of an output artefact inside the dist directory.
 *
 * @param {string} extension - File extension including the leading dot.
 * @returns {string} `<script dir>/dist/<outputFile><extension>`.
 */
function distPath(extension) {
  return __dirname + "/dist/" + outputFile + extension;
}

/**
 * Processes the braided feed as JSON, scores its items, and writes the HTML
 * (EJS template), JSON and XML renderings. Also persists the corpus, because
 * scoring adds newly seen terms to it.
 */
function doFeeds() {
  rss_braider.processFeed('simple_test_feed', 'json', function (err, data) {
    if (err) {
      return console.log(err);
    }

    const j = processJson(JSON.parse(data));
    const ejsOutput = ejs.compile(str)(j);

    fs.writeFile(distPath(".html"), ejsOutput, function (err) {
      if (err) {
        return console.log(err);
      }
      console.log("The file was saved!");
    });

    // The JSON file receives the feed data as delivered, not the scored copy.
    fs.writeFile(distPath(".json"), data, function (err) {
      if (err) {
        logger.error(err);
        return console.log(err);
      }
      console.log("The file was saved!");
    });

    saveCorpus();

    const newfeed = new RSS({}, j.items);
    fs.writeFile(distPath(".xml"), newfeed.xml(' '), function (err) {
      if (err) {
        return console.log(err);
      }
      console.log("The XML file was saved!");
    });
  });
}

/**
 * Scores a list of terms against the corpus and records their occurrence.
 *
 * A term that is not yet in the corpus is added with `score: 0, common: 1`
 * and contributes nothing. A known term has its `common` counter incremented
 * on every occurrence, but its score is added only once per call.
 *
 * @param {string[]} s - Terms extracted from one feed entry.
 * @returns {number} Sum of the corpus scores of the distinct known terms.
 */
function scoreEntry(s) {
  let score = 0;
  const used = new Set();

  for (const t of s) {
    // Terms come from feed text, so never rely on a method looked up on the
    // corpus object itself.
    if (!Object.prototype.hasOwnProperty.call(corpus, t)) {
      corpus[t] = { score: 0, common: 1 };
    } else {
      corpus[t].common++;
      if (!used.has(t)) {
        score = score + corpus[t].score;
        used.add(t);
      }
    }
  }

  return score;
}

/**
 * Strips markup from every item's description, scores it, and prefixes the
 * description with the score. Items are modified in place.
 *
 * @param {{items: Array<{title: string, description: string}>}} j - Parsed feed.
 * @returns {object} The same object that was passed in.
 */
function processJson(j) {
  for (const item of j.items) {
    console.log(item.title);

    let description = striptags(item.description);
    // Maps non-breaking spaces to plain spaces (an ordinary space maps to itself).
    // NOTE(review): presumably the original intent of this replace - confirm.
    description = description.replace(/[ \u00a0]/g, ' ');

    const stripped = getNoneStopWords(description);
    const score = scoreEntry(stripped);
    console.log(`Score ${score}`);

    item.description = `\nScore ${score}\n` + description;
  }

  return j;
}

/**
 * Splits a sentence into lower-cased words and drops the stop words.
 *
 * @param {string} sentence - Plain text to tokenise.
 * @returns {string[]} Non-stop words in order of appearance; empty when the
 *   sentence contains no word characters.
 */
function getNoneStopWords(sentence) {
  // A Set avoids the inherited-key hits of a plain object lookup, which made
  // words such as "constructor" look like stop words.
  const common = new Set(getStopWords().map(function (w) {
    return w.trim();
  }));

  // String.prototype.match returns null, not [], when nothing matches.
  const wordArr = sentence.match(/\w+/g) || [];
  const uncommonArr = [];

  for (const raw of wordArr) {
    const word = raw.trim().toLowerCase();
    if (!common.has(word)) {
      uncommonArr.push(word);
    }
  }

  return uncommonArr;
}

/**
 * @returns {string[]} The stop-word list loaded from `stopwords-en`.
 */
function getStopWords() {
  return stopwords;
}

/**
 * Processes the braided feed as RSS and writes it unmodified to the XML
 * output file.
 */
function doFeedsXML() {
  rss_braider.processFeed('simple_test_feed', 'rss', function (err, data) {
    if (err) {
      return console.log(err);
    }

    console.log('Saving', distPath(".xml"));
    fs.writeFile(distPath(".xml"), data, function (err) {
      if (err) {
        return console.log(err);
      }
      console.log("The file was saved!");
    });
  });
}

/**
 * Loads the corpus from ./dist/corpus.json and then starts feed processing.
 * If the file cannot be read or does not contain an object, processing
 * continues with an empty corpus.
 */
function loadCorpus() {
  const fileName = "./dist/corpus.json";
  console.log(`Loading ${fileName}`);

  jsonfile.readFile(fileName, function (err, obj) {
    if (err) {
      logger.error(err);
      console.log(err);
    }

    // A missing or unreadable corpus must not prevent the run.
    corpus = (!err && obj && typeof obj === 'object') ? obj : {};

    const entries = Object.keys(corpus).length;
    console.log(`Corpus loaded (${entries} entries)`);
    doFeeds();
  });
}

/**
 * Writes the in-memory corpus to ./dist/corpus.json.
 */
function saveCorpus() {
  const fileName = "./dist/corpus.json";

  jsonfile.writeFile(fileName, corpus, function (err) {
    // Report failures only; a successful write passes no error.
    if (err) {
      console.error(err);
    }
  });
}

/**
 * Entry point: load the corpus, which in turn starts feed processing.
 */
function go() {
  loadCorpus();
}

go();