// Module setup for the combined jobs feed.
//  - Loads rss-braider, fs, ejs, path, rss, log4js, stopwords-en, striptags and jsonfile.
//  - `str` holds the contents of templates/rss.ejs, read synchronously as UTF-8 when the
//    module is loaded.
//  - `strippedArray` and `corpus` start out empty.
//  - `sources` is assigned three times in a row. Each assignment replaces the previous
//    array, so only the third list is still in `sources` when the feed definition that
//    follows references it. Every URL in that third list starts with
//    http://52.211.105.9:6566/ followed by a percent-encoded feed URL
//    (presumably a proxy in front of the job sites -- TODO confirm).
//  - NOTE(review): the first two lists contain
//    http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml twice.
//  - The last statement started here is the in-line definition of
//    feeds.simple_test_feed (feed name, default count, plugin list, meta); it is
//    completed by the lines that follow.
var outputFile = 'jobs-special', RssBraider = require('rss-braider'), fs = require( 'fs'), ejs = require('ejs'), read = require('fs').readFileSync, join = require( 'path').join, str = read(join(__dirname, '/templates/rss.ejs'), 'utf8'), feeds = {}; var RSS = require('rss'); var log4js = require('log4js'); var logger = log4js.getLogger(); const stopwords = require('stopwords-en'); var striptags = require('striptags'); const jsonfile = require('jsonfile'); let strippedArray = []; let corpus = {}; let sources = [ { 'count': 100, 'feed_url': 'http://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss', 'page': 'https://www.jobserve.com/gb/en/JobSearch.aspx?shid=A7ACEE7915E274717C' }, { 'count': 100, 'feed_url': 'http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=47820652' }, { 'count': 100, 'feed_url': 'http://www.jobserve.com/MySearch/6DA9769BA89834AA.rss' }, { 'count': 100, 'feed_url': 'http://www.JobServe.com/MySearch/EDF47BEA6B31EF.rss' }, { 'count': 100, 'feed_url': 'http://www.JobServe.com/MySearch/3CAD044BEF2BFA.rss' }, { 'count': 100, 'feed_url': 'http://www.JobServe.com/MySearch/C7B25D86D0844A.rss' }, { 'count': 100, 'feed_url': 'http://www.JobServe.com/MySearch/64A3EEF615FA4C.rss' }, { 'count': 100, 'feed_url': 'http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21564698' }, { 'count': 100, 'feed_url': 'http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21564712' }, { 'count': 100, 'feed_url': 'http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21942123' }, { 'count': 100, 'feed_url': 'http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33166238' }, { 'count': 100, 'feed_url': 'http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34888173' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/m7dp711z2r.xml' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/pfvf7o7z2r.xml' }, { 'count': 100, 'feed_url': 
'http://www.s1jobs.com/xml/lluqnt8z2r.xml' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/tu33qt8z2r.xml' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/u3btnz8z2r.xml' }, { 'count': 100, 'feed_url': 'http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33256062' }, { 'count': 100, 'feed_url': 'http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33450169' }, { 'count': 100, 'feed_url': 'http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34517029' }, { 'count': 100, 'feed_url': 'http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34888105' }, { 'count': 100, 'feed_url': 'http://www.technojobs.co.uk/rss.php/glasgow/searchtypeand/locationScotland/sortbyrelevant/jobtypeall' }, { 'count': 100, 'feed_url': 'http://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss' }, { 'count': 100, 'feed_url': 'http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=London&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=CO_LONDON&compare_search=London&search_emp_mkt_cd=ALL' }, { 'count': 100, 'feed_url': 'http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=Glasgow&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=TO_G1_GLASGOW&compare_search=Glasgow&search_emp_mkt_cd=ALL' }, { 'count': 100, 'feed_url': 
'http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&search_emp_mkt_cd=ALL' }, { 'count': 100, 'feed_url': 'http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=Germany&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=CY_GERMANY&compare_search=Germany&search_emp_mkt_cd=ALL' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml' }, { 'count': 100, 'feed_url': 'http://www.jobserve.com/MySearch/CA49421A86CA3F74.rss' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/3eafc1ea20f1ca02z3r.xml' } ]; sources = [ { 'count': 100, 'feed_url': 'http://www.jobserve.com/MySearch/6DA9769BA89834AA.rss' }, { 'count': 100, 'feed_url': 'http://www.JobServe.com/MySearch/EDF47BEA6B31EF.rss' }, { 'count': 100, 'feed_url': 'http://www.JobServe.com/MySearch/3CAD044BEF2BFA.rss' }, { 'count': 100, 'feed_url': 'http://www.JobServe.com/MySearch/C7B25D86D0844A.rss' }, { 'count': 100, 'feed_url': 'http://www.JobServe.com/MySearch/64A3EEF615FA4C.rss' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/m7dp711z2r.xml' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/pfvf7o7z2r.xml' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/lluqnt8z2r.xml' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/tu33qt8z2r.xml' }, { 'count': 100, 'feed_url': 
'http://www.s1jobs.com/xml/u3btnz8z2r.xml' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml' }, { 'count': 100, 'feed_url': 'http://www.jobserve.com/MySearch/CA49421A86CA3F74.rss' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml' }, { 'count': 100, 'feed_url': 'http://www.s1jobs.com/xml/3eafc1ea20f1ca02z3r.xml' } ]; sources = [{ 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobserve.com%2FMySearch%2FBAEBF3BDF82B8FEF.rss' }, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D47820652' }, {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobserve.com%2FMySearch%2F6DA9769BA89834AA.rss'}, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.JobServe.com%2FMySearch%2FEDF47BEA6B31EF.rss' }, {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.JobServe.com%2FMySearch%2F3CAD044BEF2BFA.rss'}, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.JobServe.com%2FMySearch%2FC7B25D86D0844A.rss' }, {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.JobServe.com%2FMySearch%2F64A3EEF615FA4C.rss'}, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D21564698' }, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D21564712' }, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D21942123' }, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D33166238' 
}, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D34888173' }, {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Fm7dp711z2r.xml'}, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Fpfvf7o7z2r.xml' }, {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Flluqnt8z2r.xml'}, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Ftu33qt8z2r.xml' }, {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Fu3btnz8z2r.xml'}, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.purelyit.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D33256062' }, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.purelyit.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D33450169' }, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.purelyit.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D34517029' }, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.purelyit.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D34888105' }, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.technojobs.co.uk%2Frss.php%2Fglasgow%2Fsearchtypeand%2FlocationScotland%2Fsortbyrelevant%2Fjobtypeall' }, {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobserve.com%2FMySearch%2F6FC7E9ED5F042ECB.rss'}, { 'count': 100, 'feed_url': 
'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26skill_atleast%3Dhtml%2520%2C%2520asp%2520%2C%2520web%2520%2C%2520sql%2520%2C%2520delphi%2520%2C%2520vb%2520%2C%2520vbscript%2520%2C%2520php%2520%2C%2520ajax%2520%2C%2520mysql%2520%2C%2520sqlserver%2520%2C%2520javascript%2520%2C%2520intranet%2520%2C%2520vmware%2520%2C%2520virtulization%26location_include%3DLondon%26location_within%3D10%26reqd_salary%3DANY%7C%26daysback%3D7%26scc%3DUK%26compare_resolved%3DCO_LONDON%26compare_search%3DLondon%26search_emp_mkt_cd%3DALL' }, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26skill_atleast%3Dhtml%2520%2C%2520asp%2520%2C%2520web%2520%2C%2520sql%2520%2C%2520delphi%2520%2C%2520vb%2520%2C%2520vbscript%2520%2C%2520php%2520%2C%2520ajax%2520%2C%2520mysql%2520%2C%2520sqlserver%2520%2C%2520javascript%2520%2C%2520intranet%2520%2C%2520vmware%2520%2C%2520virtulization%26location_include%3DGlasgow%26location_within%3D10%26reqd_salary%3DANY%7C%26daysback%3D7%26scc%3DUK%26compare_resolved%3DTO_G1_GLASGOW%26compare_search%3DGlasgow%26search_emp_mkt_cd%3DALL' }, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26skill_atleast%3Dhtml%2520%2C%2520asp%2520%2C%2520web%2520%2C%2520sql%2520%2C%2520delphi%2520%2C%2520vb%2520%2C%2520vbscript%2520%2C%2520php%2520%2C%2520ajax%2520%2C%2520mysql%2520%2C%2520sqlserver%2520%2C%2520javascript%2520%2C%2520intranet%2520%2C%2520vmware%2520%2C%2520virtulization%26location_within%3D10%26reqd_salary%3DANY%7C%26daysback%3D7%26scc%3DUK%26search_emp_mkt_cd%3DALL' }, { 'count': 100, 'feed_url': 
'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26skill_atleast%3Dhtml%2520%2C%2520asp%2520%2C%2520web%2520%2C%2520sql%2520%2C%2520delphi%2520%2C%2520vb%2520%2C%2520vbscript%2520%2C%2520php%2520%2C%2520ajax%2520%2C%2520mysql%2520%2C%2520sqlserver%2520%2C%2520javascript%2520%2C%2520intranet%2520%2C%2520vmware%2520%2C%2520virtulization%26location_include%3DGermany%26location_within%3D10%26reqd_salary%3DANY%7C%26daysback%3D7%26scc%3DUK%26compare_resolved%3DCY_GERMANY%26compare_search%3DGermany%26search_emp_mkt_cd%3DALL' }, {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Fb1d7e6c3a9a11964z3r.xml'}, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobserve.com%2FMySearch%2FCA49421A86CA3F74.rss' }, {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Fddeded091b6f6d33z3r.xml'}, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2F3eafc1ea20f1ca02z3r.xml' }, { 'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26skill_atleast%3Dhtml%2C%2520asp%2C%2520web%2C%2520sql%2C%2520delphi%2C%2520vb%2C%2520vbscript%2C%2520php%2C%2520ajax%2C%2520mysql%2C%2520sqlserver%2C%2520javascript%2C%2520intranet%2C%2520vmware%2C%2520virtulization%26location_include%3DAbu%2520Dhabi%26compare_resolved%3DRE_ABUDHABI_UNITEDARABEMIRATES%26compare_search%3DAbu%2520Dhabi%26jobtype%3DX%26search_emp_mkt_cd%3DALL' }]; // Pull feeds from config files: // feeds.simple_test_feed = require("./config/feed").feed; // Or define in-line feeds.simple_test_feed = { 'feed_name': 'feed', 'default_count': 1, 'no_cdata_fields': [], // Don't wrap these fields in CDATA tags 'plugins': [ 'filter_location', 'filter_reject', 'filter_md_jobs', 'filter_today_only' /*'filter_location', 'filter_today_only'*/ ], 'meta': { 'title': 'Jobs', 'description': 'Combined Jobs Feed', 
'site_url': 'http://pipes.silvrtree.co.uk/jobs-special.xml' }, 'sources': sources }; var braider_options = { feeds: feeds, indent: ' ', date_sort_order: 'desc', // Newest first log_level: 'debug', dedupe_fields: ['link', 'guid'], plugins_directories: [__dirname + '/plugins/'] }; console.log('Working..'); var rss_braider = RssBraider.createClient(braider_options); // Override logging level (debug, info, warn, err, off) rss_braider.logger.level('error'); function doFeeds() { console.log('Doing feeds...'); rss_braider.processFeed('simple_test_feed', 'json', function (err, data) { console.log('Moving on..'); if (err) { return console.log(err); } let j = JSON.parse(data); j = processJson(j); /*for (let mm in j) { console.log(mm); }*/ let ejsOutput = ejs.compile(str)(j); fs.writeFile(__dirname + '/dist/' + outputFile + '.html', ejsOutput, function (err) { if (err) { return console.log(err); } console.log('The file was saved!'); }); fs.writeFile(__dirname + '/dist/' + outputFile + '.json', data, function (err) { if (err) { logger.error(err); return console.log(err); } console.log('The file was saved!'); }); saveCorpus(); let newfeed = new RSS({title: 'Jobs Corpus'}, j.items); // console.log(newfeed); fs.writeFile(__dirname + '/dist/' + outputFile + '.xml', newfeed.xml(' '), function (err) { if (err) { return console.log(err); } console.log('The XML file was saved!'); }); // }); } function scoreEntry(s) { let score = 0; let used = []; for (let t of s) { //console.log(`processing: ${t}`); if (!corpus.hasOwnProperty(t)) { corpus[t] = {score: 0, common: 1}; } else { corpus[t].common++; if (used.indexOf(t) === -1) { score = score + corpus[t].score; used.push(t); } } } return score; } function processJson(j) { // console.log(j); console.log('Processing...'); for (let item of j.items) { let description = striptags(item.description); description = description.replace(/( )/ig, ' '); let stripped = getNoneStopWords(description); //console.log(stripped); let score = 
scoreEntry(stripped); console.log(`Score ${score}`); let scoreText = `