// Rinser/jobs-corpus.js
//
// Repository-viewer metadata captured with this listing: 530 lines, 18 KiB, JavaScript.

2017-05-31 14:47:53 +00:00
// Base name (no extension) of every artefact written to dist/.
const outputFile = 'jobs-special';

// Third-party and core modules.
const RssBraider = require('rss-braider');
const fs = require('fs');
const ejs = require('ejs');
const read = fs.readFileSync;
const join = require('path').join;
const RSS = require('rss');
const log4js = require('log4js');
const logger = log4js.getLogger();
const stopwords = require('stopwords-en');
const striptags = require('striptags');
const jsonfile = require('jsonfile');

// EJS template source, read once at start-up and compiled in doFeeds().
const str = read(join(__dirname, '/templates/rss.ejs'), 'utf8');

// Feed definitions handed to rss-braider; populated further down the file.
const feeds = {};

let strippedArray = [];

// Word -> {score, common} table; replaced by loadCorpus() and mutated by scoreEntry().
let corpus = {};
2017-07-19 07:15:22 +00:00
// Feed sources for the combined jobs feed.
//
// `sources` used to be assigned three times in a row; only this final list was
// ever read (by feeds.simple_test_feed), so the two earlier, overwritten lists
// have been dropped.
//
// Every entry goes through the relay at 52.211.105.9:6566; the upstream feed
// URL is the percent-encoded path segment after the port, so
// decodeURIComponent() on that segment gives the original feed address.
const sources = [
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobserve.com%2FMySearch%2FBAEBF3BDF82B8FEF.rss'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D47820652'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobserve.com%2FMySearch%2F6DA9769BA89834AA.rss'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.JobServe.com%2FMySearch%2FEDF47BEA6B31EF.rss'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.JobServe.com%2FMySearch%2F3CAD044BEF2BFA.rss'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.JobServe.com%2FMySearch%2FC7B25D86D0844A.rss'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.JobServe.com%2FMySearch%2F64A3EEF615FA4C.rss'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D21564698'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D21564712'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D21942123'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D33166238'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D34888173'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Fm7dp711z2r.xml'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Fpfvf7o7z2r.xml'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Flluqnt8z2r.xml'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Ftu33qt8z2r.xml'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Fu3btnz8z2r.xml'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.purelyit.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D33256062'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.purelyit.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D33450169'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.purelyit.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D34517029'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.purelyit.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26daysback%3D1%26jbe_id%3D34888105'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.technojobs.co.uk%2Frss.php%2Fglasgow%2Fsearchtypeand%2FlocationScotland%2Fsortbyrelevant%2Fjobtypeall'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobserve.com%2FMySearch%2F6FC7E9ED5F042ECB.rss'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26skill_atleast%3Dhtml%2520%2C%2520asp%2520%2C%2520web%2520%2C%2520sql%2520%2C%2520delphi%2520%2C%2520vb%2520%2C%2520vbscript%2520%2C%2520php%2520%2C%2520ajax%2520%2C%2520mysql%2520%2C%2520sqlserver%2520%2C%2520javascript%2520%2C%2520intranet%2520%2C%2520vmware%2520%2C%2520virtulization%26location_include%3DLondon%26location_within%3D10%26reqd_salary%3DANY%7C%26daysback%3D7%26scc%3DUK%26compare_resolved%3DCO_LONDON%26compare_search%3DLondon%26search_emp_mkt_cd%3DALL'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26skill_atleast%3Dhtml%2520%2C%2520asp%2520%2C%2520web%2520%2C%2520sql%2520%2C%2520delphi%2520%2C%2520vb%2520%2C%2520vbscript%2520%2C%2520php%2520%2C%2520ajax%2520%2C%2520mysql%2520%2C%2520sqlserver%2520%2C%2520javascript%2520%2C%2520intranet%2520%2C%2520vmware%2520%2C%2520virtulization%26location_include%3DGlasgow%26location_within%3D10%26reqd_salary%3DANY%7C%26daysback%3D7%26scc%3DUK%26compare_resolved%3DTO_G1_GLASGOW%26compare_search%3DGlasgow%26search_emp_mkt_cd%3DALL'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26skill_atleast%3Dhtml%2520%2C%2520asp%2520%2C%2520web%2520%2C%2520sql%2520%2C%2520delphi%2520%2C%2520vb%2520%2C%2520vbscript%2520%2C%2520php%2520%2C%2520ajax%2520%2C%2520mysql%2520%2C%2520sqlserver%2520%2C%2520javascript%2520%2C%2520intranet%2520%2C%2520vmware%2520%2C%2520virtulization%26location_within%3D10%26reqd_salary%3DANY%7C%26daysback%3D7%26scc%3DUK%26search_emp_mkt_cd%3DALL'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26skill_atleast%3Dhtml%2520%2C%2520asp%2520%2C%2520web%2520%2C%2520sql%2520%2C%2520delphi%2520%2C%2520vb%2520%2C%2520vbscript%2520%2C%2520php%2520%2C%2520ajax%2520%2C%2520mysql%2520%2C%2520sqlserver%2520%2C%2520javascript%2520%2C%2520intranet%2520%2C%2520vmware%2520%2C%2520virtulization%26location_include%3DGermany%26location_within%3D10%26reqd_salary%3DANY%7C%26daysback%3D7%26scc%3DUK%26compare_resolved%3DCY_GERMANY%26compare_search%3DGermany%26search_emp_mkt_cd%3DALL'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Fb1d7e6c3a9a11964z3r.xml'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobserve.com%2FMySearch%2FCA49421A86CA3F74.rss'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2Fddeded091b6f6d33z3r.xml'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.s1jobs.com%2Fxml%2F3eafc1ea20f1ca02z3r.xml'},
    {'count': 100, 'feed_url': 'http://52.211.105.9:6566/http%3A%2F%2Fwww.jobsite.co.uk%2Fcgi-bin%2Fadvsearch%3Frss_feed%3D1%26skill_atleast%3Dhtml%2C%2520asp%2C%2520web%2C%2520sql%2C%2520delphi%2C%2520vb%2C%2520vbscript%2C%2520php%2C%2520ajax%2C%2520mysql%2C%2520sqlserver%2C%2520javascript%2C%2520intranet%2C%2520vmware%2C%2520virtulization%26location_include%3DAbu%2520Dhabi%26compare_resolved%3DRE_ABUDHABI_UNITEDARABEMIRATES%26compare_search%3DAbu%2520Dhabi%26jobtype%3DX%26search_emp_mkt_cd%3DALL'}
];
2017-07-19 07:15:22 +00:00
2017-05-31 14:47:53 +00:00
// Pull feeds from config files:
// feeds.simple_test_feed = require("./config/feed").feed;
// Or define in-line
// The single combined feed processed by doFeeds() / doFeedsXML(): every URL
// in `sources`, passed through the listed plugins.
feeds.simple_test_feed = {
    'feed_name': 'feed',
    'default_count': 1,
    'no_cdata_fields': [], // Don't wrap these fields in CDATA tags
    // Reduced alternative that was kept commented out here:
    // 'filter_location', 'filter_today_only'
    'plugins': [
        'filter_location', 'filter_reject', 'filter_md_jobs', 'filter_today_only'
    ],
    'meta': {
        'title': 'Jobs',
        'description': 'Combined Jobs Feed',
        'site_url': 'http://pipes.silvrtree.co.uk/jobs-special.xml'
    },
    'sources': sources
};
// Options passed to RssBraider.createClient().
const braider_options = {
    feeds: feeds,
    indent: ' ',
    date_sort_order: 'desc', // Newest first
    // The client's logger level is set again right after the client is created.
    log_level: 'debug',
    dedupe_fields: ['link', 'guid'],
    // Directory searched for the plugins named in the feed definition.
    plugins_directories: [__dirname + '/plugins/']
};
// Announce start-up, then build the rss-braider client from braider_options.
console.log('Working..');
var rss_braider = RssBraider.createClient(braider_options);
// Override logging level (debug, info, warn, err, off)
// This replaces the 'debug' level requested through braider_options.log_level.
rss_braider.logger.level('error');
2017-05-31 15:20:56 +00:00
/**
 * Write one artefact to dist/<outputFile><extension> and log the outcome.
 *
 * @param {string} extension - file extension including the dot, e.g. '.html'.
 * @param {string} contents - data to write.
 * @param {string} savedMessage - message logged once the write succeeds.
 */
function writeDistFile(extension, contents, savedMessage) {
    fs.writeFile(__dirname + '/dist/' + outputFile + extension, contents, function (err) {
        if (err) {
            logger.error(err);
            return console.log(err);
        }

        console.log(savedMessage);
    });
}

/**
 * Fetch the combined feed as JSON, score each item against the corpus and
 * write the results to dist/ as HTML, JSON and RSS XML, then save the corpus.
 *
 * The .json file receives the feed data exactly as returned by rss-braider;
 * the .html and .xml files are built from the scored items.
 */
function doFeeds() {
    console.log('Doing feeds...');
    rss_braider.processFeed('simple_test_feed', 'json', function (err, data) {
        console.log('Moving on..');
        if (err) {
            return console.log(err);
        }

        let j;
        let ejsOutput;
        let xmlOutput;
        try {
            // Parsing, scoring and rendering can all throw. Inside this async
            // callback an uncaught exception would kill the process, so
            // failures are reported the same way as a feed error.
            j = processJson(JSON.parse(data));
            ejsOutput = ejs.compile(str)(j);
            xmlOutput = new RSS({title: 'Jobs Corpus'}, j.items).xml(' ');
        } catch (renderErr) {
            logger.error(renderErr);
            return console.log(renderErr);
        }

        writeDistFile('.html', ejsOutput, 'The file was saved!');
        writeDistFile('.json', data, 'The file was saved!');
        // processJson() updated the corpus word counts; persist them.
        saveCorpus();
        writeDistFile('.xml', xmlOutput, 'The XML file was saved!');
    });
}
2017-05-31 14:47:53 +00:00
2017-05-31 14:51:17 +00:00
2017-05-31 14:47:53 +00:00
/**
 * Score one description's tokens against the shared `corpus`, updating the
 * corpus word counts as a side effect.
 *
 * For each token:
 *  - a word not yet in `corpus` is added as `{score: 0, common: 1}`;
 *  - a known word has its `common` counter incremented on every occurrence,
 *    and its `score` is added to the total once per call however often the
 *    word repeats.
 *
 * @param {string[]} s - tokens of one description.
 * @returns {number} sum of the corpus scores of the distinct known words.
 */
function scoreEntry(s) {
    let score = 0;
    // Words already counted towards this entry's score (Set: O(1) lookups).
    const used = new Set();

    for (const t of s) {
        // Go through Object.prototype so a corpus key can never shadow the check.
        if (!Object.prototype.hasOwnProperty.call(corpus, t)) {
            corpus[t] = {score: 0, common: 1};
            continue;
        }

        corpus[t].common++;
        if (!used.has(t)) {
            score += corpus[t].score;
            used.add(t);
        }
    }

    return score;
}
2017-09-11 11:29:20 +00:00
2017-05-31 14:47:53 +00:00
/**
 * Score every item of a parsed feed and prefix its description with the score.
 *
 * Each item's description is stripped of HTML tags, split into non-stop-words
 * and scored with scoreEntry(), which also updates the corpus. The item's
 * description is then replaced, in place, by a red "Score N" banner followed
 * by the tag-free text.
 *
 * @param {{items: Array<{description: string}>}} j - parsed feed object.
 * @returns {Object} the same `j` object, with item descriptions rewritten.
 */
function processJson(j) {
    console.log('Processing...');

    for (const item of j.items) {
        // NOTE(review): the pattern here previously matched a plain space and
        // replaced it with a space (a no-op) - presumably a mangled `&nbsp;`.
        // Both the entity and the U+00A0 character are now normalised - confirm.
        const description = striptags(item.description).replace(/&nbsp;|\u00a0/gi, ' ');
        const score = scoreEntry(getNoneStopWords(description));
        console.log(`Score ${score}`);

        // `weight` is not a CSS property; `font-weight` is what makes the banner bold.
        item.description = `<div style='color:red;font-weight:900'>Score ${score}</div>${description}`;
    }

    return j;
}
2017-09-11 11:29:20 +00:00
2017-05-31 14:47:53 +00:00
/**
 * Split text into lower-cased word tokens and drop the stop words.
 *
 * Tokens are runs of `\w` characters. Stop words come from getStopWords() and
 * are trimmed before comparison.
 *
 * @param {string} sentence - plain text to tokenise.
 * @returns {string[]} lower-cased tokens, in order, that are not stop words;
 *   an empty array when the text contains no word characters.
 */
function getNoneStopWords(sentence) {
    // A Set rather than a plain object: `obj['constructor']` and
    // `obj['__proto__']` are truthy on any object literal, so those words
    // were being discarded as if they were stop words.
    const stopSet = new Set(getStopWords().map((stopWord) => stopWord.trim()));

    // match() returns null when nothing matches (empty or punctuation-only text).
    const words = String(sentence == null ? '' : sentence).match(/\w+/g) || [];

    return words
        .map((word) => word.toLowerCase())
        .filter((word) => !stopSet.has(word));
}
2017-05-31 14:47:53 +00:00
2017-07-19 07:15:22 +00:00
/**
 * Return the stop-word list held in the module-level `stopwords` binding
 * (loaded from the 'stopwords-en' package at the top of the file).
 *
 * @returns {*} the `stopwords` value, unmodified.
 */
function getStopWords() {
return stopwords;
}
2017-05-31 14:47:53 +00:00
2017-07-19 07:15:22 +00:00
/**
 * Fetch the combined feed as RSS and write it, unmodified, to
 * dist/<outputFile>.xml.
 *
 * Note: doFeeds() writes to the same .xml path.
 */
function doFeedsXML() {
    rss_braider.processFeed('simple_test_feed', 'rss', function (err, data) {
        if (err) {
            return console.log(err);
        }

        const target = __dirname + '/dist/' + outputFile + '.xml';
        console.log('Saving', target);

        fs.writeFile(target, data, function (writeErr) {
            if (writeErr) {
                return console.log(writeErr);
            }

            console.log('The file was saved!');
        });
    });
}
2017-05-31 14:47:53 +00:00
/**
 * Load dist/corpus.json into the module-level `corpus`, then start doFeeds().
 *
 * - A missing file (ENOENT) is treated as a first run: the corpus starts empty.
 * - Any other read or parse error is reported and the run stops, so that a
 *   later saveCorpus() cannot overwrite a corpus file that merely failed to load.
 */
function loadCorpus() {
    const fileName = __dirname + '/dist/corpus.json';
    console.log(`Loading ${fileName}`);

    jsonfile.readFile(fileName, function (err, obj) {
        if (err && err.code !== 'ENOENT') {
            console.error(err);
            return;
        }

        // Fall back to an empty table when there is no usable object; the
        // error used to be ignored, leaving `corpus` undefined and making
        // Object.keys() throw.
        corpus = (!err && obj && typeof obj === 'object') ? obj : {};

        const entries = Object.keys(corpus).length;
        console.log(`Corpus loaded (${entries} entries)`);
        doFeeds();
    });
}
/**
 * Write the module-level `corpus` to dist/corpus.json.
 *
 * A failure is reported on stderr; success is logged on stdout. Nothing is
 * returned and the write completes asynchronously.
 */
function saveCorpus() {
    const fileName = __dirname + '/dist/corpus.json';
    console.log(`Saving ${fileName}`);

    jsonfile.writeFile(fileName, corpus, function (err) {
        // Only report real failures: the error argument used to be printed
        // unconditionally, along with a second argument the callback never gets.
        if (err) {
            return console.error(err);
        }

        console.log('Corpus saved');
    });
}
/**
 * Script entry point: starts the run by calling loadCorpus().
 */
function go() {
loadCorpus();
}
go();