Rinser/jobs-special.js

276 lines
9.2 KiB
JavaScript
Raw Normal View History

2016-08-24 08:39:58 +00:00
var outputFile = 'jobs-special', RssBraider = require('rss-braider'), fs = require(
'fs'), ejs = require('ejs'), read = require('fs').readFileSync, join = require(
'path').join, str = read(join(__dirname, '/templates/rss.ejs'),
'utf8'), feeds = {};
2016-08-17 14:36:28 +00:00
2017-05-30 08:23:57 +00:00
var log4js = require('log4js');
var logger = log4js.getLogger();
const stopwords = require('stopwords-en');
2016-08-17 14:36:28 +00:00
// Pull feeds from config files:
// feeds.simple_test_feed = require("./config/feed").feed;
// Or define in-line
// Definition of the combined jobs feed. It is registered under the key
// 'simple_test_feed', which is the name later passed to processFeed().
feeds.simple_test_feed = {
  "feed_name": "feed",
  "default_count": 1,
  "no_cdata_fields": [], // Don't wrap these fields in CDATA tags
  // Plugin names; the plugin files are looked up in the directory given by
  // `plugins_directories` in the braider options.
  "plugins": [
    'filter_location', 'filter_reject', 'filter_md_jobs', 'filter_today_only'
    /*'filter_location', 'filter_today_only'*/
  ],
  "meta": {
    "title": "Jobs",
    "description": "Combined Jobs Feed",
    'site_url': 'http://pipes.silvrtree.co.uk/jobs-special.xml'
  },
  "sources": [
    // The only source that also carries a human-readable search page.
    {
      "count": 100,
      "feed_url": "http://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss",
      "page": "https://www.jobserve.com/gb/en/JobSearch.aspx?shid=A7ACEE7915E274717C"
    }
  ].concat([
    // Every remaining source uses the same settings, so only the URL is listed.
    // Keep entries unique: a repeated URL adds nothing new to the braided feed.
    "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=47820652",
    "http://www.jobserve.com/MySearch/6DA9769BA89834AA.rss",
    "http://www.JobServe.com/MySearch/EDF47BEA6B31EF.rss",
    "http://www.JobServe.com/MySearch/3CAD044BEF2BFA.rss",
    "http://www.JobServe.com/MySearch/C7B25D86D0844A.rss",
    "http://www.JobServe.com/MySearch/64A3EEF615FA4C.rss",
    "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21564698",
    "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21564712",
    "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21942123",
    "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33166238",
    "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34888173",
    "http://www.s1jobs.com/xml/m7dp711z2r.xml",
    "http://www.s1jobs.com/xml/pfvf7o7z2r.xml",
    "http://www.s1jobs.com/xml/lluqnt8z2r.xml",
    "http://www.s1jobs.com/xml/tu33qt8z2r.xml",
    "http://www.s1jobs.com/xml/u3btnz8z2r.xml",
    "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33256062",
    "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33450169",
    "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34517029",
    "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34888105",
    "http://www.technojobs.co.uk/rss.php/glasgow/searchtypeand/locationScotland/sortbyrelevant/jobtypeall",
    "http://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss",
    "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=London&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=CO_LONDON&compare_search=London&search_emp_mkt_cd=ALL",
    "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=Glasgow&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=TO_G1_GLASGOW&compare_search=Glasgow&search_emp_mkt_cd=ALL",
    "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&search_emp_mkt_cd=ALL",
    "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=Germany&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=CY_GERMANY&compare_search=Germany&search_emp_mkt_cd=ALL",
    "http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml",
    "http://www.jobserve.com/MySearch/CA49421A86CA3F74.rss",
    "http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml",
    "http://www.s1jobs.com/xml/3eafc1ea20f1ca02z3r.xml"
  ].map(function(feedUrl) {
    return { "count": 100, "feed_url": feedUrl };
  }))
};
// Options handed to the rss-braider client: the feed definitions, output
// formatting, de-duplication keys and the location of the local plugins.
var braider_options = {
  feeds,
  indent: ' ',
  date_sort_order: 'desc', // Newest first
  log_level: 'warn',
  dedupe_fields: ['link', 'guid'],
  plugins_directories: [`${__dirname}/plugins/`]
};

console.log('Working..');

var rss_braider = RssBraider.createClient(braider_options);

// Override logging level (debug, info, warn, err, off); this replaces the
// 'warn' level requested through `log_level` above.
rss_braider.logger.level('error');
2016-08-24 08:39:58 +00:00
// Braid the feed as JSON and write two artefacts into ./dist:
//   <outputFile>.html - the parsed JSON rendered through the EJS template `str`
//   <outputFile>.json - the JSON text exactly as delivered to this callback
rss_braider.processFeed('simple_test_feed', 'json', function(feedError, jsonText) {
  if (feedError) {
    return console.log(feedError);
  }

  // Parse first, then compile and render the template with the parsed feed.
  const feedObject = JSON.parse(jsonText);
  const renderTemplate = ejs.compile(str);
  const html = renderTemplate(feedObject);

  const distBase = `${__dirname}/dist/${outputFile}`;

  fs.writeFile(`${distBase}.html`, html, function onHtmlWritten(writeError) {
    if (writeError) {
      return console.log(writeError);
    }

    console.log("The file was saved!");
  });

  fs.writeFile(`${distBase}.json`, jsonText, function onJsonWritten(writeError) {
    if (writeError) {
      // A failed JSON write is reported to log4js as well as the console.
      logger.error(writeError);
      return console.log(writeError);
    }

    console.log("The file was saved!");
  });
});
2017-05-30 08:23:57 +00:00
/**
 * Extract the words of a sentence that are not stop words.
 *
 * The sentence is split into runs of word characters (`\w+`); each word is
 * lower-cased and kept unless it appears in the list returned by
 * getStopWords() (whose entries are trimmed before comparison). Word order
 * and repeated words are preserved.
 *
 * @param {string} sentence - Text to tokenise. `null`/`undefined` are treated
 *   as an empty sentence.
 * @returns {string[]} Lower-cased non-stop words; an empty array when the
 *   sentence contains no word characters.
 */
function getNoneStopWords(sentence) {
  // A Set has no inherited keys, so words such as "constructor" or
  // "__proto__" are not mistaken for stop words, which is what happens with
  // a truthiness test against a plain object.
  const common = new Set(getStopWords().map((stopWord) => stopWord.trim()));

  const text = sentence == null ? '' : sentence;
  // String.prototype.match yields null (not an empty array) when nothing
  // matches, so fall back to an empty list.
  const wordArr = text.match(/\w+/g) || [];

  return wordArr
    .map((word) => word.toLowerCase())
    .filter((word) => !common.has(word));
}
/**
 * Provide the stop-word list used by getNoneStopWords().
 *
 * @returns {string[]} The value loaded from the `stopwords-en` module,
 *   returned as-is (not copied).
 */
function getStopWords() {
  const englishStopWords = stopwords;
  return englishStopWords;
}
2016-08-24 08:39:58 +00:00
// Braid the feed as RSS and write the XML to ./dist/<outputFile>.xml.
rss_braider.processFeed('simple_test_feed', 'rss', function(feedError, rssXml) {
  if (feedError) {
    return console.log(feedError);
  }

  const xmlPath = `${__dirname}/dist/${outputFile}.xml`;
  console.log('Saving', xmlPath);

  fs.writeFile(xmlPath, rssXml, function onXmlWritten(writeError) {
    if (writeError) {
      return console.log(writeError);
    }

    console.log("The file was saved!");
  });
});