From 17166c74a410ca270d73a319a11d91fcec1ad12b Mon Sep 17 00:00:00 2001 From: Martin Donnelly Date: Wed, 19 Jul 2017 08:15:22 +0100 Subject: [PATCH] jobs fix, reverted --- jobs-corpus.js | 542 ++++++++++++++++++++++++++++--------------------- 1 file changed, 312 insertions(+), 230 deletions(-) diff --git a/jobs-corpus.js b/jobs-corpus.js index 3ebc7eb..891a8db 100644 --- a/jobs-corpus.js +++ b/jobs-corpus.js @@ -1,7 +1,7 @@ var outputFile = 'jobs-special', RssBraider = require('rss-braider'), fs = require( - 'fs'), ejs = require('ejs'), read = require('fs').readFileSync, join = require( - 'path').join, str = read(join(__dirname, '/templates/rss.ejs'), - 'utf8'), feeds = {}; + 'fs'), ejs = require('ejs'), read = require('fs').readFileSync, join = require( + 'path').join, str = read(join(__dirname, '/templates/rss.ejs'), + 'utf8'), feeds = {}; var RSS = require('rss'); var log4js = require('log4js'); var logger = log4js.getLogger(); @@ -11,6 +11,252 @@ const jsonfile = require('jsonfile'); let strippedArray = []; let corpus = {}; +let sources = [ + + { + "count": 100, + "feed_url": "http://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss", + "page": "https://www.jobserve.com/gb/en/JobSearch.aspx?shid=A7ACEE7915E274717C" + }, + { + + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=47820652" + }, + + { + + "count": 100, + "feed_url": "http://www.jobserve.com/MySearch/6DA9769BA89834AA.rss" + }, + { + + "count": 100, + "feed_url": "http://www.JobServe.com/MySearch/EDF47BEA6B31EF.rss" + }, + { + + "count": 100, + "feed_url": "http://www.JobServe.com/MySearch/3CAD044BEF2BFA.rss" + }, + { + + "count": 100, + "feed_url": "http://www.JobServe.com/MySearch/C7B25D86D0844A.rss" + }, + { + + "count": 100, + "feed_url": "http://www.JobServe.com/MySearch/64A3EEF615FA4C.rss" + }, + { + + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21564698" + }, + { + + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21564712" + }, + { + + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21942123" + }, + { + + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33166238" + }, + { + + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34888173" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/m7dp711z2r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/pfvf7o7z2r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/lluqnt8z2r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/tu33qt8z2r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/u3btnz8z2r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33256062" + }, + { + + "count": 100, + "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33450169" + }, + { + + "count": 100, + "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34517029" + }, + { + + "count": 100, + "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34888105" + }, + { + + "count": 100, + "feed_url": "http://www.technojobs.co.uk/rss.php/glasgow/searchtypeand/locationScotland/sortbyrelevant/jobtypeall" + }, + { + + "count": 100, + "feed_url": "http://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss" + }, + { + + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=London&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=CO_LONDON&compare_search=London&search_emp_mkt_cd=ALL" + }, + { + + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=Glasgow&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=TO_G1_GLASGOW&compare_search=Glasgow&search_emp_mkt_cd=ALL" + }, + { + + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&search_emp_mkt_cd=ALL" + }, + { + + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=Germany&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=CY_GERMANY&compare_search=Germany&search_emp_mkt_cd=ALL" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.jobserve.com/MySearch/CA49421A86CA3F74.rss" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/3eafc1ea20f1ca02z3r.xml" + } + +]; + +sources = [ + + { + + "count": 100, + "feed_url": "http://www.jobserve.com/MySearch/6DA9769BA89834AA.rss" + }, + { + + "count": 100, + "feed_url": "http://www.JobServe.com/MySearch/EDF47BEA6B31EF.rss" + }, + { + + "count": 100, + "feed_url": "http://www.JobServe.com/MySearch/3CAD044BEF2BFA.rss" + }, + { + + "count": 100, + "feed_url": "http://www.JobServe.com/MySearch/C7B25D86D0844A.rss" + }, + { + + "count": 100, + "feed_url": "http://www.JobServe.com/MySearch/64A3EEF615FA4C.rss" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/m7dp711z2r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/pfvf7o7z2r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/lluqnt8z2r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/tu33qt8z2r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/u3btnz8z2r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.jobserve.com/MySearch/CA49421A86CA3F74.rss" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml" + }, + { + + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/3eafc1ea20f1ca02z3r.xml" + } +]; + + // Pull feeds from config files: // feeds.simple_test_feed = require("./config/feed").feed; // Or define in-line @@ -23,171 +269,7 @@ feeds.simple_test_feed = { "title": "Jobs", "description": "Combined Jobs Feed", 'site_url': 'http://pipes.silvrtree.co.uk/jobs-special.xml' - }, "sources": [ - - { - "count": 100, - "feed_url": "http://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss", - "page": "https://www.jobserve.com/gb/en/JobSearch.aspx?shid=A7ACEE7915E274717C" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=47820652" - }, - - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobserve.com/MySearch/6DA9769BA89834AA.rss" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.JobServe.com/MySearch/EDF47BEA6B31EF.rss" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.JobServe.com/MySearch/3CAD044BEF2BFA.rss" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.JobServe.com/MySearch/C7B25D86D0844A.rss" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.JobServe.com/MySearch/64A3EEF615FA4C.rss" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21564698" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21564712" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21942123" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33166238" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34888173" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.s1jobs.com/xml/m7dp711z2r.xml" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.s1jobs.com/xml/pfvf7o7z2r.xml" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.s1jobs.com/xml/lluqnt8z2r.xml" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.s1jobs.com/xml/tu33qt8z2r.xml" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.s1jobs.com/xml/u3btnz8z2r.xml" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33256062" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33450169" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34517029" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34888105" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.technojobs.co.uk/rss.php/glasgow/searchtypeand/locationScotland/sortbyrelevant/jobtypeall" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=London&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=CO_LONDON&compare_search=London&search_emp_mkt_cd=ALL" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=Glasgow&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=TO_G1_GLASGOW&compare_search=Glasgow&search_emp_mkt_cd=ALL" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&search_emp_mkt_cd=ALL" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=Germany&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=CY_GERMANY&compare_search=Germany&search_emp_mkt_cd=ALL" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobserve.com/MySearch/CA49421A86CA3F74.rss" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml" - }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.s1jobs.com/xml/3eafc1ea20f1ca02z3r.xml" - } - - ] + }, "sources": sources }; var braider_options = { @@ -249,12 +331,12 @@ function doFeeds() { // console.log(newfeed); fs.writeFile(__dirname + "/dist/" + outputFile + ".xml", newfeed.xml(' '), function(err) { - if (err) { - return console.log(err); - } + if (err) { + return console.log(err); + } - console.log("The XML file was saved!"); - }); + console.log("The XML file was saved!"); + }); // @@ -267,21 +349,21 @@ function scoreEntry(s) { let score = 0; let used = []; for(let t of s) { - //console.log(`processing: ${t}`); + //console.log(`processing: ${t}`); - if (!corpus.hasOwnProperty(t)) { - corpus[t] = {score:0, common:1}; - } else { - corpus[t].common++; - if (used.indexOf(t) === -1) { - score = score + corpus[t].score; - used.push(t); - } - - } + if (!corpus.hasOwnProperty(t)) { + corpus[t] = {score:0, common:1}; + } else { + corpus[t].common++; + if (used.indexOf(t) === -1) { + score = score + corpus[t].score; + used.push(t); } - return score; + } + } + + return score; } function processJson(j) { // console.log(j); @@ -302,73 +384,73 @@ function processJson(j) { return j; } function getNoneStopWords(sentence) { - let common = getStopWords(); - let wordArr = sentence.match(/\w+/g), - commonObj = {}, - uncommonArr = [], - word, i; + let common = getStopWords(); + let wordArr = sentence.match(/\w+/g), + commonObj = {}, + uncommonArr = [], + word, i; - for (i = 0; i < common.length; i++) { - commonObj[ common[i].trim() ] = true; - } + for (i = 0; i < common.length; i++) { + commonObj[ common[i].trim() ] = true; + } - for (i = 0; i < wordArr.length; i++) { - word = wordArr[i].trim().toLowerCase(); - if (!commonObj[word]) { - uncommonArr.push(word); - } - } - return uncommonArr; + for (i = 0; i < wordArr.length; i++) { + word = wordArr[i].trim().toLowerCase(); + if (!commonObj[word]) { + uncommonArr.push(word); } + } + return uncommonArr; +} - function getStopWords() { - return stopwords; +function getStopWords() { + return stopwords; +} + + + + +function doFeedsXML() { + rss_braider.processFeed('simple_test_feed', 'rss', function(err, data) { + if (err) { + return console.log(err); } + console.log('Saving', __dirname + "/dist/" + outputFile + ".xml"); - function doFeedsXML() { - rss_braider.processFeed('simple_test_feed', 'rss', function(err, data) { - if (err) { - return console.log(err); - } + fs.writeFile(__dirname + "/dist/" + outputFile + ".xml", data, function(err) { + if (err) { + return console.log(err); + } + console.log("The file was saved!"); + }); + }); - - console.log('Saving', __dirname + "/dist/" + outputFile + ".xml"); - - fs.writeFile(__dirname + "/dist/" + outputFile + ".xml", data, function(err) { - if (err) { - return console.log(err); - } - - console.log("The file was saved!"); - }); - }); - - } +} function loadCorpus() { let fileName = __dirname + "/dist/corpus.json"; - console.log(`Loading ${fileName}`); + console.log(`Loading ${fileName}`); jsonfile.readFile(fileName, function(err, obj) { - corpus = obj; - let entries = Object.keys(corpus).length; - console.log(`Corpus loaded (${entries} entries)`); - doFeeds(); - }); + corpus = obj; + let entries = Object.keys(corpus).length; + console.log(`Corpus loaded (${entries} entries)`); + doFeeds(); + }); } function saveCorpus() { let fileName = __dirname + "/dist/corpus.json"; console.log(`Saving ${fileName}`); - jsonfile.writeFile(fileName, corpus,function(err, obj) { - console.error(err); - console.log(obj); - }); + jsonfile.writeFile(fileName, corpus,function(err, obj) { + console.error(err); + console.log(obj); + }); }