diff --git a/.idea/misc.xml b/.idea/misc.xml
index 24eb271..886fd10 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,4 +3,7 @@
+
+
+
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 522f851..3bb7154 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -1,149 +1,28 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -164,108 +43,114 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ logg
+ 6DA9769BA89834AA
+
+
@@ -282,11 +173,6 @@
@@ -351,10 +242,10 @@
-
-
-
-
+
+
+
+
@@ -372,6 +263,8 @@
+
+
@@ -394,14 +287,26 @@
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
@@ -409,7 +314,7 @@
-
+
@@ -463,58 +368,17 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
@@ -523,18 +387,18 @@
+
+
+
+
+
+
-
-
-
-
-
-
@@ -614,7 +478,8 @@
-
+ C:\Users\mdonnel\AppData\Roaming\Subversion
+ 125
@@ -656,6 +521,7 @@
+
1458043875334
@@ -1004,31 +870,33 @@
-
+
-
+
+
-
-
-
-
-
-
+
-
+
+
+
+
+
+
+
+
-
@@ -1104,19 +972,14 @@
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
@@ -1212,13 +1075,6 @@
-
-
-
-
-
-
-
@@ -1316,15 +1172,6 @@
-
-
-
-
-
-
-
-
-
@@ -1337,49 +1184,17 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
+
@@ -1387,7 +1202,7 @@
-
+
@@ -1395,8 +1210,106 @@
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/bayes.js b/bayes.js
new file mode 100644
index 0000000..261e9c2
--- /dev/null
+++ b/bayes.js
@@ -0,0 +1,76 @@
+/**
+ * Created by mdonnel on 18/05/2017.
+ */
+let outputFile = 'jobs-special';
+
+let log4js = require('log4js');
+let logger = log4js.getLogger();
+const stopwords = require('stopwords-en');
+let jsonfile = require('jsonfile');
+var striptags = require('striptags');
+
+let strippedArray = [];
+
+/**
+ * Splits a sentence into lower-cased words and removes English stop words.
+ *
+ * @param {string} sentence - Text to tokenise; may be empty.
+ * @returns {string[]} The remaining words, lower-cased, in original order.
+ */
+function getNoneStopWords(sentence) {
+  // A Set rather than a plain object, so that words such as "constructor"
+  // are not treated as stop words through Object.prototype lookups.
+  const common = new Set(getStopWords().map((stop) => stop.trim()));
+
+  // String.prototype.match returns null (not []) when nothing matches,
+  // e.g. for an empty description, so fall back to an empty list.
+  const words = String(sentence || '').match(/\w+/g) || [];
+
+  return words
+    .map((word) => word.toLowerCase())
+    .filter((word) => !common.has(word));
+}
+
+/**
+ * Returns the stop-word list exported by the `stopwords-en` package.
+ */
+function getStopWords() {
+  return stopwords;
+}
+
+/**
+ * Strips HTML and stop words from every item description, then writes the
+ * accumulated word lists to ./dist/stripped.json.
+ *
+ * @param {{items: Array<{description: string}>}} data - Parsed feed JSON.
+ */
+function processFile(data) {
+  // Forward slashes resolve on both Windows and POSIX; ".\\dist\\..." would
+  // name a single oddly-named file in the working directory on POSIX.
+  let fileName = './dist/stripped.json';
+  console.log(fileName);
+  for (let item of (data && data.items) || []) {
+    strippedArray.push(getNoneStopWords(striptags(item.description)));
+  }
+  jsonfile.writeFile(fileName, strippedArray, function(err) {
+    if (err) { console.error(err); }
+  });
+}
+
+
+/** Reads ./dist/<outputFile>.json and hands the parsed feed to processFile. */
+function go() {
+  // Forward slashes resolve on both Windows and POSIX.
+  let fileName = './dist/' + outputFile + '.json';
+  console.log(fileName);
+  jsonfile.readFile(fileName, function(err, obj) {
+    // Report a missing or invalid file rather than passing undefined on.
+    if (err) { return console.error(err); }
+    processFile(obj);
+  });
+}
+
+
+
+go();
+
diff --git a/jobs-special.js b/jobs-special.js
index e0128cc..e940b6c 100644
--- a/jobs-special.js
+++ b/jobs-special.js
@@ -3,6 +3,10 @@ var outputFile = 'jobs-special', RssBraider = require('rss-braider'), fs = requi
'path').join, str = read(join(__dirname, '/templates/rss.ejs'),
'utf8'), feeds = {};
+var log4js = require('log4js');
+var logger = log4js.getLogger();
+const stopwords = require('stopwords-en');
+
// Pull feeds from config files:
// feeds.simple_test_feed = require("./config/feed").feed;
// Or define in-line
@@ -10,12 +14,18 @@ feeds.simple_test_feed = {
"feed_name": "feed", "default_count": 1, "no_cdata_fields": [], // Don't wrap these fields in CDATA tags
"plugins": [
'filter_location', 'filter_reject', 'filter_md_jobs', 'filter_today_only'
+ /*'filter_location', 'filter_today_only'*/
], "meta": {
"title": "Jobs",
"description": "Combined Jobs Feed",
'site_url': 'http://pipes.silvrtree.co.uk/jobs-special.xml'
}, "sources": [
+ {
+ "count": 100,
+ "feed_url": "http://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss",
+ "page": "https://www.jobserve.com/gb/en/JobSearch.aspx?shid=A7ACEE7915E274717C"
+ },
{
/* "name" : "JobServe",*/
"count": 100,
@@ -226,11 +236,40 @@ rss_braider.processFeed('simple_test_feed', 'json', function(err, data) {
});
});
+/**
+ * Returns the lower-cased words of a sentence that are not stop words.
+ * @param {string} sentence - Text to tokenise; may be empty.
+ * @returns {string[]} Non-stop words in their original order.
+ */
+function getNoneStopWords(sentence) {
+  var common = getStopWords();
+  // match() yields null when nothing matches, so default to an empty list.
+  var wordArr = String(sentence || '').match(/\w+/g) || [];
+  // Prototype-less map, so "constructor" etc. never look like stop words.
+  var commonObj = Object.create(null);
+
+  for (var i = 0; i < common.length; i++) {
+    commonObj[common[i].trim()] = true;
+  }
+
+  return wordArr.map(function (raw) { return raw.toLowerCase(); })
+    .filter(function (word) { return !commonObj[word]; });
+}
+
+/** Returns the stop-word list exported by the `stopwords-en` package. */
+function getStopWords() {
+  return stopwords;
+}
+
rss_braider.processFeed('simple_test_feed', 'rss', function(err, data) {
if (err) {
return console.log(err);
}
+
+
+
console.log('Saving', __dirname + "/dist/" + outputFile + ".xml");
+
fs.writeFile(__dirname + "/dist/" + outputFile + ".xml", data, function(err) {
if (err) {
return console.log(err);
diff --git a/node_modules/lodash/README.md b/node_modules/lodash/README.md
index f874f13..acdd128 100644
--- a/node_modules/lodash/README.md
+++ b/node_modules/lodash/README.md
@@ -1,4 +1,4 @@
-# lodash v4.16.6
+# lodash v4.17.4
The [Lodash](https://lodash.com/) library exported as [Node.js](https://nodejs.org/) modules.
@@ -28,12 +28,12 @@ var at = require('lodash/at');
var curryN = require('lodash/fp/curryN');
```
-See the [package source](https://github.com/lodash/lodash/tree/4.16.6-npm) for more details.
+See the [package source](https://github.com/lodash/lodash/tree/4.17.4-npm) for more details.
**Note:**
Install [n_](https://www.npmjs.com/package/n_) for Lodash use in the Node.js < 6 REPL.
## Support
-Tested in Chrome 53-54, Firefox 48-49, IE 11, Edge 14, Safari 9-10, Node.js 6-7, & PhantomJS 2.1.1.
+Tested in Chrome 54-55, Firefox 49-50, IE 11, Edge 14, Safari 9-10, Node.js 6-7, & PhantomJS 2.1.1.
Automated [browser](https://saucelabs.com/u/lodash) & [CI](https://travis-ci.org/lodash/lodash/) test runs are available.
diff --git a/node_modules/lodash/package.json b/node_modules/lodash/package.json
index e129724..d400892 100644
--- a/node_modules/lodash/package.json
+++ b/node_modules/lodash/package.json
@@ -2,25 +2,25 @@
"_args": [
[
{
- "raw": "lodash@^4.11.1",
+ "raw": "lodash@^4.16.6",
"scope": null,
"escapedName": "lodash",
"name": "lodash",
- "rawSpec": "^4.11.1",
- "spec": ">=4.11.1 <5.0.0",
+ "rawSpec": "^4.16.6",
+ "spec": ">=4.16.6 <5.0.0",
"type": "range"
},
- "/home/martin/mddev/Rinser"
+ "C:\\dev\\md\\Rinser"
]
],
- "_from": "lodash@>=4.11.1 <5.0.0",
- "_id": "lodash@4.16.6",
+ "_from": "lodash@>=4.16.6 <5.0.0",
+ "_id": "lodash@4.17.4",
"_inCache": true,
"_location": "/lodash",
- "_nodeVersion": "7.0.0",
+ "_nodeVersion": "7.2.1",
"_npmOperationalInternal": {
- "host": "packages-18-east.internal.npmjs.com",
- "tmp": "tmp/lodash-4.16.6.tgz_1477982285913_0.34612850472331047"
+ "host": "packages-12-west.internal.npmjs.com",
+ "tmp": "tmp/lodash-4.17.4.tgz_1483223634314_0.5332164366263896"
},
"_npmUser": {
"name": "jdalton",
@@ -29,12 +29,12 @@
"_npmVersion": "2.15.11",
"_phantomChildren": {},
"_requested": {
- "raw": "lodash@^4.11.1",
+ "raw": "lodash@^4.16.6",
"scope": null,
"escapedName": "lodash",
"name": "lodash",
- "rawSpec": "^4.11.1",
- "spec": ">=4.11.1 <5.0.0",
+ "rawSpec": "^4.16.6",
+ "spec": ">=4.16.6 <5.0.0",
"type": "range"
},
"_requiredBy": [
@@ -43,11 +43,11 @@
"/cheerio",
"/gulp-jshint"
],
- "_resolved": "http://localhost:4873/lodash/-/lodash-4.16.6.tgz",
- "_shasum": "d22c9ac660288f3843e16ba7d2b5d06cca27d777",
+ "_resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.4.tgz",
+ "_shasum": "78203a4d1c328ae1d86dca6460e369b57f4055ae",
"_shrinkwrap": null,
- "_spec": "lodash@^4.11.1",
- "_where": "/home/martin/mddev/Rinser",
+ "_spec": "lodash@^4.16.6",
+ "_where": "C:\\dev\\md\\Rinser",
"author": {
"name": "John-David Dalton",
"email": "john.david.dalton@gmail.com",
@@ -62,11 +62,6 @@
"email": "john.david.dalton@gmail.com",
"url": "http://allyoucanleet.com/"
},
- {
- "name": "Blaine Bublitz",
- "email": "blaine.bublitz@gmail.com",
- "url": "https://github.com/phated"
- },
{
"name": "Mathias Bynens",
"email": "mathias@qiwi.be",
@@ -78,8 +73,8 @@
"devDependencies": {},
"directories": {},
"dist": {
- "shasum": "d22c9ac660288f3843e16ba7d2b5d06cca27d777",
- "tarball": "http://localhost:4873/lodash/-/lodash-4.16.6.tgz"
+ "shasum": "78203a4d1c328ae1d86dca6460e369b57f4055ae",
+ "tarball": "https://registry.npmjs.org/lodash/-/lodash-4.17.4.tgz"
},
"homepage": "https://lodash.com/",
"icon": "https://lodash.com/icon.svg",
@@ -110,5 +105,5 @@
"scripts": {
"test": "echo \"See https://travis-ci.org/lodash/lodash-cli for testing details.\""
},
- "version": "4.16.6"
+ "version": "4.17.4"
}
diff --git a/package.json b/package.json
index 57b5141..ad1c27b 100644
--- a/package.json
+++ b/package.json
@@ -19,9 +19,12 @@
"gulp-rename": "^1.2.2",
"gulp-uglify": "^1.5.3",
"jshint": "^2.9.1",
- "lodash": "^4.11.1",
- "log4js": "^0.6.35",
- "rss-braider": "git+http://gitlab.silvrtree.co.uk/martind2000/rss-braider.git"
+ "jsonfile": "^2.4.0",
+ "lodash": "^4.17.4",
+ "log4js": "^0.6.38",
+ "rss-braider": "git+http://gitlab.silvrtree.co.uk/martind2000/rss-braider.git",
+ "stopwords-en": "^0.3.0",
+ "striptags": "^3.0.1"
},
"dependencies": {
"body-parser": "^1.14.2",
diff --git a/plugins/filter_location.js b/plugins/filter_location.js
index 1f1e361..fdbc804 100644
--- a/plugins/filter_location.js
+++ b/plugins/filter_location.js
@@ -1,5 +1,5 @@
module.exports = function (item, itemOptions, source) {
- var patt = /(glasgow|london)/ig;
+ var patt = /(glasgow|london|edinburgh)/ig;
var result = patt.test(itemOptions.description);
var resultB = patt.test(itemOptions.title);
return (result||resultB === true) ? itemOptions : -1;
diff --git a/plugins/filter_md_jobs.js b/plugins/filter_md_jobs.js
index 6280dba..e1e64c6 100644
--- a/plugins/filter_md_jobs.js
+++ b/plugins/filter_md_jobs.js
@@ -1,5 +1,5 @@
module.exports = function (item, itemOptions, source) {
- var patt = /(full stack|html|html5|css|javascript|sql|node|backbone|git|gulp|jquery|express|£\dk|Data Warehouse Developer|iot|internet of things)\W/ig;
+ var patt = /(full\w?stack|html|html5|css|javascript|sql|node|backbone|git|gulp|jquery|express|£\dk|Data Warehouse Developer|iot|internet of things)\W/ig;
var result = patt.test(itemOptions.description);
var resultB = patt.test(itemOptions.title);
return (result||resultB === true) ? itemOptions : -1 ;
diff --git a/plugins/filter_reject.js b/plugins/filter_reject.js
index 2238f4b..e90f03a 100644
--- a/plugins/filter_reject.js
+++ b/plugins/filter_reject.js
@@ -1,5 +1,5 @@
module.exports = function (item, itemOptions, source) {
- var patt = /(Test Analyst|Insight Analyst|application tester|senior tester|Salesforce|QlikView|Navision|Murex|seo|django|drupal|SHAREPOINT|per annum|ServiceNow|Test Lead|User Researcher|Service Management|\(PERM\)|£\d.K|Remedy|ITSM|Symfony|Zend|Full Time|Technical Business Analyst|BUSINESS ANALYST|AUTOMATION TESTER|FIELD TECHNICIAN|websphere administrator)/ig;
+ var patt = /(T24|Test Analyst|Insight Analyst|application tester|senior tester|Salesforce|QlikView|Navision|Murex|seo|django|drupal|SHAREPOINT|per annum|ServiceNow|Test Lead|User Researcher|Service Management|\(PERM\)|£\d.K|Remedy|ITSM|Symfony|Zend|Full Time|Technical Business Analyst|BUSINESS ANALYST|AUTOMATION TESTER|FIELD TECHNICIAN|websphere administrator|Research Data Scientist)/ig;
var engineers = /(Support|Devops|Planning|security|Postgresql|network|sccm|test)\s(Engineer)/ig;
var developers = /(Java|PHP|Graduate|Access|Oracle ADF|SHAREPOINT|Ruby on Rails|Java Software|IOS|Qlikview|)\s(Developer|C\++)/ig;