diff --git a/.idea/misc.xml b/.idea/misc.xml index 24eb271..886fd10 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,4 +3,7 @@ + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 522f851..3bb7154 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -1,149 +1,28 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + @@ -164,108 +43,114 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + logg + 6DA9769BA89834AA + + - @@ -372,6 +263,8 @@ + + @@ -394,14 +287,26 @@ + + + + + + + - - @@ -409,7 +314,7 @@ - + @@ -463,58 +368,17 @@ - - - - - - - - - + C:\Users\mdonnel\AppData\Roaming\Subversion + 125 @@ -656,6 +521,7 @@ + 1458043875334 @@ -1004,31 +870,33 @@ - - + + - - - - - - + - + + + + + + + + - @@ -1104,19 +972,14 @@ + + + - - - - - - - - - - - - @@ -1212,13 +1075,6 @@ - - - - - - - @@ -1316,15 +1172,6 @@ - - - - - - - - - @@ -1337,49 +1184,17 @@ - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - + @@ -1387,7 +1202,7 @@ - + @@ -1395,8 +1210,106 @@ - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/bayes.js b/bayes.js new file mode 100644 index 0000000..261e9c2 --- /dev/null +++ b/bayes.js @@ 
-0,0 +1,98 @@
+/**
+ * Created by mdonnel on 18/05/2017.
+ *
+ * Reads dist/<outputFile>.json, strips the HTML tags and the stop words from
+ * every item description and writes the remaining words, one array per item,
+ * to dist/stripped.json. Both paths are relative to the working directory.
+ */
+let outputFile = 'jobs-special';
+
+let path = require('path');
+let log4js = require('log4js');
+let logger = log4js.getLogger();
+const stopwords = require('stopwords-en');
+let jsonfile = require('jsonfile');
+var striptags = require('striptags');
+
+let strippedArray = [];
+
+/**
+ * Splits a sentence into lower-cased words and drops the stop words.
+ *
+ * @param {string} sentence Plain text to tokenise.
+ * @returns {string[]} The non-stop words in their original order.
+ */
+function getNoneStopWords(sentence) {
+  // A Set rather than a plain object: with an object lookup, words such as
+  // "constructor" hit Object.prototype and were dropped as if stop words.
+  const common = new Set();
+  for (let stopWord of getStopWords()) {
+    common.add(stopWord.trim());
+  }
+
+  // match() returns null, not [], when there are no word characters at all
+  // (for example an empty description).
+  const wordArr = String(sentence || '').match(/\w+/g) || [];
+  const uncommonArr = [];
+  for (let rawWord of wordArr) {
+    let word = rawWord.toLowerCase();
+    if (!common.has(word)) {
+      uncommonArr.push(word);
+    }
+  }
+  return uncommonArr;
+}
+
+/**
+ * @returns {string[]} The stop word list loaded from 'stopwords-en'.
+ */
+function getStopWords() {
+  return stopwords;
+}
+
+/**
+ * Strips tags and stop words from each item description and saves the
+ * resulting word lists as JSON.
+ *
+ * @param {Object} data Parsed feed; data.items is iterated and the
+ *   description of each item is read.
+ */
+function processFile(data) {
+  // path.join instead of hard-coded back-slashes so this also runs off Windows.
+  let fileName = path.join('dist', 'stripped.json');
+  console.log(fileName);
+  for (let item of data.items) {
+    let description = striptags(item.description || '');
+    let stripped = getNoneStopWords(description);
+
+    strippedArray.push(stripped);
+  }
+
+  jsonfile.writeFile(fileName, strippedArray, function(err) {
+    // Only report actual failures.
+    if (err) {
+      console.error(err);
+    }
+  });
+}
+
+/**
+ * Loads the feed JSON and passes it to processFile.
+ */
+function go() {
+  let fileName = path.join('dist', outputFile + '.json');
+  console.log(fileName);
+
+  jsonfile.readFile(fileName, function(err, obj) {
+    if (err) {
+      // Report the read/parse error itself instead of letting processFile
+      // fail on a missing feed object.
+      console.error(err);
+      return;
+    }
+    processFile(obj);
+  });
+}
+
+go();
+
diff --git a/jobs-special.js b/jobs-special.js index e0128cc..e940b6c 100644 --- a/jobs-special.js +++ b/jobs-special.js @@ -3,6 +3,10 @@ var outputFile = 'jobs-special', RssBraider = require('rss-braider'), fs = requi 'path').join, str = read(join(__dirname, '/templates/rss.ejs'), 'utf8'), feeds = {}; +var log4js = require('log4js'); +var logger = log4js.getLogger(); +const stopwords = require('stopwords-en'); + // Pull feeds from config 
files: // feeds.simple_test_feed = require("./config/feed").feed; // Or define in-line @@ -10,12 +14,18 @@ feeds.simple_test_feed = { "feed_name": "feed", "default_count": 1, "no_cdata_fields": [], // Don't wrap these fields in CDATA tags "plugins": [ 'filter_location', 'filter_reject', 'filter_md_jobs', 'filter_today_only' + /*'filter_location', 'filter_today_only'*/ ], "meta": { "title": "Jobs", "description": "Combined Jobs Feed", 'site_url': 'http://pipes.silvrtree.co.uk/jobs-special.xml' }, "sources": [ + { + "count": 100, + "feed_url": "http://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss", + "page": "https://www.jobserve.com/gb/en/JobSearch.aspx?shid=A7ACEE7915E274717C" + }, { /* "name" : "JobServe",*/ "count": 100, @@ -226,11 +236,52 @@ rss_braider.processFeed('simple_test_feed', 'json', function(err, data) { }); });
+/**
+ * Splits a sentence into lower-cased words and drops the stop words.
+ *
+ * @param {string} sentence Plain text to tokenise.
+ * @returns {string[]} The non-stop words in their original order.
+ */
+function getNoneStopWords(sentence) {
+  var common = getStopWords();
+  // match() returns null, not [], when there are no word characters at all.
+  // Object.create(null) keeps words such as "constructor" from matching
+  // Object.prototype members and being dropped as if they were stop words.
+  var wordArr = String(sentence || '').match(/\w+/g) || [],
+    commonObj = Object.create(null),
+    uncommonArr = [],
+    word, i;
+
+  for (i = 0; i < common.length; i++) {
+    commonObj[ common[i].trim() ] = true;
+  }
+
+  for (i = 0; i < wordArr.length; i++) {
+    word = wordArr[i].trim().toLowerCase();
+    if (!commonObj[word]) {
+      uncommonArr.push(word);
+    }
+  }
+  return uncommonArr;
+}
+
+/**
+ * @returns {string[]} The stop word list loaded from 'stopwords-en'.
+ */
+function getStopWords() {
+  return stopwords;
+}
+
+
 rss_braider.processFeed('simple_test_feed', 'rss', function(err, data) { if (err) { return console.log(err); } + + + console.log('Saving', __dirname + "/dist/" + outputFile + ".xml"); + fs.writeFile(__dirname + "/dist/" + outputFile + ".xml", data, function(err) { if (err) { return console.log(err); diff --git a/node_modules/lodash/README.md b/node_modules/lodash/README.md index f874f13..acdd128 100644 --- a/node_modules/lodash/README.md +++ b/node_modules/lodash/README.md @@ -1,4 +1,4 @@ -# lodash v4.16.6 +# lodash v4.17.4 The [Lodash](https://lodash.com/) library exported as [Node.js](https://nodejs.org/) modules. 
@@ -28,12 +28,12 @@ var at = require('lodash/at'); var curryN = require('lodash/fp/curryN'); ``` -See the [package source](https://github.com/lodash/lodash/tree/4.16.6-npm) for more details. +See the [package source](https://github.com/lodash/lodash/tree/4.17.4-npm) for more details. **Note:**
Install [n_](https://www.npmjs.com/package/n_) for Lodash use in the Node.js < 6 REPL. ## Support -Tested in Chrome 53-54, Firefox 48-49, IE 11, Edge 14, Safari 9-10, Node.js 6-7, & PhantomJS 2.1.1.
+Tested in Chrome 54-55, Firefox 49-50, IE 11, Edge 14, Safari 9-10, Node.js 6-7, & PhantomJS 2.1.1.
Automated [browser](https://saucelabs.com/u/lodash) & [CI](https://travis-ci.org/lodash/lodash/) test runs are available. diff --git a/node_modules/lodash/package.json b/node_modules/lodash/package.json index e129724..d400892 100644 --- a/node_modules/lodash/package.json +++ b/node_modules/lodash/package.json @@ -2,25 +2,25 @@ "_args": [ [ { - "raw": "lodash@^4.11.1", + "raw": "lodash@^4.16.6", "scope": null, "escapedName": "lodash", "name": "lodash", - "rawSpec": "^4.11.1", - "spec": ">=4.11.1 <5.0.0", + "rawSpec": "^4.16.6", + "spec": ">=4.16.6 <5.0.0", "type": "range" }, - "/home/martin/mddev/Rinser" + "C:\\dev\\md\\Rinser" ] ], - "_from": "lodash@>=4.11.1 <5.0.0", - "_id": "lodash@4.16.6", + "_from": "lodash@>=4.16.6 <5.0.0", + "_id": "lodash@4.17.4", "_inCache": true, "_location": "/lodash", - "_nodeVersion": "7.0.0", + "_nodeVersion": "7.2.1", "_npmOperationalInternal": { - "host": "packages-18-east.internal.npmjs.com", - "tmp": "tmp/lodash-4.16.6.tgz_1477982285913_0.34612850472331047" + "host": "packages-12-west.internal.npmjs.com", + "tmp": "tmp/lodash-4.17.4.tgz_1483223634314_0.5332164366263896" }, "_npmUser": { "name": "jdalton", @@ -29,12 +29,12 @@ "_npmVersion": "2.15.11", "_phantomChildren": {}, "_requested": { - "raw": "lodash@^4.11.1", + "raw": "lodash@^4.16.6", "scope": null, "escapedName": "lodash", "name": "lodash", - "rawSpec": "^4.11.1", - "spec": ">=4.11.1 <5.0.0", + "rawSpec": "^4.16.6", + "spec": ">=4.16.6 <5.0.0", "type": "range" }, "_requiredBy": [ @@ -43,11 +43,11 @@ "/cheerio", "/gulp-jshint" ], - "_resolved": "http://localhost:4873/lodash/-/lodash-4.16.6.tgz", - "_shasum": "d22c9ac660288f3843e16ba7d2b5d06cca27d777", + "_resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.4.tgz", + "_shasum": "78203a4d1c328ae1d86dca6460e369b57f4055ae", "_shrinkwrap": null, - "_spec": "lodash@^4.11.1", - "_where": "/home/martin/mddev/Rinser", + "_spec": "lodash@^4.16.6", + "_where": "C:\\dev\\md\\Rinser", "author": { "name": "John-David Dalton", 
"email": "john.david.dalton@gmail.com", @@ -62,11 +62,6 @@ "email": "john.david.dalton@gmail.com", "url": "http://allyoucanleet.com/" }, - { - "name": "Blaine Bublitz", - "email": "blaine.bublitz@gmail.com", - "url": "https://github.com/phated" - }, { "name": "Mathias Bynens", "email": "mathias@qiwi.be", @@ -78,8 +73,8 @@ "devDependencies": {}, "directories": {}, "dist": { - "shasum": "d22c9ac660288f3843e16ba7d2b5d06cca27d777", - "tarball": "http://localhost:4873/lodash/-/lodash-4.16.6.tgz" + "shasum": "78203a4d1c328ae1d86dca6460e369b57f4055ae", + "tarball": "https://registry.npmjs.org/lodash/-/lodash-4.17.4.tgz" }, "homepage": "https://lodash.com/", "icon": "https://lodash.com/icon.svg", @@ -110,5 +105,5 @@ "scripts": { "test": "echo \"See https://travis-ci.org/lodash/lodash-cli for testing details.\"" }, - "version": "4.16.6" + "version": "4.17.4" } diff --git a/package.json b/package.json index 57b5141..ad1c27b 100644 --- a/package.json +++ b/package.json @@ -19,9 +19,12 @@ "gulp-rename": "^1.2.2", "gulp-uglify": "^1.5.3", "jshint": "^2.9.1", - "lodash": "^4.11.1", - "log4js": "^0.6.35", - "rss-braider": "git+http://gitlab.silvrtree.co.uk/martind2000/rss-braider.git" + "jsonfile": "^2.4.0", + "lodash": "^4.17.4", + "log4js": "^0.6.38", + "rss-braider": "git+http://gitlab.silvrtree.co.uk/martind2000/rss-braider.git", + "stopwords-en": "^0.3.0", + "striptags": "^3.0.1" }, "dependencies": { "body-parser": "^1.14.2", diff --git a/plugins/filter_location.js b/plugins/filter_location.js index 1f1e361..fdbc804 100644 --- a/plugins/filter_location.js +++ b/plugins/filter_location.js @@ -1,5 +1,5 @@ module.exports = function (item, itemOptions, source) { - var patt = /(glasgow|london)/ig; + var patt = /(glasgow|london|edinburgh)/ig; var result = patt.test(itemOptions.description); var resultB = patt.test(itemOptions.title); return (result||resultB === true) ? 
itemOptions : -1; diff --git a/plugins/filter_md_jobs.js b/plugins/filter_md_jobs.js index 6280dba..e1e64c6 100644 --- a/plugins/filter_md_jobs.js +++ b/plugins/filter_md_jobs.js @@ -1,5 +1,6 @@ module.exports = function (item, itemOptions, source) {
- var patt = /(full stack|html|html5|css|javascript|sql|node|backbone|git|gulp|jquery|express|£\dk|Data Warehouse Developer|iot|internet of things)\W/ig;
+ // \W? (optional non-word character), not \w?: "full stack", "full-stack" and "fullstack" must all match.
+ var patt = /(full\W?stack|html|html5|css|javascript|sql|node|backbone|git|gulp|jquery|express|£\dk|Data Warehouse Developer|iot|internet of things)\W/ig;
 var result = patt.test(itemOptions.description); var resultB = patt.test(itemOptions.title); return (result||resultB === true) ? itemOptions : -1 ; diff --git a/plugins/filter_reject.js b/plugins/filter_reject.js index 2238f4b..e90f03a 100644 --- a/plugins/filter_reject.js +++ b/plugins/filter_reject.js @@ -1,5 +1,5 @@ module.exports = function (item, itemOptions, source) { - var patt = /(Test Analyst|Insight Analyst|application tester|senior tester|Salesforce|QlikView|Navision|Murex|seo|django|drupal|SHAREPOINT|per annum|ServiceNow|Test Lead|User Researcher|Service Management|\(PERM\)|£\d.K|Remedy|ITSM|Symfony|Zend|Full Time|Technical Business Analyst|BUSINESS ANALYST|AUTOMATION TESTER|FIELD TECHNICIAN|websphere administrator)/ig; + var patt = /(T24|Test Analyst|Insight Analyst|application tester|senior tester|Salesforce|QlikView|Navision|Murex|seo|django|drupal|SHAREPOINT|per annum|ServiceNow|Test Lead|User Researcher|Service Management|\(PERM\)|£\d.K|Remedy|ITSM|Symfony|Zend|Full Time|Technical Business Analyst|BUSINESS ANALYST|AUTOMATION TESTER|FIELD TECHNICIAN|websphere administrator|Research Data Scientist)/ig; var engineers = /(Support|Devops|Planning|security|Postgresql|network|sccm|test)\s(Engineer)/ig; var developers = /(Java|PHP|Graduate|Access|Oracle ADF|SHAREPOINT|Ruby on Rails|Java Software|IOS|Qlikview|)\s(Developer|C\++)/ig;