Compare commits
No commits in common. "development" and "svelte-updates" have entirely different histories.
developmen
...
svelte-upd
@ -1,32 +0,0 @@
|
|||||||
; http://editorconfig.org
|
|
||||||
|
|
||||||
root = true
|
|
||||||
|
|
||||||
[*]
|
|
||||||
charset = utf-8
|
|
||||||
end_of_line = lf
|
|
||||||
insert_final_newline = true
|
|
||||||
trim_trailing_whitespace = true
|
|
||||||
indent_style = space
|
|
||||||
indent_size = 2
|
|
||||||
|
|
||||||
[*.txt]
|
|
||||||
insert_final_newline = false
|
|
||||||
trim_trailing_whitespace = false
|
|
||||||
|
|
||||||
[*.py]
|
|
||||||
indent_size = 4
|
|
||||||
|
|
||||||
[*.m]
|
|
||||||
indent_size = 4
|
|
||||||
|
|
||||||
[Makefile]
|
|
||||||
indent_style = tab
|
|
||||||
indent_size = 8
|
|
||||||
|
|
||||||
[*.{js,json}]
|
|
||||||
indent_style = space
|
|
||||||
indent_size = 2
|
|
||||||
|
|
||||||
[*.md]
|
|
||||||
trim_trailing_whitespace = false
|
|
@ -9,7 +9,7 @@
|
|||||||
"env": {
|
"env": {
|
||||||
"browser": true,
|
"browser": true,
|
||||||
"node": true,
|
"node": true,
|
||||||
"es2017": true
|
"es6": true
|
||||||
},
|
},
|
||||||
"rules": {
|
"rules": {
|
||||||
"arrow-spacing": "error",
|
"arrow-spacing": "error",
|
||||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -147,4 +147,3 @@ fabric.properties
|
|||||||
/live/
|
/live/
|
||||||
!/output/
|
!/output/
|
||||||
/db/jobs.db
|
/db/jobs.db
|
||||||
!/db/
|
|
||||||
|
File diff suppressed because one or more lines are too long
204
brain.json
204
brain.json
@ -1,204 +0,0 @@
|
|||||||
{
|
|
||||||
"categories": {
|
|
||||||
"good": true,
|
|
||||||
"bad": true
|
|
||||||
},
|
|
||||||
"docCount": {
|
|
||||||
"good": 43,
|
|
||||||
"bad": 5
|
|
||||||
},
|
|
||||||
"totalDocuments": 48,
|
|
||||||
"vocabulary": {
|
|
||||||
"tsql": true,
|
|
||||||
"developer": true,
|
|
||||||
"contract": true,
|
|
||||||
"web": true,
|
|
||||||
"javascript": true,
|
|
||||||
"js": true,
|
|
||||||
"node": true,
|
|
||||||
"es": true,
|
|
||||||
"agile": true,
|
|
||||||
"nodejs": true,
|
|
||||||
"london": true,
|
|
||||||
"aws": true,
|
|
||||||
"sql": true,
|
|
||||||
"postgresql": true,
|
|
||||||
"mysql": true,
|
|
||||||
"docker": true,
|
|
||||||
"ecs": true,
|
|
||||||
"automation": true,
|
|
||||||
"jslint": true,
|
|
||||||
"jshint": true,
|
|
||||||
"vuejs": true,
|
|
||||||
"vue": true,
|
|
||||||
"nginx": true,
|
|
||||||
"remotely": true,
|
|
||||||
"mvc": true,
|
|
||||||
"remote": true,
|
|
||||||
"iot": true,
|
|
||||||
"mqtt": true,
|
|
||||||
"es6": true,
|
|
||||||
"es2016": true,
|
|
||||||
"es2017": true,
|
|
||||||
"es2018": true,
|
|
||||||
"react": true,
|
|
||||||
"redux": true,
|
|
||||||
"graphql": true,
|
|
||||||
"java": true,
|
|
||||||
"reactjs": true,
|
|
||||||
"apps": true,
|
|
||||||
"html": true,
|
|
||||||
"css": true,
|
|
||||||
"code": true,
|
|
||||||
"angular": true,
|
|
||||||
"ember": true,
|
|
||||||
"restful": true,
|
|
||||||
"apis": true,
|
|
||||||
"infrastructure": true,
|
|
||||||
"software": true,
|
|
||||||
"native": true,
|
|
||||||
"med": true,
|
|
||||||
"mobile": true,
|
|
||||||
"client": true,
|
|
||||||
"applications": true,
|
|
||||||
"digital": true,
|
|
||||||
"analytics": true,
|
|
||||||
"dashboarding": true,
|
|
||||||
"online": true,
|
|
||||||
"analyse": true,
|
|
||||||
"dashboards": true,
|
|
||||||
"google": true,
|
|
||||||
"query": true,
|
|
||||||
"data": true,
|
|
||||||
"stakeholders": true,
|
|
||||||
"enhancements": true,
|
|
||||||
"requirements": true,
|
|
||||||
"c": true,
|
|
||||||
"net": true,
|
|
||||||
"technologies": true,
|
|
||||||
"azure": true,
|
|
||||||
"understanding": true,
|
|
||||||
"devops": true,
|
|
||||||
"tools": true,
|
|
||||||
"frameworks": true,
|
|
||||||
"scotland": true,
|
|
||||||
"responsibility": true,
|
|
||||||
"programme": true,
|
|
||||||
"functions": true,
|
|
||||||
"asp": true,
|
|
||||||
"project": true,
|
|
||||||
"transform": true,
|
|
||||||
"collaborative": true,
|
|
||||||
"technical": true,
|
|
||||||
"framework": true,
|
|
||||||
"nhibernate": true,
|
|
||||||
"server": true,
|
|
||||||
"api": true,
|
|
||||||
"development": true,
|
|
||||||
"lifecycle": true,
|
|
||||||
"specification": true,
|
|
||||||
"appointments": true
|
|
||||||
},
|
|
||||||
"vocabularySize": 89,
|
|
||||||
"wordCount": {
|
|
||||||
"good": 157,
|
|
||||||
"bad": 5
|
|
||||||
},
|
|
||||||
"wordFrequencyCount": {
|
|
||||||
"good": {
|
|
||||||
"tsql": 1,
|
|
||||||
"developer": 6,
|
|
||||||
"contract": 9,
|
|
||||||
"web": 6,
|
|
||||||
"javascript": 7,
|
|
||||||
"js": 3,
|
|
||||||
"node": 2,
|
|
||||||
"es": 1,
|
|
||||||
"agile": 2,
|
|
||||||
"nodejs": 1,
|
|
||||||
"london": 3,
|
|
||||||
"aws": 3,
|
|
||||||
"sql": 3,
|
|
||||||
"postgresql": 1,
|
|
||||||
"mysql": 1,
|
|
||||||
"docker": 1,
|
|
||||||
"ecs": 1,
|
|
||||||
"automation": 1,
|
|
||||||
"jslint": 1,
|
|
||||||
"jshint": 1,
|
|
||||||
"vuejs": 1,
|
|
||||||
"vue": 2,
|
|
||||||
"nginx": 1,
|
|
||||||
"remotely": 1,
|
|
||||||
"mvc": 5,
|
|
||||||
"remote": 2,
|
|
||||||
"iot": 1,
|
|
||||||
"mqtt": 1,
|
|
||||||
"es6": 1,
|
|
||||||
"es2016": 1,
|
|
||||||
"es2017": 1,
|
|
||||||
"es2018": 1,
|
|
||||||
"apps": 1,
|
|
||||||
"html": 5,
|
|
||||||
"css": 5,
|
|
||||||
"code": 2,
|
|
||||||
"react": 2,
|
|
||||||
"angular": 1,
|
|
||||||
"ember": 1,
|
|
||||||
"restful": 1,
|
|
||||||
"apis": 1,
|
|
||||||
"infrastructure": 1,
|
|
||||||
"software": 2,
|
|
||||||
"native": 1,
|
|
||||||
"med": 1,
|
|
||||||
"mobile": 1,
|
|
||||||
"client": 4,
|
|
||||||
"applications": 2,
|
|
||||||
"digital": 2,
|
|
||||||
"analytics": 1,
|
|
||||||
"dashboarding": 1,
|
|
||||||
"online": 1,
|
|
||||||
"analyse": 1,
|
|
||||||
"dashboards": 1,
|
|
||||||
"google": 1,
|
|
||||||
"query": 1,
|
|
||||||
"data": 1,
|
|
||||||
"stakeholders": 1,
|
|
||||||
"enhancements": 3,
|
|
||||||
"requirements": 3,
|
|
||||||
"c": 4,
|
|
||||||
"net": 5,
|
|
||||||
"technologies": 4,
|
|
||||||
"azure": 2,
|
|
||||||
"understanding": 1,
|
|
||||||
"devops": 2,
|
|
||||||
"tools": 1,
|
|
||||||
"frameworks": 1,
|
|
||||||
"scotland": 1,
|
|
||||||
"responsibility": 1,
|
|
||||||
"programme": 1,
|
|
||||||
"functions": 1,
|
|
||||||
"asp": 1,
|
|
||||||
"project": 1,
|
|
||||||
"transform": 1,
|
|
||||||
"collaborative": 1,
|
|
||||||
"technical": 1,
|
|
||||||
"framework": 1,
|
|
||||||
"nhibernate": 1,
|
|
||||||
"server": 1,
|
|
||||||
"api": 1,
|
|
||||||
"development": 1,
|
|
||||||
"lifecycle": 1,
|
|
||||||
"specification": 1,
|
|
||||||
"appointments": 1
|
|
||||||
},
|
|
||||||
"bad": {
|
|
||||||
"react": 1,
|
|
||||||
"redux": 1,
|
|
||||||
"graphql": 1,
|
|
||||||
"java": 1,
|
|
||||||
"reactjs": 1
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"options": {}
|
|
||||||
}
|
|
BIN
db/jobs.db
BIN
db/jobs.db
Binary file not shown.
@ -38,7 +38,6 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
|
|||||||
}, null, true);
|
}, null, true);
|
||||||
|
|
||||||
new CronJob('0 6-23/1 * * *', async function() {
|
new CronJob('0 6-23/1 * * *', async function() {
|
||||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/D48462060FB24B6C.rss');
|
|
||||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss');
|
await jobserveScraper.go('https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss');
|
||||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss');
|
await jobserveScraper.go('https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss');
|
||||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss');
|
await jobserveScraper.go('https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss');
|
||||||
@ -55,13 +54,13 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
|
|||||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/4C67595E323E3453.rss'); // vuejs 2 Jul 2020
|
await jobserveScraper.go('https://www.jobserve.com/MySearch/4C67595E323E3453.rss'); // vuejs 2 Jul 2020
|
||||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss'); // svelte 2 Jul 2020
|
await jobserveScraper.go('https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss'); // svelte 2 Jul 2020
|
||||||
|
|
||||||
/* await s1jobsScraper.go('http://www.s1jobs.com/xml/m7dp711z2r.xml');
|
await s1jobsScraper.go('http://www.s1jobs.com/xml/m7dp711z2r.xml');
|
||||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/pfvf7o7z2r.xml');
|
await s1jobsScraper.go('http://www.s1jobs.com/xml/pfvf7o7z2r.xml');
|
||||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/lluqnt8z2r.xml');
|
await s1jobsScraper.go('http://www.s1jobs.com/xml/lluqnt8z2r.xml');
|
||||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/tu33qt8z2r.xml');
|
await s1jobsScraper.go('http://www.s1jobs.com/xml/tu33qt8z2r.xml');
|
||||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/u3btnz8z2r.xml');
|
await s1jobsScraper.go('http://www.s1jobs.com/xml/u3btnz8z2r.xml');
|
||||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml');
|
await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml');
|
||||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');*/
|
await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');
|
||||||
|
|
||||||
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||||
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||||
|
84
lib/base.js
84
lib/base.js
@ -8,12 +8,6 @@
|
|||||||
const filterReject = require('../lib/filter_reject');
|
const filterReject = require('../lib/filter_reject');
|
||||||
const filterAccept = require('../lib/filter_md_jobs');
|
const filterAccept = require('../lib/filter_md_jobs');
|
||||||
const dbmanager = require('../lib/dbmanager');
|
const dbmanager = require('../lib/dbmanager');
|
||||||
const JobsModel = require('../lib/mongoManager');
|
|
||||||
|
|
||||||
const SHA = require('crypto-js/sha256');
|
|
||||||
|
|
||||||
const { Utils } = require('@rakh/utils');
|
|
||||||
const { Corpus } = require('./corpus');
|
|
||||||
|
|
||||||
class MasterBase {
|
class MasterBase {
|
||||||
|
|
||||||
@ -63,79 +57,6 @@ class MasterBase {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
addToMongo() {
|
|
||||||
console.log('>> ADD TO MONGO!');
|
|
||||||
|
|
||||||
for(const item of this.items) {
|
|
||||||
// console.log('add', item);
|
|
||||||
const newObj = this.reduceData(item);
|
|
||||||
const newJob = new JobsModel(newObj);
|
|
||||||
|
|
||||||
newJob.save().then((m) => {
|
|
||||||
console.log('m', m.details.title);
|
|
||||||
}).catch((err) => {
|
|
||||||
console.error('m', err);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param inval
|
|
||||||
* @returns {number}
|
|
||||||
*/
|
|
||||||
analyseRate(inval) {
|
|
||||||
console.log('analyseRate', inval);
|
|
||||||
let outVal = 0;
|
|
||||||
const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
|
|
||||||
const clearSpace = /\s+/g;
|
|
||||||
|
|
||||||
const result = inval.replace(cleanerReg, '').replace(clearSpace, ' ');
|
|
||||||
const resultArray = result.trim().split((' '));
|
|
||||||
|
|
||||||
if (resultArray.length > 0) {
|
|
||||||
const item = parseInt(resultArray[0], 10);
|
|
||||||
|
|
||||||
if (item < 100) outVal = 0;
|
|
||||||
else if ((item > 100) && (item < 5000)) outVal = 1;
|
|
||||||
else if (item >= 5000) outVal = 2;
|
|
||||||
}
|
|
||||||
else return 0;
|
|
||||||
|
|
||||||
return outVal;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param d
|
|
||||||
* @returns {{data: {read: number, autoclass: number, applied: number, jobtype: number, class: number}, details: {}}}
|
|
||||||
*/
|
|
||||||
reduceData(d) {
|
|
||||||
const clearPremium = /(\n+)(Featured|Premium)/gi;
|
|
||||||
const otherStupid = /((↵\s+)+)(Featured|Premium)/gi;
|
|
||||||
|
|
||||||
const outObj = { 'details':{}, 'data':{ 'read':0, 'applied':0, 'jobtype': 0, 'class':0, 'autoclass':0 } };
|
|
||||||
|
|
||||||
outObj.details = Utils.extractFromObj(d, ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp']);
|
|
||||||
|
|
||||||
outObj.details.title = outObj.details.title.replace(clearPremium, '');
|
|
||||||
outObj.details.title = outObj.details.title.replace(otherStupid, '');
|
|
||||||
outObj.details.hashed = SHA(outObj.details.summary);
|
|
||||||
|
|
||||||
outObj.data.read = 0;
|
|
||||||
outObj.data.applied = d.applied || 0;
|
|
||||||
|
|
||||||
outObj.data.jobtype = this.analyseRate(d.salary);
|
|
||||||
outObj.data.autoclass = Corpus.process(d.summary);
|
|
||||||
|
|
||||||
outObj.data.timestamp = d.timestamp * 1000;
|
|
||||||
|
|
||||||
return outObj;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @returns {Promise<void>}
|
* @returns {Promise<void>}
|
||||||
@ -199,15 +120,10 @@ class MasterBase {
|
|||||||
return `https://image.silvrtree.co.uk/q${q}/${url}`;
|
return `https://image.silvrtree.co.uk/q${q}/${url}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async go() {
|
async go() {
|
||||||
this.items = [];
|
this.items = [];
|
||||||
this.rawItems = [];
|
this.rawItems = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = MasterBase;
|
module.exports = MasterBase;
|
||||||
|
@ -1,91 +0,0 @@
|
|||||||
const jsonfile = require('jsonfile');
|
|
||||||
|
|
||||||
const words = require('../lib/wordlist.json');
|
|
||||||
const wordsAdditional = require('../lib/wordlistAdditional.json');
|
|
||||||
|
|
||||||
const bigList = new Map([]);
|
|
||||||
|
|
||||||
const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es',
|
|
||||||
'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs',
|
|
||||||
'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote',
|
|
||||||
'iot', 'mqtt'];
|
|
||||||
const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];
|
|
||||||
let unrated = [];
|
|
||||||
|
|
||||||
var _global = typeof global === 'undefined' ? window : global;
|
|
||||||
var Corpus = (_global.Corpus = _global.Corpus || {});
|
|
||||||
|
|
||||||
const emailRegex = /[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/;
|
|
||||||
const detagRegex = /(<script(\s|\S)*?<\/script>)|(<style(\s|\S)*?<\/style>)|(<!--(\s|\S)*?-->)|(<\/?(\s|\S)*?>)/gi;
|
|
||||||
const desymbolNumberRegex = /[\n\t+$,\?\.\%\*=&:;()\\/\-£…"]|\d+/gi;
|
|
||||||
const deSpace = /\s+/g;
|
|
||||||
|
|
||||||
function cleanText(intext) {
|
|
||||||
if (arguments.length === 0 || typeof intext === 'undefined' || intext === null ) return '';
|
|
||||||
|
|
||||||
return intext.replace(emailRegex, ' ').replace(detagRegex, ' ').replace(desymbolNumberRegex, ' ').replace(deSpace, ' ').trim().toLowerCase();
|
|
||||||
}
|
|
||||||
|
|
||||||
function dedupe(intext) {
|
|
||||||
if (arguments.length === 0 || intext === null ) return [];
|
|
||||||
|
|
||||||
return [...new Set(intext)];
|
|
||||||
}
|
|
||||||
|
|
||||||
function incItem(item) {
|
|
||||||
if (bigList.has(item))
|
|
||||||
bigList.set(item, bigList.get(item) + 1);
|
|
||||||
|
|
||||||
else
|
|
||||||
bigList.set(item, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Process the body
|
|
||||||
* @param intext
|
|
||||||
* @returns {{score: number, bad: *, good: *}}
|
|
||||||
*/
|
|
||||||
Corpus.process = function(intext) {
|
|
||||||
const workText = cleanText(intext);
|
|
||||||
|
|
||||||
const workArray = workText.split(' ');
|
|
||||||
|
|
||||||
const cleanedArray = dedupe(workArray).filter((v) => {
|
|
||||||
return (words.indexOf(v) === -1 && wordsAdditional.indexOf(v) === -1);
|
|
||||||
});
|
|
||||||
|
|
||||||
const good = cleanedArray.filter((v) => {
|
|
||||||
return (goodWords.indexOf(v) !== -1);
|
|
||||||
});
|
|
||||||
|
|
||||||
const bad = cleanedArray.filter((v) => {
|
|
||||||
return (badWords.indexOf(v) !== -1);
|
|
||||||
});
|
|
||||||
|
|
||||||
const unused = cleanedArray.filter((v) => {
|
|
||||||
return ((badWords.indexOf(v) === -1) && (goodWords.indexOf(v) === -1));
|
|
||||||
});
|
|
||||||
|
|
||||||
cleanedArray.map((item) => {
|
|
||||||
incItem(item);
|
|
||||||
});
|
|
||||||
|
|
||||||
unrated = [...unrated, ...unused];
|
|
||||||
|
|
||||||
const score = good.length - (bad.length * 5);
|
|
||||||
|
|
||||||
// console.log('unused', unused);
|
|
||||||
|
|
||||||
return { good, bad, score, 'words':cleanedArray };
|
|
||||||
};
|
|
||||||
|
|
||||||
Corpus.exportUnused = function() {
|
|
||||||
jsonfile.writeFileSync('./unused.json', dedupe(unrated));
|
|
||||||
jsonfile.writeFileSync('./biglist.json', [...bigList].sort((a, b) => b[1] - a[1]));
|
|
||||||
console.log([...bigList]);
|
|
||||||
};
|
|
||||||
|
|
||||||
if (typeof module !== 'undefined')
|
|
||||||
module.exports = {
|
|
||||||
'Corpus': Corpus
|
|
||||||
};
|
|
@ -1,34 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 22/07/2020
|
|
||||||
* Time: 17:00
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
const mongoose = require('mongoose');
|
|
||||||
const log4js = require('log4js');
|
|
||||||
const logger = log4js.getLogger();
|
|
||||||
|
|
||||||
const JobsModel = require('../models/jobs');
|
|
||||||
|
|
||||||
// const { Utils } = require('@rakh/utils');
|
|
||||||
|
|
||||||
require('dotenv').config();
|
|
||||||
|
|
||||||
logger.level = 'debug';
|
|
||||||
|
|
||||||
const mongoConnect = process.env.MONGOCONNECT;
|
|
||||||
|
|
||||||
// logger.debug(`mongodb://martin:1V3D4m526i@${ process.env.DBHOST }/${ process.env.DBNAME}`);
|
|
||||||
|
|
||||||
// mongoose.connect('mongodb://martin:1V3D4m526i@127.0.0.1/jobs');
|
|
||||||
|
|
||||||
logger.debug(mongoConnect);
|
|
||||||
|
|
||||||
mongoose.connect(mongoConnect);
|
|
||||||
|
|
||||||
const mDB = mongoose.connection;
|
|
||||||
mDB.on('error', console.error.bind(console, 'connection error:'));
|
|
||||||
|
|
||||||
module.exports = JobsModel;
|
|
@ -89,7 +89,6 @@ class MasterRSS extends MasterBase {
|
|||||||
await this.filterAdverts();
|
await this.filterAdverts();
|
||||||
|
|
||||||
if (this.items.length > 0) await this.addToDB();
|
if (this.items.length > 0) await this.addToDB();
|
||||||
if (this.items.length > 0) await this.addToMongo();
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
console.log('No items to process');
|
console.log('No items to process');
|
||||||
|
@ -21,14 +21,9 @@ class MasterScraper extends MasterBase {
|
|||||||
super();
|
super();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param url
|
|
||||||
* @param useStone
|
|
||||||
* @returns {Promise<unknown>}
|
|
||||||
*/
|
|
||||||
getContent(url, useStone = false) {
|
getContent(url, useStone = false) {
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
let headers = new Headers({
|
let headers = new Headers({
|
||||||
"Accept" : "application/json",
|
"Accept" : "application/json",
|
||||||
@ -59,28 +54,19 @@ fetch(url, {
|
|||||||
resolve(response.body);
|
resolve(response.body);
|
||||||
})
|
})
|
||||||
.catch((e) => {
|
.catch((e) => {
|
||||||
console.error('getContent', e );
|
|
||||||
reject(e.response.body);
|
reject(e.response.body);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
async savePage(html) {
|
|
||||||
const now = fecha.format(new Date(), 'YYYY-MM-DD--hh');
|
|
||||||
|
|
||||||
const filename = `pages/${this.siteid}-${now}.html`;
|
|
||||||
|
|
||||||
fs.writeFileSync(filename, html);
|
|
||||||
}
|
|
||||||
|
|
||||||
async getPage() {
|
async getPage() {
|
||||||
console.log('>> getPage: fetching', this.url);
|
console.log('>> getPage: fetching', this.url);
|
||||||
|
const now = fecha.format(new Date(), 'YYYY-MM-DD--hhmmss');
|
||||||
|
const filename = `${this.siteid}-${now}.html`;
|
||||||
|
|
||||||
await this.getContent(this.url, this.useStone)
|
await this.getContent(this.url, this.useStone)
|
||||||
.then((html) => {
|
.then((html) => {
|
||||||
// console.log('>> getPage:: got', html);
|
fs.writeFileSync(filename, html);
|
||||||
console.log('>> getPage:: OK');
|
|
||||||
if (this.saveFile) this.savePage(html);
|
|
||||||
const $ = cheerio.load(html);
|
const $ = cheerio.load(html);
|
||||||
this.loadPage($);
|
this.loadPage($);
|
||||||
})
|
})
|
||||||
@ -89,59 +75,30 @@ fetch(url, {
|
|||||||
|
|
||||||
// Site specific parts below here
|
// Site specific parts below here
|
||||||
|
|
||||||
/**
|
|
||||||
* Break each page into items
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async breakPage() {
|
async breakPage() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param part
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async extractDetails(part) {
|
async extractDetails(part) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async checkNext() {
|
async checkNext() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async processSite() {
|
async processSite() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async getIndividualPage() {
|
async getIndividualPage() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async getJobPages() {
|
async getJobPages() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async go() {
|
async go() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
1007
lib/wordlist.json
1007
lib/wordlist.json
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
559
limited.json
559
limited.json
@ -1,559 +0,0 @@
|
|||||||
[
|
|
||||||
"experienced",
|
|
||||||
"exceptional",
|
|
||||||
"maintaining",
|
|
||||||
"familiarity",
|
|
||||||
"commodities",
|
|
||||||
"opportunity",
|
|
||||||
"possibility",
|
|
||||||
"integration",
|
|
||||||
"engineering",
|
|
||||||
"derivatives",
|
|
||||||
"prefferable",
|
|
||||||
"nutritional",
|
|
||||||
"performance",
|
|
||||||
"immediately",
|
|
||||||
"information",
|
|
||||||
"responsible",
|
|
||||||
"environment",
|
|
||||||
"stakeholder",
|
|
||||||
"proactively",
|
|
||||||
"requirement",
|
|
||||||
"temporarily",
|
|
||||||
"interrogate",
|
|
||||||
"effectively",
|
|
||||||
"progressing",
|
|
||||||
"substantial",
|
|
||||||
"identifying",
|
|
||||||
"maintenance",
|
|
||||||
"workarounds",
|
|
||||||
"departments",
|
|
||||||
"consultancy",
|
|
||||||
"regulations",
|
|
||||||
"statistical",
|
|
||||||
"previously·",
|
|
||||||
"euromonitor",
|
|
||||||
"documenting",
|
|
||||||
"bookkeeping",
|
|
||||||
"reconciling",
|
|
||||||
"hardworking",
|
|
||||||
"themselves!",
|
|
||||||
"appropriate",
|
|
||||||
"socialising",
|
|
||||||
"fundraising",
|
|
||||||
"initiatives",
|
|
||||||
"sponsorship",
|
|
||||||
"orientation",
|
|
||||||
"competitive",
|
|
||||||
"illustrator",
|
|
||||||
"outstanding",
|
|
||||||
"interaction",
|
|
||||||
"consistency",
|
|
||||||
"touchpoints",
|
|
||||||
"freshtechit",
|
|
||||||
"recruitment",
|
|
||||||
"catastrophe",
|
|
||||||
"accountable",
|
|
||||||
"workstreams",
|
|
||||||
"scalability",
|
|
||||||
"undertaking",
|
|
||||||
"interacting",
|
|
||||||
"significant",
|
|
||||||
"considering",
|
|
||||||
"independent",
|
|
||||||
"collaborate",
|
|
||||||
"arrangement",
|
|
||||||
"unsolicited",
|
|
||||||
"empowerment",
|
|
||||||
"connections",
|
|
||||||
"specialists",
|
|
||||||
"credentials",
|
|
||||||
"personality",
|
|
||||||
"established",
|
|
||||||
"northampton",
|
|
||||||
"advertising",
|
|
||||||
"operational",
|
|
||||||
"mathematics",
|
|
||||||
"contractors",
|
|
||||||
"instruments",
|
|
||||||
"referencing",
|
|
||||||
"locationsco",
|
|
||||||
"disciplines",
|
|
||||||
"corporation",
|
|
||||||
"investments",
|
|
||||||
"conferences",
|
|
||||||
"demonstrate",
|
|
||||||
"directorate",
|
|
||||||
"acknowledge",
|
|
||||||
"legislation",
|
|
||||||
"designgreat",
|
|
||||||
"understands",
|
|
||||||
"perspective",
|
|
||||||
"association",
|
|
||||||
"enforcement",
|
|
||||||
"prestigious",
|
|
||||||
"individuals",
|
|
||||||
"alternative",
|
|
||||||
"technically",
|
|
||||||
"challenging",
|
|
||||||
"discussions",
|
|
||||||
"lifeworking",
|
|
||||||
"interactive",
|
|
||||||
"storyboards",
|
|
||||||
"communicate",
|
|
||||||
"abilitywork",
|
|
||||||
"englishgood",
|
|
||||||
"detailbonus",
|
|
||||||
"angularwhat",
|
|
||||||
"neededabout",
|
|
||||||
"innovations",
|
|
||||||
"enthusiasts",
|
|
||||||
"instructors",
|
|
||||||
"prospective",
|
|
||||||
"comfortable",
|
|
||||||
"involvement",
|
|
||||||
"adventurous",
|
|
||||||
"marketplace",
|
|
||||||
"forecasting",
|
|
||||||
"contractual",
|
|
||||||
"underpinned",
|
|
||||||
"acquisition",
|
|
||||||
"microsoft’s",
|
|
||||||
"progression",
|
|
||||||
"suggestions",
|
|
||||||
"proficiency",
|
|
||||||
"participate",
|
|
||||||
"joblocation",
|
|
||||||
"methodology",
|
|
||||||
"continually",
|
|
||||||
"cataloguing",
|
|
||||||
"projectgood",
|
|
||||||
"incremental",
|
|
||||||
"overarching",
|
|
||||||
"confidently",
|
|
||||||
"circulatory",
|
|
||||||
"adjustments",
|
|
||||||
"interesting",
|
|
||||||
"consultants",
|
|
||||||
"experienceb",
|
|
||||||
"hourscasual",
|
|
||||||
"switzerland",
|
|
||||||
"contributes",
|
|
||||||
"participant",
|
|
||||||
"improvement",
|
|
||||||
"articulates",
|
|
||||||
"contributed",
|
|
||||||
"comfortably",
|
|
||||||
"deployments",
|
|
||||||
"integrating",
|
|
||||||
"configuring",
|
|
||||||
"platforming",
|
|
||||||
"educatedday",
|
|
||||||
"contracting",
|
|
||||||
"monthstotal",
|
|
||||||
"outsourcing",
|
|
||||||
"designswork",
|
|
||||||
"ideasdesign",
|
|
||||||
"deviceswork",
|
|
||||||
"fundamental",
|
|
||||||
"businessjob",
|
|
||||||
"implemented",
|
|
||||||
"transaction",
|
|
||||||
"reliability",
|
|
||||||
"upgradesyou",
|
|
||||||
"uncertainty",
|
|
||||||
"enterpriser",
|
|
||||||
"teamprovide",
|
|
||||||
"trafficking",
|
|
||||||
"doubleclick",
|
|
||||||
"communities",
|
|
||||||
"‘forestlink",
|
|
||||||
"dimensional",
|
|
||||||
"coordinator",
|
|
||||||
"spreadsheet",
|
|
||||||
"pressurised",
|
|
||||||
"assignments",
|
|
||||||
"willingness",
|
|
||||||
"certificate",
|
|
||||||
"summaryrole",
|
|
||||||
"institution",
|
|
||||||
"segregation",
|
|
||||||
"preparation",
|
|
||||||
"electronics",
|
|
||||||
"duplication",
|
|
||||||
"surrounding",
|
|
||||||
"informatica",
|
|
||||||
"blackfriars",
|
|
||||||
"terminology",
|
|
||||||
"shabarinath",
|
|
||||||
"interfacing",
|
|
||||||
"expectation",
|
|
||||||
"proprietary",
|
|
||||||
"conflicting",
|
|
||||||
"itecopeople",
|
|
||||||
"opowershell",
|
|
||||||
"submissions",
|
|
||||||
"negotiating",
|
|
||||||
"escalations",
|
|
||||||
"transferred",
|
|
||||||
"protections",
|
|
||||||
"customizing",
|
|
||||||
"oxfordshire",
|
|
||||||
"progressive",
|
|
||||||
"bishopsgate",
|
|
||||||
"partnership",
|
|
||||||
"futureheads",
|
|
||||||
"permissions",
|
|
||||||
"efficiently",
|
|
||||||
"unspecified",
|
|
||||||
"potentially",
|
|
||||||
"disclaimers",
|
|
||||||
"foreseeable",
|
|
||||||
"sustainable",
|
|
||||||
"calculation",
|
|
||||||
"replication",
|
|
||||||
"constitutes",
|
|
||||||
"recommended",
|
|
||||||
"enterprises",
|
|
||||||
"negotiation",
|
|
||||||
"imaginative",
|
|
||||||
"differences",
|
|
||||||
"nationality",
|
|
||||||
"impediments",
|
|
||||||
"refinements",
|
|
||||||
"translating",
|
|
||||||
"obligations",
|
|
||||||
"flexibility",
|
|
||||||
"unashamedly",
|
|
||||||
"exclusively",
|
|
||||||
"replacement",
|
|
||||||
"essentially",
|
|
||||||
"artifactory",
|
|
||||||
"theoretical",
|
|
||||||
"probability",
|
|
||||||
"integrators",
|
|
||||||
"contractor?",
|
|
||||||
"interested?",
|
|
||||||
"functioning",
|
|
||||||
"chamberlain",
|
|
||||||
"inclusivity",
|
|
||||||
"iteratively",
|
|
||||||
"enhancement",
|
|
||||||
"constraints",
|
|
||||||
"establishes",
|
|
||||||
"qualitative",
|
|
||||||
"influencing",
|
|
||||||
"procurement",
|
|
||||||
"experiences",
|
|
||||||
"furthermore",
|
|
||||||
"disciplined",
|
|
||||||
"unnecessary",
|
|
||||||
"bureaucracy",
|
|
||||||
"represented",
|
|
||||||
"siteimprove",
|
|
||||||
"lokhandwala",
|
|
||||||
"specialises",
|
|
||||||
"rationalize",
|
|
||||||
"competncies",
|
|
||||||
"restoration",
|
|
||||||
"allocations",
|
|
||||||
"admittances",
|
|
||||||
"furnishings",
|
|
||||||
"cleanliness",
|
|
||||||
"residential",
|
|
||||||
"contactable",
|
|
||||||
"conventions",
|
|
||||||
"translation",
|
|
||||||
"approaching",
|
|
||||||
"intecselect",
|
|
||||||
"linguistics",
|
|
||||||
"southampton",
|
|
||||||
"beautifully",
|
|
||||||
"estimations",
|
|
||||||
"newsletters",
|
|
||||||
"summarising",
|
|
||||||
"simulations",
|
|
||||||
"portfolio's",
|
|
||||||
"coronavirus",
|
|
||||||
"opoortunity",
|
|
||||||
"unavailable",
|
|
||||||
"accordingly",
|
|
||||||
"penetration",
|
|
||||||
"remediation",
|
|
||||||
"elimination",
|
|
||||||
"achievement",
|
|
||||||
"facilitator",
|
|
||||||
"westminster",
|
|
||||||
"introducing",
|
|
||||||
"businesses'",
|
|
||||||
"capitalists",
|
|
||||||
"investigate",
|
|
||||||
"countryside",
|
|
||||||
"problematic",
|
|
||||||
"coordinates",
|
|
||||||
"components'",
|
|
||||||
"supervision",
|
|
||||||
"bonavolonta",
|
|
||||||
"proposition",
|
|
||||||
"foundations",
|
|
||||||
"suitability",
|
|
||||||
"researchers",
|
|
||||||
"explanation",
|
|
||||||
"commitments",
|
|
||||||
"computation",
|
|
||||||
"questioning",
|
|
||||||
"experiments",
|
|
||||||
"visualfiles",
|
|
||||||
"cloudstream",
|
|
||||||
"determining",
|
|
||||||
"deliverable",
|
|
||||||
"inquisitive",
|
|
||||||
"backgrounds",
|
|
||||||
"thoughtspot",
|
|
||||||
"specialized",
|
|
||||||
"veloppement",
|
|
||||||
"importantes",
|
|
||||||
"typedscript",
|
|
||||||
"restaurants",
|
|
||||||
"prophylaxis",
|
|
||||||
"transmitted",
|
|
||||||
"appointment",
|
|
||||||
"encouraging",
|
|
||||||
"aggregating",
|
|
||||||
"championing",
|
|
||||||
"conjunction",
|
|
||||||
"customising",
|
|
||||||
"photography",
|
|
||||||
"authorities",
|
|
||||||
"competition",
|
|
||||||
"collections",
|
|
||||||
"contraintes",
|
|
||||||
"fonctionnel",
|
|
||||||
"adaptabilit",
|
|
||||||
"changements",
|
|
||||||
"conceptions",
|
|
||||||
"utilisation",
|
|
||||||
"shortlisted",
|
|
||||||
"reusability",
|
|
||||||
"recognizing",
|
|
||||||
"decisioning",
|
|
||||||
"accommodate",
|
|
||||||
"limitations",
|
|
||||||
"resourceful",
|
|
||||||
"algorithmic",
|
|
||||||
"unconcerned",
|
|
||||||
"intelligent",
|
|
||||||
"considerate",
|
|
||||||
"clientbased",
|
|
||||||
"accelerator",
|
|
||||||
"dreamweaver",
|
|
||||||
"applicant's",
|
|
||||||
"proactivity",
|
|
||||||
"aggregation",
|
|
||||||
"restriction",
|
|
||||||
"traditional",
|
|
||||||
"corporately",
|
|
||||||
"memberships",
|
|
||||||
"standardise",
|
|
||||||
"theecsgroup",
|
|
||||||
"scarchitect",
|
|
||||||
"consolidate",
|
|
||||||
"extensively",
|
|
||||||
"afghanistan",
|
|
||||||
"encompasses",
|
|
||||||
"distinctive",
|
|
||||||
"professions",
|
|
||||||
"interviewed",
|
|
||||||
"formulation",
|
|
||||||
"transitions",
|
|
||||||
"aspirations",
|
|
||||||
"ingredients",
|
|
||||||
"setterfield",
|
|
||||||
"candidate’s",
|
|
||||||
"leatherhead",
|
|
||||||
"publication",
|
|
||||||
"undoubtedly",
|
|
||||||
"basingstoke",
|
|
||||||
"underground",
|
|
||||||
"reinsurance",
|
|
||||||
"exemplifies",
|
|
||||||
"civiization",
|
|
||||||
"developer's",
|
|
||||||
"bazzelgette",
|
|
||||||
"adjacencies",
|
|
||||||
"feasibility",
|
|
||||||
"frontinvest",
|
|
||||||
"neogotiable",
|
|
||||||
"unconnected",
|
|
||||||
"conditional",
|
|
||||||
"bottlenecks",
|
|
||||||
"productions",
|
|
||||||
"pharmacists",
|
|
||||||
"technicians",
|
|
||||||
"prescribing",
|
|
||||||
"stewardship",
|
|
||||||
"recognising",
|
|
||||||
"convictions",
|
|
||||||
"subscribing",
|
|
||||||
"transparent",
|
|
||||||
"wireframing",
|
|
||||||
"insidehmcts",
|
|
||||||
"justicejobs",
|
|
||||||
"criminology",
|
|
||||||
"hospitality",
|
|
||||||
"structuring",
|
|
||||||
"educational",
|
|
||||||
"substantive",
|
|
||||||
"secondments",
|
|
||||||
"transgender",
|
|
||||||
"smartphones",
|
|
||||||
"microsoft's",
|
|
||||||
"definitions",
|
|
||||||
"validations",
|
|
||||||
"prioritised",
|
|
||||||
"autoscaling",
|
|
||||||
"abstraction",
|
|
||||||
"correlation",
|
|
||||||
"recognition",
|
|
||||||
"contributor",
|
|
||||||
"apigedevops",
|
|
||||||
"incorporate",
|
|
||||||
"woocommerce",
|
|
||||||
"informatics",
|
|
||||||
"adfadc@apps",
|
|
||||||
"automations",
|
|
||||||
"formulating",
|
|
||||||
"beneficiary",
|
|
||||||
"referential",
|
|
||||||
"jsdevsecops",
|
|
||||||
"solutioning",
|
|
||||||
"measurement",
|
|
||||||
"familiarise",
|
|
||||||
"eligibility",
|
|
||||||
"standardize",
|
|
||||||
"experience?",
|
|
||||||
"bournemouth",
|
|
||||||
"implementer",
|
|
||||||
"agilesphere",
|
|
||||||
"assumptions",
|
|
||||||
"accountancy",
|
|
||||||
"cockroachdb",
|
|
||||||
"promotional",
|
|
||||||
"facilitates",
|
|
||||||
"discoveries",
|
|
||||||
"bladecenter",
|
|
||||||
"considered!",
|
|
||||||
"cooperation",
|
|
||||||
"exploration",
|
|
||||||
"angulareact",
|
|
||||||
"preferabbly",
|
|
||||||
"harmonising",
|
|
||||||
"convenience",
|
|
||||||
"inclusively",
|
|
||||||
"strategists",
|
|
||||||
"attribution",
|
|
||||||
"fromscratch",
|
|
||||||
"combination",
|
|
||||||
"solutionize",
|
|
||||||
"accelerated",
|
|
||||||
"diagnostics",
|
|
||||||
"sensibility",
|
|
||||||
"informative",
|
|
||||||
"intellegnce",
|
|
||||||
"specilisits",
|
|
||||||
"projections",
|
|
||||||
"associative",
|
|
||||||
"personalize",
|
|
||||||
"farnborough",
|
|
||||||
"necessarily",
|
|
||||||
"nservicebus",
|
|
||||||
"constrained",
|
|
||||||
"prioritized",
|
|
||||||
"behavioural",
|
|
||||||
"chakraborty",
|
|
||||||
"leaderships",
|
|
||||||
"flourishing",
|
|
||||||
"uniqstudios",
|
|
||||||
"simplifying",
|
|
||||||
"realisation",
|
|
||||||
"extensions!",
|
|
||||||
"prioritises",
|
|
||||||
"experience!",
|
|
||||||
"candidates!",
|
|
||||||
"inclination",
|
|
||||||
"stimulating",
|
|
||||||
"appreciated",
|
|
||||||
"reinventing",
|
|
||||||
"compression",
|
|
||||||
"jscybsecdev",
|
|
||||||
"equirements",
|
|
||||||
"generalized",
|
|
||||||
"compressors",
|
|
||||||
"assessments",
|
|
||||||
"beyondtrust",
|
|
||||||
"engagements",
|
|
||||||
"numerically",
|
|
||||||
"electricity",
|
|
||||||
"interchange",
|
|
||||||
"jsswift_dev",
|
|
||||||
"circulating",
|
|
||||||
"attachments",
|
|
||||||
"credibility",
|
|
||||||
"vnetpeering",
|
|
||||||
"territories",
|
|
||||||
"staggering!",
|
|
||||||
"developers!",
|
|
||||||
"peripherals",
|
|
||||||
"virtualized",
|
|
||||||
"bitdefender",
|
|
||||||
"jssitecorjs",
|
|
||||||
"positioning",
|
|
||||||
"appreciates",
|
|
||||||
"chessington",
|
|
||||||
"controllers",
|
|
||||||
"controlling",
|
|
||||||
"quantifying",
|
|
||||||
"virtualised",
|
|
||||||
"manufacture",
|
|
||||||
"fluorescent",
|
|
||||||
"governments",
|
|
||||||
"bigcommerce",
|
|
||||||
"therapeutic",
|
|
||||||
"importantly",
|
|
||||||
"differently",
|
|
||||||
"rigourously",
|
|
||||||
"shareholder",
|
|
||||||
"copywriting",
|
|
||||||
"anticipated",
|
|
||||||
"approximate",
|
|
||||||
"behdarvandi",
|
|
||||||
"testability",
|
|
||||||
"beneficial!",
|
|
||||||
"jswmibmcraw",
|
|
||||||
"exhibitions",
|
|
||||||
"talentpoint",
|
|
||||||
"propagation",
|
|
||||||
"interviews!",
|
|
||||||
"solutionise",
|
|
||||||
"elasticache",
|
|
||||||
"manoeuvring",
|
|
||||||
"teamservice",
|
|
||||||
"geographies",
|
|
||||||
"efficientip",
|
|
||||||
"organically",
|
|
||||||
"advancement",
|
|
||||||
"jshodanular",
|
|
||||||
"wholesalers",
|
|
||||||
"multitenant",
|
|
||||||
"encouraged?",
|
|
||||||
"freelancers",
|
|
||||||
"composition",
|
|
||||||
"#jobswagger",
|
|
||||||
"typographic",
|
|
||||||
"stereotypes",
|
|
||||||
"clerkenwell",
|
|
||||||
"sacrificing",
|
|
||||||
"resolutions",
|
|
||||||
"technology?",
|
|
||||||
"advantagous"
|
|
||||||
]
|
|
@ -1,22 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 27/07/2020
|
|
||||||
* Time: 15:34
|
|
||||||
|
|
||||||
*/
|
|
||||||
const jsonfile = require('jsonfile');
|
|
||||||
|
|
||||||
// Seed vocabulary: words that make an advert more attractive ...
const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es', 'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs', 'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote', 'iot', 'mqtt'];
// ... and words that count against it.
const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];

// word -> weight (+3 for a good word, -5 for a bad word).
const brain = new Map();

// Iterate over every entry; the previous `i < length - 1` bound silently
// dropped the last word of each list ('mqtt' and 'shopify').
for (const word of goodWords) {
  brain.set(word, 3);
}

for (const word of badWords) {
  brain.set(word, -5);
}

// Persist as an array of [word, weight] pairs.
jsonfile.writeFileSync('brain.json', [...brain]);
|
|
||||||
|
|
156
migrate.js
156
migrate.js
@ -1,156 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 22/07/2020
|
|
||||||
* Time: 10:20
|
|
||||||
|
|
||||||
*/
|
|
||||||
const db = require('./lib/connect');
|
|
||||||
const log4js = require('log4js');
|
|
||||||
const logger = log4js.getLogger();
|
|
||||||
const { Utils } = require('@rakh/utils');
|
|
||||||
|
|
||||||
const { Corpus } = require('./lib/corpus');
|
|
||||||
|
|
||||||
const SHA = require('crypto-js/sha256');
|
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
2604
|
|
||||||
|
|
||||||
const mongoose = require('mongoose');
|
|
||||||
const log4js = require('log4js');
|
|
||||||
const logger = log4js.getLogger();
|
|
||||||
|
|
||||||
const Jobs = require('./models/jobs');
|
|
||||||
|
|
||||||
require('dotenv').config();
|
|
||||||
|
|
||||||
logger.level = 'debug';
|
|
||||||
|
|
||||||
logger.debug(`mongodb://martin:1V3D4m526i@${ process.env.DBHOST }/${ process.env.DBNAME}`);
|
|
||||||
|
|
||||||
mongoose.connect(`mongodb://martin:1V3D4m526i@${ process.env.DBHOST }/${ process.env.DBNAME}`);
|
|
||||||
|
|
||||||
const mDB = mongoose.connection;
|
|
||||||
mDB.on('error', console.error.bind(console, 'connection error:'));
|
|
||||||
*/
|
|
||||||
|
|
||||||
const Jobs = require('./lib/mongoManager');
|
|
||||||
|
|
||||||
const migrate = (function() {
|
|
||||||
/**
 * Classify a free-text salary/rate string into a coarse band.
 *
 * "IR35", currency and punctuation symbols, letters and any decimal fraction
 * are stripped from the text; the first number that remains picks the band:
 *   0 - no usable number, or a figure below 100
 *   1 - a figure from 100 up to (not including) 5000
 *   2 - a figure of 5000 or more
 *
 * @param {string} inval raw salary text; null/undefined is treated as "no figure"
 * @returns {number} 0, 1 or 2
 */
function analyseRate(inval) {
  // Guard against a missing salary: calling .replace on null/undefined would throw.
  if (inval === null || inval === undefined) return 0;

  const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
  const clearSpace = /\s+/g;

  const cleaned = String(inval).replace(cleanerReg, '').replace(clearSpace, ' ').trim();
  // split() always yields at least one element, so only the first token matters.
  const item = parseInt(cleaned.split(' ')[0], 10);

  if (Number.isNaN(item) || item < 100) return 0;

  // The lower bound is inclusive, matching the inclusive 5000 boundary of the
  // top band; previously exactly 100 matched none of the branches.
  if (item < 5000) return 1;

  return 2;
}
|
|
||||||
/**
 * Reshape one joined row from the old jobs table into the
 * `{ details, data }` structure handed to the Jobs model.
 *
 * @param {Object} d row containing the job columns plus `applied`
 * @returns {{details: Object, data: Object}}
 */
function reduceData(d) {
  const clearPremium = /(\n+)(Featured|Premium)/gi;
  const otherStupid = /((↵\s+)+)(Featured|Premium)/gi;

  const details = Utils.extractFromObj(d, ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp']);

  // Strip trailing "Featured"/"Premium" badges (newline or "↵" separated) from the title.
  details.title = details.title.replace(clearPremium, '').replace(otherStupid, '');
  details.hashed = SHA(details.summary);

  return {
    'details': details,
    'data': {
      // The read flag is always reset; the row's own value is not copied.
      'read': 0,
      'applied': d.applied || 0,
      'jobtype': analyseRate(d.salary),
      'class': 0,
      'autoclass': Corpus.process(d.summary),
      // Row timestamp multiplied by 1000.
      'timestamp': d.timestamp * 1000
    }
  };
}
|
|
||||||
|
|
||||||
/**
 * Load every job from the SQLite database, left-joined with its `applied`
 * and `read` markers, ordered by `_id`.
 *
 * @returns {Promise<Array<Object>>} resolves with the rows; rejects with the
 *   database error if the query fails
 */
function getCurrent() {
  console.log('get version');
  const sql = 'select jobs.*, applied.a as applied, read.d as read from jobs left join applied on applied.aid = jobs._id left join read on read.rid = jobs._id order by _id asc;';

  return new Promise((resolve, reject) => {
    db.all(sql, [], (err, rows) => {
      if (err) {
        // Stop here: without this return the code went on to iterate `rows`,
        // which is not available after a failed query, and threw inside the callback.
        reject(err);

        return;
      }

      // Hand back a copy so callers never share the driver's array.
      resolve([...(rows || [])]);
    });
  });
}
|
|
||||||
|
|
||||||
/**
 * Copy every row returned by getCurrent() into the Jobs collection, then
 * export the corpus' unused words.
 *
 * A failure to save one job is logged and the loop continues; any other
 * failure is logged and ends the run. The returned promise never rejects.
 *
 * @returns {Promise<void>}
 */
async function start() {
  try {
    const rows = await getCurrent();

    logger.debug(rows.length);

    // Visit every row; the previous `t < d.length - 1` bound skipped the last one.
    for (const row of rows) {
      const newJob = Jobs(reduceData(row));

      try {
        const saved = await newJob.save();

        logger.debug('m', saved.details.title);
      }
      catch (err) {
        // keyPattern is only present on some errors; fall back to the error
        // itself so other failures are not logged as `undefined`.
        logger.error(err.keyPattern || err);
      }
    }

    logger.debug('SAVING!!');
    Corpus.exportUnused();
  }
  catch (err) {
    logger.error(err.keyPattern || err);
  }
}
|
|
||||||
|
|
||||||
/**
 * Delete jobs whose `data.timestamp` is more than 14 days old and whose
 * `data.applied` is 0.
 *
 * The delete is awaited so that callers awaiting this function continue only
 * once it has finished; errors are logged, not rethrown.
 *
 * @returns {Promise<void>}
 */
async function deleteOld() {
  const oneDay = 86400000;
  const twoWeeksAgo = new Date().getTime() - (14 * oneDay);

  logger.debug('Delete older than: ', new Date(twoWeeksAgo), twoWeeksAgo);

  logger.debug({ 'data.timestamp': { '$lt': twoWeeksAgo } });

  try {
    // Previously this promise was neither awaited nor returned, so the
    // function resolved before the deletion had run.
    const m = await Jobs.deleteMany({ 'data.timestamp': { '$lt': twoWeeksAgo }, 'data.applied': 0 });

    logger.debug('m', m);
  }
  catch (err) {
    logger.error(err);
  }
}
|
|
||||||
|
|
||||||
// newJob.find({ 'data': { 'timestamp': { '$lt': 1587034346000 } } });
|
|
||||||
|
|
||||||
return {
|
|
||||||
'start':start,
|
|
||||||
'deleteOld': deleteOld
|
|
||||||
};
|
|
||||||
})();
|
|
||||||
|
|
||||||
// Entry point: run the migration, then prune old jobs, then report completion.
(async () => {
  await migrate.start();
  await migrate.deleteOld();

  logger.info('Done??');
})();
|
|
@ -1,47 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 22/07/2020
|
|
||||||
* Time: 14:18
|
|
||||||
|
|
||||||
*/
|
|
||||||
const mongoose = require('mongoose');
|
|
||||||
const Schema = mongoose.Schema;
|
|
||||||
|
|
||||||
// Fields describing the advert itself. `title`, `site`, `url` and `hashed`
// are required; `url` and `hashed` must also be unique.
const jobDetailsFields = {
  'title': { 'type': String, 'required': true },
  'site': { 'type': String, 'required': true },
  'url': { 'type': String, 'required': true, 'unique': true },
  'id': String,
  'summary': String,
  'company': String,
  'location': String,
  'postdate': String,
  'salary': String,
  'easyapply': Number,
  'timestamp': Number,
  'hashed' : { 'type': String, 'required':true, 'unique':true }
};

// Automatic classification result stored with each job.
const autoclassFields = {
  'good': Array,
  'bad': Array,
  'words': Array,
  'score': { 'type': Number, 'default': 0 }
};

// Per-job state; every numeric flag defaults to 0 and `created_at`
// defaults to the time the document is created.
const jobDataFields = {
  'read': { 'type': Number, 'default': 0 },
  'applied': { 'type': Number, 'default': 0 },
  'jobtype': { 'type': Number, 'default': 0 },
  'class': { 'type': Number, 'default': 0 },
  'autoclass': autoclassFields,
  'timestamp': { 'type': Number, 'default': 0 },
  'created_at': { 'type': Date, 'default': Date.now }
};

const jobSchema = new Schema({
  'details': jobDetailsFields,
  'data': jobDataFields
});
|
|
||||||
|
|
||||||
mongoose.set('useFindAndModify', false);
|
|
||||||
|
|
||||||
const Jobs = mongoose.model('Jobs', jobSchema);
|
|
||||||
|
|
||||||
module.exports = Jobs;
|
|
66
onetime.js
66
onetime.js
@ -1,66 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 16/04/2020
|
|
||||||
* Time: 23:35
|
|
||||||
|
|
||||||
*/
|
|
||||||
const CronJob = require('cron').CronJob;
|
|
||||||
const IndeedScraper = require('./scrapers/indeed');
|
|
||||||
const TotaljobsScraper = require('./scrapers/totaljobs');
|
|
||||||
const CwjobsScraper = require('./scrapers/cwjobs');
|
|
||||||
const JobserveScraper = require('./scrapers/rss.jobserve');
|
|
||||||
const RssS1Jobs = require('./scrapers/rss.s1jobs');
|
|
||||||
const RssTechnojobs = require('./scrapers/rss.technojobs');
|
|
||||||
|
|
||||||
// One-off run of every scraper, strictly one request series after another:
// the three site scrapers per location first, then each RSS/XML feed in turn.
(async () => {
  console.log('Started..');

  const indeedScraper = new IndeedScraper();
  const totaljobsScraper = new TotaljobsScraper();
  const cwjobsScraper = new CwjobsScraper();
  const jobserveScraper = new JobserveScraper();
  const s1jobsScraper = new RssS1Jobs();
  const technojobsScraper = new RssTechnojobs();

  const locations = ['london', 'glasgow', 'edinburgh', 'milton keynes'];
  const siteScrapers = [indeedScraper, totaljobsScraper, cwjobsScraper];

  const jobserveFeeds = [
    'https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss',
    'https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss',
    'https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss',
    'https://www.jobserve.com/MySearch/4E2AC50E02AD128B.rss',
    'https://www.jobserve.com/MySearch/6DA9769BA89834AA.rss',
    'https://www.jobserve.com/MySearch/EDF47BEA6B31EF.rss',
    'https://www.jobserve.com/MySearch/3CAD044BEF2BFA.rss',
    'https://www.jobserve.com/MySearch/C7B25D86D0844A.rss',
    'https://www.jobserve.com/MySearch/64A3EEF615FA4C.rss',
    'https://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss',
    'https://www.jobserve.com/MySearch/CA49421A86CA3F74.rss',
    'https://www.jobserve.com/MySearch/846CDA8658FF93A3.rss',
    'https://www.jobserve.com/MySearch/ED1708BF42EF3513.rss', // javascript node 2 Jul 2020
    'https://www.jobserve.com/MySearch/4C67595E323E3453.rss', // vuejs 2 Jul 2020
    'https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss' // svelte 2 Jul 2020
  ];

  const s1jobsFeeds = [
    'http://www.s1jobs.com/xml/m7dp711z2r.xml',
    'http://www.s1jobs.com/xml/pfvf7o7z2r.xml',
    'http://www.s1jobs.com/xml/lluqnt8z2r.xml',
    'http://www.s1jobs.com/xml/tu33qt8z2r.xml',
    'http://www.s1jobs.com/xml/u3btnz8z2r.xml',
    'http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml',
    'http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml'
  ];

  // Location segments exactly as they appear in the technojobs feed URLs.
  const technojobsLocations = ['glasgow', 'LONDON', 'Milton%20Keynes'];

  for (const location of locations) {
    for (const scraper of siteScrapers) {
      await scraper.go(location);
    }
  }

  for (const feed of jobserveFeeds) {
    await jobserveScraper.go(feed);
  }

  for (const feed of s1jobsFeeds) {
    await s1jobsScraper.go(feed);
  }

  for (const place of technojobsLocations) {
    await technojobsScraper.go(`https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/location${place}/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1`);
  }
})();
|
|
1288
package-lock.json
generated
1288
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
12
package.json
12
package.json
@ -1,31 +1,23 @@
|
|||||||
{
|
{
|
||||||
"name": "jobscraper",
|
"name": "jobscraper",
|
||||||
"version": "1.0.2",
|
"version": "1.0.0",
|
||||||
"description": "",
|
"description": "",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"release": "vik patch -t",
|
"grabber": "node grabber.js"
|
||||||
"grabber": "node grabber.js",
|
|
||||||
"server" : "node server/server.js"
|
|
||||||
},
|
},
|
||||||
"author": "",
|
"author": "",
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@rakh/utils": "^1.0.0",
|
|
||||||
"axios": "^0.19.2",
|
"axios": "^0.19.2",
|
||||||
"bayes": "^1.0.0",
|
|
||||||
"body-parser": "^1.19.0",
|
"body-parser": "^1.19.0",
|
||||||
"cheerio": "^1.0.0-rc.3",
|
"cheerio": "^1.0.0-rc.3",
|
||||||
"cron": "^1.8.2",
|
"cron": "^1.8.2",
|
||||||
"crypto-js": "^4.0.0",
|
|
||||||
"dotenv": "^8.2.0",
|
"dotenv": "^8.2.0",
|
||||||
"eslint": "^6.8.0",
|
"eslint": "^6.8.0",
|
||||||
"express": "^4.17.1",
|
"express": "^4.17.1",
|
||||||
"fecha": "^4.2.0",
|
"fecha": "^4.2.0",
|
||||||
"got": "^11.2.0",
|
"got": "^11.2.0",
|
||||||
"jsonfile": "^6.0.1",
|
|
||||||
"log4js": "^6.3.0",
|
|
||||||
"mongoose": "^5.9.25",
|
|
||||||
"present": "^1.0.0",
|
"present": "^1.0.0",
|
||||||
"rss-parser": "^3.8.0",
|
"rss-parser": "^3.8.0",
|
||||||
"sqlite3": "^4.1.1",
|
"sqlite3": "^4.1.1",
|
||||||
|
45
preload.js
45
preload.js
@ -1,45 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 28/07/2020
|
|
||||||
* Time: 10:51
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
const fs = require('fs');
|
|
||||||
|
|
||||||
var bayes = require('bayes');
|
|
||||||
|
|
||||||
var classifier = bayes({
|
|
||||||
'tokenizer': function (text) {
|
|
||||||
return text.split(',');
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// teach it positive phrases
|
|
||||||
|
|
||||||
/**
 * Teach the classifier the seed "good" and "bad" words, print two sample
 * categorisations, and write the classifier state to brain.json.
 *
 * @returns {Promise<void>}
 */
async function load() {
  const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es', 'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs', 'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote', 'iot', 'mqtt', 'es6', 'es2016', 'es2017', 'es2018', 'freelance'];
  const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];

  // Learn every word; the previous `i < length - 1` bound never taught the
  // last entry of each list ('freelance' and 'shopify').
  for (const word of goodWords) {
    await classifier.learn(word, 'good');
  }

  for (const word of badWords) {
    await classifier.learn(word, 'bad');
  }

  // Sample documents; they are joined with ',' because the tokenizer splits on commas.
  console.log(await classifier.categorize(['ui', 'developer', 'london', 'react'].join(',')));

  console.log(await classifier.categorize(['mysql', 'react', 'js', 'node', 'docker', 'kubernetes', 'google'].join(',')));

  // Serialize the classifier's state as a JSON string and persist it.
  const stateJson = classifier.toJson();

  console.log(stateJson);

  fs.writeFileSync('brain.json', stateJson);
}
|
|
||||||
|
|
||||||
load();
|
|
@ -20,7 +20,7 @@ class CwjobsScraper extends TotaljobsScraper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async go(location = 'london') {
|
async go(location = 'london') {
|
||||||
this.setStartUrl(`https://www.cwjobs.co.uk/jobs/contract/html-or-angular-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
|
this.setStartUrl(`https://www.cwjobs.co.uk/jobs/contract/html-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
|
||||||
// this.setStartUrl('https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=javascript+nodejs&as_not=&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=25&l=london&fromage=7&limit=10&sort=date&psf=advsrch&from=advancedsearch');
|
// this.setStartUrl('https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=javascript+nodejs&as_not=&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=25&l=london&fromage=7&limit=10&sort=date&psf=advsrch&from=advancedsearch');
|
||||||
|
|
||||||
// Glasgow
|
// Glasgow
|
||||||
|
@ -133,15 +133,12 @@ class IndeedScraper extends MasterScraper {
|
|||||||
await this.filterAdverts();
|
await this.filterAdverts();
|
||||||
|
|
||||||
await this.addToDB();
|
await this.addToDB();
|
||||||
await this.addToMongo();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async go(location = 'london') {
|
async go(location = 'london') {
|
||||||
this.setStartUrl(`https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=Angular+Html+Web+Sql+Delphi+Vb+Vbscript+Php+Ajax+Mysql+Sqlserver+Javascript+Nodejs+vuejs+sveltejs&as_not=React&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=0&l=${encodeURIComponent(location)}&fromage=1&limit=50&sort=&psf=advsrch&from=advancedsearch`);
|
this.setStartUrl(`https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=Html+Web+Sql+Delphi+Vb+Vbscript+Php+Ajax+Mysql+Sqlserver+Javascript+Nodejs+vuejs+sveltejs&as_not=React&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=0&l=${encodeURIComponent(location)}&fromage=1&limit=50&sort=&psf=advsrch&from=advancedsearch`);
|
||||||
|
|
||||||
await this.processSite().catch((err) => {
|
await this.processSite();
|
||||||
console.error('Indeed Go', err);
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`Indeed ${location} completed`);
|
console.log(`Indeed ${location} completed`);
|
||||||
}
|
}
|
||||||
|
@ -140,7 +140,6 @@ class IndeedMobileScraper extends MasterScraper {
|
|||||||
await this.filterAdverts();
|
await this.filterAdverts();
|
||||||
|
|
||||||
await this.addToDB();
|
await this.addToDB();
|
||||||
await this.addToMongo();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async go(location = 'london') {
|
async go(location = 'london') {
|
||||||
|
@ -22,10 +22,7 @@ class TotaljobsScraper extends MasterScraper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Site specific parts below here
|
// Site specific parts below here
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async breakPage() {
|
async breakPage() {
|
||||||
const $ = this.currentPage;
|
const $ = this.currentPage;
|
||||||
const ads = [];
|
const ads = [];
|
||||||
@ -42,11 +39,6 @@ class TotaljobsScraper extends MasterScraper {
|
|||||||
this.items = [...this.items, ...ads];
|
this.items = [...this.items, ...ads];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param part
|
|
||||||
* @returns {Promise<{}>}
|
|
||||||
*/
|
|
||||||
async extractDetails(part) {
|
async extractDetails(part) {
|
||||||
const newObj = {};
|
const newObj = {};
|
||||||
const $part = cheerio.load(part);
|
const $part = cheerio.load(part);
|
||||||
@ -69,11 +61,6 @@ class TotaljobsScraper extends MasterScraper {
|
|||||||
return newObj;
|
return newObj;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param item
|
|
||||||
* @returns {Promise<*>}
|
|
||||||
*/
|
|
||||||
async getIndividualPage(item) {
|
async getIndividualPage(item) {
|
||||||
const newItem = {...item};
|
const newItem = {...item};
|
||||||
console.log('Getting', item.url);
|
console.log('Getting', item.url);
|
||||||
@ -88,10 +75,6 @@ class TotaljobsScraper extends MasterScraper {
|
|||||||
return newItem;
|
return newItem;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async getJobPages() {
|
async getJobPages() {
|
||||||
const newItems = [];
|
const newItems = [];
|
||||||
for (let item of this.items) {
|
for (let item of this.items) {
|
||||||
@ -103,10 +86,6 @@ class TotaljobsScraper extends MasterScraper {
|
|||||||
this.items = [...newItems];
|
this.items = [...newItems];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async checkNext() {
|
async checkNext() {
|
||||||
const $ = this.currentPage;
|
const $ = this.currentPage;
|
||||||
const next = $('.pagination > *:last-child').attr('href') || '';
|
const next = $('.pagination > *:last-child').attr('href') || '';
|
||||||
@ -117,10 +96,6 @@ class TotaljobsScraper extends MasterScraper {
|
|||||||
console.log(next);
|
console.log(next);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async processSite() {
|
async processSite() {
|
||||||
console.log('Processing...');
|
console.log('Processing...');
|
||||||
|
|
||||||
@ -146,16 +121,10 @@ class TotaljobsScraper extends MasterScraper {
|
|||||||
await this.filterAdverts();
|
await this.filterAdverts();
|
||||||
|
|
||||||
await this.addToDB();
|
await this.addToDB();
|
||||||
await this.addToMongo();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param location
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async go(location = 'london') {
|
async go(location = 'london') {
|
||||||
this.setStartUrl(`https://www.totaljobs.com/jobs/contract/html-or-angular-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
|
this.setStartUrl(`https://www.totaljobs.com/jobs/contract/html-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
|
||||||
// this.setStartUrl('https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=javascript+nodejs&as_not=&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=25&l=london&fromage=7&limit=10&sort=date&psf=advsrch&from=advancedsearch');
|
// this.setStartUrl('https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=javascript+nodejs&as_not=&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=25&l=london&fromage=7&limit=10&sort=date&psf=advsrch&from=advancedsearch');
|
||||||
|
|
||||||
// Glasgow
|
// Glasgow
|
||||||
|
@ -1,81 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 10/09/2020
|
|
||||||
* Time: 16:07
|
|
||||||
|
|
||||||
*/
|
|
||||||
const Jobs = require('../../lib/mongoManager');
|
|
||||||
const { Utils } = require('@rakh/utils');
|
|
||||||
|
|
||||||
exports.markApplied = (req, res) => {
|
|
||||||
console.log('>V2 markApplied req', req.params);
|
|
||||||
|
|
||||||
if(!req.params.id)
|
|
||||||
return res.status(500).send({
|
|
||||||
'message': 'Job id missing'
|
|
||||||
});
|
|
||||||
|
|
||||||
const aid = req.params.id;
|
|
||||||
const now = new Date().getTime();
|
|
||||||
|
|
||||||
// touchOne
|
|
||||||
|
|
||||||
console.log('aid', aid);
|
|
||||||
|
|
||||||
Jobs.updateMany({ '_id':aid }, { '$set': { 'data.applied':now } } ).then((data) => {
|
|
||||||
console.log(data);
|
|
||||||
|
|
||||||
res.status(200).end();
|
|
||||||
}).catch((err) => {
|
|
||||||
console.error(err.message);
|
|
||||||
res.status(500).send({
|
|
||||||
'message': err.message || 'Some error occurred while querying the database.'
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
/*
|
|
||||||
dbmanager.appliedOne({ aid, a })
|
|
||||||
.then((data) => {
|
|
||||||
console.log(data);
|
|
||||||
|
|
||||||
res.status(200).end();
|
|
||||||
})
|
|
||||||
.catch((err) => {
|
|
||||||
res.status(500).send({
|
|
||||||
'message': err.message || 'Some error occurred while querying the database.'
|
|
||||||
});
|
|
||||||
});
|
|
||||||
*/
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.markAllRead = (req, res) => {
|
|
||||||
console.log('>V2 markAllRead req', req.params);
|
|
||||||
|
|
||||||
const now = new Date().getTime();
|
|
||||||
|
|
||||||
Jobs.updateMany({ 'data.read':0 }, { '$set': { 'data.read':now } } ).then((data) => {
|
|
||||||
console.log(data);
|
|
||||||
|
|
||||||
res.status(200).end();
|
|
||||||
}).catch((err) => {
|
|
||||||
console.error(err.message);
|
|
||||||
res.status(500).send({
|
|
||||||
'message': err.message || 'Some error occurred while querying the database.'
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
/*
|
|
||||||
dbmanager.markAllRead()
|
|
||||||
.then((data) => {
|
|
||||||
console.log(data);
|
|
||||||
|
|
||||||
res.status(200).end();
|
|
||||||
})
|
|
||||||
.catch((err) => {
|
|
||||||
res.status(500).send({
|
|
||||||
'message': err.message || 'Some error occurred while querying the database.'
|
|
||||||
});
|
|
||||||
});
|
|
||||||
*/
|
|
||||||
};
|
|
@ -1,124 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 24/07/2020
|
|
||||||
* Time: 11:45
|
|
||||||
|
|
||||||
*/
|
|
||||||
const Jobs = require('../../lib/mongoManager');
|
|
||||||
const { Utils } = require('@rakh/utils');
|
|
||||||
|
|
||||||
const killNLDoubleSpace = /(\\n)\s{2,}|(\\n)|\s{2,}/g;
|
|
||||||
|
|
||||||
/**
 * Flattens a list of job documents into the compact rows used by the list view.
 *
 * For every document the `details` and `data` sub-objects are merged and only
 * the listed summary fields are kept (via `Utils.extractFromObj`); the
 * document `_id` is then attached to the result.
 *
 * @param {Array<Object>} [data] - job documents carrying `details`, `data` and `_id`.
 * @returns {Array<Object>|string} one reduced object per document, or an empty
 *   string when no list was supplied (argument missing, `null` or `undefined`).
 */
function reduceList(data) {
  // Guard against both null and undefined: the previous arguments-based check
  // let an explicit `undefined` through, which then threw on `data.map`.
  if (data === null || data === undefined) return '';

  const wantedFields = ['title', 'site', 'company', 'timestamp', 'read', 'applied', 'jobtype', 'class', 'autoclass'];

  return data.map((v) => {
    const o = Utils.extractFromObj({ ...v.details, ...v.data, _id: v._id }, wantedFields);

    // Set explicitly so the id is present whatever the field filter returns.
    o._id = v._id;

    return o;
  });
}
|
|
||||||
|
|
||||||
/**
 * Builds the single-job payload: every field of `record.details` is hoisted to
 * the top level, and the raw `data` object and `_id` are attached alongside.
 * `data` and `_id` always win over same-named keys inside `details`.
 *
 * @param {Object} record - job document with `details`, `data` and `_id`.
 * @returns {Object} the flattened record.
 */
function reduceRecord(record) {
  const { details, data, _id } = record;

  return { ...details, data, _id };
}
|
|
||||||
|
|
||||||
exports.getList = (req, res) => {
|
|
||||||
console.log('>getList req', req.params);
|
|
||||||
|
|
||||||
Jobs.find({}, { 'details.title':1, 'details.site':1, 'details.company':1, 'data':1, '_id':1 }).limit(200).sort( { 'data.timestamp': -1 } ).then((doc) => {
|
|
||||||
if (doc) {
|
|
||||||
|
|
||||||
res.send(reduceList(doc));
|
|
||||||
}
|
|
||||||
}).catch((err) => {
|
|
||||||
console.error(err.message);
|
|
||||||
res.status(500).send({
|
|
||||||
'message': err.message || 'Some error occurred while querying the database.'
|
|
||||||
});
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.getJob = (req, res) => {
|
|
||||||
console.log('>getJob req', req.params);
|
|
||||||
|
|
||||||
if(!req.params.id)
|
|
||||||
return res.status(500).send({
|
|
||||||
'message': 'Job id missing'
|
|
||||||
});
|
|
||||||
|
|
||||||
const id = req.params.id;
|
|
||||||
|
|
||||||
Jobs.findById(id).then((doc) => {
|
|
||||||
if (doc) {
|
|
||||||
|
|
||||||
const item = reduceRecord(doc._doc);
|
|
||||||
const date = new Date( item.timestamp * 1000);
|
|
||||||
|
|
||||||
console.log(item);
|
|
||||||
item.date = date.toLocaleString();
|
|
||||||
item.title = item.title.replace(killNLDoubleSpace, ' ');
|
|
||||||
|
|
||||||
res.send(item);
|
|
||||||
}
|
|
||||||
}).catch((err) => {
|
|
||||||
console.error(err.message);
|
|
||||||
res.status(500).send({
|
|
||||||
'message': err.message || 'Some error occurred while querying the database.'
|
|
||||||
});
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.readJob = (req, res) => {
|
|
||||||
console.log('>readJob req', req.params);
|
|
||||||
|
|
||||||
let id;
|
|
||||||
if(!req.params.id)
|
|
||||||
return res.status(500).send({
|
|
||||||
'message': 'Job id missing'
|
|
||||||
});
|
|
||||||
else
|
|
||||||
id = req.params.id;
|
|
||||||
|
|
||||||
Jobs.findById(id).then((doc) => {
|
|
||||||
if (doc) {
|
|
||||||
|
|
||||||
let fullDoc = Object.assign({}, doc._doc);
|
|
||||||
|
|
||||||
console.log('fullDoc', fullDoc);
|
|
||||||
|
|
||||||
if (!Utils.isEmpty(fullDoc)){
|
|
||||||
fullDoc.data.read = new Date().getTime();
|
|
||||||
|
|
||||||
Jobs.findByIdAndUpdate(id, fullDoc, {'new':true}).then((doc) => {
|
|
||||||
console.log(doc._doc);
|
|
||||||
res.status(200).end();
|
|
||||||
}).catch((err) => {
|
|
||||||
console.error('inside',err.message);
|
|
||||||
res.status(500).send({
|
|
||||||
'message': err.message || 'Some error occurred while querying the database.'
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}).catch((err) => {
|
|
||||||
console.error('outer', err.message);
|
|
||||||
res.status(500).send({
|
|
||||||
'message': err.message || 'Some error occurred while querying the database.'
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
@ -1,89 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 28/07/2020
|
|
||||||
* Time: 11:08
|
|
||||||
|
|
||||||
*/
|
|
||||||
const Jobs = require('../../lib/mongoManager');
|
|
||||||
const { Utils } = require('@rakh/utils');
|
|
||||||
|
|
||||||
const fs = require('fs');
|
|
||||||
|
|
||||||
var bayes = require('bayes');
|
|
||||||
|
|
||||||
/**
 * Tokenizer for the classifier: training text is a comma-joined word list,
 * so each comma-separated entry is one token.
 *
 * @param {string} text - comma-joined words.
 * @returns {string[]} the individual words.
 */
function splitOnCommas(text) {
  return text.split(',');
}

let classifier = bayes({
  'tokenizer': splitOnCommas
});

/**
 * Restores the classifier state from `brain.json` in the working directory.
 *
 * A missing file is not fatal: the freshly created, untrained classifier is
 * kept so the server can still start and begin learning. Any other read or
 * parse error is rethrown.
 */
function load() {
  let file;

  try {
    file = fs.readFileSync('brain.json', 'utf8');
  }
  catch (err) {
    if (err.code !== 'ENOENT') throw err;

    console.error('brain.json not found, starting with an untrained classifier');

    return;
  }

  classifier = bayes.fromJson(file);

  // Functions do not survive JSON serialisation, so the restored classifier
  // falls back to the library's default tokenizer; re-attach the comma
  // tokenizer used for training.
  // NOTE(review): relies on bayes reading `classifier.tokenizer` - confirm
  // against the installed bayes version.
  classifier.tokenizer = splitOnCommas;
}

/**
 * Serialises the current classifier state and writes it to `brain.json`.
 */
function save() {
  const stateJson = classifier.toJson();

  console.log(stateJson);

  fs.writeFileSync('brain.json', stateJson);
}

load();
|
|
||||||
|
|
||||||
exports.upvote = (req, res) => {
|
|
||||||
console.log('>upvote req', req.params);
|
|
||||||
|
|
||||||
if(!req.params.id)
|
|
||||||
return res.status(500).send({
|
|
||||||
'message': 'Job id missing'
|
|
||||||
});
|
|
||||||
|
|
||||||
const id = req.params.id;
|
|
||||||
|
|
||||||
Jobs.findById(id).then(async (doc) => {
|
|
||||||
if (doc) {
|
|
||||||
const words = doc._doc.data.autoclass.words.join(',');
|
|
||||||
|
|
||||||
await classifier.learn(words, 'good');
|
|
||||||
|
|
||||||
save();
|
|
||||||
res.status(200).end();
|
|
||||||
}
|
|
||||||
}).catch((err) => {
|
|
||||||
console.error(err.message);
|
|
||||||
res.status(500).send({
|
|
||||||
'message': err.message || 'Some error occurred while querying the database.'
|
|
||||||
});
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.downvote = (req, res) => {
|
|
||||||
console.log('>upvote req', req.params);
|
|
||||||
|
|
||||||
if(!req.params.id)
|
|
||||||
return res.status(500).send({
|
|
||||||
'message': 'Job id missing'
|
|
||||||
});
|
|
||||||
|
|
||||||
const id = req.params.id;
|
|
||||||
|
|
||||||
Jobs.findById(id).then(async (doc) => {
|
|
||||||
if (doc) {
|
|
||||||
const words = doc._doc.data.autoclass.words.join(',');
|
|
||||||
|
|
||||||
await classifier.learn(words, 'bad');
|
|
||||||
|
|
||||||
save();
|
|
||||||
res.status(200).end();
|
|
||||||
}
|
|
||||||
}).catch((err) => {
|
|
||||||
console.error(err.message);
|
|
||||||
res.status(500).send({
|
|
||||||
'message': err.message || 'Some error occurred while querying the database.'
|
|
||||||
});
|
|
||||||
});
|
|
||||||
};
|
|
47
server/dist/3rdpartylicenses.txt
vendored
47
server/dist/3rdpartylicenses.txt
vendored
@ -1,47 +0,0 @@
|
|||||||
css-loader
|
|
||||||
MIT
|
|
||||||
Copyright JS Foundation and other contributors
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining
|
|
||||||
a copy of this software and associated documentation files (the
|
|
||||||
'Software'), to deal in the Software without restriction, including
|
|
||||||
without limitation the rights to use, copy, modify, merge, publish,
|
|
||||||
distribute, sublicense, and/or sell copies of the Software, and to
|
|
||||||
permit persons to whom the Software is furnished to do so, subject to
|
|
||||||
the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be
|
|
||||||
included in all copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
||||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
||||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
||||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
||||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
|
|
||||||
zone.js
|
|
||||||
MIT
|
|
||||||
The MIT License
|
|
||||||
|
|
||||||
Copyright (c) 2010-2020 Google LLC. http://angular.io/license
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
|
||||||
in the Software without restriction, including without limitation the rights
|
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
|
||||||
furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in
|
|
||||||
all copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
||||||
THE SOFTWARE.
|
|
6
server/dist/build/bundle.css
vendored
6
server/dist/build/bundle.css
vendored
File diff suppressed because one or more lines are too long
8
server/dist/build/bundle.css.map
vendored
8
server/dist/build/bundle.css.map
vendored
File diff suppressed because one or more lines are too long
2
server/dist/build/bundle.js
vendored
2
server/dist/build/bundle.js
vendored
File diff suppressed because one or more lines are too long
2
server/dist/build/bundle.js.map
vendored
2
server/dist/build/bundle.js.map
vendored
File diff suppressed because one or more lines are too long
1
server/dist/main.6053daaf70df7cc81398.js
vendored
1
server/dist/main.6053daaf70df7cc81398.js
vendored
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1
server/dist/runtime.acf0dec4155e77772545.js
vendored
1
server/dist/runtime.acf0dec4155e77772545.js
vendored
@ -1 +0,0 @@
|
|||||||
!function(e){function r(r){for(var n,l,f=r[0],i=r[1],p=r[2],c=0,s=[];c<f.length;c++)l=f[c],Object.prototype.hasOwnProperty.call(o,l)&&o[l]&&s.push(o[l][0]),o[l]=0;for(n in i)Object.prototype.hasOwnProperty.call(i,n)&&(e[n]=i[n]);for(a&&a(r);s.length;)s.shift()();return u.push.apply(u,p||[]),t()}function t(){for(var e,r=0;r<u.length;r++){for(var t=u[r],n=!0,f=1;f<t.length;f++)0!==o[t[f]]&&(n=!1);n&&(u.splice(r--,1),e=l(l.s=t[0]))}return e}var n={},o={0:0},u=[];function l(r){if(n[r])return n[r].exports;var t=n[r]={i:r,l:!1,exports:{}};return e[r].call(t.exports,t,t.exports,l),t.l=!0,t.exports}l.m=e,l.c=n,l.d=function(e,r,t){l.o(e,r)||Object.defineProperty(e,r,{enumerable:!0,get:t})},l.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},l.t=function(e,r){if(1&r&&(e=l(e)),8&r)return e;if(4&r&&"object"==typeof e&&e&&e.__esModule)return e;var t=Object.create(null);if(l.r(t),Object.defineProperty(t,"default",{enumerable:!0,value:e}),2&r&&"string"!=typeof e)for(var n in e)l.d(t,n,(function(r){return e[r]}).bind(null,n));return t},l.n=function(e){var r=e&&e.__esModule?function(){return e.default}:function(){return e};return l.d(r,"a",r),r},l.o=function(e,r){return Object.prototype.hasOwnProperty.call(e,r)},l.p="";var f=window.webpackJsonp=window.webpackJsonp||[],i=f.push.bind(f);f.push=r,f=f.slice();for(var p=0;p<f.length;p++)r(f[p]);var a=i;t()}([]);
|
|
1
server/dist/styles.7cc34b60cd61c4ed50cc.css
vendored
1
server/dist/styles.7cc34b60cd61c4ed50cc.css
vendored
File diff suppressed because one or more lines are too long
@ -1,24 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 10/09/2020
|
|
||||||
* Time: 16:06
|
|
||||||
|
|
||||||
*/
|
|
||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 25/05/2020
|
|
||||||
* Time: 13:36
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
const apply = require('../controllers/apply.v2.controller');
|
|
||||||
|
|
||||||
module.exports = (app) => {
|
|
||||||
app.route('/v2/apply/:id')
|
|
||||||
.put(apply.markApplied);
|
|
||||||
|
|
||||||
app.route('/v2/readall')
|
|
||||||
.put(apply.markAllRead);
|
|
||||||
};
|
|
@ -1,17 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 24/07/2020
|
|
||||||
* Time: 11:42
|
|
||||||
|
|
||||||
*/
|
|
||||||
const jobs = require('../controllers/jobs.v2.controller');
|
|
||||||
|
|
||||||
module.exports = (app) => {
|
|
||||||
app.route('/v2/jobs')
|
|
||||||
.get(jobs.getList);
|
|
||||||
|
|
||||||
app.route('/v2/jobs/:id')
|
|
||||||
.get(jobs.getJob)
|
|
||||||
.put(jobs.readJob);
|
|
||||||
};
|
|
@ -1,17 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 28/07/2020
|
|
||||||
* Time: 11:07
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
const vote = require('../controllers/vote.controller');
|
|
||||||
|
|
||||||
module.exports = (app) => {
|
|
||||||
app.route('/vote/up/:id')
|
|
||||||
.put(vote.upvote);
|
|
||||||
|
|
||||||
app.route('/vote/down/:id')
|
|
||||||
.put(vote.downvote);
|
|
||||||
};
|
|
@ -58,9 +58,7 @@ app.use(bodyParser.json());
|
|||||||
app.post('/auth', auth.auth);
|
app.post('/auth', auth.auth);
|
||||||
|
|
||||||
require('./routes/jobs.route')(app);
|
require('./routes/jobs.route')(app);
|
||||||
require('./routes/jobs.v2.route')(app);
|
require('./routes/apply.route')(app);
|
||||||
require('./routes/apply.v2.route')(app);
|
|
||||||
require('./routes/vote.route')(app);
|
|
||||||
|
|
||||||
app.listen(serverPort, () => {
|
app.listen(serverPort, () => {
|
||||||
console.log(`Server is listening on port ${serverPort}`);
|
console.log(`Server is listening on port ${serverPort}`);
|
||||||
|
File diff suppressed because one or more lines are too long
@ -20,7 +20,7 @@ const indeedScraper = new IndeedScraper();
|
|||||||
// const page = fs.readFileSync('data/indeed/indeed-2020-04-16--092311.html');
|
// const page = fs.readFileSync('data/indeed/indeed-2020-04-16--092311.html');
|
||||||
const page = fs.readFileSync('data/indeed/page2.html');
|
const page = fs.readFileSync('data/indeed/page2.html');
|
||||||
|
|
||||||
test.skip('Test Indeed scraper', async t => {
|
test.test('Test Indeed scraper', async t => {
|
||||||
const $ = cheerio.load(page);
|
const $ = cheerio.load(page);
|
||||||
|
|
||||||
indeedScraper.loadPage($);
|
indeedScraper.loadPage($);
|
||||||
@ -35,36 +35,13 @@ test.skip('Test Indeed scraper', async t => {
|
|||||||
|
|
||||||
await indeedScraper.filterAdverts();
|
await indeedScraper.filterAdverts();
|
||||||
|
|
||||||
await indeedScraper.addToMongo();
|
// await indeedScraper.addToDB();
|
||||||
|
|
||||||
t.end();
|
t.end();
|
||||||
});
|
});
|
||||||
|
|
||||||
test.skip('Test full run Indeed scraper', async t => {
|
test.test('Test full run Indeed scraper', async t => {
|
||||||
await indeedScraper.go('london').catch((err) => {
|
await indeedScraper.go('london');
|
||||||
console.error('Indeed GO', err);
|
|
||||||
});
|
|
||||||
|
|
||||||
t.end();
|
|
||||||
});
|
|
||||||
|
|
||||||
|
|
||||||
test.test('Test Indeed scraper -- MONGO', async t => {
|
|
||||||
const $ = cheerio.load(page);
|
|
||||||
|
|
||||||
indeedScraper.loadPage($);
|
|
||||||
|
|
||||||
await indeedScraper.breakPage();
|
|
||||||
|
|
||||||
// await indeedScraper.getJobPages();
|
|
||||||
|
|
||||||
// console.log(await indeedScraper.checkNext());
|
|
||||||
|
|
||||||
// console.log(indeedScraper.items);
|
|
||||||
|
|
||||||
// await indeedScraper.filterAdverts();
|
|
||||||
|
|
||||||
await indeedScraper.addToMongo();
|
|
||||||
|
|
||||||
t.end();
|
t.end();
|
||||||
});
|
});
|
||||||
|
@ -26,14 +26,13 @@ const s1jobsScraper = new RssS1Jobs();
|
|||||||
const feed = fs.readFileSync('test/data/s1jobs/m7dp711z2r.xml');
|
const feed = fs.readFileSync('test/data/s1jobs/m7dp711z2r.xml');
|
||||||
|
|
||||||
test.test('Test Jobserve scraper', async t => {
|
test.test('Test Jobserve scraper', async t => {
|
||||||
let url = 'http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml';
|
|
||||||
await s1jobsScraper.setStartUrl(url);
|
await s1jobsScraper.setStartUrl(url);
|
||||||
|
|
||||||
|
|
||||||
s1jobsScraper.reduceItems();
|
s1jobsScraper.reduceItems();
|
||||||
|
|
||||||
await s1jobsScraper.filterAdverts();
|
await s1jobsScraper.filterAdverts();
|
||||||
// await s1jobsScraper.addToDB();
|
await s1jobsScraper.addToDB();
|
||||||
|
|
||||||
t.end();
|
t.end();
|
||||||
});
|
});
|
||||||
|
@ -19,17 +19,17 @@ const testScraper = new RssTechnojobs();
|
|||||||
const feed = fs.readFileSync('test/data/technojobs/page1');
|
const feed = fs.readFileSync('test/data/technojobs/page1');
|
||||||
|
|
||||||
test.test('Test Technojobs scraper', async t => {
|
test.test('Test Technojobs scraper', async t => {
|
||||||
await testScraper.loadFeed('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
// await testScraper.loadFeed(feed);
|
||||||
|
|
||||||
await testScraper.reduceItems();
|
// testScraper.reduceItems();
|
||||||
|
|
||||||
await s1jobsScraper.filterAdverts();
|
// await s1jobsScraper.filterAdverts();
|
||||||
// await s1jobsScraper.addToDB();
|
// await s1jobsScraper.addToDB();
|
||||||
|
|
||||||
/* await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
|
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
|
||||||
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
|
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
|
||||||
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
|
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
|
||||||
*/
|
|
||||||
|
|
||||||
t.end();
|
t.end();
|
||||||
});
|
});
|
||||||
|
@ -31,7 +31,7 @@ test.test('Test Totaljobs scraper', async t => {
|
|||||||
await totaljobsScraper.getJobPages();
|
await totaljobsScraper.getJobPages();
|
||||||
// console.log(await indeedScraper.checkNext());
|
// console.log(await indeedScraper.checkNext());
|
||||||
|
|
||||||
// console.log(totaljobsScraper.items);
|
console.log(totaljobsScraper.items);
|
||||||
|
|
||||||
await totaljobsScraper.filterAdverts();
|
await totaljobsScraper.filterAdverts();
|
||||||
|
|
||||||
|
14
test/wip.js
14
test/wip.js
@ -1,14 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 23/07/2020
|
|
||||||
* Time: 09:26
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
const { Corpus } = require('../lib/corpus');
|
|
||||||
|
|
||||||
// Ad-hoc check: run one sample advert through Corpus.process and print the
// result for manual inspection.
const sampleAdvert = 'ESTAMP DEVELOPER 6 month contract £450-525 / day Developer, SQL, Photoshop, Javascript, … NET, C#, Javascript Advanced knowledge of SQL Server TSQL Experience of the design and … PDF stamp development E-STAMP DEVELOPER 6 month contract';

console.log(Corpus.process(sampleAdvert));
|
|
@ -1,71 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 16/04/2020
|
|
||||||
* Time: 23:35
|
|
||||||
|
|
||||||
*/
|
|
||||||
const CronJob = require('cron').CronJob;
|
|
||||||
const IndeedScraper = require('./scrapers/indeed');
|
|
||||||
const TotaljobsScraper = require('./scrapers/totaljobs');
|
|
||||||
const CwjobsScraper = require('./scrapers/cwjobs');
|
|
||||||
const JobserveScraper = require('./scrapers/rss.jobserve');
|
|
||||||
const RssS1Jobs = require('./scrapers/rss.s1jobs');
|
|
||||||
const RssTechnojobs = require('./scrapers/rss.technojobs');
|
|
||||||
|
|
||||||
/**
 * One-off scrape run: visits every location with each HTML site scraper, one
 * request chain at a time, in the order location -> indeed, totaljobs, cwjobs.
 *
 * A failing scraper still aborts the rest of the run, but the rejection is
 * now reported and reflected in the exit code instead of being left as an
 * unhandled promise rejection.
 */
(async function () {
  console.log('Started..');

  const indeedScraper = new IndeedScraper();
  const totaljobsScraper = new TotaljobsScraper();
  const cwjobsScraper = new CwjobsScraper();
  // The RSS scrapers are still constructed as before; they are only driven
  // by the feeds in the disabled block at the end of this function.
  const jobserveScraper = new JobserveScraper();
  const s1jobsScraper = new RssS1Jobs();
  const technojobsScraper = new RssTechnojobs();

  const locations = ['london', 'glasgow', 'edinburgh', 'milton keynes'];
  const siteScrapers = [indeedScraper, totaljobsScraper, cwjobsScraper];

  // Deliberately sequential: one scraper at a time, same order as before.
  for (const location of locations) {
    for (const scraper of siteScrapers) {
      await scraper.go(location);
    }
  }

  /*
  await jobserveScraper.go('https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/4E2AC50E02AD128B.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/6DA9769BA89834AA.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/EDF47BEA6B31EF.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/3CAD044BEF2BFA.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/C7B25D86D0844A.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/64A3EEF615FA4C.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/CA49421A86CA3F74.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/846CDA8658FF93A3.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/ED1708BF42EF3513.rss'); // javascript node 2 Jul 2020
  await jobserveScraper.go('https://www.jobserve.com/MySearch/4C67595E323E3453.rss'); // vuejs 2 Jul 2020
  await jobserveScraper.go('https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss'); // svelte 2 Jul 2020

  await s1jobsScraper.go('http://www.s1jobs.com/xml/m7dp711z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/pfvf7o7z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/lluqnt8z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/tu33qt8z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/u3btnz8z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');

  await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
  await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
  await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
  */
})().catch((err) => {
  console.error('Scrape run failed', err);
  process.exitCode = 1;
});
|
|
File diff suppressed because one or more lines are too long
22
words.js
22
words.js
@ -1,22 +0,0 @@
|
|||||||
/**
|
|
||||||
* Created by WebStorm.
|
|
||||||
* User: martin
|
|
||||||
* Date: 27/07/2020
|
|
||||||
* Time: 10:08
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
const jsonfile = require('jsonfile');
|
|
||||||
|
|
||||||
const data = require('./unused.json');
|
|
||||||
|
|
||||||
/**
 * Writes to `limited.json` the distinct entries of the loaded word list whose
 * `length` equals `size`, then logs 'done'.
 *
 * @param {number} size - exact length an entry must have to be kept.
 */
function show(size) {
  const hasWantedLength = (entry) => entry.length === size;
  const matches = data.filter(hasWantedLength);
  const distinct = Array.from(new Set(matches));

  jsonfile.writeFileSync('limited.json', distinct);
  console.log('done');
}
|
|
||||||
|
|
||||||
show(11);
|
|
Loading…
Reference in New Issue
Block a user