Compare commits
No commits in common. "development" and "svelte-updates" have entirely different histories.
developmen
...
svelte-upd
@ -1,32 +0,0 @@
|
||||
; http://editorconfig.org
|
||||
|
||||
root = true
|
||||
|
||||
[*]
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
[*.txt]
|
||||
insert_final_newline = false
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
[*.py]
|
||||
indent_size = 4
|
||||
|
||||
[*.m]
|
||||
indent_size = 4
|
||||
|
||||
[Makefile]
|
||||
indent_style = tab
|
||||
indent_size = 8
|
||||
|
||||
[*.{js,json}]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
[*.md]
|
||||
trim_trailing_whitespace = false
|
@ -9,7 +9,7 @@
|
||||
"env": {
|
||||
"browser": true,
|
||||
"node": true,
|
||||
"es2017": true
|
||||
"es6": true
|
||||
},
|
||||
"rules": {
|
||||
"arrow-spacing": "error",
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -147,4 +147,3 @@ fabric.properties
|
||||
/live/
|
||||
!/output/
|
||||
/db/jobs.db
|
||||
!/db/
|
||||
|
File diff suppressed because one or more lines are too long
204
brain.json
204
brain.json
@ -1,204 +0,0 @@
|
||||
{
|
||||
"categories": {
|
||||
"good": true,
|
||||
"bad": true
|
||||
},
|
||||
"docCount": {
|
||||
"good": 43,
|
||||
"bad": 5
|
||||
},
|
||||
"totalDocuments": 48,
|
||||
"vocabulary": {
|
||||
"tsql": true,
|
||||
"developer": true,
|
||||
"contract": true,
|
||||
"web": true,
|
||||
"javascript": true,
|
||||
"js": true,
|
||||
"node": true,
|
||||
"es": true,
|
||||
"agile": true,
|
||||
"nodejs": true,
|
||||
"london": true,
|
||||
"aws": true,
|
||||
"sql": true,
|
||||
"postgresql": true,
|
||||
"mysql": true,
|
||||
"docker": true,
|
||||
"ecs": true,
|
||||
"automation": true,
|
||||
"jslint": true,
|
||||
"jshint": true,
|
||||
"vuejs": true,
|
||||
"vue": true,
|
||||
"nginx": true,
|
||||
"remotely": true,
|
||||
"mvc": true,
|
||||
"remote": true,
|
||||
"iot": true,
|
||||
"mqtt": true,
|
||||
"es6": true,
|
||||
"es2016": true,
|
||||
"es2017": true,
|
||||
"es2018": true,
|
||||
"react": true,
|
||||
"redux": true,
|
||||
"graphql": true,
|
||||
"java": true,
|
||||
"reactjs": true,
|
||||
"apps": true,
|
||||
"html": true,
|
||||
"css": true,
|
||||
"code": true,
|
||||
"angular": true,
|
||||
"ember": true,
|
||||
"restful": true,
|
||||
"apis": true,
|
||||
"infrastructure": true,
|
||||
"software": true,
|
||||
"native": true,
|
||||
"med": true,
|
||||
"mobile": true,
|
||||
"client": true,
|
||||
"applications": true,
|
||||
"digital": true,
|
||||
"analytics": true,
|
||||
"dashboarding": true,
|
||||
"online": true,
|
||||
"analyse": true,
|
||||
"dashboards": true,
|
||||
"google": true,
|
||||
"query": true,
|
||||
"data": true,
|
||||
"stakeholders": true,
|
||||
"enhancements": true,
|
||||
"requirements": true,
|
||||
"c": true,
|
||||
"net": true,
|
||||
"technologies": true,
|
||||
"azure": true,
|
||||
"understanding": true,
|
||||
"devops": true,
|
||||
"tools": true,
|
||||
"frameworks": true,
|
||||
"scotland": true,
|
||||
"responsibility": true,
|
||||
"programme": true,
|
||||
"functions": true,
|
||||
"asp": true,
|
||||
"project": true,
|
||||
"transform": true,
|
||||
"collaborative": true,
|
||||
"technical": true,
|
||||
"framework": true,
|
||||
"nhibernate": true,
|
||||
"server": true,
|
||||
"api": true,
|
||||
"development": true,
|
||||
"lifecycle": true,
|
||||
"specification": true,
|
||||
"appointments": true
|
||||
},
|
||||
"vocabularySize": 89,
|
||||
"wordCount": {
|
||||
"good": 157,
|
||||
"bad": 5
|
||||
},
|
||||
"wordFrequencyCount": {
|
||||
"good": {
|
||||
"tsql": 1,
|
||||
"developer": 6,
|
||||
"contract": 9,
|
||||
"web": 6,
|
||||
"javascript": 7,
|
||||
"js": 3,
|
||||
"node": 2,
|
||||
"es": 1,
|
||||
"agile": 2,
|
||||
"nodejs": 1,
|
||||
"london": 3,
|
||||
"aws": 3,
|
||||
"sql": 3,
|
||||
"postgresql": 1,
|
||||
"mysql": 1,
|
||||
"docker": 1,
|
||||
"ecs": 1,
|
||||
"automation": 1,
|
||||
"jslint": 1,
|
||||
"jshint": 1,
|
||||
"vuejs": 1,
|
||||
"vue": 2,
|
||||
"nginx": 1,
|
||||
"remotely": 1,
|
||||
"mvc": 5,
|
||||
"remote": 2,
|
||||
"iot": 1,
|
||||
"mqtt": 1,
|
||||
"es6": 1,
|
||||
"es2016": 1,
|
||||
"es2017": 1,
|
||||
"es2018": 1,
|
||||
"apps": 1,
|
||||
"html": 5,
|
||||
"css": 5,
|
||||
"code": 2,
|
||||
"react": 2,
|
||||
"angular": 1,
|
||||
"ember": 1,
|
||||
"restful": 1,
|
||||
"apis": 1,
|
||||
"infrastructure": 1,
|
||||
"software": 2,
|
||||
"native": 1,
|
||||
"med": 1,
|
||||
"mobile": 1,
|
||||
"client": 4,
|
||||
"applications": 2,
|
||||
"digital": 2,
|
||||
"analytics": 1,
|
||||
"dashboarding": 1,
|
||||
"online": 1,
|
||||
"analyse": 1,
|
||||
"dashboards": 1,
|
||||
"google": 1,
|
||||
"query": 1,
|
||||
"data": 1,
|
||||
"stakeholders": 1,
|
||||
"enhancements": 3,
|
||||
"requirements": 3,
|
||||
"c": 4,
|
||||
"net": 5,
|
||||
"technologies": 4,
|
||||
"azure": 2,
|
||||
"understanding": 1,
|
||||
"devops": 2,
|
||||
"tools": 1,
|
||||
"frameworks": 1,
|
||||
"scotland": 1,
|
||||
"responsibility": 1,
|
||||
"programme": 1,
|
||||
"functions": 1,
|
||||
"asp": 1,
|
||||
"project": 1,
|
||||
"transform": 1,
|
||||
"collaborative": 1,
|
||||
"technical": 1,
|
||||
"framework": 1,
|
||||
"nhibernate": 1,
|
||||
"server": 1,
|
||||
"api": 1,
|
||||
"development": 1,
|
||||
"lifecycle": 1,
|
||||
"specification": 1,
|
||||
"appointments": 1
|
||||
},
|
||||
"bad": {
|
||||
"react": 1,
|
||||
"redux": 1,
|
||||
"graphql": 1,
|
||||
"java": 1,
|
||||
"reactjs": 1
|
||||
}
|
||||
},
|
||||
"options": {}
|
||||
}
|
BIN
db/jobs.db
BIN
db/jobs.db
Binary file not shown.
@ -38,7 +38,6 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
|
||||
}, null, true);
|
||||
|
||||
new CronJob('0 6-23/1 * * *', async function() {
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/D48462060FB24B6C.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss');
|
||||
@ -55,13 +54,13 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/4C67595E323E3453.rss'); // vuejs 2 Jul 2020
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss'); // svelte 2 Jul 2020
|
||||
|
||||
/* await s1jobsScraper.go('http://www.s1jobs.com/xml/m7dp711z2r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/m7dp711z2r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/pfvf7o7z2r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/lluqnt8z2r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/tu33qt8z2r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/u3btnz8z2r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');*/
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');
|
||||
|
||||
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||
|
84
lib/base.js
84
lib/base.js
@ -8,12 +8,6 @@
|
||||
const filterReject = require('../lib/filter_reject');
|
||||
const filterAccept = require('../lib/filter_md_jobs');
|
||||
const dbmanager = require('../lib/dbmanager');
|
||||
const JobsModel = require('../lib/mongoManager');
|
||||
|
||||
const SHA = require('crypto-js/sha256');
|
||||
|
||||
const { Utils } = require('@rakh/utils');
|
||||
const { Corpus } = require('./corpus');
|
||||
|
||||
class MasterBase {
|
||||
|
||||
@ -63,79 +57,6 @@ class MasterBase {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
addToMongo() {
|
||||
console.log('>> ADD TO MONGO!');
|
||||
|
||||
for(const item of this.items) {
|
||||
// console.log('add', item);
|
||||
const newObj = this.reduceData(item);
|
||||
const newJob = new JobsModel(newObj);
|
||||
|
||||
newJob.save().then((m) => {
|
||||
console.log('m', m.details.title);
|
||||
}).catch((err) => {
|
||||
console.error('m', err);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param inval
|
||||
* @returns {number}
|
||||
*/
|
||||
analyseRate(inval) {
|
||||
console.log('analyseRate', inval);
|
||||
let outVal = 0;
|
||||
const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
|
||||
const clearSpace = /\s+/g;
|
||||
|
||||
const result = inval.replace(cleanerReg, '').replace(clearSpace, ' ');
|
||||
const resultArray = result.trim().split((' '));
|
||||
|
||||
if (resultArray.length > 0) {
|
||||
const item = parseInt(resultArray[0], 10);
|
||||
|
||||
if (item < 100) outVal = 0;
|
||||
else if ((item > 100) && (item < 5000)) outVal = 1;
|
||||
else if (item >= 5000) outVal = 2;
|
||||
}
|
||||
else return 0;
|
||||
|
||||
return outVal;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param d
|
||||
* @returns {{data: {read: number, autoclass: number, applied: number, jobtype: number, class: number}, details: {}}}
|
||||
*/
|
||||
reduceData(d) {
|
||||
const clearPremium = /(\n+)(Featured|Premium)/gi;
|
||||
const otherStupid = /((↵\s+)+)(Featured|Premium)/gi;
|
||||
|
||||
const outObj = { 'details':{}, 'data':{ 'read':0, 'applied':0, 'jobtype': 0, 'class':0, 'autoclass':0 } };
|
||||
|
||||
outObj.details = Utils.extractFromObj(d, ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp']);
|
||||
|
||||
outObj.details.title = outObj.details.title.replace(clearPremium, '');
|
||||
outObj.details.title = outObj.details.title.replace(otherStupid, '');
|
||||
outObj.details.hashed = SHA(outObj.details.summary);
|
||||
|
||||
outObj.data.read = 0;
|
||||
outObj.data.applied = d.applied || 0;
|
||||
|
||||
outObj.data.jobtype = this.analyseRate(d.salary);
|
||||
outObj.data.autoclass = Corpus.process(d.summary);
|
||||
|
||||
outObj.data.timestamp = d.timestamp * 1000;
|
||||
|
||||
return outObj;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @returns {Promise<void>}
|
||||
@ -199,15 +120,10 @@ class MasterBase {
|
||||
return `https://image.silvrtree.co.uk/q${q}/${url}`;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async go() {
|
||||
this.items = [];
|
||||
this.rawItems = [];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
module.exports = MasterBase;
|
||||
|
@ -1,91 +0,0 @@
|
||||
const jsonfile = require('jsonfile');
|
||||
|
||||
const words = require('../lib/wordlist.json');
|
||||
const wordsAdditional = require('../lib/wordlistAdditional.json');
|
||||
|
||||
// word -> number of processed texts the word appeared in
const bigList = new Map();

// Words that raise a text's score.
const goodWords = [
  'tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es',
  'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker',
  'ecs', 'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely',
  'mvc', 'remote', 'iot', 'mqtt'
];

// Words that lower a text's score.
const badWords = [
  'react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'
];

// Every word seen so far that is in neither list above.
let unrated = [];

// Attach the namespace to whichever global object exists, reusing an
// already-registered Corpus if there is one. `var` is kept on purpose:
// switching to const/let could clash with an existing global binding when
// loaded as a plain browser script.
var _global = typeof global !== 'undefined' ? global : window;
var Corpus = (_global.Corpus = _global.Corpus || {});
|
||||
|
||||
// E-mail addresses. The `g` and `i` flags make sure *every* address is
// removed, including upper-case ones: cleanText lower-cases the text only
// after this pattern has been applied.
const emailRegex = /[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/gi;
// <script> and <style> elements with their content, HTML comments, and any other tag.
const detagRegex = /(<script(\s|\S)*?<\/script>)|(<style(\s|\S)*?<\/style>)|(<!--(\s|\S)*?-->)|(<\/?(\s|\S)*?>)/gi;
// Listed punctuation/symbol characters and runs of digits.
const desymbolNumberRegex = /[\n\t+$,\?\.\%\*=&:;()\\/\-£…"]|\d+/gi;
// Runs of whitespace.
const deSpace = /\s+/g;

/**
 * Normalises advert text for word analysis: e-mail addresses, markup,
 * the listed symbols and numbers are each replaced by a space, whitespace
 * is collapsed, and the result is trimmed and lower-cased.
 *
 * @param {string} [intext] raw text, may contain HTML
 * @returns {string} cleaned text, or '' when intext is missing or null
 */
function cleanText(intext) {
  if (intext === undefined || intext === null) return '';

  return intext
    .replace(emailRegex, ' ')
    .replace(detagRegex, ' ')
    .replace(desymbolNumberRegex, ' ')
    .replace(deSpace, ' ')
    .trim()
    .toLowerCase();
}
|
||||
|
||||
/**
 * Returns the distinct values of an iterable, in first-seen order.
 *
 * @param {Iterable} [intext] values to de-duplicate
 * @returns {Array} distinct values; [] when intext is missing or null
 */
function dedupe(intext) {
  // `== null` covers both "not passed / undefined" and null
  if (intext == null) return [];

  return Array.from(new Set(intext));
}
|
||||
|
||||
/**
 * Adds one to the running count kept for `item` in bigList,
 * starting the count at 1 the first time the item is seen.
 *
 * @param {*} item key to count
 */
function incItem(item) {
  const nextCount = bigList.has(item) ? bigList.get(item) + 1 : 1;

  bigList.set(item, nextCount);
}
|
||||
|
||||
/**
|
||||
* Process the body
|
||||
* @param intext
|
||||
* @returns {{score: number, bad: *, good: *}}
|
||||
*/
|
||||
Corpus.process = function(intext) {
  // Scores a piece of text against the good/bad word lists.
  // Returns { good, bad, score, words }: `words` is the de-duplicated text
  // minus everything in the two word-list files, `good`/`bad` are the subsets
  // found in goodWords/badWords, and each bad word costs five good ones.
  const isListed = (list, word) => list.includes(word);

  const tokens = cleanText(intext).split(' ');

  const cleanedArray = dedupe(tokens).filter(
    (word) => !isListed(words, word) && !isListed(wordsAdditional, word)
  );

  const good = [];
  const bad = [];
  const unused = [];

  for (const word of cleanedArray) {
    const isGood = isListed(goodWords, word);
    const isBad = isListed(badWords, word);

    if (isGood) good.push(word);
    if (isBad) bad.push(word);
    if (!isGood && !isBad) unused.push(word);

    // tally every surviving word, rated or not
    incItem(word);
  }

  unrated = unrated.concat(unused);

  const score = good.length - (bad.length * 5);

  return { good, bad, score, 'words':cleanedArray };
};
|
||||
|
||||
Corpus.exportUnused = function() {
  // Dumps the collected statistics to disk:
  //   ./unused.json  - distinct words that matched neither word list
  //   ./biglist.json - [word, count] pairs, highest count first
  // and prints the unsorted pairs to the console.
  const distinctUnrated = dedupe(unrated);

  jsonfile.writeFileSync('./unused.json', distinctUnrated);

  const byCountDescending = Array.from(bigList).sort(
    (left, right) => right[1] - left[1]
  );

  jsonfile.writeFileSync('./biglist.json', byCountDescending);

  console.log(Array.from(bigList));
};
|
||||
|
||||
// Export only when a module system is present; the namespace has already
// been attached to the global object for other environments.
if (typeof module !== 'undefined') {
  module.exports = { 'Corpus': Corpus };
}
|
@ -1,34 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 22/07/2020
|
||||
* Time: 17:00
|
||||
|
||||
*/
|
||||
|
||||
const mongoose = require('mongoose');
|
||||
const log4js = require('log4js');
|
||||
const logger = log4js.getLogger();
|
||||
|
||||
const JobsModel = require('../models/jobs');
|
||||
|
||||
// const { Utils } = require('@rakh/utils');
|
||||
|
||||
require('dotenv').config();
|
||||
|
||||
logger.level = 'debug';

// Full connection string, taken from the environment (.env is loaded above).
const mongoConnect = process.env.MONGOCONNECT;

// Never write the raw connection string to the log: it normally embeds the
// database user name and password. Mask the user-info part first.
logger.debug(String(mongoConnect).replace(/\/\/[^/@]*@/, '//***:***@'));

mongoose.connect(mongoConnect);

const mDB = mongoose.connection;

// Connection-level errors are only reported; they are not rethrown here.
mDB.on('error', console.error.bind(console, 'connection error:'));

module.exports = JobsModel;
|
@ -89,7 +89,6 @@ class MasterRSS extends MasterBase {
|
||||
await this.filterAdverts();
|
||||
|
||||
if (this.items.length > 0) await this.addToDB();
|
||||
if (this.items.length > 0) await this.addToMongo();
|
||||
}
|
||||
else
|
||||
console.log('No items to process');
|
||||
|
@ -20,15 +20,10 @@ class MasterScraper extends MasterBase {
|
||||
constructor() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param url
|
||||
* @param useStone
|
||||
* @returns {Promise<unknown>}
|
||||
*/
|
||||
|
||||
getContent(url, useStone = false) {
|
||||
|
||||
|
||||
/*
|
||||
let headers = new Headers({
|
||||
"Accept" : "application/json",
|
||||
@ -59,28 +54,19 @@ fetch(url, {
|
||||
resolve(response.body);
|
||||
})
|
||||
.catch((e) => {
|
||||
console.error('getContent', e );
|
||||
reject(e.response.body);
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
async savePage(html) {
|
||||
const now = fecha.format(new Date(), 'YYYY-MM-DD--hh');
|
||||
|
||||
const filename = `pages/${this.siteid}-${now}.html`;
|
||||
|
||||
fs.writeFileSync(filename, html);
|
||||
}
|
||||
|
||||
|
||||
async getPage() {
|
||||
console.log('>> getPage: fetching', this.url);
|
||||
const now = fecha.format(new Date(), 'YYYY-MM-DD--hhmmss');
|
||||
const filename = `${this.siteid}-${now}.html`;
|
||||
|
||||
await this.getContent(this.url, this.useStone)
|
||||
.then((html) => {
|
||||
// console.log('>> getPage:: got', html);
|
||||
console.log('>> getPage:: OK');
|
||||
if (this.saveFile) this.savePage(html);
|
||||
fs.writeFileSync(filename, html);
|
||||
const $ = cheerio.load(html);
|
||||
this.loadPage($);
|
||||
})
|
||||
@ -89,59 +75,30 @@ fetch(url, {
|
||||
|
||||
// Site specific parts below here
|
||||
|
||||
/**
|
||||
* Break each page into items
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async breakPage() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param part
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async extractDetails(part) {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async checkNext() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async processSite() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async getIndividualPage() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async getJobPages() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async go() {
|
||||
|
||||
}
|
||||
|
1007
lib/wordlist.json
1007
lib/wordlist.json
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
559
limited.json
559
limited.json
@ -1,559 +0,0 @@
|
||||
[
|
||||
"experienced",
|
||||
"exceptional",
|
||||
"maintaining",
|
||||
"familiarity",
|
||||
"commodities",
|
||||
"opportunity",
|
||||
"possibility",
|
||||
"integration",
|
||||
"engineering",
|
||||
"derivatives",
|
||||
"prefferable",
|
||||
"nutritional",
|
||||
"performance",
|
||||
"immediately",
|
||||
"information",
|
||||
"responsible",
|
||||
"environment",
|
||||
"stakeholder",
|
||||
"proactively",
|
||||
"requirement",
|
||||
"temporarily",
|
||||
"interrogate",
|
||||
"effectively",
|
||||
"progressing",
|
||||
"substantial",
|
||||
"identifying",
|
||||
"maintenance",
|
||||
"workarounds",
|
||||
"departments",
|
||||
"consultancy",
|
||||
"regulations",
|
||||
"statistical",
|
||||
"previously·",
|
||||
"euromonitor",
|
||||
"documenting",
|
||||
"bookkeeping",
|
||||
"reconciling",
|
||||
"hardworking",
|
||||
"themselves!",
|
||||
"appropriate",
|
||||
"socialising",
|
||||
"fundraising",
|
||||
"initiatives",
|
||||
"sponsorship",
|
||||
"orientation",
|
||||
"competitive",
|
||||
"illustrator",
|
||||
"outstanding",
|
||||
"interaction",
|
||||
"consistency",
|
||||
"touchpoints",
|
||||
"freshtechit",
|
||||
"recruitment",
|
||||
"catastrophe",
|
||||
"accountable",
|
||||
"workstreams",
|
||||
"scalability",
|
||||
"undertaking",
|
||||
"interacting",
|
||||
"significant",
|
||||
"considering",
|
||||
"independent",
|
||||
"collaborate",
|
||||
"arrangement",
|
||||
"unsolicited",
|
||||
"empowerment",
|
||||
"connections",
|
||||
"specialists",
|
||||
"credentials",
|
||||
"personality",
|
||||
"established",
|
||||
"northampton",
|
||||
"advertising",
|
||||
"operational",
|
||||
"mathematics",
|
||||
"contractors",
|
||||
"instruments",
|
||||
"referencing",
|
||||
"locationsco",
|
||||
"disciplines",
|
||||
"corporation",
|
||||
"investments",
|
||||
"conferences",
|
||||
"demonstrate",
|
||||
"directorate",
|
||||
"acknowledge",
|
||||
"legislation",
|
||||
"designgreat",
|
||||
"understands",
|
||||
"perspective",
|
||||
"association",
|
||||
"enforcement",
|
||||
"prestigious",
|
||||
"individuals",
|
||||
"alternative",
|
||||
"technically",
|
||||
"challenging",
|
||||
"discussions",
|
||||
"lifeworking",
|
||||
"interactive",
|
||||
"storyboards",
|
||||
"communicate",
|
||||
"abilitywork",
|
||||
"englishgood",
|
||||
"detailbonus",
|
||||
"angularwhat",
|
||||
"neededabout",
|
||||
"innovations",
|
||||
"enthusiasts",
|
||||
"instructors",
|
||||
"prospective",
|
||||
"comfortable",
|
||||
"involvement",
|
||||
"adventurous",
|
||||
"marketplace",
|
||||
"forecasting",
|
||||
"contractual",
|
||||
"underpinned",
|
||||
"acquisition",
|
||||
"microsoft’s",
|
||||
"progression",
|
||||
"suggestions",
|
||||
"proficiency",
|
||||
"participate",
|
||||
"joblocation",
|
||||
"methodology",
|
||||
"continually",
|
||||
"cataloguing",
|
||||
"projectgood",
|
||||
"incremental",
|
||||
"overarching",
|
||||
"confidently",
|
||||
"circulatory",
|
||||
"adjustments",
|
||||
"interesting",
|
||||
"consultants",
|
||||
"experienceb",
|
||||
"hourscasual",
|
||||
"switzerland",
|
||||
"contributes",
|
||||
"participant",
|
||||
"improvement",
|
||||
"articulates",
|
||||
"contributed",
|
||||
"comfortably",
|
||||
"deployments",
|
||||
"integrating",
|
||||
"configuring",
|
||||
"platforming",
|
||||
"educatedday",
|
||||
"contracting",
|
||||
"monthstotal",
|
||||
"outsourcing",
|
||||
"designswork",
|
||||
"ideasdesign",
|
||||
"deviceswork",
|
||||
"fundamental",
|
||||
"businessjob",
|
||||
"implemented",
|
||||
"transaction",
|
||||
"reliability",
|
||||
"upgradesyou",
|
||||
"uncertainty",
|
||||
"enterpriser",
|
||||
"teamprovide",
|
||||
"trafficking",
|
||||
"doubleclick",
|
||||
"communities",
|
||||
"‘forestlink",
|
||||
"dimensional",
|
||||
"coordinator",
|
||||
"spreadsheet",
|
||||
"pressurised",
|
||||
"assignments",
|
||||
"willingness",
|
||||
"certificate",
|
||||
"summaryrole",
|
||||
"institution",
|
||||
"segregation",
|
||||
"preparation",
|
||||
"electronics",
|
||||
"duplication",
|
||||
"surrounding",
|
||||
"informatica",
|
||||
"blackfriars",
|
||||
"terminology",
|
||||
"shabarinath",
|
||||
"interfacing",
|
||||
"expectation",
|
||||
"proprietary",
|
||||
"conflicting",
|
||||
"itecopeople",
|
||||
"opowershell",
|
||||
"submissions",
|
||||
"negotiating",
|
||||
"escalations",
|
||||
"transferred",
|
||||
"protections",
|
||||
"customizing",
|
||||
"oxfordshire",
|
||||
"progressive",
|
||||
"bishopsgate",
|
||||
"partnership",
|
||||
"futureheads",
|
||||
"permissions",
|
||||
"efficiently",
|
||||
"unspecified",
|
||||
"potentially",
|
||||
"disclaimers",
|
||||
"foreseeable",
|
||||
"sustainable",
|
||||
"calculation",
|
||||
"replication",
|
||||
"constitutes",
|
||||
"recommended",
|
||||
"enterprises",
|
||||
"negotiation",
|
||||
"imaginative",
|
||||
"differences",
|
||||
"nationality",
|
||||
"impediments",
|
||||
"refinements",
|
||||
"translating",
|
||||
"obligations",
|
||||
"flexibility",
|
||||
"unashamedly",
|
||||
"exclusively",
|
||||
"replacement",
|
||||
"essentially",
|
||||
"artifactory",
|
||||
"theoretical",
|
||||
"probability",
|
||||
"integrators",
|
||||
"contractor?",
|
||||
"interested?",
|
||||
"functioning",
|
||||
"chamberlain",
|
||||
"inclusivity",
|
||||
"iteratively",
|
||||
"enhancement",
|
||||
"constraints",
|
||||
"establishes",
|
||||
"qualitative",
|
||||
"influencing",
|
||||
"procurement",
|
||||
"experiences",
|
||||
"furthermore",
|
||||
"disciplined",
|
||||
"unnecessary",
|
||||
"bureaucracy",
|
||||
"represented",
|
||||
"siteimprove",
|
||||
"lokhandwala",
|
||||
"specialises",
|
||||
"rationalize",
|
||||
"competncies",
|
||||
"restoration",
|
||||
"allocations",
|
||||
"admittances",
|
||||
"furnishings",
|
||||
"cleanliness",
|
||||
"residential",
|
||||
"contactable",
|
||||
"conventions",
|
||||
"translation",
|
||||
"approaching",
|
||||
"intecselect",
|
||||
"linguistics",
|
||||
"southampton",
|
||||
"beautifully",
|
||||
"estimations",
|
||||
"newsletters",
|
||||
"summarising",
|
||||
"simulations",
|
||||
"portfolio's",
|
||||
"coronavirus",
|
||||
"opoortunity",
|
||||
"unavailable",
|
||||
"accordingly",
|
||||
"penetration",
|
||||
"remediation",
|
||||
"elimination",
|
||||
"achievement",
|
||||
"facilitator",
|
||||
"westminster",
|
||||
"introducing",
|
||||
"businesses'",
|
||||
"capitalists",
|
||||
"investigate",
|
||||
"countryside",
|
||||
"problematic",
|
||||
"coordinates",
|
||||
"components'",
|
||||
"supervision",
|
||||
"bonavolonta",
|
||||
"proposition",
|
||||
"foundations",
|
||||
"suitability",
|
||||
"researchers",
|
||||
"explanation",
|
||||
"commitments",
|
||||
"computation",
|
||||
"questioning",
|
||||
"experiments",
|
||||
"visualfiles",
|
||||
"cloudstream",
|
||||
"determining",
|
||||
"deliverable",
|
||||
"inquisitive",
|
||||
"backgrounds",
|
||||
"thoughtspot",
|
||||
"specialized",
|
||||
"veloppement",
|
||||
"importantes",
|
||||
"typedscript",
|
||||
"restaurants",
|
||||
"prophylaxis",
|
||||
"transmitted",
|
||||
"appointment",
|
||||
"encouraging",
|
||||
"aggregating",
|
||||
"championing",
|
||||
"conjunction",
|
||||
"customising",
|
||||
"photography",
|
||||
"authorities",
|
||||
"competition",
|
||||
"collections",
|
||||
"contraintes",
|
||||
"fonctionnel",
|
||||
"adaptabilit",
|
||||
"changements",
|
||||
"conceptions",
|
||||
"utilisation",
|
||||
"shortlisted",
|
||||
"reusability",
|
||||
"recognizing",
|
||||
"decisioning",
|
||||
"accommodate",
|
||||
"limitations",
|
||||
"resourceful",
|
||||
"algorithmic",
|
||||
"unconcerned",
|
||||
"intelligent",
|
||||
"considerate",
|
||||
"clientbased",
|
||||
"accelerator",
|
||||
"dreamweaver",
|
||||
"applicant's",
|
||||
"proactivity",
|
||||
"aggregation",
|
||||
"restriction",
|
||||
"traditional",
|
||||
"corporately",
|
||||
"memberships",
|
||||
"standardise",
|
||||
"theecsgroup",
|
||||
"scarchitect",
|
||||
"consolidate",
|
||||
"extensively",
|
||||
"afghanistan",
|
||||
"encompasses",
|
||||
"distinctive",
|
||||
"professions",
|
||||
"interviewed",
|
||||
"formulation",
|
||||
"transitions",
|
||||
"aspirations",
|
||||
"ingredients",
|
||||
"setterfield",
|
||||
"candidate’s",
|
||||
"leatherhead",
|
||||
"publication",
|
||||
"undoubtedly",
|
||||
"basingstoke",
|
||||
"underground",
|
||||
"reinsurance",
|
||||
"exemplifies",
|
||||
"civiization",
|
||||
"developer's",
|
||||
"bazzelgette",
|
||||
"adjacencies",
|
||||
"feasibility",
|
||||
"frontinvest",
|
||||
"neogotiable",
|
||||
"unconnected",
|
||||
"conditional",
|
||||
"bottlenecks",
|
||||
"productions",
|
||||
"pharmacists",
|
||||
"technicians",
|
||||
"prescribing",
|
||||
"stewardship",
|
||||
"recognising",
|
||||
"convictions",
|
||||
"subscribing",
|
||||
"transparent",
|
||||
"wireframing",
|
||||
"insidehmcts",
|
||||
"justicejobs",
|
||||
"criminology",
|
||||
"hospitality",
|
||||
"structuring",
|
||||
"educational",
|
||||
"substantive",
|
||||
"secondments",
|
||||
"transgender",
|
||||
"smartphones",
|
||||
"microsoft's",
|
||||
"definitions",
|
||||
"validations",
|
||||
"prioritised",
|
||||
"autoscaling",
|
||||
"abstraction",
|
||||
"correlation",
|
||||
"recognition",
|
||||
"contributor",
|
||||
"apigedevops",
|
||||
"incorporate",
|
||||
"woocommerce",
|
||||
"informatics",
|
||||
"adfadc@apps",
|
||||
"automations",
|
||||
"formulating",
|
||||
"beneficiary",
|
||||
"referential",
|
||||
"jsdevsecops",
|
||||
"solutioning",
|
||||
"measurement",
|
||||
"familiarise",
|
||||
"eligibility",
|
||||
"standardize",
|
||||
"experience?",
|
||||
"bournemouth",
|
||||
"implementer",
|
||||
"agilesphere",
|
||||
"assumptions",
|
||||
"accountancy",
|
||||
"cockroachdb",
|
||||
"promotional",
|
||||
"facilitates",
|
||||
"discoveries",
|
||||
"bladecenter",
|
||||
"considered!",
|
||||
"cooperation",
|
||||
"exploration",
|
||||
"angulareact",
|
||||
"preferabbly",
|
||||
"harmonising",
|
||||
"convenience",
|
||||
"inclusively",
|
||||
"strategists",
|
||||
"attribution",
|
||||
"fromscratch",
|
||||
"combination",
|
||||
"solutionize",
|
||||
"accelerated",
|
||||
"diagnostics",
|
||||
"sensibility",
|
||||
"informative",
|
||||
"intellegnce",
|
||||
"specilisits",
|
||||
"projections",
|
||||
"associative",
|
||||
"personalize",
|
||||
"farnborough",
|
||||
"necessarily",
|
||||
"nservicebus",
|
||||
"constrained",
|
||||
"prioritized",
|
||||
"behavioural",
|
||||
"chakraborty",
|
||||
"leaderships",
|
||||
"flourishing",
|
||||
"uniqstudios",
|
||||
"simplifying",
|
||||
"realisation",
|
||||
"extensions!",
|
||||
"prioritises",
|
||||
"experience!",
|
||||
"candidates!",
|
||||
"inclination",
|
||||
"stimulating",
|
||||
"appreciated",
|
||||
"reinventing",
|
||||
"compression",
|
||||
"jscybsecdev",
|
||||
"equirements",
|
||||
"generalized",
|
||||
"compressors",
|
||||
"assessments",
|
||||
"beyondtrust",
|
||||
"engagements",
|
||||
"numerically",
|
||||
"electricity",
|
||||
"interchange",
|
||||
"jsswift_dev",
|
||||
"circulating",
|
||||
"attachments",
|
||||
"credibility",
|
||||
"vnetpeering",
|
||||
"territories",
|
||||
"staggering!",
|
||||
"developers!",
|
||||
"peripherals",
|
||||
"virtualized",
|
||||
"bitdefender",
|
||||
"jssitecorjs",
|
||||
"positioning",
|
||||
"appreciates",
|
||||
"chessington",
|
||||
"controllers",
|
||||
"controlling",
|
||||
"quantifying",
|
||||
"virtualised",
|
||||
"manufacture",
|
||||
"fluorescent",
|
||||
"governments",
|
||||
"bigcommerce",
|
||||
"therapeutic",
|
||||
"importantly",
|
||||
"differently",
|
||||
"rigourously",
|
||||
"shareholder",
|
||||
"copywriting",
|
||||
"anticipated",
|
||||
"approximate",
|
||||
"behdarvandi",
|
||||
"testability",
|
||||
"beneficial!",
|
||||
"jswmibmcraw",
|
||||
"exhibitions",
|
||||
"talentpoint",
|
||||
"propagation",
|
||||
"interviews!",
|
||||
"solutionise",
|
||||
"elasticache",
|
||||
"manoeuvring",
|
||||
"teamservice",
|
||||
"geographies",
|
||||
"efficientip",
|
||||
"organically",
|
||||
"advancement",
|
||||
"jshodanular",
|
||||
"wholesalers",
|
||||
"multitenant",
|
||||
"encouraged?",
|
||||
"freelancers",
|
||||
"composition",
|
||||
"#jobswagger",
|
||||
"typographic",
|
||||
"stereotypes",
|
||||
"clerkenwell",
|
||||
"sacrificing",
|
||||
"resolutions",
|
||||
"technology?",
|
||||
"advantagous"
|
||||
]
|
@ -1,22 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 27/07/2020
|
||||
* Time: 15:34
|
||||
|
||||
*/
|
||||
const jsonfile = require('jsonfile');
|
||||
|
||||
// Words that should count in an advert's favour.
const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es', 'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs', 'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote', 'iot', 'mqtt'];
// Words that should count against an advert.
const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];

// word -> weight
const brain = new Map();

// Iterate over the *whole* of each list. The previous index loops stopped
// at `length - 1`, so the last entry of each list ('mqtt' and 'shopify')
// never received a weight.
for (const word of goodWords)
  brain.set(word, 3);

for (const word of badWords)
  brain.set(word, -5);

// Stored as an array of [word, weight] pairs.
jsonfile.writeFileSync('brain.json', [...brain]);
|
||||
|
156
migrate.js
156
migrate.js
@ -1,156 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 22/07/2020
|
||||
* Time: 10:20
|
||||
|
||||
*/
|
||||
const db = require('./lib/connect');
|
||||
const log4js = require('log4js');
|
||||
const logger = log4js.getLogger();
|
||||
const { Utils } = require('@rakh/utils');
|
||||
|
||||
const { Corpus } = require('./lib/corpus');
|
||||
|
||||
const SHA = require('crypto-js/sha256');
|
||||
|
||||
/*
|
||||
|
||||
2604
|
||||
|
||||
const mongoose = require('mongoose');
|
||||
const log4js = require('log4js');
|
||||
const logger = log4js.getLogger();
|
||||
|
||||
const Jobs = require('./models/jobs');
|
||||
|
||||
require('dotenv').config();
|
||||
|
||||
logger.level = 'debug';
|
||||
|
||||
logger.debug(`mongodb://martin:1V3D4m526i@${ process.env.DBHOST }/${ process.env.DBNAME}`);
|
||||
|
||||
mongoose.connect(`mongodb://martin:1V3D4m526i@${ process.env.DBHOST }/${ process.env.DBNAME}`);
|
||||
|
||||
const mDB = mongoose.connection;
|
||||
mDB.on('error', console.error.bind(console, 'connection error:'));
|
||||
*/
|
||||
|
||||
const Jobs = require('./lib/mongoManager');
|
||||
|
||||
const migrate = (function() {
|
||||
/**
 * Buckets a free-text salary/rate string by the size of the first number in it.
 *
 * Currency symbols, punctuation, letters, the token "ir35" and a trailing
 * ".d"/".dd" decimal part are stripped before the first remaining
 * whitespace-separated token is parsed as a base-10 integer.
 *
 * @param {string} inval - Raw salary text, e.g. '£400 - £500 per day'.
 * @returns {number} 0 when the input is not a string, holds no number, or the
 *   number is below 100; 1 for 100 up to (not including) 5000; 2 for 5000+.
 *   NOTE(review): the buckets look like hourly / daily / annual figures —
 *   confirm against whatever consumes `data.jobtype`.
 */
function analyseRate(inval) {
  // Rows without a salary arrive as null/undefined; treat them as "unknown".
  if (typeof inval !== 'string') return 0;

  const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
  const clearSpace = /\s+/g;

  const [firstToken] = inval
    .replace(cleanerReg, '')
    .replace(clearSpace, ' ')
    .trim()
    .split(' ');

  const amount = Number.parseInt(firstToken, 10);

  if (Number.isNaN(amount) || amount < 100) return 0;

  // Exactly 100 used to fall between the two range checks and come out as 0.
  return amount < 5000 ? 1 : 2;
}
|
||||
/**
 * Converts one row from the SQLite jobs query into the { details, data }
 * document shape handed to the Jobs model.
 *
 * @param {Object} d - Source row; reads the listed detail fields plus
 *   `applied`, `salary`, `summary` and `timestamp`.
 * @returns {{details: Object, data: Object}} The reshaped record.
 */
function reduceData(d) {
  // "Featured"/"Premium" badges that follow newlines (or literal ↵ markers)
  // in scraped titles.
  const badgeAfterNewline = /(\n+)(Featured|Premium)/gi;
  const badgeAfterArrow = /((↵\s+)+)(Featured|Premium)/gi;

  const detailFields = ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp'];

  const details = Utils.extractFromObj(d, detailFields);

  details.title = details.title.replace(badgeAfterNewline, '').replace(badgeAfterArrow, '');
  details.hashed = SHA(details.summary);

  return {
    'details': details,
    'data': {
      // Migrated jobs always start as unread, whatever the old row said.
      'read': 0,
      'applied': d.applied || 0,
      'jobtype': analyseRate(d.salary),
      'class': 0,
      'autoclass': Corpus.process(d.summary),
      // Source timestamp is multiplied by 1000 (presumably seconds -> ms; confirm).
      'timestamp': d.timestamp * 1000
    }
  };
}
|
||||
|
||||
/**
 * Loads every job row from the SQLite database, joined with its applied/read
 * markers, ordered by _id ascending.
 *
 * @returns {Promise<Object[]>} Resolves with a fresh array of the rows;
 *   rejects with the driver error if the query fails.
 */
function getCurrent() {
  console.log('get version');
  const sql = 'select jobs.*, applied.a as applied, read.d as read from jobs left join applied on applied.aid = jobs._id left join read on read.rid = jobs._id order by _id asc;';

  // new Promise is used only to adapt the callback-style db.all API.
  return new Promise((resolve, reject) => {
    db.all(sql, [], (err, rows) => {
      if (err) {
        // Stop here: without the return, the code went on to iterate the
        // undefined `rows` and threw inside the driver callback.
        reject(err);

        return;
      }

      resolve([...rows]);
    });
  });
}
|
||||
|
||||
/**
 * Migrates every row returned by getCurrent() into the Jobs collection, one
 * save at a time, then asks the Corpus to export its unused words.
 *
 * A failed save is logged and the migration carries on with the next row.
 * Any other failure is logged and ends the run; the returned promise does
 * not reject.
 *
 * @returns {Promise<void>}
 */
async function start() {
  try {
    const rows = await getCurrent();

    logger.debug(rows.length);

    // Iterate every row: the previous `t < d.length - 1` bound skipped the last one.
    for (const row of rows) {
      const newJob = Jobs(reduceData(row));

      try {
        const saved = await newJob.save();

        logger.debug('m', saved.details.title);
      }
      catch (err) {
        // keyPattern is only set on some errors (e.g. unique-index clashes);
        // fall back to the error itself so other failures are not logged as
        // `undefined`.
        logger.error(err.keyPattern || err);
      }
    }

    logger.debug('SAVING!!');
    Corpus.exportUnused();
  }
  catch (err) {
    logger.error(err.keyPattern || err);
  }
}
|
||||
|
||||
/**
 * Deletes jobs whose data.timestamp is more than 14 days old and whose
 * data.applied is 0.
 *
 * The delete is awaited, so the returned promise settles only once the
 * database call has finished. Failures are logged, not thrown.
 *
 * @returns {Promise<void>}
 */
async function deleteOld() {
  const oneDay = 86400000;
  const twoWeeksAgo = Date.now() - (14 * oneDay);

  logger.debug('Delete older than: ', new Date(twoWeeksAgo), twoWeeksAgo);
  logger.debug({ 'data.timestamp': { '$lt': twoWeeksAgo } });

  try {
    // Previously this promise was left floating, so callers that awaited
    // deleteOld() carried on before the delete had run.
    const result = await Jobs.deleteMany({ 'data.timestamp': { '$lt': twoWeeksAgo }, 'data.applied': 0 });

    logger.debug('m', result);
  }
  catch (err) {
    logger.error(err);
  }
}
|
||||
|
||||
// newJob.find({ 'data': { 'timestamp': { '$lt': 1587034346000 } } });
|
||||
|
||||
return {
|
||||
'start':start,
|
||||
'deleteOld': deleteOld
|
||||
};
|
||||
})();
|
||||
|
||||
// Script entry point: run the migration, then the clean-up of old jobs,
// strictly one after the other.
(async () => {
  await migrate.start();
  await migrate.deleteOld();

  logger.info('Done??');
})();
|
@ -1,47 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 22/07/2020
|
||||
* Time: 14:18
|
||||
|
||||
*/
|
||||
const mongoose = require('mongoose');
|
||||
const Schema = mongoose.Schema;
|
||||
|
||||
// Small factories so every path gets its own options object.
const requiredString = (extra = {}) => ({ 'type': String, 'required': true, ...extra });
const numberDefaultZero = () => ({ 'type': Number, 'default': 0 });

// Fields copied from the advert.
const detailsShape = {
  'title': requiredString(),
  'site': requiredString(),
  'url': requiredString({ 'unique': true }),
  'id': String,
  'summary': String,
  'company': String,
  'location': String,
  'postdate': String,
  'salary': String,
  'easyapply': Number,
  'timestamp': Number,
  // Set from the summary by the migration script; unique-indexed.
  'hashed': requiredString({ 'unique': true })
};

// State and classification fields kept alongside the advert.
const dataShape = {
  'read': numberDefaultZero(),
  'applied': numberDefaultZero(),
  'jobtype': numberDefaultZero(),
  'class': numberDefaultZero(),
  'autoclass': {
    'good': Array,
    'bad': Array,
    'words': Array,
    'score': numberDefaultZero()
  },
  'timestamp': numberDefaultZero(),
  'created_at': { 'type': Date, 'default': Date.now }
};

const jobSchema = new Schema({
  'details': detailsShape,
  'data': dataShape
});

mongoose.set('useFindAndModify', false);

const Jobs = mongoose.model('Jobs', jobSchema);

module.exports = Jobs;
|
66
onetime.js
66
onetime.js
@ -1,66 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 16/04/2020
|
||||
* Time: 23:35
|
||||
|
||||
*/
|
||||
const CronJob = require('cron').CronJob;
|
||||
const IndeedScraper = require('./scrapers/indeed');
|
||||
const TotaljobsScraper = require('./scrapers/totaljobs');
|
||||
const CwjobsScraper = require('./scrapers/cwjobs');
|
||||
const JobserveScraper = require('./scrapers/rss.jobserve');
|
||||
const RssS1Jobs = require('./scrapers/rss.s1jobs');
|
||||
const RssTechnojobs = require('./scrapers/rss.technojobs');
|
||||
|
||||
// One-off run of every scraper, strictly sequential: the three site scrapers
// for each location in turn, then each RSS/XML feed in list order.
(async () => {
  console.log('Started..');

  const indeedScraper = new IndeedScraper();
  const totaljobsScraper = new TotaljobsScraper();
  const cwjobsScraper = new CwjobsScraper();
  const jobserveScraper = new JobserveScraper();
  const s1jobsScraper = new RssS1Jobs();
  const technojobsScraper = new RssTechnojobs();

  const locations = ['london', 'glasgow', 'edinburgh', 'milton keynes'];
  const siteScrapers = [indeedScraper, totaljobsScraper, cwjobsScraper];

  const jobserveFeeds = [
    'https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss',
    'https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss',
    'https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss',
    'https://www.jobserve.com/MySearch/4E2AC50E02AD128B.rss',
    'https://www.jobserve.com/MySearch/6DA9769BA89834AA.rss',
    'https://www.jobserve.com/MySearch/EDF47BEA6B31EF.rss',
    'https://www.jobserve.com/MySearch/3CAD044BEF2BFA.rss',
    'https://www.jobserve.com/MySearch/C7B25D86D0844A.rss',
    'https://www.jobserve.com/MySearch/64A3EEF615FA4C.rss',
    'https://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss',
    'https://www.jobserve.com/MySearch/CA49421A86CA3F74.rss',
    'https://www.jobserve.com/MySearch/846CDA8658FF93A3.rss',
    'https://www.jobserve.com/MySearch/ED1708BF42EF3513.rss', // javascript node 2 Jul 2020
    'https://www.jobserve.com/MySearch/4C67595E323E3453.rss', // vuejs 2 Jul 2020
    'https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss' // svelte 2 Jul 2020
  ];

  const s1jobsFeeds = [
    'http://www.s1jobs.com/xml/m7dp711z2r.xml',
    'http://www.s1jobs.com/xml/pfvf7o7z2r.xml',
    'http://www.s1jobs.com/xml/lluqnt8z2r.xml',
    'http://www.s1jobs.com/xml/tu33qt8z2r.xml',
    'http://www.s1jobs.com/xml/u3btnz8z2r.xml',
    'http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml',
    'http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml'
  ];

  const technojobsFeeds = [
    'https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1',
    'https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1',
    'https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1'
  ];

  for (const location of locations) {
    for (const scraper of siteScrapers) {
      await scraper.go(location);
    }
  }

  for (const feed of jobserveFeeds) {
    await jobserveScraper.go(feed);
  }

  for (const feed of s1jobsFeeds) {
    await s1jobsScraper.go(feed);
  }

  for (const feed of technojobsFeeds) {
    await technojobsScraper.go(feed);
  }
})();
|
1296
package-lock.json
generated
1296
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
12
package.json
12
package.json
@ -1,31 +1,23 @@
|
||||
{
|
||||
"name": "jobscraper",
|
||||
"version": "1.0.2",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"release": "vik patch -t",
|
||||
"grabber": "node grabber.js",
|
||||
"server" : "node server/server.js"
|
||||
"grabber": "node grabber.js"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@rakh/utils": "^1.0.0",
|
||||
"axios": "^0.19.2",
|
||||
"bayes": "^1.0.0",
|
||||
"body-parser": "^1.19.0",
|
||||
"cheerio": "^1.0.0-rc.3",
|
||||
"cron": "^1.8.2",
|
||||
"crypto-js": "^4.0.0",
|
||||
"dotenv": "^8.2.0",
|
||||
"eslint": "^6.8.0",
|
||||
"express": "^4.17.1",
|
||||
"fecha": "^4.2.0",
|
||||
"got": "^11.2.0",
|
||||
"jsonfile": "^6.0.1",
|
||||
"log4js": "^6.3.0",
|
||||
"mongoose": "^5.9.25",
|
||||
"present": "^1.0.0",
|
||||
"rss-parser": "^3.8.0",
|
||||
"sqlite3": "^4.1.1",
|
||||
|
45
preload.js
45
preload.js
@ -1,45 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 28/07/2020
|
||||
* Time: 10:51
|
||||
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
|
||||
var bayes = require('bayes');
|
||||
|
||||
// Classifier whose documents are comma-separated word lists: each
// comma-delimited piece is one token.
const classifier = bayes({
  'tokenizer': (text) => text.split(',')
});
|
||||
|
||||
// teach it positive phrases
|
||||
|
||||
/**
 * Trains the classifier from the built-in good/bad word lists, prints two
 * sample categorisations, then writes the serialised classifier state to
 * brain.json.
 *
 * @returns {Promise<void>}
 */
async function load() {
  const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es', 'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs', 'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote', 'iot', 'mqtt', 'es6', 'es2016', 'es2017', 'es2018', 'freelance'];
  const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];

  // Teach every word: the previous `i < length - 1` bounds skipped the last
  // entry of each list ('freelance' and 'shopify').
  for (const word of goodWords) {
    await classifier.learn(word, 'good');
  }

  for (const word of badWords) {
    await classifier.learn(word, 'bad');
  }

  // Now ask it to categorise documents it has never seen before.
  console.log(await classifier.categorize(['ui', 'developer', 'london', 'react'].join(',')));
  console.log(await classifier.categorize(['mysql', 'react', 'js', 'node', 'docker', 'kubernetes', 'google'].join(',')));

  // Serialise the classifier's state as a JSON string and persist it.
  const stateJson = classifier.toJson();

  console.log(stateJson);

  fs.writeFileSync('brain.json', stateJson);
}
|
||||
|
||||
load();
|
@ -20,7 +20,7 @@ class CwjobsScraper extends TotaljobsScraper {
|
||||
}
|
||||
|
||||
async go(location = 'london') {
|
||||
this.setStartUrl(`https://www.cwjobs.co.uk/jobs/contract/html-or-angular-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
|
||||
this.setStartUrl(`https://www.cwjobs.co.uk/jobs/contract/html-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
|
||||
// this.setStartUrl('https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=javascript+nodejs&as_not=&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=25&l=london&fromage=7&limit=10&sort=date&psf=advsrch&from=advancedsearch');
|
||||
|
||||
// Glasgow
|
||||
|
@ -11,7 +11,7 @@ const cheerio = require('cheerio');
|
||||
const MasterScraper = require('../lib/scraper');
|
||||
|
||||
class IndeedScraper extends MasterScraper {
|
||||
|
||||
|
||||
constructor() {
|
||||
super();
|
||||
this.siteurl = 'www.indeed.co.uk';
|
||||
@ -23,15 +23,15 @@ class IndeedScraper extends MasterScraper {
|
||||
|
||||
this.antiAd = /sja\d+/gi;
|
||||
}
|
||||
|
||||
|
||||
// Site specific parts below here
|
||||
|
||||
|
||||
async breakPage() {
|
||||
const $ = this.currentPage;
|
||||
const ads = [];
|
||||
|
||||
|
||||
const sections = $('div.row.result');
|
||||
|
||||
|
||||
await sections.each(async (index, item) => {
|
||||
// console.log($(item).html());
|
||||
const ad = await this.extractDetails(item);
|
||||
@ -73,7 +73,7 @@ class IndeedScraper extends MasterScraper {
|
||||
|
||||
return newObj;
|
||||
}
|
||||
|
||||
|
||||
async getIndividualPage(item) {
|
||||
const newItem = {...item};
|
||||
console.log('Getting', item.url);
|
||||
@ -100,10 +100,10 @@ class IndeedScraper extends MasterScraper {
|
||||
async checkNext() {
|
||||
const $ = this.currentPage;
|
||||
const next = $('.pagination > *:last-child').attr('href') || '';
|
||||
if (next !== '')
|
||||
if (next !== '')
|
||||
// next = `https://${ this.siteurl }${next}`;
|
||||
this.makeUrl(next);
|
||||
|
||||
|
||||
console.log(next);
|
||||
}
|
||||
|
||||
@ -122,9 +122,9 @@ class IndeedScraper extends MasterScraper {
|
||||
await this.checkNext();
|
||||
|
||||
await this.getJobPages();
|
||||
|
||||
|
||||
// nextPage = await this.checkNext();
|
||||
|
||||
|
||||
// if (nextPage === previousPage) nextPage = '';
|
||||
|
||||
// this.setStartUrl(nextPage);
|
||||
@ -133,19 +133,16 @@ class IndeedScraper extends MasterScraper {
|
||||
await this.filterAdverts();
|
||||
|
||||
await this.addToDB();
|
||||
await this.addToMongo();
|
||||
}
|
||||
|
||||
async go(location = 'london') {
|
||||
this.setStartUrl(`https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=Angular+Html+Web+Sql+Delphi+Vb+Vbscript+Php+Ajax+Mysql+Sqlserver+Javascript+Nodejs+vuejs+sveltejs&as_not=React&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=0&l=${encodeURIComponent(location)}&fromage=1&limit=50&sort=&psf=advsrch&from=advancedsearch`);
|
||||
this.setStartUrl(`https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=Html+Web+Sql+Delphi+Vb+Vbscript+Php+Ajax+Mysql+Sqlserver+Javascript+Nodejs+vuejs+sveltejs&as_not=React&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=0&l=${encodeURIComponent(location)}&fromage=1&limit=50&sort=&psf=advsrch&from=advancedsearch`);
|
||||
|
||||
await this.processSite().catch((err) => {
|
||||
console.error('Indeed Go', err);
|
||||
});
|
||||
await this.processSite();
|
||||
|
||||
console.log(`Indeed ${location} completed`);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
module.exports = IndeedScraper;
|
||||
|
@ -140,7 +140,6 @@ class IndeedMobileScraper extends MasterScraper {
|
||||
await this.filterAdverts();
|
||||
|
||||
await this.addToDB();
|
||||
await this.addToMongo();
|
||||
}
|
||||
|
||||
async go(location = 'london') {
|
||||
|
@ -22,10 +22,7 @@ class TotaljobsScraper extends MasterScraper {
|
||||
}
|
||||
|
||||
// Site specific parts below here
|
||||
/**
|
||||
*
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
|
||||
async breakPage() {
|
||||
const $ = this.currentPage;
|
||||
const ads = [];
|
||||
@ -42,11 +39,6 @@ class TotaljobsScraper extends MasterScraper {
|
||||
this.items = [...this.items, ...ads];
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param part
|
||||
* @returns {Promise<{}>}
|
||||
*/
|
||||
async extractDetails(part) {
|
||||
const newObj = {};
|
||||
const $part = cheerio.load(part);
|
||||
@ -69,11 +61,6 @@ class TotaljobsScraper extends MasterScraper {
|
||||
return newObj;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param item
|
||||
* @returns {Promise<*>}
|
||||
*/
|
||||
async getIndividualPage(item) {
|
||||
const newItem = {...item};
|
||||
console.log('Getting', item.url);
|
||||
@ -88,10 +75,6 @@ class TotaljobsScraper extends MasterScraper {
|
||||
return newItem;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async getJobPages() {
|
||||
const newItems = [];
|
||||
for (let item of this.items) {
|
||||
@ -103,10 +86,6 @@ class TotaljobsScraper extends MasterScraper {
|
||||
this.items = [...newItems];
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async checkNext() {
|
||||
const $ = this.currentPage;
|
||||
const next = $('.pagination > *:last-child').attr('href') || '';
|
||||
@ -117,10 +96,6 @@ class TotaljobsScraper extends MasterScraper {
|
||||
console.log(next);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async processSite() {
|
||||
console.log('Processing...');
|
||||
|
||||
@ -146,16 +121,10 @@ class TotaljobsScraper extends MasterScraper {
|
||||
await this.filterAdverts();
|
||||
|
||||
await this.addToDB();
|
||||
await this.addToMongo();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param location
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async go(location = 'london') {
|
||||
this.setStartUrl(`https://www.totaljobs.com/jobs/contract/html-or-angular-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
|
||||
this.setStartUrl(`https://www.totaljobs.com/jobs/contract/html-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
|
||||
// this.setStartUrl('https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=javascript+nodejs&as_not=&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=25&l=london&fromage=7&limit=10&sort=date&psf=advsrch&from=advancedsearch');
|
||||
|
||||
// Glasgow
|
||||
|
@ -1,81 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 10/09/2020
|
||||
* Time: 16:07
|
||||
|
||||
*/
|
||||
const Jobs = require('../../lib/mongoManager');
|
||||
const { Utils } = require('@rakh/utils');
|
||||
|
||||
exports.markApplied = (req, res) => {
|
||||
console.log('>V2 markApplied req', req.params);
|
||||
|
||||
if(!req.params.id)
|
||||
return res.status(500).send({
|
||||
'message': 'Job id missing'
|
||||
});
|
||||
|
||||
const aid = req.params.id;
|
||||
const now = new Date().getTime();
|
||||
|
||||
// touchOne
|
||||
|
||||
console.log('aid', aid);
|
||||
|
||||
Jobs.updateMany({ '_id':aid }, { '$set': { 'data.applied':now } } ).then((data) => {
|
||||
console.log(data);
|
||||
|
||||
res.status(200).end();
|
||||
}).catch((err) => {
|
||||
console.error(err.message);
|
||||
res.status(500).send({
|
||||
'message': err.message || 'Some error occurred while querying the database.'
|
||||
});
|
||||
});
|
||||
|
||||
/*
|
||||
dbmanager.appliedOne({ aid, a })
|
||||
.then((data) => {
|
||||
console.log(data);
|
||||
|
||||
res.status(200).end();
|
||||
})
|
||||
.catch((err) => {
|
||||
res.status(500).send({
|
||||
'message': err.message || 'Some error occurred while querying the database.'
|
||||
});
|
||||
});
|
||||
*/
|
||||
};
|
||||
|
||||
exports.markAllRead = (req, res) => {
|
||||
console.log('>V2 markAllRead req', req.params);
|
||||
|
||||
const now = new Date().getTime();
|
||||
|
||||
Jobs.updateMany({ 'data.read':0 }, { '$set': { 'data.read':now } } ).then((data) => {
|
||||
console.log(data);
|
||||
|
||||
res.status(200).end();
|
||||
}).catch((err) => {
|
||||
console.error(err.message);
|
||||
res.status(500).send({
|
||||
'message': err.message || 'Some error occurred while querying the database.'
|
||||
});
|
||||
});
|
||||
|
||||
/*
|
||||
dbmanager.markAllRead()
|
||||
.then((data) => {
|
||||
console.log(data);
|
||||
|
||||
res.status(200).end();
|
||||
})
|
||||
.catch((err) => {
|
||||
res.status(500).send({
|
||||
'message': err.message || 'Some error occurred while querying the database.'
|
||||
});
|
||||
});
|
||||
*/
|
||||
};
|
@ -1,124 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 24/07/2020
|
||||
* Time: 11:45
|
||||
|
||||
*/
|
||||
const Jobs = require('../../lib/mongoManager');
|
||||
const { Utils } = require('@rakh/utils');
|
||||
|
||||
const killNLDoubleSpace = /(\\n)\s{2,}|(\\n)|\s{2,}/g;
|
||||
|
||||
/**
 * Flattens job documents into the compact rows sent to the list view.
 *
 * @param {Array|null} data - Job documents, each with details, data and _id.
 * @returns {Array|string} One object per job holding the listed fields plus
 *   _id; the empty string when called with no argument or with null.
 */
function reduceList(data) {
  if (arguments.length === 0 || data === null) return '';

  const listFields = ['title', 'site', 'company', 'timestamp', 'read', 'applied', 'jobtype', 'class', 'autoclass'];

  return data.map((job) => {
    // Merge both sub-documents into one level before picking fields;
    // data keys win over details keys on a clash.
    const flattened = { ...job.details, ...job.data, '_id': job._id };
    const row = Utils.extractFromObj(flattened, listFields);

    row._id = job._id;

    return row;
  });
}
|
||||
|
||||
/**
 * Flattens a single job document: the details fields move to the top level,
 * alongside the untouched `data` sub-document and the `_id`.
 *
 * @param {Object} record - Job document with details, data and _id.
 * @returns {Object} The flattened copy.
 */
function reduceRecord(record) {
  const { details, data, _id } = record;

  return { ...details, data, _id };
}
|
||||
|
||||
exports.getList = (req, res) => {
|
||||
console.log('>getList req', req.params);
|
||||
|
||||
Jobs.find({}, { 'details.title':1, 'details.site':1, 'details.company':1, 'data':1, '_id':1 }).limit(200).sort( { 'data.timestamp': -1 } ).then((doc) => {
|
||||
if (doc) {
|
||||
|
||||
res.send(reduceList(doc));
|
||||
}
|
||||
}).catch((err) => {
|
||||
console.error(err.message);
|
||||
res.status(500).send({
|
||||
'message': err.message || 'Some error occurred while querying the database.'
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
exports.getJob = (req, res) => {
|
||||
console.log('>getJob req', req.params);
|
||||
|
||||
if(!req.params.id)
|
||||
return res.status(500).send({
|
||||
'message': 'Job id missing'
|
||||
});
|
||||
|
||||
const id = req.params.id;
|
||||
|
||||
Jobs.findById(id).then((doc) => {
|
||||
if (doc) {
|
||||
|
||||
const item = reduceRecord(doc._doc);
|
||||
const date = new Date( item.timestamp * 1000);
|
||||
|
||||
console.log(item);
|
||||
item.date = date.toLocaleString();
|
||||
item.title = item.title.replace(killNLDoubleSpace, ' ');
|
||||
|
||||
res.send(item);
|
||||
}
|
||||
}).catch((err) => {
|
||||
console.error(err.message);
|
||||
res.status(500).send({
|
||||
'message': err.message || 'Some error occurred while querying the database.'
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
exports.readJob = (req, res) => {
|
||||
console.log('>readJob req', req.params);
|
||||
|
||||
let id;
|
||||
if(!req.params.id)
|
||||
return res.status(500).send({
|
||||
'message': 'Job id missing'
|
||||
});
|
||||
else
|
||||
id = req.params.id;
|
||||
|
||||
Jobs.findById(id).then((doc) => {
|
||||
if (doc) {
|
||||
|
||||
let fullDoc = Object.assign({}, doc._doc);
|
||||
|
||||
console.log('fullDoc', fullDoc);
|
||||
|
||||
if (!Utils.isEmpty(fullDoc)){
|
||||
fullDoc.data.read = new Date().getTime();
|
||||
|
||||
Jobs.findByIdAndUpdate(id, fullDoc, {'new':true}).then((doc) => {
|
||||
console.log(doc._doc);
|
||||
res.status(200).end();
|
||||
}).catch((err) => {
|
||||
console.error('inside',err.message);
|
||||
res.status(500).send({
|
||||
'message': err.message || 'Some error occurred while querying the database.'
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
}).catch((err) => {
|
||||
console.error('outer', err.message);
|
||||
res.status(500).send({
|
||||
'message': err.message || 'Some error occurred while querying the database.'
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
@ -1,89 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 28/07/2020
|
||||
* Time: 11:08
|
||||
|
||||
*/
|
||||
const Jobs = require('../../lib/mongoManager');
|
||||
const { Utils } = require('@rakh/utils');
|
||||
|
||||
const fs = require('fs');
|
||||
|
||||
var bayes = require('bayes');
|
||||
|
||||
// Working classifier; replaced by load() with the state read from brain.json.
// Documents are comma-separated word lists, so tokens are the
// comma-delimited pieces.
let classifier = bayes({
  'tokenizer': (text) => text.split(',')
});
|
||||
|
||||
/**
 * Replaces the module-level classifier with one rebuilt from brain.json
 * (read synchronously, relative to the process working directory).
 *
 * NOTE(review): the custom tokenizer is a function and so may not survive
 * serialisation — confirm fromJson() restores the intended tokenizer.
 */
function load() {
  classifier = bayes.fromJson(fs.readFileSync('brain.json'));
}
|
||||
|
||||
/**
 * Serialises the current classifier state, echoes it to the console and
 * writes it synchronously to brain.json.
 */
function save() {
  const serialised = classifier.toJson();

  console.log(serialised);
  fs.writeFileSync('brain.json', serialised);
}
|
||||
|
||||
load();
|
||||
|
||||
exports.upvote = (req, res) => {
|
||||
console.log('>upvote req', req.params);
|
||||
|
||||
if(!req.params.id)
|
||||
return res.status(500).send({
|
||||
'message': 'Job id missing'
|
||||
});
|
||||
|
||||
const id = req.params.id;
|
||||
|
||||
Jobs.findById(id).then(async (doc) => {
|
||||
if (doc) {
|
||||
const words = doc._doc.data.autoclass.words.join(',');
|
||||
|
||||
await classifier.learn(words, 'good');
|
||||
|
||||
save();
|
||||
res.status(200).end();
|
||||
}
|
||||
}).catch((err) => {
|
||||
console.error(err.message);
|
||||
res.status(500).send({
|
||||
'message': err.message || 'Some error occurred while querying the database.'
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
exports.downvote = (req, res) => {
|
||||
console.log('>upvote req', req.params);
|
||||
|
||||
if(!req.params.id)
|
||||
return res.status(500).send({
|
||||
'message': 'Job id missing'
|
||||
});
|
||||
|
||||
const id = req.params.id;
|
||||
|
||||
Jobs.findById(id).then(async (doc) => {
|
||||
if (doc) {
|
||||
const words = doc._doc.data.autoclass.words.join(',');
|
||||
|
||||
await classifier.learn(words, 'bad');
|
||||
|
||||
save();
|
||||
res.status(200).end();
|
||||
}
|
||||
}).catch((err) => {
|
||||
console.error(err.message);
|
||||
res.status(500).send({
|
||||
'message': err.message || 'Some error occurred while querying the database.'
|
||||
});
|
||||
});
|
||||
};
|
47
server/dist/3rdpartylicenses.txt
vendored
47
server/dist/3rdpartylicenses.txt
vendored
@ -1,47 +0,0 @@
|
||||
css-loader
|
||||
MIT
|
||||
Copyright JS Foundation and other contributors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
'Software'), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
|
||||
zone.js
|
||||
MIT
|
||||
The MIT License
|
||||
|
||||
Copyright (c) 2010-2020 Google LLC. http://angular.io/license
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
6
server/dist/build/bundle.css
vendored
6
server/dist/build/bundle.css
vendored
File diff suppressed because one or more lines are too long
8
server/dist/build/bundle.css.map
vendored
8
server/dist/build/bundle.css.map
vendored
File diff suppressed because one or more lines are too long
2
server/dist/build/bundle.js
vendored
2
server/dist/build/bundle.js
vendored
File diff suppressed because one or more lines are too long
2
server/dist/build/bundle.js.map
vendored
2
server/dist/build/bundle.js.map
vendored
File diff suppressed because one or more lines are too long
1
server/dist/main.6053daaf70df7cc81398.js
vendored
1
server/dist/main.6053daaf70df7cc81398.js
vendored
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1
server/dist/runtime.acf0dec4155e77772545.js
vendored
1
server/dist/runtime.acf0dec4155e77772545.js
vendored
@ -1 +0,0 @@
|
||||
!function(e){function r(r){for(var n,l,f=r[0],i=r[1],p=r[2],c=0,s=[];c<f.length;c++)l=f[c],Object.prototype.hasOwnProperty.call(o,l)&&o[l]&&s.push(o[l][0]),o[l]=0;for(n in i)Object.prototype.hasOwnProperty.call(i,n)&&(e[n]=i[n]);for(a&&a(r);s.length;)s.shift()();return u.push.apply(u,p||[]),t()}function t(){for(var e,r=0;r<u.length;r++){for(var t=u[r],n=!0,f=1;f<t.length;f++)0!==o[t[f]]&&(n=!1);n&&(u.splice(r--,1),e=l(l.s=t[0]))}return e}var n={},o={0:0},u=[];function l(r){if(n[r])return n[r].exports;var t=n[r]={i:r,l:!1,exports:{}};return e[r].call(t.exports,t,t.exports,l),t.l=!0,t.exports}l.m=e,l.c=n,l.d=function(e,r,t){l.o(e,r)||Object.defineProperty(e,r,{enumerable:!0,get:t})},l.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},l.t=function(e,r){if(1&r&&(e=l(e)),8&r)return e;if(4&r&&"object"==typeof e&&e&&e.__esModule)return e;var t=Object.create(null);if(l.r(t),Object.defineProperty(t,"default",{enumerable:!0,value:e}),2&r&&"string"!=typeof e)for(var n in e)l.d(t,n,(function(r){return e[r]}).bind(null,n));return t},l.n=function(e){var r=e&&e.__esModule?function(){return e.default}:function(){return e};return l.d(r,"a",r),r},l.o=function(e,r){return Object.prototype.hasOwnProperty.call(e,r)},l.p="";var f=window.webpackJsonp=window.webpackJsonp||[],i=f.push.bind(f);f.push=r,f=f.slice();for(var p=0;p<f.length;p++)r(f[p]);var a=i;t()}([]);
|
1
server/dist/styles.7cc34b60cd61c4ed50cc.css
vendored
1
server/dist/styles.7cc34b60cd61c4ed50cc.css
vendored
File diff suppressed because one or more lines are too long
@ -1,24 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 10/09/2020
|
||||
* Time: 16:06
|
||||
|
||||
*/
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 25/05/2020
|
||||
* Time: 13:36
|
||||
|
||||
*/
|
||||
|
||||
const apply = require('../controllers/apply.v2.controller');
|
||||
|
||||
module.exports = (app) => {
|
||||
app.route('/v2/apply/:id')
|
||||
.put(apply.markApplied);
|
||||
|
||||
app.route('/v2/readall')
|
||||
.put(apply.markAllRead);
|
||||
};
|
@ -1,17 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 24/07/2020
|
||||
* Time: 11:42
|
||||
|
||||
*/
|
||||
const jobs = require('../controllers/jobs.v2.controller');
|
||||
|
||||
module.exports = (app) => {
|
||||
app.route('/v2/jobs')
|
||||
.get(jobs.getList);
|
||||
|
||||
app.route('/v2/jobs/:id')
|
||||
.get(jobs.getJob)
|
||||
.put(jobs.readJob);
|
||||
};
|
@ -1,17 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 28/07/2020
|
||||
* Time: 11:07
|
||||
|
||||
*/
|
||||
|
||||
const vote = require('../controllers/vote.controller');
|
||||
|
||||
module.exports = (app) => {
|
||||
app.route('/vote/up/:id')
|
||||
.put(vote.upvote);
|
||||
|
||||
app.route('/vote/down/:id')
|
||||
.put(vote.downvote);
|
||||
};
|
@ -58,9 +58,7 @@ app.use(bodyParser.json());
|
||||
app.post('/auth', auth.auth);
|
||||
|
||||
require('./routes/jobs.route')(app);
|
||||
require('./routes/jobs.v2.route')(app);
|
||||
require('./routes/apply.v2.route')(app);
|
||||
require('./routes/vote.route')(app);
|
||||
require('./routes/apply.route')(app);
|
||||
|
||||
app.listen(serverPort, () => {
|
||||
console.log(`Server is listening on port ${serverPort}`);
|
||||
|
File diff suppressed because one or more lines are too long
@ -20,7 +20,7 @@ const indeedScraper = new IndeedScraper();
|
||||
// const page = fs.readFileSync('data/indeed/indeed-2020-04-16--092311.html');
|
||||
const page = fs.readFileSync('data/indeed/page2.html');
|
||||
|
||||
test.skip('Test Indeed scraper', async t => {
|
||||
test.test('Test Indeed scraper', async t => {
|
||||
const $ = cheerio.load(page);
|
||||
|
||||
indeedScraper.loadPage($);
|
||||
@ -35,36 +35,13 @@ test.skip('Test Indeed scraper', async t => {
|
||||
|
||||
await indeedScraper.filterAdverts();
|
||||
|
||||
await indeedScraper.addToMongo();
|
||||
// await indeedScraper.addToDB();
|
||||
|
||||
t.end();
|
||||
});
|
||||
|
||||
test.skip('Test full run Indeed scraper', async t => {
|
||||
await indeedScraper.go('london').catch((err) => {
|
||||
console.error('Indeed GO', err);
|
||||
});
|
||||
test.test('Test full run Indeed scraper', async t => {
|
||||
await indeedScraper.go('london');
|
||||
|
||||
t.end();
|
||||
});
|
||||
|
||||
|
||||
test.test('Test Indeed scraper -- MONGO', async t => {
|
||||
const $ = cheerio.load(page);
|
||||
|
||||
indeedScraper.loadPage($);
|
||||
|
||||
await indeedScraper.breakPage();
|
||||
|
||||
// await indeedScraper.getJobPages();
|
||||
|
||||
// console.log(await indeedScraper.checkNext());
|
||||
|
||||
// console.log(indeedScraper.items);
|
||||
|
||||
// await indeedScraper.filterAdverts();
|
||||
|
||||
await indeedScraper.addToMongo();
|
||||
|
||||
t.end();
|
||||
});
|
||||
|
@ -26,14 +26,13 @@ const s1jobsScraper = new RssS1Jobs();
|
||||
const feed = fs.readFileSync('test/data/s1jobs/m7dp711z2r.xml');
|
||||
|
||||
test.test('Test Jobserve scraper', async t => {
|
||||
let url = 'http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml';
|
||||
await s1jobsScraper.setStartUrl(url);
|
||||
|
||||
|
||||
s1jobsScraper.reduceItems();
|
||||
|
||||
await s1jobsScraper.filterAdverts();
|
||||
// await s1jobsScraper.addToDB();
|
||||
await s1jobsScraper.addToDB();
|
||||
|
||||
t.end();
|
||||
});
|
||||
|
@ -19,17 +19,17 @@ const testScraper = new RssTechnojobs();
|
||||
const feed = fs.readFileSync('test/data/technojobs/page1');
|
||||
|
||||
test.test('Test Technojobs scraper', async t => {
|
||||
await testScraper.loadFeed('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||
// await testScraper.loadFeed(feed);
|
||||
|
||||
await testScraper.reduceItems();
|
||||
// testScraper.reduceItems();
|
||||
|
||||
await s1jobsScraper.filterAdverts();
|
||||
// await s1jobsScraper.filterAdverts();
|
||||
// await s1jobsScraper.addToDB();
|
||||
|
||||
/* await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
|
||||
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
|
||||
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
|
||||
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
|
||||
*/
|
||||
|
||||
|
||||
t.end();
|
||||
});
|
||||
|
@ -22,20 +22,20 @@ console.log(`${__dirname}`);
|
||||
const page = fs.readFileSync(`${__dirname}/data/totaljobs/totaljobs-2020-04-16--121504.html`);
|
||||
|
||||
test.test('Test Totaljobs scraper', async t => {
|
||||
const $ = cheerio.load(page);
|
||||
const $ = cheerio.load(page);
|
||||
|
||||
totaljobsScraper.loadPage($);
|
||||
totaljobsScraper.loadPage($);
|
||||
|
||||
await totaljobsScraper.breakPage();
|
||||
await totaljobsScraper.breakPage();
|
||||
|
||||
await totaljobsScraper.getJobPages();
|
||||
// console.log(await indeedScraper.checkNext());
|
||||
await totaljobsScraper.getJobPages();
|
||||
// console.log(await indeedScraper.checkNext());
|
||||
|
||||
// console.log(totaljobsScraper.items);
|
||||
console.log(totaljobsScraper.items);
|
||||
|
||||
await totaljobsScraper.filterAdverts();
|
||||
await totaljobsScraper.filterAdverts();
|
||||
|
||||
// await totaljobsScraper.addToDB();
|
||||
// await totaljobsScraper.addToDB();
|
||||
|
||||
t.end();
|
||||
t.end();
|
||||
});
|
||||
|
14
test/wip.js
14
test/wip.js
@ -1,14 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 23/07/2020
|
||||
* Time: 09:26
|
||||
|
||||
*/
|
||||
|
||||
const { Corpus } = require('../lib/corpus');
|
||||
|
||||
const text = 'ESTAMP DEVELOPER 6 month contract £450-525 / day Developer, SQL, Photoshop, Javascript, … NET, C#, Javascript Advanced knowledge of SQL Server TSQL Experience of the design and … PDF stamp development E-STAMP DEVELOPER 6 month contract';
|
||||
const out = Corpus.process(text);
|
||||
|
||||
console.log(out);
|
@ -1,71 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 16/04/2020
|
||||
* Time: 23:35
|
||||
|
||||
*/
|
||||
const CronJob = require('cron').CronJob;
|
||||
const IndeedScraper = require('./scrapers/indeed');
|
||||
const TotaljobsScraper = require('./scrapers/totaljobs');
|
||||
const CwjobsScraper = require('./scrapers/cwjobs');
|
||||
const JobserveScraper = require('./scrapers/rss.jobserve');
|
||||
const RssS1Jobs = require('./scrapers/rss.s1jobs');
|
||||
const RssTechnojobs = require('./scrapers/rss.technojobs');
|
||||
|
||||
(async function () {
|
||||
console.log('Started..');
|
||||
const indeedScraper = new IndeedScraper();
|
||||
const totaljobsScraper = new TotaljobsScraper();
|
||||
const cwjobsScraper = new CwjobsScraper();
|
||||
const jobserveScraper = new JobserveScraper();
|
||||
const s1jobsScraper = new RssS1Jobs();
|
||||
const technojobsScraper = new RssTechnojobs();
|
||||
|
||||
await indeedScraper.go('london');
|
||||
|
||||
|
||||
await totaljobsScraper.go('london');
|
||||
await cwjobsScraper.go('london');
|
||||
await indeedScraper.go('glasgow');
|
||||
await totaljobsScraper.go('glasgow');
|
||||
await cwjobsScraper.go('glasgow');
|
||||
await indeedScraper.go('edinburgh');
|
||||
await totaljobsScraper.go('edinburgh');
|
||||
await cwjobsScraper.go('edinburgh');
|
||||
await indeedScraper.go('milton keynes');
|
||||
await totaljobsScraper.go('milton keynes');
|
||||
await cwjobsScraper.go('milton keynes');
|
||||
/*
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/4E2AC50E02AD128B.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/6DA9769BA89834AA.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/EDF47BEA6B31EF.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/3CAD044BEF2BFA.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/C7B25D86D0844A.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/64A3EEF615FA4C.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/CA49421A86CA3F74.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/846CDA8658FF93A3.rss');
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/ED1708BF42EF3513.rss'); // javascript node 2 Jul 2020
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/4C67595E323E3453.rss'); // vuejs 2 Jul 2020
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss'); // svelte 2 Jul 2020
|
||||
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/m7dp711z2r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/pfvf7o7z2r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/lluqnt8z2r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/tu33qt8z2r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/u3btnz8z2r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');
|
||||
|
||||
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||
|
||||
*/
|
||||
|
||||
|
||||
})();
|
File diff suppressed because one or more lines are too long
22
words.js
22
words.js
@ -1,22 +0,0 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 27/07/2020
|
||||
* Time: 10:08
|
||||
|
||||
*/
|
||||
|
||||
const jsonfile = require('jsonfile');
|
||||
|
||||
const data = require('./unused.json');
|
||||
|
||||
function show(size) {
|
||||
const f = data.filter((v) => {
|
||||
return (v.length === size);
|
||||
});
|
||||
|
||||
jsonfile.writeFileSync('limited.json', [...new Set(f)]);
|
||||
console.log('done');
|
||||
}
|
||||
|
||||
show(11);
|
Loading…
Reference in New Issue
Block a user