Compare commits

...

16 Commits

Author SHA1 Message Date
Martin Donnelly
91a51d7fda Merge branch 'svelte-20201007' into 'development'
back to svelte version

See merge request martind2000/jobscraper!7
2021-01-13 10:52:54 +00:00
Martin Donnelly
8b587ef055 back to svelte version 2021-01-13 10:50:03 +00:00
Martin Donnelly
0743d052dd Merge branch 'svelte-20201007' into 'development'
Added angular to indeed, cws and total

See merge request martind2000/jobscraper!6
2020-12-10 16:06:56 +00:00
Martin Donnelly
17a348f625 Added angular to indeed, cws and total
Added a new search for jobserve
2020-12-10 16:05:21 +00:00
Martin Donnelly
383daf9eeb Merge branch 'svelte-20201007' into 'development'
Svelte to Angular

See merge request martind2000/jobscraper!5
2020-11-21 12:07:48 +00:00
Martin Donnelly
c4713863c0 Svelte to Angular
- Changed UI runtime from Svelte to Angular
2020-11-21 12:06:32 +00:00
Martin Donnelly
e977f056cb Merge branch 'svelte-20201007' into 'development'
Updates to svelte

See merge request martind2000/jobscraper!4
2020-10-07 11:03:50 +00:00
Martin Donnelly
4363a5db77 Updates to svelte 2020-10-07 12:02:39 +01:00
Martin Donnelly
a96881b680 Merge branch 'JS003-expandMongo' into 'development'
JS003-expandMongo

See merge request martind2000/jobscraper!3
2020-10-07 10:54:21 +00:00
Martin Donnelly
8c70b52713 JS003-expandMongo 2020-10-07 11:53:17 +01:00
Martin Donnelly
150ece2aa0 Merge branch 'JS003-expandMongo' into 'development'
JS003-expandMongo

See merge request martind2000/jobscraper!2
2020-09-10 18:43:20 +00:00
Martin Donnelly
2872f92d67 JS003-expandMongo 2020-09-10 19:42:17 +01:00
Martin Donnelly
1513ea5010 Merge branch 'JOBSCRAPER-1' into 'development'
Resolve JOBSCRAPER-1

See merge request martind2000/jobscraper!1
2020-09-10 13:14:18 +00:00
Martin Donnelly
f2880b661e JOBSCRAPER-1 Implement a bit of AI
* Moved to mongo
* UI updated to use mongo
* UI is a bit fancier now
* Import sql to mongo
2020-09-10 14:13:08 +01:00
Martin Donnelly
1938bbeb5f JOBSCRAPER-1 Implement a bit of AI 2020-09-01 12:44:42 +01:00
Martin Donnelly
6a23583b5b WIP: Adding brain 2020-08-24 09:35:30 +01:00
51 changed files with 14582 additions and 576 deletions

32
.edditorconfig Normal file
View File

@ -0,0 +1,32 @@
; http://editorconfig.org
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
indent_style = space
indent_size = 2
[*.txt]
insert_final_newline = false
trim_trailing_whitespace = false
[*.py]
indent_size = 4
[*.m]
indent_size = 4
[Makefile]
indent_style = tab
indent_size = 8
[*.{js,json}]
indent_style = space
indent_size = 2
[*.md]
trim_trailing_whitespace = false

View File

@ -9,7 +9,7 @@
"env": { "env": {
"browser": true, "browser": true,
"node": true, "node": true,
"es6": true "es2017": true
}, },
"rules": { "rules": {
"arrow-spacing": "error", "arrow-spacing": "error",

1
.gitignore vendored
View File

@ -147,3 +147,4 @@ fabric.properties
/live/ /live/
!/output/ !/output/
/db/jobs.db /db/jobs.db
!/db/

1
biglist.json Normal file

File diff suppressed because one or more lines are too long

204
brain.json Normal file
View File

@ -0,0 +1,204 @@
{
"categories": {
"good": true,
"bad": true
},
"docCount": {
"good": 43,
"bad": 5
},
"totalDocuments": 48,
"vocabulary": {
"tsql": true,
"developer": true,
"contract": true,
"web": true,
"javascript": true,
"js": true,
"node": true,
"es": true,
"agile": true,
"nodejs": true,
"london": true,
"aws": true,
"sql": true,
"postgresql": true,
"mysql": true,
"docker": true,
"ecs": true,
"automation": true,
"jslint": true,
"jshint": true,
"vuejs": true,
"vue": true,
"nginx": true,
"remotely": true,
"mvc": true,
"remote": true,
"iot": true,
"mqtt": true,
"es6": true,
"es2016": true,
"es2017": true,
"es2018": true,
"react": true,
"redux": true,
"graphql": true,
"java": true,
"reactjs": true,
"apps": true,
"html": true,
"css": true,
"code": true,
"angular": true,
"ember": true,
"restful": true,
"apis": true,
"infrastructure": true,
"software": true,
"native": true,
"med": true,
"mobile": true,
"client": true,
"applications": true,
"digital": true,
"analytics": true,
"dashboarding": true,
"online": true,
"analyse": true,
"dashboards": true,
"google": true,
"query": true,
"data": true,
"stakeholders": true,
"enhancements": true,
"requirements": true,
"c": true,
"net": true,
"technologies": true,
"azure": true,
"understanding": true,
"devops": true,
"tools": true,
"frameworks": true,
"scotland": true,
"responsibility": true,
"programme": true,
"functions": true,
"asp": true,
"project": true,
"transform": true,
"collaborative": true,
"technical": true,
"framework": true,
"nhibernate": true,
"server": true,
"api": true,
"development": true,
"lifecycle": true,
"specification": true,
"appointments": true
},
"vocabularySize": 89,
"wordCount": {
"good": 157,
"bad": 5
},
"wordFrequencyCount": {
"good": {
"tsql": 1,
"developer": 6,
"contract": 9,
"web": 6,
"javascript": 7,
"js": 3,
"node": 2,
"es": 1,
"agile": 2,
"nodejs": 1,
"london": 3,
"aws": 3,
"sql": 3,
"postgresql": 1,
"mysql": 1,
"docker": 1,
"ecs": 1,
"automation": 1,
"jslint": 1,
"jshint": 1,
"vuejs": 1,
"vue": 2,
"nginx": 1,
"remotely": 1,
"mvc": 5,
"remote": 2,
"iot": 1,
"mqtt": 1,
"es6": 1,
"es2016": 1,
"es2017": 1,
"es2018": 1,
"apps": 1,
"html": 5,
"css": 5,
"code": 2,
"react": 2,
"angular": 1,
"ember": 1,
"restful": 1,
"apis": 1,
"infrastructure": 1,
"software": 2,
"native": 1,
"med": 1,
"mobile": 1,
"client": 4,
"applications": 2,
"digital": 2,
"analytics": 1,
"dashboarding": 1,
"online": 1,
"analyse": 1,
"dashboards": 1,
"google": 1,
"query": 1,
"data": 1,
"stakeholders": 1,
"enhancements": 3,
"requirements": 3,
"c": 4,
"net": 5,
"technologies": 4,
"azure": 2,
"understanding": 1,
"devops": 2,
"tools": 1,
"frameworks": 1,
"scotland": 1,
"responsibility": 1,
"programme": 1,
"functions": 1,
"asp": 1,
"project": 1,
"transform": 1,
"collaborative": 1,
"technical": 1,
"framework": 1,
"nhibernate": 1,
"server": 1,
"api": 1,
"development": 1,
"lifecycle": 1,
"specification": 1,
"appointments": 1
},
"bad": {
"react": 1,
"redux": 1,
"graphql": 1,
"java": 1,
"reactjs": 1
}
},
"options": {}
}

Binary file not shown.

View File

@ -38,6 +38,7 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
}, null, true); }, null, true);
new CronJob('0 6-23/1 * * *', async function() { new CronJob('0 6-23/1 * * *', async function() {
await jobserveScraper.go('https://www.jobserve.com/MySearch/D48462060FB24B6C.rss');
await jobserveScraper.go('https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss'); await jobserveScraper.go('https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss');
await jobserveScraper.go('https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss'); await jobserveScraper.go('https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss');
await jobserveScraper.go('https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss'); await jobserveScraper.go('https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss');
@ -54,13 +55,13 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
await jobserveScraper.go('https://www.jobserve.com/MySearch/4C67595E323E3453.rss'); // vuejs 2 Jul 2020 await jobserveScraper.go('https://www.jobserve.com/MySearch/4C67595E323E3453.rss'); // vuejs 2 Jul 2020
await jobserveScraper.go('https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss'); // svelte 2 Jul 2020 await jobserveScraper.go('https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss'); // svelte 2 Jul 2020
await s1jobsScraper.go('http://www.s1jobs.com/xml/m7dp711z2r.xml'); /* await s1jobsScraper.go('http://www.s1jobs.com/xml/m7dp711z2r.xml');
await s1jobsScraper.go('http://www.s1jobs.com/xml/pfvf7o7z2r.xml'); await s1jobsScraper.go('http://www.s1jobs.com/xml/pfvf7o7z2r.xml');
await s1jobsScraper.go('http://www.s1jobs.com/xml/lluqnt8z2r.xml'); await s1jobsScraper.go('http://www.s1jobs.com/xml/lluqnt8z2r.xml');
await s1jobsScraper.go('http://www.s1jobs.com/xml/tu33qt8z2r.xml'); await s1jobsScraper.go('http://www.s1jobs.com/xml/tu33qt8z2r.xml');
await s1jobsScraper.go('http://www.s1jobs.com/xml/u3btnz8z2r.xml'); await s1jobsScraper.go('http://www.s1jobs.com/xml/u3btnz8z2r.xml');
await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml'); await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml');
await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml'); await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');*/
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1'); await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1'); await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');

View File

@ -8,6 +8,12 @@
const filterReject = require('../lib/filter_reject'); const filterReject = require('../lib/filter_reject');
const filterAccept = require('../lib/filter_md_jobs'); const filterAccept = require('../lib/filter_md_jobs');
const dbmanager = require('../lib/dbmanager'); const dbmanager = require('../lib/dbmanager');
const JobsModel = require('../lib/mongoManager');
const SHA = require('crypto-js/sha256');
const { Utils } = require('@rakh/utils');
const { Corpus } = require('./corpus');
class MasterBase { class MasterBase {
@ -57,6 +63,79 @@ class MasterBase {
}); });
} }
/**
*
*/
addToMongo() {
console.log('>> ADD TO MONGO!');
for(const item of this.items) {
// console.log('add', item);
const newObj = this.reduceData(item);
const newJob = new JobsModel(newObj);
newJob.save().then((m) => {
console.log('m', m.details.title);
}).catch((err) => {
console.error('m', err);
});
}
}
/**
*
* @param inval
* @returns {number}
*/
analyseRate(inval) {
console.log('analyseRate', inval);
let outVal = 0;
const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
const clearSpace = /\s+/g;
const result = inval.replace(cleanerReg, '').replace(clearSpace, ' ');
const resultArray = result.trim().split((' '));
if (resultArray.length > 0) {
const item = parseInt(resultArray[0], 10);
if (item < 100) outVal = 0;
else if ((item > 100) && (item < 5000)) outVal = 1;
else if (item >= 5000) outVal = 2;
}
else return 0;
return outVal;
}
/**
*
* @param d
* @returns {{data: {read: number, autoclass: number, applied: number, jobtype: number, class: number}, details: {}}}
*/
reduceData(d) {
const clearPremium = /(\n+)(Featured|Premium)/gi;
const otherStupid = /((↵\s+)+)(Featured|Premium)/gi;
const outObj = { 'details':{}, 'data':{ 'read':0, 'applied':0, 'jobtype': 0, 'class':0, 'autoclass':0 } };
outObj.details = Utils.extractFromObj(d, ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp']);
outObj.details.title = outObj.details.title.replace(clearPremium, '');
outObj.details.title = outObj.details.title.replace(otherStupid, '');
outObj.details.hashed = SHA(outObj.details.summary);
outObj.data.read = 0;
outObj.data.applied = d.applied || 0;
outObj.data.jobtype = this.analyseRate(d.salary);
outObj.data.autoclass = Corpus.process(d.summary);
outObj.data.timestamp = d.timestamp * 1000;
return outObj;
}
/** /**
* *
* @returns {Promise<void>} * @returns {Promise<void>}
@ -120,10 +199,15 @@ class MasterBase {
return `https://image.silvrtree.co.uk/q${q}/${url}`; return `https://image.silvrtree.co.uk/q${q}/${url}`;
} }
/**
*
* @returns {Promise<void>}
*/
async go() { async go() {
this.items = []; this.items = [];
this.rawItems = []; this.rawItems = [];
} }
} }
module.exports = MasterBase; module.exports = MasterBase;

91
lib/corpus.js Normal file
View File

@ -0,0 +1,91 @@
const jsonfile = require('jsonfile');
const words = require('../lib/wordlist.json');
const wordsAdditional = require('../lib/wordlistAdditional.json');
// Running tally of how often each surviving token has been seen (token -> count).
const bigList = new Map([]);

// Tokens that raise a posting's score.
const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es',
  'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs',
  'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote',
  'iot', 'mqtt'];

// Tokens that lower a posting's score.
const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];

// Tokens seen so far that are in neither list; written out by exportUnused.
let unrated = [];

// Attach Corpus to the global object (window when `global` is not defined),
// reusing an existing instance if one is already there.
var _global = typeof global === 'undefined' ? window : global;
var Corpus = (_global.Corpus = _global.Corpus || {});

// Matches e-mail addresses. The `g` and `i` flags are required: the text is
// still mixed-case when this pattern is applied, and a posting can contain
// more than one address.
const emailRegex = /[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/gi;
// Matches script / style elements, HTML comments and any remaining tags.
const detagRegex = /(<script(\s|\S)*?<\/script>)|(<style(\s|\S)*?<\/style>)|(<!--(\s|\S)*?-->)|(<\/?(\s|\S)*?>)/gi;
// Matches punctuation / symbol characters and runs of digits.
const desymbolNumberRegex = /[\n\t+$,\?\.\%\*=&:;()\\/\-£…"]|\d+/gi;
// Matches runs of whitespace so they can be collapsed to a single space.
const deSpace = /\s+/g;
/**
 * Normalise raw advert text into a lower-case, single-spaced string.
 * The first e-mail match, markup, symbols and digits are each replaced
 * by a space before whitespace is collapsed.
 *
 * @param intext raw text; a missing or null value yields ''
 * @returns {string}
 */
function cleanText(intext) {
  if (intext === undefined || intext === null) return '';

  const withoutEmail = intext.replace(emailRegex, ' ');
  const withoutTags = withoutEmail.replace(detagRegex, ' ');
  const withoutSymbols = withoutTags.replace(desymbolNumberRegex, ' ');
  const singleSpaced = withoutSymbols.replace(deSpace, ' ');

  return singleSpaced.trim().toLowerCase();
}
/**
 * Return the distinct values of an iterable, in first-seen order.
 *
 * @param intext iterable of values; a missing or null value yields []
 * @returns {Array}
 */
function dedupe(intext) {
  if (intext === null || intext === undefined) return [];

  return Array.from(new Set(intext));
}
/**
 * Add one to the running count kept for `item` in `bigList`,
 * starting the count at 1 the first time the item is seen.
 *
 * @param item token to count
 */
function incItem(item) {
  const seenSoFar = bigList.has(item) ? bigList.get(item) : 0;

  bigList.set(item, seenSoFar + 1);
}
/**
 * Score a job description against the good / bad word lists.
 *
 * The text is cleaned, split into distinct tokens, and tokens found in either
 * word list file (`words`, `wordsAdditional`) are discarded. Every surviving
 * token is counted in `bigList`; tokens in neither goodWords nor badWords are
 * appended to `unrated`.
 *
 * Empty or missing input produces no tokens at all, rather than a single
 * empty-string token.
 *
 * @param intext raw description text
 * @returns {{good: string[], bad: string[], score: number, words: string[]}}
 *   `score` is the number of good tokens minus five times the number of bad ones.
 */
Corpus.process = function(intext) {
  // ''.split(' ') yields [''], so drop empty tokens explicitly.
  const tokens = cleanText(intext).split(' ').filter((token) => token !== '');

  const cleanedArray = dedupe(tokens).filter((v) => {
    return (words.indexOf(v) === -1 && wordsAdditional.indexOf(v) === -1);
  });

  const good = cleanedArray.filter((v) => goodWords.indexOf(v) !== -1);
  const bad = cleanedArray.filter((v) => badWords.indexOf(v) !== -1);
  const unused = cleanedArray.filter((v) => {
    return ((badWords.indexOf(v) === -1) && (goodWords.indexOf(v) === -1));
  });

  cleanedArray.forEach((item) => {
    incItem(item);
  });

  unrated = [...unrated, ...unused];

  const score = good.length - (bad.length * 5);

  return { good, bad, score, 'words': cleanedArray };
};
/**
 * Dump the collected statistics to disk: the distinct unrated tokens go to
 * ./unused.json and the token counts, highest first, go to ./biglist.json.
 * The counts are also printed to the console in insertion order.
 */
Corpus.exportUnused = function() {
  const distinctUnrated = dedupe(unrated);
  jsonfile.writeFileSync('./unused.json', distinctUnrated);

  const rankedCounts = Array.from(bigList).sort((left, right) => right[1] - left[1]);
  jsonfile.writeFileSync('./biglist.json', rankedCounts);

  console.log(Array.from(bigList));
};
// Export Corpus as a named property when a `module` object exists;
// the guard skips the export in environments where `module` is undefined.
if (typeof module !== 'undefined')
module.exports = {
'Corpus': Corpus
};

34
lib/mongoManager.js Normal file
View File

@ -0,0 +1,34 @@
/**
* Created by WebStorm.
* User: martin
* Date: 22/07/2020
* Time: 17:00
*/
const mongoose = require('mongoose');
const log4js = require('log4js');
const logger = log4js.getLogger();
const JobsModel = require('../models/jobs');
// const { Utils } = require('@rakh/utils');
require('dotenv').config();
logger.level = 'debug';

// The full connection string, including any credentials, comes from the
// MONGOCONNECT environment variable (loaded from .env by dotenv above).
// Credentials must never be hard-coded or written to the log.
const mongoConnect = process.env.MONGOCONNECT;

if (typeof mongoConnect !== 'string' || mongoConnect === '')
  logger.error('MONGOCONNECT is not set; the MongoDB connection cannot be opened');
else
  // Mask the "user:password@" part before logging the target.
  logger.debug(mongoConnect.replace(/\/\/[^/@]*@/, '//***@'));

// A failed initial connection rejects the promise returned by connect();
// handle it here so it does not surface as an unhandled rejection.
mongoose.connect(mongoConnect).catch((err) => {
  logger.error('initial MongoDB connection failed:', err.message);
});

const mDB = mongoose.connection;
// Errors raised on the connection after it has been created.
mDB.on('error', console.error.bind(console, 'connection error:'));

// Importing this module opens the connection as a side effect and hands
// back the Jobs model.
module.exports = JobsModel;

View File

@ -89,6 +89,7 @@ class MasterRSS extends MasterBase {
await this.filterAdverts(); await this.filterAdverts();
if (this.items.length > 0) await this.addToDB(); if (this.items.length > 0) await this.addToDB();
if (this.items.length > 0) await this.addToMongo();
} }
else else
console.log('No items to process'); console.log('No items to process');

View File

@ -21,9 +21,14 @@ class MasterScraper extends MasterBase {
super(); super();
} }
/**
*
* @param url
* @param useStone
* @returns {Promise<unknown>}
*/
getContent(url, useStone = false) { getContent(url, useStone = false) {
/* /*
let headers = new Headers({ let headers = new Headers({
"Accept" : "application/json", "Accept" : "application/json",
@ -54,19 +59,28 @@ fetch(url, {
resolve(response.body); resolve(response.body);
}) })
.catch((e) => { .catch((e) => {
console.error('getContent', e );
reject(e.response.body); reject(e.response.body);
}); });
}); });
}; };
/**
 * Write fetched HTML to disk as pages/<siteid>-<timestamp>.html, where the
 * timestamp is the current time formatted as 'YYYY-MM-DD--hh'.
 * The write is synchronous (fs.writeFileSync); nothing is awaited here.
 * NOTE(review): `hh` is presumably fecha's 12-hour token, in which case an
 * AM and a PM fetch on the same day share a filename — confirm against fecha docs.
 * NOTE(review): assumes the pages/ directory already exists — confirm.
 *
 * @param html page content to store
 * @returns {Promise<void>}
 */
async savePage(html) {
const now = fecha.format(new Date(), 'YYYY-MM-DD--hh');
const filename = `pages/${this.siteid}-${now}.html`;
fs.writeFileSync(filename, html);
}
async getPage() { async getPage() {
console.log('>> getPage: fetching', this.url); console.log('>> getPage: fetching', this.url);
const now = fecha.format(new Date(), 'YYYY-MM-DD--hhmmss');
const filename = `${this.siteid}-${now}.html`;
await this.getContent(this.url, this.useStone) await this.getContent(this.url, this.useStone)
.then((html) => { .then((html) => {
fs.writeFileSync(filename, html); // console.log('>> getPage:: got', html);
console.log('>> getPage:: OK');
if (this.saveFile) this.savePage(html);
const $ = cheerio.load(html); const $ = cheerio.load(html);
this.loadPage($); this.loadPage($);
}) })
@ -75,30 +89,59 @@ fetch(url, {
// Site specific parts below here // Site specific parts below here
/**
* Break each page into items
* @returns {Promise<void>}
*/
async breakPage() { async breakPage() {
} }
/**
*
* @param part
* @returns {Promise<void>}
*/
async extractDetails(part) { async extractDetails(part) {
} }
/**
*
* @returns {Promise<void>}
*/
async checkNext() { async checkNext() {
} }
/**
*
* @returns {Promise<void>}
*/
async processSite() { async processSite() {
} }
/**
*
* @returns {Promise<void>}
*/
async getIndividualPage() { async getIndividualPage() {
} }
/**
*
* @returns {Promise<void>}
*/
async getJobPages() { async getJobPages() {
} }
/**
*
* @returns {Promise<void>}
*/
async go() { async go() {
} }

1007
lib/wordlist.json Normal file

File diff suppressed because it is too large Load Diff

8790
lib/wordlistAdditional.json Normal file

File diff suppressed because it is too large Load Diff

559
limited.json Normal file
View File

@ -0,0 +1,559 @@
[
"experienced",
"exceptional",
"maintaining",
"familiarity",
"commodities",
"opportunity",
"possibility",
"integration",
"engineering",
"derivatives",
"prefferable",
"nutritional",
"performance",
"immediately",
"information",
"responsible",
"environment",
"stakeholder",
"proactively",
"requirement",
"temporarily",
"interrogate",
"effectively",
"progressing",
"substantial",
"identifying",
"maintenance",
"workarounds",
"departments",
"consultancy",
"regulations",
"statistical",
"previously·",
"euromonitor",
"documenting",
"bookkeeping",
"reconciling",
"hardworking",
"themselves!",
"appropriate",
"socialising",
"fundraising",
"initiatives",
"sponsorship",
"orientation",
"competitive",
"illustrator",
"outstanding",
"interaction",
"consistency",
"touchpoints",
"freshtechit",
"recruitment",
"catastrophe",
"accountable",
"workstreams",
"scalability",
"undertaking",
"interacting",
"significant",
"considering",
"independent",
"collaborate",
"arrangement",
"unsolicited",
"empowerment",
"connections",
"specialists",
"credentials",
"personality",
"established",
"northampton",
"advertising",
"operational",
"mathematics",
"contractors",
"instruments",
"referencing",
"locationsco",
"disciplines",
"corporation",
"investments",
"conferences",
"demonstrate",
"directorate",
"acknowledge",
"legislation",
"designgreat",
"understands",
"perspective",
"association",
"enforcement",
"prestigious",
"individuals",
"alternative",
"technically",
"challenging",
"discussions",
"lifeworking",
"interactive",
"storyboards",
"communicate",
"abilitywork",
"englishgood",
"detailbonus",
"angularwhat",
"neededabout",
"innovations",
"enthusiasts",
"instructors",
"prospective",
"comfortable",
"involvement",
"adventurous",
"marketplace",
"forecasting",
"contractual",
"underpinned",
"acquisition",
"microsofts",
"progression",
"suggestions",
"proficiency",
"participate",
"joblocation",
"methodology",
"continually",
"cataloguing",
"projectgood",
"incremental",
"overarching",
"confidently",
"circulatory",
"adjustments",
"interesting",
"consultants",
"experienceb",
"hourscasual",
"switzerland",
"contributes",
"participant",
"improvement",
"articulates",
"contributed",
"comfortably",
"deployments",
"integrating",
"configuring",
"platforming",
"educatedday",
"contracting",
"monthstotal",
"outsourcing",
"designswork",
"ideasdesign",
"deviceswork",
"fundamental",
"businessjob",
"implemented",
"transaction",
"reliability",
"upgradesyou",
"uncertainty",
"enterpriser",
"teamprovide",
"trafficking",
"doubleclick",
"communities",
"forestlink",
"dimensional",
"coordinator",
"spreadsheet",
"pressurised",
"assignments",
"willingness",
"certificate",
"summaryrole",
"institution",
"segregation",
"preparation",
"electronics",
"duplication",
"surrounding",
"informatica",
"blackfriars",
"terminology",
"shabarinath",
"interfacing",
"expectation",
"proprietary",
"conflicting",
"itecopeople",
"opowershell",
"submissions",
"negotiating",
"escalations",
"transferred",
"protections",
"customizing",
"oxfordshire",
"progressive",
"bishopsgate",
"partnership",
"futureheads",
"permissions",
"efficiently",
"unspecified",
"potentially",
"disclaimers",
"foreseeable",
"sustainable",
"calculation",
"replication",
"constitutes",
"recommended",
"enterprises",
"negotiation",
"imaginative",
"differences",
"nationality",
"impediments",
"refinements",
"translating",
"obligations",
"flexibility",
"unashamedly",
"exclusively",
"replacement",
"essentially",
"artifactory",
"theoretical",
"probability",
"integrators",
"contractor?",
"interested?",
"functioning",
"chamberlain",
"inclusivity",
"iteratively",
"enhancement",
"constraints",
"establishes",
"qualitative",
"influencing",
"procurement",
"experiences",
"furthermore",
"disciplined",
"unnecessary",
"bureaucracy",
"represented",
"siteimprove",
"lokhandwala",
"specialises",
"rationalize",
"competncies",
"restoration",
"allocations",
"admittances",
"furnishings",
"cleanliness",
"residential",
"contactable",
"conventions",
"translation",
"approaching",
"intecselect",
"linguistics",
"southampton",
"beautifully",
"estimations",
"newsletters",
"summarising",
"simulations",
"portfolio's",
"coronavirus",
"opoortunity",
"unavailable",
"accordingly",
"penetration",
"remediation",
"elimination",
"achievement",
"facilitator",
"westminster",
"introducing",
"businesses'",
"capitalists",
"investigate",
"countryside",
"problematic",
"coordinates",
"components'",
"supervision",
"bonavolonta",
"proposition",
"foundations",
"suitability",
"researchers",
"explanation",
"commitments",
"computation",
"questioning",
"experiments",
"visualfiles",
"cloudstream",
"determining",
"deliverable",
"inquisitive",
"backgrounds",
"thoughtspot",
"specialized",
"veloppement",
"importantes",
"typedscript",
"restaurants",
"prophylaxis",
"transmitted",
"appointment",
"encouraging",
"aggregating",
"championing",
"conjunction",
"customising",
"photography",
"authorities",
"competition",
"collections",
"contraintes",
"fonctionnel",
"adaptabilit",
"changements",
"conceptions",
"utilisation",
"shortlisted",
"reusability",
"recognizing",
"decisioning",
"accommodate",
"limitations",
"resourceful",
"algorithmic",
"unconcerned",
"intelligent",
"considerate",
"clientbased",
"accelerator",
"dreamweaver",
"applicant's",
"proactivity",
"aggregation",
"restriction",
"traditional",
"corporately",
"memberships",
"standardise",
"theecsgroup",
"scarchitect",
"consolidate",
"extensively",
"afghanistan",
"encompasses",
"distinctive",
"professions",
"interviewed",
"formulation",
"transitions",
"aspirations",
"ingredients",
"setterfield",
"candidates",
"leatherhead",
"publication",
"undoubtedly",
"basingstoke",
"underground",
"reinsurance",
"exemplifies",
"civiization",
"developer's",
"bazzelgette",
"adjacencies",
"feasibility",
"frontinvest",
"neogotiable",
"unconnected",
"conditional",
"bottlenecks",
"productions",
"pharmacists",
"technicians",
"prescribing",
"stewardship",
"recognising",
"convictions",
"subscribing",
"transparent",
"wireframing",
"insidehmcts",
"justicejobs",
"criminology",
"hospitality",
"structuring",
"educational",
"substantive",
"secondments",
"transgender",
"smartphones",
"microsoft's",
"definitions",
"validations",
"prioritised",
"autoscaling",
"abstraction",
"correlation",
"recognition",
"contributor",
"apigedevops",
"incorporate",
"woocommerce",
"informatics",
"adfadc@apps",
"automations",
"formulating",
"beneficiary",
"referential",
"jsdevsecops",
"solutioning",
"measurement",
"familiarise",
"eligibility",
"standardize",
"experience?",
"bournemouth",
"implementer",
"agilesphere",
"assumptions",
"accountancy",
"cockroachdb",
"promotional",
"facilitates",
"discoveries",
"bladecenter",
"considered!",
"cooperation",
"exploration",
"angulareact",
"preferabbly",
"harmonising",
"convenience",
"inclusively",
"strategists",
"attribution",
"fromscratch",
"combination",
"solutionize",
"accelerated",
"diagnostics",
"sensibility",
"informative",
"intellegnce",
"specilisits",
"projections",
"associative",
"personalize",
"farnborough",
"necessarily",
"nservicebus",
"constrained",
"prioritized",
"behavioural",
"chakraborty",
"leaderships",
"flourishing",
"uniqstudios",
"simplifying",
"realisation",
"extensions!",
"prioritises",
"experience!",
"candidates!",
"inclination",
"stimulating",
"appreciated",
"reinventing",
"compression",
"jscybsecdev",
"equirements",
"generalized",
"compressors",
"assessments",
"beyondtrust",
"engagements",
"numerically",
"electricity",
"interchange",
"jsswift_dev",
"circulating",
"attachments",
"credibility",
"vnetpeering",
"territories",
"staggering!",
"developers!",
"peripherals",
"virtualized",
"bitdefender",
"jssitecorjs",
"positioning",
"appreciates",
"chessington",
"controllers",
"controlling",
"quantifying",
"virtualised",
"manufacture",
"fluorescent",
"governments",
"bigcommerce",
"therapeutic",
"importantly",
"differently",
"rigourously",
"shareholder",
"copywriting",
"anticipated",
"approximate",
"behdarvandi",
"testability",
"beneficial!",
"jswmibmcraw",
"exhibitions",
"talentpoint",
"propagation",
"interviews!",
"solutionise",
"elasticache",
"manoeuvring",
"teamservice",
"geographies",
"efficientip",
"organically",
"advancement",
"jshodanular",
"wholesalers",
"multitenant",
"encouraged?",
"freelancers",
"composition",
"#jobswagger",
"typographic",
"stereotypes",
"clerkenwell",
"sacrificing",
"resolutions",
"technology?",
"advantagous"
]

22
mapbuilder.js Normal file
View File

@ -0,0 +1,22 @@
/**
* Created by WebStorm.
* User: martin
* Date: 27/07/2020
* Time: 15:34
*/
const jsonfile = require('jsonfile');
// Tokens given a positive weight.
const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es', 'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs', 'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote', 'iot', 'mqtt'];
// Tokens given a negative weight.
const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];

// token -> weight
const brain = new Map([]);

// Iterate over the whole of each list so the final entry is included too.
for (const word of goodWords)
  brain.set(word, 3);

for (const word of badWords)
  brain.set(word, -5);

// Written out as an array of [token, weight] pairs.
jsonfile.writeFileSync('brain.json', [...brain]);

156
migrate.js Normal file
View File

@ -0,0 +1,156 @@
/**
* Created by WebStorm.
* User: martin
* Date: 22/07/2020
* Time: 10:20
*/
const db = require('./lib/connect');
const log4js = require('log4js');
const logger = log4js.getLogger();
const { Utils } = require('@rakh/utils');
const { Corpus } = require('./lib/corpus');
const SHA = require('crypto-js/sha256');
/*
2604
const mongoose = require('mongoose');
const log4js = require('log4js');
const logger = log4js.getLogger();
const Jobs = require('./models/jobs');
require('dotenv').config();
logger.level = 'debug';
logger.debug(`mongodb://martin:1V3D4m526i@${ process.env.DBHOST }/${ process.env.DBNAME}`);
mongoose.connect(`mongodb://martin:1V3D4m526i@${ process.env.DBHOST }/${ process.env.DBNAME}`);
const mDB = mongoose.connection;
mDB.on('error', console.error.bind(console, 'connection error:'));
*/
const Jobs = require('./lib/mongoManager');
const migrate = (function() {
/**
 * Classify a free-text salary / rate string into a coarse bucket using the
 * first number found in it.
 *
 * Returns 1 when the first number is above 100 and below 5000, 2 when it is
 * 5000 or more, and 0 otherwise (below 100, exactly 100, or no number).
 * A missing value (null / undefined) yields 0 instead of throwing, and
 * non-string values are converted to text before parsing.
 *
 * @param inval raw salary text
 * @returns {number} 0, 1 or 2
 */
function analyseRate(inval) {
  if (inval === null || inval === undefined) return 0;

  // Strip "IR35", currency / punctuation symbols, letters and decimal parts.
  const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
  const clearSpace = /\s+/g;

  const [first] = String(inval).replace(cleanerReg, '').replace(clearSpace, ' ').trim().split(' ');
  const rate = parseInt(first, 10);

  // NaN fails every comparison below and therefore falls through to 0.
  if (rate > 100 && rate < 5000) return 1;
  if (rate >= 5000) return 2;

  return 0;
}
/**
 * Convert a row from the old SQL store into the `{ details, data }` document
 * shape used for MongoDB. The `read` flag is always reset to 0.
 *
 * @param d row to convert
 * @returns {{details: Object, data: Object}}
 */
function reduceData(d) {
  const clearPremium = /(\n+)(Featured|Premium)/gi;
  const otherStupid = /((↵\s+)+)(Featured|Premium)/gi;

  const details = Utils.extractFromObj(d, ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp']);

  // Drop "Featured" / "Premium" badges that trail the title.
  details.title = details.title.replace(clearPremium, '').replace(otherStupid, '');
  details.hashed = SHA(details.summary);

  return {
    details,
    'data': {
      'read': 0,
      'applied': d.applied || 0,
      'jobtype': analyseRate(d.salary),
      'class': 0,
      'autoclass': Corpus.process(d.summary),
      // The source timestamp is multiplied by 1000 before being stored.
      'timestamp': d.timestamp * 1000
    }
  };
}
/**
 * Read every job from the SQL database, joined with its applied / read flags
 * and ordered by _id.
 *
 * @returns {Promise<Array>} resolves with a copy of the rows; rejects with
 *   the database error if the query fails
 */
function getCurrent() {
  console.log('get version');

  const sql = 'select jobs.*, applied.a as applied, read.d as read from jobs left join applied on applied.aid = jobs._id left join read on read.rid = jobs._id order by _id asc;';

  return new Promise((resolve, reject) => {
    db.all(sql, [], (err, rows) => {
      if (err) {
        // Stop here: no rows are supplied when the query fails.
        reject(err);

        return;
      }

      resolve([...rows]);
    });
  });
}
/**
 * Copy every row returned by getCurrent() into MongoDB, one at a time, then
 * export the corpus statistics.
 *
 * A row that fails to save is logged and skipped. For such errors the
 * `keyPattern` is logged when present, otherwise the error itself.
 *
 * @returns {Promise<void>}
 */
async function start() {
  try {
    const rows = await getCurrent();
    logger.debug(rows.length);

    // Visit every row, including the last one.
    for (const row of rows) {
      const newJob = new Jobs(reduceData(row));

      try {
        const saved = await newJob.save();
        logger.debug('m', saved.details.title);
      }
      catch (err) {
        logger.error(err.keyPattern || err);
      }
    }

    logger.debug('SAVING!!');
    Corpus.exportUnused();
  }
  catch (err) {
    logger.error(err.keyPattern || err);
  }
}
/**
 * Delete jobs whose data.timestamp is more than 14 days old and whose
 * data.applied is 0.
 *
 * The returned promise settles only after the delete has finished;
 * a failure is logged rather than rethrown.
 *
 * @returns {Promise<void>}
 */
async function deleteOld() {
  const oneDay = 86400000;
  const twoWeeksAgo = new Date().getTime() - (14 * oneDay);
  const filter = { 'data.timestamp': { '$lt': twoWeeksAgo }, 'data.applied': 0 };

  logger.debug('Delete older than: ', new Date(twoWeeksAgo), twoWeeksAgo);
  // Log the filter that is actually sent to the database.
  logger.debug(filter);

  try {
    const m = await Jobs.deleteMany(filter);
    logger.debug('m', m);
  }
  catch (err) {
    logger.error(err);
  }
}
// newJob.find({ 'data': { 'timestamp': { '$lt': 1587034346000 } } });
return {
'start':start,
'deleteOld': deleteOld
};
})();
// Script entry point: run the migration, then the clean-up of old jobs,
// awaiting each in turn, and log a final message afterwards.
(async function() {
await migrate.start();
await migrate.deleteOld();
logger.info('Done??');
})();

47
models/jobs.js Normal file
View File

@ -0,0 +1,47 @@
/**
* Created by WebStorm.
* User: martin
* Date: 22/07/2020
* Time: 14:18
*/
const mongoose = require('mongoose');
const Schema = mongoose.Schema;
// Mongoose schema for one scraped job advert.
//  - details: the advert itself; `url` and `hashed` each carry a unique index.
//  - data:    per-job state (read / applied markers, classification, timestamps).
const jobSchema = new Schema({
  details: {
    title: { type: String, required: true },
    site: { type: String, required: true },
    url: { type: String, required: true, unique: true },
    id: String,
    summary: String,
    company: String,
    location: String,
    postdate: String,
    salary: String,
    easyapply: Number,
    timestamp: Number,
    hashed: { type: String, required: true, unique: true }
  },
  data: {
    read: { type: Number, default: 0 },
    applied: { type: Number, default: 0 },
    jobtype: { type: Number, default: 0 },
    class: { type: Number, default: 0 },
    autoclass: {
      good: Array,
      bad: Array,
      words: Array,
      score: { type: Number, default: 0 }
    },
    timestamp: { type: Number, default: 0 },
    created_at: { type: Date, default: Date.now }
  }
});

// Global Mongoose setting, applied before the model is compiled.
mongoose.set('useFindAndModify', false);

module.exports = mongoose.model('Jobs', jobSchema);

66
onetime.js Normal file
View File

@ -0,0 +1,66 @@
/**
* Created by WebStorm.
* User: martin
* Date: 16/04/2020
* Time: 23:35
*/
const CronJob = require('cron').CronJob;
const IndeedScraper = require('./scrapers/indeed');
const TotaljobsScraper = require('./scrapers/totaljobs');
const CwjobsScraper = require('./scrapers/cwjobs');
const JobserveScraper = require('./scrapers/rss.jobserve');
const RssS1Jobs = require('./scrapers/rss.s1jobs');
const RssTechnojobs = require('./scrapers/rss.technojobs');
// One-off run: every scraper is driven sequentially, in a fixed order.
(async function () {
  console.log('Started..');

  const indeedScraper = new IndeedScraper();
  const totaljobsScraper = new TotaljobsScraper();
  const cwjobsScraper = new CwjobsScraper();
  const jobserveScraper = new JobserveScraper();
  const s1jobsScraper = new RssS1Jobs();
  const technojobsScraper = new RssTechnojobs();

  // Location-based sites: for each location, run the three scrapers in turn.
  const locations = ['london', 'glasgow', 'edinburgh', 'milton keynes'];
  const siteScrapers = [indeedScraper, totaljobsScraper, cwjobsScraper];

  // Feed-based sites: each scraper is given its feed URLs one after another.
  const feedRuns = [
    [jobserveScraper, [
      'https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss',
      'https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss',
      'https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss',
      'https://www.jobserve.com/MySearch/4E2AC50E02AD128B.rss',
      'https://www.jobserve.com/MySearch/6DA9769BA89834AA.rss',
      'https://www.jobserve.com/MySearch/EDF47BEA6B31EF.rss',
      'https://www.jobserve.com/MySearch/3CAD044BEF2BFA.rss',
      'https://www.jobserve.com/MySearch/C7B25D86D0844A.rss',
      'https://www.jobserve.com/MySearch/64A3EEF615FA4C.rss',
      'https://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss',
      'https://www.jobserve.com/MySearch/CA49421A86CA3F74.rss',
      'https://www.jobserve.com/MySearch/846CDA8658FF93A3.rss',
      'https://www.jobserve.com/MySearch/ED1708BF42EF3513.rss', // javascript node 2 Jul 2020
      'https://www.jobserve.com/MySearch/4C67595E323E3453.rss', // vuejs 2 Jul 2020
      'https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss' // svelte 2 Jul 2020
    ]],
    [s1jobsScraper, [
      'http://www.s1jobs.com/xml/m7dp711z2r.xml',
      'http://www.s1jobs.com/xml/pfvf7o7z2r.xml',
      'http://www.s1jobs.com/xml/lluqnt8z2r.xml',
      'http://www.s1jobs.com/xml/tu33qt8z2r.xml',
      'http://www.s1jobs.com/xml/u3btnz8z2r.xml',
      'http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml',
      'http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml'
    ]],
    [technojobsScraper, [
      'https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1',
      'https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1',
      'https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1'
    ]]
  ];

  for (const location of locations) {
    for (const scraper of siteScrapers) {
      await scraper.go(location);
    }
  }

  for (const [scraper, urls] of feedRuns) {
    for (const url of urls) {
      await scraper.go(url);
    }
  }
})();

1290
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,23 +1,31 @@
{ {
"name": "jobscraper", "name": "jobscraper",
"version": "1.0.0", "version": "1.0.2",
"description": "", "description": "",
"main": "index.js", "main": "index.js",
"scripts": { "scripts": {
"grabber": "node grabber.js" "release": "vik patch -t",
"grabber": "node grabber.js",
"server" : "node server/server.js"
}, },
"author": "", "author": "",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"@rakh/utils": "^1.0.0",
"axios": "^0.19.2", "axios": "^0.19.2",
"bayes": "^1.0.0",
"body-parser": "^1.19.0", "body-parser": "^1.19.0",
"cheerio": "^1.0.0-rc.3", "cheerio": "^1.0.0-rc.3",
"cron": "^1.8.2", "cron": "^1.8.2",
"crypto-js": "^4.0.0",
"dotenv": "^8.2.0", "dotenv": "^8.2.0",
"eslint": "^6.8.0", "eslint": "^6.8.0",
"express": "^4.17.1", "express": "^4.17.1",
"fecha": "^4.2.0", "fecha": "^4.2.0",
"got": "^11.2.0", "got": "^11.2.0",
"jsonfile": "^6.0.1",
"log4js": "^6.3.0",
"mongoose": "^5.9.25",
"present": "^1.0.0", "present": "^1.0.0",
"rss-parser": "^3.8.0", "rss-parser": "^3.8.0",
"sqlite3": "^4.1.1", "sqlite3": "^4.1.1",

45
preload.js Normal file
View File

@ -0,0 +1,45 @@
/**
* Created by WebStorm.
* User: martin
* Date: 28/07/2020
* Time: 10:51
*/
const fs = require('fs');
var bayes = require('bayes');
// Classifier whose tokenizer splits the input text on commas, so each
// comma-separated entry becomes one token.
const classifier = bayes({
  tokenizer: (text) => text.split(',')
});
// teach it positive phrases
/**
 * Seeds the classifier with the hard-coded good and bad words, prints two
 * sample categorisations, then writes the classifier state to `brain.json`.
 *
 * Every word in both lists is learned, including the last entry of each.
 *
 * @returns {Promise<void>}
 */
async function load() {
  const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es', 'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs', 'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote', 'iot', 'mqtt', 'es6', 'es2016', 'es2017', 'es2018', 'freelance'];
  const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];

  for (const word of goodWords) {
    await classifier.learn(word, 'good');
  }

  for (const word of badWords) {
    await classifier.learn(word, 'bad');
  }

  // now ask it to categorize a document it has never seen before
  console.log(await classifier.categorize(['ui', 'developer', 'london', 'react'].join(',')));
  console.log(await classifier.categorize(['mysql', 'react', 'js', 'node', 'docker', 'kubernetes', 'google'].join(',')));

  // serialize the classifier's state as a JSON string.
  const stateJson = classifier.toJson();
  console.log(stateJson);
  fs.writeFileSync('brain.json', stateJson);
}
// Run the seeding straight away; report a failure and flag the exit code
// rather than leaving the promise unhandled.
load().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@ -20,7 +20,7 @@ class CwjobsScraper extends TotaljobsScraper {
} }
async go(location = 'london') { async go(location = 'london') {
this.setStartUrl(`https://www.cwjobs.co.uk/jobs/contract/html-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`); this.setStartUrl(`https://www.cwjobs.co.uk/jobs/contract/html-or-angular-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
// this.setStartUrl('https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=javascript+nodejs&as_not=&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=25&l=london&fromage=7&limit=10&sort=date&psf=advsrch&from=advancedsearch'); // this.setStartUrl('https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=javascript+nodejs&as_not=&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=25&l=london&fromage=7&limit=10&sort=date&psf=advsrch&from=advancedsearch');
// Glasgow // Glasgow

View File

@ -133,12 +133,15 @@ class IndeedScraper extends MasterScraper {
await this.filterAdverts(); await this.filterAdverts();
await this.addToDB(); await this.addToDB();
await this.addToMongo();
} }
async go(location = 'london') { async go(location = 'london') {
this.setStartUrl(`https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=Html+Web+Sql+Delphi+Vb+Vbscript+Php+Ajax+Mysql+Sqlserver+Javascript+Nodejs+vuejs+sveltejs&as_not=React&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=0&l=${encodeURIComponent(location)}&fromage=1&limit=50&sort=&psf=advsrch&from=advancedsearch`); this.setStartUrl(`https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=Angular+Html+Web+Sql+Delphi+Vb+Vbscript+Php+Ajax+Mysql+Sqlserver+Javascript+Nodejs+vuejs+sveltejs&as_not=React&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=0&l=${encodeURIComponent(location)}&fromage=1&limit=50&sort=&psf=advsrch&from=advancedsearch`);
await this.processSite(); await this.processSite().catch((err) => {
console.error('Indeed Go', err);
});
console.log(`Indeed ${location} completed`); console.log(`Indeed ${location} completed`);
} }

View File

@ -140,6 +140,7 @@ class IndeedMobileScraper extends MasterScraper {
await this.filterAdverts(); await this.filterAdverts();
await this.addToDB(); await this.addToDB();
await this.addToMongo();
} }
async go(location = 'london') { async go(location = 'london') {

View File

@ -22,7 +22,10 @@ class TotaljobsScraper extends MasterScraper {
} }
// Site specific parts below here // Site specific parts below here
/**
*
* @returns {Promise<void>}
*/
async breakPage() { async breakPage() {
const $ = this.currentPage; const $ = this.currentPage;
const ads = []; const ads = [];
@ -39,6 +42,11 @@ class TotaljobsScraper extends MasterScraper {
this.items = [...this.items, ...ads]; this.items = [...this.items, ...ads];
} }
/**
*
* @param part
* @returns {Promise<{}>}
*/
async extractDetails(part) { async extractDetails(part) {
const newObj = {}; const newObj = {};
const $part = cheerio.load(part); const $part = cheerio.load(part);
@ -61,6 +69,11 @@ class TotaljobsScraper extends MasterScraper {
return newObj; return newObj;
} }
/**
*
* @param item
* @returns {Promise<*>}
*/
async getIndividualPage(item) { async getIndividualPage(item) {
const newItem = {...item}; const newItem = {...item};
console.log('Getting', item.url); console.log('Getting', item.url);
@ -75,6 +88,10 @@ class TotaljobsScraper extends MasterScraper {
return newItem; return newItem;
} }
/**
*
* @returns {Promise<void>}
*/
async getJobPages() { async getJobPages() {
const newItems = []; const newItems = [];
for (let item of this.items) { for (let item of this.items) {
@ -86,6 +103,10 @@ class TotaljobsScraper extends MasterScraper {
this.items = [...newItems]; this.items = [...newItems];
} }
/**
*
* @returns {Promise<void>}
*/
async checkNext() { async checkNext() {
const $ = this.currentPage; const $ = this.currentPage;
const next = $('.pagination > *:last-child').attr('href') || ''; const next = $('.pagination > *:last-child').attr('href') || '';
@ -96,6 +117,10 @@ class TotaljobsScraper extends MasterScraper {
console.log(next); console.log(next);
} }
/**
*
* @returns {Promise<void>}
*/
async processSite() { async processSite() {
console.log('Processing...'); console.log('Processing...');
@ -121,10 +146,16 @@ class TotaljobsScraper extends MasterScraper {
await this.filterAdverts(); await this.filterAdverts();
await this.addToDB(); await this.addToDB();
await this.addToMongo();
} }
/**
*
* @param location
* @returns {Promise<void>}
*/
async go(location = 'london') { async go(location = 'london') {
this.setStartUrl(`https://www.totaljobs.com/jobs/contract/html-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`); this.setStartUrl(`https://www.totaljobs.com/jobs/contract/html-or-angular-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
// this.setStartUrl('https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=javascript+nodejs&as_not=&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=25&l=london&fromage=7&limit=10&sort=date&psf=advsrch&from=advancedsearch'); // this.setStartUrl('https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=javascript+nodejs&as_not=&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=25&l=london&fromage=7&limit=10&sort=date&psf=advsrch&from=advancedsearch');
// Glasgow // Glasgow

View File

@ -0,0 +1,81 @@
/**
* Created by WebStorm.
* User: martin
* Date: 10/09/2020
* Time: 16:07
*/
const Jobs = require('../../lib/mongoManager');
const { Utils } = require('@rakh/utils');
exports.markApplied = (req, res) => {
console.log('>V2 markApplied req', req.params);
if(!req.params.id)
return res.status(500).send({
'message': 'Job id missing'
});
const aid = req.params.id;
const now = new Date().getTime();
// touchOne
console.log('aid', aid);
Jobs.updateMany({ '_id':aid }, { '$set': { 'data.applied':now } } ).then((data) => {
console.log(data);
res.status(200).end();
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
/*
dbmanager.appliedOne({ aid, a })
.then((data) => {
console.log(data);
res.status(200).end();
})
.catch((err) => {
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
*/
};
exports.markAllRead = (req, res) => {
console.log('>V2 markAllRead req', req.params);
const now = new Date().getTime();
Jobs.updateMany({ 'data.read':0 }, { '$set': { 'data.read':now } } ).then((data) => {
console.log(data);
res.status(200).end();
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
/*
dbmanager.markAllRead()
.then((data) => {
console.log(data);
res.status(200).end();
})
.catch((err) => {
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
*/
};

View File

@ -0,0 +1,124 @@
/**
* Created by WebStorm.
* User: martin
* Date: 24/07/2020
* Time: 11:45
*/
const Jobs = require('../../lib/mongoManager');
const { Utils } = require('@rakh/utils');
// Matches, globally: a literal backslash followed by "n" (the two-character
// text `\n`, not a newline) with two or more whitespace characters after it,
// that same two-character text on its own, or any run of two or more
// whitespace characters.
const killNLDoubleSpace = /(\\n)\s{2,}|(\\n)|\s{2,}/g;
/**
 * Flattens a list of job documents into the summary records used for the
 * job list.
 *
 * For each document the `details` and `data` sections are merged, the list
 * fields are picked out with Utils.extractFromObj, and `_id` is attached.
 *
 * @param {Object[]} [data] job documents exposing `details`, `data` and `_id`
 * @returns {Object[]|string} the summary records, or '' when `data` is
 *     missing, null or undefined
 */
function reduceList(data) {
  // `== null` covers a missing argument, an explicit undefined and null.
  if (data == null) return '';

  const listFields = ['title', 'site', 'company', 'timestamp', 'read', 'applied', 'jobtype', 'class', 'autoclass'];

  return data.map((v) => {
    const o = Utils.extractFromObj({ ...v.details, ...v.data, _id: v._id }, listFields);
    o._id = v._id;
    return o;
  });
}
/**
 * Flattens one job record: the fields of `details` are lifted to the top
 * level, while `data` and `_id` are carried over under their own keys.
 *
 * @param {Object} record object exposing `details`, `data` and `_id`
 * @returns {Object} the flattened record
 */
function reduceRecord(record) {
  const { details, data, _id } = record;

  return { ...details, data, _id };
}
exports.getList = (req, res) => {
console.log('>getList req', req.params);
Jobs.find({}, { 'details.title':1, 'details.site':1, 'details.company':1, 'data':1, '_id':1 }).limit(200).sort( { 'data.timestamp': -1 } ).then((doc) => {
if (doc) {
res.send(reduceList(doc));
}
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
};
exports.getJob = (req, res) => {
console.log('>getJob req', req.params);
if(!req.params.id)
return res.status(500).send({
'message': 'Job id missing'
});
const id = req.params.id;
Jobs.findById(id).then((doc) => {
if (doc) {
const item = reduceRecord(doc._doc);
const date = new Date( item.timestamp * 1000);
console.log(item);
item.date = date.toLocaleString();
item.title = item.title.replace(killNLDoubleSpace, ' ');
res.send(item);
}
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
};
exports.readJob = (req, res) => {
console.log('>readJob req', req.params);
let id;
if(!req.params.id)
return res.status(500).send({
'message': 'Job id missing'
});
else
id = req.params.id;
Jobs.findById(id).then((doc) => {
if (doc) {
let fullDoc = Object.assign({}, doc._doc);
console.log('fullDoc', fullDoc);
if (!Utils.isEmpty(fullDoc)){
fullDoc.data.read = new Date().getTime();
Jobs.findByIdAndUpdate(id, fullDoc, {'new':true}).then((doc) => {
console.log(doc._doc);
res.status(200).end();
}).catch((err) => {
console.error('inside',err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
}
}
}).catch((err) => {
console.error('outer', err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
};

View File

@ -0,0 +1,89 @@
/**
* Created by WebStorm.
* User: martin
* Date: 28/07/2020
* Time: 11:08
*/
const Jobs = require('../../lib/mongoManager');
const { Utils } = require('@rakh/utils');
const fs = require('fs');
var bayes = require('bayes');
// Classifier whose tokenizer splits the input text on commas. Declared with
// `let` because load() replaces it with the state restored from disk.
let classifier = bayes({
  tokenizer: (text) => text.split(',')
});
/**
 * Replaces the module-level classifier with the state stored in
 * `brain.json`.
 *
 * When the file does not exist yet, the existing classifier is kept and a
 * warning is printed, so loading this module does not fail. Any other read
 * or parse error is rethrown.
 *
 * NOTE(review): the tokenizer is a function, so it presumably does not
 * survive JSON serialisation — confirm the restored classifier still
 * tokenizes on commas.
 */
function load() {
  let file;

  try {
    file = fs.readFileSync('brain.json', 'utf8');
  }
  catch (err) {
    if (err.code !== 'ENOENT') throw err;

    console.warn('brain.json not found, starting with an untrained classifier');
    return;
  }

  classifier = bayes.fromJson(file);
}
/**
 * Serialises the classifier with `toJson()`, prints the result, and writes
 * it synchronously to `brain.json`.
 */
function save() {
  const serialised = classifier.toJson();

  console.log(serialised);
  fs.writeFileSync('brain.json', serialised);
}
// Restore the classifier from brain.json as soon as this module is loaded.
load();
/**
 * Builds a PUT handler that teaches the classifier that a job's words
 * belong to `category`, then persists the classifier with save().
 *
 * The handler replies:
 *  - 200 with an empty body once the vote has been learned and saved
 *  - 404 with a `message` when no job has that id
 *  - 500 with a `message` when the id is missing or anything fails
 *
 * @param {string} category label passed to `classifier.learn`
 * @param {string} label handler name used in the request log line
 * @returns {function} handler taking `(req, res)`
 */
function recordVote(category, label) {
  return (req, res) => {
    console.log(`>${label} req`, req.params);

    if (!req.params.id)
      return res.status(500).send({
        'message': 'Job id missing'
      });

    const id = req.params.id;

    Jobs.findById(id).then(async (doc) => {
      if (!doc) {
        // Always answer: an unknown id must not leave the request hanging.
        return res.status(404).send({
          'message': 'Job not found'
        });
      }

      // The classifier tokenizes on commas, so the words are joined that way.
      const words = doc._doc.data.autoclass.words.join(',');

      await classifier.learn(words, category);
      save();

      res.status(200).end();
    }).catch((err) => {
      console.error(err.message);
      res.status(500).send({
        'message': err.message || 'Some error occurred while querying the database.'
      });
    });
  };
}

// Up-vote: the job's words are learned as 'good'.
exports.upvote = recordVote('good', 'upvote');

// Down-vote: the job's words are learned as 'bad'.
exports.downvote = recordVote('bad', 'downvote');

47
server/dist/3rdpartylicenses.txt vendored Normal file
View File

@ -0,0 +1,47 @@
css-loader
MIT
Copyright JS Foundation and other contributors
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
zone.js
MIT
The MIT License
Copyright (c) 2010-2020 Google LLC. http://angular.io/license
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
!function(e){function r(r){for(var n,l,f=r[0],i=r[1],p=r[2],c=0,s=[];c<f.length;c++)l=f[c],Object.prototype.hasOwnProperty.call(o,l)&&o[l]&&s.push(o[l][0]),o[l]=0;for(n in i)Object.prototype.hasOwnProperty.call(i,n)&&(e[n]=i[n]);for(a&&a(r);s.length;)s.shift()();return u.push.apply(u,p||[]),t()}function t(){for(var e,r=0;r<u.length;r++){for(var t=u[r],n=!0,f=1;f<t.length;f++)0!==o[t[f]]&&(n=!1);n&&(u.splice(r--,1),e=l(l.s=t[0]))}return e}var n={},o={0:0},u=[];function l(r){if(n[r])return n[r].exports;var t=n[r]={i:r,l:!1,exports:{}};return e[r].call(t.exports,t,t.exports,l),t.l=!0,t.exports}l.m=e,l.c=n,l.d=function(e,r,t){l.o(e,r)||Object.defineProperty(e,r,{enumerable:!0,get:t})},l.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},l.t=function(e,r){if(1&r&&(e=l(e)),8&r)return e;if(4&r&&"object"==typeof e&&e&&e.__esModule)return e;var t=Object.create(null);if(l.r(t),Object.defineProperty(t,"default",{enumerable:!0,value:e}),2&r&&"string"!=typeof e)for(var n in e)l.d(t,n,(function(r){return e[r]}).bind(null,n));return t},l.n=function(e){var r=e&&e.__esModule?function(){return e.default}:function(){return e};return l.d(r,"a",r),r},l.o=function(e,r){return Object.prototype.hasOwnProperty.call(e,r)},l.p="";var f=window.webpackJsonp=window.webpackJsonp||[],i=f.push.bind(f);f.push=r,f=f.slice();for(var p=0;p<f.length;p++)r(f[p]);var a=i;t()}([]);

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,24 @@
/**
* Created by WebStorm.
* User: martin
* Date: 10/09/2020
* Time: 16:06
*/
/**
* Created by WebStorm.
* User: martin
* Date: 25/05/2020
* Time: 13:36
*/
const apply = require('../controllers/apply.v2.controller');
/**
 * Registers the v2 "applied" / "read all" routes on the given app.
 *
 * @param app application object exposing `route()`
 */
function registerApplyRoutes(app) {
  // PUT /v2/apply/:id
  const applyRoute = app.route('/v2/apply/:id');
  applyRoute.put(apply.markApplied);

  // PUT /v2/readall
  const readAllRoute = app.route('/v2/readall');
  readAllRoute.put(apply.markAllRead);
}

module.exports = registerApplyRoutes;

View File

@ -0,0 +1,17 @@
/**
* Created by WebStorm.
* User: martin
* Date: 24/07/2020
* Time: 11:42
*/
const jobs = require('../controllers/jobs.v2.controller');
/**
 * Registers the v2 job routes on the given app.
 *
 * @param app application object exposing `route()`
 */
function registerJobRoutes(app) {
  // GET /v2/jobs
  const listRoute = app.route('/v2/jobs');
  listRoute.get(jobs.getList);

  // GET and PUT /v2/jobs/:id
  const jobRoute = app.route('/v2/jobs/:id');
  jobRoute
    .get(jobs.getJob)
    .put(jobs.readJob);
}

module.exports = registerJobRoutes;

View File

@ -0,0 +1,17 @@
/**
* Created by WebStorm.
* User: martin
* Date: 28/07/2020
* Time: 11:07
*/
const vote = require('../controllers/vote.controller');
/**
 * Registers the vote routes on the given app.
 *
 * @param app application object exposing `route()`
 */
function registerVoteRoutes(app) {
  // PUT /vote/up/:id
  const upRoute = app.route('/vote/up/:id');
  upRoute.put(vote.upvote);

  // PUT /vote/down/:id
  const downRoute = app.route('/vote/down/:id');
  downRoute.put(vote.downvote);
}

module.exports = registerVoteRoutes;

View File

@ -58,7 +58,9 @@ app.use(bodyParser.json());
app.post('/auth', auth.auth); app.post('/auth', auth.auth);
require('./routes/jobs.route')(app); require('./routes/jobs.route')(app);
require('./routes/apply.route')(app); require('./routes/jobs.v2.route')(app);
require('./routes/apply.v2.route')(app);
require('./routes/vote.route')(app);
app.listen(serverPort, () => { app.listen(serverPort, () => {
console.log(`Server is listening on port ${serverPort}`); console.log(`Server is listening on port ${serverPort}`);

File diff suppressed because one or more lines are too long

View File

@ -20,7 +20,7 @@ const indeedScraper = new IndeedScraper();
// const page = fs.readFileSync('data/indeed/indeed-2020-04-16--092311.html'); // const page = fs.readFileSync('data/indeed/indeed-2020-04-16--092311.html');
const page = fs.readFileSync('data/indeed/page2.html'); const page = fs.readFileSync('data/indeed/page2.html');
test.test('Test Indeed scraper', async t => { test.skip('Test Indeed scraper', async t => {
const $ = cheerio.load(page); const $ = cheerio.load(page);
indeedScraper.loadPage($); indeedScraper.loadPage($);
@ -35,13 +35,36 @@ test.test('Test Indeed scraper', async t => {
await indeedScraper.filterAdverts(); await indeedScraper.filterAdverts();
// await indeedScraper.addToDB(); await indeedScraper.addToMongo();
t.end(); t.end();
}); });
test.test('Test full run Indeed scraper', async t => { test.skip('Test full run Indeed scraper', async t => {
await indeedScraper.go('london'); await indeedScraper.go('london').catch((err) => {
console.error('Indeed GO', err);
});
t.end();
});
test.test('Test Indeed scraper -- MONGO', async t => {
const $ = cheerio.load(page);
indeedScraper.loadPage($);
await indeedScraper.breakPage();
// await indeedScraper.getJobPages();
// console.log(await indeedScraper.checkNext());
// console.log(indeedScraper.items);
// await indeedScraper.filterAdverts();
await indeedScraper.addToMongo();
t.end(); t.end();
}); });

View File

@ -26,13 +26,14 @@ const s1jobsScraper = new RssS1Jobs();
const feed = fs.readFileSync('test/data/s1jobs/m7dp711z2r.xml'); const feed = fs.readFileSync('test/data/s1jobs/m7dp711z2r.xml');
test.test('Test Jobserve scraper', async t => { test.test('Test Jobserve scraper', async t => {
let url = 'http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml';
await s1jobsScraper.setStartUrl(url); await s1jobsScraper.setStartUrl(url);
s1jobsScraper.reduceItems(); s1jobsScraper.reduceItems();
await s1jobsScraper.filterAdverts(); await s1jobsScraper.filterAdverts();
await s1jobsScraper.addToDB(); // await s1jobsScraper.addToDB();
t.end(); t.end();
}); });

View File

@ -19,17 +19,17 @@ const testScraper = new RssTechnojobs();
const feed = fs.readFileSync('test/data/technojobs/page1'); const feed = fs.readFileSync('test/data/technojobs/page1');
test.test('Test Technojobs scraper', async t => { test.test('Test Technojobs scraper', async t => {
// await testScraper.loadFeed(feed); await testScraper.loadFeed('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
// testScraper.reduceItems(); await testScraper.reduceItems();
// await s1jobsScraper.filterAdverts(); await s1jobsScraper.filterAdverts();
// await s1jobsScraper.addToDB(); // await s1jobsScraper.addToDB();
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1') /* await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1') await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1') await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
*/
t.end(); t.end();
}); });

View File

@ -31,7 +31,7 @@ test.test('Test Totaljobs scraper', async t => {
await totaljobsScraper.getJobPages(); await totaljobsScraper.getJobPages();
// console.log(await indeedScraper.checkNext()); // console.log(await indeedScraper.checkNext());
console.log(totaljobsScraper.items); // console.log(totaljobsScraper.items);
await totaljobsScraper.filterAdverts(); await totaljobsScraper.filterAdverts();

14
test/wip.js Normal file
View File

@ -0,0 +1,14 @@
/**
* Created by WebStorm.
* User: martin
* Date: 23/07/2020
* Time: 09:26
*/
const { Corpus } = require('../lib/corpus');
const text = 'ESTAMP DEVELOPER 6 month contract £450-525 / day Developer, SQL, Photoshop, Javascript,  NET, C#, Javascript Advanced knowledge of SQL Server TSQL Experience of the design and  PDF stamp development E-STAMP DEVELOPER 6 month contract';
// Run the sample advert text through the corpus processor and print the result.
console.log(Corpus.process(text));

71
testgrabber.js Normal file
View File

@ -0,0 +1,71 @@
/**
* Created by WebStorm.
* User: martin
* Date: 16/04/2020
* Time: 23:35
*/
const CronJob = require('cron').CronJob;
const IndeedScraper = require('./scrapers/indeed');
const TotaljobsScraper = require('./scrapers/totaljobs');
const CwjobsScraper = require('./scrapers/cwjobs');
const JobserveScraper = require('./scrapers/rss.jobserve');
const RssS1Jobs = require('./scrapers/rss.s1jobs');
const RssTechnojobs = require('./scrapers/rss.technojobs');
/**
 * Entry point: runs the Indeed, Totaljobs and CWJobs scrapers once for each
 * location, awaiting every call before starting the next one.
 *
 * The RSS scrapers (Jobserve, s1jobs, Technojobs) are still constructed, but
 * their feed calls are currently disabled in the comment block below.
 *
 * Any rejection from a scraper aborts the remaining calls, is logged, and
 * marks the process as failed instead of being left as an unhandled rejection.
 */
(async function () {
  console.log('Started..');

  const indeedScraper = new IndeedScraper();
  const totaljobsScraper = new TotaljobsScraper();
  const cwjobsScraper = new CwjobsScraper();
  const jobserveScraper = new JobserveScraper();
  const s1jobsScraper = new RssS1Jobs();
  const technojobsScraper = new RssTechnojobs();

  // Visited in this order; within a location the scrapers run
  // indeed -> totaljobs -> cwjobs, one at a time.
  const locations = ['london', 'glasgow', 'edinburgh', 'milton keynes'];

  for (const location of locations) {
    await indeedScraper.go(location);
    await totaljobsScraper.go(location);
    await cwjobsScraper.go(location);
  }

  /*
  await jobserveScraper.go('https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/4E2AC50E02AD128B.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/6DA9769BA89834AA.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/EDF47BEA6B31EF.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/3CAD044BEF2BFA.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/C7B25D86D0844A.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/64A3EEF615FA4C.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/CA49421A86CA3F74.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/846CDA8658FF93A3.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/ED1708BF42EF3513.rss'); // javascript node 2 Jul 2020
  await jobserveScraper.go('https://www.jobserve.com/MySearch/4C67595E323E3453.rss'); // vuejs 2 Jul 2020
  await jobserveScraper.go('https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss'); // svelte 2 Jul 2020
  await s1jobsScraper.go('http://www.s1jobs.com/xml/m7dp711z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/pfvf7o7z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/lluqnt8z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/tu33qt8z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/u3btnz8z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');
  await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
  await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
  await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
  */
})().catch((err) => {
  // Without this handler a failing scraper leaves a floating rejected promise.
  console.error('Scraper run failed:', err);
  process.exitCode = 1;
});

1
unused.json Normal file

File diff suppressed because one or more lines are too long

22
words.js Normal file
View File

@ -0,0 +1,22 @@
/**
* Created by WebStorm.
* User: martin
* Date: 27/07/2020
* Time: 10:08
*/
const jsonfile = require('jsonfile');
const data = require('./unused.json');
/**
 * Writes the de-duplicated entries of `data` whose `length` equals `size`
 * to `limited.json`, then logs `done`.
 *
 * @param {number} size - Exact length an entry must have to be kept.
 */
function show(size) {
  const matching = data.filter((entry) => entry.length === size);
  const unique = Array.from(new Set(matching));

  jsonfile.writeFileSync('limited.json', unique);
  console.log('done');
}
show(11);