Compare commits

..

No commits in common. "development" and "svelte-updates" have entirely different histories.

51 changed files with 576 additions and 14582 deletions

View File

@ -1,32 +0,0 @@
; http://editorconfig.org
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
indent_style = space
indent_size = 2
[*.txt]
insert_final_newline = false
trim_trailing_whitespace = false
[*.py]
indent_size = 4
[*.m]
indent_size = 4
[Makefile]
indent_style = tab
indent_size = 8
[*.{js,json}]
indent_style = space
indent_size = 2
[*.md]
trim_trailing_whitespace = false

View File

@ -9,7 +9,7 @@
"env": {
"browser": true,
"node": true,
"es2017": true
"es6": true
},
"rules": {
"arrow-spacing": "error",

1
.gitignore vendored
View File

@ -147,4 +147,3 @@ fabric.properties
/live/
!/output/
/db/jobs.db
!/db/

File diff suppressed because one or more lines are too long

View File

@ -1,204 +0,0 @@
{
"categories": {
"good": true,
"bad": true
},
"docCount": {
"good": 43,
"bad": 5
},
"totalDocuments": 48,
"vocabulary": {
"tsql": true,
"developer": true,
"contract": true,
"web": true,
"javascript": true,
"js": true,
"node": true,
"es": true,
"agile": true,
"nodejs": true,
"london": true,
"aws": true,
"sql": true,
"postgresql": true,
"mysql": true,
"docker": true,
"ecs": true,
"automation": true,
"jslint": true,
"jshint": true,
"vuejs": true,
"vue": true,
"nginx": true,
"remotely": true,
"mvc": true,
"remote": true,
"iot": true,
"mqtt": true,
"es6": true,
"es2016": true,
"es2017": true,
"es2018": true,
"react": true,
"redux": true,
"graphql": true,
"java": true,
"reactjs": true,
"apps": true,
"html": true,
"css": true,
"code": true,
"angular": true,
"ember": true,
"restful": true,
"apis": true,
"infrastructure": true,
"software": true,
"native": true,
"med": true,
"mobile": true,
"client": true,
"applications": true,
"digital": true,
"analytics": true,
"dashboarding": true,
"online": true,
"analyse": true,
"dashboards": true,
"google": true,
"query": true,
"data": true,
"stakeholders": true,
"enhancements": true,
"requirements": true,
"c": true,
"net": true,
"technologies": true,
"azure": true,
"understanding": true,
"devops": true,
"tools": true,
"frameworks": true,
"scotland": true,
"responsibility": true,
"programme": true,
"functions": true,
"asp": true,
"project": true,
"transform": true,
"collaborative": true,
"technical": true,
"framework": true,
"nhibernate": true,
"server": true,
"api": true,
"development": true,
"lifecycle": true,
"specification": true,
"appointments": true
},
"vocabularySize": 89,
"wordCount": {
"good": 157,
"bad": 5
},
"wordFrequencyCount": {
"good": {
"tsql": 1,
"developer": 6,
"contract": 9,
"web": 6,
"javascript": 7,
"js": 3,
"node": 2,
"es": 1,
"agile": 2,
"nodejs": 1,
"london": 3,
"aws": 3,
"sql": 3,
"postgresql": 1,
"mysql": 1,
"docker": 1,
"ecs": 1,
"automation": 1,
"jslint": 1,
"jshint": 1,
"vuejs": 1,
"vue": 2,
"nginx": 1,
"remotely": 1,
"mvc": 5,
"remote": 2,
"iot": 1,
"mqtt": 1,
"es6": 1,
"es2016": 1,
"es2017": 1,
"es2018": 1,
"apps": 1,
"html": 5,
"css": 5,
"code": 2,
"react": 2,
"angular": 1,
"ember": 1,
"restful": 1,
"apis": 1,
"infrastructure": 1,
"software": 2,
"native": 1,
"med": 1,
"mobile": 1,
"client": 4,
"applications": 2,
"digital": 2,
"analytics": 1,
"dashboarding": 1,
"online": 1,
"analyse": 1,
"dashboards": 1,
"google": 1,
"query": 1,
"data": 1,
"stakeholders": 1,
"enhancements": 3,
"requirements": 3,
"c": 4,
"net": 5,
"technologies": 4,
"azure": 2,
"understanding": 1,
"devops": 2,
"tools": 1,
"frameworks": 1,
"scotland": 1,
"responsibility": 1,
"programme": 1,
"functions": 1,
"asp": 1,
"project": 1,
"transform": 1,
"collaborative": 1,
"technical": 1,
"framework": 1,
"nhibernate": 1,
"server": 1,
"api": 1,
"development": 1,
"lifecycle": 1,
"specification": 1,
"appointments": 1
},
"bad": {
"react": 1,
"redux": 1,
"graphql": 1,
"java": 1,
"reactjs": 1
}
},
"options": {}
}

Binary file not shown.

View File

@ -38,7 +38,6 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
}, null, true);
new CronJob('0 6-23/1 * * *', async function() {
await jobserveScraper.go('https://www.jobserve.com/MySearch/D48462060FB24B6C.rss');
await jobserveScraper.go('https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss');
await jobserveScraper.go('https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss');
await jobserveScraper.go('https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss');
@ -55,13 +54,13 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
await jobserveScraper.go('https://www.jobserve.com/MySearch/4C67595E323E3453.rss'); // vuejs 2 Jul 2020
await jobserveScraper.go('https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss'); // svelte 2 Jul 2020
/* await s1jobsScraper.go('http://www.s1jobs.com/xml/m7dp711z2r.xml');
await s1jobsScraper.go('http://www.s1jobs.com/xml/m7dp711z2r.xml');
await s1jobsScraper.go('http://www.s1jobs.com/xml/pfvf7o7z2r.xml');
await s1jobsScraper.go('http://www.s1jobs.com/xml/lluqnt8z2r.xml');
await s1jobsScraper.go('http://www.s1jobs.com/xml/tu33qt8z2r.xml');
await s1jobsScraper.go('http://www.s1jobs.com/xml/u3btnz8z2r.xml');
await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml');
await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');*/
await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');

View File

@ -8,12 +8,6 @@
const filterReject = require('../lib/filter_reject');
const filterAccept = require('../lib/filter_md_jobs');
const dbmanager = require('../lib/dbmanager');
const JobsModel = require('../lib/mongoManager');
const SHA = require('crypto-js/sha256');
const { Utils } = require('@rakh/utils');
const { Corpus } = require('./corpus');
class MasterBase {
@ -63,79 +57,6 @@ class MasterBase {
});
}
/**
*
*/
addToMongo() {
console.log('>> ADD TO MONGO!');
for(const item of this.items) {
// console.log('add', item);
const newObj = this.reduceData(item);
const newJob = new JobsModel(newObj);
newJob.save().then((m) => {
console.log('m', m.details.title);
}).catch((err) => {
console.error('m', err);
});
}
}
/**
*
* @param inval
* @returns {number}
*/
analyseRate(inval) {
console.log('analyseRate', inval);
let outVal = 0;
const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
const clearSpace = /\s+/g;
const result = inval.replace(cleanerReg, '').replace(clearSpace, ' ');
const resultArray = result.trim().split((' '));
if (resultArray.length > 0) {
const item = parseInt(resultArray[0], 10);
if (item < 100) outVal = 0;
else if ((item > 100) && (item < 5000)) outVal = 1;
else if (item >= 5000) outVal = 2;
}
else return 0;
return outVal;
}
/**
*
* @param d
* @returns {{data: {read: number, autoclass: number, applied: number, jobtype: number, class: number}, details: {}}}
*/
reduceData(d) {
const clearPremium = /(\n+)(Featured|Premium)/gi;
const otherStupid = /((↵\s+)+)(Featured|Premium)/gi;
const outObj = { 'details':{}, 'data':{ 'read':0, 'applied':0, 'jobtype': 0, 'class':0, 'autoclass':0 } };
outObj.details = Utils.extractFromObj(d, ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp']);
outObj.details.title = outObj.details.title.replace(clearPremium, '');
outObj.details.title = outObj.details.title.replace(otherStupid, '');
outObj.details.hashed = SHA(outObj.details.summary);
outObj.data.read = 0;
outObj.data.applied = d.applied || 0;
outObj.data.jobtype = this.analyseRate(d.salary);
outObj.data.autoclass = Corpus.process(d.summary);
outObj.data.timestamp = d.timestamp * 1000;
return outObj;
}
/**
*
* @returns {Promise<void>}
@ -199,15 +120,10 @@ class MasterBase {
return `https://image.silvrtree.co.uk/q${q}/${url}`;
}
/**
*
* @returns {Promise<void>}
*/
async go() {
this.items = [];
this.rawItems = [];
}
}
module.exports = MasterBase;

View File

@ -1,91 +0,0 @@
const jsonfile = require('jsonfile');
const words = require('../lib/wordlist.json');
const wordsAdditional = require('../lib/wordlistAdditional.json');
// Tally of token -> number of processed texts that contained it; incremented by incItem().
const bigList = new Map([]);
// Tokens that raise a text's score in Corpus.process() (one point each).
const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es',
'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs',
'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote',
'iot', 'mqtt'];
// Tokens that lower a text's score in Corpus.process() (five points each).
const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];
// Tokens seen so far that are in neither list; grows on every Corpus.process() call
// and is written out by Corpus.exportUnused().
let unrated = [];
// Resolve the global object: `global` when it exists, otherwise `window`.
var _global = typeof global === 'undefined' ? window : global;
// Reuse a Corpus namespace already published on the global object, or create and publish one.
var Corpus = (_global.Corpus = _global.Corpus || {});
// Matches an e-mail address. Declared without `g` or `i` flags, so on its own it
// matches only the first address in a string and only lower-case letters.
const emailRegex = /[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/;
// Matches whole <script> and <style> elements, HTML comments, and any remaining tag.
const detagRegex = /(<script(\s|\S)*?<\/script>)|(<style(\s|\S)*?<\/style>)|(<!--(\s|\S)*?-->)|(<\/?(\s|\S)*?>)/gi;
// Matches newlines, tabs, the listed punctuation/symbol characters, and runs of digits.
const desymbolNumberRegex = /[\n\t+$,\?\.\%\*=&:;()\\/\-£…"]|\d+/gi;
// Matches a run of whitespace (used to collapse it to a single space).
const deSpace = /\s+/g;
/**
 * Normalise raw advert text into a lower-case, single-spaced string of words.
 *
 * E-mail addresses, HTML (script/style elements, comments, tags) and the
 * symbols and digit runs matched by `desymbolNumberRegex` are each replaced by
 * a space; whitespace runs are then collapsed and the result trimmed.
 *
 * @param {string} intext raw text; anything that is not a string yields ''
 * @returns {string} cleaned text
 */
function cleanText(intext) {
  // Covers the no-argument, undefined and null cases as well as numbers/objects,
  // which have no .replace() and used to throw.
  if (typeof intext !== 'string') return '';

  // emailRegex carries no flags: build a global copy so every address is
  // removed, and lower-case first so mixed-case addresses match its a-z classes.
  const everyEmail = new RegExp(emailRegex.source, 'g');

  return intext
    .toLowerCase()
    .replace(everyEmail, ' ')
    .replace(detagRegex, ' ')
    .replace(desymbolNumberRegex, ' ')
    .replace(deSpace, ' ')
    .trim();
}
/**
 * Return the distinct values of an iterable, in first-seen order.
 *
 * @param intext iterable to de-duplicate; a missing, undefined or null value gives []
 * @returns {Array} array of unique values
 */
function dedupe(intext) {
  if (intext === undefined || intext === null) return [];

  const unique = new Set(intext);

  return Array.from(unique);
}
/**
 * Add one to the tally kept for `item` in `bigList`, starting at 1 for a new item.
 *
 * @param item key to count
 */
function incItem(item) {
  const nextCount = bigList.has(item) ? bigList.get(item) + 1 : 1;

  bigList.set(item, nextCount);
}
/**
 * Score a piece of advert text against the good/bad word lists.
 *
 * The text is cleaned with `cleanText()`, split on spaces and de-duplicated;
 * tokens found in `words` or `wordsAdditional` are discarded. Every remaining
 * token is counted once in `bigList`, and those in neither word list are
 * appended to `unrated`.
 *
 * @param intext raw text to analyse
 * @returns {{good: string[], bad: string[], score: number, words: string[]}}
 *          `good`/`bad` are the tokens found in each list, `words` is every
 *          kept token, and `score` is one point per good token minus five per
 *          bad token
 */
Corpus.process = function(intext) {
  const tokens = cleanText(intext).split(' ');

  // ''.split(' ') is [''], so empty text used to yield a bogus '' token that was
  // counted and exported; drop it along with the stop-listed words.
  const cleanedArray = dedupe(tokens).filter((v) => {
    return (v !== '' && !words.includes(v) && !wordsAdditional.includes(v));
  });

  const good = cleanedArray.filter((v) => goodWords.includes(v));
  const bad = cleanedArray.filter((v) => badWords.includes(v));
  const unused = cleanedArray.filter((v) => {
    return (!badWords.includes(v) && !goodWords.includes(v));
  });

  for (const item of cleanedArray)
    incItem(item);

  // Append in place rather than re-copying the whole accumulated list each call.
  unrated.push(...unused);

  const score = good.length - (bad.length * 5);

  return { good, bad, score, 'words': cleanedArray };
};
/**
 * Dump the collected statistics to disk and print the token tally.
 *
 * Writes the de-duplicated `unrated` tokens to ./unused.json and the
 * `bigList` entries, sorted by descending count, to ./biglist.json.
 */
Corpus.exportUnused = function() {
  const uniqueUnrated = dedupe(unrated);

  jsonfile.writeFileSync('./unused.json', uniqueUnrated);

  const byCountDescending = Array.from(bigList).sort((left, right) => right[1] - left[1]);

  jsonfile.writeFileSync('./biglist.json', byCountDescending);

  console.log(Array.from(bigList));
};
if (typeof module !== 'undefined')
module.exports = {
'Corpus': Corpus
};

View File

@ -1,34 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 22/07/2020
* Time: 17:00
*/
const mongoose = require('mongoose');
const log4js = require('log4js');
const logger = log4js.getLogger();
const JobsModel = require('../models/jobs');
// const { Utils } = require('@rakh/utils');
require('dotenv').config();
logger.level = 'debug';

// The connection string comes from the MONGOCONNECT environment variable only.
// Never hard-code credentials in this file. NOTE(review): a password was
// previously committed here in a comment — it should be rotated.
const mongoConnect = process.env.MONGOCONNECT;

// Log where we are connecting, with any "user:password@" part masked so the
// secret does not end up in log files.
logger.debug(String(mongoConnect).replace(/\/\/[^@/]*@/, '//***@'));

// connect() returns a promise when no callback is given; handle its rejection
// so a failed initial connection is logged instead of going unhandled.
mongoose.connect(mongoConnect).catch((err) => {
  logger.error('connection error:', err);
});

const mDB = mongoose.connection;

mDB.on('error', console.error.bind(console, 'connection error:'));

// Consumers get the Jobs model; requiring this module has the side effect of
// opening the connection above.
module.exports = JobsModel;

View File

@ -89,7 +89,6 @@ class MasterRSS extends MasterBase {
await this.filterAdverts();
if (this.items.length > 0) await this.addToDB();
if (this.items.length > 0) await this.addToMongo();
}
else
console.log('No items to process');

View File

@ -20,15 +20,10 @@ class MasterScraper extends MasterBase {
constructor() {
super();
}
/**
*
* @param url
* @param useStone
* @returns {Promise<unknown>}
*/
getContent(url, useStone = false) {
/*
let headers = new Headers({
"Accept" : "application/json",
@ -59,28 +54,19 @@ fetch(url, {
resolve(response.body);
})
.catch((e) => {
console.error('getContent', e );
reject(e.response.body);
});
});
};
async savePage(html) {
const now = fecha.format(new Date(), 'YYYY-MM-DD--hh');
const filename = `pages/${this.siteid}-${now}.html`;
fs.writeFileSync(filename, html);
}
async getPage() {
console.log('>> getPage: fetching', this.url);
const now = fecha.format(new Date(), 'YYYY-MM-DD--hhmmss');
const filename = `${this.siteid}-${now}.html`;
await this.getContent(this.url, this.useStone)
.then((html) => {
// console.log('>> getPage:: got', html);
console.log('>> getPage:: OK');
if (this.saveFile) this.savePage(html);
fs.writeFileSync(filename, html);
const $ = cheerio.load(html);
this.loadPage($);
})
@ -89,59 +75,30 @@ fetch(url, {
// Site specific parts below here
/**
* Break each page into items
* @returns {Promise<void>}
*/
async breakPage() {
}
/**
*
* @param part
* @returns {Promise<void>}
*/
async extractDetails(part) {
}
/**
*
* @returns {Promise<void>}
*/
async checkNext() {
}
/**
*
* @returns {Promise<void>}
*/
async processSite() {
}
/**
*
* @returns {Promise<void>}
*/
async getIndividualPage() {
}
/**
*
* @returns {Promise<void>}
*/
async getJobPages() {
}
/**
*
* @returns {Promise<void>}
*/
async go() {
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,559 +0,0 @@
[
"experienced",
"exceptional",
"maintaining",
"familiarity",
"commodities",
"opportunity",
"possibility",
"integration",
"engineering",
"derivatives",
"prefferable",
"nutritional",
"performance",
"immediately",
"information",
"responsible",
"environment",
"stakeholder",
"proactively",
"requirement",
"temporarily",
"interrogate",
"effectively",
"progressing",
"substantial",
"identifying",
"maintenance",
"workarounds",
"departments",
"consultancy",
"regulations",
"statistical",
"previously·",
"euromonitor",
"documenting",
"bookkeeping",
"reconciling",
"hardworking",
"themselves!",
"appropriate",
"socialising",
"fundraising",
"initiatives",
"sponsorship",
"orientation",
"competitive",
"illustrator",
"outstanding",
"interaction",
"consistency",
"touchpoints",
"freshtechit",
"recruitment",
"catastrophe",
"accountable",
"workstreams",
"scalability",
"undertaking",
"interacting",
"significant",
"considering",
"independent",
"collaborate",
"arrangement",
"unsolicited",
"empowerment",
"connections",
"specialists",
"credentials",
"personality",
"established",
"northampton",
"advertising",
"operational",
"mathematics",
"contractors",
"instruments",
"referencing",
"locationsco",
"disciplines",
"corporation",
"investments",
"conferences",
"demonstrate",
"directorate",
"acknowledge",
"legislation",
"designgreat",
"understands",
"perspective",
"association",
"enforcement",
"prestigious",
"individuals",
"alternative",
"technically",
"challenging",
"discussions",
"lifeworking",
"interactive",
"storyboards",
"communicate",
"abilitywork",
"englishgood",
"detailbonus",
"angularwhat",
"neededabout",
"innovations",
"enthusiasts",
"instructors",
"prospective",
"comfortable",
"involvement",
"adventurous",
"marketplace",
"forecasting",
"contractual",
"underpinned",
"acquisition",
"microsofts",
"progression",
"suggestions",
"proficiency",
"participate",
"joblocation",
"methodology",
"continually",
"cataloguing",
"projectgood",
"incremental",
"overarching",
"confidently",
"circulatory",
"adjustments",
"interesting",
"consultants",
"experienceb",
"hourscasual",
"switzerland",
"contributes",
"participant",
"improvement",
"articulates",
"contributed",
"comfortably",
"deployments",
"integrating",
"configuring",
"platforming",
"educatedday",
"contracting",
"monthstotal",
"outsourcing",
"designswork",
"ideasdesign",
"deviceswork",
"fundamental",
"businessjob",
"implemented",
"transaction",
"reliability",
"upgradesyou",
"uncertainty",
"enterpriser",
"teamprovide",
"trafficking",
"doubleclick",
"communities",
"forestlink",
"dimensional",
"coordinator",
"spreadsheet",
"pressurised",
"assignments",
"willingness",
"certificate",
"summaryrole",
"institution",
"segregation",
"preparation",
"electronics",
"duplication",
"surrounding",
"informatica",
"blackfriars",
"terminology",
"shabarinath",
"interfacing",
"expectation",
"proprietary",
"conflicting",
"itecopeople",
"opowershell",
"submissions",
"negotiating",
"escalations",
"transferred",
"protections",
"customizing",
"oxfordshire",
"progressive",
"bishopsgate",
"partnership",
"futureheads",
"permissions",
"efficiently",
"unspecified",
"potentially",
"disclaimers",
"foreseeable",
"sustainable",
"calculation",
"replication",
"constitutes",
"recommended",
"enterprises",
"negotiation",
"imaginative",
"differences",
"nationality",
"impediments",
"refinements",
"translating",
"obligations",
"flexibility",
"unashamedly",
"exclusively",
"replacement",
"essentially",
"artifactory",
"theoretical",
"probability",
"integrators",
"contractor?",
"interested?",
"functioning",
"chamberlain",
"inclusivity",
"iteratively",
"enhancement",
"constraints",
"establishes",
"qualitative",
"influencing",
"procurement",
"experiences",
"furthermore",
"disciplined",
"unnecessary",
"bureaucracy",
"represented",
"siteimprove",
"lokhandwala",
"specialises",
"rationalize",
"competncies",
"restoration",
"allocations",
"admittances",
"furnishings",
"cleanliness",
"residential",
"contactable",
"conventions",
"translation",
"approaching",
"intecselect",
"linguistics",
"southampton",
"beautifully",
"estimations",
"newsletters",
"summarising",
"simulations",
"portfolio's",
"coronavirus",
"opoortunity",
"unavailable",
"accordingly",
"penetration",
"remediation",
"elimination",
"achievement",
"facilitator",
"westminster",
"introducing",
"businesses'",
"capitalists",
"investigate",
"countryside",
"problematic",
"coordinates",
"components'",
"supervision",
"bonavolonta",
"proposition",
"foundations",
"suitability",
"researchers",
"explanation",
"commitments",
"computation",
"questioning",
"experiments",
"visualfiles",
"cloudstream",
"determining",
"deliverable",
"inquisitive",
"backgrounds",
"thoughtspot",
"specialized",
"veloppement",
"importantes",
"typedscript",
"restaurants",
"prophylaxis",
"transmitted",
"appointment",
"encouraging",
"aggregating",
"championing",
"conjunction",
"customising",
"photography",
"authorities",
"competition",
"collections",
"contraintes",
"fonctionnel",
"adaptabilit",
"changements",
"conceptions",
"utilisation",
"shortlisted",
"reusability",
"recognizing",
"decisioning",
"accommodate",
"limitations",
"resourceful",
"algorithmic",
"unconcerned",
"intelligent",
"considerate",
"clientbased",
"accelerator",
"dreamweaver",
"applicant's",
"proactivity",
"aggregation",
"restriction",
"traditional",
"corporately",
"memberships",
"standardise",
"theecsgroup",
"scarchitect",
"consolidate",
"extensively",
"afghanistan",
"encompasses",
"distinctive",
"professions",
"interviewed",
"formulation",
"transitions",
"aspirations",
"ingredients",
"setterfield",
"candidates",
"leatherhead",
"publication",
"undoubtedly",
"basingstoke",
"underground",
"reinsurance",
"exemplifies",
"civiization",
"developer's",
"bazzelgette",
"adjacencies",
"feasibility",
"frontinvest",
"neogotiable",
"unconnected",
"conditional",
"bottlenecks",
"productions",
"pharmacists",
"technicians",
"prescribing",
"stewardship",
"recognising",
"convictions",
"subscribing",
"transparent",
"wireframing",
"insidehmcts",
"justicejobs",
"criminology",
"hospitality",
"structuring",
"educational",
"substantive",
"secondments",
"transgender",
"smartphones",
"microsoft's",
"definitions",
"validations",
"prioritised",
"autoscaling",
"abstraction",
"correlation",
"recognition",
"contributor",
"apigedevops",
"incorporate",
"woocommerce",
"informatics",
"adfadc@apps",
"automations",
"formulating",
"beneficiary",
"referential",
"jsdevsecops",
"solutioning",
"measurement",
"familiarise",
"eligibility",
"standardize",
"experience?",
"bournemouth",
"implementer",
"agilesphere",
"assumptions",
"accountancy",
"cockroachdb",
"promotional",
"facilitates",
"discoveries",
"bladecenter",
"considered!",
"cooperation",
"exploration",
"angulareact",
"preferabbly",
"harmonising",
"convenience",
"inclusively",
"strategists",
"attribution",
"fromscratch",
"combination",
"solutionize",
"accelerated",
"diagnostics",
"sensibility",
"informative",
"intellegnce",
"specilisits",
"projections",
"associative",
"personalize",
"farnborough",
"necessarily",
"nservicebus",
"constrained",
"prioritized",
"behavioural",
"chakraborty",
"leaderships",
"flourishing",
"uniqstudios",
"simplifying",
"realisation",
"extensions!",
"prioritises",
"experience!",
"candidates!",
"inclination",
"stimulating",
"appreciated",
"reinventing",
"compression",
"jscybsecdev",
"equirements",
"generalized",
"compressors",
"assessments",
"beyondtrust",
"engagements",
"numerically",
"electricity",
"interchange",
"jsswift_dev",
"circulating",
"attachments",
"credibility",
"vnetpeering",
"territories",
"staggering!",
"developers!",
"peripherals",
"virtualized",
"bitdefender",
"jssitecorjs",
"positioning",
"appreciates",
"chessington",
"controllers",
"controlling",
"quantifying",
"virtualised",
"manufacture",
"fluorescent",
"governments",
"bigcommerce",
"therapeutic",
"importantly",
"differently",
"rigourously",
"shareholder",
"copywriting",
"anticipated",
"approximate",
"behdarvandi",
"testability",
"beneficial!",
"jswmibmcraw",
"exhibitions",
"talentpoint",
"propagation",
"interviews!",
"solutionise",
"elasticache",
"manoeuvring",
"teamservice",
"geographies",
"efficientip",
"organically",
"advancement",
"jshodanular",
"wholesalers",
"multitenant",
"encouraged?",
"freelancers",
"composition",
"#jobswagger",
"typographic",
"stereotypes",
"clerkenwell",
"sacrificing",
"resolutions",
"technology?",
"advantagous"
]

View File

@ -1,22 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 27/07/2020
* Time: 15:34
*/
const jsonfile = require('jsonfile');
// Words that count in a job's favour; each is written to the brain with weight 3.
const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es', 'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs', 'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote', 'iot', 'mqtt'];
// Words that count against a job; each is written to the brain with weight -5.
const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];

// word -> weight
const brain = new Map();

// Iterate the whole list: the previous `i < length - 1` bound silently dropped
// the last entry of each list ('mqtt' and 'shopify').
for (const word of goodWords)
  brain.set(word, 3);

for (const word of badWords)
  brain.set(word, -5);

// Serialised as an array of [word, weight] pairs.
jsonfile.writeFileSync('brain.json', [...brain]);

View File

@ -1,156 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 22/07/2020
* Time: 10:20
*/
const db = require('./lib/connect');
const log4js = require('log4js');
const logger = log4js.getLogger();
const { Utils } = require('@rakh/utils');
const { Corpus } = require('./lib/corpus');
const SHA = require('crypto-js/sha256');
/*
2604
const mongoose = require('mongoose');
const log4js = require('log4js');
const logger = log4js.getLogger();
const Jobs = require('./models/jobs');
require('dotenv').config();
logger.level = 'debug';
logger.debug(`mongodb://martin:1V3D4m526i@${ process.env.DBHOST }/${ process.env.DBNAME}`);
mongoose.connect(`mongodb://martin:1V3D4m526i@${ process.env.DBHOST }/${ process.env.DBNAME}`);
const mDB = mongoose.connection;
mDB.on('error', console.error.bind(console, 'connection error:'));
*/
const Jobs = require('./lib/mongoManager');
const migrate = (function() {
/**
 * Classify a salary/rate string by the size of the first number found in it.
 *
 * Currency symbols, letters, "ir35", thousands separators and a trailing
 * `.d`/`.dd` decimal part are stripped; hyphens and slashes are treated as
 * separators so a range such as "400-500" is read as 400, not 400500.
 *
 * @param {string} inval salary text; anything that is not a string yields 0
 * @returns {number} 0 when no number is found or it is 100 or less,
 *                   1 when it is above 100 and below 5000,
 *                   2 when it is 5000 or more
 */
function analyseRate(inval) {
  // Rows with a NULL salary must not abort the migration.
  if (typeof inval !== 'string') return 0;

  const cleanerReg = /ir35|[+$#,=&:;()\\£a-z]|\.\d{1,2}/gi;
  const separatorReg = /[\/\-\s]+/g;

  const firstToken = inval
    .replace(cleanerReg, '')
    .replace(separatorReg, ' ')
    .trim()
    .split(' ')[0];

  const item = parseInt(firstToken, 10);

  // NaN fails both comparisons and falls through to 0.
  if (item >= 5000) return 2;
  if (item > 100) return 1;

  return 0;
}
/**
 * Reshape a job row into the `{ details, data }` document layout.
 *
 * `details` holds the fields copied from the row (with "Featured"/"Premium"
 * suffixes removed from the title and a hash of the summary added); `data`
 * holds the derived flags, rate class, corpus result and a timestamp
 * multiplied by 1000. `read` is always reset to 0.
 *
 * @param d job row
 * @returns {{details: Object, data: Object}}
 */
function reduceData(d) {
  const premiumAfterNewline = /(\n+)(Featured|Premium)/gi;
  const premiumAfterArrow = /((↵\s+)+)(Featured|Premium)/gi;
  const copiedFields = ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp'];

  const details = Utils.extractFromObj(d, copiedFields);

  details.title = details.title
    .replace(premiumAfterNewline, '')
    .replace(premiumAfterArrow, '');
  details.hashed = SHA(details.summary);

  return {
    'details': details,
    'data': {
      'read': 0,
      'applied': d.applied || 0,
      'jobtype': analyseRate(d.salary),
      'class': 0,
      'autoclass': Corpus.process(d.summary),
      'timestamp': d.timestamp * 1000
    }
  };
}
/**
 * Load every job row from the SQL database, oldest `_id` first, with its
 * `applied` and `read` flags joined in.
 *
 * @returns {Promise<Array>} resolves with the rows; rejects with the database error
 */
function getCurrent() {
  console.log('get version');

  const sql = 'select jobs.*, applied.a as applied, read.d as read from jobs left join applied on applied.aid = jobs._id left join read on read.rid = jobs._id order by _id asc;';

  return new Promise((resolve, reject) => {
    db.all(sql, [], (err, rows) => {
      if (err) {
        // Stop here: on failure `rows` is undefined, and carrying on used to
        // throw a TypeError from inside the database callback.
        reject(err);

        return;
      }

      resolve(rows);
    });
  });
}
/**
 * Copy every row returned by `getCurrent()` into MongoDB, one at a time, then
 * export the corpus statistics.
 *
 * A row whose save fails is logged and skipped. A failure while loading the
 * rows or reshaping one aborts the run; it is logged and `start()` still
 * resolves.
 *
 * @returns {Promise<void>}
 */
async function start() {
  try {
    const rows = await getCurrent();

    logger.debug(rows.length);

    // Walk every row: the previous `t < length - 1` bound never migrated the last one.
    for (const row of rows) {
      const newJob = new Jobs(reduceData(row));

      try {
        const saved = await newJob.save();

        logger.debug('m', saved.details.title);
      }
      catch (err) {
        // keyPattern is only present on duplicate-key errors; fall back to the
        // error itself so other failures are not logged as `undefined`.
        logger.error(err.keyPattern || err);
      }
    }

    logger.debug('SAVING!!');
    Corpus.exportUnused();
  }
  catch (err) {
    logger.error(err.keyPattern || err);
  }
}
/**
 * Delete jobs whose `data.timestamp` is more than 14 days old and that have
 * not been applied for (`data.applied` is 0).
 *
 * The delete is awaited, so the returned promise settles only once it has
 * finished; a failure is logged rather than thrown.
 *
 * @returns {Promise<void>}
 */
async function deleteOld() {
  const oneDay = 86400000;
  const twoWeeksAgo = new Date().getTime() - (14 * oneDay);
  const filter = { 'data.timestamp': { '$lt': twoWeeksAgo }, 'data.applied': 0 };

  logger.debug('Delete older than: ', new Date(twoWeeksAgo), twoWeeksAgo);
  logger.debug(filter);

  try {
    const m = await Jobs.deleteMany(filter);

    logger.debug('m', m);
  }
  catch (err) {
    logger.error(err);
  }
}
// newJob.find({ 'data': { 'timestamp': { '$lt': 1587034346000 } } });
return {
'start':start,
'deleteOld': deleteOld
};
})();
// Entry point: run the migration, then the clean-up of old jobs, then log that
// the script reached the end.
(async function() {
await migrate.start();
await migrate.deleteOld();
logger.info('Done??');
})();

View File

@ -1,47 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 22/07/2020
* Time: 14:18
*/
const mongoose = require('mongoose');
const Schema = mongoose.Schema;
// Mongoose schema for one job advert. `details` holds the advert's own fields;
// `data` holds per-job flags, classification results and timestamps.
const jobSchema = new Schema({
'details': {
// title, site and url are mandatory; url is also declared unique.
'title': { 'type': String, 'required': true },
'site': { 'type': String, 'required': true },
'url': { 'type': String, 'required': true, 'unique': true },
'id': String,
'summary': String,
'company': String,
'location': String,
'postdate': String,
'salary': String,
'easyapply': Number,
'timestamp': Number,
// Mandatory and declared unique.
'hashed' : { 'type': String, 'required':true, 'unique':true }
},
'data': {
// Numeric flags and classes; all default to 0.
'read': { 'type': Number, 'default': 0 },
'applied': { 'type': Number, 'default': 0 },
'jobtype': { 'type': Number, 'default': 0 },
'class': { 'type': Number, 'default': 0 },
// Word-list classification: matched good/bad words, all kept words, and a score.
'autoclass': {
'good': Array,
'bad': Array,
'words': Array,
'score': { 'type': Number, 'default': 0 }
},
'timestamp': { 'type': Number, 'default': 0 },
// Defaults to the time the document is created (Date.now is passed as a function).
'created_at': { 'type': Date, 'default': Date.now }
}
});
// Turn off Mongoose's `useFindAndModify` option globally.
mongoose.set('useFindAndModify', false);
// Compile the schema into the 'Jobs' model and export it.
const Jobs = mongoose.model('Jobs', jobSchema);
module.exports = Jobs;

View File

@ -1,66 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 16/04/2020
* Time: 23:35
*/
const CronJob = require('cron').CronJob;
const IndeedScraper = require('./scrapers/indeed');
const TotaljobsScraper = require('./scrapers/totaljobs');
const CwjobsScraper = require('./scrapers/cwjobs');
const JobserveScraper = require('./scrapers/rss.jobserve');
const RssS1Jobs = require('./scrapers/rss.s1jobs');
const RssTechnojobs = require('./scrapers/rss.technojobs');
// One-off run of every scraper, strictly one request after another: the three
// site scrapers for each location, then the JobServe, s1jobs and Technojobs feeds.
(async function () {
  console.log('Started..');

  const indeedScraper = new IndeedScraper();
  const totaljobsScraper = new TotaljobsScraper();
  const cwjobsScraper = new CwjobsScraper();
  const jobserveScraper = new JobserveScraper();
  const s1jobsScraper = new RssS1Jobs();
  const technojobsScraper = new RssTechnojobs();

  const locations = ['london', 'glasgow', 'edinburgh', 'milton keynes'];
  const siteScrapers = [indeedScraper, totaljobsScraper, cwjobsScraper];

  for (const location of locations)
    for (const scraper of siteScrapers)
      await scraper.go(location);

  const jobserveSearches = [
    'BAEBF3BDF82B8FEF',
    '9BCBF25C586A0E3F',
    'F3A56475D5FD4966',
    '4E2AC50E02AD128B',
    '6DA9769BA89834AA',
    'EDF47BEA6B31EF',
    '3CAD044BEF2BFA',
    'C7B25D86D0844A',
    '64A3EEF615FA4C',
    '6FC7E9ED5F042ECB',
    'CA49421A86CA3F74',
    '846CDA8658FF93A3',
    'ED1708BF42EF3513', // javascript node 2 Jul 2020
    '4C67595E323E3453', // vuejs 2 Jul 2020
    'DCD6B8CE431FE402' // svelte 2 Jul 2020
  ];

  for (const search of jobserveSearches)
    await jobserveScraper.go(`https://www.jobserve.com/MySearch/${search}.rss`);

  const s1jobsFeeds = [
    'm7dp711z2r',
    'pfvf7o7z2r',
    'lluqnt8z2r',
    'tu33qt8z2r',
    'u3btnz8z2r',
    'b1d7e6c3a9a11964z3r',
    'ddeded091b6f6d33z3r'
  ];

  for (const feed of s1jobsFeeds)
    await s1jobsScraper.go(`http://www.s1jobs.com/xml/${feed}.xml`);

  // Location segment exactly as it appears in each Technojobs feed URL.
  const technojobsLocations = ['glasgow', 'LONDON', 'Milton%20Keynes'];

  for (const place of technojobsLocations)
    await technojobsScraper.go(`https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/location${place}/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1`);
})();

1296
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,31 +1,23 @@
{
"name": "jobscraper",
"version": "1.0.2",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"release": "vik patch -t",
"grabber": "node grabber.js",
"server" : "node server/server.js"
"grabber": "node grabber.js"
},
"author": "",
"license": "ISC",
"dependencies": {
"@rakh/utils": "^1.0.0",
"axios": "^0.19.2",
"bayes": "^1.0.0",
"body-parser": "^1.19.0",
"cheerio": "^1.0.0-rc.3",
"cron": "^1.8.2",
"crypto-js": "^4.0.0",
"dotenv": "^8.2.0",
"eslint": "^6.8.0",
"express": "^4.17.1",
"fecha": "^4.2.0",
"got": "^11.2.0",
"jsonfile": "^6.0.1",
"log4js": "^6.3.0",
"mongoose": "^5.9.25",
"present": "^1.0.0",
"rss-parser": "^3.8.0",
"sqlite3": "^4.1.1",

View File

@ -1,45 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 28/07/2020
* Time: 10:51
*/
const fs = require('fs');
var bayes = require('bayes');
// Classifier fed with comma-separated keyword lists, so the tokenizer
// only has to split the incoming text on commas.
const classifier = bayes({
  'tokenizer': (text) => text.split(',')
});
// teach it positive phrases
/**
 * Seeds the classifier with the known "good" and "bad" keywords, prints the
 * category it assigns to two sample keyword lists, then writes the trained
 * state to brain.json.
 *
 * @returns {Promise<void>} settles once the state file has been written
 */
async function load() {
  const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es', 'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs', 'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote', 'iot', 'mqtt', 'es6', 'es2016', 'es2017', 'es2018', 'freelance'];
  const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];

  // Walk every entry. The previous `i < length - 1` bound silently skipped
  // the last word of each list ('freelance' and 'shopify').
  for (const word of goodWords) {
    await classifier.learn(word, 'good');
  }
  for (const word of badWords) {
    await classifier.learn(word, 'bad');
  }

  // now ask it to categorize a document it has never seen before
  console.log(await classifier.categorize(['ui', 'developer', 'london', 'react'].join(',')));
  console.log(await classifier.categorize(['mysql', 'react', 'js', 'node', 'docker', 'kubernetes', 'google'].join(',')));

  // serialize the classifier's state as a JSON string.
  const stateJson = classifier.toJson();
  console.log(stateJson);
  fs.writeFileSync('brain.json', stateJson);
}
load();

View File

@ -20,7 +20,7 @@ class CwjobsScraper extends TotaljobsScraper {
}
async go(location = 'london') {
this.setStartUrl(`https://www.cwjobs.co.uk/jobs/contract/html-or-angular-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
this.setStartUrl(`https://www.cwjobs.co.uk/jobs/contract/html-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
// this.setStartUrl('https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=javascript+nodejs&as_not=&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=25&l=london&fromage=7&limit=10&sort=date&psf=advsrch&from=advancedsearch');
// Glasgow

View File

@ -11,7 +11,7 @@ const cheerio = require('cheerio');
const MasterScraper = require('../lib/scraper');
class IndeedScraper extends MasterScraper {
constructor() {
super();
this.siteurl = 'www.indeed.co.uk';
@ -23,15 +23,15 @@ class IndeedScraper extends MasterScraper {
this.antiAd = /sja\d+/gi;
}
// Site specific parts below here
async breakPage() {
const $ = this.currentPage;
const ads = [];
const sections = $('div.row.result');
await sections.each(async (index, item) => {
// console.log($(item).html());
const ad = await this.extractDetails(item);
@ -73,7 +73,7 @@ class IndeedScraper extends MasterScraper {
return newObj;
}
async getIndividualPage(item) {
const newItem = {...item};
console.log('Getting', item.url);
@ -100,10 +100,10 @@ class IndeedScraper extends MasterScraper {
async checkNext() {
const $ = this.currentPage;
const next = $('.pagination > *:last-child').attr('href') || '';
if (next !== '')
if (next !== '')
// next = `https://${ this.siteurl }${next}`;
this.makeUrl(next);
console.log(next);
}
@ -122,9 +122,9 @@ class IndeedScraper extends MasterScraper {
await this.checkNext();
await this.getJobPages();
// nextPage = await this.checkNext();
// if (nextPage === previousPage) nextPage = '';
// this.setStartUrl(nextPage);
@ -133,19 +133,16 @@ class IndeedScraper extends MasterScraper {
await this.filterAdverts();
await this.addToDB();
await this.addToMongo();
}
async go(location = 'london') {
this.setStartUrl(`https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=Angular+Html+Web+Sql+Delphi+Vb+Vbscript+Php+Ajax+Mysql+Sqlserver+Javascript+Nodejs+vuejs+sveltejs&as_not=React&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=0&l=${encodeURIComponent(location)}&fromage=1&limit=50&sort=&psf=advsrch&from=advancedsearch`);
this.setStartUrl(`https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=Html+Web+Sql+Delphi+Vb+Vbscript+Php+Ajax+Mysql+Sqlserver+Javascript+Nodejs+vuejs+sveltejs&as_not=React&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=0&l=${encodeURIComponent(location)}&fromage=1&limit=50&sort=&psf=advsrch&from=advancedsearch`);
await this.processSite().catch((err) => {
console.error('Indeed Go', err);
});
await this.processSite();
console.log(`Indeed ${location} completed`);
}
}
module.exports = IndeedScraper;

View File

@ -140,7 +140,6 @@ class IndeedMobileScraper extends MasterScraper {
await this.filterAdverts();
await this.addToDB();
await this.addToMongo();
}
async go(location = 'london') {

View File

@ -22,10 +22,7 @@ class TotaljobsScraper extends MasterScraper {
}
// Site specific parts below here
/**
*
* @returns {Promise<void>}
*/
async breakPage() {
const $ = this.currentPage;
const ads = [];
@ -42,11 +39,6 @@ class TotaljobsScraper extends MasterScraper {
this.items = [...this.items, ...ads];
}
/**
*
* @param part
* @returns {Promise<{}>}
*/
async extractDetails(part) {
const newObj = {};
const $part = cheerio.load(part);
@ -69,11 +61,6 @@ class TotaljobsScraper extends MasterScraper {
return newObj;
}
/**
*
* @param item
* @returns {Promise<*>}
*/
async getIndividualPage(item) {
const newItem = {...item};
console.log('Getting', item.url);
@ -88,10 +75,6 @@ class TotaljobsScraper extends MasterScraper {
return newItem;
}
/**
*
* @returns {Promise<void>}
*/
async getJobPages() {
const newItems = [];
for (let item of this.items) {
@ -103,10 +86,6 @@ class TotaljobsScraper extends MasterScraper {
this.items = [...newItems];
}
/**
*
* @returns {Promise<void>}
*/
async checkNext() {
const $ = this.currentPage;
const next = $('.pagination > *:last-child').attr('href') || '';
@ -117,10 +96,6 @@ class TotaljobsScraper extends MasterScraper {
console.log(next);
}
/**
*
* @returns {Promise<void>}
*/
async processSite() {
console.log('Processing...');
@ -146,16 +121,10 @@ class TotaljobsScraper extends MasterScraper {
await this.filterAdverts();
await this.addToDB();
await this.addToMongo();
}
/**
*
* @param location
* @returns {Promise<void>}
*/
async go(location = 'london') {
this.setStartUrl(`https://www.totaljobs.com/jobs/contract/html-or-angular-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
this.setStartUrl(`https://www.totaljobs.com/jobs/contract/html-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);
// this.setStartUrl('https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=javascript+nodejs&as_not=&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=25&l=london&fromage=7&limit=10&sort=date&psf=advsrch&from=advancedsearch');
// Glasgow

View File

@ -1,81 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 10/09/2020
* Time: 16:07
*/
const Jobs = require('../../lib/mongoManager');
const { Utils } = require('@rakh/utils');
exports.markApplied = (req, res) => {
console.log('>V2 markApplied req', req.params);
if(!req.params.id)
return res.status(500).send({
'message': 'Job id missing'
});
const aid = req.params.id;
const now = new Date().getTime();
// touchOne
console.log('aid', aid);
Jobs.updateMany({ '_id':aid }, { '$set': { 'data.applied':now } } ).then((data) => {
console.log(data);
res.status(200).end();
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
/*
dbmanager.appliedOne({ aid, a })
.then((data) => {
console.log(data);
res.status(200).end();
})
.catch((err) => {
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
*/
};
exports.markAllRead = (req, res) => {
console.log('>V2 markAllRead req', req.params);
const now = new Date().getTime();
Jobs.updateMany({ 'data.read':0 }, { '$set': { 'data.read':now } } ).then((data) => {
console.log(data);
res.status(200).end();
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
/*
dbmanager.markAllRead()
.then((data) => {
console.log(data);
res.status(200).end();
})
.catch((err) => {
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
*/
};

View File

@ -1,124 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 24/07/2020
* Time: 11:45
*/
const Jobs = require('../../lib/mongoManager');
const { Utils } = require('@rakh/utils');
const killNLDoubleSpace = /(\\n)\s{2,}|(\\n)|\s{2,}/g;
/**
 * Builds one summary object per job document.
 *
 * For each entry the `details` and `data` sub-objects are merged (keys in
 * `data` win), the listed summary fields are picked out with
 * Utils.extractFromObj, and the document `_id` is attached.
 *
 * @param data array of job documents
 * @returns an array of summary objects, or '' when called without an
 *          argument or with null
 */
function reduceList(data) {
  // Nothing was passed, or an explicit null: there is nothing to reduce.
  if (arguments.length === 0 || data === null) {
    return '';
  }

  return data.map((job) => {
    const merged = { ...job.details, ...job.data, _id: job._id };
    const summary = Utils.extractFromObj(merged, ['title', 'site', 'company', 'timestamp', 'read', 'applied', 'jobtype', 'class', 'autoclass']);
    summary._id = job._id;
    return summary;
  });
}
/**
 * Flattens a job record: every key of `record.details` is lifted to the top
 * level, while `data` and `_id` are carried over under their own names
 * (overriding same-named keys from `details`).
 *
 * @param record object with `details`, `data` and `_id` properties
 * @returns a new flattened object; `record` itself is not modified
 */
function reduceRecord(record) {
  const { details, data, _id } = record;
  return { ...details, data, _id };
}
exports.getList = (req, res) => {
console.log('>getList req', req.params);
Jobs.find({}, { 'details.title':1, 'details.site':1, 'details.company':1, 'data':1, '_id':1 }).limit(200).sort( { 'data.timestamp': -1 } ).then((doc) => {
if (doc) {
res.send(reduceList(doc));
}
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
};
exports.getJob = (req, res) => {
console.log('>getJob req', req.params);
if(!req.params.id)
return res.status(500).send({
'message': 'Job id missing'
});
const id = req.params.id;
Jobs.findById(id).then((doc) => {
if (doc) {
const item = reduceRecord(doc._doc);
const date = new Date( item.timestamp * 1000);
console.log(item);
item.date = date.toLocaleString();
item.title = item.title.replace(killNLDoubleSpace, ' ');
res.send(item);
}
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
};
exports.readJob = (req, res) => {
console.log('>readJob req', req.params);
let id;
if(!req.params.id)
return res.status(500).send({
'message': 'Job id missing'
});
else
id = req.params.id;
Jobs.findById(id).then((doc) => {
if (doc) {
let fullDoc = Object.assign({}, doc._doc);
console.log('fullDoc', fullDoc);
if (!Utils.isEmpty(fullDoc)){
fullDoc.data.read = new Date().getTime();
Jobs.findByIdAndUpdate(id, fullDoc, {'new':true}).then((doc) => {
console.log(doc._doc);
res.status(200).end();
}).catch((err) => {
console.error('inside',err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
}
}
}).catch((err) => {
console.error('outer', err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
};

View File

@ -1,89 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 28/07/2020
* Time: 11:08
*/
const Jobs = require('../../lib/mongoManager');
const { Utils } = require('@rakh/utils');
const fs = require('fs');
var bayes = require('bayes');
// Classifier fed with comma-separated keyword lists, so the tokenizer only
// splits on commas. Declared with `let` because load() reassigns it.
let classifier = bayes({
  'tokenizer': (text) => text.split(',')
});
/**
 * Replaces the module-level classifier with one rebuilt from the contents
 * of brain.json (read synchronously).
 */
function load() {
  classifier = bayes.fromJson(fs.readFileSync('brain.json'));
}
/**
 * Serialises the current classifier state, logs it, and writes it to
 * brain.json synchronously.
 */
function save() {
  const serialised = classifier.toJson();

  console.log(serialised);
  fs.writeFileSync('brain.json', serialised);
}
load();
exports.upvote = (req, res) => {
console.log('>upvote req', req.params);
if(!req.params.id)
return res.status(500).send({
'message': 'Job id missing'
});
const id = req.params.id;
Jobs.findById(id).then(async (doc) => {
if (doc) {
const words = doc._doc.data.autoclass.words.join(',');
await classifier.learn(words, 'good');
save();
res.status(200).end();
}
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
};
exports.downvote = (req, res) => {
console.log('>upvote req', req.params);
if(!req.params.id)
return res.status(500).send({
'message': 'Job id missing'
});
const id = req.params.id;
Jobs.findById(id).then(async (doc) => {
if (doc) {
const words = doc._doc.data.autoclass.words.join(',');
await classifier.learn(words, 'bad');
save();
res.status(200).end();
}
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
};

View File

@ -1,47 +0,0 @@
css-loader
MIT
Copyright JS Foundation and other contributors
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
zone.js
MIT
The MIT License
Copyright (c) 2010-2020 Google LLC. http://angular.io/license
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1 +0,0 @@
!function(e){function r(r){for(var n,l,f=r[0],i=r[1],p=r[2],c=0,s=[];c<f.length;c++)l=f[c],Object.prototype.hasOwnProperty.call(o,l)&&o[l]&&s.push(o[l][0]),o[l]=0;for(n in i)Object.prototype.hasOwnProperty.call(i,n)&&(e[n]=i[n]);for(a&&a(r);s.length;)s.shift()();return u.push.apply(u,p||[]),t()}function t(){for(var e,r=0;r<u.length;r++){for(var t=u[r],n=!0,f=1;f<t.length;f++)0!==o[t[f]]&&(n=!1);n&&(u.splice(r--,1),e=l(l.s=t[0]))}return e}var n={},o={0:0},u=[];function l(r){if(n[r])return n[r].exports;var t=n[r]={i:r,l:!1,exports:{}};return e[r].call(t.exports,t,t.exports,l),t.l=!0,t.exports}l.m=e,l.c=n,l.d=function(e,r,t){l.o(e,r)||Object.defineProperty(e,r,{enumerable:!0,get:t})},l.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},l.t=function(e,r){if(1&r&&(e=l(e)),8&r)return e;if(4&r&&"object"==typeof e&&e&&e.__esModule)return e;var t=Object.create(null);if(l.r(t),Object.defineProperty(t,"default",{enumerable:!0,value:e}),2&r&&"string"!=typeof e)for(var n in e)l.d(t,n,(function(r){return e[r]}).bind(null,n));return t},l.n=function(e){var r=e&&e.__esModule?function(){return e.default}:function(){return e};return l.d(r,"a",r),r},l.o=function(e,r){return Object.prototype.hasOwnProperty.call(e,r)},l.p="";var f=window.webpackJsonp=window.webpackJsonp||[],i=f.push.bind(f);f.push=r,f=f.slice();for(var p=0;p<f.length;p++)r(f[p]);var a=i;t()}([]);

File diff suppressed because one or more lines are too long

View File

@ -1,24 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 10/09/2020
* Time: 16:06
*/
/**
* Created by WebStorm.
* User: martin
* Date: 25/05/2020
* Time: 13:36
*/
const apply = require('../controllers/apply.v2.controller');
module.exports = (app) => {
app.route('/v2/apply/:id')
.put(apply.markApplied);
app.route('/v2/readall')
.put(apply.markAllRead);
};

View File

@ -1,17 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 24/07/2020
* Time: 11:42
*/
const jobs = require('../controllers/jobs.v2.controller');
module.exports = (app) => {
app.route('/v2/jobs')
.get(jobs.getList);
app.route('/v2/jobs/:id')
.get(jobs.getJob)
.put(jobs.readJob);
};

View File

@ -1,17 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 28/07/2020
* Time: 11:07
*/
const vote = require('../controllers/vote.controller');
module.exports = (app) => {
app.route('/vote/up/:id')
.put(vote.upvote);
app.route('/vote/down/:id')
.put(vote.downvote);
};

View File

@ -58,9 +58,7 @@ app.use(bodyParser.json());
app.post('/auth', auth.auth);
require('./routes/jobs.route')(app);
require('./routes/jobs.v2.route')(app);
require('./routes/apply.v2.route')(app);
require('./routes/vote.route')(app);
require('./routes/apply.route')(app);
app.listen(serverPort, () => {
console.log(`Server is listening on port ${serverPort}`);

File diff suppressed because one or more lines are too long

View File

@ -20,7 +20,7 @@ const indeedScraper = new IndeedScraper();
// const page = fs.readFileSync('data/indeed/indeed-2020-04-16--092311.html');
const page = fs.readFileSync('data/indeed/page2.html');
test.skip('Test Indeed scraper', async t => {
test.test('Test Indeed scraper', async t => {
const $ = cheerio.load(page);
indeedScraper.loadPage($);
@ -35,36 +35,13 @@ test.skip('Test Indeed scraper', async t => {
await indeedScraper.filterAdverts();
await indeedScraper.addToMongo();
// await indeedScraper.addToDB();
t.end();
});
test.skip('Test full run Indeed scraper', async t => {
await indeedScraper.go('london').catch((err) => {
console.error('Indeed GO', err);
});
test.test('Test full run Indeed scraper', async t => {
await indeedScraper.go('london');
t.end();
});
test.test('Test Indeed scraper -- MONGO', async t => {
const $ = cheerio.load(page);
indeedScraper.loadPage($);
await indeedScraper.breakPage();
// await indeedScraper.getJobPages();
// console.log(await indeedScraper.checkNext());
// console.log(indeedScraper.items);
// await indeedScraper.filterAdverts();
await indeedScraper.addToMongo();
t.end();
});

View File

@ -26,14 +26,13 @@ const s1jobsScraper = new RssS1Jobs();
const feed = fs.readFileSync('test/data/s1jobs/m7dp711z2r.xml');
test.test('Test Jobserve scraper', async t => {
let url = 'http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml';
await s1jobsScraper.setStartUrl(url);
s1jobsScraper.reduceItems();
await s1jobsScraper.filterAdverts();
// await s1jobsScraper.addToDB();
await s1jobsScraper.addToDB();
t.end();
});

View File

@ -19,17 +19,17 @@ const testScraper = new RssTechnojobs();
const feed = fs.readFileSync('test/data/technojobs/page1');
test.test('Test Technojobs scraper', async t => {
await testScraper.loadFeed('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
// await testScraper.loadFeed(feed);
await testScraper.reduceItems();
// testScraper.reduceItems();
await s1jobsScraper.filterAdverts();
// await s1jobsScraper.filterAdverts();
// await s1jobsScraper.addToDB();
/* await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
*/
t.end();
});

View File

@ -22,20 +22,20 @@ console.log(`${__dirname}`);
const page = fs.readFileSync(`${__dirname}/data/totaljobs/totaljobs-2020-04-16--121504.html`);
test.test('Test Totaljobs scraper', async t => {
const $ = cheerio.load(page);
const $ = cheerio.load(page);
totaljobsScraper.loadPage($);
totaljobsScraper.loadPage($);
await totaljobsScraper.breakPage();
await totaljobsScraper.breakPage();
await totaljobsScraper.getJobPages();
// console.log(await indeedScraper.checkNext());
await totaljobsScraper.getJobPages();
// console.log(await indeedScraper.checkNext());
// console.log(totaljobsScraper.items);
console.log(totaljobsScraper.items);
await totaljobsScraper.filterAdverts();
await totaljobsScraper.filterAdverts();
// await totaljobsScraper.addToDB();
// await totaljobsScraper.addToDB();
t.end();
t.end();
});

View File

@ -1,14 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 23/07/2020
* Time: 09:26
*/
const { Corpus } = require('../lib/corpus');
const text = 'ESTAMP DEVELOPER 6 month contract £450-525 / day Developer, SQL, Photoshop, Javascript,  NET, C#, Javascript Advanced knowledge of SQL Server TSQL Experience of the design and  PDF stamp development E-STAMP DEVELOPER 6 month contract';
// Run the sample advert text through the corpus processor and print the result.
console.log(Corpus.process(text));

View File

@ -1,71 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 16/04/2020
* Time: 23:35
*/
const CronJob = require('cron').CronJob;
const IndeedScraper = require('./scrapers/indeed');
const TotaljobsScraper = require('./scrapers/totaljobs');
const CwjobsScraper = require('./scrapers/cwjobs');
const JobserveScraper = require('./scrapers/rss.jobserve');
const RssS1Jobs = require('./scrapers/rss.s1jobs');
const RssTechnojobs = require('./scrapers/rss.technojobs');
// Entry point: runs the three site scrapers (Indeed, Totaljobs, CWJobs) one
// after another for every location, in the order location -> scraper.
// A failing scraper aborts the remaining work; the failure is logged and the
// process exit code is set to 1 instead of surfacing as an unhandled
// promise rejection.
(async function () {
  console.log('Started..');

  const indeedScraper = new IndeedScraper();
  const totaljobsScraper = new TotaljobsScraper();
  const cwjobsScraper = new CwjobsScraper();
  // The feed scrapers are only used by the disabled block below; they are
  // still constructed so that start-up behaviour stays exactly as it was.
  const jobserveScraper = new JobserveScraper();
  const s1jobsScraper = new RssS1Jobs();
  const technojobsScraper = new RssTechnojobs();

  const locations = ['london', 'glasgow', 'edinburgh', 'milton keynes'];
  const siteScrapers = [indeedScraper, totaljobsScraper, cwjobsScraper];

  // Deliberately sequential: one request stream at a time.
  for (const location of locations) {
    for (const scraper of siteScrapers) {
      await scraper.go(location);
    }
  }

  /*
  await jobserveScraper.go('https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/4E2AC50E02AD128B.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/6DA9769BA89834AA.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/EDF47BEA6B31EF.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/3CAD044BEF2BFA.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/C7B25D86D0844A.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/64A3EEF615FA4C.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/CA49421A86CA3F74.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/846CDA8658FF93A3.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/ED1708BF42EF3513.rss'); // javascript node 2 Jul 2020
  await jobserveScraper.go('https://www.jobserve.com/MySearch/4C67595E323E3453.rss'); // vuejs 2 Jul 2020
  await jobserveScraper.go('https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss'); // svelte 2 Jul 2020
  await s1jobsScraper.go('http://www.s1jobs.com/xml/m7dp711z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/pfvf7o7z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/lluqnt8z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/tu33qt8z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/u3btnz8z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');
  await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
  await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
  await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
  */
})().catch((err) => {
  console.error('Grabber run failed', err);
  process.exitCode = 1;
});

File diff suppressed because one or more lines are too long

View File

@ -1,22 +0,0 @@
/**
* Created by WebStorm.
* User: martin
* Date: 27/07/2020
* Time: 10:08
*/
const jsonfile = require('jsonfile');
const data = require('./unused.json');
/**
 * Writes to limited.json the distinct entries of `data` whose `length`
 * equals `size` (first-occurrence order), then logs 'done'.
 *
 * @param size exact length an entry must have to be kept
 */
function show(size) {
  const matching = data.filter((entry) => entry.length === size);
  const unique = Array.from(new Set(matching));

  jsonfile.writeFileSync('limited.json', unique);
  console.log('done');
}
show(11);