WIP: Adding brain

This commit is contained in:
Martin Donnelly 2020-08-24 09:35:30 +01:00
parent 8300828cd1
commit 6a23583b5b
35 changed files with 21287 additions and 19 deletions

32
.editorconfig Normal file
View File

@ -0,0 +1,32 @@
; http://editorconfig.org
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
indent_style = space
indent_size = 2
[*.txt]
insert_final_newline = false
trim_trailing_whitespace = false
[*.py]
indent_size = 4
[*.m]
indent_size = 4
[Makefile]
indent_style = tab
indent_size = 8
[*.{js,json}]
indent_style = space
indent_size = 2
[*.md]
trim_trailing_whitespace = false

View File

@ -9,7 +9,7 @@
"env": {
"browser": true,
"node": true,
"es6": true
"es2017": true
},
"rules": {
"arrow-spacing": "error",

7178
biglist.json Normal file

File diff suppressed because it is too large Load Diff

204
brain.json Normal file
View File

@ -0,0 +1,204 @@
{
"categories": {
"good": true,
"bad": true
},
"docCount": {
"good": 43,
"bad": 5
},
"totalDocuments": 48,
"vocabulary": {
"tsql": true,
"developer": true,
"contract": true,
"web": true,
"javascript": true,
"js": true,
"node": true,
"es": true,
"agile": true,
"nodejs": true,
"london": true,
"aws": true,
"sql": true,
"postgresql": true,
"mysql": true,
"docker": true,
"ecs": true,
"automation": true,
"jslint": true,
"jshint": true,
"vuejs": true,
"vue": true,
"nginx": true,
"remotely": true,
"mvc": true,
"remote": true,
"iot": true,
"mqtt": true,
"es6": true,
"es2016": true,
"es2017": true,
"es2018": true,
"react": true,
"redux": true,
"graphql": true,
"java": true,
"reactjs": true,
"apps": true,
"html": true,
"css": true,
"code": true,
"angular": true,
"ember": true,
"restful": true,
"apis": true,
"infrastructure": true,
"software": true,
"native": true,
"med": true,
"mobile": true,
"client": true,
"applications": true,
"digital": true,
"analytics": true,
"dashboarding": true,
"online": true,
"analyse": true,
"dashboards": true,
"google": true,
"query": true,
"data": true,
"stakeholders": true,
"enhancements": true,
"requirements": true,
"c": true,
"net": true,
"technologies": true,
"azure": true,
"understanding": true,
"devops": true,
"tools": true,
"frameworks": true,
"scotland": true,
"responsibility": true,
"programme": true,
"functions": true,
"asp": true,
"project": true,
"transform": true,
"collaborative": true,
"technical": true,
"framework": true,
"nhibernate": true,
"server": true,
"api": true,
"development": true,
"lifecycle": true,
"specification": true,
"appointments": true
},
"vocabularySize": 89,
"wordCount": {
"good": 157,
"bad": 5
},
"wordFrequencyCount": {
"good": {
"tsql": 1,
"developer": 6,
"contract": 9,
"web": 6,
"javascript": 7,
"js": 3,
"node": 2,
"es": 1,
"agile": 2,
"nodejs": 1,
"london": 3,
"aws": 3,
"sql": 3,
"postgresql": 1,
"mysql": 1,
"docker": 1,
"ecs": 1,
"automation": 1,
"jslint": 1,
"jshint": 1,
"vuejs": 1,
"vue": 2,
"nginx": 1,
"remotely": 1,
"mvc": 5,
"remote": 2,
"iot": 1,
"mqtt": 1,
"es6": 1,
"es2016": 1,
"es2017": 1,
"es2018": 1,
"apps": 1,
"html": 5,
"css": 5,
"code": 2,
"react": 2,
"angular": 1,
"ember": 1,
"restful": 1,
"apis": 1,
"infrastructure": 1,
"software": 2,
"native": 1,
"med": 1,
"mobile": 1,
"client": 4,
"applications": 2,
"digital": 2,
"analytics": 1,
"dashboarding": 1,
"online": 1,
"analyse": 1,
"dashboards": 1,
"google": 1,
"query": 1,
"data": 1,
"stakeholders": 1,
"enhancements": 3,
"requirements": 3,
"c": 4,
"net": 5,
"technologies": 4,
"azure": 2,
"understanding": 1,
"devops": 2,
"tools": 1,
"frameworks": 1,
"scotland": 1,
"responsibility": 1,
"programme": 1,
"functions": 1,
"asp": 1,
"project": 1,
"transform": 1,
"collaborative": 1,
"technical": 1,
"framework": 1,
"nhibernate": 1,
"server": 1,
"api": 1,
"development": 1,
"lifecycle": 1,
"specification": 1,
"appointments": 1
},
"bad": {
"react": 1,
"redux": 1,
"graphql": 1,
"java": 1,
"reactjs": 1
}
},
"options": {}
}

Binary file not shown.

View File

@ -8,6 +8,10 @@
const filterReject = require('../lib/filter_reject');
const filterAccept = require('../lib/filter_md_jobs');
const dbmanager = require('../lib/dbmanager');
const Jobs = require('../lib/mongoManager');
const { Utils } = require('@rakh/utils');
const { Corpus } = require('./corpus');
class MasterBase {
@ -57,6 +61,56 @@ class MasterBase {
});
}
addToMongo() {
console.log('>> ADD TO MONGO!');
for(const item of this.items) {
// console.log('add', item);
const newObj = this.reduceData(item);
const newJob = Jobs(newObj);
newJob.save().then((m) => {
console.log('m', m.details.title);
}).catch((err) => {
console.error(err);
});
}
}
analyseRate(inval) {
console.log('analyseRate', inval);
let outVal = 0;
const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
const clearSpace = /\s+/g;
const result = inval.replace(cleanerReg, '').replace(clearSpace, ' ');
const resultArray = result.trim().split((' '));
if (resultArray.length > 0) {
const item = parseInt(resultArray[0], 10);
if (item < 100) outVal = 0;
else if ((item > 100) && (item < 5000)) outVal = 1;
else if (item >= 5000) outVal = 2;
}
else return 0;
return outVal;
}
reduceData(d) {
const outObj = { 'details':{}, 'data':{ 'read':0, 'applied':0, 'jobtype': 0, 'class':0, 'autoclass':0 } };
outObj.details = Utils.extractFromObj(d, ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp']);
outObj.data.jobtype = this.analyseRate(d.salary);
outObj.data.autoclass = Corpus.process(d.summary);
outObj.data.timestamp = d.timestamp * 1000;
return outObj;
}
/**
*
* @returns {Promise<void>}
@ -124,6 +178,7 @@ class MasterBase {
this.items = [];
this.rawItems = [];
}
}
module.exports = MasterBase;

90
lib/corpus.js Normal file
View File

@ -0,0 +1,90 @@
const jsonfile = require('jsonfile');
const words = require('../lib/wordlist.json');
const wordsAdditional = require('../lib/wordlistAdditional.json');
// Number of processed texts in which each retained word has appeared.
const bigList = new Map([]);

// Words that raise / lower the score of a text.
const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es', 'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs', 'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote', 'iot', 'mqtt'];
const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];

// Retained words that are in neither list above.
let unrated = [];

// Reuse a Corpus object already present on the global scope, or create one.
const _global = typeof global === 'undefined' ? window : global;
const Corpus = (_global.Corpus = _global.Corpus || {});

// `g`: blank out every address, not only the first one.
// `i`: the text is still mixed-case when this pattern is applied.
const emailRegex = /[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/gi;
// Script blocks, style blocks, HTML comments and any remaining tag.
const detagRegex = /(<script(\s|\S)*?<\/script>)|(<style(\s|\S)*?<\/style>)|(<!--(\s|\S)*?-->)|(<\/?(\s|\S)*?>)/gi;
// Listed punctuation, line breaks, tabs and runs of digits.
const desymbolNumberRegex = /[\n\t+$,\?\.\%\*=&:;()\\/\-£…"]|\d+/gi;
const deSpace = /\s+/g;
/**
 * Reduce raw text to lower-case words separated by single spaces.
 *
 * Email addresses, markup, the listed symbols and digits are replaced by
 * spaces before whitespace is collapsed.
 *
 * @param {string} [intext] raw text; other values are converted with String()
 * @returns {string} cleaned text, or '' when `intext` is null or undefined
 */
function cleanText(intext) {
  // `== null` style check written out: an explicitly passed `undefined`
  // previously reached `.replace` and threw.
  if (intext === null || intext === undefined) return '';

  return String(intext)
    .replace(emailRegex, ' ')
    .replace(detagRegex, ' ')
    .replace(desymbolNumberRegex, ' ')
    .replace(deSpace, ' ')
    .trim()
    .toLowerCase();
}
/**
 * Return the distinct values of an iterable, in first-seen order.
 *
 * @param {Iterable} [intext] values to de-duplicate
 * @returns {Array} distinct values; empty when nothing (or null) is given
 */
function dedupe(intext) {
  if (intext == null) {
    return [];
  }

  const distinct = new Set(intext);

  return Array.from(distinct);
}
/**
 * Add one to the tally kept for `item` in `bigList`, starting at 1.
 *
 * @param {*} item key to count
 */
function incItem(item) {
  const previous = bigList.has(item) ? bigList.get(item) : 0;

  bigList.set(item, previous + 1);
}
// Both stop-word lists are consulted for every token, so index them once.
const stopWords = new Set([...words, ...wordsAdditional]);

/**
 * Score a body of text against the good / bad word lists.
 *
 * The text is cleaned, split into distinct words and stripped of stop words.
 * Every remaining word is tallied in `bigList`; words that are in neither list
 * are also appended to `unrated`.
 *
 * @param {string} intext raw text
 * @returns {{good: string[], bad: string[], score: number, words: string[]}}
 *   `good` / `bad` are the matching words, `words` is every retained word and
 *   `score` is one point per good word minus five per bad word.
 */
Corpus.process = function(intext) {
  // Splitting an empty string yields [''], which used to be counted as a word.
  const tokens = cleanText(intext).split(' ').filter((token) => token !== '');

  const cleanedArray = dedupe(tokens).filter((v) => !stopWords.has(v));

  const good = cleanedArray.filter((v) => goodWords.includes(v));
  const bad = cleanedArray.filter((v) => badWords.includes(v));
  const unused = cleanedArray.filter((v) => !goodWords.includes(v) && !badWords.includes(v));

  for (const item of cleanedArray)
    incItem(item);

  // Append in place rather than re-copying the whole list on every call.
  for (const item of unused)
    unrated.push(item);

  const score = good.length - (bad.length * 5);

  return { good, bad, score, 'words': cleanedArray };
};
/**
 * Write the collected word statistics to disk and echo the tally.
 *
 * `./unused.json` receives the distinct unrated words and `./biglist.json`
 * the [word, count] pairs ordered by descending count.
 */
Corpus.exportUnused = function() {
  const distinctUnrated = dedupe(unrated);
  const byCountDescending = Array.from(bigList).sort((left, right) => right[1] - left[1]);

  jsonfile.writeFileSync('./unused.json', distinctUnrated);
  jsonfile.writeFileSync('./biglist.json', byCountDescending);

  console.log(Array.from(bigList));
};
// Export only where a `module` object exists.
if (typeof module !== 'undefined') {
  module.exports = { Corpus };
}

28
lib/mongoManager.js Normal file
View File

@ -0,0 +1,28 @@
/**
* Created by WebStorm.
* User: martin
* Date: 22/07/2020
* Time: 17:00
*/
const mongoose = require('mongoose');
const log4js = require('log4js');
const logger = log4js.getLogger();
const Jobs = require('../models/jobs');
const { Utils } = require('@rakh/utils');
require('dotenv').config();
logger.level = 'debug';

// Connection settings are read from the environment. DBHOST and DBNAME were
// already referenced by the old log line; DBUSER and DBPASS are new, optional
// overrides. The fallbacks are the values that used to be hard-coded here.
// FIXME(security): the fallback password is a credential committed to the
// repository — rotate it and remove the fallbacks once DBUSER/DBPASS are set.
const dbHost = process.env.DBHOST || '127.0.0.1';
const dbName = process.env.DBNAME || 'jobs';
const dbUser = encodeURIComponent(process.env.DBUSER || 'martin');
const dbPass = encodeURIComponent(process.env.DBPASS || '1V3D4m526i');

// Log the URI that is really used (it previously differed from the one passed
// to connect) and keep the password out of the log.
logger.debug(`mongodb://${ dbUser }:****@${ dbHost }/${ dbName }`);

// Without a handler a failed first connection is an unhandled rejection.
mongoose.connect(`mongodb://${ dbUser }:${ dbPass }@${ dbHost }/${ dbName }`).catch((err) => {
  logger.error('initial connection failed:', err);
});

const mDB = mongoose.connection;

mDB.on('error', console.error.bind(console, 'connection error:'));

module.exports = Jobs;

View File

@ -89,6 +89,7 @@ class MasterRSS extends MasterBase {
await this.filterAdverts();
if (this.items.length > 0) await this.addToDB();
if (this.items.length > 0) await this.addToMongo();
}
else
console.log('No items to process');

1007
lib/wordlist.json Normal file

File diff suppressed because it is too large Load Diff

8790
lib/wordlistAdditional.json Normal file

File diff suppressed because it is too large Load Diff

559
limited.json Normal file
View File

@ -0,0 +1,559 @@
[
"experienced",
"exceptional",
"maintaining",
"familiarity",
"commodities",
"opportunity",
"possibility",
"integration",
"engineering",
"derivatives",
"prefferable",
"nutritional",
"performance",
"immediately",
"information",
"responsible",
"environment",
"stakeholder",
"proactively",
"requirement",
"temporarily",
"interrogate",
"effectively",
"progressing",
"substantial",
"identifying",
"maintenance",
"workarounds",
"departments",
"consultancy",
"regulations",
"statistical",
"previously·",
"euromonitor",
"documenting",
"bookkeeping",
"reconciling",
"hardworking",
"themselves!",
"appropriate",
"socialising",
"fundraising",
"initiatives",
"sponsorship",
"orientation",
"competitive",
"illustrator",
"outstanding",
"interaction",
"consistency",
"touchpoints",
"freshtechit",
"recruitment",
"catastrophe",
"accountable",
"workstreams",
"scalability",
"undertaking",
"interacting",
"significant",
"considering",
"independent",
"collaborate",
"arrangement",
"unsolicited",
"empowerment",
"connections",
"specialists",
"credentials",
"personality",
"established",
"northampton",
"advertising",
"operational",
"mathematics",
"contractors",
"instruments",
"referencing",
"locationsco",
"disciplines",
"corporation",
"investments",
"conferences",
"demonstrate",
"directorate",
"acknowledge",
"legislation",
"designgreat",
"understands",
"perspective",
"association",
"enforcement",
"prestigious",
"individuals",
"alternative",
"technically",
"challenging",
"discussions",
"lifeworking",
"interactive",
"storyboards",
"communicate",
"abilitywork",
"englishgood",
"detailbonus",
"angularwhat",
"neededabout",
"innovations",
"enthusiasts",
"instructors",
"prospective",
"comfortable",
"involvement",
"adventurous",
"marketplace",
"forecasting",
"contractual",
"underpinned",
"acquisition",
"microsofts",
"progression",
"suggestions",
"proficiency",
"participate",
"joblocation",
"methodology",
"continually",
"cataloguing",
"projectgood",
"incremental",
"overarching",
"confidently",
"circulatory",
"adjustments",
"interesting",
"consultants",
"experienceb",
"hourscasual",
"switzerland",
"contributes",
"participant",
"improvement",
"articulates",
"contributed",
"comfortably",
"deployments",
"integrating",
"configuring",
"platforming",
"educatedday",
"contracting",
"monthstotal",
"outsourcing",
"designswork",
"ideasdesign",
"deviceswork",
"fundamental",
"businessjob",
"implemented",
"transaction",
"reliability",
"upgradesyou",
"uncertainty",
"enterpriser",
"teamprovide",
"trafficking",
"doubleclick",
"communities",
"forestlink",
"dimensional",
"coordinator",
"spreadsheet",
"pressurised",
"assignments",
"willingness",
"certificate",
"summaryrole",
"institution",
"segregation",
"preparation",
"electronics",
"duplication",
"surrounding",
"informatica",
"blackfriars",
"terminology",
"shabarinath",
"interfacing",
"expectation",
"proprietary",
"conflicting",
"itecopeople",
"opowershell",
"submissions",
"negotiating",
"escalations",
"transferred",
"protections",
"customizing",
"oxfordshire",
"progressive",
"bishopsgate",
"partnership",
"futureheads",
"permissions",
"efficiently",
"unspecified",
"potentially",
"disclaimers",
"foreseeable",
"sustainable",
"calculation",
"replication",
"constitutes",
"recommended",
"enterprises",
"negotiation",
"imaginative",
"differences",
"nationality",
"impediments",
"refinements",
"translating",
"obligations",
"flexibility",
"unashamedly",
"exclusively",
"replacement",
"essentially",
"artifactory",
"theoretical",
"probability",
"integrators",
"contractor?",
"interested?",
"functioning",
"chamberlain",
"inclusivity",
"iteratively",
"enhancement",
"constraints",
"establishes",
"qualitative",
"influencing",
"procurement",
"experiences",
"furthermore",
"disciplined",
"unnecessary",
"bureaucracy",
"represented",
"siteimprove",
"lokhandwala",
"specialises",
"rationalize",
"competncies",
"restoration",
"allocations",
"admittances",
"furnishings",
"cleanliness",
"residential",
"contactable",
"conventions",
"translation",
"approaching",
"intecselect",
"linguistics",
"southampton",
"beautifully",
"estimations",
"newsletters",
"summarising",
"simulations",
"portfolio's",
"coronavirus",
"opoortunity",
"unavailable",
"accordingly",
"penetration",
"remediation",
"elimination",
"achievement",
"facilitator",
"westminster",
"introducing",
"businesses'",
"capitalists",
"investigate",
"countryside",
"problematic",
"coordinates",
"components'",
"supervision",
"bonavolonta",
"proposition",
"foundations",
"suitability",
"researchers",
"explanation",
"commitments",
"computation",
"questioning",
"experiments",
"visualfiles",
"cloudstream",
"determining",
"deliverable",
"inquisitive",
"backgrounds",
"thoughtspot",
"specialized",
"veloppement",
"importantes",
"typedscript",
"restaurants",
"prophylaxis",
"transmitted",
"appointment",
"encouraging",
"aggregating",
"championing",
"conjunction",
"customising",
"photography",
"authorities",
"competition",
"collections",
"contraintes",
"fonctionnel",
"adaptabilit",
"changements",
"conceptions",
"utilisation",
"shortlisted",
"reusability",
"recognizing",
"decisioning",
"accommodate",
"limitations",
"resourceful",
"algorithmic",
"unconcerned",
"intelligent",
"considerate",
"clientbased",
"accelerator",
"dreamweaver",
"applicant's",
"proactivity",
"aggregation",
"restriction",
"traditional",
"corporately",
"memberships",
"standardise",
"theecsgroup",
"scarchitect",
"consolidate",
"extensively",
"afghanistan",
"encompasses",
"distinctive",
"professions",
"interviewed",
"formulation",
"transitions",
"aspirations",
"ingredients",
"setterfield",
"candidates",
"leatherhead",
"publication",
"undoubtedly",
"basingstoke",
"underground",
"reinsurance",
"exemplifies",
"civiization",
"developer's",
"bazzelgette",
"adjacencies",
"feasibility",
"frontinvest",
"neogotiable",
"unconnected",
"conditional",
"bottlenecks",
"productions",
"pharmacists",
"technicians",
"prescribing",
"stewardship",
"recognising",
"convictions",
"subscribing",
"transparent",
"wireframing",
"insidehmcts",
"justicejobs",
"criminology",
"hospitality",
"structuring",
"educational",
"substantive",
"secondments",
"transgender",
"smartphones",
"microsoft's",
"definitions",
"validations",
"prioritised",
"autoscaling",
"abstraction",
"correlation",
"recognition",
"contributor",
"apigedevops",
"incorporate",
"woocommerce",
"informatics",
"adfadc@apps",
"automations",
"formulating",
"beneficiary",
"referential",
"jsdevsecops",
"solutioning",
"measurement",
"familiarise",
"eligibility",
"standardize",
"experience?",
"bournemouth",
"implementer",
"agilesphere",
"assumptions",
"accountancy",
"cockroachdb",
"promotional",
"facilitates",
"discoveries",
"bladecenter",
"considered!",
"cooperation",
"exploration",
"angulareact",
"preferabbly",
"harmonising",
"convenience",
"inclusively",
"strategists",
"attribution",
"fromscratch",
"combination",
"solutionize",
"accelerated",
"diagnostics",
"sensibility",
"informative",
"intellegnce",
"specilisits",
"projections",
"associative",
"personalize",
"farnborough",
"necessarily",
"nservicebus",
"constrained",
"prioritized",
"behavioural",
"chakraborty",
"leaderships",
"flourishing",
"uniqstudios",
"simplifying",
"realisation",
"extensions!",
"prioritises",
"experience!",
"candidates!",
"inclination",
"stimulating",
"appreciated",
"reinventing",
"compression",
"jscybsecdev",
"equirements",
"generalized",
"compressors",
"assessments",
"beyondtrust",
"engagements",
"numerically",
"electricity",
"interchange",
"jsswift_dev",
"circulating",
"attachments",
"credibility",
"vnetpeering",
"territories",
"staggering!",
"developers!",
"peripherals",
"virtualized",
"bitdefender",
"jssitecorjs",
"positioning",
"appreciates",
"chessington",
"controllers",
"controlling",
"quantifying",
"virtualised",
"manufacture",
"fluorescent",
"governments",
"bigcommerce",
"therapeutic",
"importantly",
"differently",
"rigourously",
"shareholder",
"copywriting",
"anticipated",
"approximate",
"behdarvandi",
"testability",
"beneficial!",
"jswmibmcraw",
"exhibitions",
"talentpoint",
"propagation",
"interviews!",
"solutionise",
"elasticache",
"manoeuvring",
"teamservice",
"geographies",
"efficientip",
"organically",
"advancement",
"jshodanular",
"wholesalers",
"multitenant",
"encouraged?",
"freelancers",
"composition",
"#jobswagger",
"typographic",
"stereotypes",
"clerkenwell",
"sacrificing",
"resolutions",
"technology?",
"advantagous"
]

22
mapbuilder.js Normal file
View File

@ -0,0 +1,22 @@
/**
* Created by WebStorm.
* User: martin
* Date: 27/07/2020
* Time: 15:34
*/
const jsonfile = require('jsonfile');
const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es', 'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs', 'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote', 'iot', 'mqtt'];
const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];

// word -> weight: 3 for every good word, -5 for every bad word.
const brain = new Map([]);

// Walk each list completely; the former `i < length - 1` bound left out the
// last entry of both lists.
for (const word of goodWords)
  brain.set(word, 3);

for (const word of badWords)
  brain.set(word, -5);
// Persist the map as an array of [word, weight] pairs.
jsonfile.writeFileSync('brain.json', Array.from(brain));

129
migrate.js Normal file
View File

@ -0,0 +1,129 @@
/**
* Created by WebStorm.
* User: martin
* Date: 22/07/2020
* Time: 10:20
*/
const db = require('./lib/connect');
const log4js = require('log4js');
const logger = log4js.getLogger();
const { Utils } = require('@rakh/utils');
const { Corpus } = require('./lib/corpus');
/*
const mongoose = require('mongoose');
const log4js = require('log4js');
const logger = log4js.getLogger();
const Jobs = require('./models/jobs');
require('dotenv').config();
logger.level = 'debug';
logger.debug(`mongodb://martin:1V3D4m526i@${ process.env.DBHOST }/${ process.env.DBNAME}`);
mongoose.connect(`mongodb://martin:1V3D4m526i@${ process.env.DBHOST }/${ process.env.DBNAME}`);
const mDB = mongoose.connection;
mDB.on('error', console.error.bind(console, 'connection error:'));
*/
const Jobs = require('./lib/mongoManager');
/**
 * Copies the rows returned by the SQL `jobs` query into MongoDB through the
 * `Jobs` model. Only `start` is exposed; the helpers are private to the closure.
 */
const migrate = (function() {
  /**
   * Classify a free-text salary / rate string into a coarse bucket.
   *
   * @param {string} inval raw salary text
   * @returns {number} 1 when the first number found is above 100 and below
   *   5000, 2 when it is 5000 or more, otherwise 0 (also when there is no
   *   number or `inval` is null/undefined)
   */
  function analyseRate(inval) {
    // A missing salary used to throw on `.replace`.
    if (inval === null || inval === undefined) return 0;

    const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
    const clearSpace = /\s+/g;

    const [first] = String(inval).replace(cleanerReg, '').replace(clearSpace, ' ').trim().split(' ');
    const item = parseInt(first, 10);

    // NaN fails both comparisons and therefore ends up as 0.
    if ((item > 100) && (item < 5000)) return 1;
    if (item >= 5000) return 2;

    return 0;
  }

  /**
   * Reshape a database row into the `{ details, data }` document layout.
   *
   * @param {Object} d row as returned by the query in getCurrent
   * @returns {{details: Object, data: Object}} document ready for `Jobs`
   */
  function reduceData(d) {
    // Strip a "Featured" / "Premium" label that trails the title.
    const clearPremium = /(\n+)(Featured|Premium)/gi;
    const otherStupid = /((↵\s+)+)(Featured|Premium)/gi;

    const outObj = { 'details':{}, 'data':{ 'read':0, 'applied':0, 'jobtype': 0, 'class':0, 'autoclass':0 } };

    outObj.details = Utils.extractFromObj(d, ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp']);
    outObj.details.title = outObj.details.title.replace(clearPremium, '').replace(otherStupid, '');

    // The read flag is reset on purpose; the applied flag is carried over.
    outObj.data.read = 0;
    outObj.data.applied = d.applied || 0;
    outObj.data.jobtype = analyseRate(d.salary);
    outObj.data.autoclass = Corpus.process(d.summary);
    // NOTE(review): presumably converts seconds to milliseconds — confirm.
    outObj.data.timestamp = d.timestamp * 1000;

    return outObj;
  }

  /**
   * Read every job row together with its applied / read markers.
   *
   * @returns {Promise<Object[]>} resolves with a copy of the rows, rejects
   *   with the database error
   */
  function getCurrent() {
    console.log('get version');

    const sql = 'select jobs.*, applied.a as applied, read.d as read from jobs left join applied on applied.aid = jobs._id left join read on read.rid = jobs._id order by _id asc;';

    return new Promise((resolve, reject) => {
      db.all(sql, [], (err, rows) => {
        if (err) {
          // Stop here: without the return, the code went on to iterate `rows`
          // after rejecting.
          reject(err);

          return;
        }

        resolve([...rows]);
      });
    });
  }

  /**
   * Run the migration: load the rows, save each one, then export the word
   * statistics gathered while the rows were processed.
   *
   * @returns {Promise<void>} settles after the export has run; errors are
   *   logged, not rethrown. Individual saves are not awaited.
   */
  function start() {
    return getCurrent().then((d) => {
      logger.debug(d.length);

      // Iterate every row; the former `t < d.length - 1` bound skipped the last.
      for (const row of d) {
        const newJob = new Jobs(reduceData(row));

        newJob.save().then((m) => {
          logger.debug('m', m.details.title);
        }).catch((err) => {
          logger.error(err);
        });
      }
    }).then(() => {
      logger.debug('SAVING!!');
      Corpus.exportUnused();
    })
      .catch((err) => {
        logger.error(err);
      });
  }

  return {
    'start':start
  };
})();
// Kick off the migration. start() is promise-based, so the log line below is
// emitted straight away, before any row has been read or saved.
migrate.start();
logger.info('Done??');

46
models/jobs.js Normal file
View File

@ -0,0 +1,46 @@
/**
* Created by WebStorm.
* User: martin
* Date: 22/07/2020
* Time: 14:18
*/
const mongoose = require('mongoose');
const Schema = mongoose.Schema;
// Advert fields stored under `details`; title, site and url are mandatory and
// url must be unique.
const detailsShape = {
  title: { type: String, required: true },
  site: { type: String, required: true },
  url: { type: String, required: true, unique: true },
  id: String,
  summary: String,
  company: String,
  location: String,
  postdate: String,
  salary: String,
  easyapply: Number,
  timestamp: Number
};

// Result of the automatic word classification.
const autoclassShape = {
  good: Array,
  bad: Array,
  words: Array,
  score: { type: Number, default: 0 }
};

// Flags and derived values stored under `data`; numeric fields default to 0.
const dataShape = {
  read: { type: Number, default: 0 },
  applied: { type: Number, default: 0 },
  jobtype: { type: Number, default: 0 },
  class: { type: Number, default: 0 },
  autoclass: autoclassShape,
  timestamp: { type: Number, default: 0 },
  created_at: { type: Date, default: Date.now }
};

const jobSchema = new Schema({ details: detailsShape, data: dataShape });

mongoose.set('useFindAndModify', false);

const Jobs = mongoose.model('Jobs', jobSchema);

module.exports = Jobs;

709
package-lock.json generated
View File

@ -27,6 +27,418 @@
"js-tokens": "^4.0.0"
}
},
"@rakh/utils": {
"version": "file:../utils",
"requires": {
"@rollup/plugin-commonjs": "^12.0.0",
"@rollup/plugin-node-resolve": "^8.0.0",
"consola": "^2.13.0",
"rollup": "^2.12.0",
"rollup-plugin-terser": "^6.1.0"
},
"dependencies": {
"@babel/code-frame": {
"version": "7.10.1",
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.10.1.tgz",
"integrity": "sha512-IGhtTmpjGbYzcEDOw7DcQtbQSXcG9ftmAXtWTu9V936vDye4xjjekktFAtgZsWpzTj/X01jocB46mTywm/4SZw==",
"requires": {
"@babel/highlight": "^7.10.1"
}
},
"@babel/helper-validator-identifier": {
"version": "7.10.1",
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.10.1.tgz",
"integrity": "sha512-5vW/JXLALhczRCWP0PnFDMCJAchlBvM7f4uk/jXritBnIa6E1KmqmtrS3yn1LAnxFBypQ3eneLuXjsnfQsgILw=="
},
"@babel/highlight": {
"version": "7.10.1",
"resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.10.1.tgz",
"integrity": "sha512-8rMof+gVP8mxYZApLF/JgNDAkdKa+aJt3ZYxF8z6+j/hpeXL7iMsKCPHa2jNMHu/qqBwzQF4OHNoYi8dMA/rYg==",
"requires": {
"@babel/helper-validator-identifier": "^7.10.1",
"chalk": "^2.0.0",
"js-tokens": "^4.0.0"
}
},
"@rollup/plugin-commonjs": {
"version": "12.0.0",
"resolved": "https://registry.npmjs.org/@rollup/plugin-commonjs/-/plugin-commonjs-12.0.0.tgz",
"integrity": "sha512-8+mDQt1QUmN+4Y9D3yCG8AJNewuTSLYPJVzKKUZ+lGeQrI+bV12Tc5HCyt2WdlnG6ihIL/DPbKRJlB40DX40mw==",
"requires": {
"@rollup/pluginutils": "^3.0.8",
"commondir": "^1.0.1",
"estree-walker": "^1.0.1",
"glob": "^7.1.2",
"is-reference": "^1.1.2",
"magic-string": "^0.25.2",
"resolve": "^1.11.0"
}
},
"@rollup/plugin-node-resolve": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/@rollup/plugin-node-resolve/-/plugin-node-resolve-8.0.0.tgz",
"integrity": "sha512-5poJCChrkVggXXND/sQ7yNqwjUNT4fP31gpRWCnSNnlXuUXTCMHT33xZrTGxgjm5Rl18MHj7iEzlCT8rYWwQSA==",
"requires": {
"@rollup/pluginutils": "^3.0.8",
"@types/resolve": "0.0.8",
"builtin-modules": "^3.1.0",
"deep-freeze": "^0.0.1",
"deepmerge": "^4.2.2",
"is-module": "^1.0.0",
"resolve": "^1.14.2"
}
},
"@rollup/pluginutils": {
"version": "3.0.10",
"resolved": "https://registry.npmjs.org/@rollup/pluginutils/-/pluginutils-3.0.10.tgz",
"integrity": "sha512-d44M7t+PjmMrASHbhgpSbVgtL6EFyX7J4mYxwQ/c5eoaE6N2VgCgEcWVzNnwycIloti+/MpwFr8qfw+nRw00sw==",
"requires": {
"@types/estree": "0.0.39",
"estree-walker": "^1.0.1",
"picomatch": "^2.2.2"
}
},
"@types/estree": {
"version": "0.0.39",
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-0.0.39.tgz",
"integrity": "sha512-EYNwp3bU+98cpU4lAWYYL7Zz+2gryWH1qbdDTidVd6hkiR6weksdbMadyXKXNPEkQFhXM+hVO9ZygomHXp+AIw=="
},
"@types/node": {
"version": "14.0.6",
"resolved": "https://registry.npmjs.org/@types/node/-/node-14.0.6.tgz",
"integrity": "sha512-FbNmu4F67d3oZMWBV6Y4MaPER+0EpE9eIYf2yaHhCWovc1dlXCZkqGX4NLHfVVr6umt20TNBdRzrNJIzIKfdbw=="
},
"@types/resolve": {
"version": "0.0.8",
"resolved": "https://registry.npmjs.org/@types/resolve/-/resolve-0.0.8.tgz",
"integrity": "sha512-auApPaJf3NPfe18hSoJkp8EbZzer2ISk7o8mCC3M9he/a04+gbMF97NkpD2S8riMGvm4BMRI59/SZQSaLTKpsQ==",
"requires": {
"@types/node": "*"
}
},
"ansi-styles": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz",
"integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==",
"requires": {
"color-convert": "^1.9.0"
}
},
"balanced-match": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz",
"integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c="
},
"brace-expansion": {
"version": "1.1.11",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
"requires": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
}
},
"buffer-from": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz",
"integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A=="
},
"builtin-modules": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/builtin-modules/-/builtin-modules-3.1.0.tgz",
"integrity": "sha512-k0KL0aWZuBt2lrxrcASWDfwOLMnodeQjodT/1SxEQAXsHANgo6ZC/VEaSEHCXt7aSTZ4/4H5LKa+tBXmW7Vtvw=="
},
"chalk": {
"version": "2.4.2",
"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz",
"integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==",
"requires": {
"ansi-styles": "^3.2.1",
"escape-string-regexp": "^1.0.5",
"supports-color": "^5.3.0"
}
},
"color-convert": {
"version": "1.9.3",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
"integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==",
"requires": {
"color-name": "1.1.3"
}
},
"color-name": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
"integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU="
},
"commander": {
"version": "2.20.3",
"resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz",
"integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ=="
},
"commondir": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/commondir/-/commondir-1.0.1.tgz",
"integrity": "sha1-3dgA2gxmEnOTzKWVDqloo6rxJTs="
},
"concat-map": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
"integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s="
},
"consola": {
"version": "2.14.0",
"resolved": "https://registry.npmjs.org/consola/-/consola-2.14.0.tgz",
"integrity": "sha512-A2j1x4u8d6SIVikhZROfpFJxQZie+cZOfQMyI/tu2+hWXe8iAv7R6FW6s6x04/7zBCst94lPddztot/d6GJiuQ=="
},
"deep-freeze": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/deep-freeze/-/deep-freeze-0.0.1.tgz",
"integrity": "sha1-OgsABd4YZygZ39OM0x+RF5yJPoQ="
},
"deepmerge": {
"version": "4.2.2",
"resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.2.2.tgz",
"integrity": "sha512-FJ3UgI4gIl+PHZm53knsuSFpE+nESMr7M4v9QcgB7S63Kj/6WqMiFQJpBBYz1Pt+66bZpP3Q7Lye0Oo9MPKEdg=="
},
"escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ="
},
"estree-walker": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-1.0.1.tgz",
"integrity": "sha512-1fMXF3YP4pZZVozF8j/ZLfvnR8NSIljt56UhbZ5PeeDmmGHpgpdwQt7ITlGvYaQukCvuBRMLEiKiYC+oeIg4cg=="
},
"fs.realpath": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
"integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8="
},
"fsevents": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.1.3.tgz",
"integrity": "sha512-Auw9a4AxqWpa9GUfj370BMPzzyncfBABW8Mab7BGWBYDj4Isgq+cDKtx0i6u9jcX9pQDnswsaaOTgTmA5pEjuQ==",
"optional": true
},
"glob": {
"version": "7.1.6",
"resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz",
"integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==",
"requires": {
"fs.realpath": "^1.0.0",
"inflight": "^1.0.4",
"inherits": "2",
"minimatch": "^3.0.4",
"once": "^1.3.0",
"path-is-absolute": "^1.0.0"
}
},
"has-flag": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
"integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0="
},
"inflight": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
"integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=",
"requires": {
"once": "^1.3.0",
"wrappy": "1"
}
},
"inherits": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
},
"is-module": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/is-module/-/is-module-1.0.0.tgz",
"integrity": "sha1-Mlj7afeMFNW4FdZkM2tM/7ZEFZE="
},
"is-reference": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/is-reference/-/is-reference-1.2.0.tgz",
"integrity": "sha512-ZVxq+5TkOx6GQdnoMm2aRdCKADdcrOWXLGzGT+vIA8DMpqEJaRk5AL1bS80zJ2bjHunVmjdzfCt0e4BymIEqKQ==",
"requires": {
"@types/estree": "0.0.44"
},
"dependencies": {
"@types/estree": {
"version": "0.0.44",
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-0.0.44.tgz",
"integrity": "sha512-iaIVzr+w2ZJ5HkidlZ3EJM8VTZb2MJLCjw3V+505yVts0gRC4UMvjw0d1HPtGqI/HQC/KdsYtayfzl+AXY2R8g=="
}
}
},
"jest-worker": {
"version": "26.0.0",
"resolved": "https://registry.npmjs.org/jest-worker/-/jest-worker-26.0.0.tgz",
"integrity": "sha512-pPaYa2+JnwmiZjK9x7p9BoZht+47ecFCDFA/CJxspHzeDvQcfVBLWzCiWyo+EGrSiQMWZtCFo9iSvMZnAAo8vw==",
"requires": {
"merge-stream": "^2.0.0",
"supports-color": "^7.0.0"
},
"dependencies": {
"has-flag": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
"integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ=="
},
"supports-color": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.1.0.tgz",
"integrity": "sha512-oRSIpR8pxT1Wr2FquTNnGet79b3BWljqOuoW/h4oBhxJ/HUbX5nX6JSruTkvXDCFMwDPvsaTTbvMLKZWSy0R5g==",
"requires": {
"has-flag": "^4.0.0"
}
}
}
},
"js-tokens": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
"integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ=="
},
"magic-string": {
"version": "0.25.7",
"resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.25.7.tgz",
"integrity": "sha512-4CrMT5DOHTDk4HYDlzmwu4FVCcIYI8gauveasrdCu2IKIFOJ3f0v/8MDGJCDL9oD2ppz/Av1b0Nj345H9M+XIA==",
"requires": {
"sourcemap-codec": "^1.4.4"
}
},
"merge-stream": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
"integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w=="
},
"minimatch": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz",
"integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==",
"requires": {
"brace-expansion": "^1.1.7"
}
},
"once": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
"integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=",
"requires": {
"wrappy": "1"
}
},
"path-is-absolute": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
"integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18="
},
"path-parse": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.6.tgz",
"integrity": "sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw=="
},
"picomatch": {
"version": "2.2.2",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.2.2.tgz",
"integrity": "sha512-q0M/9eZHzmr0AulXyPwNfZjtwZ/RBZlbN3K3CErVrk50T2ASYI7Bye0EvekFY3IP1Nt2DHu0re+V2ZHIpMkuWg=="
},
"randombytes": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz",
"integrity": "sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==",
"requires": {
"safe-buffer": "^5.1.0"
}
},
"resolve": {
"version": "1.17.0",
"resolved": "https://registry.npmjs.org/resolve/-/resolve-1.17.0.tgz",
"integrity": "sha512-ic+7JYiV8Vi2yzQGFWOkiZD5Z9z7O2Zhm9XMaTxdJExKasieFCr+yXZ/WmXsckHiKl12ar0y6XiXDx3m4RHn1w==",
"requires": {
"path-parse": "^1.0.6"
}
},
"rollup": {
"version": "2.12.0",
"resolved": "https://registry.npmjs.org/rollup/-/rollup-2.12.0.tgz",
"integrity": "sha512-vKwc/xFkZGM9DRai3Eztpr/4g0yYDgNKVq8tLXhq/aSLbR+/EVL6rTjEW9bgWgeYEIKoN66/5w2Bjv1gzyHR/w==",
"requires": {
"fsevents": "~2.1.2"
}
},
"rollup-plugin-terser": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/rollup-plugin-terser/-/rollup-plugin-terser-6.1.0.tgz",
"integrity": "sha512-4fB3M9nuoWxrwm39habpd4hvrbrde2W2GG4zEGPQg1YITNkM3Tqur5jSuXlWNzbv/2aMLJ+dZJaySc3GCD8oDw==",
"requires": {
"@babel/code-frame": "^7.8.3",
"jest-worker": "^26.0.0",
"serialize-javascript": "^3.0.0",
"terser": "^4.7.0"
}
},
"safe-buffer": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="
},
"serialize-javascript": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-3.1.0.tgz",
"integrity": "sha512-JIJT1DGiWmIKhzRsG91aS6Ze4sFUrYbltlkg2onR5OrnNM02Kl/hnY/T4FN2omvyeBbQmMJv+K4cPOpGzOTFBg==",
"requires": {
"randombytes": "^2.1.0"
}
},
"source-map": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
"integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g=="
},
"source-map-support": {
"version": "0.5.19",
"resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.19.tgz",
"integrity": "sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw==",
"requires": {
"buffer-from": "^1.0.0",
"source-map": "^0.6.0"
}
},
"sourcemap-codec": {
"version": "1.4.8",
"resolved": "https://registry.npmjs.org/sourcemap-codec/-/sourcemap-codec-1.4.8.tgz",
"integrity": "sha512-9NykojV5Uih4lgo5So5dtw+f0JgJX30KCNI8gwhz2J9A15wD0Ml6tjHKwf6fTSa6fAdVBdZeNOs9eJ71qCk8vA=="
},
"supports-color": {
"version": "5.5.0",
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz",
"integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==",
"requires": {
"has-flag": "^3.0.0"
}
},
"terser": {
"version": "4.7.0",
"resolved": "https://registry.npmjs.org/terser/-/terser-4.7.0.tgz",
"integrity": "sha512-Lfb0RiZcjRDXCC3OSHJpEkxJ9Qeqs6mp2v4jf2MHfy8vGERmVDuvjXdd/EnP5Deme5F2yBRBymKmKHCBg2echw==",
"requires": {
"commander": "^2.20.0",
"source-map": "~0.6.1",
"source-map-support": "~0.5.12"
}
},
"wrappy": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
"integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8="
}
}
},
"@sindresorhus/is": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-2.1.1.tgz",
@ -247,6 +659,11 @@
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz",
"integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c="
},
"bayes": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/bayes/-/bayes-1.0.0.tgz",
"integrity": "sha512-dJkTHtGBbOLtrmcm37R44jelbgKalMPXLLmhNceEgeLRJLdDTU2DoEF7L+UqM3m36dve7/Vka4hgaacT7a8Jjw=="
},
"bcrypt-pbkdf": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz",
@ -255,6 +672,58 @@
"tweetnacl": "^0.14.3"
}
},
"bl": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/bl/-/bl-2.2.0.tgz",
"integrity": "sha512-wbgvOpqopSr7uq6fJrLH8EsvYMJf9gzfo2jCsL2eTy75qXPukA4pCgHamOQkZtY5vmfVtjB+P3LNlMHW5CEZXA==",
"requires": {
"readable-stream": "^2.3.5",
"safe-buffer": "^5.1.1"
},
"dependencies": {
"readable-stream": {
"version": "2.3.7",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.7.tgz",
"integrity": "sha512-Ebho8K4jIbHAxnuxi7o42OrZgF/ZTNcsZj6nRKyUmkhLFq8CHItp/fy6hQZuZmP/n3yZ9VBUbp4zz/mX8hmYPw==",
"requires": {
"core-util-is": "~1.0.0",
"inherits": "~2.0.3",
"isarray": "~1.0.0",
"process-nextick-args": "~2.0.0",
"safe-buffer": "~5.1.1",
"string_decoder": "~1.1.1",
"util-deprecate": "~1.0.1"
},
"dependencies": {
"safe-buffer": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
}
}
},
"string_decoder": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
"integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
"requires": {
"safe-buffer": "~5.1.0"
},
"dependencies": {
"safe-buffer": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
}
}
}
}
},
"bluebird": {
"version": "3.5.1",
"resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.5.1.tgz",
"integrity": "sha512-MKiLiV+I1AA596t9w1sQJ8jkiSr5+ZKi0WKrYGUn6d1Fx+Ij4tIj+m2WMQSGczs5jZVxV339chE8iwk6F64wjA=="
},
"body-parser": {
"version": "1.19.0",
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz",
@ -312,6 +781,11 @@
"concat-map": "0.0.1"
}
},
"bson": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/bson/-/bson-1.1.4.tgz",
"integrity": "sha512-S/yKGU1syOMzO86+dGpg2qGoDL0zvzcb262G+gqEy6TgP6rt6z6qxSFX/8X6vLC91P7G7C3nLs0+bvDzmvBA3Q=="
},
"bytes": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz",
@ -550,6 +1024,11 @@
"integrity": "sha1-bYCcnNDPe7iVLYD8hPoT1H3bEwg=",
"dev": true
},
"date-format": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/date-format/-/date-format-3.0.0.tgz",
"integrity": "sha512-eyTcpKOcamdhWJXj56DpQMo1ylSQpcGtGKXcU0Tb97+K56/CF5amAqqqNj0+KvA0iw2ynxtHWFsPDSClCxe48w=="
},
"debug": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.1.1.tgz",
@ -624,6 +1103,11 @@
"resolved": "https://registry.npmjs.org/delegates/-/delegates-1.0.0.tgz",
"integrity": "sha1-hMbhWbgZBP3KWaDvRM2HDTElD5o="
},
"denque": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/denque/-/denque-1.4.1.tgz",
"integrity": "sha512-OfzPuSZKGcgr96rf1oODnfjqBFmr1DVoc/TrItj3Ohe0Ah1C5WX5Baquw/9U9KovnQ88EqmJbD66rKYUQYN1tQ=="
},
"depd": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz",
@ -1172,6 +1656,26 @@
"resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz",
"integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac="
},
"fs-extra": {
"version": "8.1.0",
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-8.1.0.tgz",
"integrity": "sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g==",
"requires": {
"graceful-fs": "^4.2.0",
"jsonfile": "^4.0.0",
"universalify": "^0.1.0"
},
"dependencies": {
"jsonfile": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-4.0.0.tgz",
"integrity": "sha1-h3Gq4HmbZAdrdmQPygWPnBDjPss=",
"requires": {
"graceful-fs": "^4.1.6"
}
}
}
},
"fs-minipass": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/fs-minipass/-/fs-minipass-1.2.7.tgz",
@ -1307,6 +1811,11 @@
"responselike": "^2.0.0"
}
},
"graceful-fs": {
"version": "4.2.4",
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.4.tgz",
"integrity": "sha512-WjKPNJF79dtJAVniUlGGWHYGz2jWxT6VhN/4m1NdkbZ2nOsEF+cI1Edgql5zCRhs/VsQYRvrXctxktVXZUkixw=="
},
"har-schema": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz",
@ -1735,6 +2244,22 @@
"resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
"integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus="
},
"jsonfile": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.0.1.tgz",
"integrity": "sha512-jR2b5v7d2vIOust+w3wtFKZIfpC2pnRmFAhAC/BuweZFQR8qZzxH1OyrQ10HmdVYiXWkYUqPVsz91cG7EL2FBg==",
"requires": {
"graceful-fs": "^4.1.6",
"universalify": "^1.0.0"
},
"dependencies": {
"universalify": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/universalify/-/universalify-1.0.0.tgz",
"integrity": "sha512-rb6X1W158d7pRQBg5gkR8uPaSfiids68LTJQYOtEUhoJUWBdaQHsuT/EUduxXYxcrt4r5PJ4fuHW1MHT6p0qug=="
}
}
},
"jsprim": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz",
@ -1746,6 +2271,11 @@
"verror": "1.10.0"
}
},
"kareem": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/kareem/-/kareem-2.3.1.tgz",
"integrity": "sha512-l3hLhffs9zqoDe8zjmb/mAN4B8VT3L56EUvKNqLFVs9YlFA+zx7ke1DO8STAdDyYNkeSo1nKmjuvQeI12So8Xw=="
},
"keyv": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/keyv/-/keyv-4.0.1.tgz",
@ -1768,6 +2298,18 @@
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz",
"integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A=="
},
"log4js": {
"version": "6.3.0",
"resolved": "https://registry.npmjs.org/log4js/-/log4js-6.3.0.tgz",
"integrity": "sha512-Mc8jNuSFImQUIateBFwdOQcmC6Q5maU0VVvdC2R6XMb66/VnT+7WS4D/0EeNMZu1YODmJe5NIn2XftCzEocUgw==",
"requires": {
"date-format": "^3.0.0",
"debug": "^4.1.1",
"flatted": "^2.0.1",
"rfdc": "^1.1.4",
"streamroller": "^2.2.4"
}
},
"lowercase-keys": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-2.0.0.tgz",
@ -1778,6 +2320,12 @@
"resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
"integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g="
},
"memory-pager": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/memory-pager/-/memory-pager-1.5.0.tgz",
"integrity": "sha512-ZS4Bp4r/Zoeq6+NLJpP+0Zzm0pR8whtGPf1XExKLJBAczGMnSi3It14OiNCStjQjM6NU1okjQGSxgEZN8eBYKg==",
"optional": true
},
"merge-descriptors": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
@ -1867,6 +2415,86 @@
"moment": ">= 2.9.0"
}
},
"mongodb": {
"version": "3.5.9",
"resolved": "https://registry.npmjs.org/mongodb/-/mongodb-3.5.9.tgz",
"integrity": "sha512-vXHBY1CsGYcEPoVWhwgxIBeWqP3dSu9RuRDsoLRPTITrcrgm1f0Ubu1xqF9ozMwv53agmEiZm0YGo+7WL3Nbug==",
"requires": {
"bl": "^2.2.0",
"bson": "^1.1.4",
"denque": "^1.4.1",
"require_optional": "^1.0.1",
"safe-buffer": "^5.1.2",
"saslprep": "^1.0.0"
}
},
"mongoose": {
"version": "5.9.25",
"resolved": "https://registry.npmjs.org/mongoose/-/mongoose-5.9.25.tgz",
"integrity": "sha512-vz/DqJ3mrHqEIlfRbKmDZ9TzQ1a0hCtSQpjHScIxr4rEtLs0tjsXDeEWcJ/vEEc3oLfP6vRx9V+uYSprXDUvFQ==",
"requires": {
"bson": "^1.1.4",
"kareem": "2.3.1",
"mongodb": "3.5.9",
"mongoose-legacy-pluralize": "1.0.2",
"mpath": "0.7.0",
"mquery": "3.2.2",
"ms": "2.1.2",
"regexp-clone": "1.0.0",
"safe-buffer": "5.2.1",
"sift": "7.0.1",
"sliced": "1.0.1"
},
"dependencies": {
"safe-buffer": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="
}
}
},
"mongoose-legacy-pluralize": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/mongoose-legacy-pluralize/-/mongoose-legacy-pluralize-1.0.2.tgz",
"integrity": "sha512-Yo/7qQU4/EyIS8YDFSeenIvXxZN+ld7YdV9LqFVQJzTLye8unujAWPZ4NWKfFA+RNjh+wvTWKY9Z3E5XM6ZZiQ=="
},
"mpath": {
"version": "0.7.0",
"resolved": "https://registry.npmjs.org/mpath/-/mpath-0.7.0.tgz",
"integrity": "sha512-Aiq04hILxhz1L+f7sjGyn7IxYzWm1zLNNXcfhDtx04kZ2Gk7uvFdgZ8ts1cWa/6d0TQmag2yR8zSGZUmp0tFNg=="
},
"mquery": {
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/mquery/-/mquery-3.2.2.tgz",
"integrity": "sha512-XB52992COp0KP230I3qloVUbkLUxJIu328HBP2t2EsxSFtf4W1HPSOBWOXf1bqxK4Xbb66lfMJ+Bpfd9/yZE1Q==",
"requires": {
"bluebird": "3.5.1",
"debug": "3.1.0",
"regexp-clone": "^1.0.0",
"safe-buffer": "5.1.2",
"sliced": "1.0.1"
},
"dependencies": {
"debug": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz",
"integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==",
"requires": {
"ms": "2.0.0"
}
},
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g="
},
"safe-buffer": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
}
}
},
"ms": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
@ -2275,6 +2903,11 @@
"integrity": "sha512-LgQJIuS6nAy1Jd88DCQRemyE3mS+ispwlqMk3b0yjZ257fI1v9c+/p6SD5gP5FGyXUIgrNOAfmyioHwZtYv2VA==",
"dev": true
},
"regexp-clone": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/regexp-clone/-/regexp-clone-1.0.0.tgz",
"integrity": "sha512-TuAasHQNamyyJ2hb97IuBEif4qBHGjPHBS64sZwytpLEqtBQ1gPJTnOaQ6qmpET16cK14kkjbazl6+p0RRv0yw=="
},
"regexp.prototype.flags": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.3.0.tgz",
@ -2316,6 +2949,27 @@
"uuid": "^3.3.2"
}
},
"require_optional": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/require_optional/-/require_optional-1.0.1.tgz",
"integrity": "sha512-qhM/y57enGWHAe3v/NcwML6a3/vfESLe/sGM2dII+gEO0BpKRUkWZow/tyloNqJyN6kXSl3RyyM8Ll5D/sJP8g==",
"requires": {
"resolve-from": "^2.0.0",
"semver": "^5.1.0"
},
"dependencies": {
"resolve-from": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-2.0.0.tgz",
"integrity": "sha1-lICrIOlP+h2egKgEx+oUdhGWa1c="
},
"semver": {
"version": "5.7.1",
"resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz",
"integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ=="
}
}
},
"resolve": {
"version": "1.15.1",
"resolved": "https://registry.npmjs.org/resolve/-/resolve-1.15.1.tgz",
@ -2359,6 +3013,11 @@
"through": "~2.3.4"
}
},
"rfdc": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/rfdc/-/rfdc-1.1.4.tgz",
"integrity": "sha512-5C9HXdzK8EAqN7JDif30jqsBzavB7wLpaubisuQIGHWf2gUXSpzy6ArX/+Da8RjFpagWsCn+pIgxTMAmKw9Zug=="
},
"rimraf": {
"version": "2.6.3",
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.3.tgz",
@ -2402,6 +3061,15 @@
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
"integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
},
"saslprep": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/saslprep/-/saslprep-1.0.3.tgz",
"integrity": "sha512-/MY/PEMbk2SuY5sScONwhUDsV2p77Znkb/q3nSVstq/yQzYJOH/Azh29p9oJLsl3LnQwSvZDKagDGBsBwSooag==",
"optional": true,
"requires": {
"sparse-bitfield": "^3.0.3"
}
},
"sax": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz",
@ -2488,6 +3156,11 @@
"resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-1.0.0.tgz",
"integrity": "sha1-2kL0l0DAtC2yypcoVxyxkMmO/qM="
},
"sift": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/sift/-/sift-7.0.1.tgz",
"integrity": "sha512-oqD7PMJ+uO6jV9EQCl0LrRw1OwsiPsiFQR5AR30heR+4Dl7jBBbDLnNvWiak20tzZlSE1H7RB30SX/1j/YYT7g=="
},
"signal-exit": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.3.tgz",
@ -2510,6 +3183,20 @@
}
}
},
"sliced": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/sliced/-/sliced-1.0.1.tgz",
"integrity": "sha1-CzpmK10Ewxd7GSa+qCsD+Dei70E="
},
"sparse-bitfield": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/sparse-bitfield/-/sparse-bitfield-3.0.3.tgz",
"integrity": "sha1-/0rm5oZWBWuks+eSqzM004JzyhE=",
"optional": true,
"requires": {
"memory-pager": "^1.0.2"
}
},
"sprintf-js": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz",
@ -2546,6 +3233,23 @@
"resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz",
"integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow="
},
"streamroller": {
"version": "2.2.4",
"resolved": "https://registry.npmjs.org/streamroller/-/streamroller-2.2.4.tgz",
"integrity": "sha512-OG79qm3AujAM9ImoqgWEY1xG4HX+Lw+yY6qZj9R1K2mhF5bEmQ849wvrb+4vt4jLMLzwXttJlQbOdPOQVRv7DQ==",
"requires": {
"date-format": "^2.1.0",
"debug": "^4.1.1",
"fs-extra": "^8.1.0"
},
"dependencies": {
"date-format": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/date-format/-/date-format-2.1.0.tgz",
"integrity": "sha512-bYQuGLeFxhkxNOF3rcMtiZxvCBAquGzZm6oWA1oZ0g2THUzivaRhv8uOhdr19LmoobSOLoIAxeUK2RdbM8IFTA=="
}
}
},
"string-width": {
"version": "4.2.0",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.0.tgz",
@ -2824,6 +3528,11 @@
"random-bytes": "~1.0.0"
}
},
"universalify": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz",
"integrity": "sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg=="
},
"unpipe": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",

View File

@ -9,7 +9,9 @@
"author": "",
"license": "ISC",
"dependencies": {
"@rakh/utils": "file:../utils",
"axios": "^0.19.2",
"bayes": "^1.0.0",
"body-parser": "^1.19.0",
"cheerio": "^1.0.0-rc.3",
"cron": "^1.8.2",
@ -18,6 +20,9 @@
"express": "^4.17.1",
"fecha": "^4.2.0",
"got": "^11.2.0",
"jsonfile": "^6.0.1",
"log4js": "^6.3.0",
"mongoose": "^5.9.25",
"present": "^1.0.0",
"rss-parser": "^3.8.0",
"sqlite3": "^4.1.1",

45
preload.js Normal file
View File

@ -0,0 +1,45 @@
/**
* Created by WebStorm.
* User: martin
* Date: 28/07/2020
* Time: 10:51
*/
const fs = require('fs');
var bayes = require('bayes');
// Classifier used by this preload script. It is configured with a custom
// tokenizer so that input text is treated as a comma-separated word list
// (the sample documents further down are built with `.join(',')`).
var classifier = bayes({
// One token per comma-separated entry; no trimming or case folding is done here.
'tokenizer': function (text) {
return text.split(',');
}
});
// teach it positive phrases
/**
 * Seeds the classifier with the starter vocabulary, prints the category of
 * two sample documents, then writes the trained state to `brain.json`.
 *
 * Every word is learned as its own single-token document, labelled either
 * 'good' or 'bad'.
 *
 * @returns {Promise<void>} settles after `brain.json` has been written.
 */
async function load() {
  const goodWords = ['tsql', 'developer', 'contract', 'web', 'javascript', 'js', 'node', 'es', 'agile', 'nodejs', 'london', 'aws', 'sql', 'postgresql', 'mysql', 'docker', 'ecs', 'automation', 'jslint', 'jshint', 'vuejs', 'vue', 'nginx', 'remotely', 'mvc', 'remote', 'iot', 'mqtt', 'es6', 'es2016', 'es2017', 'es2018', 'freelance'];
  const badWords = ['react', 'redux', 'graphql', 'java', 'reactjs', 'shopify'];

  // Walk the complete lists. The earlier `i < length - 1` bound stopped one
  // short, so the final entry of each list was never taught to the classifier.
  for (const word of goodWords) {
    await classifier.learn(word, 'good');
  }
  for (const word of badWords) {
    await classifier.learn(word, 'bad');
  }

  // now ask it to categorize a document it has never seen before
  console.log(await classifier.categorize(['ui', 'developer', 'london', 'react'].join(',')));
  console.log(await classifier.categorize(['mysql', 'react', 'js', 'node', 'docker', 'kubernetes', 'google'].join(',')));

  // serialize the classifier's state as a JSON string.
  const stateJson = classifier.toJson();
  console.log(stateJson);
  fs.writeFileSync('brain.json', stateJson);
}
load();

View File

@ -133,6 +133,7 @@ class IndeedScraper extends MasterScraper {
await this.filterAdverts();
await this.addToDB();
await this.addToMongo();
}
async go(location = 'london') {

View File

@ -140,6 +140,7 @@ class IndeedMobileScraper extends MasterScraper {
await this.filterAdverts();
await this.addToDB();
await this.addToMongo();
}
async go(location = 'london') {

View File

@ -121,6 +121,7 @@ class TotaljobsScraper extends MasterScraper {
await this.filterAdverts();
await this.addToDB();
await this.addToMongo();
}
async go(location = 'london') {

View File

@ -0,0 +1,124 @@
/**
* Created by WebStorm.
* User: martin
* Date: 24/07/2020
* Time: 11:45
*/
const Jobs = require('../../lib/mongoManager');
const { Utils } = require('@rakh/utils');
const killNLDoubleSpace = /(\\n)\s{2,}|(\\n)|\s{2,}/g;
/**
 * Builds the summary list returned by the job-list endpoint.
 *
 * For each entry the `details` and `data` sub-objects are merged (keys in
 * `data` win over `details`), handed to `Utils.extractFromObj` together with
 * the list of summary field names, and the entry's `_id` is then set on the
 * result.
 *
 * @param {Array<Object>} [data] entries exposing `details`, `data` and `_id`.
 * @returns {Array<Object>|string} the reduced entries, or `''` when no list
 *   was supplied (missing, `null` or `undefined`).
 */
function reduceList(data) {
  // `== null` matches both null and undefined. The previous arguments-based
  // guard let an explicit `undefined` through and crashed on `.map`.
  if (data == null) return '';

  const summaryFields = ['title', 'site', 'company', 'timestamp', 'read', 'applied', 'jobtype', 'class', 'autoclass'];

  return data.map((v) => {
    // NOTE(review): extractFromObj presumably returns a new object holding only
    // the listed keys - confirm against @rakh/utils.
    const o = Utils.extractFromObj({ ...v.details, ...v.data, _id: v._id }, summaryFields);
    o._id = v._id;
    return o;
  });
}
/**
 * Flattens a stored job record for the single-job endpoint.
 *
 * The fields of `record.details` are lifted to the top level; `record.data`
 * is kept nested under `data` and the identifier under `_id`. A `data` or
 * `_id` key inside `details` is overridden by the record-level value.
 *
 * @param {Object} record object exposing `details`, `data` and `_id`.
 * @returns {Object} a new flattened object.
 */
function reduceRecord(record) {
  const { details, data, _id } = record;
  return Object.assign({}, details, { data, _id });
}
exports.getList = (req, res) => {
console.log('>getList req', req.params);
Jobs.find( {}, { 'details.title':1, 'details.site':1, 'details.company':1, 'data':1, '_id':1 }).limit(200).then((doc) => {
if (doc) {
res.send(reduceList(doc));
}
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
};
exports.getJob = (req, res) => {
console.log('>getJob req', req.params);
if(!req.params.id)
return res.status(500).send({
'message': 'Job id missing'
});
const id = req.params.id;
Jobs.findById(id).then((doc) => {
if (doc) {
const item = reduceRecord(doc._doc);
const date = new Date( item.timestamp * 1000);
console.log(item);
item.date = date.toLocaleString();
item.title = item.title.replace(killNLDoubleSpace, ' ');
res.send(item);
}
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
};
exports.readJob = (req, res) => {
console.log('>readJob req', req.params);
let id;
if(!req.params.id)
return res.status(500).send({
'message': 'Job id missing'
});
else
id = req.params.id;
Jobs.findById(id).then((doc) => {
if (doc) {
let fullDoc = Object.assign({}, doc._doc);
console.log('fullDoc', fullDoc);
if (!Utils.isEmpty(fullDoc)){
fullDoc.data.read = new Date().getTime();
Jobs.findByIdAndUpdate(id, fullDoc, {'new':true}).then((doc) => {
console.log(doc._doc);
res.status(200).end();
}).catch((err) => {
console.error('inside',err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
}
}
}).catch((err) => {
console.error('outer', err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
};

View File

@ -0,0 +1,89 @@
/**
* Created by WebStorm.
* User: martin
* Date: 28/07/2020
* Time: 11:08
*/
const Jobs = require('../../lib/mongoManager');
const { Utils } = require('@rakh/utils');
const fs = require('fs');
var bayes = require('bayes');
// Module-level classifier shared by the vote handlers. It starts out as a
// fresh instance with a comma-splitting tokenizer; `load()` further down
// reassigns this variable from the contents of brain.json.
var classifier = bayes({
// One token per comma-separated entry (the handlers pass words joined with ',').
'tokenizer': function (text) {
return text.split(',');
}
});
/**
 * Replaces the module-level classifier with the state stored in `brain.json`.
 *
 * The file is read synchronously, relative to the process working directory,
 * and a read or parse failure propagates to the caller.
 */
function load() {
  // Read as text so the parser receives a string rather than a Buffer.
  const file = fs.readFileSync('brain.json', 'utf8');
  classifier = bayes.fromJson(file);

  // Functions do not survive JSON serialisation, so the restored instance
  // does not carry the custom tokenizer. Re-attach the comma splitter so
  // learning after a restart tokenizes the same way as before it.
  classifier.tokenizer = (text) => text.split(',');
}
/**
 * Persists the current classifier: serialises it to a JSON string, echoes
 * that string to the console and synchronously overwrites `brain.json`.
 */
function save() {
  const serialised = classifier.toJson();

  console.log(serialised);
  fs.writeFileSync('brain.json', serialised);
}
load();
exports.upvote = (req, res) => {
console.log('>upvote req', req.params);
if(!req.params.id)
return res.status(500).send({
'message': 'Job id missing'
});
const id = req.params.id;
Jobs.findById(id).then(async (doc) => {
if (doc) {
const words = doc._doc.data.autoclass.words.join(',');
await classifier.learn(words, 'good');
save();
res.status(200).end();
}
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
};
exports.downvote = (req, res) => {
console.log('>upvote req', req.params);
if(!req.params.id)
return res.status(500).send({
'message': 'Job id missing'
});
const id = req.params.id;
Jobs.findById(id).then(async (doc) => {
if (doc) {
const words = doc._doc.data.autoclass.words.join(',');
await classifier.learn(words, 'bad');
save();
res.status(200).end();
}
}).catch((err) => {
console.error(err.message);
res.status(500).send({
'message': err.message || 'Some error occurred while querying the database.'
});
});
};

View File

@ -0,0 +1,17 @@
/**
* Created by WebStorm.
* User: martin
* Date: 24/07/2020
* Time: 11:42
*/
const jobs = require('../controllers/jobs.v2.controller');
module.exports = (app) => {
app.route('/v2/jobs')
.get(jobs.getList);
app.route('/v2/jobs/:id')
.get(jobs.getJob)
.put(jobs.readJob);
};

View File

@ -0,0 +1,17 @@
/**
* Created by WebStorm.
* User: martin
* Date: 28/07/2020
* Time: 11:07
*/
const vote = require('../controllers/vote.controller');
module.exports = (app) => {
app.route('/vote/up/:id')
.put(vote.upvote);
app.route('/vote/down/:id')
.put(vote.downvote);
};

View File

@ -58,7 +58,9 @@ app.use(bodyParser.json());
app.post('/auth', auth.auth);
require('./routes/jobs.route')(app);
require('./routes/jobs.v2.route')(app);
require('./routes/apply.route')(app);
require('./routes/vote.route')(app);
app.listen(serverPort, () => {
console.log(`Server is listening on port ${serverPort}`);

File diff suppressed because one or more lines are too long

View File

@ -20,7 +20,7 @@ const indeedScraper = new IndeedScraper();
// const page = fs.readFileSync('data/indeed/indeed-2020-04-16--092311.html');
const page = fs.readFileSync('data/indeed/page2.html');
test.test('Test Indeed scraper', async t => {
test.skip('Test Indeed scraper', async t => {
const $ = cheerio.load(page);
indeedScraper.loadPage($);
@ -35,13 +35,34 @@ test.test('Test Indeed scraper', async t => {
await indeedScraper.filterAdverts();
// await indeedScraper.addToDB();
await indeedScraper.addToMongo();
t.end();
});
test.test('Test full run Indeed scraper', async t => {
test.skip('Test full run Indeed scraper', async t => {
await indeedScraper.go('london');
t.end();
});
test.test('Test Indeed scraper -- MONGO', async t => {
const $ = cheerio.load(page);
indeedScraper.loadPage($);
await indeedScraper.breakPage();
// await indeedScraper.getJobPages();
// console.log(await indeedScraper.checkNext());
// console.log(indeedScraper.items);
// await indeedScraper.filterAdverts();
await indeedScraper.addToMongo();
t.end();
});

View File

@ -26,13 +26,14 @@ const s1jobsScraper = new RssS1Jobs();
const feed = fs.readFileSync('test/data/s1jobs/m7dp711z2r.xml');
test.test('Test Jobserve scraper', async t => {
let url = 'http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml';
await s1jobsScraper.setStartUrl(url);
s1jobsScraper.reduceItems();
await s1jobsScraper.filterAdverts();
await s1jobsScraper.addToDB();
// await s1jobsScraper.addToDB();
t.end();
});

View File

@ -19,17 +19,17 @@ const testScraper = new RssTechnojobs();
const feed = fs.readFileSync('test/data/technojobs/page1');
test.test('Test Technojobs scraper', async t => {
// await testScraper.loadFeed(feed);
await testScraper.loadFeed('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
// testScraper.reduceItems();
await testScraper.reduceItems();
// await s1jobsScraper.filterAdverts();
await s1jobsScraper.filterAdverts();
// await s1jobsScraper.addToDB();
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
/* await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
await testScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1')
*/
t.end();
});

View File

@ -22,20 +22,20 @@ console.log(`${__dirname}`);
const page = fs.readFileSync(`${__dirname}/data/totaljobs/totaljobs-2020-04-16--121504.html`);
test.test('Test Totaljobs scraper', async t => {
const $ = cheerio.load(page);
const $ = cheerio.load(page);
totaljobsScraper.loadPage($);
totaljobsScraper.loadPage($);
await totaljobsScraper.breakPage();
await totaljobsScraper.breakPage();
await totaljobsScraper.getJobPages();
// console.log(await indeedScraper.checkNext());
await totaljobsScraper.getJobPages();
// console.log(await indeedScraper.checkNext());
console.log(totaljobsScraper.items);
// console.log(totaljobsScraper.items);
await totaljobsScraper.filterAdverts();
await totaljobsScraper.filterAdverts();
// await totaljobsScraper.addToDB();
// await totaljobsScraper.addToDB();
t.end();
t.end();
});

14
test/wip.js Normal file
View File

@ -0,0 +1,14 @@
/**
* Created by WebStorm.
* User: martin
* Date: 23/07/2020
* Time: 09:26
*/
const { Corpus } = require('../lib/corpus');
const text = 'ESTAMP DEVELOPER 6 month contract £450-525 / day Developer, SQL, Photoshop, Javascript,  NET, C#, Javascript Advanced knowledge of SQL Server TSQL Experience of the design and  PDF stamp development E-STAMP DEVELOPER 6 month contract';
const out = Corpus.process(text);
console.log(out);

71
testgrabber.js Normal file
View File

@ -0,0 +1,71 @@
/**
* Created by WebStorm.
* User: martin
* Date: 16/04/2020
* Time: 23:35
*/
const CronJob = require('cron').CronJob;
const IndeedScraper = require('./scrapers/indeed');
const TotaljobsScraper = require('./scrapers/totaljobs');
const CwjobsScraper = require('./scrapers/cwjobs');
const JobserveScraper = require('./scrapers/rss.jobserve');
const RssS1Jobs = require('./scrapers/rss.s1jobs');
const RssTechnojobs = require('./scrapers/rss.technojobs');
/**
 * Entry point: runs the Indeed, Totaljobs and CWJobs scrapers once per
 * location. Every `go()` call is awaited before the next one starts, and all
 * three sites are visited for a location before moving on to the next.
 *
 * The RSS scrapers (Jobserve, s1jobs, Technojobs) are still constructed, but
 * their feed runs are parked in the block comment at the end.
 *
 * If any `go()` rejects, the remaining runs are skipped; the error is logged
 * and the process exit code is set to 1 rather than leaving an unhandled
 * promise rejection.
 */
(async function () {
  console.log('Started..');

  const indeedScraper = new IndeedScraper();
  const totaljobsScraper = new TotaljobsScraper();
  const cwjobsScraper = new CwjobsScraper();
  // Only referenced by the parked feed runs below; still constructed so that
  // block can be re-enabled without further changes.
  const jobserveScraper = new JobserveScraper();
  const s1jobsScraper = new RssS1Jobs();
  const technojobsScraper = new RssTechnojobs();

  // Order matters for reproducing the original run: location-major, then
  // Indeed -> Totaljobs -> CWJobs within each location.
  const locations = ['london', 'glasgow', 'edinburgh', 'milton keynes'];
  const siteScrapers = [indeedScraper, totaljobsScraper, cwjobsScraper];

  for (const location of locations) {
    for (const scraper of siteScrapers) {
      // Deliberately sequential: one request run at a time, as in the original.
      await scraper.go(location);
    }
  }

  /*
  await jobserveScraper.go('https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/9BCBF25C586A0E3F.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/4E2AC50E02AD128B.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/6DA9769BA89834AA.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/EDF47BEA6B31EF.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/3CAD044BEF2BFA.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/C7B25D86D0844A.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/64A3EEF615FA4C.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/CA49421A86CA3F74.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/846CDA8658FF93A3.rss');
  await jobserveScraper.go('https://www.jobserve.com/MySearch/ED1708BF42EF3513.rss'); // javascript node 2 Jul 2020
  await jobserveScraper.go('https://www.jobserve.com/MySearch/4C67595E323E3453.rss'); // vuejs 2 Jul 2020
  await jobserveScraper.go('https://www.jobserve.com/MySearch/DCD6B8CE431FE402.rss'); // svelte 2 Jul 2020
  await s1jobsScraper.go('http://www.s1jobs.com/xml/m7dp711z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/pfvf7o7z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/lluqnt8z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/tu33qt8z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/u3btnz8z2r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml');
  await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');
  await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
  await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
  await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
  */
})().catch((err) => {
  // Surface scraper failures explicitly instead of as an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});

1
unused.json Normal file

File diff suppressed because one or more lines are too long

22
words.js Normal file
View File

@ -0,0 +1,22 @@
/**
* Created by WebStorm.
* User: martin
* Date: 27/07/2020
* Time: 10:08
*/
const jsonfile = require('jsonfile');
const data = require('./unused.json');
/**
 * Collects the distinct entries of `data` whose `length` equals `size`
 * (keeping first-seen order), writes them as an array to `limited.json`,
 * then logs `done`.
 *
 * @param {number} size - Exact length an entry must have to be kept.
 */
function show(size) {
  // Filter and de-duplicate in one pass; a Set keeps insertion order.
  const matches = data.reduce(
    (seen, entry) => (entry.length === size ? seen.add(entry) : seen),
    new Set()
  );
  jsonfile.writeFileSync('limited.json', Array.from(matches));
  console.log('done');
}

show(11);