2020-05-22 22:40:33 +00:00
|
|
|
/**
|
|
|
|
* Created by WebStorm.
|
|
|
|
* User: martin
|
|
|
|
* Date: 22/05/2020
|
|
|
|
* Time: 12:01
|
|
|
|
|
|
|
|
*/
|
|
|
|
const filterReject = require('../lib/filter_reject');
|
|
|
|
const filterAccept = require('../lib/filter_md_jobs');
|
|
|
|
const dbmanager = require('../lib/dbmanager');
|
2020-09-01 11:44:42 +00:00
|
|
|
const JobsModel = require('../lib/mongoManager');
|
2020-08-24 08:35:30 +00:00
|
|
|
|
2020-09-10 13:13:08 +00:00
|
|
|
const SHA = require('crypto-js/sha256');
|
|
|
|
|
2020-08-24 08:35:30 +00:00
|
|
|
const { Utils } = require('@rakh/utils');
|
|
|
|
const { Corpus } = require('./corpus');
|
2020-05-22 22:40:33 +00:00
|
|
|
|
|
|
|
/**
 * Shared state and helpers for collecting job adverts, filtering them and
 * persisting them (via `dbmanager` and the Mongo `JobsModel`).
 *
 * NOTE(review): presumably subclassed once per job site, with the subclass
 * filling `this.items` and setting `siteid` / `siteurl` — confirm against callers.
 */
class MasterBase {
  /**
   * Initialises every field to an empty/default value.
   */
  constructor() {
    this.url = '';
    this.items = [];
    this.currentPage = null;
    this.hosturl = '';
    this.siteid = '';
    // Neither flag is read inside this class.
    this.useStone = false;
    this.saveFile = false;
    // Not used inside this class; presumably handed to an HTTP client by
    // subclasses — TODO confirm.
    this.requestOptions = {
      'url': '',
      'proxy': 'http://uk.proxymesh.com:31280',
      'tunnel': true
    };
  }

  /**
   * Builds an empty advert record stamped with the current time.
   *
   * @returns {{summary: string, site: string, postDate: string, location: string, company: string, id: string, title: string, isEasyApply: boolean, salary: string, url: string, timestamp: number}}
   *   A blank record; `site` is `this.siteid` (or '') and `timestamp` is the
   *   current Unix time in whole seconds.
   */
  newRecord() {
    // Math.floor rather than `~~`: the bitwise form coerces to a signed 32-bit
    // integer and would wrap negative for dates after January 2038.
    const now = Math.floor(Date.now() / 1000);

    return {
      'title': '',
      'site': this.siteid || '',
      'url': '',
      'id': '',
      'summary': '',
      'postDate': '',
      'isEasyApply': false,
      'location': '',
      'company': '',
      'salary': '',
      'timestamp': now
    };
  }

  /**
   * Inserts every entry of `this.items` through `dbmanager.insertOne`.
   *
   * All inserts are started immediately; the returned promise resolves once
   * every insert has settled. A failed insert is logged and does not reject
   * the returned promise.
   *
   * @returns {Promise<void>}
   */
  async addToDB() {
    const pending = this.items.map((item) =>
      dbmanager.insertOne(item)
        .then((data) => {
          console.log(data);
        })
        .catch((err) => {
          console.error(`${this.siteid} db error`);
          console.error((err && err.message) || 'Some error occurred while querying the database.');
        }));

    await Promise.all(pending);
  }

  /**
   * Converts every entry of `this.items` with `reduceData` and saves it as a
   * `JobsModel` document.
   *
   * A failed save is logged and does not reject the returned promise. An
   * exception thrown while building a document (for example by `reduceData`)
   * still propagates synchronously to the caller.
   *
   * @returns {Promise<void>} Resolves once every save has settled.
   */
  addToMongo() {
    console.log('>> ADD TO MONGO!');

    const pending = this.items.map((item) => {
      const newObj = this.reduceData(item);
      const newJob = new JobsModel(newObj);

      return newJob.save()
        .then((m) => {
          console.log('m', m.details.title);
        })
        .catch((err) => {
          console.error('m', err);
        });
    });

    return Promise.all(pending).then(() => undefined);
  }

  /**
   * Buckets a free-text salary/rate string by the first number found in it.
   *
   * @param {string} inval Salary text, e.g. '£400-£500 per day'.
   * @returns {number} 0 when no number is found or the number is 100 or less,
   *   1 when it is above 100 and below 5000, 2 when it is 5000 or more.
   *   NOTE(review): 1 presumably means a day rate and 2 an annual salary —
   *   confirm with the consumers of `data.jobtype`.
   */
  analyseRate(inval) {
    console.log('analyseRate', inval);

    // Missing or non-string salary: nothing to analyse.
    if (typeof inval !== 'string') return 0;

    // '-' and '/' separate values ('400-500', '500/day'), so they become
    // spaces; deleting them would fuse a range into one large number.
    const separatorReg = /[-/]/g;
    // Strips 'ir35', currency/punctuation symbols, letters and a trailing
    // decimal part such as '.50'. Commas are deleted so '45,000' reads 45000.
    const cleanerReg = /ir35|[+$#,=&:;()\\£a-z]|\.\d{1,2}/gi;
    const clearSpace = /\s+/g;

    const cleaned = inval
      .replace(separatorReg, ' ')
      .replace(cleanerReg, '')
      .replace(clearSpace, ' ')
      .trim();

    const first = Number.parseInt(cleaned.split(' ')[0], 10);

    if (Number.isNaN(first) || first <= 100) return 0;

    return first < 5000 ? 1 : 2;
  }

  /**
   * Reshapes a scraped advert into the `{ details, data }` object passed to
   * `JobsModel`.
   *
   * @param {Object} d Scraped advert; `title`, `summary`, `salary`, `timestamp`
   *   and (optionally) `applied` are read directly.
   * @returns {{data: {read: number, autoclass: number, applied: number, jobtype: number, class: number, timestamp: number}, details: {}}}
   *   `details` holds the extracted advert fields plus `hashed`; `data` holds
   *   the derived classification values and the timestamp multiplied by 1000.
   */
  reduceData(d) {
    const clearPremium = /(\n+)(Featured|Premium)/gi;
    const otherStupid = /((↵\s+)+)(Featured|Premium)/gi;

    const outObj = { 'details': {}, 'data': { 'read': 0, 'applied': 0, 'jobtype': 0, 'class': 0, 'autoclass': 0 } };

    // NOTE(review): 'postdate' and 'easyapply' differ from the 'postDate' and
    // 'isEasyApply' keys produced by newRecord() — confirm which spelling the
    // scraped items actually carry.
    outObj.details = Utils.extractFromObj(d, ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp']);

    // Only clean the title when there is one; a record without a string title
    // is passed through untouched instead of throwing.
    if (typeof outObj.details.title === 'string') {
      outObj.details.title = outObj.details.title
        .replace(clearPremium, '')
        .replace(otherStupid, '');
    }

    outObj.details.hashed = SHA(outObj.details.summary);

    outObj.data.read = 0;
    outObj.data.applied = d.applied || 0;
    outObj.data.jobtype = this.analyseRate(d.salary);
    outObj.data.autoclass = Corpus.process(d.summary);
    outObj.data.timestamp = d.timestamp * 1000;

    return outObj;
  }

  /**
   * Narrows `this.items` in two passes: first with the `filterReject`
   * predicate, then with the `filterAccept` predicate, logging the item count
   * before and after each pass.
   *
   * @returns {Promise<void>}
   */
  async filterAdverts() {
    console.log('>> FilterAdverts');
    console.log(`Currently ${this.items.length} items...`);

    this.items = this.items.filter(filterReject);

    console.log(`After reject ${this.items.length} items...`);

    this.items = this.items.filter(filterAccept);

    console.log(`After accept ${this.items.length} items...`);
  }

  /**
   * Stores the URL in `this.url`.
   *
   * @param {string} newUrl
   */
  setStartUrl(newUrl) {
    this.url = newUrl;
  }

  /**
   * Stores the given page in `this.currentPage`.
   *
   * @param {*} page
   */
  loadPage(page) {
    this.currentPage = page;
  }

  /**
   * Prefixes a path with `https://` and `this.siteurl`.
   *
   * NOTE(review): `siteurl` is not initialised by this class's constructor
   * (which sets `hosturl`); presumably a subclass assigns it — confirm.
   *
   * @param {string} appended Text appended directly after the site URL.
   * @returns {string}
   */
  makeUrl(appended) {
    return `https://${ this.siteurl }${appended}`;
  }

  /**
   * Currently builds exactly the same URL as `makeUrl`.
   *
   * @param {string} appended Text appended directly after the site URL.
   * @returns {string}
   */
  makeProxyUrl(appended) {
    return `https://${ this.siteurl }${appended}`;
  }

  /**
   * Builds an `image.silvrtree.co.uk` URL for the given image URL.
   *
   * @param {string} url Image URL placed after the `q<q>/` path segment.
   * @param {number} [q=75] Value inserted after the `q` in the path.
   * @returns {string}
   */
  makeImg(url, q = 75) {
    return `https://image.silvrtree.co.uk/q${q}/${url}`;
  }

  /**
   * Resets `this.items` and `this.rawItems` to empty arrays.
   *
   * @returns {Promise<void>}
   */
  async go() {
    this.items = [];
    this.rawItems = [];
  }
}
|
|
|
|
|
|
|
|
// Expose the MasterBase class as this module's CommonJS export.
module.exports = MasterBase;
|