/**
 * Created by WebStorm.
 * User: martin
 * Date: 22/05/2020
 * Time: 12:01
 */
const filterReject = require('../lib/filter_reject');
const filterAccept = require('../lib/filter_md_jobs');
const dbmanager = require('../lib/dbmanager');
// const JobsModel = require('../lib/mongoManager');
const SHA = require('crypto-js/sha256');

const { Utils } = require('@rakh/utils');

const { Corpus } = require('./corpus');

/**
 * Base class that holds the advert records (`items`) collected for one site
 * and provides shared helpers to create, filter and persist those records and
 * to build site / image URLs.
 *
 * NOTE(review): `go()` only resets state here, so subclasses presumably
 * override it to do the actual fetching — confirm against the subclasses.
 */
class MasterBase {
  /**
   * Initialises every field to an empty / disabled default.
   */
  constructor() {
    this.url = '';
    this.items = [];
    this.currentPage = null;
    this.hosturl = '';
    this.siteid = '';
    this.useStone = false;
    this.saveFile = false;
    // Default request options; nothing in this class reads them.
    this.requestOptions = {
      'url' : '',
      'proxy' : 'http://uk.proxymesh.com:31280',
      'tunnel' : true
    };
  }

  /**
   * Creates a blank advert record pre-filled with this instance's `siteid`
   * and the current time.
   *
   * @returns {{summary: string, site: string, postDate: string, location: string, company: string, id: string, title: string, isEasyApply: boolean, salary: string, url: string, timestamp: number}}
   *   A new record; `timestamp` is the current Unix time in whole seconds.
   */
  newRecord() {
    // Math.floor instead of `~~`: the double bitwise NOT truncates to a signed
    // 32-bit integer, which overflows for Unix-second timestamps in 2038.
    const now = Math.floor(Date.now() / 1000);

    return {
      'title': '',
      'site': this.siteid || '',
      'url':'',
      'id':'',
      'summary':'',
      'postDate':'',
      'isEasyApply':false,
      'location': '',
      'company': '',
      'salary': '',
      'timestamp':now
    };
  }

  /**
   * Inserts every record in `this.items` via `dbmanager.insertOne`.
   *
   * All inserts are started immediately; the returned promise settles only
   * once every insert has finished. A failed insert is logged and does not
   * stop the remaining inserts or reject the returned promise.
   *
   * @returns {Promise<void>}
   */
  async addToDB() {
    const pending = this.items.map((item) =>
      dbmanager.insertOne(item)
        .then((data) => {
          console.log(data);
        })
        .catch((err) => {
          console.error(`${this.siteid} db error`);
          console.error((err && err.message) || 'Some error occurred while querying the database.');
        }));

    // Previously the inserts were left floating, so `await addToDB()` returned
    // before any write had completed.
    await Promise.all(pending);
  }

  /**
   * Mongo persistence is currently disabled: this only logs a notice.
   *
   * The earlier implementation (build a `JobsModel` from
   * `this.reduceData(item)` and `save()` it) sat behind an unconditional
   * `return` and depended on the `JobsModel` import that is commented out at
   * the top of the file, so it could never run and has been removed.
   */
  addToMongo() {
    console.log('>> no ADD TO MONGO!');
  }

  /**
   * Buckets a free-text salary / rate string by the size of the first number
   * found in it.
   *
   * The text is stripped of "ir35", letters, currency / punctuation symbols
   * and a trailing one- or two-digit decimal part, then the first remaining
   * whitespace-separated token is parsed as an integer. For example
   * "£400 - £450 per day" yields 400 and "£50,000 per annum" yields 50000.
   *
   * NOTE(review): presumably 1 means a day rate and 2 an annual salary —
   * confirm against the consumers of `jobtype`.
   *
   * @param {string} inval Raw salary text; `null` / `undefined` yield 0.
   * @returns {number} 0 when no number is found or the number is <= 100,
   *   1 when it is > 100 and < 5000, 2 when it is >= 5000.
   */
  analyseRate(inval) {
    console.log('analyseRate', inval);

    // Records without a salary would otherwise throw on `.replace`.
    if (inval === null || inval === undefined) return 0;

    const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
    const clearSpace = /\s+/g;

    const cleaned = String(inval).replace(cleanerReg, '').replace(clearSpace, ' ');
    const firstToken = cleaned.trim().split(' ')[0];
    const amount = Number.parseInt(firstToken, 10);

    if (Number.isNaN(amount)) return 0;
    if (amount >= 5000) return 2;
    // NOTE(review): exactly 100 falls through to 0, as the original
    // thresholds (`< 100` / `> 100`) did — confirm whether that is intended.
    if (amount > 100) return 1;

    return 0;
  }

  /**
   * Converts a raw advert record into the `{details, data}` shape.
   *
   * `details` holds the descriptive fields picked from `d` (with
   * "Featured" / "Premium" suffixes removed from the title) plus a SHA256
   * hash of the summary; `data` holds the classification and status fields.
   *
   * @param {Object} d Raw advert record.
   * @returns {{data: {read: number, autoclass: *, applied: number, jobtype: number, class: number, timestamp: number}, details: Object}}
   */
  reduceData(d) {
    const clearPremium = /(\n+)(Featured|Premium)/gi;
    const otherStupid = /((↵\s+)+)(Featured|Premium)/gi;

    const outObj = {
      'details':{},
      'data':{
        'read':0,
        'applied':0,
        'jobtype': 0,
        'class':0,
        'autoclass':0
      }
    };

    // NOTE(review): 'postdate' and 'easyapply' differ from the 'postDate' and
    // 'isEasyApply' keys produced by newRecord(); verify against
    // Utils.extractFromObj whether those two fields are actually picked up.
    outObj.details = Utils.extractFromObj(d, ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp']);

    // Guard so a record without a title does not throw.
    if (typeof outObj.details.title === 'string') {
      outObj.details.title = outObj.details.title
        .replace(clearPremium, '')
        .replace(otherStupid, '');
    }

    // NOTE(review): stored as whatever crypto-js SHA256 returns; confirm
    // whether a string (`.toString()`) is expected downstream.
    outObj.details.hashed = SHA(outObj.details.summary);

    outObj.data.applied = d.applied || 0;
    outObj.data.jobtype = this.analyseRate(d.salary);
    outObj.data.autoclass = Corpus.process(d.summary);
    // Record timestamps are in seconds (see newRecord); scale to milliseconds.
    outObj.data.timestamp = d.timestamp * 1000;

    return outObj;
  }

  /**
   * Narrows `this.items` in two passes — first with the `filterReject`
   * predicate, then with the `filterAccept` predicate — logging the item
   * count before and after each pass.
   *
   * @returns {Promise<void>}
   */
  async filterAdverts() {
    console.log('>> FilterAdverts');
    console.log(`Currently ${this.items.length} items...`);

    this.items = this.items.filter(filterReject);
    console.log(`After reject ${this.items.length} items...`);

    this.items = this.items.filter(filterAccept);
    console.log(`After accept ${this.items.length} items...`);
  }

  /**
   * Stores the start URL.
   *
   * @param {string} newUrl Value assigned to `this.url`.
   */
  setStartUrl(newUrl) {
    this.url = newUrl;
  }

  /**
   * Stores the given page as the current page.
   *
   * @param {*} page Value assigned to `this.currentPage`.
   */
  loadPage(page) {
    this.currentPage = page;
  }

  /**
   * Builds an https URL on this site's host.
   *
   * Uses `this.siteurl` when set. The constructor only initialises
   * `hosturl`, so that is used as a fallback instead of emitting
   * "https://undefined…".
   * NOTE(review): confirm that `hosturl` holds a bare host name.
   *
   * @param {string} appended Path (and query) appended after the host.
   * @returns {string}
   */
  makeUrl(appended) {
    const host = this.siteurl || this.hosturl;

    return `https://${ host }${appended}`;
  }

  /**
   * Builds the URL used for proxied requests; currently the same shape as
   * {@link MasterBase#makeUrl}, with the same `hosturl` fallback.
   *
   * @param {string} appended Path (and query) appended after the host.
   * @returns {string}
   */
  makeProxyUrl(appended) {
    const host = this.siteurl || this.hosturl;

    return `https://${ host }${appended}`;
  }

  /**
   * Builds an image.silvrtree.co.uk URL for the given source image.
   *
   * @param {string} url Source image URL, appended verbatim.
   * @param {number} [q=75] Value placed in the `q<value>` path segment.
   * @returns {string}
   */
  makeImg(url, q = 75) {
    return `https://image.silvrtree.co.uk/q${q}/${url}`;
  }

  /**
   * Resets the collected `items` and `rawItems` to empty arrays.
   *
   * @returns {Promise<void>}
   */
  async go() {
    this.items = [];
    this.rawItems = [];
  }
}

module.exports = MasterBase;