/**
 * Created by WebStorm.
 * User: martin
 * Date: 22/07/2020
 * Time: 10:20
 *
 * One-off migration script: reads every job row from the SQL database exposed
 * by ./lib/connect, reshapes it into the { details, data } document layout,
 * saves it through the Jobs model from ./lib/mongoManager, and finally removes
 * old, un-applied jobs.
 */
const db = require('./lib/connect');
const log4js = require('log4js');
const logger = log4js.getLogger();
const { Utils } = require('@rakh/utils');
const { Corpus } = require('./lib/corpus');
const SHA = require('crypto-js/sha256');

/*
 2604 - legacy direct-mongoose bootstrap, kept for reference only.
 NOTE(review): this snippet used to embed a database username and password in
 the connection string. They are redacted here; rotate them if still valid.

 const mongoose = require('mongoose');
 const log4js = require('log4js');
 const logger = log4js.getLogger();
 const Jobs = require('./models/jobs');
 require('dotenv').config();
 logger.level = 'debug';
 logger.debug(`mongodb://<user>:<password>@${ process.env.DBHOST }/${ process.env.DBNAME}`);
 mongoose.connect(`mongodb://<user>:<password>@${ process.env.DBHOST }/${ process.env.DBNAME}`);
 const mDB = mongoose.connection;
 mDB.on('error', console.error.bind(console, 'connection error:'));
 */

const Jobs = require('./lib/mongoManager');

const migrate = (function() {
  /**
   * Buckets a free-text salary/rate string by the first number found in it.
   *
   * The text is stripped of "ir35", currency/punctuation characters, letters
   * and trailing decimals (".5", ".50"), then the first whitespace-separated
   * token is parsed as a base-10 integer.
   *
   * @param {*} inval - salary text; null/undefined yields 0, anything else is
   *   coerced to a string before cleaning.
   * @returns {number} 0 when there is no usable number or it is 100 or less,
   *   1 when it is above 100 and below 5000, 2 when it is 5000 or more.
   */
  function analyseRate(inval) {
    // A missing salary used to throw on `.replace`; treat it as "no rate".
    if (inval === null || inval === undefined) return 0;

    const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
    const clearSpace = /\s+/g;

    const cleaned = String(inval).replace(cleanerReg, '').replace(clearSpace, ' ').trim();
    const item = parseInt(cleaned.split(' ')[0], 10);

    if (Number.isNaN(item)) return 0;
    if (item >= 5000) return 2;
    // Exactly 100 stays in bucket 0, matching the historical thresholds.
    if (item > 100) return 1;

    return 0;
  }

  /**
   * Converts one SQL job row into the document shape passed to the Jobs model.
   *
   * @param {Object} d - row as returned by getCurrent(); must carry a string
   *   `title` plus the fields listed in the extraction below.
   * @returns {{details: Object, data: Object}} `details` holds the copied row
   *   fields (title cleaned, plus `hashed`), `data` holds the derived flags.
   */
  function reduceData(d) {
    const clearPremium = /(\n+)(Featured|Premium)/gi;
    const otherStupid = /((↵\s+)+)(Featured|Premium)/gi;

    const details = Utils.extractFromObj(d, ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp']);

    // Drop the "Featured"/"Premium" badge text that trails some titles.
    details.title = details.title.replace(clearPremium, '').replace(otherStupid, '');
    details.hashed = SHA(details.summary);

    return {
      'details': details,
      'data': {
        // The legacy read flag (d.read) is deliberately not carried over.
        'read': 0,
        'applied': d.applied || 0,
        'jobtype': analyseRate(d.salary),
        'class': 0,
        'autoclass': Corpus.process(d.summary),
        // presumably seconds -> milliseconds; deleteOld() compares this
        // against a millisecond epoch value.
        'timestamp': d.timestamp * 1000
      }
    };
  }

  /**
   * Loads every job row, joined with its applied/read markers, ordered by _id.
   *
   * @returns {Promise<Object[]>} resolves with the rows; rejects with the
   *   database error when the query fails.
   */
  function getCurrent() {
    console.log('get version');

    const sql = 'select jobs.*, applied.a as applied, read.d as read from jobs left join applied on applied.aid = jobs._id left join read on read.rid = jobs._id order by _id asc;';

    return new Promise((resolve, reject) => {
      db.all(sql, [], (err, rows) => {
        if (err) {
          // Stop here: `rows` is not usable when the query failed.
          reject(err);

          return;
        }

        resolve([...rows]);
      });
    });
  }

  /**
   * Migrates every row from getCurrent() into the Jobs collection, one save at
   * a time, then asks the Corpus to export its unused entries.
   *
   * A failed save is logged and the loop continues. Any other failure
   * (query error, row conversion error) is logged and ends the migration.
   *
   * @returns {Promise<void>} never rejects; failures are logged.
   */
  async function start() {
    try {
      const rows = await getCurrent();

      logger.debug(rows.length);

      // Iterate over every row; the old index loop stopped one short and
      // silently skipped the last job.
      for (const row of rows) {
        const newD = reduceData(row);
        // logger.debug(newD);
        const newJob = Jobs(newD);

        try {
          const saved = await newJob.save();

          logger.debug('m', saved.details.title);
        }
        catch (err) {
          // keyPattern is only present on duplicate-key errors; fall back to
          // the error itself so other failures are not logged as undefined.
          logger.error(err.keyPattern || err);
        }
      }

      logger.debug('SAVING!!');
      Corpus.exportUnused();
    }
    catch (err) {
      logger.error(err.keyPattern || err);
    }
  }

  /**
   * Deletes jobs whose data.timestamp is more than 14 days old and whose
   * data.applied is 0.
   *
   * @returns {Promise<void>} resolves once the delete has completed or its
   *   failure has been logged; never rejects.
   */
  async function deleteOld() {
    const oneDay = 86400000;
    const twoWeeksAgo = Date.now() - (14 * oneDay);

    logger.debug('Delete older than: ', new Date(twoWeeksAgo), twoWeeksAgo);
    logger.debug({ 'data.timestamp': { '$lt': twoWeeksAgo } });

    try {
      // Awaited so callers that await deleteOld() really wait for the delete.
      const m = await Jobs.deleteMany({ 'data.timestamp': { '$lt': twoWeeksAgo }, 'data.applied': 0 });

      logger.debug('m', m);
    }
    catch (err) {
      logger.error(err);
    }
  }

  // newJob.find({ 'data': { 'timestamp': { '$lt': 1587034346000 } } });

  return {
    'start': start,
    'deleteOld': deleteOld
  };
})();

// Script entry point: migrate first, then prune old jobs.
(async function() {
  await migrate.start();
  await migrate.deleteOld();
  logger.info('Done??');
})().catch((err) => {
  // Last-resort handler so an unexpected failure is logged rather than
  // surfacing as an unhandled rejection.
  logger.error(err);
});