f2880b661e
* Moved to mongo * UI updated to use mongo * UI is a bit fancier now * Import sql to mongo
157 lines
4.1 KiB
JavaScript
157 lines
4.1 KiB
JavaScript
/**
|
|
* Created by WebStorm.
|
|
* User: martin
|
|
* Date: 22/07/2020
|
|
* Time: 10:20
|
|
|
|
*/
|
|
const db = require('./lib/connect');
|
|
const log4js = require('log4js');
|
|
const logger = log4js.getLogger();
|
|
const { Utils } = require('@rakh/utils');
|
|
|
|
const { Corpus } = require('./lib/corpus');
|
|
|
|
const SHA = require('crypto-js/sha256');
|
|
|
|
/*
|
|
|
|
2604
|
|
|
|
const mongoose = require('mongoose');
|
|
const log4js = require('log4js');
|
|
const logger = log4js.getLogger();
|
|
|
|
const Jobs = require('./models/jobs');
|
|
|
|
require('dotenv').config();
|
|
|
|
logger.level = 'debug';
|
|
|
|
logger.debug(`mongodb://martin:1V3D4m526i@${ process.env.DBHOST }/${ process.env.DBNAME}`);
|
|
|
|
mongoose.connect(`mongodb://martin:1V3D4m526i@${ process.env.DBHOST }/${ process.env.DBNAME}`);
|
|
|
|
const mDB = mongoose.connection;
|
|
mDB.on('error', console.error.bind(console, 'connection error:'));
|
|
*/
|
|
|
|
const Jobs = require('./lib/mongoManager');
|
|
|
|
const migrate = (function() {
|
|
/**
 * Classify a free-text salary/rate value into a coarse numeric bucket.
 *
 * The text is stripped of the literal "ir35", currency symbols, punctuation,
 * letters and short decimal fractions (".5", ".50"). The first number left
 * over decides the bucket.
 * NOTE(review): the buckets presumably separate hourly / daily / annual
 * figures — confirm against whatever consumes `data.jobtype`.
 *
 * @param {string|number|null|undefined} inval - Raw salary text.
 * @returns {number} 0 when there is no usable number or it is <= 100,
 *   1 when it is between 101 and 4999, 2 when it is >= 5000.
 */
function analyseRate(inval) {
  // A missing salary used to crash on `.replace`; treat it as "unknown".
  if (inval == null) return 0;

  const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
  const clearSpace = /\s+/g;

  // String() lets numeric values through instead of throwing.
  const [firstToken] = String(inval)
    .replace(cleanerReg, '')
    .replace(clearSpace, ' ')
    .trim()
    .split(' ');

  const item = Number.parseInt(firstToken, 10);

  // NaN (no digits at all) and anything up to and including 100 stay in bucket 0.
  if (Number.isNaN(item) || item <= 100) return 0;

  return item < 5000 ? 1 : 2;
}
|
|
/**
 * Reshape one raw job row into the nested `{ details, data }` document.
 *
 * `details` holds the fields copied from the row by `Utils.extractFromObj`,
 * with "Featured"/"Premium" suffixes removed from the title and a SHA-256 of
 * the summary added as `hashed`. `data` holds the tracking flags and the
 * derived classification values.
 *
 * @param {Object} d - Raw row as returned by `getCurrent()`.
 * @returns {{details: Object, data: Object}} The reshaped document.
 */
function reduceData(d) {
  const premiumAfterNewline = /(\n+)(Featured|Premium)/gi;
  const premiumAfterArrow = /((↵\s+)+)(Featured|Premium)/gi;
  const detailKeys = ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp'];

  const details = Utils.extractFromObj(d, detailKeys);

  details.title = details.title
    .replace(premiumAfterNewline, '')
    .replace(premiumAfterArrow, '');
  details.hashed = SHA(details.summary);

  // `read` is always reset to 0 and `class` always starts at 0.
  const data = {
    'read': 0,
    'applied': d.applied || 0,
    'jobtype': analyseRate(d.salary),
    'class': 0,
    'autoclass': Corpus.process(d.summary),
    // presumably converts a seconds-based timestamp to milliseconds — confirm
    'timestamp': d.timestamp * 1000
  };

  return { 'details': details, 'data': data };
}
|
|
|
|
/**
 * Load every job row from the SQL store behind `db`, joined with its
 * `applied` and `read` flags and ordered by `_id`.
 *
 * @returns {Promise<Object[]>} Resolves with a fresh array of rows; rejects
 *   with the driver's error when the query fails.
 */
function getCurrent() {
  console.log('get version');

  const sql = 'select jobs.*, applied.a as applied, read.d as read from jobs left join applied on applied.aid = jobs._id left join read on read.rid = jobs._id order by _id asc;';

  return new Promise((resolve, reject) => {
    db.all(sql, [], (err, rows) => {
      if (err) {
        // Stop here: falling through would touch `rows` on a failed query
        // and throw inside the driver callback after the promise was rejected.
        reject(err);

        return;
      }

      // Hand back a copy, as before, so callers never share the driver's array.
      resolve([...rows]);
    });
  });
}
|
|
|
|
/**
 * Migrate every row returned by `getCurrent()` into the `Jobs` store.
 *
 * Rows are reshaped with `reduceData()` and saved one at a time. A failed
 * save is logged and the migration carries on with the next row. Once all
 * rows have been attempted, `Corpus.exportUnused()` is called. Any other
 * failure is logged and ends the run; this function never rejects.
 *
 * @returns {Promise<void>}
 */
async function start() {
  try {
    const rows = await getCurrent();

    logger.debug(rows.length);

    // Visit every row: the previous `length - 1` bound skipped the last one.
    for (const row of rows) {
      const newJob = Jobs(reduceData(row));

      try {
        const saved = await newJob.save();

        logger.debug('m', saved.details.title);
      }
      catch (err) {
        // `keyPattern` is presumably the duplicate-key detail from the Mongo
        // driver; fall back to the error itself so other failures are visible.
        logger.error(err.keyPattern || err);
      }
    }

    logger.debug('SAVING!!');
    Corpus.exportUnused();
  }
  catch (err) {
    logger.error(err.keyPattern || err);
  }
}
|
|
|
|
/**
 * Delete jobs whose `data.timestamp` is more than fourteen days old and that
 * have `data.applied` equal to 0.
 *
 * The returned promise settles only after the delete has finished, so
 * callers that `await` it really wait for the deletion. Failures are logged
 * and not rethrown.
 *
 * @returns {Promise<void>}
 */
async function deleteOld() {
  const oneDay = 86400000;
  const twoWeeksAgo = Date.now() - (14 * oneDay);
  const filter = { 'data.timestamp': { '$lt': twoWeeksAgo }, 'data.applied': 0 };

  logger.debug('Delete older than: ', new Date(twoWeeksAgo), twoWeeksAgo);
  // Log the exact filter that is executed, including the `applied` condition.
  logger.debug(filter);

  try {
    const m = await Jobs.deleteMany(filter);

    logger.debug('m', m);
  }
  catch (err) {
    logger.error(err);
  }
}
|
|
|
|
// newJob.find({ 'data': { 'timestamp': { '$lt': 1587034346000 } } });
|
|
|
|
return {
|
|
'start':start,
|
|
'deleteOld': deleteOld
|
|
};
|
|
})();
|
|
|
|
// Script entry point: run the migration, then prune old jobs.
(async function() {
  await migrate.start();
  await migrate.deleteOld();
  logger.info('Done??');
})().catch((err) => {
  // Never leave the top-level promise floating: report the failure and make
  // the process exit with a non-zero status.
  logger.error(err);
  process.exitCode = 1;
});
|