2020-08-24 08:35:30 +00:00
/ * *
* Created by WebStorm .
* User : martin
* Date : 22 / 07 / 2020
* Time : 10 : 20
* /
const db = require ( './lib/connect' ) ;
const log4js = require ( 'log4js' ) ;
const logger = log4js . getLogger ( ) ;
const { Utils } = require ( '@rakh/utils' ) ;
const { Corpus } = require ( './lib/corpus' ) ;
2020-09-10 13:13:08 +00:00
const SHA = require ( 'crypto-js/sha256' ) ;
2020-08-24 08:35:30 +00:00
/ *
2020-09-10 13:13:08 +00:00
2604
2020-08-24 08:35:30 +00:00
const mongoose = require ( 'mongoose' ) ;
const log4js = require ( 'log4js' ) ;
const logger = log4js . getLogger ( ) ;
const Jobs = require ( './models/jobs' ) ;
require ( 'dotenv' ) . config ( ) ;
logger . level = 'debug' ;
logger . debug ( ` mongodb://martin:1V3D4m526i@ ${ process . env . DBHOST } / ${ process . env . DBNAME } ` ) ;
mongoose . connect ( ` mongodb://martin:1V3D4m526i@ ${ process . env . DBHOST } / ${ process . env . DBNAME } ` ) ;
const mDB = mongoose . connection ;
mDB . on ( 'error' , console . error . bind ( console , 'connection error:' ) ) ;
* /
const Jobs = require ( './lib/mongoManager' ) ;
const migrate = ( function ( ) {
/**
 * Classify a free-text salary/rate string into a coarse job-type bucket.
 *
 * The text is stripped of the literal "ir35", of currency/punctuation
 * characters, of all letters and of one- or two-digit decimal fractions.
 * The first whitespace-separated token that remains is parsed as a base-10
 * integer and bucketed.
 *
 * @param {*} inval - Salary text. `null`/`undefined` yield 0; other
 *   non-string values are converted with `String()` before cleaning.
 * @returns {number} 0 when the leading figure is 100 or less or cannot be
 *   parsed, 1 when it is above 100 and below 5000, 2 when it is 5000 or more.
 */
function analyseRate(inval) {
  // Loose equality on purpose: matches both null and undefined.
  if (inval == null) return 0;

  const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
  const clearSpace = /\s+/g;

  const cleaned = String(inval).replace(cleanerReg, '').replace(clearSpace, ' ').trim();
  // split() always yields at least one element, so no empty-array branch is needed.
  const [firstToken] = cleaned.split(' ');
  const item = parseInt(firstToken, 10);

  // NaN (nothing numeric left) and exactly 100 both fall into bucket 0,
  // matching the historical behaviour of this classifier.
  if (Number.isNaN(item) || item <= 100) return 0;

  return item < 5000 ? 1 : 2;
}
/**
 * Convert one legacy job row into the `{ details, data }` document shape
 * that is handed to the Jobs model.
 *
 * `details` is whatever `Utils.extractFromObj` returns for the listed keys
 * (presumably a pick of those fields - confirm against @rakh/utils), with
 * "Featured"/"Premium" suffixes removed from the title and a `hashed` field
 * holding the SHA-256 result of the summary.
 *
 * @param {Object} d - Legacy row; `title`, `summary`, `salary`, `applied`
 *   and `timestamp` are read directly.
 * @returns {{details: Object, data: Object}} The reduced job record.
 */
function reduceData(d) {
  // "Featured"/"Premium" tags that trail the title after newlines...
  const tagAfterNewline = /(\n+)(Featured|Premium)/gi;
  // ...or after literal "↵" glyphs followed by whitespace.
  const tagAfterReturnGlyph = /((↵\s+)+)(Featured|Premium)/gi;

  const wantedKeys = ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp'];

  const details = Utils.extractFromObj(d, wantedKeys);
  details.title = details.title
    .replace(tagAfterNewline, '')
    .replace(tagAfterReturnGlyph, '');
  details.hashed = SHA(details.summary);

  return {
    'details': details,
    'data': {
      // The row's own read flag is not carried over; every record starts unread.
      'read': 0,
      'applied': d.applied || 0,
      'jobtype': analyseRate(d.salary),
      'class': 0,
      'autoclass': Corpus.process(d.summary),
      // Multiplied by 1000 - presumably seconds to milliseconds; confirm source units.
      'timestamp': d.timestamp * 1000
    }
  };
}
/**
 * Read every row of the legacy `jobs` table, left-joined with the `applied`
 * and `read` tables, ordered by `_id` ascending.
 *
 * Wraps the callback-style `db.all(sql, params, callback)` call in a Promise.
 *
 * @returns {Promise<Object[]>} Resolves with a fresh array of the returned
 *   rows (empty when the driver supplies none); rejects with the driver
 *   error when the query fails.
 */
function getCurrent() {
  console.log('get version');

  const sql = 'select jobs.*, applied.a as applied, read.d as read from jobs left join applied on applied.aid = jobs._id left join read on read.rid = jobs._id order by _id asc;';

  return new Promise((resolve, reject) => {
    db.all(sql, [], (err, rows) => {
      if (err) {
        // Stop here: on failure there are no rows to iterate, and touching
        // them would throw inside the driver callback.
        reject(err);
        return;
      }

      resolve(Array.from(rows || []));
    });
  });
}
2020-09-10 13:13:08 +00:00
/**
 * Run the migration: load all legacy rows, reduce each one and save it
 * through the Jobs model, then ask the Corpus to export its unused entries.
 *
 * Rows are saved one at a time, in order. A failed save is logged and the
 * loop moves on to the next row. Any other failure (loading rows, reducing
 * a row, exporting) is logged and ends the run; nothing is rethrown.
 *
 * @returns {Promise<void>} Resolves once every row has been attempted.
 */
async function start() {
  try {
    const rows = await getCurrent();
    logger.debug(rows.length);

    // Iterate over every row, including the last one.
    for (const row of rows) {
      const newJob = Jobs(reduceData(row));

      try {
        const saved = await newJob.save();
        logger.debug('m', saved.details.title);
      } catch (err) {
        // keyPattern is only present on some errors (presumably duplicate-key
        // failures - confirm); fall back to the error itself so nothing is
        // logged as a bare `undefined`.
        logger.error(err.keyPattern || err);
      }
    }

    logger.debug('SAVING!!');
    // NOTE(review): the return value is not awaited; if exportUnused is
    // asynchronous its completion and errors are not observed here.
    Corpus.exportUnused();
  } catch (err) {
    logger.error(err.keyPattern || err);
  }
}
2020-09-10 13:13:08 +00:00
/**
 * Delete job documents whose `data.timestamp` is more than 14 days before
 * now and whose `data.applied` is 0.
 *
 * The delete is awaited, so the returned promise settles only after the
 * operation has finished. Failures are logged, not rethrown.
 *
 * @returns {Promise<void>} Resolves when the delete has completed or failed.
 */
async function deleteOld() {
  const oneDay = 86400000; // milliseconds in 24 hours
  const twoWeeksAgo = new Date().getTime() - (14 * oneDay);
  const filter = { 'data.timestamp': { '$lt': twoWeeksAgo }, 'data.applied': 0 };

  logger.debug('Delete older than: ', new Date(twoWeeksAgo), twoWeeksAgo);
  // Log the exact filter that is sent, including the applied condition.
  logger.debug(filter);

  try {
    const m = await Jobs.deleteMany(filter);
    logger.debug('m', m);
  } catch (err) {
    logger.error(err);
  }
}
// newJob.find({ 'data': { 'timestamp': { '$lt': 1587034346000 } } });
2020-08-24 08:35:30 +00:00
return {
2020-09-10 13:13:08 +00:00
'start' : start ,
'deleteOld' : deleteOld
2020-08-24 08:35:30 +00:00
} ;
} ) ( ) ;
2020-09-10 13:13:08 +00:00
// Script entry point: run the migration, then prune old records.
// The steps run strictly one after the other. Any rejection is logged here
// so the promise created by this IIFE never ends as an unhandled rejection.
(async function () {
  try {
    await migrate.start();
    await migrate.deleteOld();
    logger.info('Done??');
  } catch (err) {
    logger.error(err);
  }
})();