Compare commits
1 Commits
developmen
...
dockerisat
Author | SHA1 | Date | |
---|---|---|---|
|
39f399593a |
25
Docker/dev/Dockerfile
Normal file
25
Docker/dev/Dockerfile
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
# Development image for the job scraper.
FROM git.caliban.io/martin/node-python:10
#FROM martind2000/node-python3:18

# VERSION may be supplied with --build-arg; it falls back to "development".
ARG VERSION
# key=value form: the whitespace-separated `ENV key value` form is legacy syntax.
ENV VERSION=${VERSION:-development}

WORKDIR /app

COPY ./Docker/start.sh ./package*.json ./grabber.js ./ecosystem.config.js ./brain.json /app/

# Mount points for the source directories that docker-compose.yml binds in.
RUN mkdir -p /app/db /app/lib /app/models /app/scrapers /app/server

# COPY ./src /app/src

# COPY ./types /app/types

RUN set -x \
    && npm install \
    && npm install -g pm2

RUN chmod +x /app/start.sh

EXPOSE 8120

ENTRYPOINT ["/app/start.sh"]
|
9
Docker/start.sh
Normal file
9
Docker/start.sh
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
#!/bin/sh
# Container entrypoint. The application start commands below are commented
# out, so this script only keeps the container alive.
set -ex

# npm run dev

# npm run start

# This script is the container's ENTRYPOINT; trap TERM/INT so that stopping the
# container ends the script immediately instead of waiting for a forced kill.
trap 'exit 0' TERM INT

while true; do
  # Sleep in the background and `wait` on it: `wait` is interrupted by a
  # trapped signal, whereas a foreground `sleep` would delay the trap.
  sleep infinity &
  wait $!
done
|
BIN
db/jobs.db
BIN
db/jobs.db
Binary file not shown.
27
docker-compose.yml
Normal file
27
docker-compose.yml
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
version: '3.5'

services:
  jubilee-src:
    container_name: jobscraper
    build:
      context: .
      dockerfile: ./Docker/dev/Dockerfile
    image: jobscraper
    # Cap container log growth: at most 5 files of 1 MB each.
    logging:
      options:
        max-size: '1m'
        max-file: '5'
    # restart: always
    # env_file:
    #   - .env

    ports:
      - '8120:8120'
    # Source directories are bind-mounted rather than copied into the image.
    volumes:
      - ./db:/app/db
      - ./lib:/app/lib
      - ./models:/app/models
      - ./scrapers:/app/scrapers
      - ./server:/app/server
|
||||||
|
|
||||||
|
|
10
grabber.js
10
grabber.js
@ -22,7 +22,7 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
|
|||||||
const s1jobsScraper = new RssS1Jobs();
|
const s1jobsScraper = new RssS1Jobs();
|
||||||
const technojobsScraper = new RssTechnojobs();
|
const technojobsScraper = new RssTechnojobs();
|
||||||
|
|
||||||
new CronJob('5 6-23/3 * * *', async function() {
|
/* new CronJob('5 6-23/3 * * *', async function() {
|
||||||
await indeedScraper.go('london');
|
await indeedScraper.go('london');
|
||||||
await totaljobsScraper.go('london');
|
await totaljobsScraper.go('london');
|
||||||
await cwjobsScraper.go('london');
|
await cwjobsScraper.go('london');
|
||||||
@ -35,7 +35,7 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
|
|||||||
await indeedScraper.go('milton keynes');
|
await indeedScraper.go('milton keynes');
|
||||||
await totaljobsScraper.go('milton keynes');
|
await totaljobsScraper.go('milton keynes');
|
||||||
await cwjobsScraper.go('milton keynes');
|
await cwjobsScraper.go('milton keynes');
|
||||||
}, null, true);
|
}, null, true);*/
|
||||||
|
|
||||||
new CronJob('0 6-23/1 * * *', async function() {
|
new CronJob('0 6-23/1 * * *', async function() {
|
||||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/D48462060FB24B6C.rss');
|
await jobserveScraper.go('https://www.jobserve.com/MySearch/D48462060FB24B6C.rss');
|
||||||
@ -63,8 +63,8 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
|
|||||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml');
|
await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml');
|
||||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');*/
|
await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');*/
|
||||||
|
|
||||||
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
// await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||||
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
// await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||||
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
// await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||||
}, null, true);
|
}, null, true);
|
||||||
})();
|
})();
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
const filterReject = require('../lib/filter_reject');
|
const filterReject = require('../lib/filter_reject');
|
||||||
const filterAccept = require('../lib/filter_md_jobs');
|
const filterAccept = require('../lib/filter_md_jobs');
|
||||||
const dbmanager = require('../lib/dbmanager');
|
const dbmanager = require('../lib/dbmanager');
|
||||||
const JobsModel = require('../lib/mongoManager');
|
// const JobsModel = require('../lib/mongoManager');
|
||||||
|
|
||||||
const SHA = require('crypto-js/sha256');
|
const SHA = require('crypto-js/sha256');
|
||||||
|
|
||||||
@ -67,7 +67,8 @@ class MasterBase {
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
addToMongo() {
|
addToMongo() {
|
||||||
console.log('>> ADD TO MONGO!');
|
console.log('>> no ADD TO MONGO!');
|
||||||
|
return;
|
||||||
|
|
||||||
for(const item of this.items) {
|
for(const item of this.items) {
|
||||||
// console.log('add', item);
|
// console.log('add', item);
|
||||||
|
@ -89,7 +89,7 @@ class MasterRSS extends MasterBase {
|
|||||||
await this.filterAdverts();
|
await this.filterAdverts();
|
||||||
|
|
||||||
if (this.items.length > 0) await this.addToDB();
|
if (this.items.length > 0) await this.addToDB();
|
||||||
if (this.items.length > 0) await this.addToMongo();
|
// if (this.items.length > 0) await this.addToMongo();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
console.log('No items to process');
|
console.log('No items to process');
|
||||||
|
@ -133,7 +133,7 @@ class IndeedScraper extends MasterScraper {
|
|||||||
await this.filterAdverts();
|
await this.filterAdverts();
|
||||||
|
|
||||||
await this.addToDB();
|
await this.addToDB();
|
||||||
await this.addToMongo();
|
// await this.addToMongo();
|
||||||
}
|
}
|
||||||
|
|
||||||
async go(location = 'london') {
|
async go(location = 'london') {
|
||||||
|
@ -140,7 +140,7 @@ class IndeedMobileScraper extends MasterScraper {
|
|||||||
await this.filterAdverts();
|
await this.filterAdverts();
|
||||||
|
|
||||||
await this.addToDB();
|
await this.addToDB();
|
||||||
await this.addToMongo();
|
// await this.addToMongo();
|
||||||
}
|
}
|
||||||
|
|
||||||
async go(location = 'london') {
|
async go(location = 'london') {
|
||||||
|
@ -146,7 +146,7 @@ class TotaljobsScraper extends MasterScraper {
|
|||||||
await this.filterAdverts();
|
await this.filterAdverts();
|
||||||
|
|
||||||
await this.addToDB();
|
await this.addToDB();
|
||||||
await this.addToMongo();
|
// await this.addToMongo();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -5,7 +5,10 @@
|
|||||||
* Time: 11:08
|
* Time: 11:08
|
||||||
|
|
||||||
*/
|
*/
|
||||||
const Jobs = require('../../lib/mongoManager');
|
// const Jobs = require('../../lib/mongoManager');
|
||||||
|
|
||||||
|
const Jobs = {};
|
||||||
|
|
||||||
const { Utils } = require('@rakh/utils');
|
const { Utils } = require('@rakh/utils');
|
||||||
|
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
@ -19,7 +22,7 @@ var classifier = bayes({
|
|||||||
});
|
});
|
||||||
|
|
||||||
function load() {
|
function load() {
|
||||||
const file = fs.readFileSync('brain.json');
|
const file = fs.readFileSync('/app/brain.json');
|
||||||
|
|
||||||
classifier = bayes.fromJson(file);
|
classifier = bayes.fromJson(file);
|
||||||
}
|
}
|
||||||
|
936
server/dist/build/bundle.js
vendored
936
server/dist/build/bundle.js
vendored
File diff suppressed because one or more lines are too long
@ -58,8 +58,8 @@ app.use(bodyParser.json());
|
|||||||
app.post('/auth', auth.auth);
|
app.post('/auth', auth.auth);
|
||||||
|
|
||||||
require('./routes/jobs.route')(app);
|
require('./routes/jobs.route')(app);
|
||||||
require('./routes/jobs.v2.route')(app);
|
// require('./routes/jobs.v2.route')(app);
|
||||||
require('./routes/apply.v2.route')(app);
|
// require('./routes/apply.v2.route')(app);
|
||||||
require('./routes/vote.route')(app);
|
require('./routes/vote.route')(app);
|
||||||
|
|
||||||
app.listen(serverPort, () => {
|
app.listen(serverPort, () => {
|
||||||
|
214
v2/lib/base.js
Normal file
214
v2/lib/base.js
Normal file
@ -0,0 +1,214 @@
|
|||||||
|
/**
|
||||||
|
* Created by WebStorm.
|
||||||
|
* User: martin
|
||||||
|
* Date: 22/05/2020
|
||||||
|
* Time: 12:01
|
||||||
|
|
||||||
|
*/
|
||||||
|
const filterReject = require('../lib/filter_reject');
|
||||||
|
const filterAccept = require('../lib/filter_md_jobs');
|
||||||
|
const dbmanager = require('../lib/dbmanager');
|
||||||
|
// const JobsModel = require('../lib/mongoManager');
|
||||||
|
|
||||||
|
const SHA = require('crypto-js/sha256');
|
||||||
|
|
||||||
|
const { Utils } = require('@rakh/utils');
|
||||||
|
const { Corpus } = require('./corpus');
|
||||||
|
|
||||||
|
/**
 * Common state and helper methods for scraper classes: record creation,
 * advert filtering, persistence through `dbmanager`, and URL helpers.
 */
class MasterBase {

  /**
   * Initialises empty scraper state and the default proxied request options.
   */
  constructor() {
    this.url = '';
    this.items = [];
    this.currentPage = null;
    this.hosturl = '';
    this.siteid = '';
    this.useStone = false;
    this.saveFile = false;
    this.requestOptions = {
      'url' : '',
      'proxy' : 'http://uk.proxymesh.com:31280',
      'tunnel' : true
    };
  }

  /**
   * Creates a blank advert record stamped with the current time in whole seconds.
   *
   * @returns {{summary: string, site: string, postDate: string, location: string, company: string, id: string, title: string, isEasyApply: boolean, salary: string, url: string, timestamp: number}}
   */
  newRecord() {
    const now = Math.floor(Date.now() / 1000);

    return { 'title': '', 'site': this.siteid || '', 'url':'', 'id':'', 'summary':'', 'postDate':'', 'isEasyApply':false, 'location': '', 'company': '', 'salary': '', 'timestamp':now };
  }

  /**
   * Inserts every item in `this.items` through `dbmanager.insertOne`.
   *
   * The returned promise settles only after every insert has finished, so
   * `await this.addToDB()` really waits for the writes. A failed insert is
   * logged and does not reject the returned promise.
   *
   * @returns {Promise<void>}
   */
  async addToDB() {
    const inserts = this.items.map((item) => dbmanager.insertOne(item)
      .then((data) => {
        console.log(data);
      })
      .catch((err) => {
        console.error(`${this.siteid} db error`);
        console.error(err.message || 'Some error occurred while querying the database.');
      }));

    // Each promise has its own catch handler, so Promise.all cannot reject here.
    await Promise.all(inserts);
  }

  /**
   * Mongo persistence is switched off: the JobsModel require is commented out
   * at the top of this file, so this method only logs and returns.
   */
  addToMongo() {
    console.log('>> no ADD TO MONGO!');
  }

  /**
   * Classifies a salary string by the first number found in it.
   *
   * Currency symbols, letters, punctuation and a trailing `.dd` fraction are
   * stripped first; the first remaining whitespace-separated token is parsed
   * as an integer.
   *
   * @param inval salary text, e.g. '£450 - £500 per day'
   * @returns {number} 0 when below 100 or when no number is found,
   *                   1 for 100 to 4999, 2 for 5000 and above
   */
  analyseRate(inval) {
    console.log('analyseRate', inval);

    const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
    const clearSpace = /\s+/g;

    // A missing or non-string salary is treated as empty text instead of throwing.
    const text = (typeof inval === 'string') ? inval : String(inval == null ? '' : inval);
    const firstToken = text.replace(cleanerReg, '').replace(clearSpace, ' ').trim().split(' ')[0];
    const amount = parseInt(firstToken, 10);

    // Lower bounds are inclusive so that exactly 100 is classified too.
    // NaN fails both comparisons and falls through to 0.
    if (amount >= 5000) return 2;
    if (amount >= 100) return 1;

    return 0;
  }

  /**
   * Builds the `{details, data}` object from a scraped record.
   *
   * @param d scraped advert record
   * @returns {{data: {read: number, autoclass: number, applied: number, jobtype: number, class: number}, details: {}}}
   */
  reduceData(d) {
    const clearPremium = /(\n+)(Featured|Premium)/gi;
    const otherStupid = /((↵\s+)+)(Featured|Premium)/gi;

    const outObj = { 'details':{}, 'data':{ 'read':0, 'applied':0, 'jobtype': 0, 'class':0, 'autoclass':0 } };

    // NOTE(review): newRecord() names these fields 'postDate' and 'isEasyApply',
    // but the keys extracted here are 'postdate' and 'easyapply' - confirm which
    // spelling the records actually carry.
    outObj.details = Utils.extractFromObj(d, ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp']);

    // Remove "Featured"/"Premium" markers that follow newlines in the title.
    outObj.details.title = outObj.details.title.replace(clearPremium, '');
    outObj.details.title = outObj.details.title.replace(otherStupid, '');
    outObj.details.hashed = SHA(outObj.details.summary);

    outObj.data.read = 0;
    outObj.data.applied = d.applied || 0;

    outObj.data.jobtype = this.analyseRate(d.salary);
    outObj.data.autoclass = Corpus.process(d.summary);

    // newRecord() stores the timestamp in seconds; this stores it multiplied by 1000.
    outObj.data.timestamp = d.timestamp * 1000;

    return outObj;
  }

  /**
   * Runs `this.items` through the reject filter and then the accept filter,
   * logging the item count at each stage.
   *
   * @returns {Promise<void>}
   */
  async filterAdverts() {
    console.log('>> FilterAdverts');
    console.log(`Currently ${this.items.length} items...`);

    this.items = this.items.filter(filterReject);

    console.log(`After reject ${this.items.length} items...`);

    this.items = this.items.filter(filterAccept);

    console.log(`After accept ${this.items.length} items...`);

    // console.log(this.items);
  }

  /**
   * Stores the start URL.
   *
   * @param newUrl
   */
  setStartUrl(newUrl) {
    this.url = newUrl;
  }

  /**
   * Stores the page that is currently being processed.
   *
   * @param page
   */
  loadPage(page) {
    this.currentPage = page;
  }

  /**
   * Prefixes a path with `https://` and the site host.
   *
   * NOTE(review): this reads `this.siteurl`, while the constructor only
   * initialises `this.hosturl` - presumably subclasses set `siteurl`; confirm.
   *
   * @param appended path to append after the host
   * @returns {string}
   */
  makeUrl(appended) {
    return `https://${ this.siteurl }${appended}`;
  }

  /**
   * Currently builds exactly the same URL as makeUrl().
   *
   * @param appended path to append after the host
   * @returns {string}
   */
  makeProxyUrl(appended) {
    return `https://${ this.siteurl }${appended}`;
  }

  /**
   * Builds an image.silvrtree.co.uk URL for the given image URL.
   *
   * @param url source image URL
   * @param q value placed after `q` in the path (default 75)
   * @returns {string}
   */
  makeImg(url, q = 75) {
    return `https://image.silvrtree.co.uk/q${q}/${url}`;
  }

  /**
   * Resets the collected items before a run.
   *
   * @returns {Promise<void>}
   */
  async go() {
    this.items = [];
    this.rawItems = [];
  }

}
|
||||||
|
|
||||||
|
module.exports = MasterBase;
|
16
v2/lib/filter_md_jobs.js
Normal file
16
v2/lib/filter_md_jobs.js
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
module.exports = function (item) {
|
||||||
|
const patt = /(full\s?stack|front\s?end|html|html5|es6|react|knockout|ember|vue|riotjs|css|javascript|sql|node|backbone|git|gulp|jquery|express|£\dk|Data Warehouse Developer|iot|internet of things)\W/ig;
|
||||||
|
const result = patt.test(item.title);
|
||||||
|
const resultB = patt.test(item.summary);
|
||||||
|
|
||||||
|
console.log('My Filter:', (result || resultB === true) ? 'Pass' : 'Reject');
|
||||||
|
|
||||||
|
/* if (!(result || resultB === true)) {
|
||||||
|
console.log('Result', result);
|
||||||
|
console.log('ResultB', resultB);
|
||||||
|
console.log(item);
|
||||||
|
}*/
|
||||||
|
|
||||||
|
return (result || resultB === true) ;
|
||||||
|
|
||||||
|
};
|
17
v2/lib/filter_reject.js
Normal file
17
v2/lib/filter_reject.js
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
module.exports = function (item) {
|
||||||
|
const patt = /(Simply Education|Splunk|Coordinators?|Teachers?|Technical Writers?|Data Analyst|WebLogic|WebSphere|Data Scientist|Change Managers?|T24|Test Analyst|Insight Analyst|application tester|senior tester|Salesforce|QlikView|Navision|Murex|seo|django|drupal|SHAREPOINT|per annum|ServiceNow|Test Lead|User Researcher|Service Management|\(PERM\)|£\d.K|Remedy|ITSM|Symfony|Zend|Full Time|Technical Business Analyst|BUSINESS ANALYST|AUTOMATION TESTER|FIELD TECHNICIAN|websphere administrator|Research Data Scientist)/ig;
|
||||||
|
|
||||||
|
const engineers = /(Support|Devops|Planning|security|Postgresql|network|sccm|test|data|imac|firewall|vmware)+(?:\s)(?=Engineer)/ig;
|
||||||
|
const developers = /(Big Data|Java Server Side|Java|PHP|Graduate|Access|Oracle ADF|SHAREPOINT|Ruby on Rails|Java Software|IOS|Qlikview|c#|c\+\+|\.net|bi|go lang|Python)+(?:\s)(?=Developer)/ig;
|
||||||
|
const architects = /(Java|PHP|Microsoft)+(?:\s)(?=Architect)/ig;
|
||||||
|
|
||||||
|
const antiAd = /sja\d+/gi;
|
||||||
|
|
||||||
|
const result = patt.test(item.summary) || engineers.test(item.summary) || developers.test(item.summary) || architects.test(item.summary);
|
||||||
|
const resultB = patt.test(item.title) || engineers.test(item.title) || developers.test(item.title) || architects.test(item.title);
|
||||||
|
const resultC = antiAd.test(item.id);
|
||||||
|
|
||||||
|
console.log('Reject:', (result || resultB || resultC === true) ? 'Reject' : 'Pass');
|
||||||
|
|
||||||
|
return (!(result || resultB || resultC === true));
|
||||||
|
};
|
Loading…
Reference in New Issue
Block a user