Dockerising the service
This commit is contained in:
parent
91a51d7fda
commit
39f399593a
25
Docker/dev/Dockerfile
Normal file
25
Docker/dev/Dockerfile
Normal file
@ -0,0 +1,25 @@
|
||||
# Development image for the job scraper service.
FROM git.caliban.io/martin/node-python:10
#FROM martind2000/node-python3:18

# Build-time version label; falls back to "development" when no
# --build-arg VERSION=... is supplied (or it is supplied empty).
ARG VERSION
ENV VERSION=${VERSION:-development}

WORKDIR /app

# Only the entry script, the package manifests and the top-level runtime
# files are baked into the image.
COPY ./Docker/start.sh ./package*.json ./grabber.js ./ecosystem.config.js ./brain.json /app/

# Create the directories up front so they exist even when nothing is
# mounted over them.
RUN mkdir -p /app/db /app/lib /app/models /app/scrapers /app/server

# COPY ./src /app/src
# COPY ./types /app/types

# Install the project dependencies, then the pm2 process manager globally.
RUN set -x \
    && npm install \
    && npm install -g pm2

RUN chmod +x /app/start.sh

EXPOSE 8120

ENTRYPOINT ["/app/start.sh"]
|
9
Docker/start.sh
Normal file
9
Docker/start.sh
Normal file
@ -0,0 +1,9 @@
|
||||
#!/bin/sh
# Container entry point.
# -e: abort on the first failing command; -x: echo each command as it runs.
set -ex

# npm run dev

# npm run start

# Neither npm command above is enabled, so simply keep the container alive.
while true
do
  sleep infinity
done
|
BIN
db/jobs.db
BIN
db/jobs.db
Binary file not shown.
27
docker-compose.yml
Normal file
27
docker-compose.yml
Normal file
@ -0,0 +1,27 @@
|
||||
version: '3.5'

services:
  jubilee-src:
    container_name: jobscraper
    build:
      # Build from the repository root so the Dockerfile can COPY project files.
      context: .
      dockerfile: ./Docker/dev/Dockerfile
    image: jobscraper
    logging:
      # Cap container logs at five rotated files of 1 MB each.
      options:
        max-size: '1m'
        max-file: '5'
    # restart: always
    # env_file:
    #   - .env

    ports:
      - '8120:8120'
    volumes:
      # Source directories are bind-mounted rather than baked into the image.
      - ./db:/app/db
      - ./lib:/app/lib
      - ./models:/app/models
      - ./scrapers:/app/scrapers
      - ./server:/app/server
|
||||
|
||||
|
10
grabber.js
10
grabber.js
@ -22,7 +22,7 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
|
||||
const s1jobsScraper = new RssS1Jobs();
|
||||
const technojobsScraper = new RssTechnojobs();
|
||||
|
||||
new CronJob('5 6-23/3 * * *', async function() {
|
||||
/* new CronJob('5 6-23/3 * * *', async function() {
|
||||
await indeedScraper.go('london');
|
||||
await totaljobsScraper.go('london');
|
||||
await cwjobsScraper.go('london');
|
||||
@ -35,7 +35,7 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
|
||||
await indeedScraper.go('milton keynes');
|
||||
await totaljobsScraper.go('milton keynes');
|
||||
await cwjobsScraper.go('milton keynes');
|
||||
}, null, true);
|
||||
}, null, true);*/
|
||||
|
||||
new CronJob('0 6-23/1 * * *', async function() {
|
||||
await jobserveScraper.go('https://www.jobserve.com/MySearch/D48462060FB24B6C.rss');
|
||||
@ -63,8 +63,8 @@ const RssTechnojobs = require('./scrapers/rss.technojobs');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml');
|
||||
await s1jobsScraper.go('http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml');*/
|
||||
|
||||
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||
await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||
// await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationglasgow/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||
// await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationLONDON/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||
// await technojobsScraper.go('https://www.technojobs.co.uk/rss.php/html%20OR%20node%20OR%20web%20OR%20sql%20OR%20delphi%20OR%20javascript%20OR%20ajax/excludekeywords/locationMilton%20Keynes/radius25/termsin0/salary0/postedwithinall/jobtypeall/searchfieldRSearchIndex/page1');
|
||||
}, null, true);
|
||||
})();
|
||||
|
@ -8,7 +8,7 @@
|
||||
const filterReject = require('../lib/filter_reject');
|
||||
const filterAccept = require('../lib/filter_md_jobs');
|
||||
const dbmanager = require('../lib/dbmanager');
|
||||
const JobsModel = require('../lib/mongoManager');
|
||||
// const JobsModel = require('../lib/mongoManager');
|
||||
|
||||
const SHA = require('crypto-js/sha256');
|
||||
|
||||
@ -67,7 +67,8 @@ class MasterBase {
|
||||
*
|
||||
*/
|
||||
addToMongo() {
|
||||
console.log('>> ADD TO MONGO!');
|
||||
console.log('>> no ADD TO MONGO!');
|
||||
return;
|
||||
|
||||
for(const item of this.items) {
|
||||
// console.log('add', item);
|
||||
|
@ -89,7 +89,7 @@ class MasterRSS extends MasterBase {
|
||||
await this.filterAdverts();
|
||||
|
||||
if (this.items.length > 0) await this.addToDB();
|
||||
if (this.items.length > 0) await this.addToMongo();
|
||||
// if (this.items.length > 0) await this.addToMongo();
|
||||
}
|
||||
else
|
||||
console.log('No items to process');
|
||||
|
@ -133,7 +133,7 @@ class IndeedScraper extends MasterScraper {
|
||||
await this.filterAdverts();
|
||||
|
||||
await this.addToDB();
|
||||
await this.addToMongo();
|
||||
// await this.addToMongo();
|
||||
}
|
||||
|
||||
async go(location = 'london') {
|
||||
|
@ -140,7 +140,7 @@ class IndeedMobileScraper extends MasterScraper {
|
||||
await this.filterAdverts();
|
||||
|
||||
await this.addToDB();
|
||||
await this.addToMongo();
|
||||
// await this.addToMongo();
|
||||
}
|
||||
|
||||
async go(location = 'london') {
|
||||
|
@ -146,7 +146,7 @@ class TotaljobsScraper extends MasterScraper {
|
||||
await this.filterAdverts();
|
||||
|
||||
await this.addToDB();
|
||||
await this.addToMongo();
|
||||
// await this.addToMongo();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -5,7 +5,10 @@
|
||||
* Time: 11:08
|
||||
|
||||
*/
|
||||
const Jobs = require('../../lib/mongoManager');
|
||||
// const Jobs = require('../../lib/mongoManager');
|
||||
|
||||
const Jobs = {};
|
||||
|
||||
const { Utils } = require('@rakh/utils');
|
||||
|
||||
const fs = require('fs');
|
||||
@ -19,7 +22,7 @@ var classifier = bayes({
|
||||
});
|
||||
|
||||
function load() {
|
||||
const file = fs.readFileSync('brain.json');
|
||||
const file = fs.readFileSync('/app/brain.json');
|
||||
|
||||
classifier = bayes.fromJson(file);
|
||||
}
|
||||
|
936
server/dist/build/bundle.js
vendored
936
server/dist/build/bundle.js
vendored
File diff suppressed because one or more lines are too long
@ -58,8 +58,8 @@ app.use(bodyParser.json());
|
||||
app.post('/auth', auth.auth);
|
||||
|
||||
require('./routes/jobs.route')(app);
|
||||
require('./routes/jobs.v2.route')(app);
|
||||
require('./routes/apply.v2.route')(app);
|
||||
// require('./routes/jobs.v2.route')(app);
|
||||
// require('./routes/apply.v2.route')(app);
|
||||
require('./routes/vote.route')(app);
|
||||
|
||||
app.listen(serverPort, () => {
|
||||
|
214
v2/lib/base.js
Normal file
214
v2/lib/base.js
Normal file
@ -0,0 +1,214 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 22/05/2020
|
||||
* Time: 12:01
|
||||
|
||||
*/
|
||||
const filterReject = require('../lib/filter_reject');
|
||||
const filterAccept = require('../lib/filter_md_jobs');
|
||||
const dbmanager = require('../lib/dbmanager');
|
||||
// const JobsModel = require('../lib/mongoManager');
|
||||
|
||||
const SHA = require('crypto-js/sha256');
|
||||
|
||||
const { Utils } = require('@rakh/utils');
|
||||
const { Corpus } = require('./corpus');
|
||||
|
||||
/**
 * Base class holding the state and helpers shared by the scrapers:
 * record creation, filtering, persistence and URL building.
 */
class MasterBase {

  /**
   * Initialises the scraper state with empty defaults and the proxy
   * request options.
   */
  constructor() {
    this.url = '';
    this.items = [];
    // Initialised here as well as in go() so the property always exists.
    this.rawItems = [];
    this.currentPage = null;
    this.hosturl = '';
    this.siteid = '';
    this.useStone = false;
    this.saveFile = false;
    this.requestOptions = {
      'url' : '',
      'proxy' : 'http://uk.proxymesh.com:31280',
      'tunnel' : true
    };
  }

  /**
   * Builds an empty job record stamped with the current time (in whole
   * seconds) and this scraper's site id.
   *
   * @returns {{summary: string, site: string, postDate: string, location: string, company: string, id: string, title: string, isEasyApply: boolean, salary: string, url: string, timestamp: number}}
   */
  newRecord() {
    const now = Math.floor(Date.now() / 1000);

    return { 'title': '', 'site': this.siteid || '', 'url':'', 'id':'', 'summary':'', 'postDate':'', 'isEasyApply':false, 'location': '', 'company': '', 'salary': '', 'timestamp':now };
  }

  /**
   * Inserts every item in `this.items` through the db manager.
   *
   * The inserts are started together and the returned promise settles only
   * once all of them have finished, so `await addToDB()` really waits for
   * the writes. A failed insert is logged and does not stop the others.
   *
   * @returns {Promise<void>}
   */
  async addToDB() {
    const inserts = this.items.map((item) => dbmanager.insertOne(item)
      .then((data) => {
        console.log(data);
      })
      .catch((err) => {
        console.error(`${this.siteid} db error`);
        console.error(err.message || 'Some error occurred while querying the database.');
      }));

    await Promise.all(inserts);
  }

  /**
   * Mongo persistence is switched off: the model import is disabled, so
   * this only logs and returns. Kept so existing callers keep working.
   */
  addToMongo() {
    console.log('>> no ADD TO MONGO!');
  }

  /**
   * Classifies a salary/rate string by the first number found in it.
   *
   * @param inval  Raw salary text; a missing value is treated as empty.
   * @returns {number} 0 when no number is found or it is 100 or less,
   *                   1 when it is above 100 and below 5000,
   *                   2 when it is 5000 or more.
   */
  analyseRate(inval) {
    console.log('analyseRate', inval);

    // Strips "ir35", letters, currency/punctuation symbols and decimal parts.
    const cleanerReg = /ir35|[+$#,=&:;()\\/\-£a-z]|\.\d{1,2}/gi;
    const clearSpace = /\s+/g;

    // Guard against records that carry no salary at all.
    const text = (inval === undefined || inval === null) ? '' : String(inval);
    const firstToken = text.replace(cleanerReg, '').replace(clearSpace, ' ').trim().split(' ')[0];
    const amount = parseInt(firstToken, 10);

    if (Number.isNaN(amount)) return 0;
    if (amount >= 5000) return 2;
    if (amount > 100) return 1;

    return 0;
  }

  /**
   * Converts a scraped record into the `{details, data}` document shape.
   *
   * NOTE(review): the keys requested here are 'postdate' and 'easyapply',
   * while newRecord() creates 'postDate' and 'isEasyApply' — confirm which
   * spelling the stored documents are expected to use.
   *
   * @param d  A scraped record.
   * @returns {{data: {read: number, autoclass: number, applied: number, jobtype: number, class: number}, details: {}}}
   */
  reduceData(d) {
    const clearPremium = /(\n+)(Featured|Premium)/gi;
    const otherStupid = /((↵\s+)+)(Featured|Premium)/gi;

    const outObj = { 'details':{}, 'data':{ 'read':0, 'applied':0, 'jobtype': 0, 'class':0, 'autoclass':0 } };

    outObj.details = Utils.extractFromObj(d, ['title', 'site', 'url', 'id', 'summary', 'company', 'location', 'postdate', 'salary', 'easyapply', 'timestamp']);

    // Drop trailing "Featured"/"Premium" badges that leak into the title.
    outObj.details.title = outObj.details.title.replace(clearPremium, '');
    outObj.details.title = outObj.details.title.replace(otherStupid, '');
    outObj.details.hashed = SHA(outObj.details.summary);

    outObj.data.read = 0;
    outObj.data.applied = d.applied || 0;

    outObj.data.jobtype = this.analyseRate(d.salary);
    outObj.data.autoclass = Corpus.process(d.summary);

    // Record timestamps are in seconds; this one is stored in milliseconds.
    outObj.data.timestamp = d.timestamp * 1000;

    return outObj;
  }

  /**
   * Runs `this.items` through the reject filter and then the accept
   * filter, logging the item count after each stage.
   *
   * @returns {Promise<void>}
   */
  async filterAdverts() {
    console.log('>> FilterAdverts');
    console.log(`Currently ${this.items.length} items...`);

    this.items = this.items.filter(filterReject);

    console.log(`After reject ${this.items.length} items...`);

    this.items = this.items.filter(filterAccept);

    console.log(`After accept ${this.items.length} items...`);
  }

  /**
   * Sets the URL scraping starts from.
   *
   * @param newUrl
   */
  setStartUrl(newUrl) {
    this.url = newUrl;
  }

  /**
   * Stores the page that is currently being processed.
   *
   * @param page
   */
  loadPage(page) {
    this.currentPage = page;
  }

  /**
   * Builds an absolute https URL on this scraper's site.
   *
   * Uses `siteurl` when it has been set and otherwise falls back to
   * `hosturl`, the property the constructor initialises, so the result
   * never contains the text "undefined".
   *
   * @param appended  Path (and query) to append after the host.
   * @returns {string}
   */
  makeUrl(appended) {
    return `https://${ this.siteurl || this.hosturl }${appended}`;
  }

  /**
   * Builds the URL used for proxied requests; currently the same as
   * makeUrl().
   *
   * @param appended  Path (and query) to append after the host.
   * @returns {string}
   */
  makeProxyUrl(appended) {
    return `https://${ this.siteurl || this.hosturl }${appended}`;
  }

  /**
   * Builds an image-service URL for `url` at quality `q`.
   *
   * @param url
   * @param q  Quality value placed in the path; defaults to 75.
   * @returns {string}
   */
  makeImg(url, q = 75) {
    return `https://image.silvrtree.co.uk/q${q}/${url}`;
  }

  /**
   * Resets the collected items before a run.
   *
   * @returns {Promise<void>}
   */
  async go() {
    this.items = [];
    this.rawItems = [];
  }

}
|
||||
|
||||
module.exports = MasterBase;
|
16
v2/lib/filter_md_jobs.js
Normal file
16
v2/lib/filter_md_jobs.js
Normal file
@ -0,0 +1,16 @@
|
||||
/**
 * Keywords that mark an advert as relevant.
 *
 * The trailing `(?!\w)` requires that the keyword is not followed by a word
 * character, which also accepts a keyword at the very end of the text.
 * No `g` flag: `.test()` on a global regex keeps state between calls.
 */
const ACCEPT_PATTERN = /(full\s?stack|front\s?end|html|html5|es6|react|knockout|ember|vue|riotjs|css|javascript|sql|node|backbone|git|gulp|jquery|express|£\dk|Data Warehouse Developer|iot|internet of things)(?!\w)/i;

/**
 * Accept filter for scraped adverts, intended for use with Array#filter.
 *
 * @param item  Advert record; its `title` and `summary` are inspected.
 * @returns {boolean} true when either field contains an accepted keyword.
 */
function filterAccept(item) {
  const titleHit = ACCEPT_PATTERN.test(item.title);
  const summaryHit = ACCEPT_PATTERN.test(item.summary);
  const accepted = titleHit || summaryHit;

  console.log('My Filter:', accepted ? 'Pass' : 'Reject');

  return accepted;
}

module.exports = filterAccept;
|
17
v2/lib/filter_reject.js
Normal file
17
v2/lib/filter_reject.js
Normal file
@ -0,0 +1,17 @@
|
||||
// The patterns are compiled once and carry no `g` flag: `.test()` on a
// global regex remembers `lastIndex` between calls, which makes reusing one
// pattern on several strings order-dependent.

/** Terms that reject an advert outright. */
const REJECT_PATTERN = /(Simply Education|Splunk|Coordinators?|Teachers?|Technical Writers?|Data Analyst|WebLogic|WebSphere|Data Scientist|Change Managers?|T24|Test Analyst|Insight Analyst|application tester|senior tester|Salesforce|QlikView|Navision|Murex|seo|django|drupal|SHAREPOINT|per annum|ServiceNow|Test Lead|User Researcher|Service Management|\(PERM\)|£\d.K|Remedy|ITSM|Symfony|Zend|Full Time|Technical Business Analyst|BUSINESS ANALYST|AUTOMATION TESTER|FIELD TECHNICIAN|websphere administrator|Research Data Scientist)/i;

/** Unwanted "<something> Engineer" roles. */
const ENGINEER_PATTERN = /(Support|Devops|Planning|security|Postgresql|network|sccm|test|data|imac|firewall|vmware)+(?:\s)(?=Engineer)/i;

/** Unwanted "<something> Developer" roles. */
const DEVELOPER_PATTERN = /(Big Data|Java Server Side|Java|PHP|Graduate|Access|Oracle ADF|SHAREPOINT|Ruby on Rails|Java Software|IOS|Qlikview|c#|c\+\+|\.net|bi|go lang|Python)+(?:\s)(?=Developer)/i;

/** Unwanted "<something> Architect" roles. */
const ARCHITECT_PATTERN = /(Java|PHP|Microsoft)+(?:\s)(?=Architect)/i;

/** Advert ids of the form "sja<digits>" are always rejected. */
const ANTI_AD_PATTERN = /sja\d+/i;

const TEXT_PATTERNS = [REJECT_PATTERN, ENGINEER_PATTERN, DEVELOPER_PATTERN, ARCHITECT_PATTERN];

/**
 * Tells whether `text` matches any of the text reject patterns.
 *
 * @param text
 * @returns {boolean}
 */
function matchesRejectList(text) {
  return TEXT_PATTERNS.some((pattern) => pattern.test(text));
}

/**
 * Reject filter for scraped adverts, intended for use with Array#filter.
 *
 * @param item  Advert record; `summary`, `title` and `id` are inspected.
 * @returns {boolean} false when the advert should be dropped, true when it
 *                    may stay.
 */
function filterReject(item) {
  const rejected = matchesRejectList(item.summary)
    || matchesRejectList(item.title)
    || ANTI_AD_PATTERN.test(item.id);

  console.log('Reject:', rejected ? 'Reject' : 'Pass');

  return !rejected;
}

module.exports = filterReject;
|
Loading…
Reference in New Issue
Block a user