Jobserve and s1Jobs RSS scrapers added

This commit is contained in:
Martin Donnelly 2020-05-25 00:37:11 +01:00
parent fdb291838b
commit d2f6b3e29c
3 changed files with 113 additions and 1 deletions

34
scrapers/cwjobs.js Normal file
View File

@ -0,0 +1,34 @@
/**
* Created by WebStorm.
* User: martin
* Date: 24/05/2020
* Time: 23:43
*/
const TotaljobsScraper = require('./totaljobs');
/**
 * Scraper for contract listings on www.cwjobs.co.uk.
 *
 * Extends TotaljobsScraper and overrides only the site identity and the
 * search URL; the page handling itself comes from the parent class
 * (not visible in this file).
 */
class CwjobsScraper extends TotaljobsScraper {
  /**
   * Sets the site identity fields and an empty request-options template.
   */
  constructor() {
    super();
    Object.assign(this, {
      siteurl: 'www.cwjobs.co.uk',
      siteid: 'cwjobs',
      requestOptions: { url: '' },
    });
  }

  /**
   * Builds the keyword search URL for the given location, registers it as
   * the start URL and runs the inherited site processing.
   *
   * @param {string} [location='london'] - Place name inserted into the URL
   *   path; it is passed through encodeURIComponent first.
   * @returns {Promise<void>} Settles once `processSite()` has settled.
   */
  async go(location = 'london') {
    const place = encodeURIComponent(location);
    // Both the keyword list, "postedwithin" and "radius" are fixed here.
    const searchUrl = `https://www.cwjobs.co.uk/jobs/contract/html-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${place}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`;

    this.setStartUrl(searchUrl);
    await this.processSite();
  }
}
module.exports = CwjobsScraper;

View File

@ -0,0 +1,78 @@
/**
* Created by WebStorm.
* User: martin
* Date: 24/05/2020
* Time: 23:04
*/
const MasterRSS = require('../lib/rss');
/**
 * RSS reader for www.technojobs.co.uk.
 *
 * Extends MasterRSS (not visible in this file) and supplies the
 * site-specific regular expressions plus the mapping from a parsed feed
 * item to a job record.
 */
class RssTechnojobs extends MasterRSS {
  /**
   * Sets the site identity and the regular expressions used to pull fields
   * out of a feed item's link and HTML content.
   */
  constructor() {
    super();
    this.siteurl = 'www.technojobs.co.uk';
    this.siteid = 'technojobs';
    this.useStone = true;
    this.requestOptions = {
      'url' : ''
    };
    // First run of digits following a "/" in the item link.
    this.guidRegex = /\/(\d+)/;
    // Every src="..." attribute in the item content.
    this.imgRegex = /src="(.+?)"/g;
    // Text between "Location:" and the next "<". The capture group is
    // optional, so a successful match can still leave group 1 undefined.
    this.locationRegEx = /Location:(.*?)?</;
    // NOTE(review): defined but not used by reduceItem below - confirm
    // whether the advertiser should be copied onto the record.
    this.companyRegEx = /Advertiser:(?:&nbsp;)?(.*?)(?:Reference|Start|Email)/;
    // Text between "Salary/Rate:" and the next "<"; same optional group.
    this.rateRegEx = /Salary\/Rate:(.*?)?</;
  }

  /**
   * Converts one parsed feed item into a job record.
   *
   * @param {Object} item - Feed item; `title`, `isoDate`, `link` and
   *   `content` are read. A missing `content` is treated as an empty string.
   * @returns {Object} The object returned by `this.newRecord()` with
   *   `title`, `postDate`, `url` and `summary` always set, and `location`,
   *   `salary` and `id` set only when the corresponding pattern matched.
   */
  reduceItem(item) {
    const newObj = this.newRecord();
    // Some feed items may carry no content; avoid calling .replace on undefined.
    const content = typeof item.content === 'string' ? item.content : '';

    const location = this.locationRegEx.exec(content);
    const rate = this.rateRegEx.exec(content);
    const id = this.guidRegex.exec(item.link);
    // Rewrite every image source through makeImg() (inherited helper).
    const _content = content.replace(this.imgRegex, (full, part) => {
      return `src="${this.makeImg(part)}"`;
    });

    if (location !== null) {
      // Group 1 is undefined when the label is immediately followed by "<"
      // and nothing later on the line can satisfy the pattern.
      const _location = (location[1] || '').trim();
      // Longer captures are discarded rather than stored.
      if (_location.length <= 30) {
        newObj.location = _location;
      }
    }

    if (rate !== null) {
      // Same undefined-capture guard; the stored value is capped at 55 chars.
      newObj.salary = (rate[1] || '').trim().slice(0, 55);
    }

    if (id !== null) {
      newObj.id = id[1].trim();
    }

    newObj.title = item.title;
    newObj.postDate = item.isoDate;
    newObj.url = item.link;
    newObj.summary = _content;

    return newObj;
  }

  /**
   * Runs the parent `go()`, then registers `url` as the start URL and
   * processes the feed.
   *
   * @param {string} url - Feed URL handed to `setStartUrl`.
   * @returns {Promise<void>} Settles once `processFeed()` has settled.
   */
  async go(url) {
    await super.go();
    this.setStartUrl(url);
    await this.processFeed();
  }
}
module.exports = RssTechnojobs;

View File

@ -45,7 +45,7 @@ class TotaljobsScraper extends MasterScraper {
const now = ~~(new Date().getTime() / 1000.0);
// console.log($part.html());
newObj.title = $part('.job-title').text().trim();
newObj.title = $part('.job-title').text().replace(/(\s*\\n)/g,'').replace(/(\s\s+)/g, ' ').trim().toString();
newObj.url = $part('.job-title a').attr('href');
newObj.id = $part('div.job').attr('id').trim();
newObj.summary = $part('p.job-intro').text().trim();