Jobserve and s1Jobs RSS scrapers added
This commit is contained in:
parent
fdb291838b
commit
d2f6b3e29c
34
scrapers/cwjobs.js
Normal file
34
scrapers/cwjobs.js
Normal file
@ -0,0 +1,34 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 24/05/2020
|
||||
* Time: 23:43
|
||||
|
||||
*/
|
||||
|
||||
const TotaljobsScraper = require('./totaljobs');
|
||||
|
||||
/**
 * Scraper for contract listings on www.cwjobs.co.uk.
 *
 * Everything except the constructor and `go` is inherited from
 * TotaljobsScraper; this subclass only sets the site identity and the
 * search URL.
 */
class CwjobsScraper extends TotaljobsScraper {
    /**
     * Sets the cwjobs site identity and an empty request-options template
     * on top of whatever the parent constructor sets up.
     */
    constructor() {
        super();

        this.siteurl = 'www.cwjobs.co.uk';
        this.siteid = 'cwjobs';
        this.requestOptions = {
            'url' : ''
        };
    }

    /**
     * Points the scraper at the cwjobs contract search for `location` and
     * runs it via `processSite()`.
     *
     * The keyword list and the `postedwithin=3` / `radius=20` filters are
     * fixed in the URL; only the location is variable.
     * NOTE(review): presumably those are days and miles - confirm against
     * the site.
     *
     * @param {string} [location='london'] - Location slug; URI-encoded
     *     before being placed in the URL path.
     * @returns {Promise<void>} Resolves once `processSite()` has finished.
     */
    async go(location = 'london') {
        this.setStartUrl(`https://www.cwjobs.co.uk/jobs/contract/html-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);

        await this.processSite();
    }
}
|
||||
|
||||
// Expose the scraper class as this module's sole CommonJS export.
module.exports = CwjobsScraper;
|
78
scrapers/rss.technojobs.js
Normal file
78
scrapers/rss.technojobs.js
Normal file
@ -0,0 +1,78 @@
|
||||
/**
|
||||
* Created by WebStorm.
|
||||
* User: martin
|
||||
* Date: 24/05/2020
|
||||
* Time: 23:04
|
||||
|
||||
*/
|
||||
|
||||
const MasterRSS = require('../lib/rss');
|
||||
|
||||
class RssTechnojobs extends MasterRSS {
|
||||
constructor() {
|
||||
super();
|
||||
|
||||
this.siteurl = 'www.technojobs.co.uk';
|
||||
this.siteid = 'technojobs';
|
||||
this.useStone = true;
|
||||
this.requestOptions = {
|
||||
'url' : ''
|
||||
};
|
||||
|
||||
this.guidRegex = /\/(\d+)/;
|
||||
this.imgRegex = /src="(.+?)"/g;
|
||||
this.locationRegEx = /Location:(.*?)?</;
|
||||
this.companyRegEx = /Advertiser:(?: )?(.*?)(?:Reference|Start|Email)/;
|
||||
this.rateRegEx = /Salary\/Rate:(.*?)?</;
|
||||
}
|
||||
|
||||
reduceItem(item) {
|
||||
const newObj = this.newRecord();
|
||||
|
||||
/*
|
||||
console.log('++ reduce');
|
||||
console.log(item);
|
||||
*/
|
||||
|
||||
const location = this.locationRegEx.exec(item.content);
|
||||
const rate = this.rateRegEx.exec(item.content);
|
||||
const id = this.guidRegex.exec(item.link);
|
||||
|
||||
const _content = item.content.replace(this.imgRegex, (full, part) => {
|
||||
return `src="${this.makeImg(part)}"`;
|
||||
});
|
||||
|
||||
if (location !== null) {
|
||||
const _location = location[1].trim();
|
||||
if (_location.length <= 30)
|
||||
newObj.location = _location;
|
||||
}
|
||||
|
||||
if (rate !== null)
|
||||
newObj.salary = rate[1].trim().slice(0, 55);
|
||||
|
||||
if (id !== null)
|
||||
newObj.id = id[1].trim();
|
||||
|
||||
newObj.title = item.title;
|
||||
newObj.postDate = item.isoDate;
|
||||
newObj.url = item.link;
|
||||
newObj.summary = _content;
|
||||
|
||||
/*
|
||||
console.log(newObj);
|
||||
console.log('-- reduce');
|
||||
*/
|
||||
|
||||
return newObj;
|
||||
}
|
||||
|
||||
async go(url) {
|
||||
await super.go();
|
||||
this.setStartUrl(url);
|
||||
|
||||
await this.processFeed();
|
||||
}
|
||||
}
|
||||
|
||||
// Expose the scraper class as this module's sole CommonJS export.
module.exports = RssTechnojobs;
|
@ -45,7 +45,7 @@ class TotaljobsScraper extends MasterScraper {
|
||||
const now = ~~(new Date().getTime() / 1000.0);
|
||||
|
||||
// console.log($part.html());
|
||||
newObj.title = $part('.job-title').text().trim();
|
||||
newObj.title = $part('.job-title').text().replace(/(\s*\\n)/g,'').replace(/(\s\s+)/g, ' ').trim().toString();
|
||||
newObj.url = $part('.job-title a').attr('href');
|
||||
newObj.id = $part('div.job').attr('id').trim();
|
||||
newObj.summary = $part('p.job-intro').text().trim();
|
||||
|
Loading…
Reference in New Issue
Block a user