From d2f6b3e29c0e71f27d2257bd98d1663c094ffff2 Mon Sep 17 00:00:00 2001 From: Martin Donnelly Date: Mon, 25 May 2020 00:37:11 +0100 Subject: [PATCH] Jobserve and s1Jobs rss scraper added --- scrapers/cwjobs.js | 34 +++++++++++++++++ scrapers/rss.technojobs.js | 78 ++++++++++++++++++++++++++++++++++++++ scrapers/totaljobs.js | 2 +- 3 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 scrapers/cwjobs.js create mode 100644 scrapers/rss.technojobs.js diff --git a/scrapers/cwjobs.js b/scrapers/cwjobs.js new file mode 100644 index 0000000..41f0018 --- /dev/null +++ b/scrapers/cwjobs.js @@ -0,0 +1,34 @@ +/** + * Created by WebStorm. + * User: martin + * Date: 24/05/2020 + * Time: 23:43 + + */ + +const TotaljobsScraper = require('./totaljobs'); + +class CwjobsScraper extends TotaljobsScraper { /* Subclass of TotaljobsScraper; the constructor below overrides siteurl, siteid and requestOptions after calling super(). */ + + constructor() { + super(); + this.siteurl = 'www.cwjobs.co.uk'; + this.siteid = 'cwjobs'; + this.requestOptions = { + 'url' : '' + }; + } + + async go(location = 'london') { /* Sets the start URL to a cwjobs contract search with the URI-encoded location (default london) in its path, then awaits processSite(). */ + this.setStartUrl(`https://www.cwjobs.co.uk/jobs/contract/html-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`); + // this.setStartUrl('https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=javascript+nodejs&as_not=&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=25&l=london&fromage=7&limit=10&sort=date&psf=advsrch&from=advancedsearch'); + + // Glasgow + // https://www.indeed.co.uk/jobs?as_and=&as_phr=&as_any=Html+Web+Sql+Delphi+Vb+Vbscript+Php+Ajax+Mysql+Sqlserver+Javascript+Nodejs+vuejs+sveltejs&as_not=React&as_ttl=&as_cmp=&jt=contract&st=&as_src=&salary=&radius=0&l=glasgow&fromage=1&limit=50&sort=&psf=advsrch&from=advancedsearch + + await 
this.processSite(); + } + +} + +module.exports = CwjobsScraper; diff --git a/scrapers/rss.technojobs.js b/scrapers/rss.technojobs.js new file mode 100644 index 0000000..f130a28 --- /dev/null +++ b/scrapers/rss.technojobs.js @@ -0,0 +1,78 @@ +/** + * Created by WebStorm. + * User: martin + * Date: 24/05/2020 + * Time: 23:04 + + */ + +const MasterRSS = require('../lib/rss'); + +class RssTechnojobs extends MasterRSS { /* Subclass of MasterRSS (required from ../lib/rss) whose constructor sets siteurl and siteid for www.technojobs.co.uk. */ + constructor() { + super(); + + this.siteurl = 'www.technojobs.co.uk'; + this.siteid = 'technojobs'; + this.useStone = true; + this.requestOptions = { + 'url' : '' + }; + + this.guidRegex = /\/(\d+)/; + this.imgRegex = /src="(.+?)"/g; /* NOTE(review): the text following this line appears truncated in this copy of the patch - the locationRegEx literal is unterminated and the start of the next method is missing; verify against the original commit. */ + this.locationRegEx = /Location:(.*?)? { + return `src="${this.makeImg(part)}"`; + }); + + if (location !== null) { + const _location = location[1].trim(); + if (_location.length <= 30) + newObj.location = _location; + } + + if (rate !== null) + newObj.salary = rate[1].trim().slice(0, 55); + + if (id !== null) + newObj.id = id[1].trim(); + + newObj.title = item.title; + newObj.postDate = item.isoDate; + newObj.url = item.link; + newObj.summary = _content; + + /* + console.log(newObj); + console.log('-- reduce'); +*/ + + return newObj; + } + + async go(url) { /* Awaits the parent go(), sets the given url as the start URL, then awaits processFeed(). */ + await super.go(); + this.setStartUrl(url); + + await this.processFeed(); + } +} + +module.exports = RssTechnojobs; diff --git a/scrapers/totaljobs.js b/scrapers/totaljobs.js index eda621b..2d45ba9 100644 --- a/scrapers/totaljobs.js +++ b/scrapers/totaljobs.js @@ -45,7 +45,7 @@ class TotaljobsScraper extends MasterScraper { const now = ~~(new Date().getTime() / 1000.0); // console.log($part.html()); - newObj.title = $part('.job-title').text().trim(); + newObj.title = $part('.job-title').text().replace(/(\s*\\n)/g,'').replace(/(\s\s+)/g, ' ').trim().toString(); /* NOTE(review): the first replace pattern matches a literal backslash followed by the letter n, not a newline character; the second collapses runs of two or more whitespace characters to one space - confirm the first is intended. */ newObj.url = $part('.job-title a').attr('href'); newObj.id = $part('div.job').attr('id').trim(); newObj.summary = $part('p.job-intro').text().trim();