jobscraper/scrapers/rss.jobserve.js
2020-06-01 08:59:29 +01:00

81 lines
1.8 KiB
JavaScript

/**
* Created by WebStorm.
* User: martin
* Date: 22/05/2020
* Time: 12:01
*/
const MasterRSS = require('../lib/rss');
/**
 * RSS scraper for the Jobserve job board (www.jobserve.com).
 *
 * Holds the site identifiers and the regular expressions used to pull
 * structured fields out of a feed item, and maps each raw feed item onto
 * the record object returned by `newRecord()`.
 *
 * `newRecord()`, `makeImg()`, `setStartUrl()` and `processFeed()` are not
 * defined here; they are expected to be inherited from MasterRSS.
 */
class RssJobserve extends MasterRSS {
  /**
   * Set up site identifiers, request options and field-extraction patterns.
   */
  constructor() {
    super();
    this.siteurl = 'www.jobserve.com';
    this.siteid = 'jobserve';
    this.useStone = false;
    this.requestOptions = {
      'url' : ''
    };
    // Job id: the text between "/en/" and the next "." in the item link.
    this.guidRegex = /\/en\/(.*?)\./;
    // Every src="..." attribute in the item HTML (global so that all of
    // them are rewritten by String.prototype.replace).
    this.imgRegex = /src="(.+?)"/g;
    // Labelled fields embedded in the item's text snippet.
    this.locationRegEx = /Location:(.*?) /;
    this.companyRegEx = /Advertiser:(?: )?(.*?)(?:Reference|Start|Email)/;
    this.rateRegEx = /Rate:(?: )?(.*?)(?: |Reference|Start|Email|Type)/;
  }

  /**
   * Convert one feed item into a job record.
   *
   * Optional fields (`location`, `company`, `salary`, `id`) are only
   * assigned when their pattern matches; `title`, `postDate`, `url` and
   * `summary` are always assigned.
   *
   * @param {Object} item - Feed item. Reads `contentSnippet`, `content`,
   *   `link`, `title` and `isoDate` (presumably an rss-parser item; confirm
   *   against the feed reader used by MasterRSS).
   * @returns {Object} The object obtained from `newRecord()`, populated.
   */
  reduceItem(item) {
    const MAX_LOCATION_LENGTH = 30;
    const MAX_SALARY_LENGTH = 55;

    // An item may lack any of the text fields. Normalise null/undefined to
    // an empty string so a sparse item produces a sparse record instead of
    // throwing inside replace() or being matched as the text "undefined".
    const toText = (value) => (value == null ? '' : String(value));
    const snippet = toText(item.contentSnippet);
    const link = toText(item.link);

    const record = this.newRecord();

    const locationMatch = this.locationRegEx.exec(snippet);
    const companyMatch = this.companyRegEx.exec(snippet);
    const rateMatch = this.rateRegEx.exec(snippet);
    const idMatch = this.guidRegex.exec(link);

    // Route every image source through makeImg() and keep the rest of the
    // markup untouched.
    const summary = toText(item.content).replace(
      this.imgRegex,
      (full, src) => `src="${this.makeImg(src)}"`
    );

    if (locationMatch !== null) {
      const place = locationMatch[1].trim();
      // Over-long captures are discarded rather than truncated.
      if (place.length <= MAX_LOCATION_LENGTH) {
        record.location = place;
      }
    }

    if (companyMatch !== null) {
      record.company = companyMatch[1].trim();
    }

    if (rateMatch !== null) {
      // Unlike location, an over-long rate is truncated, not dropped.
      record.salary = rateMatch[1].trim().slice(0, MAX_SALARY_LENGTH);
    }

    if (idMatch !== null) {
      record.id = idMatch[1].trim();
    }

    record.title = item.title;
    record.postDate = item.isoDate;
    record.url = item.link;
    record.summary = summary;

    return record;
  }

  /**
   * Run the scraper against a feed URL.
   *
   * @param {string} url - Feed URL handed to `setStartUrl()`.
   * @returns {Promise<void>} Resolves once `processFeed()` has completed.
   */
  async go(url) {
    // NOTE(review): super.go() is not awaited. If the parent implementation
    // is async this leaves a floating promise; confirm against ../lib/rss
    // before changing it.
    super.go();
    this.setStartUrl(url);
    await this.processFeed();
  }
}
// Expose the RssJobserve class as this module's sole export.
module.exports = RssJobserve;