// jobscraper/scrapers/rss.technojobs.js
/**
* Created by WebStorm.
* User: martin
* Date: 24/05/2020
* Time: 23:04
*/
const MasterRSS = require('../lib/rss');
/**
 * RSS scraper for the Technojobs job board (www.technojobs.co.uk).
 *
 * Holds the site identifiers and the regular expressions used to pull
 * individual fields out of a feed item, and maps each feed item onto a record
 * obtained from the inherited `newRecord()`.
 */
class RssTechnojobs extends MasterRSS {
  /**
   * Sets the site identifiers, request options and field-extraction patterns.
   */
  constructor() {
    super();
    this.siteurl = 'www.technojobs.co.uk';
    this.siteid = 'technojobs';
    this.useStone = false;
    this.requestOptions = {
      url: ''
    };
    // First run of digits that directly follows a slash in the item link.
    this.guidRegex = /\/(\d+)/;
    // Every src="..." attribute in the item content (global, so all are rewritten).
    this.imgRegex = /src="(.+?)"/g;
    // The value runs up to the next "<" on the same line. Using [^<]* instead of
    // an optional lazy group means an empty field yields '' rather than an
    // undefined capture (or the markup that follows the field).
    this.locationRegEx = /Location:([^<\r\n]*)</;
    // NOTE(review): not referenced anywhere in this class - confirm whether the
    // base class reads it or whether company extraction is simply missing.
    this.companyRegEx = /Advertiser:(?:&nbsp;)?(.*?)(?:Reference|Start|Email)/;
    this.rateRegEx = /Salary\/Rate:([^<\r\n]*)</;
  }

  /**
   * Builds a record from a single feed item.
   *
   * @param {Object} item - Feed item; its `title`, `link`, `isoDate` and
   *   `content` properties are read.
   * @returns {Object} The object returned by `newRecord()` with `title`,
   *   `postDate`, `url` and `summary` set, plus `location`, `salary` and `id`
   *   when the corresponding pattern matches.
   */
  reduceItem(item) {
    const record = this.newRecord();
    // An item without a textual body is treated as empty instead of throwing.
    const content = typeof item.content === 'string' ? item.content : '';

    const locationMatch = this.locationRegEx.exec(content);
    const rateMatch = this.rateRegEx.exec(content);
    const idMatch = this.guidRegex.exec(item.link);

    // Route every image source through the inherited makeImg().
    const summary = content.replace(
      this.imgRegex,
      (full, src) => `src="${this.makeImg(src)}"`
    );

    if (locationMatch !== null) {
      const location = locationMatch[1].trim();
      // Values longer than 30 characters are discarded rather than truncated.
      if (location.length <= 30) {
        record.location = location;
      }
    }

    if (rateMatch !== null) {
      // Salary text is capped at 55 characters.
      record.salary = rateMatch[1].trim().slice(0, 55);
    }

    if (idMatch !== null) {
      record.id = idMatch[1];
    }

    record.title = item.title;
    record.postDate = item.isoDate;
    record.url = item.link;
    record.summary = summary;

    return record;
  }

  /**
   * Runs the scraper against one feed URL.
   *
   * Awaits the inherited `go()` (called without arguments), registers `url`
   * through `setStartUrl()` and then awaits `processFeed()`.
   *
   * @param {string} url - Feed URL handed to `setStartUrl()`.
   * @returns {Promise<void>} Resolves once `processFeed()` has settled.
   */
  async go(url) {
    await super.go();
    this.setStartUrl(url);
    await this.processFeed();
  }
}
// The scraper class is this module's sole export.
module.exports = RssTechnojobs;