171 lines
4.4 KiB
JavaScript
171 lines
4.4 KiB
JavaScript
/**
|
|
* Created by WebStorm.
|
|
* User: martin
|
|
* Date: 15/04/2020
|
|
* Time: 11:55
|
|
|
|
*/
|
|
|
|
const cheerio = require('cheerio');
|
|
|
|
const MasterScraper = require('../lib/scraper');
|
|
|
|
/**
 * Scraper for contract job adverts listed on www.totaljobs.com.
 *
 * Several helpers used below (getPage, getContent, makeUrl, setStartUrl,
 * filterAdverts, addToDB, addToMongo) and the `currentPage` property are not
 * defined in this class; they are presumably supplied by MasterScraper.
 */
class TotaljobsScraper extends MasterScraper {

  /**
   * Sets the site identity fields and an empty request-options template.
   */
  constructor() {
    super();
    this.siteurl = 'www.totaljobs.com';
    this.siteid = 'totaljobs';
    this.requestOptions = {
      'url' : ''
    };
  }

  // Site specific parts below here

  /**
   * Extracts one advert object from every `div.job` element of
   * `this.currentPage` and appends them, in page order, to `this.items`.
   *
   * A failure while extracting any advert rejects the returned promise
   * instead of surfacing as an unhandled rejection.
   *
   * @returns {Promise<void>}
   */
  async breakPage() {
    const $ = this.currentPage;
    const sections = $('div.job').toArray();

    // Cheerio's .each() ignores the promise returned by an async callback,
    // so the elements are mapped explicitly and the results awaited here.
    const ads = await Promise.all(sections.map((item) => this.extractDetails(item)));

    this.items = [...(this.items || []), ...ads];
  }

  /**
   * Builds an advert object from a single job element.
   *
   * @param part the job element (or markup) to load into cheerio
   * @returns {Promise<{}>} advert with the keys title, url, id, summary,
   *   company, location, postDate, salary, isEasyApply, site and timestamp
   *   (Unix time in whole seconds). `id` and `company` are null when absent.
   */
  async extractDetails(part) {
    const $part = cheerio.load(part);
    const textOf = (selector) => $part(selector).text().trim();

    // The id attribute may be missing; calling .trim() on undefined would throw.
    const rawId = $part('div.job').attr('id');

    return {
      // NOTE(review): the first pattern matches a literal backslash followed
      // by "n", not a newline character - confirm that this is intended.
      title: $part('.job-title').text().replace(/(\s*\\n)/g, '').replace(/(\s\s+)/g, ' ').trim(),
      url: $part('.job-title a').attr('href'),
      id: typeof rawId === 'string' ? rawId.trim() : null,
      summary: textOf('p.job-intro'),
      company: textOf('.company') || null,
      location: textOf('.location > span'),
      postDate: textOf('.date-posted'),
      salary: textOf('.salary'),
      isEasyApply: false,
      site: this.siteid,
      // Math.floor keeps full precision; `~~` truncates to a signed 32-bit integer.
      timestamp: Math.floor(Date.now() / 1000),
    };
  }

  /**
   * Fetches the advert's own page and returns a copy of the advert whose
   * summary is replaced by the text of `div.job-description`.
   *
   * Best effort: when the fetch or the parsing fails, the error is logged and
   * the copy is returned with its summary unchanged. The argument itself is
   * never modified.
   *
   * @param item advert object; `item.url` is the page to fetch
   * @returns {Promise<*>} shallow copy of `item`
   */
  async getIndividualPage(item) {
    const newItem = {...item};
    console.log('Getting', item.url);

    try {
      const html = await this.getContent(item.url);
      const $ = cheerio.load(html);
      newItem.summary = $('div.job-description').text().trim();
    } catch (err) {
      console.error(err);
    }

    return newItem;
  }

  /**
   * Replaces every advert in `this.items` with the result of
   * getIndividualPage(), fetching the pages one after another.
   *
   * @returns {Promise<void>}
   */
  async getJobPages() {
    const newItems = [];

    for (const item of this.items) {
      console.log(item.title);
      newItems.push(await this.getIndividualPage(item));
    }

    this.items = newItems;
  }

  /**
   * Reads the href of the last child of `.pagination` on the current page,
   * passes it to makeUrl() when it is not empty, and logs it.
   *
   * @returns {Promise<void>}
   */
  async checkNext() {
    const $ = this.currentPage;
    const next = $('.pagination > *:last-child').attr('href') || '';

    if (next !== '') {
      this.makeUrl(next);
    }

    console.log(next);
  }

  /**
   * Runs one scrape pass: resets `this.items`, loads the page, extracts the
   * adverts, checks for a next-page link, then filters and stores the result.
   *
   * TODO: only a single results page is processed; no loop follows the link
   * found by checkNext(), and getJobPages() is not called.
   *
   * @returns {Promise<void>}
   */
  async processSite() {
    console.log('Processing...');

    this.items = [];
    await this.getPage();

    await this.breakPage();

    await this.checkNext();

    await this.filterAdverts();

    await this.addToDB();
    await this.addToMongo();
  }

  /**
   * Entry point: sets the search URL for the given location and processes it.
   *
   * @param location place name inserted (URI-encoded) into the search URL
   * @returns {Promise<void>}
   */
  async go(location = 'london') {
    this.setStartUrl(`https://www.totaljobs.com/jobs/contract/html-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);

    await this.processSite();
    console.log(`TotalJobs ${location} completed`);
  }

}
|
|
|
|
// CommonJS export: the module's value is the TotaljobsScraper class itself.
module.exports = TotaljobsScraper;
|