/**
 * Created by WebStorm.
 * User: martin
 * Date: 15/04/2020
 * Time: 11:55
 */
const cheerio = require('cheerio');
const MasterScraper = require('../lib/scraper');

/**
 * Scraper for contract adverts listed on totaljobs.com.
 *
 * Page fetching, URL handling, filtering and persistence (`getPage`,
 * `getContent`, `makeUrl`, `setStartUrl`, `filterAdverts`, `addToDB`,
 * `addToMongo`) are not defined here; they are presumably inherited from
 * MasterScraper — confirm against ../lib/scraper.
 */
class TotaljobsScraper extends MasterScraper {
  constructor() {
    super();
    this.siteurl = 'www.totaljobs.com';
    this.siteid = 'totaljobs';
    this.requestOptions = {
      url: '',
    };
  }

  // Site specific parts below here

  /**
   * Splits the current results page into adverts and appends them to
   * `this.items`.
   *
   * `this.currentPage` is used as a cheerio-style selector function
   * (presumably populated by `getPage()` — confirm in MasterScraper).
   *
   * @returns {Promise<void>}
   */
  async breakPage() {
    const $ = this.currentPage;
    const ads = [];

    // cheerio's each() does not wait for async callbacks, so iterate over a
    // plain array and await every extraction explicitly.
    for (const item of $('div.job').toArray()) {
      try {
        ads.push(await this.extractDetails(item));
      } catch (err) {
        // One malformed advert should not discard the rest of the page.
        console.error(err);
      }
    }

    this.items = [...this.items, ...ads];
  }

  /**
   * Builds an advert record from a single `div.job` element.
   *
   * Fields that are missing from the markup come back as empty strings,
   * except `company`, which is `null` when empty.
   *
   * @param part - the `div.job` element of one advert
   * @returns {Promise<Object>} the extracted advert
   */
  async extractDetails(part) {
    const $part = cheerio.load(part);

    // Unix time in whole seconds. Math.floor is used rather than `~~`, which
    // coerces to a signed 32-bit integer and wraps in 2038.
    const now = Math.floor(Date.now() / 1000);

    const title = $part('.job-title')
      .text()
      .replace(/(\s*\\n)/g, '')
      .replace(/(\s\s+)/g, ' ')
      .trim();

    return {
      title,
      url: $part('.job-title a').attr('href'),
      // attr() yields undefined when the attribute is absent; fall back to ''
      // instead of throwing on .trim().
      id: ($part('div.job').attr('id') || '').trim(),
      summary: $part('p.job-intro').text().trim(),
      company: $part('.company').text().trim() || null,
      location: $part('.location > span').text().trim(),
      postDate: $part('.date-posted').text().trim(),
      salary: $part('.salary').text().trim(),
      isEasyApply: false,
      site: this.siteid,
      timestamp: now,
    };
  }

  /**
   * Fetches an advert's own page and replaces its summary with the text of
   * `div.job-description`.
   *
   * The input item is not mutated. If fetching or parsing fails the error is
   * logged and the copy is returned with its original summary.
   *
   * @param item - advert as produced by `extractDetails`
   * @returns {Promise<Object>} a shallow copy of `item`, possibly with a new summary
   */
  async getIndividualPage(item) {
    const newItem = { ...item };
    console.log('Getting', item.url);

    try {
      const html = await this.getContent(item.url);
      const $ = cheerio.load(html);
      newItem.summary = $('div.job-description').text().trim();
    } catch (err) {
      console.error(err);
    }

    return newItem;
  }

  /**
   * Replaces every entry of `this.items` with the result of
   * `getIndividualPage`, one request at a time.
   *
   * @returns {Promise<void>}
   */
  async getJobPages() {
    const newItems = [];

    for (const item of this.items) {
      console.log(item.title);
      newItems.push(await this.getIndividualPage(item));
    }

    this.items = newItems;
  }

  /**
   * Reads the href of the last child of `.pagination` on the current page
   * and, when one exists, passes it to `makeUrl`.
   *
   * @returns {Promise<string>} the href found, or '' when there is none
   */
  async checkNext() {
    const $ = this.currentPage;
    const next = $('.pagination > *:last-child').attr('href') || '';

    if (next !== '') {
      this.makeUrl(next);
    }

    console.log(next);
    return next;
  }

  /**
   * Scrapes the start page, then filters and stores the adverts found.
   *
   * TODO: only the first results page is processed; following the link
   * reported by `checkNext()` and enriching adverts via `getJobPages()` are
   * both currently disabled.
   *
   * @returns {Promise<void>}
   */
  async processSite() {
    console.log('Processing...');

    this.items = [];
    await this.getPage();
    await this.breakPage();
    await this.checkNext();

    await this.filterAdverts();
    await this.addToDB();
    await this.addToMongo();
  }

  /**
   * Entry point: sets the search URL for the given location and runs the
   * scrape.
   *
   * @param {string} [location='london'] - location slug; URI-encoded before use
   * @returns {Promise<void>}
   */
  async go(location = 'london') {
    this.setStartUrl(`https://www.totaljobs.com/jobs/contract/html-or-vue-or-vuejs-or-web-or-sql-or-delphi-or-vb-or-vbscript-or-php-or-ajax-or-mysql-or-sqlserver-or-javascript-or-node-or-nodejs-or-svelte-or-sveltejs-not-react/in-${encodeURIComponent(location)}?q=Html+Or+Vue+Or+Vuejs+Or+Web+Or+Sql+Or+Delphi+Or+Vb+Or+Vbscript+Or+Php+Or+Ajax+Or+Mysql+Or+Sqlserver+Or+Javascript+Or+Node+Or+Nodejs+Or+Svelte+Or+Sveltejs+NOT+React&postedwithin=3&radius=20`);

    await this.processSite();
    console.log(`TotalJobs ${location} completed`);
  }
}

module.exports = TotaljobsScraper;