2020-05-19 09:05:04 +00:00
|
|
|
/**
 * Created by WebStorm.
 * User: martin
 * Date: 15/04/2020
 * Time: 11:56
 */
|
|
|
|
|
|
|
|
const tape = require('tape');
|
|
|
|
const _test = require('tape-promise').default; // <---- notice 'default'
|
|
|
|
const test = _test(tape); // decorate tape
|
|
|
|
|
|
|
|
const fs = require('fs');
|
|
|
|
const cheerio = require('cheerio');
|
|
|
|
|
|
|
|
const IndeedScraper = require('../scrapers/indeed');
|
|
|
|
|
|
|
|
// Scraper instance exercised by the tests below; the same object is reused
// across them rather than being rebuilt per test.
const indeedScraper = new IndeedScraper();

// Fixture markup read once at module load. The path is relative, so it is
// resolved against the directory the test run is started from.
// const page = fs.readFileSync('data/indeed/indeed-2020-04-16--092311.html');
const fixturePath = 'data/indeed/page2.html';
const page = fs.readFileSync(fixturePath);
|
|
|
|
|
|
|
|
/**
 * Runs the scraper's page-processing steps against the fixture held in `page`:
 * load the parsed document, break it up, then filter the adverts.
 *
 * Registered through the tape-promise wrapper `test` itself. `test.test` is
 * tape's own self-alias copied onto the wrapper unchanged, so calling it would
 * bypass the promise handling created by `_test(tape)`.
 */
test('Test Indeed scraper', async t => {
    // Parse the fixture and hand the cheerio root to the scraper.
    const $ = cheerio.load(page);
    indeedScraper.loadPage($);

    await indeedScraper.breakPage();

    // Optional steps, left disabled as in the original test:
    // await indeedScraper.getJobPages();
    // console.log(await indeedScraper.checkNext());
    // console.log(indeedScraper.items);

    await indeedScraper.filterAdverts();

    // await indeedScraper.addToDB();

    // Explicit end kept for readability; the wrapper guards against a second
    // end() when the returned promise settles.
    t.end();
});
|
2020-07-21 11:05:01 +00:00
|
|
|
|
|
|
|
/**
 * End-to-end run of the scraper via `go('london')`.
 * NOTE(review): presumably performs live requests rather than using the
 * fixture; confirm before running in CI.
 *
 * Registered through the tape-promise wrapper `test` itself. `test.test` is
 * tape's own self-alias copied onto the wrapper unchanged, so calling it would
 * bypass the promise handling created by `_test(tape)`.
 */
test('Test full run Indeed scraper', async t => {
    await indeedScraper.go('london');

    // Explicit end kept for readability; the wrapper guards against a second
    // end() when the returned promise settles.
    t.end();
});
|