/**
 * Created by WebStorm.
 * User: martin
 * Date: 15/04/2020
 * Time: 11:55
 */
const fs = require('fs');
const MasterBase = require('./base');
const cheerio = require('cheerio');
const got = require('got');
const fecha = require('fecha');

// Base URL of the relay endpoint used when `useStone` is true; the target URL
// is appended to it in URI-encoded form.
const STONE_PROXY_BASE = 'http://45.33.114.116:8080/';

/**
 * Scraper base class: fetches a page, saves the raw HTML to disk and hands
 * the parsed document to `loadPage`. The site-specific steps are empty stubs.
 *
 * Reads `this.requestOptions`, `this.url`, `this.siteid`, `this.useStone` and
 * calls `this.loadPage`; none of these are defined in this file (presumably
 * they come from MasterBase or a subclass; verify there).
 */
class MasterScraper extends MasterBase {
  constructor() {
    super();
  }

  /**
   * Fetches `url` with `got` and returns the response body.
   *
   * @param {string} url - Address of the page to fetch.
   * @param {boolean} [useStone=false] - When true, the request is sent to the
   *   relay endpoint with `url` appended in URI-encoded form.
   * @returns {Promise<*>} Resolves with `response.body` of the `got` response.
   * @throws Rejects with the error response body when the failure carries
   *   one; otherwise rejects with the original error (e.g. network failures
   *   that have no response).
   */
  async getContent(url, useStone = false) {
    const options = Object.assign({}, this.requestOptions);
    options.url = useStone
      ? `${STONE_PROXY_BASE}${encodeURIComponent(url)}`
      : url;

    console.log(options);

    // NOTE(review): only the URL is handed to got; the remaining
    // requestOptions are logged but not applied. Confirm this is intended.
    try {
      const response = await got(options.url);
      return response.body;
    } catch (err) {
      // Errors without a response (DNS, refused connection, timeout) used to
      // throw a TypeError here and leave the returned promise pending forever.
      const body = err && err.response ? err.response.body : undefined;
      throw body != null ? body : err;
    }
  }

  /**
   * Fetches `this.url`, writes the HTML to `<siteid>-<timestamp>.html` in the
   * current working directory, then passes the cheerio document to
   * `this.loadPage`.
   *
   * Any failure while fetching, writing or loading is logged with
   * `console.error` and not rethrown, so the returned promise always resolves.
   *
   * @returns {Promise<void>}
   */
  async getPage() {
    console.log('>> getPage: fetching', this.url);

    // NOTE(review): `hh` is fecha's 12-hour token, so AM and PM runs at the
    // same clock time produce the same filename; `HH` may have been intended.
    const now = fecha.format(new Date(), 'YYYY-MM-DD--hhmmss');
    const filename = `${this.siteid}-${now}.html`;

    try {
      const html = await this.getContent(this.url, this.useStone);
      fs.writeFileSync(filename, html);
      const $ = cheerio.load(html);
      this.loadPage($);
    } catch (err) {
      console.error(err);
    }
  }

  // Site specific parts below here. These are empty stubs in this class.

  async breakPage() {
  }

  async extractDetails(part) {
  }

  async checkNext() {
  }

  async processSite() {
  }

  async getIndividualPage() {
  }

  async getJobPages() {
  }

  async go() {
  }
}

module.exports = MasterScraper;