const Scraper = require('../lib/scraper');
const cheerio = require('cheerio');
const path = require('path');
const logger = require('log4js').getLogger('RC');
const LocalStorage = require('node-localstorage').LocalStorage;
const fs = require('fs');
const HtmlDiffer = require('html-differ').HtmlDiffer;
const diffLogger = require('html-differ/lib/logger');
const Diff = require('text-diff');

logger.level = process.env.LOGGER_LEVEL || 'debug';

/**
 * Normalise a caught value into an Error without discarding an existing
 * Error's stack trace.
 *
 * @param {*} caught - Whatever was thrown.
 * @returns {Error} `caught` itself when it already is an Error, otherwise a
 *   new Error whose message is the stringified value.
 */
function asError(caught) {
  return caught instanceof Error ? caught : new Error(caught);
}

/**
 * Scraper that loads the Harman Kardon outlet page and records whether the
 * visible text of the page has changed since the previous run.
 *
 * The last seen page text is stored in `previous.html` inside the scraper's
 * artefact directory (`this.path`).
 */
class OutletScrape extends Scraper {
  /**
   * Registers the scraper under the ID `OUTLET` and exposes `run` as
   * `__run` wrapped by the inherited `_debounce` helper.
   */
  constructor() {
    super();
    this.setID('OUTLET');

    // NOTE(review): 5000 is presumably a delay in milliseconds; confirm
    // against Scraper#_debounce.
    this.run = this._debounce(async () => {
      await this.__run();
    }, 5000);
  }

  /**
   * Compare the current page text with the stored snapshot.
   *
   * On the first run (no snapshot file yet) the current text is simply
   * stored. On later runs the Levenshtein distance between the stored and
   * current text is logged; when it is non-zero the HTML-formatted diff is
   * logged and the snapshot is replaced with the current text.
   *
   * @returns {Promise<void>}
   */
  async process() {
    const snapshotFile = path.join(this.path, 'previous.html');

    // Evaluated inside the browser page: only the rendered text is compared,
    // not the markup.
    const pageText = await this.page.evaluate(() => document.body.innerText);

    if (!fs.existsSync(snapshotFile)) {
      // First run: nothing to compare against yet. The original wrote
      // `body.body` here, but `body` was never defined, so the first run
      // always failed with a ReferenceError.
      fs.writeFileSync(snapshotFile, pageText, 'utf-8');
      return;
    }

    const previousText = fs.readFileSync(snapshotFile, 'utf-8');
    const differ = new Diff();
    const textDiff = differ.main(previousText, pageText);
    const levenshtein = differ.levenshtein(textDiff);

    logger.debug('levenshtein:', levenshtein);

    if (levenshtein !== 0) {
      logger.debug(differ.prettyHtml(textDiff));
      fs.writeFileSync(snapshotFile, pageText, 'utf-8');
    }
  }

  /**
   * Prepare the browser and navigate to the outlet page.
   *
   * Sets `this.startPage`, points the artefact path at
   * `<this dir>/../artefacts/outlet`, creates the browser page with a
   * 1200x800 viewport, navigates, then waits via `_randomWait`.
   *
   * @returns {Promise<void>}
   * @throws {Error} Any failure during browser setup or navigation.
   */
  async start() {
    await super._start();

    try {
      this.startPage = 'https://www.harmankardon.co.uk/outlet/';

      // NOTE(review): the result is currently unused (it only fed a click
      // delay that has been removed); the call is kept in case notARobot()
      // has side effects. Confirm and drop if it is pure.
      const mouseDownDuration = OutletScrape.notARobot();

      this.setPath(path.resolve(__dirname, '..', 'artefacts', 'outlet'));

      await this._initBrowser(true);
      await this._createBrowserPage();
      await this.page.setViewport({ 'width': 1200, 'height': 800 });

      await this._goto(this.startPage);
      await this._randomWait(this.page, 3, 5);

      logger.debug('loaded..');
    } catch (e) {
      // Rethrow the original error so its stack trace is preserved.
      throw asError(e);
    }
  }

  /**
   * Run one full scrape: start the browser session, then compare the page
   * text with the stored snapshot.
   *
   * @returns {Promise<void>}
   * @throws {Error} Any failure raised by `start()` or `process()`.
   */
  async __run() {
    try {
      logger.debug('run');

      await this.start();
      await this.process();

      logger.debug('Done...');
    } catch (e) {
      // Rethrow the original error so its stack trace is preserved.
      throw asError(e);
    }
  }
}

module.exports = OutletScrape;