const Scraper = require('../lib/scraper');
const path = require('path');
const logger = require('log4js').getLogger('RC');
const fs = require('fs');
const dateFormat = require('dateformat');
const _ = require('lodash');
const jsonfile = require('jsonfile');
const Diff = require('text-diff');
const time = require("time-since");
const pug = require('pug');
const PNG = require('pngjs').PNG;
const pixelmatch = require('pixelmatch');

// SECURITY: the SMTP credentials below are committed in source. They can be
// overridden with the SMTP_USER / SMTP_PASS environment variables; the
// literals remain only as backward-compatible fallbacks.
// TODO: rotate this password and remove the fallback values.
const email = require('smtp-email-sender')({
  'host': 'mail.caliban.io',
  'port': '465',
  'auth': {
    'user': process.env.SMTP_USER || 'aida@caliban.io',
    'pass': process.env.SMTP_PASS || 'WaF#E+5am7.)\\csD',
    'type': 'LOGIN' // PLAIN, LOGIN, MD5 etc...
  },
  'secure': 'secure'
});

logger.level = process.env.LOGGER_LEVEL || 'debug';

/**
 * Scraper that visits each configured page, compares its visible text with
 * the snapshot saved on the previous run and, when the text differs, stores a
 * new snapshot and screenshot, builds a pixel diff image and e-mails a report.
 */
class ChangeDetection extends Scraper {
  /**
   * Sets the scraper id to 'CD' and exposes `run`, a wrapper around `__run`
   * produced by the inherited `_debounce` helper (called with 5000 —
   * presumably milliseconds; confirm against Scraper).
   */
  constructor() {
    super();

    this.setID('CD');

    this.run = this._debounce(async () => {
      await this.__run();
    }, 5000);
  }

  /**
   * Renders the e-mail template and writes the resulting HTML to the debug log.
   *
   * @param {Object} data - locals passed to the pug template
   * @param {string} newpath - directory that contains `pug/email.pug`
   */
  pugTest(data, newpath) {
    logger.debug(pug.renderFile(`${newpath}/pug/email.pug`, data));
  }

  /**
   * Renders `pug/email.pug` with `data` and hands the message to the SMTP sender.
   * The diff image is attached only when `data.diffPNG` is set.
   *
   * @param {Object} data - template locals; `name` is used in the subject and
   *   `diffPNG` (optional) is the path of the image to attach
   * @param {string} newPath - directory that contains `pug/email.pug`
   * @returns {Promise<void>}
   */
  async sendSMTP(data, newPath) {
    const attachments = data.diffPNG ? [{ path: data.diffPNG }] : [];
    const html = pug.renderFile(`${newPath}/pug/email.pug`, data);

    // NOTE(review): 'from' and 'to' hold only a name followed by a space —
    // looks like the address part is missing; confirm before relying on delivery.
    email({
      'from': 'Aida ',
      'to': 'Martin ',
      'subject': `ChangeDetection: ${data.name}`,
      'html': html,
      attachments: attachments
    });
  }

  /**
   * Compares two PNG files with pixelmatch and writes the diff image next to
   * `today`, using the same file name with `_diff` appended before the extension.
   *
   * @param {string} previous - path of the earlier PNG
   * @param {string} today - path of the new PNG
   * @returns {Promise<string>} path of the written diff image
   */
  async generateDiffScreenshot(previous, today) {
    const { dir, root, ext, name } = path.parse(today);

    const img1 = PNG.sync.read(fs.readFileSync(previous));
    const img2 = PNG.sync.read(fs.readFileSync(today));

    // The diff canvas takes its dimensions from the previous image.
    // NOTE(review): pixelmatch expects both inputs to have these dimensions —
    // confirm the screenshots are always taken at a fixed size.
    const { width, height } = img1;
    const diff = new PNG({ width, height });

    pixelmatch(img1.data, img2.data, diff.data, width, height, { threshold: 0.1 });

    const endFilename = path.format({ dir, root, ext, name: `${name}_diff` });

    logger.debug('diffFilename', endFilename);

    fs.writeFileSync(endFilename, PNG.sync.write(diff));

    return endFilename;
  }

  /**
   * Checks a single configured page for text changes.
   *
   * First visit (no saved `.html`): stores the page text and a screenshot.
   * Later visits: diffs the saved text against the current text; when the
   * Levenshtein distance is non-zero the stats entry is updated, a new
   * screenshot and (when a previous screenshot is available) a diff image are
   * produced, the saved text is replaced and a report e-mail is sent.
   *
   * @param {Object} item - settings entry; `name` and `url` are read here and
   *   the whole object is merged into the e-mail template data
   * @returns {Promise<void>}
   */
  async processItem(item) {
    logger.debug(`Processing ${item.name}...`);

    const now = new Date();
    const filename = _.kebabCase(item.name);
    const oldFile = `${this.path}/${filename}.html`;
    const stats = this.stats.get(filename) || { 'lastSaved': now, 'lastChanged': null };

    await this._goto(item.url);
    await this._randomWait(this.page, 3, 5);

    const innerText = await this.page.evaluate(() => {
      return { 'body': document.body.innerText };
    });

    const timestamp = dateFormat(now, 'yyyymmddHHMM');
    const screenshotPath = `${this.path}/screenshots/${filename}-${timestamp}`;

    if (!fs.existsSync(oldFile)) {
      // First time this page is seen: record a baseline only.
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
      stats.screenshot = screenshotPath;

      await this._makeScreenshotV2(this.page, screenshotPath, null);

      this.stats.set(filename, stats);

      return;
    }

    const previousFile = fs.readFileSync(oldFile, 'utf-8');
    const diff = new Diff();
    const textDiff = diff.main(previousFile, innerText.body);
    const cleanedDiff = diff.cleanupSemantic(textDiff);
    const levenshtein = diff.levenshtein(textDiff);

    logger.debug('levenshtein:', levenshtein);
    logger.debug('cleanedDiff:', cleanedDiff);

    if (levenshtein === 0) {
      logger.debug('No change...');

      return;
    }

    logger.info('Changed...');

    stats.previousScreenshot = stats.screenshot;
    stats.previousChange = stats.lastSaved;
    stats.lastSaved = now;
    stats.lastChanged = now;
    stats.screenshot = screenshotPath;
    stats.changed = diff.prettyHtml(textDiff);
    stats.levenshtein = levenshtein;
    stats.since = time.since(new Date(stats.previousChange)).days();

    await this._makeScreenshotV2(this.page, screenshotPath, null);

    // A stats entry may lack a previous screenshot (e.g. the saved .html exists
    // but stats.json was lost). Without this guard the run would throw before
    // the snapshot is updated and fail the same way on every later run.
    const previousPng = stats.previousScreenshot ? `${stats.previousScreenshot}.png` : null;

    if (previousPng && fs.existsSync(previousPng)) {
      stats.diffPNG = await this.generateDiffScreenshot(previousPng, `${screenshotPath}.png`);
    } else {
      logger.debug('No previous screenshot available, skipping diff image');
      stats.diffPNG = null;
    }

    await this._randomWait(this.page, 3, 5);

    fs.writeFileSync(oldFile, innerText.body, 'utf-8');

    this.stats.set(filename, stats);

    // Item fields are spread last, so they win over stats fields of the same name.
    const pugData = { ...stats, ...item };

    await this.sendSMTP(pugData, './');
  }

  /**
   * Runs `processItem` for every entry in `this.settings`, one after another.
   *
   * @returns {Promise<void>}
   */
  async processItems() {
    for (const item of this.settings) {
      await this.processItem(item);
    }
  }

  /**
   * Single-page variant of the change check: compares the text of the page
   * currently loaded in `this.page` with `previous.html`, logs the HTML diff
   * when it differs and replaces the saved text. No screenshot or e-mail.
   *
   * @returns {Promise<void>}
   */
  async processOld() {
    const oldFile = `${this.path}/previous.html`;

    const innerText = await this.page.evaluate(() => {
      return { 'body': document.body.innerText };
    });

    if (!fs.existsSync(oldFile)) {
      // Fixed: this branch used to reference an undefined `body` identifier.
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');

      return;
    }

    const previousFile = fs.readFileSync(oldFile, 'utf-8');
    const diff = new Diff();
    const textDiff = diff.main(previousFile, innerText.body);
    const levenshtein = diff.levenshtein(textDiff);

    logger.debug('levenshtein:', levenshtein);

    if (levenshtein !== 0) {
      logger.debug(diff.prettyHtml(textDiff));
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
    }
  }

  /**
   * Prepares the scraper: sets the artefacts path, creates the screenshots
   * directory, starts the browser, opens a page and sets a 1200x800 viewport.
   *
   * @returns {Promise<void>}
   * @throws {Error} wraps any failure raised during setup; the original error
   *   is available as `cause`
   */
  async start() {
    await super._start();

    try {
      this.startPage = 'https://www.harmankardon.co.uk/outlet/';

      // The value is not used here; the call is kept in case the inherited
      // helper has side effects — TODO confirm and remove if it has none.
      const mouseDownDuration = ChangeDetection.notARobot();

      await this.setPath(path.resolve(`${__dirname}/../artefacts`));

      await this._createDirectory(`${this.path}/screenshots`);

      await this._initBrowser(true);
      await this._createBrowserPage();

      await this.page.setViewport({ 'width': 1200, 'height': 800 });

      await this._randomWait(this.page, 3, 5);

      logger.debug('Started..');
    } catch (e) {
      // Same message as before, but the original error (and its stack) is kept.
      throw new Error(e, { cause: e });
    }
  }

  /**
   * Reads `settings.json` (relative to the working directory) into
   * `this.settings` and loads `stats.json` from the artefacts path into
   * `this.stats` as a Map; a missing stats file yields an empty Map.
   *
   * @returns {Promise<void>}
   */
  async loadSettings() {
    logger.debug('Load settings...');

    const statsFile = `${this.path}/stats.json`;

    this.settings = jsonfile.readFileSync('settings.json');

    let stats = [];

    if (fs.existsSync(statsFile)) {
      stats = jsonfile.readFileSync(statsFile) || [];
    }

    this.stats = new Map(stats);
  }

  /**
   * Writes `this.stats` to `stats.json` in the artefacts path as an array of
   * `[key, value]` pairs, the format `loadSettings` feeds back into `new Map`.
   *
   * @returns {Promise<void>}
   */
  async saveSettings() {
    logger.debug('Save settings...');

    const statsFile = `${this.path}/stats.json`;
    const stats = [...this.stats];

    jsonfile.writeFileSync(statsFile, stats);
  }

  /**
   * One complete run: start, load settings, process every item, save the
   * stats and call the inherited `_done`. Errors are not caught here, so a
   * failure in any step rejects the returned promise and skips the rest
   * (including `saveSettings`).
   *
   * @returns {Promise<void>}
   */
  async __run() {
    logger.debug('run');
    await this.start();

    await this.loadSettings();

    logger.debug('Running...');

    await this.processItems();

    await this.saveSettings();

    await this._done();
  }
}

module.exports = ChangeDetection;