2019-10-21 22:38:27 +00:00
|
|
|
const Scraper = require('../lib/scraper');
|
|
|
|
const path = require('path');
|
|
|
|
const logger = require('log4js').getLogger('RC');
|
|
|
|
|
|
|
|
const fs = require('fs');
|
|
|
|
const dateFormat = require('dateformat');
|
|
|
|
|
|
|
|
const _ = require('lodash');
|
|
|
|
const jsonfile = require('jsonfile');
|
|
|
|
|
|
|
|
const Diff = require('text-diff');
|
|
|
|
|
|
|
|
const time = require("time-since");
|
|
|
|
const pug = require('pug');
|
2019-10-23 15:20:00 +00:00
|
|
|
|
|
|
|
const PNG = require('pngjs').PNG;
|
|
|
|
const pixelmatch = require('pixelmatch');
|
|
|
|
|
|
|
|
|
2019-10-21 22:38:27 +00:00
|
|
|
// SMTP transport used to send change notifications.
// Connection details may be overridden with SMTP_HOST / SMTP_PORT / SMTP_USER /
// SMTP_PASS; the literals below are kept only as fallbacks so existing
// deployments keep working when those variables are not set.
// FIXME(security): a live password is committed in source — rotate it and
// remove the fallback once the environment variables are provisioned.
const email = require('smtp-email-sender')({
  'host': process.env.SMTP_HOST || 'mail.caliban.io',
  'port': process.env.SMTP_PORT || '465',
  'auth': {
    'user': process.env.SMTP_USER || 'aida@caliban.io',
    'pass': process.env.SMTP_PASS || 'WaF#E+5am7.)\\csD',
    'type': 'LOGIN' // PLAIN, LOGIN, MD5 etc...
  },
  'secure': 'secure'
});

// Log verbosity: LOGGER_LEVEL wins, otherwise default to 'debug'.
logger.level = process.env.LOGGER_LEVEL || 'debug';
|
|
|
|
|
|
|
|
/**
 * Returns RGBA pixel data for `img` laid out on a canvas of `width` x `height`.
 *
 * When the image already has the requested size its own buffer is returned
 * untouched; otherwise the rows are copied into the top-left corner of a
 * zero-filled buffer. Callers must pass a size at least as large as the image.
 *
 * @param {{width: number, height: number, data: Buffer}} img decoded image (4 bytes per pixel)
 * @param {number} width target canvas width in pixels
 * @param {number} height target canvas height in pixels
 * @returns {Buffer} pixel data of exactly `width * height * 4` bytes
 */
function padImageData(img, width, height) {
  if (img.width === width && img.height === height) {
    return img.data;
  }

  const bytesPerPixel = 4;
  const padded = Buffer.alloc(width * height * bytesPerPixel);

  for (let row = 0; row < img.height; row++) {
    const sourceStart = row * img.width * bytesPerPixel;
    const sourceEnd = sourceStart + (img.width * bytesPerPixel);

    img.data.copy(padded, row * width * bytesPerPixel, sourceStart, sourceEnd);
  }

  return padded;
}

/**
 * Scraper that visits each configured page, compares its visible text with the
 * copy saved on the previous run and, when the text changed, takes a
 * screenshot, builds a visual diff and emails a report.
 */
class ChangeDetection extends Scraper {

  /**
   * Registers the scraper under the id 'CD' and exposes `run`, a wrapper
   * around `__run()` created with the inherited `_debounce` helper (5000 passed
   * as its second argument).
   */
  constructor() {
    super();

    this.setID('CD');

    this.run = this._debounce(async () => {
      await this.__run();
    }, 5000);
  }

  /**
   * Renders the email template and writes the resulting HTML to the debug log.
   *
   * @param {Object} data locals handed to the pug template
   * @param {string} newpath directory that contains `pug/email.pug`
   */
  pugTest(data, newpath) {
    logger.debug(pug.renderFile(`${newpath}/pug/email.pug`, data));
  }

  /**
   * Renders the email template and sends the change report.
   *
   * The diff image is attached only when `data.diffPNG` is set, so a report
   * can still be sent when no visual diff could be produced.
   *
   * @param {Object} data template locals; `name` is used in the subject, `diffPNG` is the optional attachment path
   * @param {string} newPath directory that contains `pug/email.pug`
   * @returns {Promise<void>}
   */
  async sendSMTP(data, newPath) {
    const html = pug.renderFile(`${newPath}/pug/email.pug`, data);

    const message = {
      'from': 'Aida <aida@caliban.io>',
      'to': 'Martin <martind2000@gmail.com>',
      'subject': `ChangeDetection: ${data.name}`,
      'html': html
    };

    if (data.diffPNG) {
      message.attachments = [{ 'path': data.diffPNG }];
    }

    email(message);
  }

  /**
   * Writes a pixel diff of two PNG screenshots next to the newer one.
   *
   * The output file is named after `today` with `_diff` appended to the base
   * name. Screenshots of different dimensions are padded to a common canvas
   * first, because pixelmatch rejects inputs whose sizes do not match.
   *
   * @param {string} previous path of the older PNG
   * @param {string} today path of the newer PNG
   * @returns {Promise<string>} path of the diff image that was written
   */
  async generateDiffScreenshot(previous, today) {
    const { dir, root, ext, name } = path.parse(today);

    const before = PNG.sync.read(fs.readFileSync(previous));
    const after = PNG.sync.read(fs.readFileSync(today));

    const width = Math.max(before.width, after.width);
    const height = Math.max(before.height, after.height);
    const diff = new PNG({ width, height });

    pixelmatch(
      padImageData(before, width, height),
      padImageData(after, width, height),
      diff.data,
      width,
      height,
      { 'threshold': 0.1 }
    );

    const endFilename = path.format({ dir, root, ext, 'name': `${name}_diff` });

    logger.debug('diffFilename', endFilename);

    fs.writeFileSync(endFilename, PNG.sync.write(diff));

    return endFilename;
  }

  /**
   * Checks one configured page for text changes.
   *
   * On the first visit the page text is stored as the baseline. On later
   * visits the text is diffed against the baseline; when it differs a
   * screenshot is taken, a visual diff is generated if a previous screenshot
   * exists, the baseline and stats are updated and a report is emailed.
   *
   * @param {{name: string, url: string}} item settings entry to process
   * @returns {Promise<void>}
   */
  async processItem(item) {
    logger.debug(`Processing ${item.name}...`);

    const now = new Date();
    const filename = _.kebabCase(item.name);
    const oldFile = `${this.path}/${filename}.html`;
    const stats = this.stats.get(filename) || { 'lastSaved': now, 'lastChanged': null };

    await this._goto(item.url);

    await this._randomWait(this.page, 3, 5);

    const innerText = await this.page.evaluate(() => {
      return {
        'body': document.body.innerText
      };
    });

    // First visit: nothing to compare against, just record the baseline.
    if (!fs.existsSync(oldFile)) {
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
      this.stats.set(filename, stats);

      return;
    }

    const previousFile = fs.readFileSync(oldFile, 'utf-8');

    const diff = new Diff();
    const textDiff = diff.main(previousFile, innerText.body);
    // NOTE(review): cleanupSemantic looks like it rewrites textDiff in place, so
    // the value logged below may be undefined — confirm against text-diff.
    const cleanedDiff = diff.cleanupSemantic(textDiff);
    const levenshtein = diff.levenshtein(textDiff);

    logger.debug('levenshtein:', levenshtein);
    logger.debug('cleanedDiff:', cleanedDiff);

    if (levenshtein === 0) {
      logger.debug('No change...');

      return;
    }

    logger.info('Changed...');

    const timestamp = dateFormat(now, 'yyyymmddHHMM');
    const screenshotPath = `${this.path}/screenshots/${filename}-${timestamp}`;

    // Build the new stats on a copy so a failure below cannot leave a
    // half-updated entry in this.stats.
    const updated = {
      ...stats,
      'previousScreenshot': stats.screenshot,
      'previousChange': stats.lastSaved,
      'lastSaved': now,
      'lastChanged': now,
      'screenshot': screenshotPath,
      'changed': diff.prettyHtml(textDiff),
      'levenshtein': levenshtein,
      'since': time.since(new Date(stats.lastSaved)).days(),
      'diffPNG': null
    };

    await this._makeScreenshotV2(this.page, screenshotPath, null);

    // The first detected change has no earlier screenshot to compare with.
    const previousPng = updated.previousScreenshot ? `${updated.previousScreenshot}.png` : null;

    if (previousPng !== null && fs.existsSync(previousPng)) {
      updated.diffPNG = await this.generateDiffScreenshot(previousPng, `${screenshotPath}.png`);
    } else {
      logger.debug('No previous screenshot available, skipping visual diff');
    }

    await this._randomWait(this.page, 3, 5);

    fs.writeFileSync(oldFile, innerText.body, 'utf-8');
    this.stats.set(filename, updated);

    // Item fields win over stats fields of the same name in the template data.
    const pugData = { ...updated, ...item };

    await this.sendSMTP(pugData, './');
  }

  /**
   * Processes every entry of `this.settings` one after another.
   *
   * @returns {Promise<void>}
   */
  async processItems() {
    for (const item of this.settings) {
      await this.processItem(item);
    }
  }

  /**
   * Legacy single-page check: compares the current page text with
   * `previous.html` and rewrites that file when the text changed.
   *
   * @returns {Promise<void>}
   */
  async processOld() {
    const oldFile = `${this.path}/previous.html`;

    const innerText = await this.page.evaluate(() => {
      return {
        'body': document.body.innerText
      };
    });

    if (!fs.existsSync(oldFile)) {
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');

      return;
    }

    const previousFile = fs.readFileSync(oldFile, 'utf-8');

    const diff = new Diff();
    const textDiff = diff.main(previousFile, innerText.body);
    const levenshtein = diff.levenshtein(textDiff);

    logger.debug('levenshtein:', levenshtein);

    if (levenshtein !== 0) {
      logger.debug(diff.prettyHtml(textDiff));

      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
    }
  }

  /**
   * Prepares the working directories and opens a browser page.
   *
   * @returns {Promise<void>}
   * @throws {Error} the original error when it already is an Error, otherwise the thrown value wrapped in one
   */
  async start() {
    await super._start();

    try {
      this.startPage = 'https://www.harmankardon.co.uk/outlet/';

      // NOTE(review): the result is currently unused; the call is kept in case
      // the inherited notARobot() has side effects — confirm and remove.
      const mouseDownDuration = ChangeDetection.notARobot();

      await this.setPath(path.resolve(`${__dirname }/../artefacts`));

      await this._createDirectory(`${this.path}/screenshots`);

      await this._initBrowser(true);
      await this._createBrowserPage();

      await this.page.setViewport({ 'width': 1200, 'height': 800 });

      await this._randomWait(this.page, 3, 5);

      logger.debug('Started..');
    } catch (e) {
      // Rethrow the original error so its stack and type are preserved.
      throw e instanceof Error ? e : new Error(e);
    }
  }

  /**
   * Loads the item list from `settings.json` (relative to the working
   * directory) and the per-item stats from `stats.json` in `this.path`.
   * Stats default to an empty map when the file is missing or empty.
   *
   * @returns {Promise<void>}
   */
  async loadSettings() {
    logger.debug('Load settings...');

    const statsFile = `${this.path}/stats.json`;

    this.settings = jsonfile.readFileSync('settings.json');

    let stats = [];

    if (fs.existsSync(statsFile)) {
      stats = jsonfile.readFileSync(statsFile) || [];
    }

    this.stats = new Map(stats);
  }

  /**
   * Writes the stats map to `stats.json` in `this.path` as an array of
   * `[key, value]` pairs.
   *
   * @returns {Promise<void>}
   */
  async saveSettings() {
    logger.debug('Save settings...');

    const statsFile = `${this.path}/stats.json`;

    jsonfile.writeFileSync(statsFile, [...this.stats]);
  }

  /**
   * Runs one full pass: start, load settings, process every item, persist the
   * stats and finally call the inherited `_done()`.
   *
   * Stats are saved even when an item fails, because items processed before
   * the failure have already rewritten their baseline files.
   *
   * @returns {Promise<void>}
   */
  async __run() {
    logger.debug('run');
    await this.start();

    await this.loadSettings();

    logger.debug('Running...');

    try {
      await this.processItems();
    } finally {
      await this.saveSettings();
    }

    await this._done();
  }
}
|
|
|
|
|
|
|
|
// The ChangeDetection class is this module's only export.
module.exports = ChangeDetection;
|