cd3248340d
Build working docker-compose.yml working
337 lines
8.7 KiB
JavaScript
337 lines
8.7 KiB
JavaScript
const Scraper = require('../lib/scraper');
|
|
const path = require('path');
|
|
const logger = require('log4js').getLogger('RC');
|
|
|
|
const fs = require('fs');
|
|
const dateFormat = require('dateformat');
|
|
|
|
const _ = require('lodash');
|
|
const jsonfile = require('jsonfile');
|
|
|
|
const Diff = require('text-diff');
|
|
|
|
const time = require('time-since');
|
|
const pug = require('pug');
|
|
|
|
const PNG = require('pngjs').PNG;
|
|
const pixelmatch = require('pixelmatch');
|
|
const compareImages = require('resemblejs/compareImages');
|
|
|
|
// Settings for the SMTP sender used to mail change notifications.
// Host and credentials are read from the environment at module load time.
// NOTE(review): many shells already export USER (login name) and sometimes
// HOST; confirm the deployment overrides them with the SMTP values.
const smtpSettings = {
  'host': process.env.HOST,
  'port': '465',
  'auth': {
    'user': process.env.USER,
    'pass': process.env.PASS,
    'type': 'LOGIN' // PLAIN, LOGIN, MD5 etc...
  },
  'secure': 'secure'
};

// `email(message)` sends one message through the configured transport.
const email = require('smtp-email-sender')(smtpSettings);

// Log verbosity can be overridden through LOGGER_LEVEL; defaults to 'debug'.
logger.level = process.env.LOGGER_LEVEL || 'debug';
|
|
|
|
/**
 * Scraper that watches a configured list of pages for text changes.
 *
 * For every entry in `settings.json` the page text (`document.body.innerText`)
 * is compared with the snapshot stored from the previous run. When the text
 * differs, a new screenshot is taken, a visual diff against the previous
 * screenshot is generated, the per-page stats are updated and a notification
 * email is sent.
 *
 * Browser handling (`_start`, `_initBrowser`, `_createBrowserPage`, `_goto`,
 * `_randomWait`, `_makeScreenshotV2`, `_createDirectory`, `setPath`, `setID`,
 * `_done`) is not defined in this class; presumably it is inherited from
 * `Scraper` - verify against ../lib/scraper.
 */
class ChangeDetection extends Scraper {

  constructor() {
    super();

    this.setID('CD');

    // Public entry point. Kept as an instance property (not a prototype
    // method) so it can be swapped for a wrapped/debounced variant.
    this.run = async () => {
      await this.__run();
    };
  }

  /**
   * Render the notification email template and write the HTML to the debug log.
   *
   * @param {Object} data - Template locals passed to pug.
   * @param {string} newpath - Directory that contains `pug/email.pug`.
   */
  pugTest(data, newpath) {
    logger.debug(pug.renderFile(`${newpath}/pug/email.pug`, data));
  }

  /**
   * Render the notification email and hand it to the SMTP sender.
   *
   * The diff image is attached only when `data.diffPNG` is set, so a change
   * without a visual diff still produces a notification.
   * The call to `email(...)` is not awaited (its return value is not used).
   *
   * @param {Object} data - Template locals; `name` is used in the subject and
   *   `diffPNG` (optional) is the path of the image to attach.
   * @param {string} newPath - Directory that contains `pug/email.pug`.
   * @returns {Promise<void>}
   */
  async sendSMTP(data, newPath) {
    const attachments = data.diffPNG ? [{ 'path': data.diffPNG }] : [];

    const html = pug.renderFile(`${newPath}/pug/email.pug`, data);

    email({
      'from': 'Aida <aida@caliban.io>',
      'to': 'Martin <martind2000@gmail.com>',
      'subject': `ChangeDetection: ${data.name}`,
      'html': html,
      'attachments': attachments
    });
  }

  /**
   * Build the output path for a diff image: same directory and extension as
   * `today`, with `_diff` appended to the file name.
   *
   * @param {string} today - Path of the current screenshot.
   * @returns {string} Path of the diff image.
   */
  _diffPathFor(today) {
    const { dir, root, ext, name } = path.parse(today);

    return path.format({ dir, root, ext, 'name': `${name}_diff` });
  }

  /**
   * Compare two screenshots with resemblejs and write the diff image next to
   * the current screenshot.
   *
   * @param {string} previous - Path of the earlier screenshot.
   * @param {string} today - Path of the current screenshot.
   * @returns {Promise<string>} Path of the written diff image.
   */
  async generateDiffScreenshotV2(previous, today) {
    const endFilename = this._diffPathFor(today);

    const options = {
      'output': {
        'errorColor': {
          'red': 255,
          'green': 0,
          'blue': 255
        },
        'errorType': 'movement',
        'transparency': 0.3,
        'largeImageThreshold': 1200,
        'useCrossOrigin': false,
        'outputDiff': true
      },
      'scaleToSameSize': false,
      'ignore': 'colors'
    };

    const img1 = fs.readFileSync(previous);
    const img2 = fs.readFileSync(today);

    const data = await compareImages(img1, img2, options);

    logger.debug('diffFilename', endFilename);

    fs.writeFileSync(endFilename, data.getBuffer());

    return endFilename;
  }

  /**
   * Compare two PNG screenshots with pixelmatch and write the diff image next
   * to the current screenshot. The diff canvas uses the dimensions of the
   * previous image.
   *
   * @param {string} previous - Path of the earlier PNG.
   * @param {string} today - Path of the current PNG.
   * @returns {Promise<string>} Path of the written diff image.
   */
  async generateDiffScreenshot(previous, today) {
    const endFilename = this._diffPathFor(today);

    const img1 = PNG.sync.read(fs.readFileSync(previous));
    const img2 = PNG.sync.read(fs.readFileSync(today));
    const { width, height } = img1;
    const diff = new PNG({ width, height });

    pixelmatch(img1.data, img2.data, diff.data, width, height, { 'threshold': 0.1 });

    logger.debug('diffFilename', endFilename);

    fs.writeFileSync(endFilename, PNG.sync.write(diff));

    return endFilename;
  }

  /**
   * Check a single watched page for text changes.
   *
   * First sighting: stores the page text and a screenshot, and records stats.
   * Later runs: diffs the stored text against the live text; on any change it
   * updates the stats, takes a new screenshot, generates a visual diff when a
   * previous screenshot is available, overwrites the stored text and sends
   * the notification email.
   *
   * @param {Object} item - Settings entry; `name` and `url` are read here and
   *   the whole entry is merged into the email template data.
   * @returns {Promise<void>}
   */
  async processItem(item) {
    logger.debug(`Processing ${item.name}...`);

    const insRegEx = /<ins>/g;

    const now = new Date();
    const filename = _.kebabCase(item.name);
    const oldFile = `${this.path}/${filename}.html`;
    const stats = this.stats.get(filename) || { 'lastSaved': now, 'lastChanged': null };

    await this._goto(item.url);

    await this._randomWait(this.page, 3, 5);

    const innerText = await this.page.evaluate(() => {
      return {
        'body': document.body.innerText
      };
    });

    const timestamp = dateFormat(now, 'yyyymmddHHMM');
    // Stored without extension; '.png' is appended below when the file is
    // read back - presumably _makeScreenshotV2 adds it when writing (verify).
    const screenshotPath = `${this.path}/screenshots/${filename}-${timestamp}`;

    if (!fs.existsSync(oldFile)) {
      // First time this page is seen: just record the baseline.
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
      stats.screenshot = screenshotPath;
      await this._makeScreenshotV2(this.page, screenshotPath, null);

      this.stats.set(filename, stats);

      return;
    }

    const previousFile = fs.readFileSync(oldFile, 'utf-8');

    const diff = new Diff();
    const textDiff = diff.main(previousFile, innerText.body); // produces diff array
    // NOTE(review): cleanupSemantic appears to rewrite textDiff in place, so it
    // must stay before levenshtein()/prettyHtml() - confirm against text-diff.
    const cleanedDiff = diff.cleanupSemantic(textDiff);
    const levenshtein = diff.levenshtein(textDiff);

    logger.debug('levenshtein:', levenshtein);

    logger.debug('cleanedDiff:', cleanedDiff);

    if (levenshtein === 0) {
      logger.debug('No change...');

      return;
    }

    logger.info('Changed...');

    stats.previousScreenshot = stats.screenshot;
    stats.previousChange = stats.lastSaved;
    stats.lastSaved = now;
    stats.lastChanged = now;
    stats.screenshot = screenshotPath;
    stats.levenshtein = levenshtein;
    stats.since = time.since(new Date(stats.previousChange)).days();

    // Inline styling so the inserted text is highlighted in the email body.
    stats.changed = diff
      .prettyHtml(textDiff)
      .replace(insRegEx, `<ins style="background-color: #ffff99;display:inline;">`);

    await this._makeScreenshotV2(this.page, screenshotPath, null);

    // The previous screenshot can be missing (e.g. stats were lost while the
    // text snapshot survived). Skip the visual diff instead of aborting the
    // whole run, and clear any stale diff path left from an earlier change.
    const previousPng = stats.previousScreenshot ? `${stats.previousScreenshot}.png` : null;

    if (previousPng && fs.existsSync(previousPng))
      stats.diffPNG = await this.generateDiffScreenshotV2(previousPng, `${screenshotPath}.png`);
    else {
      logger.warn(`No previous screenshot for ${item.name}, skipping visual diff`);
      stats.diffPNG = null;
    }

    await this._randomWait(this.page, 3, 5);

    fs.writeFileSync(oldFile, innerText.body, 'utf-8');
    this.stats.set(filename, stats);

    const pugData = { ...stats, ...item}; // eslint-disable-line

    await this.sendSMTP(pugData, './');
  }

  /**
   * Process every entry of `this.settings` one after another, so only one
   * navigation uses `this.page` at a time.
   *
   * @returns {Promise<void>}
   */
  async processItems() {
    for (const item of this.settings) {
      await this.processItem(item);
    }
  }

  /**
   * Single-page variant: compares the text of the currently loaded page with
   * `previous.html` in the artefact path, logs the HTML diff when it changed
   * and overwrites the stored text. On the first run it only stores the text.
   *
   * @returns {Promise<void>}
   */
  async processOld() {
    const oldFile = `${this.path}/previous.html`;

    const innerText = await this.page.evaluate(() => {
      return {
        'body': document.body.innerText
      };
    });

    if (!fs.existsSync(oldFile)) {
      // Store the freshly scraped text as the baseline.
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');

      return;
    }

    const previousFile = fs.readFileSync(oldFile, 'utf-8');

    const diff = new Diff();
    const textDiff = diff.main(previousFile, innerText.body); // produces diff array
    const levenshtein = diff.levenshtein(textDiff);

    logger.debug('levenshtein:', levenshtein);

    if (levenshtein !== 0) {
      logger.debug(diff.prettyHtml(textDiff));

      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
    }
  }

  /**
   * Prepare the artefact directories and the browser page used by the run.
   *
   * @returns {Promise<void>}
   * @throws {Error} The original error from any setup step (non-Error values
   *   are wrapped in an Error).
   */
  async start() {
    await super._start();

    try {
      this.startPage = 'https://www.harmankardon.co.uk/outlet/';

      // Result is currently unused; the call is kept because notARobot is
      // defined outside this file and may have side effects - TODO confirm.
      ChangeDetection.notARobot();

      await this.setPath(path.resolve(`${__dirname}/../artefacts`));

      await this._createDirectory(`${this.path}/screenshots`);

      await this._initBrowser(true);
      await this._createBrowserPage();

      await this.page.setViewport({ 'width': 1200, 'height': 800 });

      await this._randomWait(this.page, 3, 5);

      logger.debug('Started..');
    }
    catch (e) {
      logger.error('Start failed:', e);

      // Rethrow the original error so its stack and type are preserved.
      throw e instanceof Error ? e : new Error(e);
    }
  }

  /**
   * Load the watch list and the persisted per-page stats.
   *
   * `settings.json` is read relative to the current working directory;
   * `stats.json` is read from the artefact path and is optional.
   *
   * @returns {Promise<void>}
   */
  async loadSettings() {
    logger.debug('Load settings...');
    const statsFile = `${this.path}/stats.json`;

    this.settings = jsonfile.readFileSync('settings.json');

    let stats = [];

    if (fs.existsSync(statsFile))
      stats = jsonfile.readFileSync(statsFile) || [];

    // Stats are persisted as an array of [key, value] pairs.
    this.stats = new Map(stats);
  }

  /**
   * Persist the per-page stats as an array of [key, value] pairs in
   * `stats.json` inside the artefact path.
   *
   * @returns {Promise<void>}
   */
  async saveSettings() {
    logger.debug('Save settings...');
    const statsFile = `${this.path}/stats.json`;

    const stats = [...this.stats];

    jsonfile.writeFileSync(statsFile, stats);
  }

  /**
   * Full run: start the browser, load settings and stats, check every
   * configured page, persist the stats and finish via `_done`.
   * Errors are not caught here and propagate to the caller of `run()`.
   *
   * @returns {Promise<void>}
   */
  async __run() {
    logger.debug('run');
    await this.start();

    await this.loadSettings();

    logger.debug('Running...');

    await this.processItems();

    await this.saveSettings();

    await this._done();
  }

}
|
|
|
|
// The class itself is exported; callers create an instance and call `run()`.
module.exports = ChangeDetection;
|