diff --git a/.gitignore b/.gitignore index 7fb3689..eb38d52 100644 --- a/.gitignore +++ b/.gitignore @@ -140,11 +140,11 @@ fabric.properties -artefacts/screenshots/*.png +# artefacts/screenshots/*.png artefacts/*.txt artefacts/*.json -artefacts/*.html -artefacts/* +# artefacts/*.html +# artefacts/* /tests/*.zip diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..c80db0e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,27 @@ +FROM node:stretch +ARG VERSION +ENV VERSION ${VERSION:-development} + +RUN echo udev hold | dpkg --set-selections;\ + echo initscripts hold | dpkg --set-selections;\ + apt-get -yq update;\ + DEBIAN_FRONTEND=noninteractive apt-get install -yq -f --no-install-recommends build-essential dnsutils git xorg blackbox libasound2 libnss3-dev libxss1 libatk-bridge2.0-0 libgtk2.0-common libgtk-3-0 ;\ + apt-get autoremove -yq ;\ + apt-get clean -yq + +WORKDIR /app + +COPY start.sh package.json *.js settings.json /app/ +COPY lib/ /app/lib +COPY scrapers/ /app/scrapers +COPY pug/ /app/pug + +RUN npm install pm2 -g && npm install + +# RUN npm install + +# COPY start.sh /app/ + +RUN chmod +x /app/start.sh + +ENTRYPOINT ["/app/start.sh"] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..6b11ba4 --- /dev/null +++ b/Makefile @@ -0,0 +1,32 @@ +PROJECT = changedetection +VERSION = $(shell git rev-parse --short HEAD) + +APP_IMAGE = $(PROJECT):$(VERSION) +NO_CACHE = false + +#build docker image +build: + # docker build . -t $(APP_IMAGE) --build-arg VERSION=$(VERSION) --no-cache=$(NO_CACHE) + # tar -C ./ -czvf ./archive.tar.gz 'package.json' 'ncas/' 'helpers/' -X *.js + docker build . -t $(APP_IMAGE) --build-arg VERSION=$(VERSION) --no-cache=$(NO_CACHE) +.PHONY: build + +#push docker image to registry +push: build + docker push $(APP_IMAGE) +.PHONY: push + +#run docker image +run: build + docker run $(APP_IMAGE) +.PHONY: run +ver: + @echo '$(VERSION)' + #echo $ERSION +.PHONY: ver + +tar: + # docker build . 
-t $(APP_IMAGE) --build-arg VERSION=$(VERSION) --no-cache=$(NO_CACHE) + tar -C ./ -czvf ./archive.tar.gz 'package.json' 'ncas/' 'helpers/' -X *.js + +.PHONY: build diff --git a/ecosystem.config.js b/ecosystem.config.js new file mode 100644 index 0000000..9830de4 --- /dev/null +++ b/ecosystem.config.js @@ -0,0 +1,97 @@ +require('dotenv').config(); +const dateFormat = require('dateformat'); + +function buildApps() { + // proxies = ['uk', 'fr', 'de', 'nl', 'ch']; + + const debugCron = process.env['debugCron'] || false; + const cronBump = process.env['cronBump'] || false; + const baseDate = new Date(); + let startCronMS = baseDate.getTime() + ( 5 * (60 * 1000)); + + console.log(`debugCron:${debugCron} // cronBump:${cronBump}`); + const apps = []; + const list = [ + { 'cron':'IE_CRON', 'start':'IE', 'name':'IE', 'script':'ie.js', 'proxy': 'uk', 'crontime': '0 0 * * *' }, // 00:04:40 + { 'cron':'LU_CRON', 'start':'LU', 'name':'LU', 'script':'lu.js', 'proxy': 'uk', 'crontime': '10 0 * * *' }, // "01:09:45.187" + { 'cron':'IT_CRON', 'start':'IT', 'name':'IT', 'script':'it.js', 'proxy': 'uk', 'crontime': '10 1 * * *' }, // 04:51:37 - uk free at 6:30 + { 'cron':'CZ_CRON', 'start':'CZ', 'name':'CZ', 'script':'cz.js', 'proxy': 'uk', 'crontime': '20 6 * * *' }, // "00:24:01.696" + { 'cron':'PT_CRON', 'start':'PT', 'name':'PT', 'script':'pt.js', 'proxy': 'uk', 'crontime': '0 7 * * *' }, // "00:53:02.432" + { 'cron':'CY_CRON', 'start':'CY', 'name':'CY', 'script':'cy.js', 'proxy': 'fr', 'crontime': '0 0 * * *' }, // 00:01:03 + { 'cron':'SE_CRON', 'start':'SE', 'name':'SE', 'script':'se.js', 'proxy': 'fr', 'crontime': '5 0 * * *' }, // 00:43:45 + { 'cron':'FR_CRON', 'start':'FR', 'name':'FR', 'script':'fr.js', 'proxy': 'fr', 'crontime': '0 1 * * *' }, // 01:22:29 + { 'cron':'LT_CRON', 'start':'LT', 'name':'LT', 'script':'lt.js', 'proxy': 'fr', 'crontime': '30 2 * * *' }, // "00:54:28.134" + { 'cron':'SK_CRON', 'start':'SK', 'name':'SK', 'script':'sk.js', 'proxy': 'fr', 'crontime': 
'30 3 * * *' }, // 00:24:03 - fr free at 4:00 + { 'cron':'DE_CRON', 'start':'DE', 'name':'DE', 'script':'de.js', 'proxy': 'de', 'crontime': '0 0 * * *' }, // 03:55:38 - de free at 4:00 + { 'cron':'NL_CRON', 'start':'NL', 'name':'NL', 'script':'nl.js', 'proxy': 'nl', 'crontime': '0 0 * * *' }, // 07:23:19 - nl free at 7:30 + { 'cron':'PL_CRON', 'start':'PL', 'name':'PL', 'script':'pl.js', 'proxy': 'ch', 'crontime': '0 0 * * *' }, // 17:59:18 - ch free at 18:00 + { 'cron':'LV_CRON', 'start':'LV', 'name':'LV', 'script':'lv.js', 'proxy': 'nl', 'crontime': '30 7 * * *' }, // 13:56.232 - nl free at 7:45 + { 'cron':'DK_CRON', 'start':'DK', 'name':'DK', 'script':'dk.js', 'proxy': 'de', 'crontime': '0 4 * * *' }, // 11:08.616 - de free at 4:15 + { 'cron':'ES_CRON', 'start':'ES', 'name':'ES', 'script':'es.js', 'proxy': 'de', 'crontime': '15 4 * * *' }, // 36:44.523- de free at 4:55 + { 'cron':'EE_CRON', 'start':'EE', 'name':'EE', 'script':'ee.js', 'proxy': 'de', 'crontime': '0 5 * * *' }, // 05:22:04.226 - de free after 10:30 + { 'cron':'NO_CRON', 'start':'NO', 'name':'NO', 'script':'no.js', 'proxy': 'fr', 'crontime': '0 4 * * *' }, // 05:12:57.792 - fr free after 9:20 + { 'cron':'GI_CRON', 'start':'GI', 'name':'GI', 'script':'gi.js', 'proxy': 'uk' }, + { 'cron':'GR_CRON', 'start':'GR', 'name':'GR', 'script':'gr.js', 'proxy': 'uk' }, + { 'cron':'MT_CRON', 'start':'MT', 'name':'MT', 'script':'mt.js', 'proxy': 'uk' }, + { 'cron':'BG_CRON', 'start':'BG', 'name':'BG', 'script':'bg.js', 'proxy': 'uk' }, + { 'cron':'AT_CRON', 'start':'AT', 'name':'AT', 'script':'at.js', 'proxy': 'uk' }, + { 'cron':'FI_CRON', 'start':'FI', 'name':'FI', 'script':'fi.js', 'proxy': 'uk' }, + { 'cron':'BE_CRON', 'start':'BE', 'name':'BE', 'script':'be.js', 'proxy': 'uk' } + ]; + + apps.push({ + 'name' : 'watcher', + 'script' : 'helpers/watcher.js', + + 'env': { + 'NODE_ENV': 'production' + }, + 'autorestart' : true, + 'max_restarts': 3, + 'restart_delay': 4000 + }); + + for(const item of list) + + if 
((typeof process.env[item.cron] !== 'undefined' || process.env.SCRAPE_START === item.start)) { + const proxyUri = `${item.proxy}.proxymesh.com:31280`; + + const newItem = { + 'name' : item.name, + 'script' : item.script, + + 'env': { + 'NODE_ENV': 'production', + 'PROXY_URI' : proxyUri + }, + 'autorestart' : true, + 'max_restarts': 3, + 'restart_delay': 4000 + }; + + if (typeof process.env[item.cron] !== 'undefined') { + newItem.env[item.cron] = (debugCron !== false) ? process.env[item.cron] : item.crontime; + if (cronBump !== false) { + newItem.env[item.cron] = dateFormat(startCronMS, 'M H "* * *"'); + + startCronMS = startCronMS + ( 2 * (60 * 1000)); + } + } + + apps.push(newItem); + } + + const version = process.env.VERSION || 'NO VERSION!'; + + console.log('*****************************'); + console.log(`LAUNCHING VERSION: ${version}`); + console.log('*****************************'); + + console.log(JSON.stringify(apps)); + + return apps; +} + +module.exports = { + 'apps' : buildApps() +}; diff --git a/package-lock.json b/package-lock.json index 4e5a6b1..dbcaef7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1538,6 +1538,19 @@ "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", "integrity": "sha1-elfrVQpng/kRUzH89GY9XI4AelA=" }, + "pixelmatch": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/pixelmatch/-/pixelmatch-5.1.0.tgz", + "integrity": "sha512-HqtgvuWN12tBzKJf7jYsc38Ha28Q2NYpmBL9WostEGgDHJqbTLkjydZXL1ZHM02ZnB+Dkwlxo87HBY38kMiD6A==", + "requires": { + "pngjs": "^3.4.0" + } + }, + "pngjs": { + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/pngjs/-/pngjs-3.4.0.tgz", + "integrity": "sha512-NCrCHhWmnQklfH4MtJMRjZ2a8c80qXeMlQMv2uVp9ISJMTt562SbGd6n2oq0PaPgKm7Z6pL9E2UlLIhC+SHL3w==" + }, "prelude-ls": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.1.2.tgz", diff --git a/package.json b/package.json index e49185e..cd64a4f 100644 --- a/package.json +++ b/package.json @@ 
-17,6 +17,8 @@ "lodash": "^4.17.15", "log4js": "^5.1.0", "node-localstorage": "^1.3.1", + "pixelmatch": "^5.1.0", + "pngjs": "^3.4.0", "pug": "^2.0.4", "puppeteer": "^1.19.0", "smtp-email-sender": "^1.0.0", diff --git a/scrapers/scraper.js b/scrapers/scraper.js index 1551cfe..1f5e0f7 100644 --- a/scrapers/scraper.js +++ b/scrapers/scraper.js @@ -12,6 +12,11 @@ const Diff = require('text-diff'); const time = require("time-since"); const pug = require('pug'); + +const PNG = require('pngjs').PNG; +const pixelmatch = require('pixelmatch'); + + const email = require('smtp-email-sender')({ 'host': 'mail.caliban.io', 'port': '465', @@ -41,12 +46,12 @@ class ChangeDetection extends Scraper { logger.debug(pug.renderFile(`${newpath}/` + 'pug/email.pug', data)); } - sendSMTP(data, newPath) { + async sendSMTP(data, newPath) { const now = new Date(); const attachments = [ { - path:`${data.screenshot}.png` + path:data.diffPNG } ]; @@ -60,6 +65,35 @@ class ChangeDetection extends Scraper { }); } + async generateDiffScreenshot(previous, today) { + + let {dir, root, ext, name} = path.parse(today); + + const img1 = PNG.sync.read(fs.readFileSync(previous)); + const img2 = PNG.sync.read(fs.readFileSync(today)); + const {width, height} = img1; + const diff = new PNG({width, height}); + + pixelmatch(img1.data, img2.data, diff.data, width, height, {threshold: 0.1}); + + + name = name.concat('_diff'); + + + const endFilename = path.format({dir, root, ext, name}); + + + + logger.debug('diffFilename', endFilename); + + fs.writeFileSync(endFilename, PNG.sync.write(diff)); + + return endFilename; + + + } + + async processItem(item) { logger.debug(`Processing ${item.name}...`); @@ -87,15 +121,19 @@ class ChangeDetection extends Scraper { const diff = new Diff(); // options may be passed to constructor; see below const textDiff = diff.main(previousFile, innerText.body); // produces diff array + const cleanedDiff = diff.cleanupSemantic(textDiff); const levenshtein = diff.levenshtein(textDiff); 
#!/bin/sh
# Container entry point: exports the service's SSM parameters as environment
# variables, prints the published puppeteer version, then hands over to pm2.
set -ex

# Without these the parameter path collapses to "///" and the service would
# start with no configuration at all, so fail early with a clear message.
: "${REGION:?REGION must be set}"
: "${SERVICE_NAME:?SERVICE_NAME must be set}"
: "${ENV:?ENV must be set}"

# Tracing is switched off while parameter values are handled; with `set -x`
# every generated `export NAME="value"` line would be echoed into the logs.
set +x

# Fetch first, on its own line: a failing `aws` call inside a pipeline is
# masked by the exit status of the last command, whereas a failing assignment
# aborts the script under `set -e`.
params="$(aws ssm get-parameters-by-path --region "$REGION" --path "/$SERVICE_NAME/$ENV/" --query 'Parameters[*].{Name:Name,Value:Value}' --output text)"

# NOTE(review): the values are eval'd as shell code; a value containing a
# double quote, `$` or a backtick will break or alter the generated exports.
if [ -n "$params" ]; then
  eval "$(printf '%s\n' "$params" | sed 's/\/'"$SERVICE_NAME"'\/'"$ENV"'\///g' | awk -F '\t' '{ print "export " $1 "=" "\""$2"\";" }')"
fi
unset params

set -x

npm show puppeteer version

pm2-runtime start ecosystem.config.js --raw --env production