diff snapshot and docker stuff

This commit is contained in:
Martin Donnelly 2019-10-23 16:20:00 +01:00
parent 9858a90912
commit 36fc54a52e
8 changed files with 228 additions and 7 deletions

6
.gitignore vendored
View File

@ -140,11 +140,11 @@ fabric.properties
artefacts/screenshots/*.png
# artefacts/screenshots/*.png
artefacts/*.txt
artefacts/*.json
artefacts/*.html
artefacts/*
# artefacts/*.html
# artefacts/*
/tests/*.zip

27
Dockerfile Normal file
View File

@ -0,0 +1,27 @@
# Image for the change-detection scrapers; the container is started through
# /app/start.sh.
FROM node:stretch

# VERSION is supplied with --build-arg and falls back to "development" when
# the argument is omitted.
ARG VERSION
ENV VERSION=${VERSION:-development}

# The two dpkg holds are best-effort (separated with ';'), but every apt step
# is chained with '&&' so that a failed update/install fails the build instead
# of being masked by the exit status of the final "apt-get clean".
# NOTE(review): the X/GTK/NSS packages are presumably the shared libraries
# the headless browser needs - confirm before trimming the list.
RUN echo udev hold | dpkg --set-selections; \
    echo initscripts hold | dpkg --set-selections; \
    apt-get -yq update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -yq -f --no-install-recommends build-essential dnsutils git xorg blackbox libasound2 libnss3-dev libxss1 libatk-bridge2.0-0 libgtk2.0-common libgtk-3-0 && \
    apt-get autoremove -yq && \
    apt-get clean -yq

WORKDIR /app

# Application sources: entrypoint, manifest, top-level scripts and settings,
# followed by the library, scraper and template directories.
COPY start.sh package.json *.js settings.json /app/
COPY lib/ /app/lib
COPY scrapers/ /app/scrapers
COPY pug/ /app/pug

# pm2 is installed globally, then the project dependencies from package.json.
RUN npm install pm2 -g && npm install

RUN chmod +x /app/start.sh
ENTRYPOINT ["/app/start.sh"]

32
Makefile Normal file
View File

@ -0,0 +1,32 @@
# Helpers for building, publishing and running the changedetection image.
PROJECT = changedetection
# The image tag is the short hash of the current git HEAD.
VERSION = $(shell git rev-parse --short HEAD)
APP_IMAGE = $(PROJECT):$(VERSION)
# Override on the command line (make build NO_CACHE=true) to skip the layer cache.
NO_CACHE = false

# build docker image
build:
	docker build . -t $(APP_IMAGE) --build-arg VERSION=$(VERSION) --no-cache=$(NO_CACHE)
.PHONY: build

# push docker image to registry
push: build
	docker push $(APP_IMAGE)
.PHONY: push

# run docker image (builds it first)
run: build
	docker run $(APP_IMAGE)
.PHONY: run

# print the version tag that would be used for the image
ver:
	@echo '$(VERSION)'
.PHONY: ver

# create archive.tar.gz from package.json, ncas/ and helpers/
# NOTE(review): tar's -X option reads exclude patterns from a file; confirm
# that "-X *.js" is intended rather than --exclude='*.js'.
tar:
	tar -C ./ -czvf ./archive.tar.gz 'package.json' 'ncas/' 'helpers/' -X *.js
.PHONY: tar

97
ecosystem.config.js Normal file
View File

@ -0,0 +1,97 @@
require('dotenv').config();
const dateFormat = require('dateformat');
/**
 * Builds the list of pm2 application definitions.
 *
 * The list always starts with the `watcher` helper. A scraper from the table
 * below is added when its `<CC>_CRON` environment variable is defined, or when
 * `SCRAPE_START` equals its `start` code.
 *
 * Environment variables read:
 *  - `<CC>_CRON`    enables the scraper and asks for a cron entry in its env.
 *  - `SCRAPE_START` enables a single scraper without setting a cron entry.
 *  - `debugCron`    when set, the cron expression is taken from `<CC>_CRON`
 *                   instead of the hard-coded `crontime`.
 *  - `cronBump`     when set, every cron-enabled scraper is rescheduled to a
 *                   daily slot starting five minutes from now, two minutes
 *                   apart, overriding both of the above.
 *  - `VERSION`      only logged.
 *
 * A scraper that has no hard-coded `crontime` always uses the expression from
 * its `<CC>_CRON` variable, so its cron env is never set to `undefined`.
 *
 * @returns {Array<Object>} pm2 app definitions (name, script, env, restart policy).
 */
function buildApps() {
  // proxies = ['uk', 'fr', 'de', 'nl', 'ch'];
  const debugCron = process.env['debugCron'] || false;
  const cronBump = process.env['cronBump'] || false;

  const baseDate = new Date();
  // First bumped slot is five minutes from now (only used when cronBump is set).
  let startCronMS = baseDate.getTime() + ( 5 * (60 * 1000));

  console.log(`debugCron:${debugCron} // cronBump:${cronBump}`);

  const apps = [];

  // Trailing comments record observed run durations and when each proxy is free again.
  const list = [
    { 'cron':'IE_CRON', 'start':'IE', 'name':'IE', 'script':'ie.js', 'proxy': 'uk', 'crontime': '0 0 * * *' }, // 00:04:40
    { 'cron':'LU_CRON', 'start':'LU', 'name':'LU', 'script':'lu.js', 'proxy': 'uk', 'crontime': '10 0 * * *' }, // "01:09:45.187"
    { 'cron':'IT_CRON', 'start':'IT', 'name':'IT', 'script':'it.js', 'proxy': 'uk', 'crontime': '10 1 * * *' }, // 04:51:37 - uk free at 6:30
    { 'cron':'CZ_CRON', 'start':'CZ', 'name':'CZ', 'script':'cz.js', 'proxy': 'uk', 'crontime': '20 6 * * *' }, // "00:24:01.696"
    { 'cron':'PT_CRON', 'start':'PT', 'name':'PT', 'script':'pt.js', 'proxy': 'uk', 'crontime': '0 7 * * *' }, // "00:53:02.432"
    { 'cron':'CY_CRON', 'start':'CY', 'name':'CY', 'script':'cy.js', 'proxy': 'fr', 'crontime': '0 0 * * *' }, // 00:01:03
    { 'cron':'SE_CRON', 'start':'SE', 'name':'SE', 'script':'se.js', 'proxy': 'fr', 'crontime': '5 0 * * *' }, // 00:43:45
    { 'cron':'FR_CRON', 'start':'FR', 'name':'FR', 'script':'fr.js', 'proxy': 'fr', 'crontime': '0 1 * * *' }, // 01:22:29
    { 'cron':'LT_CRON', 'start':'LT', 'name':'LT', 'script':'lt.js', 'proxy': 'fr', 'crontime': '30 2 * * *' }, // "00:54:28.134"
    { 'cron':'SK_CRON', 'start':'SK', 'name':'SK', 'script':'sk.js', 'proxy': 'fr', 'crontime': '30 3 * * *' }, // 00:24:03 - fr free at 4:00
    { 'cron':'DE_CRON', 'start':'DE', 'name':'DE', 'script':'de.js', 'proxy': 'de', 'crontime': '0 0 * * *' }, // 03:55:38 - de free at 4:00
    { 'cron':'NL_CRON', 'start':'NL', 'name':'NL', 'script':'nl.js', 'proxy': 'nl', 'crontime': '0 0 * * *' }, // 07:23:19 - nl free at 7:30
    { 'cron':'PL_CRON', 'start':'PL', 'name':'PL', 'script':'pl.js', 'proxy': 'ch', 'crontime': '0 0 * * *' }, // 17:59:18 - ch free at 18:00
    { 'cron':'LV_CRON', 'start':'LV', 'name':'LV', 'script':'lv.js', 'proxy': 'nl', 'crontime': '30 7 * * *' }, // 13:56.232 - nl free at 7:45
    { 'cron':'DK_CRON', 'start':'DK', 'name':'DK', 'script':'dk.js', 'proxy': 'de', 'crontime': '0 4 * * *' }, // 11:08.616 - de free at 4:15
    { 'cron':'ES_CRON', 'start':'ES', 'name':'ES', 'script':'es.js', 'proxy': 'de', 'crontime': '15 4 * * *' }, // 36:44.523- de free at 4:55
    { 'cron':'EE_CRON', 'start':'EE', 'name':'EE', 'script':'ee.js', 'proxy': 'de', 'crontime': '0 5 * * *' }, // 05:22:04.226 - de free after 10:30
    { 'cron':'NO_CRON', 'start':'NO', 'name':'NO', 'script':'no.js', 'proxy': 'fr', 'crontime': '0 4 * * *' }, // 05:12:57.792 - fr free after 9:20
    { 'cron':'GI_CRON', 'start':'GI', 'name':'GI', 'script':'gi.js', 'proxy': 'uk' },
    { 'cron':'GR_CRON', 'start':'GR', 'name':'GR', 'script':'gr.js', 'proxy': 'uk' },
    { 'cron':'MT_CRON', 'start':'MT', 'name':'MT', 'script':'mt.js', 'proxy': 'uk' },
    { 'cron':'BG_CRON', 'start':'BG', 'name':'BG', 'script':'bg.js', 'proxy': 'uk' },
    { 'cron':'AT_CRON', 'start':'AT', 'name':'AT', 'script':'at.js', 'proxy': 'uk' },
    { 'cron':'FI_CRON', 'start':'FI', 'name':'FI', 'script':'fi.js', 'proxy': 'uk' },
    { 'cron':'BE_CRON', 'start':'BE', 'name':'BE', 'script':'be.js', 'proxy': 'uk' }
  ];

  // The watcher helper is always launched.
  apps.push({
    'name' : 'watcher',
    'script' : 'helpers/watcher.js',
    'env': {
      'NODE_ENV': 'production'
    },
    'autorestart' : true,
    'max_restarts': 3,
    'restart_delay': 4000
  });

  for (const item of list) {
    const cronRequested = typeof process.env[item.cron] !== 'undefined';

    if (!cronRequested && process.env.SCRAPE_START !== item.start) {
      continue;
    }

    const proxyUri = `${item.proxy}.proxymesh.com:31280`;
    const newItem = {
      'name' : item.name,
      'script' : item.script,
      'env': {
        'NODE_ENV': 'production',
        'PROXY_URI' : proxyUri
      },
      'autorestart' : true,
      'max_restarts': 3,
      'restart_delay': 4000
    };

    if (cronRequested) {
      // The environment value wins in debug mode, and is also the only
      // available expression for scrapers without a hard-coded crontime.
      const useEnvCron = debugCron !== false || typeof item.crontime === 'undefined';
      newItem.env[item.cron] = useEnvCron ? process.env[item.cron] : item.crontime;

      if (cronBump !== false) {
        // "minute hour * * *" for the next free slot, then advance two minutes.
        newItem.env[item.cron] = dateFormat(startCronMS, 'M H "* * *"');
        startCronMS = startCronMS + ( 2 * (60 * 1000));
      }
    }

    apps.push(newItem);
  }

  const version = process.env.VERSION || 'NO VERSION!';

  console.log('*****************************');
  console.log(`LAUNCHING VERSION: ${version}`);
  console.log('*****************************');
  console.log(JSON.stringify(apps));

  return apps;
}
// Export the application list under the `apps` key. buildApps() runs once,
// when this file is loaded, so the list reflects the environment at that time.
module.exports = {
'apps' : buildApps()
};

13
package-lock.json generated
View File

@ -1538,6 +1538,19 @@
"resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
"integrity": "sha1-elfrVQpng/kRUzH89GY9XI4AelA="
},
"pixelmatch": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/pixelmatch/-/pixelmatch-5.1.0.tgz",
"integrity": "sha512-HqtgvuWN12tBzKJf7jYsc38Ha28Q2NYpmBL9WostEGgDHJqbTLkjydZXL1ZHM02ZnB+Dkwlxo87HBY38kMiD6A==",
"requires": {
"pngjs": "^3.4.0"
}
},
"pngjs": {
"version": "3.4.0",
"resolved": "https://registry.npmjs.org/pngjs/-/pngjs-3.4.0.tgz",
"integrity": "sha512-NCrCHhWmnQklfH4MtJMRjZ2a8c80qXeMlQMv2uVp9ISJMTt562SbGd6n2oq0PaPgKm7Z6pL9E2UlLIhC+SHL3w=="
},
"prelude-ls": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.1.2.tgz",

View File

@ -17,6 +17,8 @@
"lodash": "^4.17.15",
"log4js": "^5.1.0",
"node-localstorage": "^1.3.1",
"pixelmatch": "^5.1.0",
"pngjs": "^3.4.0",
"pug": "^2.0.4",
"puppeteer": "^1.19.0",
"smtp-email-sender": "^1.0.0",

View File

@ -12,6 +12,11 @@ const Diff = require('text-diff');
const time = require("time-since");
const pug = require('pug');
const PNG = require('pngjs').PNG;
const pixelmatch = require('pixelmatch');
const email = require('smtp-email-sender')({
'host': 'mail.caliban.io',
'port': '465',
@ -41,12 +46,12 @@ class ChangeDetection extends Scraper {
logger.debug(pug.renderFile(`${newpath}/` + 'pug/email.pug', data));
}
sendSMTP(data, newPath) {
async sendSMTP(data, newPath) {
const now = new Date();
const attachments = [
{
path:`${data.screenshot}.png`
path:data.diffPNG
}
];
@ -60,6 +65,35 @@ class ChangeDetection extends Scraper {
});
}
async generateDiffScreenshot(previous, today) {
let {dir, root, ext, name} = path.parse(today);
const img1 = PNG.sync.read(fs.readFileSync(previous));
const img2 = PNG.sync.read(fs.readFileSync(today));
const {width, height} = img1;
const diff = new PNG({width, height});
pixelmatch(img1.data, img2.data, diff.data, width, height, {threshold: 0.1});
name = name.concat('_diff');
const endFilename = path.format({dir, root, ext, name});
logger.debug('diffFilename', endFilename);
fs.writeFileSync(endFilename, PNG.sync.write(diff));
return endFilename;
}
async processItem(item) {
logger.debug(`Processing ${item.name}...`);
@ -87,15 +121,19 @@ class ChangeDetection extends Scraper {
const diff = new Diff(); // options may be passed to constructor; see below
const textDiff = diff.main(previousFile, innerText.body); // produces diff array
const cleanedDiff = diff.cleanupSemantic(textDiff);
const levenshtein = diff.levenshtein(textDiff);
logger.debug('levenshtein:', levenshtein);
logger.debug('cleanedDiff:',cleanedDiff );
if (levenshtein !== 0) {
logger.info('Changed...');
const timestamp = dateFormat(now, 'yyyymmddHHMM');
const screenshotPath = `${this.path}/screenshots/${filename}-${timestamp}`;
stats.previousScreenshot = stats.screenshot;
stats.previousChange = stats.lastSaved;
stats.lastSaved = now;
stats.lastChanged = now;
@ -106,6 +144,8 @@ class ChangeDetection extends Scraper {
await this._makeScreenshotV2(this.page, screenshotPath, null);
stats.diffPNG = await this.generateDiffScreenshot(stats.previousScreenshot.concat('.png'), screenshotPath.concat('.png'));
await this._randomWait(this.page, 3, 5);
fs.writeFileSync(oldFile, innerText.body, 'utf-8');
@ -113,8 +153,10 @@ class ChangeDetection extends Scraper {
const pugData = {...stats, ...item};
console.log(pugData);
this.pugTest(pugData, './');
// console.log(pugData);
await this.sendSMTP(pugData, './');
} else {
logger.debug('No change...');
}
}
}

8
start.sh Normal file
View File

@ -0,0 +1,8 @@
#!/bin/sh
# Container entrypoint: loads configuration from AWS SSM Parameter Store into
# the environment, then hands over to pm2-runtime.
#
# Expects REGION, SERVICE_NAME and ENV to be set; parameters are read from the
# path /$SERVICE_NAME/$ENV/ and exported under their name with that prefix
# stripped.
set -e

# Command tracing is deliberately off while the parameters are loaded, so
# their values are not echoed into the container log.
# NOTE(review): the values are wrapped in double quotes and eval'd, so a value
# containing a double quote, "$" or a backtick would be interpreted by the
# shell - confirm the stored parameters are plain strings.
eval "$(aws ssm get-parameters-by-path --region "$REGION" --path "/$SERVICE_NAME/$ENV/" --query 'Parameters[*].{Name:Name,Value:Value}' --output text | sed 's/\/'"$SERVICE_NAME"'\/'"$ENV"'\///g' | awk -F '\t' '{ print "export " $1 "=" "\""$2"\";" }')"

set -x

# NOTE(review): "npm show" queries the registry rather than node_modules; if
# the installed version is wanted, "npm ls puppeteer" is the command to use.
npm show puppeteer version

# exec replaces this shell so pm2-runtime receives container signals directly.
exec pm2-runtime start ecosystem.config.js --raw --env production