diff snap shop and docker stuff
This commit is contained in:
parent
9858a90912
commit
36fc54a52e
6
.gitignore
vendored
6
.gitignore
vendored
@ -140,11 +140,11 @@ fabric.properties
|
||||
|
||||
|
||||
|
||||
artefacts/screenshots/*.png
|
||||
# artefacts/screenshots/*.png
|
||||
artefacts/*.txt
|
||||
artefacts/*.json
|
||||
artefacts/*.html
|
||||
artefacts/*
|
||||
# artefacts/*.html
|
||||
# artefacts/*
|
||||
|
||||
/tests/*.zip
|
||||
|
||||
|
27
Dockerfile
Normal file
27
Dockerfile
Normal file
@ -0,0 +1,27 @@
|
||||
# Image for the changedetection scrapers; the entrypoint script starts them under pm2.
FROM node:stretch

# VERSION is passed at build time (--build-arg VERSION=...) and kept in the
# runtime environment; it falls back to "development" when no build arg is given.
ARG VERSION
ENV VERSION=${VERSION:-development}

# The two "hold" selections are best-effort (separated with ';'). The apt steps
# are chained with '&&' so a failed update/install aborts the build instead of
# silently producing an image with missing libraries. The apt lists are removed
# in the same layer to keep them out of the image.
# NOTE(review): the X11/GTK/NSS packages look like the shared libraries a
# puppeteer-driven Chromium needs - confirm before trimming the list.
RUN echo udev hold | dpkg --set-selections; \
    echo initscripts hold | dpkg --set-selections; \
    apt-get -yq update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -yq -f --no-install-recommends build-essential dnsutils git xorg blackbox libasound2 libnss3-dev libxss1 libatk-bridge2.0-0 libgtk2.0-common libgtk-3-0 \
    && apt-get autoremove -yq \
    && apt-get clean -yq \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# NOTE(review): ecosystem.config.js launches helpers/watcher.js, but no helpers/
# directory is copied below - confirm whether it should be added.
# NOTE(review): start.sh calls the `aws` CLI, which nothing in this file
# installs - confirm it is provided some other way.
COPY start.sh package.json *.js settings.json /app/
COPY lib/ /app/lib
COPY scrapers/ /app/scrapers
COPY pug/ /app/pug

# pm2 is installed globally because start.sh invokes pm2-runtime from PATH.
RUN npm install pm2 -g && npm install

RUN chmod +x /app/start.sh

ENTRYPOINT ["/app/start.sh"]
|
32
Makefile
Normal file
32
Makefile
Normal file
@ -0,0 +1,32 @@
|
||||
PROJECT = changedetection
# ':=' runs git once when the Makefile is parsed instead of on every expansion.
VERSION := $(shell git rev-parse --short HEAD)

APP_IMAGE = $(PROJECT):$(VERSION)
# Override on the command line (make build NO_CACHE=true) to force a clean build.
NO_CACHE = false

# None of these targets produce a file with the same name as the target.
.PHONY: build push run ver tar

# Build the docker image, tagged with the short git hash.
build:
	docker build . -t $(APP_IMAGE) --build-arg VERSION=$(VERSION) --no-cache=$(NO_CACHE)

# Push the docker image to the registry.
push: build
	docker push $(APP_IMAGE)

# Build, then run the docker image locally.
run: build
	docker run $(APP_IMAGE)

# Print the version (short git hash) the image would be tagged with.
ver:
	@echo '$(VERSION)'

# Bundle sources into archive.tar.gz.
# NOTE(review): tar's -X takes an exclude *file*; with the shell expanding *.js,
# the first match is read as an exclude list and the rest are added as members.
# If the intent was to skip JavaScript files, this should probably be
# --exclude='*.js' - confirm before changing.
tar:
	tar -C ./ -czvf ./archive.tar.gz 'package.json' 'ncas/' 'helpers/' -X *.js
|
97
ecosystem.config.js
Normal file
97
ecosystem.config.js
Normal file
@ -0,0 +1,97 @@
|
||||
require('dotenv').config();
|
||||
const dateFormat = require('dateformat');
|
||||
|
||||
/**
 * Builds the list of pm2 app definitions for this ecosystem file.
 *
 * The `watcher` app is always included. A scraper from the table below is
 * included when its `<XX>_CRON` environment variable is defined, or when
 * `SCRAPE_START` equals its `start` code.
 *
 * When the scraper was enabled through its `<XX>_CRON` variable, that variable
 * is forwarded to the app's env with a value chosen as follows:
 *  - `cronBump` set:  a generated schedule, the first one five minutes from
 *                     now and each following scraper two minutes later;
 *  - `debugCron` set: the value supplied in the environment;
 *  - otherwise:       the `crontime` from the table, falling back to the
 *                     supplied value for rows that define no `crontime`.
 *
 * Any non-empty `debugCron` / `cronBump` value counts as "set", including the
 * string "false".
 *
 * @returns {Array<Object>} pm2 app definitions (name, script, env, restart policy).
 */
function buildApps() {
  const debugCron = process.env['debugCron'] || false;
  const cronBump = process.env['cronBump'] || false;

  // First bumped schedule fires five minutes from now.
  let startCronMS = Date.now() + (5 * (60 * 1000));

  console.log(`debugCron:${debugCron} // cronBump:${cronBump}`);

  // NOTE(review): the trailing comments look like observed run durations and
  // the time each proxy becomes free again - unverified, kept as found.
  const list = [
    { 'cron':'IE_CRON', 'start':'IE', 'name':'IE', 'script':'ie.js', 'proxy': 'uk', 'crontime': '0 0 * * *' }, // 00:04:40
    { 'cron':'LU_CRON', 'start':'LU', 'name':'LU', 'script':'lu.js', 'proxy': 'uk', 'crontime': '10 0 * * *' }, // "01:09:45.187"
    { 'cron':'IT_CRON', 'start':'IT', 'name':'IT', 'script':'it.js', 'proxy': 'uk', 'crontime': '10 1 * * *' }, // 04:51:37 - uk free at 6:30
    { 'cron':'CZ_CRON', 'start':'CZ', 'name':'CZ', 'script':'cz.js', 'proxy': 'uk', 'crontime': '20 6 * * *' }, // "00:24:01.696"
    { 'cron':'PT_CRON', 'start':'PT', 'name':'PT', 'script':'pt.js', 'proxy': 'uk', 'crontime': '0 7 * * *' }, // "00:53:02.432"
    { 'cron':'CY_CRON', 'start':'CY', 'name':'CY', 'script':'cy.js', 'proxy': 'fr', 'crontime': '0 0 * * *' }, // 00:01:03
    { 'cron':'SE_CRON', 'start':'SE', 'name':'SE', 'script':'se.js', 'proxy': 'fr', 'crontime': '5 0 * * *' }, // 00:43:45
    { 'cron':'FR_CRON', 'start':'FR', 'name':'FR', 'script':'fr.js', 'proxy': 'fr', 'crontime': '0 1 * * *' }, // 01:22:29
    { 'cron':'LT_CRON', 'start':'LT', 'name':'LT', 'script':'lt.js', 'proxy': 'fr', 'crontime': '30 2 * * *' }, // "00:54:28.134"
    { 'cron':'SK_CRON', 'start':'SK', 'name':'SK', 'script':'sk.js', 'proxy': 'fr', 'crontime': '30 3 * * *' }, // 00:24:03 - fr free at 4:00
    { 'cron':'DE_CRON', 'start':'DE', 'name':'DE', 'script':'de.js', 'proxy': 'de', 'crontime': '0 0 * * *' }, // 03:55:38 - de free at 4:00
    { 'cron':'NL_CRON', 'start':'NL', 'name':'NL', 'script':'nl.js', 'proxy': 'nl', 'crontime': '0 0 * * *' }, // 07:23:19 - nl free at 7:30
    { 'cron':'PL_CRON', 'start':'PL', 'name':'PL', 'script':'pl.js', 'proxy': 'ch', 'crontime': '0 0 * * *' }, // 17:59:18 - ch free at 18:00
    { 'cron':'LV_CRON', 'start':'LV', 'name':'LV', 'script':'lv.js', 'proxy': 'nl', 'crontime': '30 7 * * *' }, // 13:56.232 - nl free at 7:45
    { 'cron':'DK_CRON', 'start':'DK', 'name':'DK', 'script':'dk.js', 'proxy': 'de', 'crontime': '0 4 * * *' }, // 11:08.616 - de free at 4:15
    { 'cron':'ES_CRON', 'start':'ES', 'name':'ES', 'script':'es.js', 'proxy': 'de', 'crontime': '15 4 * * *' }, // 36:44.523- de free at 4:55
    { 'cron':'EE_CRON', 'start':'EE', 'name':'EE', 'script':'ee.js', 'proxy': 'de', 'crontime': '0 5 * * *' }, // 05:22:04.226 - de free after 10:30
    { 'cron':'NO_CRON', 'start':'NO', 'name':'NO', 'script':'no.js', 'proxy': 'fr', 'crontime': '0 4 * * *' }, // 05:12:57.792 - fr free after 9:20
    { 'cron':'GI_CRON', 'start':'GI', 'name':'GI', 'script':'gi.js', 'proxy': 'uk' },
    { 'cron':'GR_CRON', 'start':'GR', 'name':'GR', 'script':'gr.js', 'proxy': 'uk' },
    { 'cron':'MT_CRON', 'start':'MT', 'name':'MT', 'script':'mt.js', 'proxy': 'uk' },
    { 'cron':'BG_CRON', 'start':'BG', 'name':'BG', 'script':'bg.js', 'proxy': 'uk' },
    { 'cron':'AT_CRON', 'start':'AT', 'name':'AT', 'script':'at.js', 'proxy': 'uk' },
    { 'cron':'FI_CRON', 'start':'FI', 'name':'FI', 'script':'fi.js', 'proxy': 'uk' },
    { 'cron':'BE_CRON', 'start':'BE', 'name':'BE', 'script':'be.js', 'proxy': 'uk' }
  ];

  // The watcher is always started, regardless of which scrapers are enabled.
  const apps = [{
    'name' : 'watcher',
    'script' : 'helpers/watcher.js',

    'env': {
      'NODE_ENV': 'production'
    },
    'autorestart' : true,
    'max_restarts': 3,
    'restart_delay': 4000
  }];

  for (const item of list) {
    const suppliedCron = process.env[item.cron];
    const hasCron = typeof suppliedCron !== 'undefined';

    if (!hasCron && process.env.SCRAPE_START !== item.start) {
      continue;
    }

    const newItem = {
      'name' : item.name,
      'script' : item.script,

      'env': {
        'NODE_ENV': 'production',
        'PROXY_URI' : `${item.proxy}.proxymesh.com:31280`
      },
      'autorestart' : true,
      'max_restarts': 3,
      'restart_delay': 4000
    };

    if (hasCron) {
      if (cronBump !== false) {
        // Bumped schedules take precedence and are staggered two minutes apart.
        newItem.env[item.cron] = dateFormat(startCronMS, 'M H "* * *"');
        startCronMS = startCronMS + (2 * (60 * 1000));
      } else if (debugCron !== false || typeof item.crontime === 'undefined') {
        // Rows without a built-in crontime keep the value supplied in the
        // environment rather than being overwritten with undefined.
        newItem.env[item.cron] = suppliedCron;
      } else {
        newItem.env[item.cron] = item.crontime;
      }
    }

    apps.push(newItem);
  }

  const version = process.env.VERSION || 'NO VERSION!';

  console.log('*****************************');
  console.log(`LAUNCHING VERSION: ${version}`);
  console.log('*****************************');

  console.log(JSON.stringify(apps));

  return apps;
}
|
||||
|
||||
// pm2 ecosystem definition: the app list is computed once, when this file is loaded.
const apps = buildApps();

module.exports = { apps };
|
13
package-lock.json
generated
13
package-lock.json
generated
@ -1538,6 +1538,19 @@
|
||||
"resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
|
||||
"integrity": "sha1-elfrVQpng/kRUzH89GY9XI4AelA="
|
||||
},
|
||||
"pixelmatch": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/pixelmatch/-/pixelmatch-5.1.0.tgz",
|
||||
"integrity": "sha512-HqtgvuWN12tBzKJf7jYsc38Ha28Q2NYpmBL9WostEGgDHJqbTLkjydZXL1ZHM02ZnB+Dkwlxo87HBY38kMiD6A==",
|
||||
"requires": {
|
||||
"pngjs": "^3.4.0"
|
||||
}
|
||||
},
|
||||
"pngjs": {
|
||||
"version": "3.4.0",
|
||||
"resolved": "https://registry.npmjs.org/pngjs/-/pngjs-3.4.0.tgz",
|
||||
"integrity": "sha512-NCrCHhWmnQklfH4MtJMRjZ2a8c80qXeMlQMv2uVp9ISJMTt562SbGd6n2oq0PaPgKm7Z6pL9E2UlLIhC+SHL3w=="
|
||||
},
|
||||
"prelude-ls": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.1.2.tgz",
|
||||
|
@ -17,6 +17,8 @@
|
||||
"lodash": "^4.17.15",
|
||||
"log4js": "^5.1.0",
|
||||
"node-localstorage": "^1.3.1",
|
||||
"pixelmatch": "^5.1.0",
|
||||
"pngjs": "^3.4.0",
|
||||
"pug": "^2.0.4",
|
||||
"puppeteer": "^1.19.0",
|
||||
"smtp-email-sender": "^1.0.0",
|
||||
|
@ -12,6 +12,11 @@ const Diff = require('text-diff');
|
||||
|
||||
const time = require("time-since");
|
||||
const pug = require('pug');
|
||||
|
||||
const PNG = require('pngjs').PNG;
|
||||
const pixelmatch = require('pixelmatch');
|
||||
|
||||
|
||||
const email = require('smtp-email-sender')({
|
||||
'host': 'mail.caliban.io',
|
||||
'port': '465',
|
||||
@ -41,12 +46,12 @@ class ChangeDetection extends Scraper {
|
||||
logger.debug(pug.renderFile(`${newpath}/` + 'pug/email.pug', data));
|
||||
}
|
||||
|
||||
sendSMTP(data, newPath) {
|
||||
async sendSMTP(data, newPath) {
|
||||
const now = new Date();
|
||||
|
||||
const attachments = [
|
||||
{
|
||||
path:`${data.screenshot}.png`
|
||||
path:data.diffPNG
|
||||
}
|
||||
];
|
||||
|
||||
@ -60,6 +65,35 @@ class ChangeDetection extends Scraper {
|
||||
});
|
||||
}
|
||||
|
||||
async generateDiffScreenshot(previous, today) {
|
||||
|
||||
let {dir, root, ext, name} = path.parse(today);
|
||||
|
||||
const img1 = PNG.sync.read(fs.readFileSync(previous));
|
||||
const img2 = PNG.sync.read(fs.readFileSync(today));
|
||||
const {width, height} = img1;
|
||||
const diff = new PNG({width, height});
|
||||
|
||||
pixelmatch(img1.data, img2.data, diff.data, width, height, {threshold: 0.1});
|
||||
|
||||
|
||||
name = name.concat('_diff');
|
||||
|
||||
|
||||
const endFilename = path.format({dir, root, ext, name});
|
||||
|
||||
|
||||
|
||||
logger.debug('diffFilename', endFilename);
|
||||
|
||||
fs.writeFileSync(endFilename, PNG.sync.write(diff));
|
||||
|
||||
return endFilename;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
async processItem(item) {
|
||||
logger.debug(`Processing ${item.name}...`);
|
||||
|
||||
@ -87,15 +121,19 @@ class ChangeDetection extends Scraper {
|
||||
|
||||
const diff = new Diff(); // options may be passed to constructor; see below
|
||||
const textDiff = diff.main(previousFile, innerText.body); // produces diff array
|
||||
const cleanedDiff = diff.cleanupSemantic(textDiff);
|
||||
const levenshtein = diff.levenshtein(textDiff);
|
||||
|
||||
logger.debug('levenshtein:', levenshtein);
|
||||
|
||||
logger.debug('cleanedDiff:',cleanedDiff );
|
||||
|
||||
if (levenshtein !== 0) {
|
||||
logger.info('Changed...');
|
||||
const timestamp = dateFormat(now, 'yyyymmddHHMM');
|
||||
const screenshotPath = `${this.path}/screenshots/${filename}-${timestamp}`;
|
||||
|
||||
stats.previousScreenshot = stats.screenshot;
|
||||
stats.previousChange = stats.lastSaved;
|
||||
stats.lastSaved = now;
|
||||
stats.lastChanged = now;
|
||||
@ -106,6 +144,8 @@ class ChangeDetection extends Scraper {
|
||||
|
||||
await this._makeScreenshotV2(this.page, screenshotPath, null);
|
||||
|
||||
stats.diffPNG = await this.generateDiffScreenshot(stats.previousScreenshot.concat('.png'), screenshotPath.concat('.png'));
|
||||
|
||||
await this._randomWait(this.page, 3, 5);
|
||||
|
||||
fs.writeFileSync(oldFile, innerText.body, 'utf-8');
|
||||
@ -113,8 +153,10 @@ class ChangeDetection extends Scraper {
|
||||
|
||||
const pugData = {...stats, ...item};
|
||||
|
||||
console.log(pugData);
|
||||
this.pugTest(pugData, './');
|
||||
// console.log(pugData);
|
||||
await this.sendSMTP(pugData, './');
|
||||
} else {
|
||||
logger.debug('No change...');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
8
start.sh
Normal file
8
start.sh
Normal file
@ -0,0 +1,8 @@
|
||||
#!/bin/sh
# Container entrypoint: export the service's SSM parameters as environment
# variables, then hand over to pm2.
#
# Expects REGION, SERVICE_NAME and ENV to be set in the environment.

# Command tracing (-x) is enabled only after the parameters are loaded, so
# their values are not written to the logs.
set -e

tab="$(printf '\t')"

# One "Name<TAB>Value" line per parameter; the leading "/$SERVICE_NAME/$ENV/"
# prefix is stripped from the name only (anchored), never from the value.
# A failing `aws` call leaves the list empty instead of aborting, because the
# pipeline's status is that of `sed`.
params="$(aws ssm get-parameters-by-path --region "$REGION" --path "/$SERVICE_NAME/$ENV/" --query 'Parameters[*].{Name:Name,Value:Value}' --output text | sed "s|^/$SERVICE_NAME/$ENV/||")"

# Export each pair directly rather than eval'ing generated shell code, so
# quotes, '$' or backticks inside a value are taken literally.
while IFS="$tab" read -r name value; do
  if [ -n "$name" ]; then
    export "$name=$value"
  fi
done <<EOF
$params
EOF

set -x

# NOTE(review): `npm show` reports the version published on the registry, not
# the one installed in the image, and aborts startup (set -e) when the registry
# is unreachable - confirm this is the intended check.
npm show puppeteer version

# exec replaces this shell, so pm2-runtime receives the container's signals.
exec pm2-runtime start ecosystem.config.js --raw --env production
|
Loading…
Reference in New Issue
Block a user