init
This commit is contained in:
commit
9858a90912
55
.eslintrc
Normal file
55
.eslintrc
Normal file
@ -0,0 +1,55 @@
|
||||
{
|
||||
"parserOptions": {
|
||||
"ecmaVersion": 2017,
|
||||
"sourceType": "module",
|
||||
"ecmaFeatures": {
|
||||
"jsx": false
|
||||
}
|
||||
},
|
||||
"env": {
|
||||
"browser": false,
|
||||
"node": true,
|
||||
"es6": true
|
||||
},
|
||||
"rules": {
|
||||
"arrow-spacing": "error",
|
||||
"block-scoped-var": "error",
|
||||
"block-spacing": "error",
|
||||
"brace-style": ["error", "stroustrup", {}],
|
||||
"camelcase": "error",
|
||||
"comma-dangle": ["error", "never"],
|
||||
"comma-spacing": ["error", { "before": false, "after": true }],
|
||||
"comma-style": [1, "last"],
|
||||
"consistent-this": [1, "_this"],
|
||||
"curly": [1, "multi"],
|
||||
"eol-last": 1,
|
||||
"eqeqeq": 1,
|
||||
"func-names": 1,
|
||||
"indent": ["error", 2, { "SwitchCase": 1 }],
|
||||
"lines-around-comment": ["error", { "beforeBlockComment": true, "allowArrayStart": true }],
|
||||
"max-len": [1, 180, 2], // 2 spaces per tab, max 180 chars per line
|
||||
"new-cap": 1,
|
||||
"newline-before-return": "error",
|
||||
"no-array-constructor": 1,
|
||||
"no-inner-declarations": [1, "both"],
|
||||
"no-mixed-spaces-and-tabs": 1,
|
||||
"no-multi-spaces": 2,
|
||||
"no-new-object": 1,
|
||||
"no-shadow-restricted-names": 1,
|
||||
"object-curly-spacing": ["error", "always"],
|
||||
"padded-blocks": ["error", { "blocks": "never", "switches": "always" }],
|
||||
"prefer-const": "error",
|
||||
"prefer-template": "error",
|
||||
"one-var": 0,
|
||||
"quote-props": ["error", "always"],
|
||||
"quotes": [1, "single"],
|
||||
"radix": 1,
|
||||
"semi": [1, "always"],
|
||||
"space-before-blocks": [1, "always"],
|
||||
"space-infix-ops": 1,
|
||||
"vars-on-top": 1,
|
||||
"no-multiple-empty-lines": ["error", { "max": 1, "maxEOF": 1 }],
|
||||
"spaced-comment": ["error", "always", { "markers": ["/"] }]
|
||||
}
|
||||
|
||||
}
|
162
.gitignore
vendored
Normal file
162
.gitignore
vendored
Normal file
@ -0,0 +1,162 @@
|
||||
# Created by .ignore support plugin (hsz.mobi)
|
||||
### Node template
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
node_modules/
|
||||
jspm_packages/
|
||||
|
||||
# Typescript v1 declaration files
|
||||
typings/
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variables file
|
||||
.env
|
||||
|
||||
### macOS template
|
||||
# General
|
||||
.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
|
||||
# Thumbnails
|
||||
._*
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
.fseventsd
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
.com.apple.timemachine.donotpresent
|
||||
|
||||
# Directories potentially created on remote AFP share
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
.apdisk
|
||||
### JetBrains template
|
||||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
|
||||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||
|
||||
.idea/
|
||||
# User-specific stuff:
|
||||
.idea/**/workspace.xml
|
||||
.idea/**/tasks.xml
|
||||
.idea/dictionaries
|
||||
|
||||
# Sensitive or high-churn files:
|
||||
.idea/**/dataSources/
|
||||
.idea/**/dataSources.ids
|
||||
.idea/**/dataSources.xml
|
||||
.idea/**/dataSources.local.xml
|
||||
.idea/**/sqlDataSources.xml
|
||||
.idea/**/dynamic.xml
|
||||
.idea/**/uiDesigner.xml
|
||||
|
||||
# Gradle:
|
||||
.idea/**/gradle.xml
|
||||
.idea/**/libraries
|
||||
|
||||
# CMake
|
||||
cmake-build-debug/
|
||||
|
||||
# Mongo Explorer plugin:
|
||||
.idea/**/mongoSettings.xml
|
||||
|
||||
## File-based project format:
|
||||
*.iws
|
||||
|
||||
## Plugin-specific files:
|
||||
|
||||
# IntelliJ
|
||||
out/
|
||||
|
||||
# mpeltonen/sbt-idea plugin
|
||||
.idea_modules/
|
||||
|
||||
# JIRA plugin
|
||||
atlassian-ide-plugin.xml
|
||||
|
||||
# Cursive Clojure plugin
|
||||
.idea/replstate.xml
|
||||
|
||||
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||
com_crashlytics_export_strings.xml
|
||||
crashlytics.properties
|
||||
crashlytics-build.properties
|
||||
fabric.properties
|
||||
|
||||
|
||||
|
||||
artefacts/screenshots/*.png
|
||||
artefacts/*.txt
|
||||
artefacts/*.json
|
||||
artefacts/*.html
|
||||
artefacts/*
|
||||
|
||||
/tests/*.zip
|
||||
|
||||
/output/
|
||||
/dist/
|
||||
!/tests/data/
|
||||
/tests/sink/
|
||||
/debug/
|
||||
/update.sh
|
||||
/setup/web/
|
||||
/backup/
|
||||
|
||||
/archive.tar.gz
|
||||
/user/
|
||||
/zip
|
34
changedetection.js
Normal file
34
changedetection.js
Normal file
@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env node
|
||||
const CronJob = require('cron').CronJob;
|
||||
|
||||
// load env variables from file
|
||||
require('dotenv').config();
|
||||
|
||||
const ChangeDetection = require('./scrapers/scraper');
|
||||
|
||||
/**
 * Launches the change-detection scraper.
 *
 * - When the CD_CRON environment variable is a string, a cron job is created with it as the
 *   schedule and started immediately (fourth CronJob argument); each tick logs 'go' and runs
 *   the scraper.
 * - When SCRAPE_START equals the scraper's id, the scraper is also run once right away.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const detector = new ChangeDetection();
  const cronSpec = process.env.CD_CRON;

  if (typeof cronSpec === 'string') {
    console.log(`${detector.id} cron set for ${cronSpec}`);

    // The tick handler does not use `this`, so an arrow function is equivalent here.
    const onTick = async () => {
      console.log('go');
      await detector.run();
    };

    new CronJob(cronSpec, onTick, null, true);
  }

  const startNow = process.env.SCRAPE_START === detector.id;

  if (startNow) {
    console.log('go');
    await detector.run();
  }

  console.log('Change Detection Launched');
}
|
||||
|
||||
// Log the first uncaught exception instead of letting it terminate the process silently.
// `once` is kept from the original: only the first such exception is intercepted.
process.once('uncaughtException', function caught(err) {
  console.error('Uncaught', err);
});

// run() is async: a rejection is not an 'uncaughtException' on every Node version, so it is
// handled explicitly here rather than left as a floating promise.
run().catch(function failed(err) {
  console.error('Uncaught', err);
});
|
||||
|
791
lib/scraper.js
Normal file
791
lib/scraper.js
Normal file
@ -0,0 +1,791 @@
|
||||
const fs = require('fs-extra');
|
||||
const path = require('path');
|
||||
const url = require('url');
|
||||
const log4js = require('log4js');
|
||||
let logger = log4js.getLogger('Scraper');
|
||||
const EventEmitter = require('events');
|
||||
const dateFormat = require('dateformat');
|
||||
|
||||
const puppeteer = require('puppeteer');
|
||||
|
||||
logger.level = process.env.LOGGER_LEVEL || 'debug';
|
||||
|
||||
/**
 * Base class for the puppeteer-driven scrapers.
 *
 * Owns the browser / page lifecycle, aborts requests matching `this.filters`, offers file,
 * archive and string helpers, and emits a 'recover' event when the browser or page fails
 * while a scrape is still in progress.
 *
 * NOTE(review): `this.recover()` and `this._uploadError()` are called below but are not defined
 * in this class — presumably subclasses provide them; confirm.
 * NOTE(review): `removeAccents`, `crc`, `camelCase`, `archiver` and `tldExtract` are used below
 * but are not required at the top of this file — the methods using them throw a ReferenceError
 * until those modules are imported.
 */
class Scraper extends EventEmitter {
  constructor() {
    super(); // must call super for "this" to be defined.

    // URL fragments: any request whose URL contains one of these is aborted
    // (see the 'request' handler in _createBrowserPage).
    this.filters = [
      'livefyre',
      'moatad',
      'analytics',
      'controltag',
      'chartbeat',
      'siteimprove',
      'hotjar',
      '/plugins/cookie-notice/',
      'addthis',
      'facebook.',
      'linkedin',
      'googletagmanager',
      'swiftypecdn.com',
      '-social-tracking.',
      'demdex.net',
      'adobedtm.com'
    ];

    this.perf = {
      'started': 0,
      'finished': 0,
      'time': 0,
      'scraped': 0
    };

    this.browserCrashed = false;
    this.crashLog = new Map([]);

    // Must start as `false`: the 'disconnected' and 'close' handlers compare with `=== false`,
    // so leaving this undefined meant they could never emit 'recover'.
    this.detatchable = false;

    this.page = null;
  }

  /**
   * Sets the scraper id and replaces the module-level logger with one tagged with that id.
   * The replacement logger's level is LOGGER_LEVEL, or 'warn' when that is unset.
   *
   * @param newID
   */
  setID(newID) {
    logger = log4js.getLogger(`Scraper (${newID})`);
    logger.level = process.env.LOGGER_LEVEL || 'warn';

    this.id = newID;
  }

  /**
   * Deletes `path` (recursively), but only when NODE_ENV is 'production'.
   *
   * Uses fs-extra's `remove`, which is already imported; the original called `del`, which is
   * never required in this file and therefore threw a ReferenceError in production.
   * Unlike `del`, `remove` has no glob support and no working-directory safety check.
   *
   * @param path
   * @returns {Promise<void>}
   */
  async emptyPath(path) {
    if (process.env.NODE_ENV !== 'production')
      return;

    await fs.remove(path);
    logger.warn('Deleted files and folders:\n', path);
  }

  /**
   * Empties (in production) and then creates the output path, and creates the per-scraper
   * debug directory `<this dir>/../debug/<id>`.
   *
   * @param newPath
   * @returns {Promise<void>}
   */
  async setPath(newPath) {
    await this.emptyPath(newPath);

    this.path = `${newPath}`;
    this.debugPath = `${__dirname }/../debug/${this.id}`;
    await this._createDirectory(this.path);
    await this._createDirectory(this.debugPath);
  }

  /**
   * 'Human' like click delay
   * @returns {number} a random integer from 90 to 118 inclusive
   */
  static notARobot() {
    return 90 + Math.floor(Math.random() * (30 - 1));
  }

  /**
   * Marks the browser / page as safe to close: after this, the 'disconnected', 'close' and
   * page 'error' handlers no longer emit 'recover'.
   */
  canDetach() {
    this.detatchable = true;
  }

  /**
   * Closes the current page and browser, if a browser exists. Errors are logged, never thrown,
   * and `this.browser` is always reset to null.
   *
   * @returns {Promise<void>}
   * @private
   */
  async _killRunningBrowser() {
    if (!this.browser)
      return;

    try {
      logger.info('Trying to close hanging / running browser');

      await this._forcePageClose();

      // Drop our own handler first so that closing does not trigger a 'recover'.
      this.browser.removeAllListeners('disconnected');

      await this.browser.close();
    }
    catch (err) {
      logger.error('Closing browser', err);
    }
    finally {
      this.browser = null;
    }
  }

  /**
   * Closes any running browser, launches a fresh one and wires the 'disconnected' handler.
   *
   * @param headless requested headless mode; forced to true when NODE_ENV is 'production'
   * @returns {Promise<void>} rejects with the launch error when puppeteer cannot start
   * @private
   */
  async _initBrowser(headless = true) {
    // Force headless when running in production
    const realHeadless = (process.env.NODE_ENV === 'production') ? true : headless;

    await this._killRunningBrowser();

    this.browserCrashed = false;

    logger.info('Puppeteer.launch', realHeadless);
    logger.debug('Using proxy:', process.env.PROXY_URI);

    const args = [
      '--disable-dev-shm-usage',
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-accelerated-2d-canvas',
      '--disable-gpu',
      // Chromium expects "width,height" here.
      '--window-size=1920,1080',
      '--hide-scrollbars',
      '--disable-default-apps'
    ];

    // Use proxy so FCA wont block us — but only when one is configured; the original
    // passed the literal "--proxy-server=undefined" when PROXY_URI was unset.
    if (process.env.PROXY_URI)
      args.unshift(`--proxy-server=${process.env.PROXY_URI}`);

    try {
      this.browser = await puppeteer.launch({
        'headless': realHeadless,
        'args': args
      });
    }
    catch (err) {
      logger.error('Puppeteer failed to launch');
      logger.error(err);

      // Re-throw the real cause instead of failing later on an undefined browser.
      throw err;
    }

    const browserVersion = await this.browser.version();

    logger.info(`Browser version ${browserVersion}`);

    this.browser.on('disconnected', () => {
      logger.warn('Browser has become detached!');

      if (this.detatchable === false) {
        this.browserCrashed = true;

        logger.warn('browser.onDisconnected::emit recover');
        this.emit('recover');
      }
    });
  }

  /**
   * Closes the current page, if any. Errors are logged, never thrown, and `this.page` is
   * always reset to null.
   *
   * @returns {Promise<void>}
   * @private
   */
  async _forcePageClose() {
    if (!this.page)
      return;

    try {
      logger.warn('Browser Page exists: DESTROYING');

      // Drop our own handler first so that closing does not trigger a 'recover'.
      this.page.removeAllListeners('close');

      await this.page.close().catch((e) => {
        logger.debug(e);
      });
    }
    catch (err) {
      logger.error(err);
    }
    finally {
      this.page = null;
    }
  }

  /**
   * Replaces the current page with a new one, with 90 s timeouts, request filtering and the
   * dialog / error / close handlers attached.
   *
   * @returns {Promise<void>}
   * @private
   */
  async _createBrowserPage() {
    // Must be awaited: otherwise the old page's teardown runs concurrently with newPage()
    // and its `finally` can null out (or close) the page created below.
    await this._forcePageClose();

    this.page = await this.browser.newPage();

    try {
      await this.page.setDefaultNavigationTimeout(90000);

      await this.page.setDefaultTimeout(90000);
    }
    catch (err) {
      logger.debug(err);
    }

    await this.page.setRequestInterception(true);

    this.page.on('request', (request) => {
      const url = request.url();

      logger.trace('request', url);

      const shouldAbort = this.filters.some((urlPart) => url.includes(urlPart));

      if (shouldAbort) request.abort();
      else request.continue();
    });

    this.page.on('dialog', async (dialog) => {
      logger.warn('Dialog Box', dialog.message());
      await dialog.dismiss();
    });

    this.page.on('error', async (err) => {
      logger.warn('Page crashed', err);

      if (!this.detatchable) {
        await this._uploadError();
        logger.warn('page.onError::emit recover');
        this.emit('recover');
      }
    });

    this.page.on('pageerror', (err) => {
      logger.trace('pageerror', err);
    });

    this.page.on('requestfailed', (request) => {
      // Use the public accessor rather than the private `_url` field.
      const url = request.url();
      const blocked = this.filters.some((urlPart) => url.includes(urlPart));

      // Requests we aborted ourselves are expected failures; only log them at trace level.
      if (blocked)
        logger.trace('🚫', url);
      else
        logger.warn('requestfailed', url);
    });

    this.page.on('close', () => {
      logger.warn('Browser Page has closed');

      if (this.detatchable === false) {
        logger.warn('page.onClose::emit recover');
        this.emit('recover');
      }
    });
  }

  /**
   * Applies a 1200x1200 touch / mobile / landscape viewport to the current page and sets the
   * navigation timeout to 90 s.
   *
   * NOTE(review): `name` and `userAgent` are declared but never applied to the page — confirm
   * whether a `setUserAgent` call is missing.
   *
   * @returns {Promise<void>}
   * @private
   */
  async _makeResponsive() {
    const viewPort = {
      'name': 'Responsive',
      'userAgent' : 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3494.0 Safari/537.36',
      'viewport': {
        'width': 1200,
        'height': 1200,
        'deviceScaleFactor': 4.5,
        'isMobile': true,
        'hasTouch': true,
        'isLandscape': true
      }
    };

    await this.page.setViewport(viewPort.viewport);

    await this.page.setDefaultNavigationTimeout(90000);
  }

  /**
   * Builds a file name from `id`: non-word characters become spaces, accents are removed, the
   * result is camel-cased and prefixed with `this.modePrefix[this.mode]`. Names longer than
   * 175 characters are cut to 175 and suffixed with `_<crc32 of id in hex>`.
   *
   * NOTE(review): `removeAccents`, `crc`, `camelCase`, `this.modePrefix` and `this.mode` are
   * not defined in this file — confirm where they come from.
   *
   * @param id
   * @returns {string}
   * @private
   */
  _makeFileName(id) {
    const nonWordChars = /\W/g;
    const maxChars = 175;
    const entity = removeAccents.remove(id.replace(nonWordChars, ' ').trim());

    const _crc = crc.crc32(id).toString(16);

    const output = [this.modePrefix[this.mode], camelCase(entity)].join('');

    return (output.length > maxChars) ? output.substring(0, maxChars).concat('_', _crc) : output;
  }

  /**
   * Joins `this.path` and the generated file name, capped at 240 characters.
   *
   * @param id
   * @returns {Promise<string>}
   * @private
   */
  async _makeFilePath(id) {
    return `${this.path}/${this._makeFileName(id)}`.substring(0, 240);
  }

  /**
   * Takes a full-page PNG screenshot of `page` at a 1200x800 viewport and writes it to
   * `<destPath>.png`. Failures are logged, never thrown.
   *
   * @param page
   * @param destPath path without the `.png` extension
   * @param waitFor when truthy, passed to `page.waitFor` before the screenshot
   * @returns {Promise<void>}
   * @private
   */
  async _makeScreenshotV2(page, destPath, waitFor = null) {
    try {
      if (waitFor)
        await page.waitFor(waitFor);

      // Guards on the scraper's own page, not on the `page` argument.
      if (!this.page) {
        logger.warn('_makeScreenshotV2: No Page -- Not taking screenshot');

        return;
      }

      logger.debug('Snapshot', `${destPath}.png`);
      await page.setViewport({ 'width': 1200, 'height': 800 });
      await page.screenshot({ 'path': `${destPath}.png`, 'fullPage': true }).catch((err) => {
        logger.error('Screenshot', err);
      });
    }
    catch (err) {
      logger.error('_makeScreenshotV2', err);
    }
  }

  /**
   * Waits a random whole number of seconds between `minTime` and `maxTime` (inclusive).
   *
   * @param page
   * @param minTime seconds
   * @param maxTime seconds
   * @param msg optional text added to the debug log line
   * @returns {Promise<void>}
   * @private
   */
  async _randomWait(page, minTime = 2, maxTime = 10, msg = '') {
    const insertedMsg = (msg.length > 0) ? `${this.id} ${msg} - ` : `${this.id} `;

    const waitTime = Math.floor(Math.random() * (maxTime - minTime + 1) + minTime);

    logger.debug(`${insertedMsg}Waiting ${waitTime} seconds...`);
    await page.waitFor(waitTime * 1000);
  }

  /**
   * Waits `waitTime * 100` milliseconds; logs only when `msg` is non-empty.
   *
   * @param page
   * @param waitTime
   * @param msg
   * @returns {Promise<void>}
   * @private
   */
  async _microWait(page, waitTime, msg = '') {
    const insertedMsg = (msg.length > 0) ? `${msg} - ` : '';

    if (msg !== '') logger.debug(`${insertedMsg}Waiting ${waitTime * 100} ms...`);
    await page.waitFor(waitTime * 100);
  }

  /**
   * Waits `waitTime * 10` milliseconds; logs only when `msg` is non-empty.
   *
   * @param page
   * @param waitTime
   * @param msg
   * @returns {Promise<void>}
   * @private
   */
  async _nanoWait(page, waitTime, msg = '') {
    const insertedMsg = (msg.length > 0) ? `${msg} - ` : '';

    if (msg !== '') logger.debug(`${insertedMsg}Waiting ${waitTime * 10} ms...`);
    await page.waitFor(waitTime * 10);
  }

  /**
   * Writes `data` to `<this dir>/../artefacts/<destPath>`. Use for artefacts saving only.
   *
   * @param destPath path relative to the artefacts directory
   * @param data
   * @returns {Promise<string>} a "File saved to ..." message; rejects with the write error
   * @private
   */
  async _saveToFile(destPath, data) {
    const fullPath = `${__dirname}/../artefacts/${destPath}`;

    // fs-extra's writeFile returns a promise when no callback is passed.
    await fs.writeFile(fullPath, data);

    return `File saved to '${fullPath}'`;
  }

  /**
   * Writes `data` to `destPath` as given.
   *
   * @param destPath
   * @param data
   * @returns {Promise<string>} a "File saved to ..." message; rejects with the write error
   * @private
   */
  async _dumpFile(destPath, data) {
    await fs.writeFile(destPath, data);

    return `File saved to '${destPath}'`;
  }

  /**
   * Creates (if needed) `<destPath>/<yyyymmddHHMM>` for the current time.
   *
   * @param destPath
   * @returns {Promise<string>} the full path of the timestamped directory
   * @private
   */
  async _createTimestampDirectory(destPath = null) {
    const timestamp = dateFormat(new Date(), 'yyyymmddHHMM');
    const fullPath = `${destPath}/${timestamp}`;

    logger.info('fullPath', fullPath);

    // ensureDirSync is a no-op when the directory already exists.
    fs.ensureDirSync(fullPath);

    return fullPath;
  }

  /**
   * Creates `destPath` (and missing parents) if it does not exist. Errors are logged, never
   * thrown.
   *
   * @param destPath
   * @returns {Promise<*>} `destPath`, whether or not creation succeeded
   * @private
   */
  async _createDirectory(destPath = null) {
    try {
      fs.ensureDirSync(destPath);
    }
    catch (err) {
      logger.error('_createDirectory', err);
    }

    return destPath;
  }

  /**
   * Builds an archive from a directory or glob.
   *
   * NOTE(review): `filename` is handed to `archiver` as its first argument and the archive is
   * never piped to an output stream — confirm this is intended; _createArchiveV2 pipes to a
   * file instead.
   *
   * @param destPath directory (or glob pattern when `glob` is true) to archive
   * @param filename
   * @param glob treat `destPath` as a glob pattern
   * @returns {Promise<*>} rejects when a path is missing or the archive fails
   * @private
   */
  async _createArchive(destPath = null, filename = null, glob = false) {
    return new Promise((resolve, reject) => {
      if (!destPath || !filename) {
        const e = new Error('Missing paths');

        logger.error(e);
        reject(e);

        // Stop here: the original carried on and tried to build the archive anyway.
        return;
      }

      const archive = archiver(filename, {
        'zlib': { 'level': 9 } // Sets the compression level.
      });

      archive.on('error', reject);

      if (glob)
        archive.glob(`${destPath}`);
      else
        archive.directory(`${destPath}/`);

      archive.finalize().then(() => {
        logger.debug('Archive finished');
        resolve();
      }).catch(reject);
    });
  }

  /**
   * Streams a zip of a directory or glob into the file `filename`.
   *
   * @param destPath directory (or glob pattern when `glob` is true) to archive
   * @param filename path of the zip file to write
   * @param glob treat `destPath` as a glob pattern
   * @returns {Promise<*>} resolves once the output file has been closed; rejects when a path
   * is missing or the archive / output stream fails
   * @private
   */
  async _createArchiveV2(destPath = null, filename = null, glob = false) {
    logger.debug('=== _createArchiveV2 :: STREAMING ===');

    return new Promise((resolve, reject) => {
      if (!destPath || !filename) {
        const e = new Error('Missing paths');

        logger.error(e);
        reject(e);

        // Stop here: the original carried on and tried to open a stream on a null path.
        return;
      }

      const output = fs.createWriteStream(filename);

      const archive = archiver('zip', {
        'TransformOptions': {
          'objectMode':true
        },
        'zlib': { 'level': 6 } // Sets the compression level.
      });

      // The zip is only complete on disk once the output stream has closed.
      output.on('close', () => {
        logger.debug('Archive finished');
        resolve();
      });
      output.on('error', reject);
      archive.on('error', reject);

      archive.pipe(output);

      if (glob)
        archive.glob(`${destPath}`);
      else
        archive.directory(`${destPath}/`);

      archive.finalize().catch(reject);
    });
  }

  /**
   * Parses the host of `urlStr` with `tldExtract.parse_host`.
   *
   * NOTE(review): `tldExtract` is not required in this file; and on failure the caught error
   * object is returned (not thrown) — callers must check for it.
   *
   * @param urlStr
   * @returns {*} null for an empty / missing url, otherwise the parse result or the error
   */
  explodeURL (urlStr = null) {
    if (!urlStr || urlStr === '')
      return (null);

    try {
      const workURL = url.parse(urlStr);

      return tldExtract.parse_host(workURL.host);
    }
    catch (e) {
      return e;
    }
  }

  /**
   * Get Params from a url string
   *
   * The input is passed through `decodeURI`, then everything after the first `?` is split on
   * `&`. Each pair is split on its first `=`, so values containing `=` are kept whole. Pairs
   * without `=` map to undefined; empty pairs are skipped.
   *
   * @param url
   * @returns {Object} key/value map; empty when there is no query string
   * @throws {URIError} when `url` contains a malformed escape sequence
   */
  _getParamsFromUrl(url) {
    const decoded = decodeURI(url);
    const obj = {};
    const queryStart = decoded.indexOf('?');

    if (queryStart === -1)
      return obj;

    decoded.slice(queryStart + 1).split('&').forEach((param) => {
      if (param === '')
        return;

      const separator = param.indexOf('=');

      if (separator === -1)
        obj[param] = undefined;
      else
        obj[param.slice(0, separator)] = param.slice(separator + 1);
    });

    return obj;
  }

  /**
   * Removes the first newline, collapses every run of whitespace to a single space and trims.
   *
   * NOTE(review): only the FIRST newline is removed outright (the regex has no `g` flag); later
   * newlines become spaces via the whitespace collapse — confirm which is intended.
   *
   * @param text
   * @returns {string} '' for falsy input
   * @private
   */
  _cleanUp(text) {
    if (!text) return '';
    const regexNewLine = /\n/;
    const regexCollapseWS = /\s+/g;

    return text.replace(regexNewLine, '').replace(regexCollapseWS, ' ').trim();
  }

  /**
   * Turns free text into a field name: cleaned up, accents removed, everything except ASCII
   * letters, digits and whitespace stripped, then camel-cased.
   *
   * NOTE(review): `removeAccents` and `camelCase` are not required in this file.
   *
   * @param text
   * @returns {string} '' for falsy input
   * @private
   */
  _makeFieldName(text) {
    const removePunctuation = /([^A-Za-z0-9\s])+/g;

    if (!text) return '';
    let workString = this._cleanUp(text);

    workString = removeAccents.remove(workString);
    workString = workString.replace(removePunctuation, '');

    workString = camelCase(workString);

    return workString;
  }

  /**
   * Renames `origFN` to `newFN` when `origFN` exists. Errors are logged, never thrown.
   *
   * Uses fs-extra's `pathExists`; the original called `checkFileExists`, which is not defined
   * anywhere in this file.
   *
   * @param origFN
   * @param newFN
   * @returns {Promise<void>}
   * @private
   */
  async _renameFile(origFN, newFN) {
    try {
      const exists = await fs.pathExists(origFN);

      console.log(`file exists: ${exists}`);

      if (exists)
        await fs.rename(origFN, newFN);
    }
    catch (e) {
      logger.error(e);
    }
  }

  /**
   * Marks the start of a run: records the start time and makes sure 'recover' events are
   * routed to `this.recover()`.
   *
   * @returns {Promise<void>}
   * @private
   */
  async _start() {
    logger.debug(`<=- START ${this.id}-=>`);
    this.perf.started = Date.now();

    if (!this._recoverListener)
      this._recoverListener = async () => {
        await this.recover();
      };

    // Register at most once: the original added a new listener on every _start(), so a
    // scraper re-run on the same instance recovered once per previous run.
    if (!this.listeners('recover').includes(this._recoverListener))
      this.on('recover', this._recoverListener);

    // await this._createLock();
  }

  /**
   * Ends a run: marks the browser as detachable, closes page and browser, then calls
   * `_complete()`.
   *
   * @returns {Promise<void>}
   * @private
   */
  async _done() {
    logger.info('<=- DONE -=>');

    // OK To close the browser window now
    this.canDetach();

    await this._forcePageClose();

    await this._killRunningBrowser();

    await this._complete();
  }

  /**
   * Triggers a garbage collection when `global.gc` is exposed, then logs completion.
   *
   * @returns {Promise<void>}
   * @private
   */
  async _complete() {
    try {
      if (global.gc) global.gc();
    }
    catch (e) {
      logger.warn('`node --expose-gc`');
    }

    logger.info('<=- COMPLETE -=>');
  }

  /**
   * Navigates the current page to `url` and remembers it as `this.lastUrl`.
   *
   * Navigation failures are logged and, unless `noRecover` is set, emit 'recover'; a
   * `net::ERR_FAILED` failure also sets `this.browserCrashed`. Nothing is thrown.
   *
   * @param url
   * @param options merged over the defaults `{ timeout: 90000, waitUntil: 'networkidle0' }`
   * @param noRecover when true, a failed navigation does not emit 'recover'
   * @returns {Promise<void>}
   * @private
   */
  async _goto(url, options = {}, noRecover = false) {
    this.lastUrl = url;

    const newOptions = Object.assign({ 'timeout':90000, 'waitUntil':'networkidle0' }, options);

    logger.debug(newOptions);

    try {
      logger.info('Goto:', url);
      await this.page.goto(url, newOptions).catch((err) => {
        logger.error('GOTO', err);

        if (err.message.includes('net::ERR_FAILED'))
          this.browserCrashed = true;

        if (!noRecover)
          this.emit('recover');
      });
    }
    catch (error) {
      // Reached for synchronous failures (e.g. no page); these do not emit 'recover'.
      logger.error(error);
      logger.error(url, options);
    }
  }

  /**
   * Returns a wrapper that calls `fn` only after `time` ms have passed without another call.
   *
   * @param fn
   * @param time milliseconds
   * @returns {Function}
   * @private
   */
  _debounce(fn, time) {
    let timeout;

    return function (...args) { // <-- not an arrow function, so the caller's `this` is kept
      const functionCall = () => fn.apply(this, args);

      clearTimeout(timeout);
      timeout = setTimeout(functionCall, time);
    };
  }

  /**
   * Returns a wrapper that calls `callback` immediately and then ignores further calls for
   * `limit` ms.
   *
   * @param callback
   * @param limit milliseconds
   * @returns {Function}
   * @private
   */
  _throttle (callback, limit) {
    let wait = false;

    return function (...args) {
      if (wait)
        return;

      callback.apply(null, args);
      wait = true;
      setTimeout(() => {
        wait = false;
      }, limit);
    };
  }

  /**
   * Returns a wrapper that calls `func` on the first invocation only and returns that first
   * result on every call.
   *
   * @param func
   * @returns {function(): *}
   * @private
   */
  _once(func) {
    let alreadyCalled = false;
    let result;

    return function (...args) {
      if (!alreadyCalled) {
        result = func.apply(this, args);
        alreadyCalled = true;
      }

      return result;
    };
  }

  /**
   * Navigates to `restartURL`, or to the last url passed to `_goto` when none is given.
   *
   * @param restartURL
   * @returns {Promise<void>}
   */
  async restart(restartURL) {
    const rURL = restartURL || this.lastUrl;

    logger.info(`Restarting ${this.id} // Going to ${rURL}`);

    await this._goto(rURL);
  }

  /**
   * Synchronously writes `data` to `filename`. Errors are logged, never thrown.
   *
   * @param filename
   * @param data
   * @returns {Promise<void>}
   */
  async saveFile(filename, data) {
    try {
      fs.writeFileSync(filename, data);
    }
    catch (err) {
      logger.error(err);
    }
  }
}
|
||||
|
||||
module.exports = Scraper;
|
33
outlet.js
Normal file
33
outlet.js
Normal file
@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env node
|
||||
const CronJob = require('cron').CronJob;
|
||||
|
||||
// load env variables from file
|
||||
require('dotenv').config();
|
||||
|
||||
const OutletScrape = require('./scrapers/outlet');
|
||||
|
||||
/**
 * Launches the Outlet scraper.
 *
 * - When the OUTLET_CRON environment variable is a string, a cron job is created with it as
 *   the schedule and started immediately (fourth CronJob argument); each tick runs the scraper.
 * - When SCRAPE_START equals the scraper's id, the scraper is also run once right away.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const outlet = new OutletScrape();

  // Check the same variable that is used as the schedule. The original tested
  // `process.env.outlet`, so the job was never created when only OUTLET_CRON was set.
  if (typeof process.env.OUTLET_CRON === 'string') {
    console.log(`${outlet.id} cron set for ${process.env.OUTLET_CRON}`);
    new CronJob(process.env.OUTLET_CRON, async function() {
      await outlet.run();
    }, null, true);
  }

  if (process.env.SCRAPE_START === outlet.id) {
    console.log('go');
    await outlet.run();
  }

  console.log('Outlet Launched');
}
|
||||
|
||||
// Log the first uncaught exception instead of letting it terminate the process silently.
// `once` is kept from the original: only the first such exception is intercepted.
process.once('uncaughtException', function caught(err) {
  console.error('Uncaught', err);
});

// run() is async: a rejection is not an 'uncaughtException' on every Node version, so it is
// handled explicitly here rather than left as a floating promise.
run().catch(function failed(err) {
  console.error('Uncaught', err);
});
|
||||
|
2250
package-lock.json
generated
Normal file
2250
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
33
package.json
Normal file
33
package.json
Normal file
@ -0,0 +1,33 @@
|
||||
{
|
||||
"name": "changedetection",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "server.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"cheerio": "^1.0.0-rc.3",
|
||||
"fecha": "^3.0.3",
|
||||
"fs-extra": "latest",
|
||||
"html-differ": "^1.4.0",
|
||||
"jsonfile": "^5.0.0",
|
||||
"lodash": "^4.17.15",
|
||||
"log4js": "^5.1.0",
|
||||
"node-localstorage": "^1.3.1",
|
||||
"pug": "^2.0.4",
|
||||
"puppeteer": "^1.19.0",
|
||||
"smtp-email-sender": "^1.0.0",
|
||||
"text-diff": "^1.0.1",
|
||||
"time-since": "^1.0.7",
|
||||
"underscore": "^1.9.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"cron": "^1.7.2",
|
||||
"dateformat": "^3.0.3",
|
||||
"dotenv": "^8.2.0",
|
||||
"eslint": "^6.5.1"
|
||||
}
|
||||
}
|
414
pug/App.css
Normal file
414
pug/App.css
Normal file
@ -0,0 +1,414 @@
|
||||
@import url('https://fonts.googleapis.com/css?family=Roboto');
|
||||
|
||||
/* Global Styles */
|
||||
:root {
|
||||
--primary-color: #dc3545;
|
||||
--dark-color: #333333;
|
||||
--light-color: #f4f4f4;
|
||||
--danger-color: #dc3545;
|
||||
--success-color: #28a745;
|
||||
}
|
||||
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
body {
|
||||
font-family: 'Roboto', sans-serif;
|
||||
font-size: 1rem;
|
||||
line-height: 1.6;
|
||||
background-color: #fff;
|
||||
color: #333;
|
||||
}
|
||||
|
||||
a {
|
||||
color: var(--primary-color);
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
a:hover {
|
||||
color: #666;
|
||||
}
|
||||
|
||||
ul {
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
img {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
/* Utilities */
|
||||
.container {
|
||||
max-width: 1100px;
|
||||
margin: auto;
|
||||
overflow: hidden;
|
||||
padding: 0 2rem;
|
||||
}
|
||||
|
||||
/* Text Styles */
|
||||
.x-large {
|
||||
font-size: 4rem;
|
||||
line-height: 1.2;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.large {
|
||||
font-size: 3rem;
|
||||
line-height: 1.2;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.lead {
|
||||
font-size: 1.5rem;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.text-center {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.text-primary {
|
||||
color: var(--primary-color);
|
||||
}
|
||||
|
||||
.text-dark {
|
||||
color: var(--dark-color);
|
||||
}
|
||||
|
||||
.text-success, ins {
|
||||
color: var(--success-color);
|
||||
}
|
||||
|
||||
.text-danger, del {
|
||||
color: var(--danger-color);
|
||||
}
|
||||
|
||||
.text-center {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.text-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
.text-left {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
/* Center All */
|
||||
.all-center {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
width: 100%;
|
||||
margin: auto;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
/* Cards */
|
||||
.card {
|
||||
padding: 1rem;
|
||||
border: #ccc 1px dotted;
|
||||
margin: 0.7rem 0;
|
||||
}
|
||||
|
||||
/* List */
|
||||
.list {
|
||||
margin: 0.5rem 0;
|
||||
}
|
||||
|
||||
.list li {
|
||||
padding-bottom: 0.3rem;
|
||||
}
|
||||
|
||||
/* Padding */
|
||||
.p {
|
||||
padding: 0.5rem;
|
||||
}
|
||||
.p-1 {
|
||||
padding: 1rem;
|
||||
}
|
||||
.p-2 {
|
||||
padding: 2rem;
|
||||
}
|
||||
.p-3 {
|
||||
padding: 3rem;
|
||||
}
|
||||
.py {
|
||||
padding: 0.5rem 0;
|
||||
}
|
||||
.py-1 {
|
||||
padding: 1rem 0;
|
||||
}
|
||||
.py-2 {
|
||||
padding: 2rem 0;
|
||||
}
|
||||
.py-3 {
|
||||
padding: 3rem 0;
|
||||
}
|
||||
|
||||
/* Margin */
|
||||
.m {
|
||||
margin: 0.5rem;
|
||||
}
|
||||
.m-1 {
|
||||
margin: 1rem;
|
||||
}
|
||||
.m-2 {
|
||||
margin: 2rem;
|
||||
}
|
||||
.m-3 {
|
||||
margin: 3rem;
|
||||
}
|
||||
.my {
|
||||
margin: 0.5rem 0;
|
||||
}
|
||||
.my-1 {
|
||||
margin: 1rem 0;
|
||||
}
|
||||
.my-2 {
|
||||
margin: 2rem 0;
|
||||
}
|
||||
.my-3 {
|
||||
margin: 3rem 0;
|
||||
}
|
||||
|
||||
/* Grid */
|
||||
.grid-2 {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
grid-gap: 1rem;
|
||||
}
|
||||
|
||||
.grid-3 {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(3, 1fr);
|
||||
grid-gap: 1rem;
|
||||
}
|
||||
|
||||
.grid-4 {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(4, 1fr);
|
||||
grid-gap: 1rem;
|
||||
}
|
||||
|
||||
.btn {
|
||||
display: inline-block;
|
||||
background: var(--light-color);
|
||||
color: #333;
|
||||
padding: 0.4rem 1.3rem;
|
||||
font-size: 1rem;
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
margin-right: 0.5rem;
|
||||
transition: opacity 0.2s ease-in;
|
||||
outline: none;
|
||||
}
|
||||
|
||||
.btn-link {
|
||||
background: none;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.btn-block {
|
||||
display: block;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.btn-sm {
|
||||
font-size: 0.8rem;
|
||||
padding: 0.3rem 1rem;
|
||||
margin-right: 0.2rem;
|
||||
}
|
||||
|
||||
.badge {
|
||||
display: inline-block;
|
||||
font-size: 0.8rem;
|
||||
padding: 0.2rem 0.7rem;
|
||||
text-align: center;
|
||||
margin: 0.3rem;
|
||||
background: var(--light-color);
|
||||
color: #333;
|
||||
border-radius: 5px;
|
||||
}
|
||||
|
||||
.alert {
|
||||
padding: 0.7rem;
|
||||
margin: 1rem 0;
|
||||
opacity: 0.9;
|
||||
background: var(--light-color);
|
||||
color: #333;
|
||||
}
|
||||
|
||||
.btn-primary,
|
||||
.bg-primary,
|
||||
.badge-primary,
|
||||
.alert-primary {
|
||||
background: var(--primary-color);
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
.btn-light,
|
||||
.bg-light,
|
||||
.badge-light,
|
||||
.alert-light {
|
||||
background: var(--light-color);
|
||||
color: #333;
|
||||
}
|
||||
|
||||
.btn-dark,
|
||||
.bg-dark,
|
||||
.badge-dark,
|
||||
.alert-dark {
|
||||
background: var(--dark-color);
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
.btn-danger,
|
||||
.bg-danger,
|
||||
.badge-danger,
|
||||
.alert-danger {
|
||||
background: var(--danger-color);
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
/* Colour variants (continued): each variant applies to buttons, backgrounds,
   badges and alerts alike. */
.btn-success, .bg-success, .badge-success, .alert-success {
  background: var(--success-color);
  color: #fff;
}

.btn-white, .bg-white, .badge-white, .alert-white {
  background: #fff;
  color: #333;
  border: 1px solid #ccc;
}

/* Buttons fade slightly on hover. */
.btn:hover {
  opacity: 0.8;
}

/* Light surfaces get an outline. */
.bg-light, .badge-light {
  border: 1px solid #ccc;
}

.round-img {
  border-radius: 50%;
}

/* Forms */
input {
  margin: 1.2rem 0;
}

.form-text {
  display: block;
  margin-top: 0.3rem;
  color: #888;
}

/* Text-like controls fill the width of their container. */
input[type='text'],
input[type='email'],
input[type='password'],
input[type='date'],
select,
textarea {
  display: block;
  width: 100%;
  padding: 0.4rem;
  font-size: 1.2rem;
  border: 1px solid #ccc;
}

input[type='submit'], button {
  font: inherit;
}

/* Tables */
table th, table td {
  padding: 1rem;
  text-align: left;
}

table th {
  background: var(--light-color);
}

/* Navbar */
.navbar {
  display: flex;
  justify-content: space-between;
  align-items: center;
  padding: 0.7rem 2rem;
  z-index: 1;
  width: 100%;
  opacity: 0.9;
  margin-bottom: 1rem;
}

.navbar ul {
  display: flex;
}

.navbar a {
  color: #fff;
  padding: 0.45rem;
  margin: 0 0.25rem;
}

.navbar a:hover {
  color: var(--light-color);
}

.navbar .welcome span {
  margin-right: 0.6rem;
}

/* Mobile Styles: applied at viewport widths of 700px and below. */
@media (max-width: 700px) {
  .hide-sm {
    display: none;
  }

  /* Collapse every grid to a single column. */
  .grid-2, .grid-3, .grid-4 {
    grid-template-columns: 1fr;
  }

  /* Text Styles */
  .x-large {
    font-size: 3rem;
  }

  .large {
    font-size: 2rem;
  }

  .lead {
    font-size: 1rem;
  }

  /* Navbar */
  .navbar {
    display: block;
    text-align: center;
  }

  .navbar ul {
    text-align: center;
    justify-content: center;
  }
}
|
1
pug/email.css
Normal file
1
pug/email.css
Normal file
@ -0,0 +1 @@
|
||||
@import url('https://fonts.googleapis.com/css?family=Roboto');:root {--primary-color: #dc3545;--dark-color: #333333;--light-color: #f4f4f4;--danger-color: #dc3545;--success-color: #28a745;}* {box-sizing: border-box;margin: 0;padding: 0;}body {font-family: 'Roboto', sans-serif;font-size: 1rem;line-height: 1.6;background-color: #fff;color: #333;}a {color: var(--primary-color);text-decoration: none;}a:hover {color: #666;}ul {list-style: none;}img {width: 100%;}.container {max-width: 1100px;margin: auto;overflow: hidden;padding: 0 2rem;}.x-large {font-size: 4rem;line-height: 1.2;margin-bottom: 1rem;}.large {font-size: 3rem;line-height: 1.2;margin-bottom: 1rem;}.lead {font-size: 1.5rem;margin-bottom: 1rem;}.text-center {text-align: center;}.text-primary {color: var(--primary-color);}.text-dark {color: var(--dark-color);}.text-success, ins {color: var(--success-color);}.text-danger, del {color: var(--danger-color);}.text-center {text-align: center;}.text-right {text-align: right;}.text-left {text-align: left;}.all-center {display: flex;flex-direction: column;width: 100%;margin: auto;justify-content: center;align-items: center;text-align: center;}.card {padding: 1rem;border: #ccc 1px dotted;margin: 0.7rem 0;}.list {margin: 0.5rem 0;}.list li {padding-bottom: 0.3rem;}.p {padding: 0.5rem;}.p-1 {padding: 1rem;}.p-2 {padding: 2rem;}.p-3 {padding: 3rem;}.py {padding: 0.5rem 0;}.py-1 {padding: 1rem 0;}.py-2 {padding: 2rem 0;}.py-3 {padding: 3rem 0;}.m {margin: 0.5rem;}.m-1 {margin: 1rem;}.m-2 {margin: 2rem;}.m-3 {margin: 3rem;}.my {margin: 0.5rem 0;}.my-1 {margin: 1rem 0;}.my-2 {margin: 2rem 0;}.my-3 {margin: 3rem 0;}.grid-2 {display: grid;grid-template-columns: repeat(2, 1fr);grid-gap: 1rem;}.grid-3 {display: grid;grid-template-columns: repeat(3, 1fr);grid-gap: 1rem;}.grid-4 {display: grid;grid-template-columns: repeat(4, 1fr);grid-gap: 1rem;}.btn {display: inline-block;background: var(--light-color);color: #333;padding: 0.4rem 1.3rem;font-size: 1rem;border: none;cursor: 
pointer;margin-right: 0.5rem;transition: opacity 0.2s ease-in;outline: none;}.btn-link {background: none;padding: 0;margin: 0;}.btn-block {display: block;width: 100%;}.btn-sm {font-size: 0.8rem;padding: 0.3rem 1rem;margin-right: 0.2rem;}.badge {display: inline-block;font-size: 0.8rem;padding: 0.2rem 0.7rem;text-align: center;margin: 0.3rem;background: var(--light-color);color: #333;border-radius: 5px;}.alert {padding: 0.7rem;margin: 1rem 0;opacity: 0.9;background: var(--light-color);color: #333;}.btn-primary, .bg-primary, .badge-primary, .alert-primary {background: var(--primary-color);color: #fff;}.btn-light, .bg-light, .badge-light, .alert-light {background: var(--light-color);color: #333;}.btn-dark, .bg-dark, .badge-dark, .alert-dark {background: var(--dark-color);color: #fff;}.btn-danger, .bg-danger, .badge-danger, .alert-danger {background: var(--danger-color);color: #fff;}.btn-success, .bg-success, .badge-success, .alert-success {background: var(--success-color);color: #fff;}.btn-white, .bg-white, .badge-white, .alert-white {background: #fff;color: #333;border: #ccc solid 1px;}.btn:hover {opacity: 0.8;}.bg-light, .badge-light {border: #ccc solid 1px;}.round-img {border-radius: 50%;}input {margin: 1.2rem 0;}.form-text {display: block;margin-top: 0.3rem;color: #888;}input[type='text'], input[type='email'], input[type='password'], input[type='date'], select, textarea {display: block;width: 100%;padding: 0.4rem;font-size: 1.2rem;border: 1px solid #ccc;}input[type='submit'], button {font: inherit;}table th, table td {padding: 1rem;text-align: left;}table th {background: var(--light-color);}.navbar {display: flex;justify-content: space-between;align-items: center;padding: 0.7rem 2rem;z-index: 1;width: 100%;opacity: 0.9;margin-bottom: 1rem;}.navbar ul {display: flex;}.navbar a {color: #fff;padding: 0.45rem;margin: 0 0.25rem;}.navbar a:hover {color: var(--light-color);}.navbar .welcome span {margin-right: 0.6rem;}@media (max-width: 700px) {.hide-sm {display: 
none;}.grid-2, .grid-3, .grid-4 {grid-template-columns: 1fr;}.x-large {font-size: 3rem;}.large {font-size: 2rem;}.lead {font-size: 1rem;}.navbar {display: block;text-align: center;}.navbar ul {text-align: center;justify-content: center;}}
|
24
pug/email.pug
Normal file
24
pug/email.pug
Normal file
@ -0,0 +1,24 @@
|
||||
//- Change-alert e-mail.
//- Locals used: name, url, since (number of days) and changed (diff markup).
html(lang="en")
  head
    style
      include email.css
    meta(charset='utf-8')
    //- Title text is kept inline so it cannot be parsed as a child tag.
    title ChangeDetection #{name}
  body.container
    //- name is plain text, so it is interpolated escaped.
    h1 ChangeDetection: #{name}

    p.m At your request we are sending you this alert to let you know that a page you have been monitoring has changed.

    p.m The following monitored page has changed:

    a.m(href=url)= url

    case since
      when 1: p.m It has been 1 day since the page last changed.
      default: p.m It has been #{since} days since the page last changed.

    p.m See below for the details of the change

    //- changed is HTML produced by the diff library's prettyHtml(), so it
    //- must stay unescaped to render the markup.
    blockquote.card !{changed}
|
||||
|
18
pug/test.pug
Normal file
18
pug/test.pug
Normal file
@ -0,0 +1,18 @@
|
||||
//- Test variant of the change-alert e-mail: pulls its stylesheet from a
//- remote URL instead of inlining it and omits the "days since" line.
//- Locals used: name, url and changed (diff markup).
html(lang="en")
  head
    link(rel="stylesheet", href="https://www.gitcdn.xyz/repo/bradtraversy/github-finder/master/src/App.css")
    meta(charset='utf-8')
    //- Title text is kept inline so it cannot be parsed as a child tag.
    title ChangeDetection #{name}
  body.container
    //- name is plain text, so it is interpolated escaped.
    h1 ChangeDetection: #{name}

    p.m At your request we are sending you this alert to let you know that a page you have been monitoring has changed.

    p.m The following monitored page has changed:

    a.m(href=url)= url

    p.m See below for the details of the change

    //- changed is pre-rendered HTML and must stay unescaped.
    blockquote.card !{changed}
|
33
rc.js
Normal file
33
rc.js
Normal file
@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env node
|
||||
const CronJob = require('cron').CronJob;
|
||||
|
||||
// load env variables from file
|
||||
require('dotenv').config();
|
||||
|
||||
const Ireland = require('./scrapers/rc');
|
||||
|
||||
/**
 * Boot the RC scraper.
 *
 * When the IE_CRON environment variable holds a cron expression, a CronJob is
 * created (and started immediately, via the fourth constructor argument) that
 * runs the scraper on that schedule. When SCRAPE_START equals the scraper's
 * id, the scraper is also run once straight away.
 *
 * @returns {Promise<void>} Rejects if the immediate run fails; failures of
 *   scheduled runs are logged and do not reject.
 */
async function run() {
  const ieScraper = new Ireland();
  const cronSpec = process.env.IE_CRON;

  if (typeof cronSpec === 'string') {
    console.log(`${ieScraper.id} cron set for ${cronSpec}`);

    new CronJob(cronSpec, async function onTick() {
      // Nothing awaits the tick callback, so a failure must be handled here
      // or it becomes an unhandled promise rejection.
      try {
        await ieScraper.run();
      }
      catch (err) {
        console.error(`${ieScraper.id} scheduled run failed`, err);
      }
    }, null, true);
  }

  if (process.env.SCRAPE_START === ieScraper.id) {
    console.log('go');
    await ieScraper.run();
  }

  console.log('RC Launched');
}
|
||||
|
||||
// Log the first uncaught exception. `once` removes the handler after that
// first event, so a second uncaught exception gets the default behaviour.
process.once('uncaughtException', function caught(err) {
  console.error('Uncaught', err);
});

// run() is async: without a rejection handler a start-up failure would be an
// unhandled promise rejection, which the handler above does not see.
run().catch(function failed(err) {
  console.error('RC failed to start', err);
  process.exitCode = 1;
});
|
||||
|
26
realsettings.json
Normal file
26
realsettings.json
Normal file
@ -0,0 +1,26 @@
|
||||
[
|
||||
{
|
||||
"url": "https://www.harmankardon.co.uk/outlet/",
|
||||
"name": "Outlet"
|
||||
},
|
||||
{
|
||||
"url": "http://www.harmankardon.co.uk/sale-uk/",
|
||||
"name": "harmankardon Sale UK"
|
||||
},
|
||||
{
|
||||
"url": "http://www.seanharry.com/events/ultimates/",
|
||||
"name": "Starfury Ultimates"
|
||||
},
|
||||
|
||||
{
|
||||
"url": "http://www.seanharry.com/vampire/index.html",
|
||||
"name": "Starfury Vampire"
|
||||
},
|
||||
{
|
||||
"url": "https://www.royalcaribbean.co.uk/itinerary-details/?itin=07E233&ship=AL&sail=20201122&room=OceanView",
|
||||
"name": "Cruise"
|
||||
},{
|
||||
"url": "https://www.silvrtree.co.uk/cinema/1",
|
||||
"name": "Imax Glasgow"
|
||||
}
|
||||
]
|
154
scrapers/outlet.js
Normal file
154
scrapers/outlet.js
Normal file
@ -0,0 +1,154 @@
|
||||
const Scraper = require('../lib/scraper');
|
||||
const cheerio = require('cheerio');
|
||||
const path = require('path');
|
||||
const logger = require('log4js').getLogger('RC');
|
||||
const LocalStorage = require('node-localstorage').LocalStorage;
|
||||
const fs = require('fs');
|
||||
|
||||
const HtmlDiffer = require('html-differ').HtmlDiffer;
|
||||
|
||||
const diffLogger = require('html-differ/lib/logger');
|
||||
|
||||
|
||||
const Diff = require('text-diff');
|
||||
|
||||
logger.level = process.env.LOGGER_LEVEL || 'debug';
|
||||
|
||||
/**
 * Loads the Harman Kardon outlet page and compares its rendered text with the
 * snapshot saved by the previous run, logging an HTML diff when it changed.
 */
class OutletScrape extends Scraper {
  constructor() {
    super();

    this.setID('OUTLET');

    // Public entry point: __run() wrapped by the _debounce helper (not
    // defined in this class), with 5000 as the helper's second argument.
    this.run = this._debounce(async () => {
      await this.__run();
    }, 5000);
  }

  /**
   * Compare the text of the currently loaded page with the stored snapshot.
   *
   * On the first run (no snapshot file yet) the text is saved as the
   * baseline. On later runs the Levenshtein distance between snapshot and
   * current text is logged; when it is non-zero the pretty-printed diff is
   * logged and the snapshot is replaced with the current text.
   *
   * @returns {Promise<void>}
   */
  async process() {
    const oldFile = `${this.path}/previous.html`;

    const innerText = await this.page.evaluate(() => {
      return {
        'body': document.body.innerText
      };
    });

    if (!fs.existsSync(oldFile)) {
      // Baseline snapshot. The original wrote `body.body`, but no `body`
      // variable exists here, so the very first run threw a ReferenceError.
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');

      return;
    }

    const previousFile = fs.readFileSync(oldFile, 'utf-8');
    const diff = new Diff();
    const textDiff = diff.main(previousFile, innerText.body);
    const levenshtein = diff.levenshtein(textDiff);

    logger.debug('levenshtein:', levenshtein);

    if (levenshtein !== 0) {
      logger.debug(diff.prettyHtml(textDiff));

      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
    }
  }

  /**
   * Prepare the artefact directory, open a browser page with a 1200x800
   * viewport and navigate to the outlet page.
   *
   * @returns {Promise<void>}
   * @throws {Error} Whatever the setup steps throw; non-Error values are
   *   wrapped in an Error.
   */
  async start() {
    await super._start();

    try {
      this.startPage = 'https://www.harmankardon.co.uk/outlet/';

      // NOTE(review): the result was only used by click code that is now
      // disabled; the call is kept in case the helper has side effects.
      OutletScrape.notARobot();

      // Awaited for consistency with ChangeDetection.start(); harmless if
      // setPath turns out to be synchronous.
      await this.setPath(path.resolve(`${__dirname }/../artefacts/outlet`));

      await this._initBrowser(true);
      await this._createBrowserPage();

      await this.page.setViewport({ 'width': 1200, 'height': 800 });
      await this._goto(this.startPage);

      await this._randomWait(this.page, 3, 5);
      logger.debug('loaded..');
    }
    catch (e) {
      // Re-throw the original error so its stack trace is preserved.
      throw e instanceof Error ? e : new Error(e);
    }
  }

  /**
   * Run one cycle: start the browser session, then compare the page against
   * the stored snapshot.
   *
   * NOTE(review): unlike ChangeDetection.__run(), this never calls _done();
   * confirm whether the browser session needs to be released here.
   *
   * @returns {Promise<void>}
   * @throws {Error} Whatever start() or process() throw.
   */
  async __run() {
    try {
      logger.debug('run');
      await this.start();

      await this.process();

      logger.debug('Done...');
    }
    catch (e) {
      throw e instanceof Error ? e : new Error(e);
    }
  }
}
|
||||
|
||||
module.exports = OutletScrape;
|
129
scrapers/rc.js
Normal file
129
scrapers/rc.js
Normal file
@ -0,0 +1,129 @@
|
||||
const Scraper = require('../lib/scraper');
|
||||
const cheerio = require('cheerio');
|
||||
const path = require('path');
|
||||
const logger = require('log4js').getLogger('RC');
|
||||
const LocalStorage = require('node-localstorage').LocalStorage;
|
||||
const fs = require('fs');
|
||||
|
||||
const Diff = require('text-diff');
|
||||
|
||||
logger.level = process.env.LOGGER_LEVEL || 'debug';
|
||||
|
||||
/**
 * Loads a Royal Caribbean itinerary page and stores a baseline snapshot of
 * its rendered text.
 */
class RCScrape extends Scraper {
  constructor() {
    super();

    this.setID('RC');

    // Public entry point: __run() wrapped by the _debounce helper (not
    // defined in this class), with 5000 as the helper's second argument.
    this.run = this._debounce(async () => {
      await this.__run();
    }, 5000);
  }

  /**
   * Save the text of the currently loaded page as the baseline snapshot when
   * no snapshot file exists yet.
   *
   * TODO: when a snapshot already exists nothing is compared or updated, so
   * changes are not detected yet (OutletScrape.process() shows the intended
   * diff logic).
   *
   * @returns {Promise<void>}
   */
  async process() {
    const oldFile = `${this.path}/previous.html`;

    const innerText = await this.page.evaluate(() => {
      return {
        'body': document.body.innerText
      };
    });

    // The original wrote `body.body`, but no `body` variable exists here, so
    // the first run threw a ReferenceError instead of saving the baseline.
    if (!fs.existsSync(oldFile))
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
  }

  /**
   * Prepare the artefact directory, open a browser page with a 1200x800
   * viewport and navigate to the itinerary page.
   *
   * @returns {Promise<void>}
   * @throws {Error} Whatever the setup steps throw; non-Error values are
   *   wrapped in an Error.
   */
  async start() {
    await super._start();

    try {
      this.startPage = 'https://www.royalcaribbean.co.uk/itinerary-details/?itin=07E233&ship=AL&sail=20201122&room=OceanView';

      // NOTE(review): the result was only used by click code that is now
      // disabled; the call is kept in case the helper has side effects.
      RCScrape.notARobot();

      // Awaited for consistency with ChangeDetection.start(); harmless if
      // setPath turns out to be synchronous.
      await this.setPath(path.resolve(`${__dirname }/../artefacts/rc`));

      await this._initBrowser(true);
      await this._createBrowserPage();

      await this.page.setViewport({ 'width': 1200, 'height': 800 });
      await this._goto(this.startPage);

      await this._randomWait(this.page, 3, 5);
      logger.debug('loaded..');
    }
    catch (e) {
      // Re-throw the original error so its stack trace is preserved.
      throw e instanceof Error ? e : new Error(e);
    }
  }

  /**
   * Run one cycle: start the browser session, then snapshot the page.
   *
   * @returns {Promise<void>}
   * @throws {Error} Whatever start() or process() throw.
   */
  async __run() {
    try {
      logger.debug('run');
      await this.start();

      await this.process();
    }
    catch (e) {
      throw e instanceof Error ? e : new Error(e);
    }
  }
}
|
||||
|
||||
module.exports = RCScrape;
|
247
scrapers/scraper.js
Normal file
247
scrapers/scraper.js
Normal file
@ -0,0 +1,247 @@
|
||||
const Scraper = require('../lib/scraper');
|
||||
const path = require('path');
|
||||
const logger = require('log4js').getLogger('RC');
|
||||
|
||||
const fs = require('fs');
|
||||
const dateFormat = require('dateformat');
|
||||
|
||||
const _ = require('lodash');
|
||||
const jsonfile = require('jsonfile');
|
||||
|
||||
const Diff = require('text-diff');
|
||||
|
||||
const time = require("time-since");
|
||||
const pug = require('pug');
|
||||
// SMTP transport used for alert e-mails.
// The password is read from the environment (SMTP_PASS) so that it is not
// committed with the source; host, port and user may be overridden the same
// way and otherwise keep their previous values.
// NOTE(review): the password that used to be hard-coded here is in the
// repository history and should be rotated.
const email = require('smtp-email-sender')({
  'host': process.env.SMTP_HOST || 'mail.caliban.io',
  'port': process.env.SMTP_PORT || '465',
  'auth': {
    'user': process.env.SMTP_USER || 'aida@caliban.io',
    'pass': process.env.SMTP_PASS,
    'type': 'LOGIN' // PLAIN, LOGIN, MD5 etc...
  },
  'secure': 'secure'
});
|
||||
|
||||
logger.level = process.env.LOGGER_LEVEL || 'debug';
|
||||
|
||||
/**
 * Visits every page listed in settings.json, compares its rendered text with
 * the snapshot saved on the previous visit and records per-page change
 * statistics in stats.json.
 */
class ChangeDetection extends Scraper {
  constructor() {
    super();

    this.setID('CD');

    // Public entry point: __run() wrapped by the _debounce helper (not
    // defined in this class), with 5000 as the helper's second argument.
    this.run = this._debounce(async () => {
      await this.__run();
    }, 5000);
  }

  /**
   * Render the alert e-mail template and write the resulting HTML to the
   * debug log (nothing is sent).
   *
   * @param {Object} data - Template locals.
   * @param {string} newpath - Directory that contains the `pug` folder.
   */
  pugTest(data, newpath) {
    logger.debug(pug.renderFile(`${newpath}/pug/email.pug`, data));
  }

  /**
   * Render the alert e-mail template and send it with the screenshot
   * attached.
   *
   * @param {Object} data - Template locals; `data.name` is used in the
   *   subject and `data.screenshot` (path without extension) for the
   *   attachment.
   * @param {string} newPath - Directory that contains the `pug` folder.
   */
  sendSMTP(data, newPath) {
    const attachments = [
      {
        'path': `${data.screenshot}.png`
      }
    ];

    const html = pug.renderFile(`${newPath}/pug/email.pug`, data);

    email({
      'from': 'Aida <aida@caliban.io>',
      'to': 'Martin <martind2000@gmail.com>',
      'subject': `ChangeDetection: ${data.name}`,
      'html': html,
      'attachments': attachments
    });
  }

  /**
   * Visit one monitored page and compare its text with the stored snapshot.
   *
   * On the first visit the text is stored as the baseline. On later visits,
   * when the Levenshtein distance to the snapshot is non-zero, the change is
   * recorded in the stats entry, a screenshot is taken, the snapshot is
   * replaced and the alert template is rendered to the debug log.
   *
   * @param {{name: string, url: string}} item - Entry from settings.json.
   * @returns {Promise<void>}
   */
  async processItem(item) {
    logger.debug(`Processing ${item.name}...`);

    const now = new Date();
    const filename = _.kebabCase(item.name);
    const oldFile = `${this.path}/${filename}.html`;
    const stats = this.stats.get(filename) || { 'lastSaved': now, 'lastChanged': null };

    await this._goto(item.url);

    await this._randomWait(this.page, 3, 5);

    const innerText = await this.page.evaluate(() => {
      return {
        'body': document.body.innerText
      };
    });

    if (!fs.existsSync(oldFile)) {
      // First visit: store the baseline and an initial stats entry.
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
      this.stats.set(filename, stats);

      return;
    }

    const previousFile = fs.readFileSync(oldFile, 'utf-8');
    const diff = new Diff();
    const textDiff = diff.main(previousFile, innerText.body);
    const levenshtein = diff.levenshtein(textDiff);

    logger.debug('levenshtein:', levenshtein);

    if (levenshtein === 0)
      return;

    logger.info('Changed...');

    const timestamp = dateFormat(now, 'yyyymmddHHMM');
    const screenshotPath = `${this.path}/screenshots/${filename}-${timestamp}`;

    // previousChange must be captured before lastSaved is overwritten.
    stats.previousChange = stats.lastSaved;
    stats.lastSaved = now;
    stats.lastChanged = now;
    stats.screenshot = screenshotPath;
    stats.changed = diff.prettyHtml(textDiff);
    stats.levenshtein = levenshtein;

    // Stats loaded from stats.json hold dates as strings, hence new Date().
    stats.since = time.since(new Date(stats.previousChange)).days();

    await this._makeScreenshotV2(this.page, screenshotPath, null);

    await this._randomWait(this.page, 3, 5);

    fs.writeFileSync(oldFile, innerText.body, 'utf-8');
    this.stats.set(filename, stats);

    const pugData = { ...stats, ...item };

    logger.debug(pugData);
    this.pugTest(pugData, './');
  }

  /**
   * Process every configured page in order, one at a time.
   *
   * @returns {Promise<void>}
   */
  async processItems() {
    for (const item of this.settings)
      await this.processItem(item);
  }

  /**
   * Legacy single-page comparison against `previous.html`; __run() no longer
   * calls it.
   *
   * @returns {Promise<void>}
   */
  async processOld() {
    const oldFile = `${this.path}/previous.html`;

    const innerText = await this.page.evaluate(() => {
      return {
        'body': document.body.innerText
      };
    });

    if (!fs.existsSync(oldFile)) {
      // The original wrote `body.body`, but no `body` variable exists here,
      // which threw a ReferenceError.
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');

      return;
    }

    const previousFile = fs.readFileSync(oldFile, 'utf-8');
    const diff = new Diff();
    const textDiff = diff.main(previousFile, innerText.body);
    const levenshtein = diff.levenshtein(textDiff);

    logger.debug('levenshtein:', levenshtein);

    if (levenshtein !== 0) {
      logger.debug(diff.prettyHtml(textDiff));

      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
    }
  }

  /**
   * Prepare the artefact and screenshot directories and open a browser page
   * with a 1200x800 viewport. No page is loaded here; processItem()
   * navigates to each monitored URL.
   *
   * @returns {Promise<void>}
   * @throws {Error} Whatever the setup steps throw; non-Error values are
   *   wrapped in an Error.
   */
  async start() {
    await super._start();

    try {
      this.startPage = 'https://www.harmankardon.co.uk/outlet/';

      // NOTE(review): the result was only used by click code that is now
      // disabled; the call is kept in case the helper has side effects.
      ChangeDetection.notARobot();

      await this.setPath(path.resolve(`${__dirname }/../artefacts`));

      await this._createDirectory(`${this.path}/screenshots`);

      await this._initBrowser(true);
      await this._createBrowserPage();

      await this.page.setViewport({ 'width': 1200, 'height': 800 });

      await this._randomWait(this.page, 3, 5);
      logger.debug('Started..');
    }
    catch (e) {
      // Re-throw the original error so its stack trace is preserved.
      throw e instanceof Error ? e : new Error(e);
    }
  }

  /**
   * Load the list of monitored pages from settings.json (resolved against
   * the working directory) and the saved statistics from stats.json in the
   * artefact directory, if that file exists.
   *
   * @returns {Promise<void>}
   */
  async loadSettings() {
    logger.debug('Load settings...');
    const statsFile = `${this.path}/stats.json`;

    this.settings = jsonfile.readFileSync('settings.json');

    let stats = [];

    if (fs.existsSync(statsFile))
      stats = jsonfile.readFileSync(statsFile) || [];

    // stats.json holds the Map as an array of [key, value] entries.
    this.stats = new Map(stats);
  }

  /**
   * Write the statistics Map to stats.json as an array of [key, value]
   * entries.
   *
   * @returns {Promise<void>}
   */
  async saveSettings() {
    logger.debug('Save settings...');
    const statsFile = `${this.path}/stats.json`;

    const stats = [...this.stats];

    jsonfile.writeFileSync(statsFile, stats);
  }

  /**
   * Run one full cycle: start the browser, load settings and statistics,
   * check every monitored page, save the statistics and call _done().
   *
   * NOTE(review): if a page check throws, the statistics are not saved and
   * _done() is not called; confirm whether that is intended.
   *
   * @returns {Promise<void>}
   * @throws {Error} Whatever any step throws.
   */
  async __run() {
    try {
      logger.debug('run');
      await this.start();

      await this.loadSettings();

      logger.debug('Running...');

      await this.processItems();

      await this.saveSettings();

      await this._done();
    }
    catch (e) {
      throw e instanceof Error ? e : new Error(e);
    }
  }
}
|
||||
|
||||
module.exports = ChangeDetection;
|
26
settings.json
Normal file
26
settings.json
Normal file
@ -0,0 +1,26 @@
|
||||
[
|
||||
{
|
||||
"url": "https://www.harmankardon.co.uk/outlet/",
|
||||
"name": "Outlet"
|
||||
},
|
||||
{
|
||||
"url": "http://www.harmankardon.co.uk/sale-uk/",
|
||||
"name": "harmankardon Sale UK"
|
||||
},
|
||||
{
|
||||
"url": "http://www.seanharry.com/events/ultimates/",
|
||||
"name": "Starfury Ultimates"
|
||||
},
|
||||
|
||||
{
|
||||
"url": "http://www.seanharry.com/vampire/index.html",
|
||||
"name": "Starfury Vampire"
|
||||
},
|
||||
{
|
||||
"url": "https://www.royalcaribbean.co.uk/itinerary-details/?itin=07E233&ship=AL&sail=20201122&room=OceanView",
|
||||
"name": "Cruise"
|
||||
},{
|
||||
"url": "https://www.silvrtree.co.uk/cinema/1",
|
||||
"name": "Imax Glasgow"
|
||||
}
|
||||
]
|
Loading…
Reference in New Issue
Block a user