This commit is contained in:
Martin Donnelly 2019-10-21 23:38:27 +01:00
commit 9858a90912
18 changed files with 4430 additions and 0 deletions

55
.eslintrc Normal file
View File

@ -0,0 +1,55 @@
{
"parserOptions": {
"ecmaVersion": 2017,
"sourceType": "module",
"ecmaFeatures": {
"jsx": false
}
},
"env": {
"browser": false,
"node": true,
"es6": true
},
"rules": {
"arrow-spacing": "error",
"block-scoped-var": "error",
"block-spacing": "error",
"brace-style": ["error", "stroustrup", {}],
"camelcase": "error",
"comma-dangle": ["error", "never"],
"comma-spacing": ["error", { "before": false, "after": true }],
"comma-style": [1, "last"],
"consistent-this": [1, "_this"],
"curly": [1, "multi"],
"eol-last": 1,
"eqeqeq": 1,
"func-names": 1,
"indent": ["error", 2, { "SwitchCase": 1 }],
"lines-around-comment": ["error", { "beforeBlockComment": true, "allowArrayStart": true }],
"max-len": [1, 180, 2], // warn above 180 chars per line; a tab counts as 2 columns
"new-cap": 1,
"newline-before-return": "error",
"no-array-constructor": 1,
"no-inner-declarations": [1, "both"],
"no-mixed-spaces-and-tabs": 1,
"no-multi-spaces": 2,
"no-new-object": 1,
"no-shadow-restricted-names": 1,
"object-curly-spacing": ["error", "always"],
"padded-blocks": ["error", { "blocks": "never", "switches": "always" }],
"prefer-const": "error",
"prefer-template": "error",
"one-var": 0,
"quote-props": ["error", "always"],
"quotes": [1, "single"],
"radix": 1,
"semi": [1, "always"],
"space-before-blocks": [1, "always"],
"space-infix-ops": 1,
"vars-on-top": 1,
"no-multiple-empty-lines": ["error", { "max": 1, "maxEOF": 1 }],
"spaced-comment": ["error", "always", { "markers": ["/"] }]
}
}

162
.gitignore vendored Normal file
View File

@ -0,0 +1,162 @@
# Created by .ignore support plugin (hsz.mobi)
### Node template
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
# nyc test coverage
.nyc_output
# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# Typescript v1 declaration files
typings/
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
.env
### macOS template
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
.idea/
# User-specific stuff:
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.xml
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
# CMake
cmake-build-debug/
# Mongo Explorer plugin:
.idea/**/mongoSettings.xml
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
artefacts/screenshots/*.png
artefacts/*.txt
artefacts/*.json
artefacts/*.html
artefacts/*
/tests/*.zip
/output/
/dist/
!/tests/data/
/tests/sink/
/debug/
/update.sh
/setup/web/
/backup/
/archive.tar.gz
/user/
/zip

34
changedetection.js Normal file
View File

@ -0,0 +1,34 @@
#!/usr/bin/env node
const CronJob = require('cron').CronJob;
// load env variables from file
require('dotenv').config();
const ChangeDetection = require('./scrapers/scraper');
/**
 * Boots the change-detection scraper.
 *
 * When CD_CRON holds a non-empty cron expression the scraper is scheduled with
 * it; when SCRAPE_START equals the scraper id it is also run once immediately.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const cdScraper = new ChangeDetection();
  const cronSpec = process.env.CD_CRON;

  // An empty value (e.g. `CD_CRON=` in .env) means "no schedule", not a cron pattern
  if (typeof cronSpec === 'string' && cronSpec.trim() !== '') {
    console.log(`${cdScraper.id} cron set for ${cronSpec}`);
    new CronJob(cronSpec, async function() {
      console.log('go');
      // The tick's promise is not awaited by this script, so a rejection here
      // would otherwise surface as an unhandled rejection
      try {
        await cdScraper.run();
      }
      catch (err) {
        console.error(`${cdScraper.id} scheduled run failed`, err);
      }
    }, null, true);
  }

  if (process.env.SCRAPE_START === cdScraper.id) {
    console.log('go');
    await cdScraper.run();
  }

  console.log('Change Detection Launched');
}
// Log the first uncaught exception instead of letting it kill the process silently
process.once('uncaughtException', function caught(err) {
  console.error('Uncaught', err);
});

// run() is async: report a failed start-up rather than leaving the rejection unhandled
run().catch(function failed(err) {
  console.error('Uncaught', err);
});

791
lib/scraper.js Normal file
View File

@ -0,0 +1,791 @@
const fs = require('fs-extra');
const path = require('path');
const url = require('url');
const log4js = require('log4js');
let logger = log4js.getLogger('Scraper');
const EventEmitter = require('events');
const dateFormat = require('dateformat');
const puppeteer = require('puppeteer');
logger.level = process.env.LOGGER_LEVEL || 'debug';
/**
 * Base class for the Puppeteer-driven scrapers in this project.
 *
 * Owns the browser / page lifecycle, request filtering, crash signalling (the
 * `recover` event) and a set of small file and string helpers.
 *
 * NOTE(review): `removeAccents`, `crc`, `camelCase`, `archiver` and
 * `tldExtract` are used below but are never required in this file, so
 * `_makeFileName`, `_makeFieldName`, `_createArchive` and `_createArchiveV2`
 * throw a ReferenceError (and `explodeURL` returns one) until they are.
 * `recover`, `_uploadError`, `mode` and `modePrefix` are not defined here
 * either - presumably supplied by subclasses; confirm.
 */
class Scraper extends EventEmitter {
  constructor() {
    super(); // must call super for "this" to be defined.

    // A request is aborted when its URL contains any of these fragments
    this.filters = [
      'livefyre',
      'moatad',
      'analytics',
      'controltag',
      'chartbeat',
      'siteimprove',
      'hotjar',
      '/plugins/cookie-notice/',
      'addthis',
      'facebook.',
      'linkedin',
      'googletagmanager',
      'swiftypecdn.com',
      '-social-tracking.',
      'demdex.net',
      'adobedtm.com'
    ];

    this.perf = {
      'started': 0,
      'finished': 0,
      'time': 0,
      'scraped': 0
    };

    this.browserCrashed = false;
    this.crashLog = new Map();
    this.page = null;

    // (sic) spelling kept because it is part of the instance's visible state.
    // Must start as `false`: the disconnect / close handlers compare with
    // `=== false`, which never matched while the field was left undefined.
    this.detatchable = false;
  }

  /**
   * Sets the scraper id and swaps the module logger for one labelled with it.
   *
   * @param newID
   */
  setID(newID) {
    logger = log4js.getLogger(`Scraper (${newID})`);
    logger.level = process.env.LOGGER_LEVEL || 'warn';
    this.id = newID;
  }

  /**
   * Deletes `path` (file or directory, with contents) when NODE_ENV is
   * 'production'; does nothing otherwise.
   *
   * @param path
   * @returns {Promise<void>}
   */
  async emptyPath(path) {
    if (process.env.NODE_ENV !== 'production') return;

    // fs-extra's remove() stands in for `del`, which this file never required
    await fs.remove(path);
    logger.warn('Deleted files and folders:\n', path);
  }

  /**
   * Sets the output path and the per-scraper debug path, creating both.
   *
   * @param newPath
   * @returns {Promise<void>}
   */
  async setPath(newPath) {
    await this.emptyPath(newPath);

    this.path = `${newPath}`;
    this.debugPath = `${__dirname }/../debug/${this.id}`;

    await this._createDirectory(this.path);
    await this._createDirectory(this.debugPath);
  }

  /**
   * 'Human' like click delay
   * @returns {number} an integer between 90 and 118 inclusive
   */
  static notARobot() {
    return 90 + Math.floor(Math.random() * (30 - 1));
  }

  /**
   * Marks the browser / page as allowed to go away without emitting `recover`.
   */
  canDetach() {
    this.detatchable = true;
  }

  /**
   * Closes the current page and browser, if any. Failures are logged, never thrown.
   *
   * @returns {Promise<void>}
   * @private
   */
  async _killRunningBrowser() {
    if (!this.browser) return;

    try {
      logger.info('Trying to close hanging / running browser');
      await this._forcePageClose();
      // Drop our listener first so this deliberate close does not trigger `recover`
      this.browser.removeAllListeners('disconnected');
      await this.browser.close();
    }
    catch (err) {
      logger.error('Closing browser', err);
    }
    finally {
      this.browser = null;
    }
  }

  /**
   * Launches a fresh browser, closing any previous one first.
   *
   * @param headless ignored (forced to true) when NODE_ENV is 'production'
   * @returns {Promise<void>} rejects with the launch error when Puppeteer cannot start
   * @private
   */
  async _initBrowser(headless = true) {
    // Force headless when running in production
    const realHeadless = (process.env.NODE_ENV === 'production') ? true : headless;

    await this._killRunningBrowser();
    this.browserCrashed = false;

    const launchArgs = [
      '--disable-dev-shm-usage',
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-accelerated-2d-canvas',
      '--disable-gpu',
      '--window-size=1920,1080', // Chromium expects "width,height"
      '--hide-scrollbars',
      '--disable-default-apps'
    ];

    // Use proxy so FCA wont block us - but only when one is actually configured,
    // otherwise Chromium would be handed the literal string "undefined"
    if (process.env.PROXY_URI)
      launchArgs.unshift(`--proxy-server=${process.env.PROXY_URI}`);

    logger.info('Puppeteer.launch', realHeadless);
    logger.debug('Using proxy:', process.env.PROXY_URI);

    try {
      this.browser = await puppeteer.launch({
        'headless': realHeadless,
        'args': launchArgs
      });
    }
    catch (err) {
      logger.error('Puppeteer failed to launch');
      logger.error(err);
      // Rethrow the real cause instead of failing later on an undefined browser
      throw err;
    }

    const browserVersion = await this.browser.version();
    logger.info(`Browser version ${browserVersion}`);

    this.browser.on('disconnected', () => {
      logger.warn('Browser has become detached!');
      if (this.detatchable === false) {
        this.browserCrashed = true;
        logger.warn('browser.onDisconnected::emit recover');
        this.emit('recover');
      }
    });
  }

  /**
   * Closes the current page, if any. Failures are logged, never thrown.
   *
   * @returns {Promise<void>}
   * @private
   */
  async _forcePageClose() {
    if (!this.page) return;

    try {
      logger.warn('Browser Page exists: DESTROYING');
      // Drop our listener first so this deliberate close does not trigger `recover`
      this.page.removeAllListeners('close');
      await this.page.close().catch((e) => {
        logger.debug(e);
      });
    }
    catch (err) {
      logger.error(err);
    }
    finally {
      this.page = null;
    }
  }

  /**
   * Opens a new page with request filtering and crash / close handlers attached.
   *
   * @returns {Promise<void>}
   * @private
   */
  async _createBrowserPage() {
    // Must be awaited: its `finally` clears this.page and would otherwise be
    // able to wipe out the page created just below
    await this._forcePageClose();
    this.page = await this.browser.newPage();

    try {
      await this.page.setDefaultNavigationTimeout(90000);
      await this.page.setDefaultTimeout(90000);
    }
    catch (err) {
      logger.debug(err);
    }

    const isFiltered = (target) => this.filters.some((urlPart) => target.includes(urlPart));

    await this.page.setRequestInterception(true);

    this.page.on('request', (request) => {
      const requestUrl = request.url();
      logger.trace('request', requestUrl);
      if (isFiltered(requestUrl)) request.abort();
      else request.continue();
    });

    this.page.on('dialog', async dialog => {
      logger.warn('Dialog Box', dialog.message());
      await dialog.dismiss();
    });

    this.page.on('error', async err => {
      logger.warn('Page crashed', err);
      if (!this.detatchable) {
        // _uploadError is not defined on this class; only call it when present
        if (typeof this._uploadError === 'function')
          await this._uploadError();
        logger.warn('page.onError::emit recover');
        this.emit('recover');
      }
    });

    this.page.on('pageerror', async err => {
      logger.trace('pageerror', err);
    });

    this.page.on('requestfailed', (request) => {
      // Same accessor as the 'request' handler, rather than the private `_url` field
      const failedUrl = request.url();
      if (isFiltered(failedUrl))
        logger.trace('🚫', failedUrl);
      else
        logger.warn('requestfailed', failedUrl);
    });

    this.page.on('close', () => {
      logger.warn('Browser Page has closed');
      if (this.detatchable === false) {
        logger.warn('page.onClose::emit recover');
        this.emit('recover');
      }
    });
  }

  /**
   * Applies the 'Responsive' viewport and a 90s navigation timeout to the page.
   *
   * NOTE(review): `userAgent` is declared below but never applied to the page.
   *
   * @returns {Promise<void>}
   * @private
   */
  async _makeResponsive() {
    const viewPort = {
      'name': 'Responsive',
      'userAgent' : 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3494.0 Safari/537.36',
      'viewport': {
        'width': 1200,
        'height': 1200,
        'deviceScaleFactor': 4.5,
        'isMobile': true,
        'hasTouch': true,
        'isLandscape': true
      }
    };

    await this.page.setViewport(viewPort.viewport);
    await this.page.setDefaultNavigationTimeout(90000);
  }

  /**
   * Builds a file name from `id`: mode prefix + camel-cased, accent-free id,
   * cut to 175 characters with a CRC32 suffix when longer.
   *
   * @param id
   * @returns {string}
   * @private
   */
  _makeFileName(id) {
    const nonWordChars = /\W/g;
    const maxChars = 175;
    const entity = removeAccents.remove(id.replace(nonWordChars, ' ').trim());
    const _crc = crc.crc32(id).toString(16);
    const output = [this.modePrefix[this.mode], camelCase(entity)].join('');

    return (output.length > maxChars) ? output.substring(0, maxChars).concat('_', _crc) : output;
  }

  /**
   * Joins the output path with the file name for `id`, capped at 240 characters.
   *
   * @param id
   * @returns {Promise<string>}
   * @private
   */
  async _makeFilePath(id) {
    return `${this.path}/${this._makeFileName(id)}`.substring(0, 240);
  }

  /**
   * Takes a full-page screenshot to `<destPath>.png`. Failures are logged, never thrown.
   *
   * @param page
   * @param destPath path without the .png extension
   * @param waitFor optional value passed to page.waitFor() before the shot
   * @returns {Promise<void>}
   * @private
   */
  async _makeScreenshotV2(page, destPath, waitFor = null) {
    try {
      if (waitFor)
        await page.waitFor(waitFor);

      // The scraper's own page may have been torn down while we were waiting
      if (!this.page) {
        logger.warn('_makeScreenshotV2: No Page -- Not taking screenshot');

        return;
      }

      logger.debug('Snapshot', `${destPath}.png`);
      await page.setViewport({ 'width': 1200, 'height': 800 });
      await page.screenshot({ 'path': `${destPath}.png`, 'fullPage': true }).catch(err => {
        logger.error('Screenshot', err);
      });
    }
    catch (err) {
      logger.error('_makeScreenshotV2', err);
    }
  }

  /**
   * Waits a random whole number of seconds between minTime and maxTime (inclusive).
   *
   * @param page
   * @param minTime seconds
   * @param maxTime seconds
   * @param msg optional text added to the log line
   * @returns {Promise<void>}
   * @private
   */
  async _randomWait(page, minTime = 2, maxTime = 10, msg = '') {
    const insertedMsg = (msg.length > 0) ? `${this.id} ${msg} - ` : `${this.id} `;
    const waitTime = Math.floor(Math.random() * (maxTime - minTime + 1) + minTime);

    logger.debug(`${insertedMsg}Waiting ${waitTime} seconds...`);
    await page.waitFor(waitTime * 1000);
  }

  /**
   * Waits `waitTime` tenths of a second; only logs when `msg` is given.
   *
   * @param page
   * @param waitTime units of 100 ms
   * @param msg
   * @returns {Promise<void>}
   * @private
   */
  async _microWait(page, waitTime, msg = '') {
    if (msg !== '') logger.debug(`${msg} - Waiting ${waitTime * 100} ms...`);
    await page.waitFor(waitTime * 100);
  }

  /**
   * Waits `waitTime` hundredths of a second; only logs when `msg` is given.
   *
   * @param page
   * @param waitTime units of 10 ms
   * @param msg
   * @returns {Promise<void>}
   * @private
   */
  async _nanoWait(page, waitTime, msg = '') {
    if (msg !== '') logger.debug(`${msg} - Waiting ${waitTime * 10} ms...`);
    await page.waitFor(waitTime * 10);
  }

  /**
   * Writes `data` to a file under the project's artefacts directory.
   *
   * @param destPath path relative to ../artefacts
   * @param data
   * @returns {Promise<string>} confirmation message; rejects on write failure
   * @private
   */
  async _saveToFile(destPath, data) {
    // use for artefacts saving only
    const fullPath = `${__dirname}/../artefacts/${destPath}`;

    await fs.writeFile(fullPath, data);

    return `File saved to '${fullPath}'`;
  }

  /**
   * Writes `data` to `destPath`.
   *
   * @param destPath
   * @param data
   * @returns {Promise<string>} confirmation message; rejects on write failure
   * @private
   */
  async _dumpFile(destPath, data) {
    await fs.writeFile(destPath, data);

    return `File saved to '${destPath}'`;
  }

  /**
   * Creates `<destPath>/<yyyymmddHHMM>` and returns that path.
   *
   * @param destPath
   * @returns {Promise<string>}
   * @private
   */
  async _createTimestampDirectory(destPath = null) {
    const timestamp = dateFormat(new Date(), 'yyyymmddHHMM');
    const fullPath = `${destPath}/${timestamp}`;

    logger.info('fullPath', fullPath);
    await fs.ensureDir(fullPath);

    return fullPath;
  }

  /**
   * Makes sure `destPath` exists. Failures are logged, never thrown.
   *
   * @param destPath
   * @returns {Promise<*>} destPath, unchanged
   * @private
   */
  async _createDirectory(destPath = null) {
    try {
      await fs.ensureDir(destPath);
    }
    catch (err) {
      logger.error('_createDirectory', err);
    }

    return destPath;
  }

  /**
   * Archives `destPath` (a directory, or a glob pattern when `glob` is true).
   *
   * NOTE(review): `filename` is passed to archiver() in the position of the
   * archive format and no output stream is attached - looks superseded by
   * _createArchiveV2; confirm before relying on it.
   *
   * @param destPath
   * @param filename
   * @param glob
   * @returns {Promise<*>} rejects when a path is missing or archiving fails
   * @private
   */
  async _createArchive(destPath = null, filename = null, glob = false) {
    if (!destPath || !filename) {
      const e = new Error('Missing paths');
      logger.error(e);
      // Stop here; previously execution carried on after the rejection
      throw e;
    }

    const archive = archiver(filename, {
      'zlib': { 'level': 9 } // Sets the compression level.
    });

    return new Promise((resolve, reject) => {
      archive.on('error', reject);

      if (glob)
        archive.glob(`${destPath}`);
      else
        archive.directory(`${destPath}/`);

      archive.finalize().then(() => {
        logger.debug('Archive finished');
        resolve();
      }, reject);
    });
  }

  /**
   * Streams a zip of `destPath` (a directory, or a glob pattern when `glob` is
   * true) into the file `filename`.
   *
   * @param destPath
   * @param filename
   * @param glob
   * @returns {Promise<*>} resolves once the output file is closed; rejects
   * when a path is missing or on an archive / stream error
   * @private
   */
  async _createArchiveV2(destPath = null, filename = null, glob = false) {
    logger.debug('=== _createArchiveV2 :: STREAMING ===');

    if (!destPath || !filename) {
      const e = new Error('Missing paths');
      logger.error(e);
      // Stop here; previously execution carried on and opened a stream on null
      throw e;
    }

    return new Promise((resolve, reject) => {
      const output = fs.createWriteStream(filename);
      const archive = archiver('zip', {
        'TransformOptions': {
          'objectMode':true
        },
        'zlib': { 'level': 6 } // Sets the compression level.
      });

      // Only the stream's 'close' guarantees the zip is fully on disk
      output.on('close', () => {
        logger.debug('Archive finished');
        resolve();
      });
      output.on('error', reject);
      archive.on('error', reject);

      archive.pipe(output);

      if (glob)
        archive.glob(`${destPath}`);
      else
        archive.directory(`${destPath}/`);

      archive.finalize().catch(reject);
    });
  }

  /**
   * Splits the host of `urlStr` into its domain parts.
   *
   * @param urlStr
   * @returns {*} null for an empty input; the caught error object (returned,
   * not thrown) when parsing fails; otherwise the tldExtract result
   */
  explodeURL (urlStr = null) {
    if (!urlStr || urlStr === '')
      return (null);

    try {
      const workURL = url.parse(urlStr);

      return tldExtract.parse_host( workURL.host);
    }
    catch (e) {
      return e;
    }
  }

  /**
   * Get Params from a url string
   *
   * Everything after the first '?' (up to a '#') is split on '&'; each pair is
   * split on its first '=' so values may themselves contain '='. A key with no
   * '=' maps to undefined.
   *
   * @param url
   * @returns {Object} key/value map; empty for non-strings or when there is no query
   */
  _getParamsFromUrl(url) {
    const params = {};

    if (typeof url !== 'string') return params;

    let decoded = url;
    try {
      decoded = decodeURI(url);
    }
    catch (err) {
      // Malformed percent-escape: deliberately fall back to the raw string
    }

    const queryStart = decoded.indexOf('?');
    if (queryStart === -1) return params;

    const query = decoded.slice(queryStart + 1).split('#')[0];

    for (const pair of query.split('&')) {
      if (pair === '') continue;

      const eq = pair.indexOf('=');
      if (eq === -1) params[pair] = undefined;
      else params[pair.slice(0, eq)] = pair.slice(eq + 1);
    }

    return params;
  }

  /**
   * Collapses every run of whitespace (newlines included) to a single space
   * and trims the ends.
   *
   * @param text
   * @returns {string} '' for a falsy input
   * @private
   */
  _cleanUp(text) {
    if (!text) return '';

    // Newlines are treated like any other whitespace. Stripping only the
    // first one, as before, glued the words on either side of it together.
    return text.replace(/\s+/g, ' ').trim();
  }

  /**
   * Turns free text into a camel-cased field name without accents or punctuation.
   *
   * @param text
   * @returns {string} '' for a falsy input
   * @private
   */
  _makeFieldName(text) {
    const removePunctuation = /([^A-Za-z0-9\s])+/g;

    if (!text) return '';

    const cleaned = removeAccents.remove(this._cleanUp(text));

    return camelCase(cleaned.replace(removePunctuation, ''));
  }

  /**
   * Renames `origFN` to `newFN` when `origFN` exists. Failures are logged, never thrown.
   *
   * @param origFN
   * @param newFN
   * @returns {Promise<void>}
   * @private
   */
  async _renameFile(origFN, newFN) {
    try {
      // fs-extra's pathExists() stands in for `checkFileExists`, which this
      // file never defined or required
      const exists = await fs.pathExists(origFN);
      logger.debug(`file exists: ${exists}`);
      if (exists)
        await fs.rename(origFN, newFN);
    }
    catch (e) {
      logger.error(e);
    }
  }

  /**
   * Records the start time and hooks the `recover` event up to this.recover().
   *
   * @returns {Promise<void>}
   * @private
   */
  async _start() {
    logger.debug(`<=- START ${this.id}-=>`);
    this.perf.started = Date.now();

    // Register once only: _start() can be called again and stacking listeners
    // would run recover() several times per event
    if (!this._recoverListener) {
      this._recoverListener = async () => {
        try {
          await this.recover();
        }
        catch (err) {
          logger.error('recover', err);
        }
      };
      this.on('recover', this._recoverListener);
    }
  }

  /**
   * Tears the page and browser down at the end of a run.
   *
   * @returns {Promise<void>}
   * @private
   */
  async _done() {
    logger.info('<=- DONE -=>');
    // OK To close the browser window now
    this.canDetach();
    await this._forcePageClose();
    await this._killRunningBrowser();
    await this._complete();
  }

  /**
   * Requests a garbage collection when node was started with --expose-gc.
   *
   * @returns {Promise<void>}
   * @private
   */
  async _complete() {
    try {
      if (global.gc) global.gc();
    }
    catch (e) {
      logger.warn('`node --expose-gc`');
    }
    logger.info('<=- COMPLETE -=>');
  }

  /**
   * Navigates the page to `url`. Navigation errors are logged, not thrown;
   * unless `noRecover` is set they also emit `recover`.
   *
   * @param url
   * @param options merged over { timeout: 90000, waitUntil: 'networkidle0' }
   * @param noRecover
   * @returns {Promise<void>}
   * @private
   */
  async _goto(url, options = {}, noRecover = false) {
    this.lastUrl = url;

    const newOptions = Object.assign({ 'timeout':90000, 'waitUntil':'networkidle0' }, options);
    logger.debug(newOptions);

    // Without a page there is nothing to navigate; as before, this is logged
    // and does not emit `recover`
    if (!this.page) {
      logger.error('GOTO: no page', url, options);

      return;
    }

    logger.info('Goto:', url);

    try {
      await this.page.goto(url, newOptions);
    }
    catch (err) {
      logger.error('GOTO', err);
      if (err && typeof err.message === 'string' && err.message.includes('net::ERR_FAILED'))
        this.browserCrashed = true;
      if (!noRecover)
        this.emit('recover');
    }
  }

  /**
   * Wraps `fn` so it only runs after `time` ms have passed without a new call.
   *
   * @param fn
   * @param time milliseconds
   * @returns {Function}
   * @private
   */
  _debounce(fn, time) {
    let timeout;

    return function (...args) { // <-- not an arrow function, so `this` is the caller's
      const functionCall = () => fn.apply(this, args);

      clearTimeout(timeout);
      timeout = setTimeout(functionCall, time);
    };
  }

  /**
   * Wraps `callback` so it runs at most once per `limit` ms; calls made in
   * between are dropped.
   *
   * @param callback
   * @param limit milliseconds
   * @returns {Function}
   * @private
   */
  _throttle (callback, limit) {
    let wait = false;

    return function (...args) { // <-- not an arrow function, so `this` is the caller's
      if (wait) return;

      // Forward the caller's `this` (it used to be replaced with null)
      callback.apply(this, args);
      wait = true;
      setTimeout(() => {
        wait = false;
      }, limit);
    };
  }

  /**
   * Wraps `func` so it runs on the first call only; later calls return the
   * first result.
   *
   * @param func
   * @returns {function(): *}
   * @private
   */
  _once(func) {
    let alreadyCalled = false;
    let result;

    return function (...args) {
      if (!alreadyCalled) {
        result = func.apply(this, args);
        alreadyCalled = true;
      }

      return result;
    };
  }

  /**
   * Navigates back to `restartURL`, or to the last URL passed to _goto().
   *
   * @param restartURL
   * @returns {Promise<void>}
   */
  async restart(restartURL) {
    const rURL = restartURL || this.lastUrl;

    logger.info(`Restarting ${this.id} // Going to ${rURL}`);
    await this._goto(rURL);
  }

  /**
   * Writes `data` to `filename`. Failures are logged, never thrown.
   *
   * @param filename
   * @param data
   * @returns {Promise<void>}
   */
  async saveFile(filename, data) {
    try {
      fs.writeFileSync(filename, data);
    }
    catch (err) {
      logger.error(err);
    }
  }
}
module.exports = Scraper;

33
outlet.js Normal file
View File

@ -0,0 +1,33 @@
#!/usr/bin/env node
const CronJob = require('cron').CronJob;
// load env variables from file
require('dotenv').config();
const OutletScrape = require('./scrapers/outlet');
/**
 * Boots the outlet scraper.
 *
 * When OUTLET_CRON holds a non-empty cron expression the scraper is scheduled
 * with it; when SCRAPE_START equals the scraper id it is also run once
 * immediately.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const outlet = new OutletScrape();
  // The guard used to test `process.env.outlet`, a different variable from the
  // OUTLET_CRON that is actually handed to CronJob, so the job was never scheduled
  const cronSpec = process.env.OUTLET_CRON;

  // An empty value (e.g. `OUTLET_CRON=` in .env) means "no schedule", not a cron pattern
  if (typeof cronSpec === 'string' && cronSpec.trim() !== '') {
    console.log(`${outlet.id} cron set for ${cronSpec}`);
    new CronJob(cronSpec, async function() {
      // The tick's promise is not awaited by this script, so a rejection here
      // would otherwise surface as an unhandled rejection
      try {
        await outlet.run();
      }
      catch (err) {
        console.error(`${outlet.id} scheduled run failed`, err);
      }
    }, null, true);
  }

  if (process.env.SCRAPE_START === outlet.id) {
    console.log('go');
    await outlet.run();
  }

  console.log('Outlet Launched');
}
// Log the first uncaught exception instead of letting it kill the process silently
process.once('uncaughtException', function caught(err) {
  console.error('Uncaught', err);
});

// run() is async: report a failed start-up rather than leaving the rejection unhandled
run().catch(function failed(err) {
  console.error('Uncaught', err);
});

2250
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

33
package.json Normal file
View File

@ -0,0 +1,33 @@
{
"name": "changedetection",
"version": "1.0.0",
"description": "",
"main": "server.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"cheerio": "^1.0.0-rc.3",
"fecha": "^3.0.3",
"fs-extra": "latest",
"html-differ": "^1.4.0",
"jsonfile": "^5.0.0",
"lodash": "^4.17.15",
"log4js": "^5.1.0",
"node-localstorage": "^1.3.1",
"pug": "^2.0.4",
"puppeteer": "^1.19.0",
"smtp-email-sender": "^1.0.0",
"text-diff": "^1.0.1",
"time-since": "^1.0.7",
"underscore": "^1.9.1"
},
"devDependencies": {
"cron": "^1.7.2",
"dateformat": "^3.0.3",
"dotenv": "^8.2.0",
"eslint": "^6.5.1"
}
}

414
pug/App.css Normal file
View File

@ -0,0 +1,414 @@
@import url('https://fonts.googleapis.com/css?family=Roboto');
/* Global Styles */
:root {
--primary-color: #dc3545;
--dark-color: #333333;
--light-color: #f4f4f4;
--danger-color: #dc3545;
--success-color: #28a745;
}
* {
box-sizing: border-box;
margin: 0;
padding: 0;
}
body {
font-family: 'Roboto', sans-serif;
font-size: 1rem;
line-height: 1.6;
background-color: #fff;
color: #333;
}
a {
color: var(--primary-color);
text-decoration: none;
}
a:hover {
color: #666;
}
ul {
list-style: none;
}
img {
width: 100%;
}
/* Utilities */
.container {
max-width: 1100px;
margin: auto;
overflow: hidden;
padding: 0 2rem;
}
/* Text Styles*/
.x-large {
font-size: 4rem;
line-height: 1.2;
margin-bottom: 1rem;
}
.large {
font-size: 3rem;
line-height: 1.2;
margin-bottom: 1rem;
}
.lead {
font-size: 1.5rem;
margin-bottom: 1rem;
}
.text-center {
text-align: center;
}
.text-primary {
color: var(--primary-color);
}
.text-dark {
color: var(--dark-color);
}
.text-success, ins {
color: var(--success-color);
}
.text-danger, del {
color: var(--danger-color);
}
.text-center {
text-align: center;
}
.text-right {
text-align: right;
}
.text-left {
text-align: left;
}
/* Center All */
.all-center {
display: flex;
flex-direction: column;
width: 100%;
margin: auto;
justify-content: center;
align-items: center;
text-align: center;
}
/* Cards */
.card {
padding: 1rem;
border: #ccc 1px dotted;
margin: 0.7rem 0;
}
/* List */
.list {
margin: 0.5rem 0;
}
.list li {
padding-bottom: 0.3rem;
}
/* Padding */
.p {
padding: 0.5rem;
}
.p-1 {
padding: 1rem;
}
.p-2 {
padding: 2rem;
}
.p-3 {
padding: 3rem;
}
.py {
padding: 0.5rem 0;
}
.py-1 {
padding: 1rem 0;
}
.py-2 {
padding: 2rem 0;
}
.py-3 {
padding: 3rem 0;
}
/* Margin */
.m {
margin: 0.5rem;
}
.m-1 {
margin: 1rem;
}
.m-2 {
margin: 2rem;
}
.m-3 {
margin: 3rem;
}
.my {
margin: 0.5rem 0;
}
.my-1 {
margin: 1rem 0;
}
.my-2 {
margin: 2rem 0;
}
.my-3 {
margin: 3rem 0;
}
/* Grid */
.grid-2 {
display: grid;
grid-template-columns: repeat(2, 1fr);
grid-gap: 1rem;
}
.grid-3 {
display: grid;
grid-template-columns: repeat(3, 1fr);
grid-gap: 1rem;
}
.grid-4 {
display: grid;
grid-template-columns: repeat(4, 1fr);
grid-gap: 1rem;
}
.btn {
display: inline-block;
background: var(--light-color);
color: #333;
padding: 0.4rem 1.3rem;
font-size: 1rem;
border: none;
cursor: pointer;
margin-right: 0.5rem;
transition: opacity 0.2s ease-in;
outline: none;
}
.btn-link {
background: none;
padding: 0;
margin: 0;
}
.btn-block {
display: block;
width: 100%;
}
.btn-sm {
font-size: 0.8rem;
padding: 0.3rem 1rem;
margin-right: 0.2rem;
}
.badge {
display: inline-block;
font-size: 0.8rem;
padding: 0.2rem 0.7rem;
text-align: center;
margin: 0.3rem;
background: var(--light-color);
color: #333;
border-radius: 5px;
}
.alert {
padding: 0.7rem;
margin: 1rem 0;
opacity: 0.9;
background: var(--light-color);
color: #333;
}
.btn-primary,
.bg-primary,
.badge-primary,
.alert-primary {
background: var(--primary-color);
color: #fff;
}
.btn-light,
.bg-light,
.badge-light,
.alert-light {
background: var(--light-color);
color: #333;
}
.btn-dark,
.bg-dark,
.badge-dark,
.alert-dark {
background: var(--dark-color);
color: #fff;
}
.btn-danger,
.bg-danger,
.badge-danger,
.alert-danger {
background: var(--danger-color);
color: #fff;
}
.btn-success,
.bg-success,
.badge-success,
.alert-success {
background: var(--success-color);
color: #fff;
}
.btn-white,
.bg-white,
.badge-white,
.alert-white {
background: #fff;
color: #333;
border: #ccc solid 1px;
}
.btn:hover {
opacity: 0.8;
}
.bg-light,
.badge-light {
border: #ccc solid 1px;
}
.round-img {
border-radius: 50%;
}
/* Forms */
input {
margin: 1.2rem 0;
}
.form-text {
display: block;
margin-top: 0.3rem;
color: #888;
}
input[type='text'],
input[type='email'],
input[type='password'],
input[type='date'],
select,
textarea {
display: block;
width: 100%;
padding: 0.4rem;
font-size: 1.2rem;
border: 1px solid #ccc;
}
input[type='submit'],
button {
font: inherit;
}
table th,
table td {
padding: 1rem;
text-align: left;
}
table th {
background: var(--light-color);
}
/* Navbar */
.navbar {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.7rem 2rem;
z-index: 1;
width: 100%;
opacity: 0.9;
margin-bottom: 1rem;
}
.navbar ul {
display: flex;
}
.navbar a {
color: #fff;
padding: 0.45rem;
margin: 0 0.25rem;
}
.navbar a:hover {
color: var(--light-color);
}
.navbar .welcome span {
margin-right: 0.6rem;
}
/* Mobile Styles */
@media (max-width: 700px) {
.hide-sm {
display: none;
}
.grid-2,
.grid-3,
.grid-4 {
grid-template-columns: 1fr;
}
/* Text Styles */
.x-large {
font-size: 3rem;
}
.large {
font-size: 2rem;
}
.lead {
font-size: 1rem;
}
/* Navbar */
.navbar {
display: block;
text-align: center;
}
.navbar ul {
text-align: center;
justify-content: center;
}
}

1
pug/email.css Normal file
View File

@ -0,0 +1 @@
@import url('https://fonts.googleapis.com/css?family=Roboto');:root {--primary-color: #dc3545;--dark-color: #333333;--light-color: #f4f4f4;--danger-color: #dc3545;--success-color: #28a745;}* {box-sizing: border-box;margin: 0;padding: 0;}body {font-family: 'Roboto', sans-serif;font-size: 1rem;line-height: 1.6;background-color: #fff;color: #333;}a {color: var(--primary-color);text-decoration: none;}a:hover {color: #666;}ul {list-style: none;}img {width: 100%;}.container {max-width: 1100px;margin: auto;overflow: hidden;padding: 0 2rem;}.x-large {font-size: 4rem;line-height: 1.2;margin-bottom: 1rem;}.large {font-size: 3rem;line-height: 1.2;margin-bottom: 1rem;}.lead {font-size: 1.5rem;margin-bottom: 1rem;}.text-center {text-align: center;}.text-primary {color: var(--primary-color);}.text-dark {color: var(--dark-color);}.text-success, ins {color: var(--success-color);}.text-danger, del {color: var(--danger-color);}.text-center {text-align: center;}.text-right {text-align: right;}.text-left {text-align: left;}.all-center {display: flex;flex-direction: column;width: 100%;margin: auto;justify-content: center;align-items: center;text-align: center;}.card {padding: 1rem;border: #ccc 1px dotted;margin: 0.7rem 0;}.list {margin: 0.5rem 0;}.list li {padding-bottom: 0.3rem;}.p {padding: 0.5rem;}.p-1 {padding: 1rem;}.p-2 {padding: 2rem;}.p-3 {padding: 3rem;}.py {padding: 0.5rem 0;}.py-1 {padding: 1rem 0;}.py-2 {padding: 2rem 0;}.py-3 {padding: 3rem 0;}.m {margin: 0.5rem;}.m-1 {margin: 1rem;}.m-2 {margin: 2rem;}.m-3 {margin: 3rem;}.my {margin: 0.5rem 0;}.my-1 {margin: 1rem 0;}.my-2 {margin: 2rem 0;}.my-3 {margin: 3rem 0;}.grid-2 {display: grid;grid-template-columns: repeat(2, 1fr);grid-gap: 1rem;}.grid-3 {display: grid;grid-template-columns: repeat(3, 1fr);grid-gap: 1rem;}.grid-4 {display: grid;grid-template-columns: repeat(4, 1fr);grid-gap: 1rem;}.btn {display: inline-block;background: var(--light-color);color: #333;padding: 0.4rem 1.3rem;font-size: 1rem;border: none;cursor: 
pointer;margin-right: 0.5rem;transition: opacity 0.2s ease-in;outline: none;}.btn-link {background: none;padding: 0;margin: 0;}.btn-block {display: block;width: 100%;}.btn-sm {font-size: 0.8rem;padding: 0.3rem 1rem;margin-right: 0.2rem;}.badge {display: inline-block;font-size: 0.8rem;padding: 0.2rem 0.7rem;text-align: center;margin: 0.3rem;background: var(--light-color);color: #333;border-radius: 5px;}.alert {padding: 0.7rem;margin: 1rem 0;opacity: 0.9;background: var(--light-color);color: #333;}.btn-primary, .bg-primary, .badge-primary, .alert-primary {background: var(--primary-color);color: #fff;}.btn-light, .bg-light, .badge-light, .alert-light {background: var(--light-color);color: #333;}.btn-dark, .bg-dark, .badge-dark, .alert-dark {background: var(--dark-color);color: #fff;}.btn-danger, .bg-danger, .badge-danger, .alert-danger {background: var(--danger-color);color: #fff;}.btn-success, .bg-success, .badge-success, .alert-success {background: var(--success-color);color: #fff;}.btn-white, .bg-white, .badge-white, .alert-white {background: #fff;color: #333;border: #ccc solid 1px;}.btn:hover {opacity: 0.8;}.bg-light, .badge-light {border: #ccc solid 1px;}.round-img {border-radius: 50%;}input {margin: 1.2rem 0;}.form-text {display: block;margin-top: 0.3rem;color: #888;}input[type='text'], input[type='email'], input[type='password'], input[type='date'], select, textarea {display: block;width: 100%;padding: 0.4rem;font-size: 1.2rem;border: 1px solid #ccc;}input[type='submit'], button {font: inherit;}table th, table td {padding: 1rem;text-align: left;}table th {background: var(--light-color);}.navbar {display: flex;justify-content: space-between;align-items: center;padding: 0.7rem 2rem;z-index: 1;width: 100%;opacity: 0.9;margin-bottom: 1rem;}.navbar ul {display: flex;}.navbar a {color: #fff;padding: 0.45rem;margin: 0 0.25rem;}.navbar a:hover {color: var(--light-color);}.navbar .welcome span {margin-right: 0.6rem;}@media (max-width: 700px) {.hide-sm {display: 
none;}.grid-2, .grid-3, .grid-4 {grid-template-columns: 1fr;}.x-large {font-size: 3rem;}.large {font-size: 2rem;}.lead {font-size: 1rem;}.navbar {display: block;text-align: center;}.navbar ul {text-align: center;justify-content: center;}}

24
pug/email.pug Normal file
View File

@ -0,0 +1,24 @@
html(lang="en")
head
style
include email.css
meta(charset='utf-8')
title
ChangeDetection !{name}
body.container
h1 ChangeDetection: !{name}
p.m At your request we are sending you this alert to let you know that a page you have been monitoring has changed.
p.m The following monitored page has changed:
a.m(href=url)= url
case since
when 1: p.m It has been 1 day since the page last changed.
default: p.m It has been !{since} days since the page last changed.
p.m See below for the details of the change
blockquote.card !{changed}

18
pug/test.pug Normal file
View File

@ -0,0 +1,18 @@
html(lang="en")
head
link(rel="stylesheet", href="https://www.gitcdn.xyz/repo/bradtraversy/github-finder/master/src/App.css")
meta(charset='utf-8')
title
ChangeDetection !{name}
body.container
h1 ChangeDetection: !{name}
p.m At your request we are sending you this alert to let you know that a page you have been monitoring has changed.
p.m The following monitored page has changed:
a.m(href=url)= url
p.m See below for the details of the change
blockquote.card !{changed}

33
rc.js Normal file
View File

@ -0,0 +1,33 @@
#!/usr/bin/env node
const CronJob = require('cron').CronJob;
// load env variables from file
require('dotenv').config();
const Ireland = require('./scrapers/rc');
/**
 * Boots the RC scraper.
 *
 * When IE_CRON holds a non-empty cron expression the scraper is scheduled with
 * it; when SCRAPE_START equals the scraper id it is also run once immediately.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const ieScraper = new Ireland();
  const cronSpec = process.env.IE_CRON;

  // An empty value (e.g. `IE_CRON=` in .env) means "no schedule", not a cron pattern
  if (typeof cronSpec === 'string' && cronSpec.trim() !== '') {
    console.log(`${ieScraper.id} cron set for ${cronSpec}`);
    new CronJob(cronSpec, async function() {
      // The tick's promise is not awaited by this script, so a rejection here
      // would otherwise surface as an unhandled rejection
      try {
        await ieScraper.run();
      }
      catch (err) {
        console.error(`${ieScraper.id} scheduled run failed`, err);
      }
    }, null, true);
  }

  if (process.env.SCRAPE_START === ieScraper.id) {
    console.log('go');
    await ieScraper.run();
  }

  console.log('RC Launched');
}
// Log the first uncaught exception instead of letting it kill the process silently
process.once('uncaughtException', function caught(err) {
  console.error('Uncaught', err);
});

// run() is async: report a failed start-up rather than leaving the rejection unhandled
run().catch(function failed(err) {
  console.error('Uncaught', err);
});

26
realsettings.json Normal file
View File

@ -0,0 +1,26 @@
[
{
"url": "https://www.harmankardon.co.uk/outlet/",
"name": "Outlet"
},
{
"url": "http://www.harmankardon.co.uk/sale-uk/",
"name": "harmankardon Sale UK"
},
{
"url": "http://www.seanharry.com/events/ultimates/",
"name": "Starfury Ultimates"
},
{
"url": "http://www.seanharry.com/vampire/index.html",
"name": "Starfury Vampire"
},
{
"url": "https://www.royalcaribbean.co.uk/itinerary-details/?itin=07E233&ship=AL&sail=20201122&room=OceanView",
"name": "Cruise"
},{
"url": "https://www.silvrtree.co.uk/cinema/1",
"name": "Imax Glasgow"
}
]

154
scrapers/outlet.js Normal file
View File

@ -0,0 +1,154 @@
const Scraper = require('../lib/scraper');
const cheerio = require('cheerio');
const path = require('path');
const logger = require('log4js').getLogger('RC');
const LocalStorage = require('node-localstorage').LocalStorage;
const fs = require('fs');
const HtmlDiffer = require('html-differ').HtmlDiffer;
const diffLogger = require('html-differ/lib/logger');
const Diff = require('text-diff');
logger.level = process.env.LOGGER_LEVEL || 'debug';
/**
 * Scraper that watches https://www.harmankardon.co.uk/outlet/ for text changes.
 *
 * The visible text of the page is compared with the snapshot kept in
 * `<path>/previous.html`; when it differs, the diff is logged and the
 * snapshot is replaced.
 */
class OutletScrape extends Scraper {
  constructor() {
    super();
    this.setID('OUTLET');

    // Public entry point: __run() wrapped by the base class' _debounce with a
    // value of 5000 (presumably milliseconds - confirm in lib/scraper).
    this.run = this._debounce(async () => {
      await this.__run();
    }, 5000);
  }

  /**
   * Compare the current page text with the stored snapshot.
   *
   * On the first run (no snapshot file yet) the text is simply stored.
   * Otherwise a text diff is computed; a non-zero Levenshtein distance logs
   * the HTML-formatted diff and overwrites the snapshot.
   *
   * @returns {Promise<void>}
   */
  async process() {
    const oldFile = `${this.path}/previous.html`;
    const innerText = await this.page.evaluate(() => {
      return {
        'body': document.body.innerText
      };
    });

    if (!fs.existsSync(oldFile)) {
      // First run: nothing to compare against, just store the baseline.
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');

      return;
    }

    const previousFile = fs.readFileSync(oldFile, 'utf-8');
    const diff = new Diff();
    const textDiff = diff.main(previousFile, innerText.body);
    const levenshtein = diff.levenshtein(textDiff);

    logger.debug('levenshtein:', levenshtein);

    if (levenshtein !== 0) {
      logger.debug(diff.prettyHtml(textDiff));
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
    }
  }

  /**
   * Prepare the artefact directory and browser, then load the outlet page.
   *
   * @returns {Promise<void>}
   * @throws {Error} when any of the browser steps fails
   */
  async start() {
    await super._start();

    try {
      this.startPage = 'https://www.harmankardon.co.uk/outlet/';

      // The result is currently unused; the call is kept in case notARobot()
      // has side effects - TODO confirm and remove.
      OutletScrape.notARobot();

      this.setPath(path.resolve(__dirname, '..', 'artefacts', 'outlet'));

      await this._initBrowser(true);
      await this._createBrowserPage();
      await this.page.setViewport({ 'width': 1200, 'height': 800 });
      await this._goto(this.startPage);
      await this._randomWait(this.page, 3, 5);

      logger.debug('loaded..');
    }
    catch (e) {
      // Rethrow real errors untouched so their stack trace survives.
      throw e instanceof Error ? e : new Error(e);
    }
  }

  /**
   * Load the page and check it against the stored snapshot.
   *
   * @returns {Promise<void>}
   * @throws {Error} when start() or process() fails
   */
  async __run() {
    try {
      logger.debug('run');
      await this.start();
      await this.process();
      logger.debug('Done...');
    }
    catch (e) {
      throw e instanceof Error ? e : new Error(e);
    }
  }
}
module.exports = OutletScrape;

129
scrapers/rc.js Normal file
View File

@ -0,0 +1,129 @@
const Scraper = require('../lib/scraper');
const cheerio = require('cheerio');
const path = require('path');
const logger = require('log4js').getLogger('RC');
const LocalStorage = require('node-localstorage').LocalStorage;
const fs = require('fs');
const Diff = require('text-diff');
logger.level = process.env.LOGGER_LEVEL || 'debug';
/**
 * Scraper for a Royal Caribbean itinerary page.
 *
 * At present it only stores the visible text of the page in
 * `<path>/previous.html` when no snapshot exists yet; an existing snapshot
 * is left untouched.
 */
class RCScrape extends Scraper {
  constructor() {
    super();
    this.setID('RC');

    // Public entry point: __run() wrapped by the base class' _debounce with a
    // value of 5000 (presumably milliseconds - confirm in lib/scraper).
    this.run = this._debounce(async () => {
      await this.__run();
    }, 5000);
  }

  /**
   * Store the page text as the baseline snapshot if none exists.
   *
   * @returns {Promise<void>}
   */
  async process() {
    const oldFile = `${this.path}/previous.html`;
    const innerText = await this.page.evaluate(() => {
      return {
        'body': document.body.innerText
      };
    });

    // Only the first run writes anything; no comparison is done afterwards.
    if (!fs.existsSync(oldFile))
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
  }

  /**
   * Prepare the artefact directory and browser, then load the itinerary page.
   *
   * @returns {Promise<void>}
   * @throws {Error} when any of the browser steps fails
   */
  async start() {
    await super._start();

    try {
      this.startPage = 'https://www.royalcaribbean.co.uk/itinerary-details/?itin=07E233&ship=AL&sail=20201122&room=OceanView';

      // The result is currently unused; the call is kept in case notARobot()
      // has side effects - TODO confirm and remove.
      RCScrape.notARobot();

      this.setPath(path.resolve(__dirname, '..', 'artefacts', 'rc'));

      await this._initBrowser(true);
      await this._createBrowserPage();
      await this.page.setViewport({ 'width': 1200, 'height': 800 });
      await this._goto(this.startPage);
      await this._randomWait(this.page, 3, 5);

      logger.debug('loaded..');
    }
    catch (e) {
      // Rethrow real errors untouched so their stack trace survives.
      throw e instanceof Error ? e : new Error(e);
    }
  }

  /**
   * Load the page and store the baseline snapshot if needed.
   *
   * @returns {Promise<void>}
   * @throws {Error} when start() or process() fails
   */
  async __run() {
    try {
      logger.debug('run');
      await this.start();
      await this.process();
    }
    catch (e) {
      throw e instanceof Error ? e : new Error(e);
    }
  }
}
module.exports = RCScrape;

247
scrapers/scraper.js Normal file
View File

@ -0,0 +1,247 @@
const Scraper = require('../lib/scraper');
const path = require('path');
const logger = require('log4js').getLogger('RC');
const fs = require('fs');
const dateFormat = require('dateformat');
const _ = require('lodash');
const jsonfile = require('jsonfile');
const Diff = require('text-diff');
const time = require("time-since");
const pug = require('pug');
logger.level = process.env.LOGGER_LEVEL || 'debug';

// SMTP settings come from the environment (SMTP_HOST, SMTP_PORT, SMTP_USER,
// SMTP_PASS) so that the password is not committed to the repository.
// Host, port and user keep their previous values as defaults; the password
// deliberately has no default.
if (!process.env.SMTP_PASS)
  logger.warn('SMTP_PASS is not set; sending alert emails will fail to authenticate.');

const email = require('smtp-email-sender')({
  'host': process.env.SMTP_HOST || 'mail.caliban.io',
  'port': process.env.SMTP_PORT || '465',
  'auth': {
    'user': process.env.SMTP_USER || 'aida@caliban.io',
    'pass': process.env.SMTP_PASS,
    'type': 'LOGIN' // PLAIN, LOGIN, MD5 etc...
  },
  'secure': 'secure'
});
/**
 * Scraper that monitors every page listed in settings.json for text changes.
 *
 * For each entry the visible page text is compared with the snapshot stored
 * in `<path>/<kebab-cased name>.html`. When the text differs, a screenshot is
 * taken, the snapshot is replaced and per-page statistics are updated; the
 * statistics are persisted in `<path>/stats.json`.
 */
class ChangeDetection extends Scraper {
  constructor() {
    super();
    this.setID('CD');

    // Public entry point: __run() wrapped by the base class' _debounce with a
    // value of 5000 (presumably milliseconds - confirm in lib/scraper).
    this.run = this._debounce(async () => {
      await this.__run();
    }, 5000);
  }

  /**
   * Render the alert email template and write the result to the debug log.
   *
   * @param {Object} data locals handed to the template
   * @param {string} newpath directory that contains the `pug` folder
   */
  pugTest(data, newpath) {
    logger.debug(pug.renderFile(`${newpath}/pug/email.pug`, data));
  }

  /**
   * Render the alert email template and send it with the screenshot attached.
   *
   * @param {Object} data template locals; `data.name` is used in the subject
   *   and `data.screenshot` (without extension) locates the PNG attachment
   * @param {string} newPath directory that contains the `pug` folder
   */
  sendSMTP(data, newPath) {
    const attachments = [
      {
        'path': `${data.screenshot}.png`
      }
    ];
    const html = pug.renderFile(`${newPath}/pug/email.pug`, data);

    email({
      'from': 'Aida <aida@caliban.io>',
      'to': 'Martin <martind2000@gmail.com>',
      'subject': `ChangeDetection: ${data.name}`,
      'html': html,
      'attachments': attachments
    });
  }

  /**
   * Check a single monitored page against its stored snapshot.
   *
   * @param {{ url: string, name: string }} item entry from settings.json
   * @returns {Promise<void>}
   */
  async processItem(item) {
    logger.debug(`Processing ${item.name}...`);

    const now = new Date();
    const filename = _.kebabCase(item.name);
    const oldFile = `${this.path}/${filename}.html`;
    const stats = this.stats.get(filename) || { 'lastSaved': now, 'lastChanged': null };

    await this._goto(item.url);
    await this._randomWait(this.page, 3, 5);

    const innerText = await this.page.evaluate(() => {
      return {
        'body': document.body.innerText
      };
    });

    if (!fs.existsSync(oldFile)) {
      // First sighting of this page: store the baseline and its initial stats.
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
      this.stats.set(filename, stats);

      return;
    }

    const previousFile = fs.readFileSync(oldFile, 'utf-8');
    const diff = new Diff();
    const textDiff = diff.main(previousFile, innerText.body);
    const levenshtein = diff.levenshtein(textDiff);

    logger.debug('levenshtein:', levenshtein);

    if (levenshtein === 0)
      return;

    logger.info('Changed...');

    const timestamp = dateFormat(now, 'yyyymmddHHMM');
    const screenshotPath = `${this.path}/screenshots/${filename}-${timestamp}`;

    stats.previousChange = stats.lastSaved;
    stats.lastSaved = now;
    stats.lastChanged = now;
    stats.screenshot = screenshotPath;
    stats.changed = diff.prettyHtml(textDiff);
    stats.levenshtein = levenshtein;

    // previousChange is a Date on first use but a string once it has been
    // through stats.json, hence the explicit Date conversion.
    stats.since = time.since(new Date(stats.previousChange)).days();

    await this._makeScreenshotV2(this.page, screenshotPath, null);
    await this._randomWait(this.page, 3, 5);

    fs.writeFileSync(oldFile, innerText.body, 'utf-8');
    this.stats.set(filename, stats);

    const pugData = { ...stats, ...item };

    console.log(pugData);
    this.pugTest(pugData, './');
  }

  /**
   * Process every configured page, one after another on the shared browser page.
   *
   * @returns {Promise<void>}
   */
  async processItems() {
    for (const item of this.settings)
      await this.processItem(item);
  }

  /**
   * Earlier single-page variant: compare the current page text with
   * `<path>/previous.html` and log the diff when it changed.
   *
   * @returns {Promise<void>}
   */
  async processOld() {
    const oldFile = `${this.path}/previous.html`;
    const innerText = await this.page.evaluate(() => {
      return {
        'body': document.body.innerText
      };
    });

    if (!fs.existsSync(oldFile)) {
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');

      return;
    }

    const previousFile = fs.readFileSync(oldFile, 'utf-8');
    const diff = new Diff();
    const textDiff = diff.main(previousFile, innerText.body);
    const levenshtein = diff.levenshtein(textDiff);

    logger.debug('levenshtein:', levenshtein);

    if (levenshtein !== 0) {
      logger.debug(diff.prettyHtml(textDiff));
      fs.writeFileSync(oldFile, innerText.body, 'utf-8');
    }
  }

  /**
   * Prepare the artefact directories and the browser page.
   * No page is loaded here; processItem() navigates to each URL itself.
   *
   * @returns {Promise<void>}
   * @throws {Error} when any of the setup steps fails
   */
  async start() {
    await super._start();

    try {
      this.startPage = 'https://www.harmankardon.co.uk/outlet/';

      // The result is currently unused; the call is kept in case notARobot()
      // has side effects - TODO confirm and remove.
      ChangeDetection.notARobot();

      await this.setPath(path.resolve(__dirname, '..', 'artefacts'));
      await this._createDirectory(`${this.path}/screenshots`);
      await this._initBrowser(true);
      await this._createBrowserPage();
      await this.page.setViewport({ 'width': 1200, 'height': 800 });
      await this._randomWait(this.page, 3, 5);

      logger.debug('Started..');
    }
    catch (e) {
      // Rethrow real errors untouched so their stack trace survives.
      throw e instanceof Error ? e : new Error(e);
    }
  }

  /**
   * Load the page list from `settings.json` (resolved against the current
   * working directory) and the saved statistics from `<path>/stats.json`.
   * Missing statistics start out as an empty Map.
   *
   * @returns {Promise<void>}
   */
  async loadSettings() {
    logger.debug('Load settings...');

    const statsFile = `${this.path}/stats.json`;

    this.settings = jsonfile.readFileSync('settings.json');

    let stats = [];

    if (fs.existsSync(statsFile))
      stats = jsonfile.readFileSync(statsFile) || [];

    this.stats = new Map(stats);
  }

  /**
   * Persist the statistics Map to `<path>/stats.json` as an array of
   * [key, value] pairs.
   *
   * @returns {Promise<void>}
   */
  async saveSettings() {
    logger.debug('Save settings...');

    const statsFile = `${this.path}/stats.json`;
    const stats = [...this.stats];

    jsonfile.writeFileSync(statsFile, stats);
  }

  /**
   * Full cycle: start the browser, load settings, check every page,
   * save the statistics and finish via the base class' _done().
   *
   * @returns {Promise<void>}
   * @throws {Error} when any step fails
   */
  async __run() {
    try {
      logger.debug('run');
      await this.start();
      await this.loadSettings();
      logger.debug('Running...');
      await this.processItems();
      await this.saveSettings();
      await this._done();
    }
    catch (e) {
      throw e instanceof Error ? e : new Error(e);
    }
  }
}
module.exports = ChangeDetection;

0
server.js Normal file
View File

26
settings.json Normal file
View File

@ -0,0 +1,26 @@
[
{
"url": "https://www.harmankardon.co.uk/outlet/",
"name": "Outlet"
},
{
"url": "http://www.harmankardon.co.uk/sale-uk/",
"name": "harmankardon Sale UK"
},
{
"url": "http://www.seanharry.com/events/ultimates/",
"name": "Starfury Ultimates"
},
{
"url": "http://www.seanharry.com/vampire/index.html",
"name": "Starfury Vampire"
},
{
"url": "https://www.royalcaribbean.co.uk/itinerary-details/?itin=07E233&ship=AL&sail=20201122&room=OceanView",
"name": "Cruise"
},{
"url": "https://www.silvrtree.co.uk/cinema/1",
"name": "Imax Glasgow"
}
]