changedetection/scrapers/rc.js
Martin Donnelly 9858a90912 init
2019-10-21 23:38:27 +01:00

130 lines
3.0 KiB
JavaScript

const Scraper = require('../lib/scraper');
const cheerio = require('cheerio');
const path = require('path');
const logger = require('log4js').getLogger('RC');
const LocalStorage = require('node-localstorage').LocalStorage;
const fs = require('fs');
const Diff = require('text-diff');
// Log level comes from the LOGGER_LEVEL environment variable; falls back to
// 'debug' when that variable is unset or empty.
logger.level = process.env.LOGGER_LEVEL || 'debug';
/**
 * Scraper for a single Royal Caribbean itinerary page.
 *
 * Opens the itinerary URL in a browser page using helpers inherited from
 * Scraper and, the first time it runs, stores the page's visible text as
 * `previous.html` inside the scraper's artefact directory.
 */
class RCScrape extends Scraper {
  /**
   * Registers the scraper id and exposes `run`, the entry point for callers.
   */
  constructor() {
    super();
    this.setID('RC');

    // `run` is `__run` wrapped by the inherited `_debounce` helper.
    // NOTE(review): 5000 is presumably a delay in milliseconds — confirm in Scraper.
    this.run = this._debounce(async () => {
      await this.__run();
    }, 5000);
  }

  /**
   * Captures the text of the currently loaded page and, when no snapshot
   * exists yet, writes it to `${this.path}/previous.html`.
   *
   * When a snapshot already exists nothing is written or compared yet.
   *
   * @returns {Promise<void>}
   */
  async process() {
    const oldFile = `${this.path}/previous.html`;

    // The callback references `document`, so it is expected to execute inside
    // the browser page; only the rendered text is brought back, not the markup.
    const innerText = await this.page.evaluate(() => {
      return {
        'body': document.body.innerText
      };
    });

    if (fs.existsSync(oldFile)) {
      // TODO: compare innerText.body against the stored snapshot and report
      // differences. An existing snapshot is currently left untouched.
      return;
    }

    // First run: persist the captured text as the baseline snapshot.
    fs.writeFileSync(oldFile, innerText.body, 'utf-8');
  }

  /**
   * Prepares the browser and navigates to the itinerary page.
   *
   * @returns {Promise<void>}
   * @throws {Error} Any failure raised while starting the browser or loading
   *   the page; an existing Error is rethrown unchanged so its stack is kept.
   */
  async start() {
    await super._start();

    try {
      this.startPage = 'https://www.royalcaribbean.co.uk/itinerary-details/?itin=07E233&ship=AL&sail=20201122&room=OceanView';

      // Not consumed yet: its only use was a click that is currently disabled.
      // NOTE(review): confirm notARobot() has no side effects before removing the call.
      const mouseDownDuration = RCScrape.notARobot();

      this.setPath(path.resolve(`${__dirname }/../artefacts/rc`));

      // NOTE(review): the meaning of the `true` flag is defined by Scraper._initBrowser — confirm.
      await this._initBrowser(true);
      await this._createBrowserPage();

      await this.page.setViewport({ 'width': 1200, 'height': 800 });

      await this._goto(this.startPage);

      // NOTE(review): presumably a random pause between 3 and 5 seconds — confirm in Scraper._randomWait.
      await this._randomWait(this.page, 3, 5);

      logger.debug('loaded..');
    }
    catch(e) {
      // Rethrow the original error; wrapping an Error in `new Error(e)` would
      // stringify it and discard its stack and type.
      throw e instanceof Error ? e : new Error(e);
    }
  }

  /**
   * Runs one full scrape: starts the browser session, then processes the page.
   *
   * @returns {Promise<void>}
   * @throws {Error} Any failure from `start()` or `process()`.
   */
  async __run() {
    try {
      logger.debug('run');

      await this.start();
      await this.process();
    }
    catch(e) {
      logger.error(e);

      // Preserve the original error (and its stack) for the caller.
      throw e instanceof Error ? e : new Error(e);
    }
  }
}
// The RCScrape class is this module's sole export.
module.exports = RCScrape;