Martin Donnelly be5d3eae07 init
2019-05-05 20:13:56 +01:00

80 lines
2.3 KiB
JavaScript

const logger = require('log4js').getLogger('GR');
const path = require('path');
const url = require('url');
const Scraper = require('../helpers/scraper');
/**
 * Scraper with id 'GR' for the Bank of Greece supervised-institutions page.
 *
 * A run opens the start page, takes a screenshot of it, and then follows a
 * fixed set of links on that page so the browser saves the linked files into
 * the artefacts directory. When the run is finished the instance emits 'done'.
 *
 * Browser set-up, throttling, locking, navigation and screenshot helpers
 * (`_initBrowser`, `_throttle`, `_checkLock`, `_goto`, `_makeScreenshotV2`, ...)
 * are inherited from `Scraper`, which is defined outside this file.
 */
class GRScrape extends Scraper {
  /**
   * Sets the scraper id, registers the 'done' handler and creates the
   * throttled `run` entry point. When NODE_ENV is 'production' it also asks
   * the base class for the lock and starts a run if the result is truthy.
   */
  constructor() {
    super();
    this.id = 'GR';

    this.on('done', () => {
      this._done();
    });

    // `run` is the public entry point; it goes through the base-class
    // throttle (5000 is presumably a millisecond interval - confirm in Scraper).
    this.run = this._throttle(async () => {
      await this.__run();
    }, 5000);

    if (process.env.NODE_ENV === 'production') {
      this._checkLock()
        .then((l) => {
          if (l) {
            this.run();
          }
        })
        .catch((err) => {
          // A constructor cannot await, so a failed lock check must be
          // handled here or it becomes an unhandled promise rejection.
          logger.error(err);
        });
    }
  }

  /**
   * Performs one complete scrape: prepares the browser page, captures the
   * index screenshot, triggers the downloads and finally emits 'done'.
   *
   * A link that cannot be found (or has no href) is logged and skipped so the
   * remaining files are still fetched and 'done' is still emitted.
   *
   * @returns {Promise<void>} Resolves after 'done' has been emitted; rejects
   *   if one of the awaited base-class or page calls rejects.
   */
  async start() {
    super._start();
    this.setPath(path.resolve(`${__dirname}/../artefacts/GR/BG`));
    this.startPage = 'https://www.bankofgreece.gr/Pages/en/Supervision/SupervisedInstitutions/default.aspx';

    // Best effort: a failure here is logged and the scrape carries on.
    await this._doNonRepudiation(false, { 'sslWithPrefix': true }).catch((err) => {
      logger.warn(err);
    });

    await this._initBrowser();
    await this._createBrowserPage();
    await this.page.setViewport({ 'width': 1200, 'height': 800 });
    await this._goto(this.startPage, { 'waitUntil': 'networkidle0' });
    await this._randomWait(this.page, 3, 5);

    // Awaited so the capture is finished before any further navigation
    // (awaiting is harmless should the helper turn out to be synchronous).
    await this._makeScreenshotV2(this.page, `${this.path}/index`);

    // Let the browser save downloads into the artefacts directory.
    // NOTE(review): `_client` is a private page member, not public API.
    await this.page._client.send('Page.setDownloadBehavior', { 'behavior': 'allow', 'downloadPath': this.path });
    logger.info('Saving excels into:', this.path);

    // Visible link texts of the files to fetch.
    const linkTexts = [
      'List of credit institutions operating in Greece',
      'List of credit institutions authorised in Greece with operations abroad through a subsidiary or a branch',
      'List/register of payment institutions',
      'List/register of electronic money institutions'
    ];

    for (const linkText of linkTexts) {
      const [linkElement] = await this.page.$x(`//a[contains(text(), '${linkText}')]`);

      if (!linkElement) {
        // Without this guard the evaluate() below fails with an opaque
        // TypeError and the whole run is aborted.
        logger.warn(`No link found for '${linkText}' on ${this.page.url()}, skipping`);
        continue;
      }

      const href = await this.page.evaluate(
        link => link.getAttribute('href'),
        linkElement
      );

      if (!href) {
        logger.warn(`Link '${linkText}' has no href, skipping`);
        continue;
      }

      // The href may be relative, so resolve it against the current page URL.
      const xlsUrl = url.resolve(this.page.url(), href);
      await this._goto(xlsUrl, { 'waitUntil': 'networkidle0' });
      await this._randomWait(this.page, 3, 5);
    }

    // Wait until all downloads have finished (currently this is only possible
    // with 'page.goto', so we go back to the start page).
    await this._goto(this.startPage, { 'waitUntil': 'networkidle0' });
    this.emit('done');
  }

  /**
   * Body of the throttled `run` function created in the constructor.
   *
   * @returns {Promise<void>} Settles when `start()` settles.
   */
  async __run() {
    await this.start();
  }
}
// The scraper class itself is this module's CommonJS export.
module.exports = GRScrape;