Martin Donnelly a5109efabe 2019-05-12
2019-05-12 18:33:09 +01:00

328 lines
8.4 KiB
JavaScript

const Scraper = require('../helpers/scraper');
const path = require('path');
const logger = require('log4js').getLogger('FI');
const url = require('url');
// Log level comes from the LOGGER_LEVEL environment variable; falls back to 'warn' when it is unset or empty.
logger.level = process.env.LOGGER_LEVEL || 'warn';
/**
 * Scraper for the supervised-entities page at finanssivalvonta.fi.
 *
 * The scrape is a small state machine driven by `this.step` (0, 1, 2):
 * every time the start page finishes loading, the option tree is clicked
 * through for the current step, the search is triggered, the intercepted
 * API response body (saved as `exported.json`) is handed to `_renameFile`,
 * and the start page is loaded again for the next step. Once `this.step`
 * is past 2 the scraper marks itself complete and emits `done`.
 *
 * Helpers prefixed with `_` (`_throttle`, `_checkLock`, `_randomWait`,
 * `_microWait`, `_renameFile`, ...) as well as `modeNames`, `saveFile`,
 * `setPath` and `addToBlockFilters` are not defined in this class; they are
 * presumably provided by the `Scraper` base class.
 */
class FIScrape extends Scraper {
  /**
   * @param {boolean} [checkForLock=true] When true, `_checkLock()` is called
   *   and `run()` is started only if it resolves to a truthy value. When
   *   false, nothing is started automatically.
   */
  constructor(checkForLock = true) {
    super();
    this.id = 'FI';
    this.addToBlockFilters(['msecnd.net', 'siteimproveanalytics.com', 'newrelic.com', 'visualstudio.com']);

    this.on('done', () => {
      this._done();
    });

    // `run` is wrapped by `_throttle` with 5000 (presumably milliseconds).
    this.run = this._throttle(async () => {
      await this.__run();
    }, 5000);

    if (checkForLock) {
      this._checkLock()
        .then((l) => {
          if (l) {
            this.run();
          }
        })
        .catch((err) => {
          // Without this handler a failed lock check would surface as an
          // unhandled promise rejection.
          logger.error('Lock check failed', err);
        });
    }

    this.on('error', (err) => {
      logger.error('Error catcher!!', err);
    });
  }

  /**
   * Scrolls the page down by one viewport height.
   *
   * @returns {Promise<void>}
   */
  async movePageToBottom() {
    await this.page.evaluate(() => {
      window.scrollBy(0, window.innerHeight);
    });
  }

  /**
   * Hands `exported.json` in `this.path` to `_renameFile`, with a target name
   * taken from `this.modeNames[this.step]`. Failures are logged, not thrown.
   *
   * @returns {Promise<void>}
   */
  async renameFile() {
    try {
      const filename = this.modeNames[this.step];
      const sourceFile = 'exported.json';
      const origFile = `${this.path}/${sourceFile}`;
      const newFile = `${this.path}/${filename}.json`;
      await this._renameFile(origFile, newFile);
    } catch (err) {
      logger.error(err);
    }
  }

  /**
   * Finishes the current step: waits, advances `this.step` and emits `next`
   * (the `next` listener registered in `start()` reloads the start page).
   *
   * @returns {Promise<void>}
   */
  async clickReturn() {
    await this._randomWait(this.page, 5, 7, 'clickReturn');
    this.step++;
    this.emit('next');
  }

  /**
   * Scrolls down, waits for the `#tree-search-button` element to become
   * visible, then focuses and clicks it. A missing button or a failed click
   * is logged and otherwise ignored.
   *
   * @returns {Promise<void>}
   */
  async clickSearch() {
    logger.debug('clickSearch');
    await this.movePageToBottom();
    await this._randomWait(this.page, 2, 3, 'Move to bottom');
    try {
      const button = await this.page.waitForSelector('#tree-search-button', { 'visible': true, 'timeout': 75000 });
      logger.debug('found');
      await button.focus();
      // Awaited (like `_randomWait`) so the pause really sits between focus
      // and click; assumes `_microWait` returns a promise — awaiting a plain
      // value is harmless.
      await this._microWait(this.page, 5);
      await button.click({ 'delay': 90 });
    } catch (e) {
      logger.error('Search button missing', e);
    }
    await this._randomWait(this.page, 2, 3, 'after clickSearch click');
  }

  /**
   * Clicks through the option tree for the current step. Each step has an
   * ordered list of selectors (expanders and labels) that are hovered,
   * focused and clicked one after another.
   *
   * @returns {Promise<void>}
   */
  async selectOptions() {
    logger.debug(`select ${this.modeNames[this.step]}`);
    // One selector sequence per value of `this.step`.
    const clickablesSource = [
      [
        '#tree > ul > li:nth-child(4) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(1) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(1) > ul > li:nth-child(2) > div > span:nth-child(3) > label',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(2) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(2) > ul > li:nth-child(3) > div > span:nth-child(3) > label',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(2) > ul > li:nth-child(4) > div > span:nth-child(3) > label'
      ],
      [
        '#tree > ul > li:nth-child(4) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(1) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(1) > ul > li:nth-child(1) > div > span:nth-child(3) > label',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(2) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(2) > ul > li:nth-child(1) > div > span:nth-child(3) > label',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(2) > ul > li:nth-child(2) > div > span:nth-child(3) > label'
      ],
      [
        '#tree > ul > li:nth-child(1) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(1) > ul > li:nth-child(1) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(1) > ul > li:nth-child(1) > div > span:nth-child(3) > label'
      ]
    ];
    const clickables = clickablesSource[this.step];
    await this.movePageToBottom();

    for (const selector of clickables) {
      let itemFound = false;
      // NOTE(review): this retries without an upper bound while the selector
      // cannot be found; consider capping the attempts.
      do {
        logger.debug('Wait for:', selector);
        try {
          const elm = await this.page.waitForSelector(selector, { 'timeout': 75000 });
          logger.debug('found');
          // Marked as found before interacting, so a failed hover/focus/click
          // is logged below but does not trigger another attempt.
          itemFound = true;
          try {
            await elm.hover();
          } catch (err) {
            logger.warn(err);
          }
          // `_microWait` is awaited so the pauses actually separate the
          // hover, focus and click (assumes it returns a promise).
          await this._microWait(this.page, 5);
          await elm.focus();
          await this._microWait(this.page, 5);
          await elm.click({ 'delay': 90 });
          await this._microWait(this.page, 5);
        } catch (e) {
          logger.error('item missing', e);
        }
        await this._randomWait(this.page, 3, 4);
      } while (!itemFound);
    }
  }

  /**
   * Runs the actions for the current step. Steps 0-2 select options, search,
   * rename the captured file and return to the start page; any other step
   * value sets `this.complete` and emits `done`.
   *
   * @returns {Promise<void>}
   */
  async motions() {
    switch (this.step) {
      case 0:
      case 1:
      case 2:
        await this.selectOptions();
        await this.clickSearch();
        await this.renameFile();
        await this.clickReturn();
        break;
      default:
        // Menu fell through
        this.complete = true;
        this.emit('done');
        break;
    }
  }

  /**
   * Waits for the option tree (`#tree > ul`) to be visible, dismisses the
   * cookie banner if present and then runs `motions()`. Any error raised
   * along the way is logged and not rethrown.
   *
   * @returns {Promise<void>}
   */
  async waitForPage() {
    try {
      await this.page.waitForSelector('#tree > ul', { 'visible': true, 'timeout': 75000 });
      logger.debug('Option tree visible');
      await this._randomWait(this.page, 3, 5);
      await this.clearCookieStrap();
      await this.motions();
    } catch (e) {
      logger.error('waitForPage', e);
    }
  }

  /**
   * Clicks the cookie-consent button if it becomes visible within 7.5 s.
   * Best effort: any failure is treated as "banner not present".
   *
   * @returns {Promise<void>}
   */
  async clearCookieStrap() {
    try {
      const elm = await this.page.waitForSelector('#cookie-consent > div > div > button', { 'visible': true, 'timeout': 7500 });
      logger.debug('page');
      await elm.click({ 'delay': 90 });
      await this._randomWait(this.page, 3, 5);
    } catch (err) {
      logger.debug('Cookie strap not found');
    }
  }

  /**
   * Entry point for a freshly loaded page: pauses briefly and then hands
   * over to `waitForPage()`.
   *
   * @returns {Promise<void>}
   */
  async processNewPage() {
    logger.debug('** processNewPage');
    // give the page a few seconds to settle
    await this._randomWait(this.page, 3, 5);
    await this.waitForPage();
  }

  /**
   * Resets the step counter, prepares the output path and browser page,
   * wires up the page/event listeners and loads the start page.
   *
   * @returns {Promise<void>}
   * @throws {Error} Rethrows whatever the setup sequence throws; non-Error
   *   values are wrapped in an `Error`.
   */
  async start() {
    super._start();
    try {
      this.step = 0;
      this.complete = false;
      this.startPage = 'http://www.finanssivalvonta.fi/en/About_us/Supervised/Pages/supervisedentities.aspx';
      this.setPath(path.resolve(`${__dirname}/../artefacts/FI/FCMC`));

      await this._doNonRepudiation(false, { 'sslWithPrefix': false }).catch((err) => {
        logger.error(err);
      });

      await this._initBrowser(false);
      await this._createBrowserPage();
      await this._makeResponsive();

      // Loads the start page; a navigation failure is logged and reported
      // through `_uploadError()` rather than thrown.
      const gotoStartPage = async () => {
        try {
          await this.page.goto(this.startPage);
        } catch (err) {
          logger.error(err);
          this._uploadError();
        }
      };

      // Every (throttled) page load kicks off processing for the current step.
      this.page.on('domcontentloaded', this._throttle(async () => {
        this.processNewPage().catch((err) => {
          logger.error('processNewPage fail', err);
        });
      }, 5000));

      // Check and capture response file
      this.page.on('response', async (response) => {
        try {
          const rUrl = await response.url();
          if (!rUrl.includes('supervised-entity-api/v1/all-supervised-entities')) {
            return;
          }
          logger.debug('satus:', await response.status());
          try {
            const data = await response.text();
            if (data.length > 0) {
              const filename = `${this.path}/exported.json`.substring(0, 240);
              logger.debug('>> Intercepting:', rUrl);
              this.saveFile(filename, data);
            } else {
              logger.debug('Request response is empty');
            }
          } catch (e) {
            logger.warn(e.message);
          }
        } catch (err) {
          logger.info('Response.text failed', err);
        }
      });

      // `clickReturn()` emits `next` after each step; reload the start page.
      this.on('next', this._throttle(gotoStartPage, 5000));

      await this.page.tracing.start({ 'path': `${this.path}/trace.json`, 'screenshots': true });
      await this.page.setViewport({ 'width': 1200, 'height': 800 });
      await gotoStartPage();
      await this._randomWait(this.page, 3, 5);
    } catch (e) {
      // Rethrow the original error so its type and stack trace are kept;
      // only non-Error values get wrapped.
      throw e instanceof Error ? e : new Error(String(e));
    }
  }

  /**
   * Body of the throttled `run` function created in the constructor.
   *
   * @returns {Promise<void>}
   * @private
   */
  async __run() {
    await this.start();
  }
}
// The module's sole export is the FIScrape class.
module.exports = FIScrape;