328 lines
8.4 KiB
JavaScript
328 lines
8.4 KiB
JavaScript
const Scraper = require('../helpers/scraper');
|
|
const path = require('path');
|
|
const logger = require('log4js').getLogger('FI');
|
|
const url = require('url');
|
|
|
|
// Log level is taken from the LOGGER_LEVEL environment variable; when that is
// unset or empty the logger falls back to 'warn'.
logger.level = process.env.LOGGER_LEVEL || 'warn';
|
|
|
|
/**
 * Scraper for the supervised-entities page of finanssivalvonta.fi.
 *
 * The page is loaded once per "step" (0, 1, 2). For each step a fixed set of
 * nodes in the `#tree` option tree is clicked, the search button is pressed,
 * the intercepted API response is written to `exported.json` and then renamed
 * after the current step. When every step has been processed `done` is emitted.
 *
 * NOTE(review): `this.modeNames` is read by `renameFile()` and
 * `selectOptions()` but is never assigned in this class — presumably it is
 * supplied by `Scraper` or by the caller; confirm.
 */
class FIScrape extends Scraper {
  /**
   * @param {boolean} [checkForLock=true] - when true, `_checkLock()` is called
   *   and `run()` is started if it resolves to a truthy value.
   */
  constructor(checkForLock = true) {
    super();

    this.id = 'FI';

    this.addToBlockFilters(['msecnd.net', 'siteimproveanalytics.com', 'newrelic.com', 'visualstudio.com']);

    this.on('done', () => {
      this._done();
    });

    this.run = this._throttle(async () => {
      await this.__run();
    }, 5000);

    if (checkForLock) {
      this._checkLock()
        .then((l) => {
          if (l) {
            // Returned so that a failing run is reported by the catch below
            // instead of becoming an unhandled rejection.
            return this.run();
          }

          return undefined;
        })
        .catch((err) => {
          logger.error('Lock check or run failed', err);
        });
    }

    this.on('error', (err) => {
      logger.error('Error catcher!!', err);
    });
  }

  /**
   * Scrolls the page down by one viewport height (`window.innerHeight`).
   *
   * @returns {Promise<void>}
   */
  async movePageToBottom() {
    await this.page.evaluate(() => {
      window.scrollBy(0, window.innerHeight);
    });
  }

  /**
   * Renames `<path>/exported.json` to `<path>/<modeNames[step]>.json`.
   * Any failure is logged and swallowed so the step sequence can continue.
   *
   * @returns {Promise<void>}
   */
  async renameFile() {
    try {
      const filename = this.modeNames[this.step];
      const sourceFile = 'exported.json';

      const origFile = `${this.path}/${sourceFile}`;
      const newFile = `${this.path}/${filename}.json`;

      await this._renameFile(origFile, newFile);
    } catch (err) {
      logger.error(err);
    }
  }

  /**
   * Waits a short random interval, advances to the next step and emits `next`
   * (which `start()` wires to a reload of the start page).
   *
   * @returns {Promise<void>}
   */
  async clickReturn() {
    await this._randomWait(this.page, 5, 7, 'clickReturn');
    this.step++;

    this.emit('next');
  }

  /**
   * Scrolls down, waits for `#tree-search-button` to become visible and clicks
   * it. A missing button (or a failed focus/click) is logged, not thrown.
   *
   * @returns {Promise<void>}
   */
  async clickSearch() {
    logger.debug('clickSearch');

    await this.movePageToBottom();

    await this._randomWait(this.page, 2, 3, 'Move to bottom');

    try {
      const elm = await this.page.waitForSelector('#tree-search-button', { 'visible': true, 'timeout': 75000 });

      logger.debug('found');
      await elm.focus();
      // Awaited so the pause actually separates focus from click.
      await this._microWait(this.page, 5);
      await elm.click({ 'delay': 90 });
    } catch (e) {
      logger.error('Search button missing', e);
    }

    await this._randomWait(this.page, 2, 3, 'after clickSearch click');
  }

  /**
   * Clicks, in order, the tree nodes configured for the current step.
   *
   * Each selector is waited for up to 5 times (75 s timeout per attempt). Once
   * the element has been found it is hovered, focused and clicked; a failure
   * of those interactions is logged and the next selector is processed.
   *
   * @returns {Promise<void>}
   * @throws {Error} when a selector is still not present after the last attempt
   */
  async selectOptions() {
    logger.debug(`select ${this.modeNames[this.step]}`);

    // Upper bound on waits per selector; prevents an endless retry loop when
    // the page never renders the expected tree node.
    const maxAttempts = 5;

    // One selector sequence per step: expanders first, then the labels to tick.
    const clickablesSource = [
      [
        '#tree > ul > li:nth-child(4) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(1) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(1) > ul > li:nth-child(2) > div > span:nth-child(3) > label',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(2) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(2) > ul > li:nth-child(3) > div > span:nth-child(3) > label',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(2) > ul > li:nth-child(4) > div > span:nth-child(3) > label'
      ],
      [
        '#tree > ul > li:nth-child(4) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(1) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(1) > ul > li:nth-child(1) > div > span:nth-child(3) > label',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(2) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(2) > ul > li:nth-child(1) > div > span:nth-child(3) > label',
        '#tree > ul > li:nth-child(4) > ul > li:nth-child(2) > ul > li:nth-child(2) > div > span:nth-child(3) > label'
      ],
      [
        '#tree > ul > li:nth-child(1) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(1) > ul > li:nth-child(1) > div > span.gj-tree-glyphicons-expander',
        '#tree > ul > li:nth-child(1) > ul > li:nth-child(1) > div > span:nth-child(3) > label'
      ]
    ];

    const clickables = clickablesSource[this.step];

    await this.movePageToBottom();

    for (const selector of clickables) {
      let itemFound = false;

      for (let attempt = 1; attempt <= maxAttempts && !itemFound; attempt++) {
        logger.debug('Wait for:', selector);

        try {
          const elm = await this.page.waitForSelector(selector, { 'timeout': 75000 });

          logger.debug('found');
          itemFound = true;

          // Hover is best-effort: a failure is only a warning.
          await elm.hover().catch((err) => {
            logger.warn(err);
          });

          await this._microWait(this.page, 5);
          await elm.focus();
          await this._microWait(this.page, 5);
          await elm.click({ 'delay': 90 });
          await this._microWait(this.page, 5);
        } catch (e) {
          logger.error('item missing', e);
        }

        await this._randomWait(this.page, 3, 4);
      }

      if (!itemFound) {
        throw new Error(`Selector not found after ${maxAttempts} attempts: ${selector}`);
      }
    }
  }

  /**
   * Runs the action sequence for the current step. Steps 0-2 select options,
   * search, rename the captured file and move on; any other step marks the
   * scrape as complete and emits `done`.
   *
   * @returns {Promise<void>}
   */
  async motions() {
    switch (this.step) {
      case 0:
      case 1:
      case 2:
        await this.selectOptions();

        await this.clickSearch();

        await this.renameFile();

        await this.clickReturn();

        break;

      default:
        // Menu fell through
        this.complete = true;

        this.emit('done');
        break;
    }
  }

  /**
   * Waits for the option tree (`#tree > ul`) to become visible, dismisses the
   * cookie banner and runs the current step. Every failure in that chain is
   * logged here and not rethrown.
   *
   * @returns {Promise<void>}
   */
  async waitForPage() {
    try {
      await this.page.waitForSelector('#tree > ul', { 'visible': true, 'timeout': 75000 });

      logger.debug('Option tree visible');

      await this._randomWait(this.page, 3, 5);

      await this.clearCookieStrap();

      await this.motions();
    } catch (e) {
      logger.error('waitForPage', e);
    }
  }

  /**
   * Clicks the cookie-consent button if it shows up within 7.5 s; otherwise
   * (or when the click fails) only a debug message is written.
   *
   * @returns {Promise<void>}
   */
  async clearCookieStrap() {
    try {
      const elm = await this.page.waitForSelector('#cookie-consent > div > div > button', { 'visible': true, 'timeout': 7500 });

      logger.debug('page');

      await elm.click({ 'delay': 90 });
      await this._randomWait(this.page, 3, 5);
    } catch (e) {
      logger.debug('Cookie strap not found');
    }
  }

  /**
   * Handler for a freshly loaded page: pauses briefly, then processes it.
   *
   * @returns {Promise<void>}
   */
  async processNewPage() {
    logger.debug('** processNewPage');
    // give the page a few seconds to settle
    await this._randomWait(this.page, 3, 5);

    await this.waitForPage();
  }

  /**
   * Resets the step state, prepares the browser page, wires the page/scraper
   * event handlers and navigates to the start page.
   *
   * Errors raised while setting up are propagated unchanged, so callers
   * receive the original error object and stack.
   *
   * @returns {Promise<void>}
   */
  async start() {
    super._start();

    this.step = 0;
    this.complete = false;

    this.startPage = 'http://www.finanssivalvonta.fi/en/About_us/Supervised/Pages/supervisedentities.aspx';

    // NOTE(review): directory is `FI/FCMC` while this scraper's id is `FI` —
    // confirm the `FCMC` segment is intentional.
    this.setPath(path.resolve(`${__dirname }/../artefacts/FI/FCMC`));

    await this._doNonRepudiation(false, { 'sslWithPrefix': false }).catch((err) => {
      logger.error(err);
    });

    await this._initBrowser(false);
    await this._createBrowserPage();

    await this._makeResponsive();

    this.page.on('domcontentloaded', this._throttle(async () => {
      this.processNewPage().catch((err) => {
        logger.error('processNewPage fail', err);
      });
    }, 5000));

    // Check and capture response file
    this.page.on('response', async (o) => {
      try {
        const rUrl = await o.url();

        if (rUrl.includes('supervised-entity-api/v1/all-supervised-entities')) {
          logger.debug('status:', await o.status());

          try {
            const data = await o.text();

            if (data.length > 0) {
              const filename = `${this.path}/exported.json`.substring(0, 240);

              logger.debug('>> Intercepting:', rUrl);

              // Awaited so that, if saveFile is asynchronous, a rejection is
              // logged below rather than left unhandled.
              await this.saveFile(filename, data);
            } else {
              logger.debug('Request response is empty');
            }
          } catch (e) {
            logger.warn(e.message);
          }
        }
      } catch (err) {
        logger.info('Response.text failed', err);
      }
    });

    this.on('next', this._throttle(async () => {
      await this.page.goto(this.startPage).catch((err) => {
        logger.error(err);
        this._uploadError();
      });
    }, 5000));

    await this.page.tracing.start({ 'path': `${this.path}/trace.json`, 'screenshots': true });

    await this.page.setViewport({ 'width': 1200, 'height': 800 });
    await this.page.goto(this.startPage).catch((err) => {
      logger.error(err);
      this._uploadError();
    });

    await this._randomWait(this.page, 3, 5);
  }

  /**
   * Entry point used by the throttled `run()` created in the constructor.
   *
   * @returns {Promise<void>}
   * @private
   */
  async __run() {
    await this.start();
  }
}
|
|
|
|
// The scraper class is the module's only export.
module.exports = FIScrape;
|