782 lines
21 KiB
JavaScript
782 lines
21 KiB
JavaScript
const Scraper = require('../helpers/scraper');
|
|
const cheerio = require('cheerio');
|
|
const path = require('path');
|
|
const jsonfile = require('jsonfile');
|
|
const logger = require('log4js').getLogger('EE');
|
|
const url = require('url');
|
|
const removeAccents = require('remove-accents-diacritics');
|
|
|
|
// Log verbosity is taken from LOGGER_LEVEL; 'warn' is used when it is unset or empty.
logger.level = process.env.LOGGER_LEVEL ? process.env.LOGGER_LEVEL : 'warn';
|
|
|
|
/**
 * Scraper for the registers published on https://www.fi.ee.
 *
 * Three register groups are processed one after another, selected by
 * `this.mode`: 0 = payment services, 1 = e-money services, 2 = credit
 * services. Within a group the scraper first builds an index of entity links
 * (`this.workMode` 0, "Indexing") and then visits every link to extract its
 * details (`this.workMode` 1, "Scraping").
 *
 * Navigation is event driven: every page load ends up in `processNewPage`,
 * which dispatches on the two mode fields, and the handlers registered in
 * `attachEvents` move the scraper from one phase to the next.
 */
class EEScrape extends Scraper {

  /**
   * Sets the scraper id, wires the 'done' event and creates the throttled
   * `run` and debounced `recover` entry points. In production the scraper
   * starts itself once `_checkLock()` resolves to a truthy value.
   */
  constructor() {
    super();
    this.id = 'EE';

    this.on('done', () => {
      this._done();
    });

    this.run = this._throttle(async () => {
      await this.__run();
    }, 5000);

    this.recover = this._debounce(async () => {
      await this.__recover();
    }, 120000);

    if (process.env.NODE_ENV === 'production') {
      this._checkLock()
        .then((lockResult) => {
          if (lockResult) {
            this.run();
          }
        })
        .catch((err) => {
          // Without this handler a failing lock check is an unhandled rejection.
          logger.error('_checkLock failed', err);
        });
    }
  }

  /**
   * Returns the service state object that belongs to the current `this.mode`.
   *
   * @returns {Object|undefined} undefined when the mode is not 0, 1 or 2
   * @private
   */
  _eeActiveService() {
    switch (this.mode) {
      case 0:
        return this.paymentServices;
      case 1:
        return this.emoneyServices;
      case 2:
        return this.creditServices;
      default:
        return undefined;
    }
  }

  /**
   * Finds the `<h3>` whose text contains `blockType` and returns the children
   * of the element that follows it.
   *
   * @param $ cheerio instance loaded with the entity page
   * @param {string} blockType heading text to look for
   * @returns {Object|null} cheerio selection, or null when there is no such heading
   * @private
   */
  _eeBlockItems($, blockType) {
    const header = $(`h3:contains("${blockType}")`);

    if (header.length === 0) {
      return null;
    }

    return header.next().children();
  }

  /**
   * Shared reaction to a failed navigation: a timeout triggers the recovery
   * flow, anything else is logged instead of being dropped.
   *
   * @param {Error} err rejection reason from `_goto`
   * @param {string} context label used in the log line
   * @private
   */
  _eeOnNavigationError(err, context) {
    if (err && err.name === 'TimeoutError') {
      this.emit('recover');

      return;
    }

    logger.error(`${context}: navigation failed`, err);
  }

  /**
   * Common body of the 'psindexdone' / 'emindexdone' / 'ciindexdone' events:
   * records how many links were collected, advances to the next index URL
   * and, once every index URL has been visited, switches to scraping and
   * opens the first entity link.
   *
   * @param {Object} service service state object for the finished index
   * @returns {Promise<void>}
   * @private
   */
  async _eeOnIndexDone(service) {
    service.items = service.links.length;
    logger.info(`${service.items} items indexed`);

    service.indexStep++;

    let nextUrl;

    if (service.indexStep >= service.urls.length) {
      this.workMode = 1;
      logger.debug(JSON.stringify(service));

      const firstLink = service.links[service.step];

      if (!firstLink) {
        // Nothing was indexed, so there is no entity page to open. Finish the
        // service instead of throwing on `undefined.href`.
        logger.warn('No entity links indexed, nothing to scrape');
        this.emit('serviceDone');

        return;
      }

      nextUrl = firstLink.href;
    }
    else {
      nextUrl = service.urls[service.indexStep];
    }

    try {
      await this._goto(nextUrl);
    }
    catch (err) {
      this._eeOnNavigationError(err, 'indexdone');
    }
  }

  /**
   * Collects every link found in an index fragment.
   *
   * Anchors without an `href` are skipped, and an `href` that is already an
   * absolute http(s) URL is kept as is; every other `href` is appended to
   * `this.rootURI`.
   *
   * @param html HTML of the index list
   * @param serviceObject unused, kept for interface compatibility
   * @returns {Promise<Array<{name: string, href: string}>>}
   */
  async extractIndexItems(html, serviceObject) {
    const found = [];
    const $ = cheerio.load(html);

    $('a').each((i, anchor) => {
      const href = $(anchor).attr('href');

      if (!href) {
        // An anchor without href would otherwise become "<root>undefined".
        return;
      }

      const name = this._cleanUp($(anchor).text());
      const absolute = /^https?:\/\//i.test(href);

      found.push({ 'name': name, 'href': absolute ? href : `${this.rootURI}${href}` });
    });

    return found;
  }

  /**
   * Reads the page title and the label/value rows of the first
   * `article div.table-wrap table` into a flat object.
   *
   * @param html HTML of the entity page
   * @returns {Promise<Object|undefined>} `{ title, <field>: <value>, ... }`;
   *   undefined when parsing threw (the error is logged)
   */
  async extractEntityDetails(html) {
    try {
      const $ = cheerio.load(html);
      const details = {};

      details.title = this._cleanUp($('h1.page-title').text());

      const rows = $('article div.table-wrap table').eq(0).find('tbody > tr');

      rows.each((i, row) => {
        const cells = $(row).children();
        const fieldName = this._makeFieldName(cells.eq(0).text());

        details[fieldName] = this._cleanUp(cells.eq(1).text());
      });

      return details;
    }
    catch (err) {
      logger.error(err);
    }
  }

  /**
   * Collects the service lists from every `article div.table-wrap table`
   * after the first one (the first table is the one read by
   * `extractEntityDetails`). Tables sharing a caption are merged.
   *
   * @param html HTML of the entity page
   * @returns {Promise<Object|undefined>} map of caption-derived field name to
   *   an array of service strings; undefined when parsing threw (logged)
   */
  async extractEntityServices(html) {
    try {
      const $ = cheerio.load(html);
      const servicesByLabel = {};

      $('article div.table-wrap table').each((index, table) => {
        if (index === 0) {
          return;
        }

        const label = this._makeFieldName($(table).find('caption').text());
        const services = $(table)
          .find('div.field__item')
          .map((j, el) => this._cleanUp($(el).text()))
          .get();

        const known = Object.prototype.hasOwnProperty.call(servicesByLabel, label);

        servicesByLabel[label] = known ? servicesByLabel[label].concat(services) : services.slice();
      });

      return servicesByLabel;
    }
    catch (err) {
      logger.error(err);
    }
  }

  /**
   * Extracts the entries listed under the "Licenses" heading.
   *
   * @param html HTML of the entity page
   * @returns {{licenseDescription: string, blockType: string, licenses: Array}|{}|undefined}
   *   `{}` when the page has no "Licenses" heading; undefined when parsing
   *   threw (the error is logged)
   */
  extractEntityLicense(html ) {
    try {
      const blockType = 'Licenses';
      const result = { 'licenseDescription': '', 'blockType': blockType, 'licenses': [] };

      const $ = cheerio.load(html);
      const items = this._eeBlockItems($, blockType);

      if (items === null) {
        return {};
      }

      items.each((i, item) => {
        const fieldText = (fieldClass) => this._cleanUp($(item).find(`div.${fieldClass} div.field__item`).text());

        const license = {};

        license.permitNumber = fieldText('field--name-field-permit-number');
        license.permitEntryDate = fieldText('field--name-field-permit-entry-date');
        license.restrictions = $(item)
          .find('div.field--name-field-permit-restrictions')
          .find('p')
          .map((j, el) => this._cleanUp($(el).text()))
          .get();

        result.licenses.push(license);
      });

      return result;
    }
    catch (err) {
      logger.error(err);
    }
  }

  /**
   * Extracts the entries listed under the
   * "List of cross-border services provided" heading.
   *
   * @param html HTML of the entity page
   * @returns {{crossBorder: Array}|{}|undefined} `{}` when the heading is
   *   missing; undefined when parsing threw (the error is logged)
   */
  extractEntityCrossBorder(html ) {
    try {
      const blockType = 'List of cross-border services provided';
      const result = { 'crossBorder': [] };

      const $ = cheerio.load(html);
      const items = this._eeBlockItems($, blockType);

      if (items === null) {
        return {};
      }

      items.each((i, item) => {
        const fieldText = (fieldClass) => this._cleanUp($(item).find(`div.${fieldClass} div.field__item`).text());

        const entry = {};

        entry.permitNumber = fieldText('field--name-field-overborder-permit-number');
        entry.permitEntryDate = fieldText('field--name-field-overborder-permit-date');
        entry.startDate = fieldText('field--name-field-overborder-permit-start');

        // Each services-list paragraph: first child is the service, second the country.
        entry.cbServices = $(item)
          .find('div.field--name-field-services-list')
          .find('div.paragraph--type--subject-services-list')
          .map((j, el) => {
            const parts = $(el).children();
            const service = this._cleanUp(parts.eq(0).text());
            const country = this._cleanUp(parts.eq(1).text());

            return { service, country };
          })
          .get();

        result.crossBorder.push(entry);
      });

      return result;
    }
    catch (err) {
      logger.error(err);
    }
  }

  /**
   * Extracts the entries listed under the "Branches" heading: name, contact
   * details, branch services and branch permits.
   *
   * @param html HTML of the entity page
   * @returns {{branches: Array}|{}|undefined} `{}` when the heading is
   *   missing; undefined when parsing threw (the error is logged)
   */
  extractEntityBranches(html ) {
    try {
      // [output key, CSS class of the field wrapper]
      const subDetails = [
        ['country', 'field--name-field-country'],
        ['businessName', 'field--name-field-business-name'],
        ['address', 'field--name-field-address'],
        ['phone', 'field--name-field-phone']
      ];
      const blockType = 'Branches';
      const result = { 'branches': [] };

      const $ = cheerio.load(html);
      const items = this._eeBlockItems($, blockType);

      if (items === null) {
        return {};
      }

      items.each((i, item) => {
        const branch = { 'details': {}, 'branchServices': [], 'licenses': [] };

        branch.name = this._cleanUp($(item).find('header.paragraph-heading h4').text());

        for (const [key, fieldClass] of subDetails) {
          branch.details[key] = this._cleanUp($(item).find(`div.${fieldClass} div.field__item`).text());
        }

        branch.branchServices = $(item)
          .find('div.field--name-field-branch-services')
          .find('div.paragraph--type--subject-services-list-simple div.field__item')
          .map((j, el) => this._cleanUp($(el).text()))
          .get();

        // Each permit paragraph: first child holds the number, second the start date.
        branch.licenses = $(item)
          .find('div.field--name-field-branch-permissions')
          .find('div.paragraph--type--subject-branch-permits')
          .map((j, el) => {
            const parts = $(el).children();
            const permitNumber = this._cleanUp(parts.eq(0).find('div.field__item').text());
            const start = this._cleanUp(parts.eq(1).find('div.field__item').text());

            return { permitNumber, start };
          })
          .get();

        result.branches.push(branch);
      });

      return result;
    }
    catch (err) {
      logger.error(err);
    }
  }

  /**
   * Scrapes the entity page that is currently open: takes a screenshot,
   * writes the extracted data to `<path>/<filename>.json`, records the file
   * name on the link and then either opens the next link or emits
   * 'serviceDone' when the last link has been processed.
   *
   * Nothing is written and the step is not advanced when `h1.page-title`
   * never shows up (the failure is logged).
   *
   * @param serviceObject service state object for the current mode
   * @returns {Promise<void>}
   */
  async processEntityDetails(serviceObject) {
    const currentLink = serviceObject.links[serviceObject.step];
    const id = currentLink.name;

    logger.info(`Process ${serviceObject.step} of ${serviceObject.items} // ${this.modeTitles[this.mode]} entity:${id}`);

    await this._randomWait(this.page, 3, 5);

    const entity = removeAccents.remove(id.trim());
    const filename = this._makeFileName(entity);
    const filePath = `${this.path}/${filename}`.substring(0, 240);

    await this._randomWait(this.page, 3, 5);

    try {
      await this.page.waitForSelector('h1.page-title');
    }
    catch (e) {
      logger.error('processEntityDetails', e);

      return;
    }

    await this._makeScreenshotV2(this.page, `${filePath}_main`, null);

    const body = await this.page.content();

    const details = await this.extractEntityDetails(body);
    const licenses = await this.extractEntityLicense(body);
    const crossBorder = await this.extractEntityCrossBorder(body);
    const services = await this.extractEntityServices(body);
    const branches = await this.extractEntityBranches(body);

    await jsonfile.writeFile(`${filePath}.json`, { details, licenses, crossBorder, services, branches });

    await this._randomWait(this.page, 3, 5);

    currentLink.filename = `${filename}.json`;
    serviceObject.step++;

    if (serviceObject.step >= serviceObject.items) {
      this.emit('serviceDone');

      return;
    }

    try {
      await this._goto(serviceObject.links[serviceObject.step].href);
    }
    catch (err) {
      this._eeOnNavigationError(err, 'processEntityDetails');
    }
  }

  /**
   * Reads the index list of the current page into `serviceObject.links`,
   * takes a screenshot and then either clicks through to the next result
   * page (emitting 'pageChanged') or, when there is no usable "next" button,
   * emits 'indexdone'.
   *
   * @param serviceObject service state object for the current mode
   * @returns {Promise<void>}
   */
  async processIndex(serviceObject) {
    let html = '';

    logger.info(`Building the ${this.modeTitles[this.mode]} index...`);
    await this._randomWait(this.page, 3, 5);

    try {
      const listHandle = await this.page.waitForSelector('div.view-content', { 'visible': true, 'timeout': 7500 });

      html = await this.page.evaluate(el => el.outerHTML, listHandle);
    }
    catch (e) {
      logger.error(e);
      logger.warn('No index list');
    }

    const indexList = await this.extractIndexItems(html);

    logger.debug('serviceObject.indexStep', serviceObject.indexStep);

    // Tag only the links found on this page. Links collected from earlier
    // index URLs keep the step they were found under.
    for (const link of indexList) {
      link['meta'] = serviceObject.indexStep;
    }

    serviceObject.links = serviceObject.links.concat(indexList);

    const filename = this.modeNames[this.mode];

    await this._randomWait(this.page, 5, 7);

    const subStep = (serviceObject.pageCount > 0) ? `-${serviceObject.pageCount}` : '';

    try {
      // Finish the screenshot before clicking away from the page.
      await this._makeScreenshotV2(this.page, `${this.path}/${filename}_main_${serviceObject.indexStep}${subStep}`, null);
    }
    catch (err) {
      logger.error('processIndex screenshot failed', err);
    }

    try {
      const nextButton = await this.page.waitForSelector('li.next-nav > a.button.next', { 'visible': true, 'timeout': 7500 });

      logger.debug('Next page..');
      await nextButton.click({ 'delay': Scraper.notARobot() });
      await this._randomWait(this.page, 5, 7);
      serviceObject.pageCount++;
      this.emit('pageChanged');
    }
    catch (err) {
      // No next button (or paging failed): this index URL is finished.
      serviceObject.pageCount = 0;
      this.emit('indexdone');
    }
  }

  /**
   * Runs `processIndex` when the page shows an index list; otherwise, or
   * when `processIndex` fails, emits 'indexdone'.
   *
   * @param serviceObject service state object for the current mode
   * @returns {Promise<void>}
   */
  async buildIndex(serviceObject) {
    try {
      await this.page.waitForSelector('div.view-content', { 'visible': true, 'timeout': 7500 });
      await this.processIndex(serviceObject);
    }
    catch (e) {
      logger.debug(e);
      logger.warn('No index list');
      this.emit('indexdone');
    }
  }

  /**
   * Scrapes the current entity page for the active mode.
   *
   * @returns {Promise<void>}
   */
  async processRedirector() {
    const service = this._eeActiveService();

    if (service) {
      await this.processEntityDetails(service);
    }
  }

  /**
   * Indexes the current page for the active mode.
   *
   * @returns {Promise<void>}
   */
  async indexRedirector() {
    const service = this._eeActiveService();

    if (service) {
      await this.buildIndex(service);
    }
  }

  /**
   * Entry point for every loaded page. Landing on the site root is treated
   * as a failed navigation and triggers recovery; otherwise the page is
   * handed to the indexing (workMode 0) or scraping (workMode 1) dispatcher.
   *
   * @returns {Promise<void>}
   * @throws {Error} for an unknown workMode when NODE_ENV is set
   */
  async processNewPage() {
    // give the page a few seconds to settle
    await this._randomWait(this.page, 3, 5);

    const pageUrl = url.parse(await this.page.url());

    logger.debug('workMode::', ['Indexing', 'Scraping'][this.workMode]);

    if (pageUrl.pathname === '/') {
      logger.error('Invalid path');
      logger.debug(JSON.stringify(pageUrl));
      logger.warn('processNewPage::emit recover');
      this.emit('recover');

      return;
    }

    if (this.workMode === 0) {
      await this.indexRedirector();

      return;
    }

    if (this.workMode === 1) {
      await this.processRedirector();

      return;
    }

    if (process.env.NODE_ENV) {
      await this._uploadError();

      // pageUrl is a parsed Url object; use its href so the message is readable.
      throw new Error(`Unknown page: ${pageUrl.href}`);
    }

    logger.warn('processNewPage Fell through');
    logger.warn('currentPage.location', pageUrl.href);
  }

  /**
   * Re-opens `this.lastUrl`. Navigation errors are logged, never thrown.
   *
   * @returns {Promise<void>}
   */
  async restart() {
    logger.info(`Restarting ${this.modeTitles[this.mode]}`);

    try {
      await this._goto(this.lastUrl);
    }
    catch (err) {
      logger.error('restart failed', err);
    }
  }

  /**
   * Replaces the browser page (and the browser itself when
   * `this.browserCrashed` is set), re-attaches the page-load handler and
   * schedules a restart 90 seconds later.
   *
   * @returns {Promise<void>}
   * @private
   */
  async __recover() {
    logger.warn('*** RECONNECTING PAGE ***');
    logger.info('BrowserCrashed:', this.browserCrashed);

    await this._forcePageClose();

    if (this.browserCrashed) {
      await this._initBrowser();
    }

    await this._createBrowserPage();

    // Same throttled, error-logging handler that start() attaches.
    this.page.on('domcontentloaded', this._throttle(async () => {
      this.processNewPage().catch((err) => {
        logger.error('processNewPage fail', err);
      });
    }, 2500));

    const timeout = 90000;

    setTimeout(async () => {
      logger.warn('Attempting recovery..');

      await this.restart();
    }, timeout);
  }

  /**
   * Registers the event handlers that drive the scraper from indexing to
   * scraping and from one service to the next.
   *
   * @returns {Promise<void>}
   */
  async attachEvents() {
    this.on('pageChanged', this._throttle(async () => {
      this.processNewPage().catch((err) => {
        logger.error('processNewPage fail', err);
      });
    }, 2500));

    // clear out stock recover handler
    this.removeAllListeners('recover');

    this.on('recover', async () => {
      logger.info('onRecover');

      try {
        await this.recover();
      }
      catch (err) {
        logger.error('recover failed', err);
      }
    });

    this.on('entityComplete', () => {
      this.handleEntityComplete();
    });

    // 'serviceDone' and 'indexdone' fan out to the event of the active mode.
    this.on('serviceDone', () => {
      const eventName = ['paymentServicesDone', 'emoneyServicesDone', 'creditServicesDone'][this.mode];

      if (eventName) {
        this.emit(eventName);
      }
    });

    this.on('indexdone', () => {
      const eventName = ['psindexdone', 'emindexdone', 'ciindexdone'][this.mode];

      if (eventName) {
        this.emit(eventName);
      }
    });

    this.on('psindexdone', () => this._eeOnIndexDone(this.paymentServices));
    this.on('emindexdone', () => this._eeOnIndexDone(this.emoneyServices));
    this.on('ciindexdone', () => this._eeOnIndexDone(this.creditServices));

    this.on('paymentServicesDone', async () => {
      this.workMode = 0;

      try {
        await super._paymentServicesDone();
      }
      catch (e) {
        logger.error(e);
      }
    });

    this.on('emoneyServicesDone', async () => {
      logger.warn('emoneyServicesDone');
      this.workMode = 0;

      try {
        this.emoneyServices.done = true;
        jsonfile.writeFileSync(`${this.path}/emoneyServices.json`, { 'links': this.emoneyServices.links });
        jsonfile.writeFileSync(`${this.debugPath}/emoneyServices.json`, this.emoneyServices);
        this.mode++;
        this.inProgress = false;

        await this._goto(this.creditServices.urls[0]);
      }
      catch (e) {
        logger.error(e);
      }
    });

    this.on('creditServicesDone', async () => {
      logger.warn('creditServicesDone');
      this.workMode = 0;

      try {
        this.creditServices.done = true;
        jsonfile.writeFileSync(`${this.path}/creditServices.json`, { 'links': this.creditServices.links });
        jsonfile.writeFileSync(`${this.debugPath}/creditServices.json`, this.creditServices);
        this.mode++;
        this.inProgress = false;

        this.emit('done');
      }
      catch (e) {
        logger.error(e);
      }
    });
  }

  /**
   * Resets the scraper state, opens the browser, attaches the page and event
   * handlers and navigates to the first payment-services index URL.
   *
   * @returns {Promise<void>}
   * @throws {Error} when browser start-up or the first navigation fails
   */
  async start() {
    super._start();

    try {
      this.mode = 0;
      this.workMode = 0;

      this.rootURI = 'https://www.fi.ee';

      this.paymentServices = {
        'items': 0,
        'links': [],
        'step': 0,
        'indexStep': 0,
        'visited': false,
        'done': false,
        'urls': [
          'https://www.fi.ee/en/payment-services/payment-institutions/estonian-payment-institutions',
          'https://www.fi.ee/en/payment-services/payment-services/payment-institutions/estonian-payment-institutions-exemption',
          'https://www.fi.ee/en/payment-services/payment-institutions/payment-services/branches-foreign-payment-institutions',
          'https://www.fi.ee/en/payment-services/payment-services/payment-institutions/payment-agents',
          'https://www.fi.ee/en/payment-services/payment-institutions/payment-services/providers-cross-border-payment-sevices',
          'https://www.fi.ee/en/payment-services/payment-institutions/payment-agents-providers-cross-border-payment-services'
        ],
        'sections': [],
        'sectionLinks': [],
        'pageCount': 0
      };

      this.emoneyServices = {
        'items': 0,
        'links': [],
        'step': 0,
        'indexStep': 0,
        'visited': false,
        'done': false,
        'urls': [
          'https://www.fi.ee/en/payment-services/payment-services/e-money-institutions/estonian-e-money-institutions',
          'https://www.fi.ee/en/payment-services/payment-services/e-money-institutions/estonian-e-money-institutions-exemption',
          'https://www.fi.ee/en/payment-services/payment-services/e-money-institutions/distributors-e-money',
          'https://www.fi.ee/en/payment-services/e-money-institutions/providers-cross-border-e-money-services',
          'https://www.fi.ee/en/distributors-providers-cross-border-e-money-services',
          'https://www.fi.ee/en/payment-services/payment-services/e-money-institutions/branches-foreign-e-money-institutions'
        ],
        'sections': [],
        'sectionLinks': [],
        'pageCount': 0
      };

      this.creditServices = {
        'items': 0,
        'links': [],
        'step': 0,
        'indexStep': 0,
        'visited': false,
        'done': false,
        'searchDone': false,
        'started': false,
        'urls': [
          'https://www.fi.ee/en/banking-and-credit/banking-and-credit/credit-institutions/licensed-credit-institutions-estonia',
          'https://www.fi.ee/en/banking-and-credit/credit-institutions/affiliated-branches-foreign-credit-institutions',
          'https://www.fi.ee/en/banking-and-credit/banking-and-credit/credit-institutions/representative-offices-foreign-credit-institutions',
          'https://www.fi.ee/en/banking-and-credit/banking-and-credit/credit-institutions/providers-cross-border-banking-services'
        ],
        'sections': [],
        'sectionLinks': [],
        'pageCount': 0
      };

      this.startPage = this.paymentServices.urls[0];
      this.emoneyUrl = this.emoneyServices.urls[0];
      this.credit = this.creditServices.urls[0];

      this.setPath(path.resolve(`${__dirname }/../artefacts/EE/FI`));

      await this._initBrowser();
      await this._createBrowserPage();

      this.page.on('domcontentloaded', this._throttle(async () => {
        this.processNewPage().catch((err) => {
          logger.error('processNewPage fail', err);
        });
      }, 2500));

      // NOTE(review): presumably "2" means only the stock listeners ('done'
      // from the constructor plus one registered by the base class) exist,
      // i.e. attachEvents has not run yet. Confirm against Scraper.
      if (this.eventNames().length === 2) {
        await this.attachEvents();
      }

      await this.page.setViewport({ 'width': 1200, 'height': 800 });
      await this._goto(this.paymentServices.urls[0], { 'waitUntil': 'networkidle0' });

      await this._randomWait(this.page, 3, 5);
    }
    catch (e) {
      // Re-throw the original error so its stack and type survive; only
      // non-Error values are wrapped.
      throw e instanceof Error ? e : new Error(e);
    }
  }

  /**
   * Target of the throttled `run` created in the constructor.
   *
   * @returns {Promise<void>}
   * @private
   */
  async __run() {
    await this.start();
  }

}
|
|
|
|
// The scraper class is the module's only export.
module.exports = EEScrape;
|