795 lines
22 KiB
JavaScript
795 lines
22 KiB
JavaScript
const Scraper = require('../helpers/scraper');
|
|
const cheerio = require('cheerio');
|
|
const path = require('path');
|
|
const jsonfile = require('jsonfile');
|
|
const removeAccents = require('remove-accents-diacritics');
|
|
const logger = require('log4js').getLogger('NL');
|
|
const url = require('url');
|
|
|
|
// Log verbosity is taken from LOGGER_LEVEL; falls back to 'warn' when it is unset or empty.
logger.level = process.env.LOGGER_LEVEL || 'warn';
|
|
|
|
class NLScrape extends Scraper {
|
|
|
|
constructor() {
|
|
super();
|
|
this.setID('NL');
|
|
|
|
this.addToBlockFilters(['cookiebar.js', 'readspeaker']);
|
|
|
|
this.on('done', () => {
|
|
this._done();
|
|
});
|
|
|
|
this.run = this._throttle(async () => {
|
|
await this.__run();
|
|
}, 5000);
|
|
|
|
// Delays the call to 30 seconds after the last time it was called.
|
|
// Useful if the page beaks and multiple errors happen at the same time
|
|
this.recover = this._debounce(async () => {
|
|
await this.__recover();
|
|
}, 30000);
|
|
|
|
if (process.env.NODE_ENV === 'production')
|
|
this._checkLock().then((l) => {
|
|
if(l)
|
|
this.run();
|
|
});
|
|
}
|
|
|
|
async extractDetail(body) {
|
|
const description = [];
|
|
try{
|
|
const $ = cheerio.load(body);
|
|
|
|
const rows = $('dl.extra > dd > table > tbody > tr');
|
|
|
|
rows.each((index, item) => {
|
|
let cells = $(item).find('th');
|
|
|
|
const title = this._cleanUp($(cells.get(0)).text()).replace(':', '') || '';
|
|
|
|
cells = $(item).find('td');
|
|
const detail = this._cleanUp($(cells.get(0)).text()) || '';
|
|
|
|
if (title !== '')
|
|
description.push([title, detail]);
|
|
});
|
|
}
|
|
catch( err) {
|
|
logger.error(err);
|
|
}
|
|
|
|
return description;
|
|
}
|
|
|
|
async extractActivity(body) {
|
|
const details = [];
|
|
try{
|
|
const $ = cheerio.load(body);
|
|
const rows = $('#tab2 > div > div > table > tbody > tr');
|
|
let previousFinancialService = '';
|
|
|
|
rows.each((index, item) => {
|
|
const cells = $(item).find('td');
|
|
|
|
const activity = this._cleanUp($(cells.get(0)).text()) || '';
|
|
const startDate = this._cleanUp($(cells.get(1)).text()) || '';
|
|
const endDate = this._cleanUp($(cells.get(2)).text()) || '';
|
|
|
|
const thCell = $(item).find('th');
|
|
const financialService = this._cleanUp($(thCell.get(0)).text()) || previousFinancialService;
|
|
|
|
details.push({ financialService, activity, startDate, endDate });
|
|
|
|
previousFinancialService = financialService;
|
|
});
|
|
}
|
|
catch( err) {
|
|
logger.error(err);
|
|
}
|
|
|
|
return details;
|
|
}
|
|
|
|
/**
|
|
* Extract Passporting Out Data from page
|
|
* @param body
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async extractPassportingOut(body) {
|
|
const details = {};
|
|
|
|
try{
|
|
const $ = cheerio.load(body);
|
|
|
|
const rows = $('#tab6 > div > div > table > tbody > tr');
|
|
let previouseuPassportOut = '';
|
|
|
|
rows.each((index, item) => {
|
|
const cells = $(item).find('td');
|
|
|
|
const activity = this._cleanUp($(cells.get(0)).text()) || '';
|
|
const country = this._cleanUp($(cells.get(1)).text()) || '';
|
|
const startDate = this._cleanUp($(cells.get(2)).text()) || '';
|
|
const endDate = this._cleanUp($(cells.get(3)).text()) || '';
|
|
|
|
const thCell = $(item).find('th');
|
|
const euPassportOut = this._cleanUp($(thCell.get(0)).text()) || previouseuPassportOut;
|
|
|
|
if (!details.hasOwnProperty(country))
|
|
details[country] = [{ activity, startDate, endDate, euPassportOut }];
|
|
else
|
|
details[country].push({ activity, startDate, endDate, euPassportOut });
|
|
|
|
previouseuPassportOut = euPassportOut;
|
|
});
|
|
}
|
|
catch( err) {
|
|
logger.error(err);
|
|
}
|
|
|
|
return details;
|
|
}
|
|
|
|
/**
|
|
* Extract Passporting In Data from page
|
|
* @param body
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async extractPassportingIn(body) {
|
|
const details = {};
|
|
|
|
try{
|
|
const $ = cheerio.load(body);
|
|
|
|
const rows = $('#tab7 > div > div > table > tbody > tr');
|
|
let previouseuPassportIn = '';
|
|
|
|
rows.each((index, item) => {
|
|
const cells = $(item).find('td');
|
|
|
|
const activity = this._cleanUp($(cells.get(0)).text()) || '';
|
|
const startDate = this._cleanUp($(cells.get(1)).text()) || '';
|
|
|
|
const thCell = $(item).find('th');
|
|
const euPassportIn = this._cleanUp($(thCell.get(0)).text()) || previouseuPassportIn;
|
|
|
|
if (!details.hasOwnProperty(euPassportIn))
|
|
details[euPassportIn] = [{ activity, startDate }];
|
|
else
|
|
details[euPassportIn].push({ activity, startDate });
|
|
|
|
previouseuPassportIn = euPassportIn;
|
|
});
|
|
}
|
|
catch( err) {
|
|
logger.error(err);
|
|
}
|
|
|
|
return details;
|
|
}
|
|
|
|
/**
|
|
* Process Entity Detail
|
|
*
|
|
* @returns {Promise<{activity: *, details: *}>}
|
|
*/
|
|
async processEntityDetail(serviceObject) {
|
|
const noWhiteSpace = /\W/g;
|
|
const urlSections = ['WFTBI', 'WFTEG', 'WFTKF'];
|
|
const id = serviceObject.links[serviceObject.step].id;
|
|
|
|
logger.info(`Process V2 ${this.modeTitles[this.mode]} entity ${serviceObject.step + 1} of ${serviceObject.items} // ${id}`);
|
|
|
|
await this._randomWait(this.page, 3, 5);
|
|
|
|
const entity = removeAccents.remove(id.trim());
|
|
|
|
const filename = this._makeFileName(entity);
|
|
|
|
const filePath = `${this.path}/${filename}`.substring(0, 240);
|
|
|
|
await this.page.waitForSelector('#contentcolumn > div.interactive-tabs > ol > li:nth-child(2) > a', { 'visible':true, 'timeout':7500 }).then(async (elm) => {
|
|
await elm.click({ 'delay':Scraper.notARobot() });
|
|
await this._randomWait(this.page, 3, 5);
|
|
await this._makeScreenshotV2(this.page, `${filePath}_main`, null);
|
|
}).catch(() => {
|
|
logger.debug('No activity tab');
|
|
});
|
|
|
|
await this.page.waitForSelector('div.interactive-tabs > ol > li a[href*="#tab6"]', { 'visible':true, 'timeout':2500 }).then(async (elm) => {
|
|
await elm.click({ 'delay':Scraper.notARobot() });
|
|
await this._randomWait(this.page, 3, 5);
|
|
|
|
await this._makeScreenshotV2(this.page, `${filePath}_passportingout`, null);
|
|
}).catch(() => {
|
|
logger.debug('No passporting Out tab');
|
|
});
|
|
|
|
await this.page.waitForSelector('div.interactive-tabs > ol > li a[href*="#tab7"]', { 'visible':true, 'timeout':2500 }).then(async (elm) => {
|
|
await elm.click({ 'delay':Scraper.notARobot() });
|
|
await this._randomWait(this.page, 3, 5);
|
|
|
|
await this._makeScreenshotV2(this.page, `${filePath}_passportingin`, null);
|
|
}).catch(() => {
|
|
logger.debug('No passporting In tab');
|
|
});
|
|
|
|
const body = await this.page.content();
|
|
const details = await this.extractDetail(body);
|
|
const activity = await this.extractActivity(body);
|
|
const passportingOut = await this.extractPassportingOut(body);
|
|
const passportingIn = await this.extractPassportingIn(body);
|
|
|
|
await jsonfile.writeFile(`${filePath}.json`, { details, activity, passportingOut, passportingIn });
|
|
|
|
await this._randomWait(this.page, 3, 5);
|
|
|
|
serviceObject.links[serviceObject.step].filename = `${filename}.json`;
|
|
serviceObject.step++;
|
|
|
|
if (serviceObject.step < serviceObject.items) {
|
|
const newUrl = `https://www.dnb.nl/en/supervision/public-register/${urlSections[this.mode]}/${serviceObject.links[serviceObject.step].href}`;
|
|
|
|
await this._goto(newUrl);
|
|
}
|
|
else
|
|
this.emit('entityDone');
|
|
}
|
|
|
|
/**
|
|
* Process WFTBI / Payment Services Detail
|
|
*
|
|
* @returns {Promise<{activity: *, details: *}>}
|
|
*/
|
|
async processWFTBIDetail() {
|
|
await this.processEntityDetail(this.paymentServices);
|
|
}
|
|
|
|
/**
|
|
* Process WFTEG / Emoney services Detail
|
|
* @returns {Promise<{activity: *, details: *}>}
|
|
*/
|
|
async processWFTEGDetail() {
|
|
await this.processEntityDetail(this.emoneyServices);
|
|
}
|
|
|
|
/**
|
|
* Process WFTKF / Credit Services Details
|
|
* @returns {Promise<{activity: *, passportingOut: void, details: *}>}
|
|
*/
|
|
async processWFTKFDetail() {
|
|
await this.processEntityDetail(this.creditServices);
|
|
}
|
|
|
|
/**
|
|
* Initiate WFTBI / Payment Services
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async initiateWFTBI() {
|
|
try{
|
|
// first time around.
|
|
// need to kick off the index correctly..
|
|
|
|
const options = await this.page.$$('#ddfilter option');
|
|
const wantedOption = ['2:3c Dutch branch of payment institution (EEA incl. NL)'];
|
|
for (const item of options) {
|
|
const text = await this.page.evaluate(el => el.innerText, item);
|
|
const value = await this.page.evaluate(el => el.value, item);
|
|
|
|
if (wantedOption.indexOf(text) !== -1) {
|
|
await this.page.select('#ddfilter', value);
|
|
break;
|
|
}
|
|
}
|
|
|
|
this._findAndClick('#search-main button');
|
|
}
|
|
catch(e) {
|
|
throw new Error(e);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Initiaite WFTEG / Emoney services
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async initiateWFTEG() {
|
|
try{
|
|
// first time around.
|
|
// need to kick off the index correctly..
|
|
|
|
const options = await this.page.$$('#ddfilter option');
|
|
const wantedOption = ['2:10b Carrying on the business of an electronic money institution'];
|
|
for (const item of options) {
|
|
const text = await this.page.evaluate(el => el.innerText, item);
|
|
const value = await this.page.evaluate(el => el.value, item);
|
|
|
|
if (wantedOption.indexOf(text) !== -1) {
|
|
await this.page.select('#ddfilter', value);
|
|
break;
|
|
}
|
|
}
|
|
|
|
this._findAndClick('#search-main button');
|
|
}
|
|
catch(e) {
|
|
throw new Error(e);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Initiate WFTKF / Credit Services
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async initiateWFTKF() {
|
|
try{
|
|
// first time around.
|
|
// need to kick off the index correctly..
|
|
|
|
const options = await this.page.$$('#ddfilter option');
|
|
const selects = ['2:12(1) Carrying on the business of a bank', '2:13(1) Carrying on the business of a bank'];
|
|
const wantedOption = [];
|
|
wantedOption.push(selects[this.creditServices.step]);
|
|
for (const item of options) {
|
|
const text = await this.page.evaluate(el => el.innerText, item);
|
|
const value = await this.page.evaluate(el => el.value, item);
|
|
|
|
if (wantedOption.indexOf(text) !== -1) {
|
|
await this.page.select('#ddfilter', value);
|
|
break;
|
|
}
|
|
}
|
|
|
|
this._findAndClick('#search-main button');
|
|
}
|
|
catch(e) {
|
|
throw new Error(e);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Process WFTBI / Payment Services
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async processWFTBI() {
|
|
const nonWhiteSpace = /\W/g;
|
|
logger.info('WFTBI / Payment Services');
|
|
await this._randomWait(this.page, 3, 5);
|
|
const origUrl = await this.page.url();
|
|
const pageUrl = url.parse(origUrl);
|
|
|
|
if (pageUrl.query === null)
|
|
// we need to select the correct item from the dropdown.
|
|
this.initiateWFTBI();
|
|
|
|
else {
|
|
// crack query
|
|
|
|
const body = await this.page.content();
|
|
const $ = cheerio.load(body);
|
|
|
|
const q = this._getParamsFromUrl(origUrl);
|
|
|
|
const page = q.page || '1';
|
|
|
|
await this._makeScreenshotV2(this.page, `${this.path}/paymentServices_menu_${page}`, null);
|
|
|
|
const rows = $('#contentcolumn table tbody tr');
|
|
|
|
rows.each((i, elm) => {
|
|
const children = cheerio(elm).children();
|
|
let statutoryName = children.eq(0).text();
|
|
let tradeName = children.eq(1).text();
|
|
|
|
statutoryName = removeAccents.remove(statutoryName.trim()).replace(nonWhiteSpace, '_');
|
|
|
|
tradeName = removeAccents.remove(tradeName.trim()).replace(nonWhiteSpace, '_');
|
|
|
|
const id = (statutoryName === tradeName) ? statutoryName : `${statutoryName}-${tradeName}`;
|
|
|
|
let href = cheerio(children.eq(0)).find('a').attr('href');
|
|
href = href.concat('&locale=en_GB');
|
|
// this is the one we want.
|
|
|
|
this.paymentServices.links.push({ id, href });
|
|
});
|
|
|
|
const next = $('a.next').attr('href') || '';
|
|
|
|
if (next !== '')
|
|
this._findAndClick('a.next');
|
|
else
|
|
this.emit('startProcessingPaymentServices');
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Process WFTEG / Emoney services
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async processWFTEG() {
|
|
const nonWhiteSpace = /\W/g;
|
|
logger.info('WFTEG / EMoney Services');
|
|
await this._randomWait(this.page, 3, 5);
|
|
const origUrl = await this.page.url();
|
|
const pageUrl = url.parse(origUrl);
|
|
|
|
if (pageUrl.query === null)
|
|
// we need to select the correct item from the dropdown.
|
|
this.initiateWFTEG();
|
|
|
|
else {
|
|
// crack query
|
|
|
|
const body = await this.page.content();
|
|
const $ = cheerio.load(body);
|
|
|
|
const q = this._getParamsFromUrl(origUrl);
|
|
|
|
const page = q.page || '1';
|
|
|
|
await this._makeScreenshotV2(this.page, `${this.path}/eMoney_menu_${page}`, null);
|
|
|
|
const rows = $('#contentcolumn table tbody tr');
|
|
|
|
rows.each((i, elm) => {
|
|
const children = cheerio(elm).children();
|
|
let statutoryName = children.eq(0).text();
|
|
let tradeName = children.eq(1).text();
|
|
|
|
statutoryName = removeAccents.remove(statutoryName.trim()).replace(nonWhiteSpace, '_');
|
|
|
|
tradeName = removeAccents.remove(tradeName.trim()).replace(nonWhiteSpace, '_');
|
|
|
|
// const id = `${statutoryName}-${tradeName}`;
|
|
const id = (statutoryName === tradeName) ? statutoryName : `${statutoryName}-${tradeName}`;
|
|
|
|
let href = cheerio(children.eq(0)).find('a').attr('href');
|
|
href = href.concat('&locale=en_GB');
|
|
// this is the one we want.
|
|
|
|
this.emoneyServices.links.push({ id, href });
|
|
});
|
|
|
|
const next = $('a.next').attr('href') || '';
|
|
|
|
if (next !== '')
|
|
this._findAndClick('a.next');
|
|
else
|
|
this.emit('startProcessingEMoneyServices');
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Process WFTKF / Credit Services
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async processWFTKF() {
|
|
try {
|
|
// Credit Institute
|
|
const nonWhiteSpace = /\W/g;
|
|
logger.info('WFTKF / Credit Services');
|
|
await this._randomWait(this.page, 3, 5);
|
|
const origUrl = await this.page.url();
|
|
const pageUrl = url.parse(origUrl);
|
|
|
|
if (pageUrl.query === null)
|
|
// we need to select the correct item from the dropdown.
|
|
this.initiateWFTKF();
|
|
|
|
else {
|
|
// crack query
|
|
|
|
const body = await this.page.content();
|
|
const $ = cheerio.load(body);
|
|
|
|
const q = this._getParamsFromUrl(origUrl);
|
|
|
|
const page = q.page || '1';
|
|
|
|
await this._makeScreenshotV2(this.page, `${this.path}/creditServices_menu_${page}`, null);
|
|
|
|
const rows = $('#contentcolumn table tbody tr');
|
|
|
|
rows.each((i, elm) => {
|
|
const children = cheerio(elm).children();
|
|
let statutoryName = children.eq(0).text();
|
|
let tradeName = children.eq(1).text();
|
|
|
|
statutoryName = removeAccents.remove(statutoryName.trim()).replace(nonWhiteSpace, '_');
|
|
|
|
tradeName = removeAccents.remove(tradeName.trim()).replace(nonWhiteSpace, '_');
|
|
|
|
const id = (statutoryName === tradeName) ? statutoryName : `${statutoryName}-${tradeName}`;
|
|
|
|
// const id = `${statutoryName}-${tradeName}`;
|
|
|
|
let href = cheerio(children.eq(0)).find('a').attr('href');
|
|
href = href.concat('&locale=en_GB');
|
|
// this is the one we want.
|
|
|
|
logger.debug({ id, href });
|
|
|
|
this.creditServices.links.push({ id, href });
|
|
});
|
|
|
|
const next = $('a.next').attr('href') || '';
|
|
|
|
if (next !== '')
|
|
this._findAndClick('a.next');
|
|
else
|
|
if (this.creditServices.step === 0) {
|
|
this.creditServices.step = 1;
|
|
await this._goto(this.credit);
|
|
}
|
|
else
|
|
this.emit('startProcessingCreditServices');
|
|
}
|
|
}
|
|
catch(e) {
|
|
await this._uploadError();
|
|
throw new Error(e);
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async processNewPage() {
|
|
// give the page a few seconds to settle
|
|
|
|
const failedUrls = ['chrome-error://chromewebdata/'];
|
|
|
|
await this._randomWait(this.page, 3, 5);
|
|
|
|
const pageUrl = url.parse(await this.page.url());
|
|
|
|
if (failedUrls.indexOf(pageUrl.href) !== -1) {
|
|
this.emit('recover');
|
|
|
|
return;
|
|
}
|
|
|
|
switch (pageUrl.pathname) {
|
|
|
|
case '/en/supervision/public-register/WFTBI/index.jsp':
|
|
await this.processWFTBI();
|
|
break;
|
|
case '/en/supervision/public-register/WFTBI/detail.jsp':
|
|
await this.processWFTBIDetail();
|
|
break;
|
|
case '/en/supervision/public-register/WFTEG/index.jsp':
|
|
await this.processWFTEG();
|
|
break;
|
|
case '/en/supervision/public-register/WFTEG/detail.jsp':
|
|
await this.processWFTEGDetail();
|
|
break;
|
|
case '/en/supervision/public-register/WFTKF/index.jsp':
|
|
await this.processWFTKF();
|
|
break;
|
|
case '/en/supervision/public-register/WFTKF/detail.jsp':
|
|
await this.processWFTKFDetail();
|
|
break;
|
|
default:
|
|
await this._uploadError();
|
|
throw new Error(`Unknown page: ${pageUrl.href}`);
|
|
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async restart() {
|
|
logger.info(`Restarting ${this.modeTitles[this.mode]}`);
|
|
|
|
switch (this.mode) {
|
|
|
|
case 2:
|
|
this.emit('startProcessingCreditServices');
|
|
break;
|
|
|
|
case 1:
|
|
this.emit('startProcessingEMoneyServices');
|
|
break;
|
|
|
|
case 0:
|
|
default:
|
|
this.emit('startProcessingPaymentServices');
|
|
break;
|
|
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
* @private
|
|
*/
|
|
async __recover() {
|
|
logger.warn('*** RECONNECTING PAGE ***');
|
|
|
|
if (this.browserCrashed) await this._initBrowser(true);
|
|
|
|
await this._createBrowserPage();
|
|
this.page.on('domcontentloaded', () => {
|
|
this.processNewPage();
|
|
});
|
|
|
|
const timeout = 90000;
|
|
|
|
setTimeout(async() => {
|
|
logger.warn('Attempting recovery..');
|
|
|
|
await this.restart();
|
|
}, timeout);
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async attachEvents() {
|
|
this.on('entityDone', async () => {
|
|
switch (this.mode) {
|
|
|
|
case 0:
|
|
this.emit('paymentServicesDone');
|
|
break;
|
|
|
|
case 1:
|
|
this.emit('emoneyServicesDone');
|
|
break;
|
|
|
|
case 2:
|
|
this.emit('creditServicesDone');
|
|
break;
|
|
|
|
}
|
|
});
|
|
|
|
this.on('startProcessingPaymentServices', async () => {
|
|
this.paymentServices.items = this.paymentServices.links.length;
|
|
logger.info(`${this.paymentServices.items} items indexed`);
|
|
|
|
const newUrl = `https://www.dnb.nl/en/supervision/public-register/WFTBI/${this.paymentServices.links[this.paymentServices.step].href}`;
|
|
|
|
logger.debug('startProcessingPaymentServices', newUrl);
|
|
await this._goto(newUrl);
|
|
});
|
|
|
|
this.on('paymentServicesDone', async () => {
|
|
this.paymentServices.done = true;
|
|
jsonfile.writeFileSync(`${this.path}/paymentServices.json`, { 'links': this.paymentServices.links });
|
|
jsonfile.writeFileSync(`${this.debugPath}/paymentServices.json`, this.paymentServices);
|
|
|
|
await this._goto(this.emoneyUrl);
|
|
});
|
|
|
|
this.on('startProcessingEMoneyServices', async () => {
|
|
this.mode = 1;
|
|
this.emoneyServices.items = this.emoneyServices.links.length;
|
|
logger.debug(`${this.emoneyServices.items} EMoney items indexed` );
|
|
logger.debug(this.emoneyServices.links[this.emoneyServices.step].href);
|
|
|
|
const newUrl = `https://www.dnb.nl/en/supervision/public-register/WFTEG/${this.emoneyServices.links[this.emoneyServices.step].href}`;
|
|
|
|
logger.debug('startProcessingEMoneyServices', newUrl);
|
|
await this._goto(newUrl);
|
|
});
|
|
|
|
this.on('emoneyServicesDone', async () => {
|
|
this.emoneyServices.done = true;
|
|
jsonfile.writeFileSync(`${this.path}/emoneyServices.json`, { 'links':this.emoneyServices.links });
|
|
jsonfile.writeFileSync(`${this.debugPath}/emoneyServices.json`, this.emoneyServices);
|
|
|
|
await this._goto(this.credit);
|
|
});
|
|
|
|
this.on('startProcessingCreditServices', async () => {
|
|
this.mode = 2;
|
|
this.creditServices.items = this.creditServices.links.length;
|
|
logger.debug(`${this.creditServices.items} CI items indexed` );
|
|
logger.debug(this.creditServices.links[this.creditServices.step].href);
|
|
|
|
const newUrl = `https://www.dnb.nl/en/supervision/public-register/WFTKF/${this.creditServices.links[this.creditServices.step].href}`;
|
|
logger.debug('startProcessingCreditServices', newUrl);
|
|
await this._goto(newUrl);
|
|
});
|
|
|
|
this.on('creditServicesDone', async () => {
|
|
this.creditServices.done = true;
|
|
jsonfile.writeFileSync(`${this.path}/creditServices.json`, { 'links':this.creditServices.links });
|
|
jsonfile.writeFileSync(`${this.debugPath}/creditServices.json`, this.creditServices);
|
|
|
|
this.emit('done');
|
|
});
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async start() {
|
|
super._start();
|
|
|
|
this.mode = 0;
|
|
try {
|
|
this.paymentServices = {
|
|
'items': 0,
|
|
'links': [],
|
|
'step': 0,
|
|
'visited': false,
|
|
'done' : false
|
|
};
|
|
|
|
this.emoneyServices = {
|
|
'items': 0,
|
|
'links': [],
|
|
'step': 0,
|
|
'visited': false,
|
|
'done' : false,
|
|
'searchDone' : false
|
|
};
|
|
|
|
this.creditServices = {
|
|
'items': 0,
|
|
'links': [],
|
|
'step': 0,
|
|
'visited': false,
|
|
'done' : false,
|
|
'searchDone' : false
|
|
};
|
|
|
|
this.startPage = 'https://www.dnb.nl/en/supervision/public-register/WFTBI/index.jsp';
|
|
this.emoneyUrl = 'https://www.dnb.nl/en/supervision/public-register/WFTEG/index.jsp';
|
|
this.credit = 'https://www.dnb.nl/en/supervision/public-register/WFTKF/index.jsp';
|
|
|
|
//
|
|
|
|
this.setPath(path.resolve(`${__dirname }/../artefacts/NL/DNB`));
|
|
|
|
await this._doNonRepudiation(false, { 'sslWithPrefix': true }).catch((err) => {
|
|
logger.warn(err);
|
|
});
|
|
|
|
await this._initBrowser(true);
|
|
await this._createBrowserPage();
|
|
|
|
this.page.on('domcontentloaded', this._throttle(async () => {
|
|
this.processNewPage().catch((err) => {
|
|
logger.error('processNewPage fail', err);
|
|
});
|
|
}, 2500));
|
|
|
|
if (this.eventNames().length === 2)
|
|
await this.attachEvents();
|
|
|
|
await this.page.setViewport({ 'width': 1200, 'height': 800 });
|
|
|
|
await this._goto(this.startPage, { 'waitUntil':'networkidle2' });
|
|
|
|
await this._randomWait(this.page, 3, 5);
|
|
}
|
|
catch(e) {
|
|
throw new Error(e);
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
|
|
async __run() {
|
|
await this.start();
|
|
}
|
|
|
|
}
|
|
|
|
// The module exports the scraper class itself; instances are created by the consumer.
module.exports = NLScrape;
|