// Listing metadata: 819 lines, 22 KiB, JavaScript
|
const Scraper = require('../helpers/scraper');
|
||
|
const cheerio = require('cheerio');
|
||
|
const path = require('path');
|
||
|
const jsonfile = require('jsonfile');
|
||
|
const removeAccents = require('remove-accents-diacritics');
|
||
|
const logger = require('log4js').getLogger('MT');
|
||
|
const url = require('url');
|
||
|
|
||
|
// Logger verbosity is taken from LOGGER_LEVEL; falls back to 'warn' when that variable is unset or empty.
logger.level = process.env.LOGGER_LEVEL || 'warn';
|
||
|
|
||
|
class MTScrape extends Scraper {
|
||
|
|
||
|
constructor() {
|
||
|
super();
|
||
|
this.id = 'MT';
|
||
|
|
||
|
this.on('done', () => {
|
||
|
this._done();
|
||
|
});
|
||
|
|
||
|
this.run = this._debounce(async () => {
|
||
|
await this.__run();
|
||
|
}, 5000);
|
||
|
|
||
|
if (process.env.NODE_ENV === 'production')
|
||
|
this._checkLock().then((l) => {
|
||
|
if(l)
|
||
|
this.run();
|
||
|
});
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* @param html
|
||
|
* @returns {Promise<{authorization, details}>}
|
||
|
* @constructor
|
||
|
*/
|
||
|
async OLDextractEntity(html) {
|
||
|
const $ = cheerio.load(html);
|
||
|
const details = {};
|
||
|
const authorization = {};
|
||
|
|
||
|
details.name = this._cleanUp($('#lblName').text());
|
||
|
|
||
|
const dlCells = $('div#pnlCommonDetails').children();
|
||
|
const superCells = $('#LHDetails span.fix-width-caption');
|
||
|
|
||
|
// #lblStatus
|
||
|
|
||
|
dlCells.each((index, item) => {
|
||
|
if ($(item).attr('id') === 'pnlRegDate') {
|
||
|
const itemText = this._cleanUp($(item).find('span').text()).split(/\s*:\s*/);
|
||
|
|
||
|
details[itemText[0]] = itemText[1];
|
||
|
}
|
||
|
else {
|
||
|
const current = this._cleanUp($(item).find('p').text()).replace(/\s*:\s*/, '');
|
||
|
|
||
|
details[current] = this._cleanUp($(item).find('span').text());
|
||
|
}
|
||
|
});
|
||
|
|
||
|
superCells.each((index, item) => {
|
||
|
const nextElm = $($(item).next());
|
||
|
|
||
|
const li = $(nextElm).find('li');
|
||
|
|
||
|
const thisId = this._cleanUp($(item).text()).replace(/\s*:\s*/, '');
|
||
|
|
||
|
authorization[thisId] = [];
|
||
|
if (li.length > 0)
|
||
|
li.each((index, item) => {
|
||
|
const auth = $(item).html().split(' - ');
|
||
|
|
||
|
auth[1] = this._cleanUp(auth[1]);
|
||
|
authorization[thisId].push(auth);
|
||
|
});
|
||
|
else {
|
||
|
const itemText = this._cleanUp($(nextElm).text());
|
||
|
authorization[thisId].push(itemText);
|
||
|
}
|
||
|
});
|
||
|
|
||
|
return { details, authorization };
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* @param html
|
||
|
* @returns {Promise<{authorization, details}>}
|
||
|
*/
|
||
|
async extractEntityV2(html) {
|
||
|
const trimToColon = /^.*?(?=(:))/;
|
||
|
|
||
|
const $ = cheerio.load(html);
|
||
|
const details = {};
|
||
|
const authorization = {};
|
||
|
const errors = [];
|
||
|
|
||
|
details.name = this._cleanUp($('div#mainTitle > div').text());
|
||
|
|
||
|
const dlCells = $('table#tableLicenceResult tr');
|
||
|
const superCells = $('#LHDetails span.fix-width-caption');
|
||
|
|
||
|
let previousLabel = '';
|
||
|
dlCells.each((index, item) => {
|
||
|
const children = $(item).children();
|
||
|
|
||
|
const rawLabel = $(children).eq(0).text().match(trimToColon);
|
||
|
const itemValue = this._cleanUp($(children).eq(1).text().trim());
|
||
|
|
||
|
if (rawLabel !== null ) {
|
||
|
const itemLabel = this._cleanUp(rawLabel[0]);
|
||
|
|
||
|
details[itemLabel] = itemValue;
|
||
|
previousLabel = itemLabel;
|
||
|
}
|
||
|
else
|
||
|
details[previousLabel] = details[previousLabel].concat([itemValue]);
|
||
|
});
|
||
|
|
||
|
previousLabel = '';
|
||
|
superCells.each((index, item) => {
|
||
|
const nextElm = $($(item).next());
|
||
|
|
||
|
const children = $(nextElm).children();
|
||
|
|
||
|
if ($(children).length <= 1) {
|
||
|
const li = $(nextElm).find('li');
|
||
|
|
||
|
const thisId = this._cleanUp($(item).text()).replace(/\s*:\s*/, '');
|
||
|
|
||
|
authorization[thisId] = [];
|
||
|
if (li.length > 0)
|
||
|
li.each((index, item) => {
|
||
|
const auth = $(item).text().split(' - ');
|
||
|
|
||
|
auth[1] = this._cleanUp(auth[1]);
|
||
|
|
||
|
if (auth[1] !== '')
|
||
|
authorization[thisId].push(auth);
|
||
|
});
|
||
|
else {
|
||
|
const itemText = this._cleanUp($(nextElm).text());
|
||
|
authorization[thisId].push(itemText);
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
logger.warn('Possible error in the HTML');
|
||
|
logger.warn($(nextElm).html());
|
||
|
errors.push($(nextElm).html());
|
||
|
}
|
||
|
});
|
||
|
|
||
|
const outObj = { details, authorization };
|
||
|
|
||
|
if (errors.length > 0)
|
||
|
outObj.errors = errors;
|
||
|
|
||
|
return outObj;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* @param serviceObject
|
||
|
* @returns {Promise<void>}
|
||
|
* @constructor
|
||
|
*/
|
||
|
async OLDprocessIndex(serviceObject) {
|
||
|
logger.info(`Building the ${this.modeTitles[this.mode]} index...`);
|
||
|
await this._randomWait(this.page, 3, 5);
|
||
|
|
||
|
const pagingItem = await this.page.$$('#ctl00_cphMain_rgLicenceHolders_ctl00 > tfoot > tr > td > table > tbody > tr > td > div.rgWrap.rgInfoPart strong');
|
||
|
|
||
|
const maxPagesText = (pagingItem.length > 0) ? await this.page.evaluate(el => el.innerText, pagingItem[1]) : '0';
|
||
|
|
||
|
const maxPages = parseInt(maxPagesText, 10);
|
||
|
|
||
|
const links = await this.page.$$('#ctl00_cphMain_rgLicenceHolders_ctl00 > tbody > tr > td> a');
|
||
|
|
||
|
for (const item of links) {
|
||
|
const id = await this.page.evaluate(el => el.innerText, item);
|
||
|
const href = await this.page.evaluate(el => el.href, item);
|
||
|
|
||
|
const params = this._getParamsFromUrl(href);
|
||
|
|
||
|
serviceObject.links.push({ id, href, 'entId': params.id, 'metaStep': serviceObject.indexMetaStep });
|
||
|
}
|
||
|
|
||
|
if (serviceObject.indexStep < (maxPages - 1) ) {
|
||
|
serviceObject.indexStep++;
|
||
|
await this._findAndClick('input.rgPageNext');
|
||
|
}
|
||
|
else
|
||
|
this.emit('indexdone');
|
||
|
}
|
||
|
|
||
|
async processIndexV2(serviceObject) {
|
||
|
// #tableResult span
|
||
|
const numberRegEx = /\d+/;
|
||
|
|
||
|
logger.debug('+ processIndexV2');
|
||
|
logger.info(`Building the ${this.modeTitles[this.mode]} index...`);
|
||
|
await this._randomWait(this.page, 3, 5);
|
||
|
|
||
|
const links = await this.page.$$('#tableResult span');
|
||
|
|
||
|
for (const item of links) {
|
||
|
const id = await this.page.evaluate(el => el.innerText, item);
|
||
|
const href = await this.page.evaluate(el => el.getAttribute('onclick'), item);
|
||
|
|
||
|
serviceObject.links.push({ id, 'entId': href.match(numberRegEx)[0], 'metaStep': serviceObject.indexMetaStep });
|
||
|
}
|
||
|
|
||
|
this.emit('indexdone');
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* @param serviceObject
|
||
|
* @returns {Promise<void>}
|
||
|
* @constructor
|
||
|
*/
|
||
|
async OLDinitiateIndex(serviceObject) {
|
||
|
logger.debug('initiateIndex');
|
||
|
const matched = { 'left':false, 'right':false };
|
||
|
// first time around.
|
||
|
// need to kick off the index correctly..
|
||
|
|
||
|
await this._findAndClick('#ctl00_cphMain_RadComboBox1');
|
||
|
|
||
|
await this._randomWait(this.page, 2, 3);
|
||
|
const leftOptions = await this.page.$$('#ctl00_cphMain_RadComboBox1_DropDown > div > ul.rcbList li');
|
||
|
const wantedOption = serviceObject.indexMeta[serviceObject.indexMetaStep];
|
||
|
|
||
|
for (const item of leftOptions) {
|
||
|
const text = await this.page.evaluate(el => el.innerText, item);
|
||
|
|
||
|
if (wantedOption.indexOf(text) !== -1) {
|
||
|
await item.click({ 'delay':95 });
|
||
|
matched.left = true;
|
||
|
|
||
|
// this element can take a while to reload..
|
||
|
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
await this._randomWait(this.page, 7, 9);
|
||
|
await this._findAndClick('#ctl00_cphMain_RadComboBox2_Input');
|
||
|
await this._randomWait(this.page, 2, 3);
|
||
|
|
||
|
const rightOptions = await this.page.$$('#ctl00_cphMain_RadComboBox2_DropDown > div > ul.rcbList li');
|
||
|
for (const item of rightOptions) {
|
||
|
const text = await this.page.evaluate(el => el.innerText, item);
|
||
|
if (text === wantedOption[1]) {
|
||
|
matched.right = true;
|
||
|
await item.click({ 'delay':95 });
|
||
|
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Wait for items to setttle
|
||
|
await this._randomWait(this.page, 2, 3);
|
||
|
|
||
|
if (matched.left && matched.right) {
|
||
|
serviceObject.started = true;
|
||
|
await this._findAndClick('#cphMain_btnSearch2');
|
||
|
}
|
||
|
|
||
|
else
|
||
|
logger.error('Not fully matched', matched);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Reworked for site reskin
|
||
|
* @param serviceObject
|
||
|
* @returns {Promise<void>}
|
||
|
*/
|
||
|
async initiateIndexV2(serviceObject) {
|
||
|
logger.debug('initiateIndexV2');
|
||
|
const matched = { 'left':false, 'right':false };
|
||
|
// first time around.
|
||
|
// need to kick off the index correctly..
|
||
|
|
||
|
// select#select1
|
||
|
|
||
|
const leftOptions = await this.page.$$('select#select1 option');
|
||
|
const wantedOption = serviceObject.indexMeta[serviceObject.indexMetaStep];
|
||
|
|
||
|
for (const item of leftOptions) {
|
||
|
const rawText = await this.page.evaluate(el => el.innerText, item);
|
||
|
const value = await this.page.evaluate(el => el.value, item);
|
||
|
|
||
|
const text = this._cleanUp(rawText);
|
||
|
|
||
|
if (wantedOption.indexOf(text) !== -1) {
|
||
|
await this.page.select('select#select1', value);
|
||
|
|
||
|
matched.left = true;
|
||
|
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Wait for items to setttle
|
||
|
await this._randomWait(this.page, 2, 3);
|
||
|
|
||
|
const rightOptions = await this.page.$$('select#select2 option');
|
||
|
for (const item of rightOptions) {
|
||
|
const rawText = await this.page.evaluate(el => el.innerText, item);
|
||
|
const value = await this.page.evaluate(el => el.value, item);
|
||
|
|
||
|
const text = this._cleanUp(rawText);
|
||
|
|
||
|
if (text === wantedOption[1]) {
|
||
|
matched.right = true;
|
||
|
await this.page.select('select#select2', value);
|
||
|
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
await this._randomWait(this.page, 2, 2);
|
||
|
|
||
|
if (matched.left && matched.right) {
|
||
|
serviceObject.started = true;
|
||
|
await this._findAndClick('button.searchButtonAdv');
|
||
|
|
||
|
this.emit('processIndex');
|
||
|
}
|
||
|
|
||
|
else
|
||
|
logger.error('Not fully matched', matched);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* @param serviceObject
|
||
|
* @returns {Promise<void>}
|
||
|
*/
|
||
|
async buildIndex(serviceObject) {
|
||
|
logger.debug('buildIndex');
|
||
|
if (!serviceObject.started)
|
||
|
await this.initiateIndexV2(serviceObject);
|
||
|
else
|
||
|
await this.processIndexV2(serviceObject);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* @param serviceObject
|
||
|
* @returns {Promise<void>}
|
||
|
*/
|
||
|
async nextItem(serviceObject) {
|
||
|
const entId = serviceObject.links[serviceObject.step].entId;
|
||
|
logger.debug('nextItem', entId);
|
||
|
|
||
|
await this.newLoadLicenceHolder(entId);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* @returns {Promise<void>}
|
||
|
*/
|
||
|
async indexRedirector() {
|
||
|
if (!this.processing)
|
||
|
switch (this.mode) {
|
||
|
|
||
|
case 0:
|
||
|
await this.buildIndex(this.paymentServices);
|
||
|
break;
|
||
|
|
||
|
case 1:
|
||
|
await this.buildIndex(this.emoneyServices);
|
||
|
break;
|
||
|
|
||
|
case 2:
|
||
|
await this.buildIndex(this.creditServices);
|
||
|
break;
|
||
|
|
||
|
}
|
||
|
|
||
|
else
|
||
|
switch (this.mode) {
|
||
|
|
||
|
case 0:
|
||
|
await this.nextItem(this.paymentServices);
|
||
|
break;
|
||
|
|
||
|
case 1:
|
||
|
await this.nextItem(this.emoneyServices);
|
||
|
break;
|
||
|
|
||
|
case 2:
|
||
|
await this.nextItem(this.creditServices);
|
||
|
break;
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
async processEntityDetails(serviceObject) {
|
||
|
const noWhiteSpace = /\W/g;
|
||
|
|
||
|
const { id, entId } = serviceObject.links[serviceObject.step];
|
||
|
|
||
|
logger.info(`Process ${this.modeTitles[this.mode]} entity ${serviceObject.step}:${id}`);
|
||
|
|
||
|
await this._randomWait(this.page, 3, 5);
|
||
|
|
||
|
const entity = removeAccents.remove(id.trim());
|
||
|
|
||
|
const filename = [this.modePrefix[this.mode], entity.replace(noWhiteSpace, '_'), `_${entId}`].join('');
|
||
|
|
||
|
const filePath = `${this.path}/${filename}`.substring(0, 240);
|
||
|
|
||
|
await this._randomWait(this.page, 3, 5);
|
||
|
|
||
|
await this._makeScreenshotV2(this.page, `${filePath}_main`, null);
|
||
|
|
||
|
const body = await this.page.content();
|
||
|
|
||
|
const details = await this.extractEntityV2(body);
|
||
|
|
||
|
await jsonfile.writeFile(`${filePath}.json`, { details });
|
||
|
|
||
|
await this._randomWait(this.page, 3, 5);
|
||
|
|
||
|
serviceObject.links[serviceObject.step].filename = `${filename}.json`;
|
||
|
serviceObject.step++;
|
||
|
|
||
|
if (serviceObject.step < serviceObject.items)
|
||
|
|
||
|
await this._goto(this.startPage, { 'waitUntil':'networkidle0' });
|
||
|
|
||
|
else
|
||
|
this.emit('serviceDone');
|
||
|
}
|
||
|
|
||
|
// processIndex
|
||
|
|
||
|
async handleProcessIndex() {
|
||
|
switch (this.mode) {
|
||
|
|
||
|
case 0:
|
||
|
await this.processIndexV2(this.paymentServices);
|
||
|
break;
|
||
|
|
||
|
case 1:
|
||
|
await this.processIndexV2(this.emoneyServices);
|
||
|
break;
|
||
|
|
||
|
case 2:
|
||
|
await this.processIndexV2(this.creditServices);
|
||
|
break;
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
async processRedirector() {
|
||
|
switch (this.mode) {
|
||
|
|
||
|
case 0:
|
||
|
await this.processEntityDetails(this.paymentServices);
|
||
|
break;
|
||
|
|
||
|
case 1:
|
||
|
await this.processEntityDetails(this.emoneyServices);
|
||
|
break;
|
||
|
|
||
|
case 2:
|
||
|
await this.processEntityDetails(this.creditServices);
|
||
|
break;
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
async processNewPage() {
|
||
|
// give the ajax page a few seconds to settle
|
||
|
await this._randomWait(this.page, 3, 5);
|
||
|
|
||
|
const pageUrl = url.parse(await this.page.url());
|
||
|
|
||
|
if (pageUrl.href === 'chrome-error://chromewebdata/') {
|
||
|
logger.warn('Directed to: chrome-error://chromewebdata/');
|
||
|
this.emit('recover');
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
logger.debug('processNewPage', pageUrl.href);
|
||
|
|
||
|
switch (pageUrl.pathname) {
|
||
|
|
||
|
case '/pages/licenceholders.aspx':
|
||
|
case '/financial-services-register/':
|
||
|
await this.indexRedirector();
|
||
|
break;
|
||
|
|
||
|
case'/pages/licenceholder.aspx':
|
||
|
case '/financial-services-register/result/':
|
||
|
await this.processRedirector();
|
||
|
break;
|
||
|
case '/en/our-registers/company-register/gransoverskridandehandel/':
|
||
|
await this.crossBorderRedirector();
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
if (process.env.NODE_ENV) {
|
||
|
await this._uploadError();
|
||
|
this.emit('backoff');
|
||
|
throw new Error(`Unknown page: ${pageUrl.href}`);
|
||
|
}
|
||
|
else {
|
||
|
logger.warn('processNewPage Fell through');
|
||
|
logger.warn('pathName', pathName);
|
||
|
logger.warn('currentPage.location', pageUrl);
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Replaces the goto
|
||
|
* @param id
|
||
|
* @returns {Promise<void>}
|
||
|
*/
|
||
|
async newLoadLicenceHolder(id) {
|
||
|
// loadLicenceHolder(10966)
|
||
|
const formElm = await this.page.$('form#loadHolder');
|
||
|
|
||
|
logger.debug('loadLicenceHolder', id);
|
||
|
|
||
|
await this.page.evaluate(x => {
|
||
|
x.target = '_self';
|
||
|
}, formElm);
|
||
|
|
||
|
await this._microWait(this.page, 5);
|
||
|
|
||
|
await this.page.evaluate(x => {
|
||
|
return loadLicenceHolder(x);
|
||
|
}, id);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* @returns {Promise<void>}
|
||
|
*/
|
||
|
async attachEvents() {
|
||
|
this.on('processIndex', async () => {
|
||
|
this.handleProcessIndex();
|
||
|
});
|
||
|
//
|
||
|
|
||
|
this.on('pageChanged', this._debounce(async () => {
|
||
|
this.processNewPage().catch((err) => {
|
||
|
logger.error('processNewPage fail', err);
|
||
|
});
|
||
|
}, 1000));
|
||
|
|
||
|
this.on('psindexdone', async () => {
|
||
|
this.paymentServices.indexMetaStep++;
|
||
|
|
||
|
if (this.paymentServices.indexMetaStep < this.paymentServices.indexMeta.length) {
|
||
|
logger.info('Resetting for next meta index...');
|
||
|
// next..
|
||
|
this.paymentServices.started = false;
|
||
|
this.paymentServices.indexStep = 0;
|
||
|
|
||
|
await this._goto(this.startPage);
|
||
|
}
|
||
|
else {
|
||
|
this.paymentServices.items = this.paymentServices.links.length;
|
||
|
|
||
|
logger.info(`${this.paymentServices.items} items indexed`);
|
||
|
|
||
|
await this._goto(this.startPage, { 'waitUntil':'networkidle0' });
|
||
|
|
||
|
logger.warn('GO THROUGH THE NEW LIST!!!!');
|
||
|
|
||
|
this.processing = true;
|
||
|
|
||
|
await this._randomWait(this.page, 2, 2, 'New page transition');
|
||
|
}
|
||
|
});
|
||
|
|
||
|
this.on('emindexdone', async () => {
|
||
|
this.emoneyServices.indexMetaStep++;
|
||
|
|
||
|
if (this.emoneyServices.indexMetaStep < this.emoneyServices.indexMeta.length) {
|
||
|
logger.info('Resetting for next meta index...');
|
||
|
// next..
|
||
|
this.emoneyServices.started = false;
|
||
|
this.emoneyServices.indexStep = 0;
|
||
|
|
||
|
await this._goto(this.startPage);
|
||
|
}
|
||
|
else {
|
||
|
this.emoneyServices.items = this.emoneyServices.links.length;
|
||
|
logger.info(`${this.emoneyServices.items} items indexed`);
|
||
|
|
||
|
await this._goto(this.startPage, { 'waitUntil':'networkidle0' });
|
||
|
|
||
|
logger.warn('GO THROUGH THE NEW LIST!!!!');
|
||
|
|
||
|
this.processing = true;
|
||
|
|
||
|
await this._randomWait(this.page, 2, 2, 'New page transition');
|
||
|
}
|
||
|
});
|
||
|
|
||
|
this.on('ciindexdone', async () => {
|
||
|
this.creditServices.indexMetaStep++;
|
||
|
|
||
|
if (this.creditServices.indexMetaStep < this.creditServices.indexMeta.length) {
|
||
|
logger.info('Resetting for next meta index...');
|
||
|
// next..
|
||
|
this.creditServices.started = false;
|
||
|
this.creditServices.indexStep = 0;
|
||
|
|
||
|
await this._goto(this.startPage);
|
||
|
}
|
||
|
else {
|
||
|
this.creditServices.items = this.creditServices.links.length;
|
||
|
logger.info(`${this.creditServices.items} items indexed`);
|
||
|
|
||
|
await this._goto(this.startPage, { 'waitUntil':'networkidle0' });
|
||
|
|
||
|
logger.warn('GO THROUGH THE NEW LIST!!!!');
|
||
|
|
||
|
this.processing = true;
|
||
|
|
||
|
await this._randomWait(this.page, 2, 2, 'New page transition');
|
||
|
}
|
||
|
});
|
||
|
|
||
|
this.on('indexdone', async () => {
|
||
|
switch (this.mode) {
|
||
|
|
||
|
case 0:
|
||
|
this.emit('psindexdone');
|
||
|
break;
|
||
|
|
||
|
case 1:
|
||
|
this.emit('emindexdone');
|
||
|
break;
|
||
|
|
||
|
case 2:
|
||
|
this.emit('ciindexdone');
|
||
|
break;
|
||
|
|
||
|
}
|
||
|
});
|
||
|
|
||
|
this.on('serviceDone', async () => {
|
||
|
switch (this.mode) {
|
||
|
|
||
|
case 0:
|
||
|
this.emit('paymentServicesDone');
|
||
|
break;
|
||
|
|
||
|
case 1:
|
||
|
this.emit('emoneyServicesDone');
|
||
|
break;
|
||
|
|
||
|
case 2:
|
||
|
this.emit('creditServicesDone');
|
||
|
break;
|
||
|
|
||
|
}
|
||
|
});
|
||
|
|
||
|
this.on('paymentServicesDone', async () => {
|
||
|
this.paymentServices.done = true;
|
||
|
jsonfile.writeFileSync(`${this.path}/paymentServices.json`, { 'links': this.paymentServices.links });
|
||
|
jsonfile.writeFileSync(`${this.debugPath}/paymentServices.json`, this.paymentServices);
|
||
|
this.mode++;
|
||
|
this.processing = false;
|
||
|
|
||
|
await this._goto(this.emoneyServices.urls[0]);
|
||
|
});
|
||
|
|
||
|
this.on('emoneyServicesDone', async () => {
|
||
|
this.emoneyServices.done = true;
|
||
|
jsonfile.writeFileSync(`${this.path}/emoneyServices.json`, { 'links':this.emoneyServices.links });
|
||
|
jsonfile.writeFileSync(`${this.debugPath}/emoneyServices.json`, this.emoneyServices);
|
||
|
|
||
|
this.mode++;
|
||
|
this.processing = false;
|
||
|
|
||
|
await this._goto(this.creditServices.urls[0]);
|
||
|
});
|
||
|
|
||
|
this.on('creditServicesDone', async () => {
|
||
|
this.creditServices.done = true;
|
||
|
jsonfile.writeFileSync(`${this.path}/creditServices.json`, { 'links':this.creditServices.links });
|
||
|
jsonfile.writeFileSync(`${this.debugPath}/creditServices.json`, this.creditServices);
|
||
|
|
||
|
this.emit('done');
|
||
|
});
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* @returns {Promise<void>}
|
||
|
*/
|
||
|
async start() {
|
||
|
super._start();
|
||
|
try {
|
||
|
this.mode = 0;
|
||
|
this.processing = false;
|
||
|
|
||
|
this.modeTitles = ['Payment Service', 'EMoney', 'Credit Services'];
|
||
|
|
||
|
this.paymentServices = {
|
||
|
'items': 0,
|
||
|
'links': [],
|
||
|
'step': 46,
|
||
|
'indexStep': 0,
|
||
|
'indexMetaStep':0,
|
||
|
'visited': false,
|
||
|
'done' : false,
|
||
|
'started': false,
|
||
|
'urls': ['https://www.mfsa.com.mt/pages/licenceholders.aspx'],
|
||
|
'indexMeta' : [
|
||
|
['Financial Institutions',
|
||
|
'Financial Institutions licensed to undertake payment services under the 2nd Schedule to the Financial Institutions Act (Payment Institutions)'],
|
||
|
['Financial Institutions',
|
||
|
'Local Financial Institutions licensed to undertake activities under the 2nd Schedule to the Financial Institutions Act (Payment Institutions) exercising the freedom to provide services outside Malta'],
|
||
|
['Financial Institutions',
|
||
|
'Local Financial Institutions licensed to undertake activities under the 2nd Schedule to the Financial Institutions Act (Payment Institutions) exercising the freedom to establish a branch outside Malta']
|
||
|
|
||
|
]
|
||
|
};
|
||
|
|
||
|
this.emoneyServices = {
|
||
|
'items': 0,
|
||
|
'links': [],
|
||
|
'step': 0,
|
||
|
'indexStep': 0,
|
||
|
'indexMetaStep':0,
|
||
|
'visited': false,
|
||
|
'done' : false,
|
||
|
'started': false,
|
||
|
'urls': ['https://www.mfsa.com.mt/pages/licenceholders.aspx'],
|
||
|
'indexMeta' : [
|
||
|
['Financial Institutions',
|
||
|
'Financial Institutions licenced to issue electronic money under the 3rd Schedule to the Financial Institutions Act (Electronic Money Institutions)'],
|
||
|
['Financial Institutions',
|
||
|
'Local Financial Institutions licensed to issue electronic money under the 3rd Schedule to the Financial Institutions Act (Electronic Money Institutions) exercising the freedom to provide services outside Malta'],
|
||
|
['Financial Institutions',
|
||
|
'Local Financial Institutions licensed to issue electronic money under the 3rd Schedule to the Financial Institutions Act (Electronic Money Institutions) exercising the freedom to establish a branch outside Malta']
|
||
|
|
||
|
]
|
||
|
};
|
||
|
|
||
|
this.creditServices = {
|
||
|
'items': 0,
|
||
|
'links': [],
|
||
|
'step': 0,
|
||
|
'indexStep': 0,
|
||
|
'indexMetaStep':0,
|
||
|
'visited': false,
|
||
|
'done' : false,
|
||
|
'started': false,
|
||
|
'urls': ['https://www.mfsa.com.mt/pages/licenceholders.aspx'],
|
||
|
'indexMeta' : [
|
||
|
['Credit Institutions',
|
||
|
'Credit Institutions'],
|
||
|
['Credit Institutions',
|
||
|
'Freedom of Services and Establishments - Exercise of the freedom to provide services outside Malta'],
|
||
|
['Credit Institutions',
|
||
|
'Freedom of Services and Establishments - Exercise of the freedom to set up an establishment outside Malta']
|
||
|
|
||
|
]
|
||
|
};
|
||
|
|
||
|
this.startPage = this.paymentServices.urls[0];
|
||
|
this.emoneyUrl = 'https://www.bafin.de/DE/PublikationenDaten/Datenbanken/EGeldInstitute/e-geld-institute_node.html';
|
||
|
this.credit = 'https://portal.mvp.bafin.de/database/InstInfo/sucheForm.do?locale=en_GB';
|
||
|
|
||
|
this.setPath(path.resolve(`${__dirname }/../artefacts/MT/MFSA`));
|
||
|
|
||
|
await this._doNonRepudiation().catch((err) => {
|
||
|
logger.warn(err);
|
||
|
});
|
||
|
|
||
|
await this._initBrowser();
|
||
|
await this._createBrowserPage();
|
||
|
|
||
|
this.page.on('domcontentloaded', this._debounce(async () => {
|
||
|
this.processNewPage().catch((err) => {
|
||
|
logger.error('processNewPage fail', err);
|
||
|
});
|
||
|
}, 2500));
|
||
|
|
||
|
if (this.eventNames().length === 2)
|
||
|
await this.attachEvents();
|
||
|
|
||
|
await this.page.setViewport({ 'width': 1200, 'height': 800 });
|
||
|
await this._goto(this.startPage, { 'waitUntil':'networkidle2' });
|
||
|
|
||
|
await this._randomWait(this.page, 3, 5);
|
||
|
}
|
||
|
catch(e) {
|
||
|
throw new Error(e);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
async __run() {
|
||
|
await this.start();
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
// CommonJS export: the module's value is the MTScrape class itself.
module.exports = MTScrape;
|