792 lines
22 KiB
JavaScript
792 lines
22 KiB
JavaScript
const Scraper = require('../helpers/scraper');
|
|
const cheerio = require('cheerio');
|
|
const path = require('path');
|
|
const jsonfile = require('jsonfile');
|
|
const logger = require('log4js').getLogger('(LU)');
|
|
const url = require('url');
|
|
|
|
// Log threshold for this module: the LOGGER_LEVEL environment variable when set, otherwise 'warn'.
logger.level = process.env.LOGGER_LEVEL || 'warn';
|
|
|
|
/**
 * Wraps `func` so that bursts of calls collapse into a single invocation.
 *
 * Every call to the returned wrapper restarts a `wait` millisecond timer.
 * - `immediate` falsy: `func` runs once when the timer finally expires
 *   (trailing edge), using the receiver and arguments of the most recent call.
 * - `immediate` truthy: `func` runs straight away on the first call of a burst
 *   (leading edge) and is suppressed until the timer has expired again.
 *
 * @param {Function} func - function to debounce
 * @param {number} wait - quiet period in milliseconds
 * @param {boolean} [immediate] - fire on the leading edge instead of the trailing edge
 * @returns {Function} the debounced wrapper
 */
function debounce(func, wait, immediate) {
  let timeout = null;

  // A regular function (not an arrow) is required here: an arrow function would
  // capture debounce()'s own `this` and `arguments`, so `func` would be called
  // with (func, wait, immediate) rather than with the caller's arguments.
  return function debounced(...args) {
    const context = this;

    const later = () => {
      timeout = null;
      if (!immediate) func.apply(context, args);
    };

    // Leading-edge call is only allowed when no timer is currently pending.
    const callNow = immediate && !timeout;

    clearTimeout(timeout);
    timeout = setTimeout(later, wait);

    if (callNow) func.apply(context, args);
  };
}
|
|
|
|
/**
 * Scraper for the CSSF "supervised entities" web application
 * (https://supervisedentities.apps.cssf.lu).
 *
 * The site is a single-page application, so navigation is tracked through the
 * URL hash and the work is driven by events emitted on this instance:
 *
 *   pageChanged -> processNewPage -> handleIndexPage -> one of
 *     - indexPageHomeMode   (#Home / #AdvancedSearch: fill in and submit the search form)
 *     - handleEntityIndex   (#ResultResearch: open the next row of the result grid)
 *     - processEntity       (#DetailEntity: screenshot + extract the detail page)
 *     - selectSearchManually (no hash: open the advanced search by hand)
 *
 * Three searches are run one after another, selected by `this.mode`:
 *   0 = paymentServices, 1 = emoneyServices, 2 = creditServices.
 * Each has its own bookkeeping object ("serviceObject") created in start().
 *
 * Browser handling, waiting helpers, file naming, etc. come from the
 * Scraper base class (all the `_xxx` helpers and `this.page`).
 */
class LUScrape extends Scraper {

  /**
   * Registers the scraper id, the 'done' handler, the throttled `run` entry
   * point and the debounced index-page trigger. In production the scraper is
   * started automatically when `_checkLock()` resolves to a truthy value.
   */
  constructor() {
    super();
    this.setID('LU');

    this.on('done', () => {
      this._done();
    });

    this.run = this._throttle(async () => {
      await this.__run();
    }, 5000);

    if (process.env.NODE_ENV === 'production') {
      this._checkLock().then((l) => {
        if (l) {
          this.run();
        }
      }).catch((err) => {
        // Without this handler a failing lock check was an unhandled rejection.
        logger.error(err);
      });
    }

    this.debounceHandleIndexPage = debounce(() => {
      // the index page sometimes reloads up to 3 times..
      this.emit('handleIndexPage');
    }, 7500);
  }

  /**
   * Returns the bookkeeping object that belongs to the current `this.mode`.
   * Any mode other than 1 or 2 maps to the payment services object.
   *
   * @returns {Object} emoneyServices (1), creditServices (2) or paymentServices (otherwise)
   */
  _serviceForCurrentMode() {
    switch (this.mode) {
      case 1:
        return this.emoneyServices;
      case 2:
        return this.creditServices;
      case 0:
      default:
        return this.paymentServices;
    }
  }

  /**
   * Shared bookkeeping for the three "<service>Done" events: flags the service
   * as done, writes its link list to `this.path` and the full object to
   * `this.debugPath`, then advances `this.mode` and clears `this.inProgress`.
   *
   * @param {string} label - file base name, e.g. 'paymentServices'
   * @param {Object} serviceObject - bookkeeping object of the finished service
   * @returns {void}
   */
  _writeServiceArtefacts(label, serviceObject) {
    serviceObject.done = true;
    jsonfile.writeFileSync(`${this.path}/${label}.json`, { 'links': serviceObject.links });
    jsonfile.writeFileSync(`${this.debugPath}/${label}.json`, serviceObject);

    this.mode++;
    this.inProgress = false;
  }

  /**
   * Dispatches on the URL hash of the current page and either runs the search
   * form directly or emits the event that handles that view.
   *
   * @returns {Promise<void>}
   */
  async handleIndexPage() {
    const thisUrl = await this.page.url();
    // url.parse() reports a missing fragment as `null`, which the switch relies on.
    const pageUrl = url.parse(thisUrl);

    switch (pageUrl.hash) {
      case '#Home':
      case '#AdvancedSearch':
        await this.indexPageHomeMode();
        break;

      case '#ResultResearch':
        this.emit('handleEntityIndex');
        break;

      case '#DetailEntity':
        this.emit('processEntity');
        break;

      case null:
        this.emit('selectSearchManually');
        break;

      default:
        logger.error('HASH NOT RECOGNISED');
        logger.error(pageUrl);
        break;
    }
  }

  /**
   * Fills in the advanced search form for the current mode and submits it.
   * Emits 'pageChanged' once the result title element is present, or
   * 'recover' when any step fails.
   *
   * @returns {Promise<void>}
   */
  async indexPageHomeMode() {
    try {
      // Value passed to the ExtJS 'advancedsearch_type' component, indexed by mode.
      const searchType = ['6', '7', '1'];

      // Container holding the option inputs, indexed by mode.
      const bodys = ['#advancedsearch_paymentservicestype-body', '#advancedsearch_electronicmoneytype-body', '#advancedsearch_banktype-body'];

      const bankInputs = ['#advancedsearch_bankgroup1_inputEl', '#advancedsearch_bankgroupA_inputEl', '#advancedsearch_bankgroupB_inputEl',
        '#advancedsearch_bankgroupC_inputEl', '#advancedsearch_bankgroupD_inputEl', '#advancedsearch_bankgroup2_inputEl', '#advancedsearch_bankgroup3_inputEl'];

      // Waits for the selector and clicks it with a humanised delay.
      const clickWhenPresent = async (selector) => {
        const handle = await this.page.waitForSelector(selector);

        await handle.click({ 'delay': Scraper.notARobot() });
      };

      // click the advanced search button
      await clickWhenPresent('#menu_advanced');

      // click the search-type field
      await clickWhenPresent('#advancedsearch_type-bodyEl');

      await this._randomWait(this.page, 2, 2);

      // call the EXT function to set the advanced search mode..
      await this.page.evaluate(x => {
        return Ext.getCmp('advancedsearch_type').setValue(x);
      }, searchType[this.mode]);

      // Mode 0 & Mode 1 have a list of options which can be iterated easily
      // Mode 2 requires a handful of different inputs to be clicked on
      await this._microWait(this.page, 7);

      if (this.mode === 0) {
        await clickWhenPresent('label#advancedsearch_paymentinstitutionsgroup1-boxLabelEl');

        await this._randomWait(this.page, 2, 2);
      }

      // This condition used to be `mode === 0 && mode === 1`, which can never be
      // true, so the option checkboxes were never clicked.
      // NOTE(review): confirm against the live form that clicking every checkbox
      // after the group label in mode 0 gives the intended selection.
      if (this.mode === 0 || this.mode === 1) {
        const options = await this.page.$$(`${bodys[this.mode]} div.x-form-item-body input.x-form-checkbox-default`);

        // click all the elements
        logger.debug('options length', options.length);

        for (const item of options) {
          await item.click({ 'delay': Scraper.notARobot() });
        }
      }

      if (this.mode === 2) {
        for (const bI of bankInputs) {
          const input = await this.page.$$(`${bodys[this.mode]} div.x-form-item-body input${bI}`);

          // A missing input throws here and is handled by the catch below ('recover').
          await input[0].click({ 'delay': Scraper.notARobot() });
        }
      }

      await this._randomWait(this.page, 1, 1);

      // click the button
      await clickWhenPresent('#advancedsearch_searchbutton');

      // now wait for the results to load..
      await this.page.waitForSelector('#title-1083-textEl');
      logger.debug('Results loaded');
      this.emit('pageChanged');
    }
    catch (err) {
      logger.error(err);
      this.emit('recover');
    }
  }

  /**
   * First visit of a result index: reads the paging bar and, when the page
   * shown matches the expected position (or the result set is empty), stores
   * the paging figures on `serviceObject` and marks it as visited.
   *
   * @param {Object} serviceObject - bookkeeping object of the current service
   * @returns {Promise<void>}
   */
  async entityIndexFirstPass(serviceObject) {
    try {
      const body = await this.page.content();
      const $ = cheerio.load(body);
      const pageDetails = await this.extractBarDetails($);

      // extractBarDetails() logs and returns undefined when the bar cannot be parsed.
      if (!pageDetails) {
        return;
      }

      const { currentPageIndex, currentPageMax } = pageDetails;

      const onExpectedPage = (currentPageIndex <= currentPageMax) && (currentPageIndex === (serviceObject.step + 1));
      const emptyResult = (currentPageIndex === 0 && currentPageMax === 0);

      if (onExpectedPage || emptyResult) {
        serviceObject.currentIndexLength = pageDetails.currentIndexLength;
        serviceObject.currentPageMax = currentPageMax;

        serviceObject.visited = true;
        serviceObject.currentIndex = url.parse(await this.page.url());
        serviceObject.currentMetaIndex = 0;
      }
    }
    catch (err) {
      logger.error(err);
    }
  }

  /**
   * Parses the paging toolbar of the result grid.
   *
   * `page` comes from the input in the 5th child of the bar and `maxPages`
   * from the first number in the 6th child. The last child supplies three
   * numbers, read in order as `currentPageIndex`, `currentPageMax` and
   * `currentIndexLength`.
   *
   * @param {Function} $ - cheerio instance loaded with the page HTML
   * @returns {Promise<{currentIndexLength: number, maxPages: number, currentPageMax: number, page: number, currentPageIndex: number}|undefined>}
   *   the parsed figures, or undefined (after logging) when parsing fails
   */
  async extractBarDetails($) {
    try {
      const numberExtract = /(\d+)/g;

      const pagingBar = $('#resultresearch_paging-targetEl').children();

      const page = parseInt($(pagingBar).eq(4).find('input').val(), 10);

      const workMaxPages = this._cleanUp($(pagingBar).eq(5).text());
      const maxPages = parseInt(workMaxPages.match(numberExtract)[0], 10);

      const rawDisplaying = this._cleanUp($(pagingBar).eq(pagingBar.length - 1).text());

      const [currentPageIndex, currentPageMax, currentIndexLength] = rawDisplaying
        .match(numberExtract)
        .map((s) => parseInt(s, 10));

      return { page, maxPages, currentPageIndex, currentPageMax, currentIndexLength };
    }
    catch (err) {
      logger.error(err);
    }
  }

  /**
   * Handles the result index view: either moves on to the next index page
   * (when every row of the current one has been handled) or reads the row for
   * the current step, prepares `serviceObject.current` and double-clicks the
   * row to open its detail page. Emits 'pageChanged' in both cases.
   *
   * @param {Object} serviceObject - bookkeeping object of the current service
   * @returns {Promise<void>}
   */
  async processEntityIndex(serviceObject) {
    try {
      // Keys given to the grid cells of a row, in cell order.
      const fields = ['type', 'name', 'address'];

      logger.info(`Working on the ${this.modeTitles[this.mode]} index...`);

      await this._randomWait(this.page, 1, 2);

      if (serviceObject.visited === false) {
        logger.debug('Preparing...');
        serviceObject.restart = false;
        await this.entityIndexFirstPass(serviceObject);
      }

      // The first pass could not confirm the paging state; nothing to do yet.
      if (serviceObject.visited !== true) {
        return;
      }

      // Position of the current step within the visible index page.
      serviceObject.currentMetaIndex = serviceObject.step % serviceObject.currentPageMax;

      logger.debug('serviceObject.currentMetaIndex', serviceObject.currentMetaIndex);

      // `restart` is true only after a row of this page has been processed, so a
      // meta index of 0 then means we wrapped around and need the next page.
      const pageExhausted = (serviceObject.step > 0) && (serviceObject.currentMetaIndex === 0) && (serviceObject.restart === true);

      if (pageExhausted) {
        logger.debug('Maxed out this page..');

        // `visited` is deliberately left true here.
        serviceObject.restart = false;

        const nextButton = await this.page.waitForSelector('#button-1052');

        logger.debug('Proceeding to next index page..');
        await nextButton.click({ 'delay': Scraper.notARobot() });
        this.emit('pageChanged');

        return;
      }

      logger.debug('dealing...');

      serviceObject.restart = true;

      const rowSelector = `div#ResultResearchGridView table:nth-child(${serviceObject.currentMetaIndex + 1})`;

      logger.debug(rowSelector);

      const wantedRow = await this.page.$$(rowSelector);

      if (wantedRow.length === 0) {
        // Previously surfaced as an opaque TypeError from page.evaluate().
        logger.error(`No index row found for selector: ${rowSelector}`);

        return;
      }

      const htmlTable = await this.page.evaluate(el => el.outerHTML, wantedRow[0]);

      const $ = cheerio.load(`<table>${htmlTable}</table>`);
      const cells = $('div.x-grid-cell-inner');

      serviceObject.current = {};

      cells.each((index, item) => {
        serviceObject.current[fields[index]] = this._cleanUp($(item).text());
      });

      if (typeof (serviceObject.current.name) !== 'undefined' && serviceObject.current.name !== '') {
        const fileName = this._makeFileName(serviceObject.current.name);

        serviceObject.current.fileName = fileName;
        serviceObject.current.filePath = `${this.path}/${fileName}`.substring(0, 240);
      }

      await this._randomWait(this.page, 3, 5);

      // A double click on the row opens the entity detail view.
      await wantedRow[0].click({ 'delay': 97, 'clickCount': 2 });

      await this._randomWait(this.page, 1, 1);

      this.emit('pageChanged');
    }
    catch (err) {
      logger.error(err);
    }
  }

  /**
   * Extracts the rows of one grid panel on the entity detail page.
   *
   * @param {Function} $ - cheerio instance
   * @param {Object} html - cheerio selection (or markup) to search within
   * @param {string} divId - selector of the grid panel body
   * @param {Array<Array<string>>} sequence - pairs of [cell class suffix, output key];
   *   each cell is looked up as `.x-grid-cell-<suffix>`
   * @returns {Promise<Array<Object>|undefined>} one object per grid row, or
   *   undefined (after logging) on failure
   */
  async extractGridPanel($, html, divId, sequence) {
    try {
      const rows = [];
      const tables = $(html).find(`${divId} div.x-grid-item-container table`);

      tables.each((index, table) => {
        const row = {};

        for (const [cellSuffix, key] of sequence) {
          const cell = $(table).find(`.x-grid-cell-${cellSuffix}`);

          row[key] = this._cleanUp($(cell).text());
        }

        rows.push(row);
      });

      return rows;
    }
    catch (err) {
      logger.error(err);
    }
  }

  /**
   * Extracts the simple fields and all grid panels from the HTML of an entity
   * detail page.
   *
   * @param {string} html - full page HTML
   * @returns {Promise<Object|undefined>} object keyed by field / grid id, or
   *   undefined (after logging) on failure
   */
  async extractEntityDetails(html) {
    try {
      const details = {};

      // [element id, output key] for the plain fields.
      const detailSequence = [
        ['detailEntity_type_inputEl', 'type'],
        ['detailEntity_number_inputEl', 'number'],
        ['detailEntity_name_inputEl', 'name'],
        ['detailEntity_address_inputEl', 'address'],
        ['detailEntity_startdate_inputEl', 'startdate'],
        ['detailEntity_closeddate_inputEl', 'closedate'],
        ['detailEntity_countrycode_inputEl', 'countrycode'],
        ['detailEntity_group_inputEl', 'group'],
        ['detailEntity_subgroup_inputEl', 'subgroup'],
        ['detailEntity_iciOutside_inputEl', 'iciOutside'],
        ['detailEntity_icilinked_inputEl', 'icilinked']
      ];

      // Grid panels: output key, [cell class suffix, output key] pairs, panel body selector.
      const gridPanels = [{
        'id': 'autorisedStatus',
        'sequence': [['detailEntity_autorisedStatus', 'autorisedStatus'],
          ['detailEntity_recentChangeautorisedStatus', 'recentChangeautorisedStatus'],
          ['detailEntity_recentChangeautorisedDate', 'recentChangeautorisedDate']],
        'divId': '#detailEntity_autorisedStatusGridPanel-body'
      }, {
        'id': 'agentOrBranch',
        'sequence': [['detailEntity_agentorbranchData', 'agentorbranchData'], ['detailEntity_agentData', 'agentData'],
          ['detailEntity_branchData', 'branchData'], ['detailEntity_agentorbranchCountry', 'agentorbranchCountry'],
          ['detailEntity_agentorbranchAddress', 'agentorbranchAddress'], ['detailEntity_agentorbranchlegalstatus', 'agentorbranchlegalstatus']],
        'divId': '#detailEntity_agentorbranchGridPanel-body'
      }, {
        'id': 'iciOutsideTable',
        'sequence': [['detailEntity_iciOutsideMember', 'iciOutsideMember']],
        'divId': '#detailEntity_iciOutsideGridPanel-body'
      }, {
        'id': 'icilinkedTable',
        'sequence': [['detailEntity_icilinkedname', 'icilinkedname'], ['detailEntity_icilinkedstartingdate', 'icilinkedstartingdate'],
          ['detailEntity_icilinkedendingdate', 'icilinkedendingdate']],
        'divId': '#detailEntity_icilinkedGridPanel-body'
      }, {
        'id': 'othersStatus',
        'sequence': [['detailEntity_otherStatus', 'otherStatus'], ['detailEntity_recentChangeotherStatus', 'recentChangeotherStatus'],
          ['detailEntity_recentChangeotherDate', 'recentChangeotherDate']],
        'divId': '#detailEntity_othersStatusGridPanel-body'
      }, {
        'id': 'services',
        'sequence': [['detailEntity_service', 'service'], ['detailEntity_recentChangeservice', 'recentChangeservice'],
          ['detailEntity_recentChangeserviceDate', 'recentChangeserviceDate']],
        'divId': '#detailEntity_servicesGridPanel-body'
      }, {
        'id': 'ancillaryservices',
        'sequence': [['detailEntity_ancillaryservice', 'ancillaryservice'],
          ['detailEntity_recentChangeancillaryservice', 'recentChangeancillaryservice'],
          ['detailEntity_recentChangeancillaryserviceDate', 'recentChangeancillaryserviceDate']],
        'divId': '#detailEntity_ancillaryservicesGridPanel-body'
      }, {
        'id': 'prestataire',
        'sequence': [['detailEntity_prestatairename', 'prestatairename'], ['detailEntity_prestataireheadoffice', 'prestataireheadoffice'],
          ['detailEntity_prestataireauthorisation', 'prestataireauthorisation']],
        'divId': '#detailEntity_prestataireGridPanel-body'
      }, {
        'id': 'historicName',
        'sequence': [['detailEntity_historicNameName', 'historicNameName'], ['detailEntity_historicNameDate', 'historicNameDate']],
        'divId': '#detailEntity_historicNameGridPanel-body'
      }];

      const $ = cheerio.load(html);
      const mainDiv = $('#promoteDetailEntityPanel-innerCt');

      for (const [elementId, key] of detailSequence) {
        const field = $(mainDiv).find(`#${elementId}`);

        details[key] = this._cleanUp($(field).text());
      }

      for (const grid of gridPanels) {
        details[grid.id] = await this.extractGridPanel($, mainDiv, grid.divId, grid.sequence);
      }

      return details;
    }
    catch (err) {
      logger.error(err);
    }
  }

  /**
   * Handles an entity detail page: takes a screenshot, extracts the details
   * into `serviceObject.current.details` and emits 'entityComplete'.
   *
   * @param {Object} serviceObject - bookkeeping object of the current service
   * @returns {Promise<void>}
   */
  async processEntity(serviceObject) {
    try {
      logger.info(`Process ${this.modeTitles[this.mode]} entity:${serviceObject.current.name}`);
      logger.info(`Step ${serviceObject.step} of ${serviceObject.currentIndexLength}`);

      await this._randomWait(this.page, 3, 5);

      const filePath = serviceObject.current.filePath;

      await this._randomWait(this.page, 3, 5);

      await this._makeScreenshotV2(this.page, `${filePath}_main`, null);

      const body = await this.page.content();

      serviceObject.current.details = await this.extractEntityDetails(body);

      this.emit('entityComplete');

      logger.info('Entity complete...');
    }
    catch (err) {
      logger.error(err);
    }
  }

  /**
   * Persists the entity that was just processed, records it in
   * `serviceObject.links`, advances `serviceObject.step` and either returns to
   * the result list (emitting 'pageChanged') or emits 'serviceDone' when the
   * whole result set has been handled.
   *
   * @param {Object} serviceObject - bookkeeping object of the current service
   * @returns {Promise<void>}
   */
  async entityCompleter(serviceObject) {
    try {
      const filename = serviceObject.current.fileName;
      const filePath = serviceObject.current.filePath;
      const newObj = {};

      logger.info(`Saving: ${filename}.json`);
      await jsonfile.writeFile(`${filePath}.json`, serviceObject.current);

      await this._randomWait(this.page, 3, 5);

      // extractEntityDetails() yields undefined on failure; guard so that a
      // failed extraction does not abort before `step` is advanced.
      const details = serviceObject.current.details;

      newObj.fileName = `${filename}.json`;
      newObj.name = serviceObject.current.name;
      newObj.number = (details && details.number) || '';

      serviceObject.links.push(newObj);

      serviceObject.step++;

      if (serviceObject.step < serviceObject.currentIndexLength) {
        serviceObject.current = {};

        const backLink = await this.page.waitForSelector('a#detailEntity_backtolist');

        await backLink.click({ 'delay': Scraper.notARobot() });
        this.emit('pageChanged');
      }
      else {
        this.emit('serviceDone');
      }
    }
    catch (err) {
      logger.error(err);
    }
  }

  /**
   * Runs processEntity() for the service that belongs to the current mode.
   *
   * @returns {Promise<void>}
   */
  async handleProcessEntity() {
    await this.processEntity(this._serviceForCurrentMode());
  }

  /**
   * Runs entityCompleter() for the service that belongs to the current mode.
   *
   * @returns {Promise<void>}
   */
  async handleEntityComplete() {
    await this.entityCompleter(this._serviceForCurrentMode());
  }

  /**
   * Reacts to a (possible) page change. Chrome's error page triggers
   * 'recover', about:blank is ignored and /index.html schedules the debounced
   * index handler. Any other page uploads an error and throws in production,
   * and is only logged otherwise.
   *
   * @returns {Promise<void>}
   * @throws {Error} in production when the page is not recognised
   */
  async processNewPage() {
    const pageUrl = url.parse(await this.page.url());

    if (pageUrl.href === 'chrome-error://chromewebdata/') {
      logger.warn('Directed to: chrome-error://chromewebdata/');
      this.emit('recover');

      return;
    }

    if (pageUrl.href === 'about:blank') {
      return;
    }

    if (pageUrl.pathname === '/index.html') {
      this.debounceHandleIndexPage();
    }
    else if (process.env.NODE_ENV === 'production') {
      await this._uploadError();

      // Use the href: interpolating the parsed Url object printed "[object Object]".
      throw new Error(`Unknown page: ${pageUrl.href}`);
    }
    else {
      logger.warn('processNewPage Fell through');
      logger.warn('currentPage.location', pageUrl);
    }
  }

  /**
   * Wires up the internal events that drive the scrape (see the class
   * description for the flow).
   *
   * @returns {Promise<void>}
   */
  async attachEvents() {
    // Needed for Angular / EXT based sites: the URL hash changes without a page load
    this.on('pageChanged', this._throttle(async () => {
      this.processNewPage().catch((err) => {
        logger.error('processNewPage fail', err);
      });
    }, 1000));

    // The three handlers below start async work from a synchronous listener, so
    // each promise gets its own rejection handler instead of being left floating.
    this.on('entityComplete', () => {
      this.handleEntityComplete().catch((err) => {
        logger.error(err);
      });
    });

    this.on('handleIndexPage', () => {
      this.handleIndexPage().catch((err) => {
        logger.error(err);
      });
    });

    this.on('processEntity', () => {
      this.handleProcessEntity().catch((err) => {
        logger.error(err);
      });
    });

    this.on('serviceDone', async () => {
      switch (this.mode) {
        case 0:
          this.emit('paymentServicesDone');
          break;

        case 1:
          this.emit('emoneyServicesDone');
          break;

        case 2:
          this.emit('creditServicesDone');
          break;
      }
    });

    this.on('handleEntityIndex', async () => {
      await this.processEntityIndex(this._serviceForCurrentMode());
    });

    this.on('paymentServicesDone', async () => {
      logger.warn('paymentServicesDone');
      try {
        this._writeServiceArtefacts('paymentServices', this.paymentServices);

        await this._goto(this.emoneyServices.urls[0]);
        this.emit('pageChanged');
      }
      catch (e) {
        logger.error(e);
      }
    });

    this.on('emoneyServicesDone', async () => {
      logger.warn('emoneyServicesDone');
      try {
        this._writeServiceArtefacts('emoneyServices', this.emoneyServices);

        await this._goto(this.creditServices.urls[0]);
        this.emit('pageChanged');
      }
      catch (e) {
        logger.error(e);
      }
    });

    this.on('creditServicesDone', async () => {
      logger.warn('creditServicesDone');
      try {
        this._writeServiceArtefacts('creditServices', this.creditServices);

        this.emit('done');
      }
      catch (e) {
        logger.error(e);
      }
    });

    this.on('selectSearchManually', async () => {
      logger.debug('Locating advanced search button');

      await this.page.waitForSelector('#menu_advanced', { 'visible': true, 'timeout': 7500 }).then(async (elm) => {
        await elm.click({ 'delay': 90 });
      }).catch(() => {
        logger.error('No advanced search button');
      });

      await this.page.waitForSelector('#promoteAdvancedSearchPanel-body', { 'visible': true, 'timeout': 7500 }).then(async () => {
        await this.indexPageHomeMode();
      }).catch(() => {
        logger.error('No advanced search form');
      });
    });
  }

  /**
   * Resets the per-service bookkeeping, prepares the browser page, attaches
   * the event handlers and opens the start page.
   *
   * Errors are propagated unchanged to the caller (they used to be re-wrapped
   * in `new Error(e)`, which discarded the original stack and type).
   *
   * @returns {Promise<void>}
   */
  async start() {
    // NOTE(review): _start() is not awaited here - confirm it is synchronous.
    super._start();

    const searchUrl = 'https://supervisedentities.apps.cssf.lu/index.html?language=en#AdvancedSearch';

    this.mode = 0;

    this.paymentServices = {
      'items': 0,
      'links': [],
      'step': 0,
      'indexStep': 0,
      'visited': false,
      'done': false,
      'urls': [searchUrl],
      'sections': [],
      'sectionLinks': []
    };

    this.emoneyServices = {
      'items': 0,
      'links': [],
      'step': 0,
      'indexStep': 0,
      'visited': false,
      'done': false,
      'urls': [searchUrl],
      'sections': [],
      'sectionLinks': []
    };

    this.creditServices = {
      'items': 0,
      'links': [],
      'step': 0,
      'indexStep': 0,
      'visited': false,
      'done': false,
      'searchDone': false,
      'started': false,
      'urls': [searchUrl],
      'sections': [],
      'sectionLinks': []
    };

    this.startPage = this.paymentServices.urls[0];
    this.emoneyUrl = this.emoneyServices.urls[0];
    this.credit = this.creditServices.urls[0];

    this.setPath(path.resolve(`${__dirname }/../artefacts/LU/CSSF`));

    // A failure here is logged but does not stop the run.
    await this._doNonRepudiation().catch((err) => {
      logger.warn(err);
    });

    await this._initBrowser();
    await this._createBrowserPage();

    this.page.on('domcontentloaded', this._throttle(async () => {
      this.processNewPage().catch((err) => {
        logger.error('processNewPage fail', err);
      });
    }, 1000));

    // Attach our own handlers only once. NOTE(review): this relies on exactly two
    // event names being registered before attachEvents() has run - confirm
    // against the Scraper base class.
    if (this.eventNames().length === 2) {
      await this.attachEvents();
    }

    await this._makeResponsive();

    await this.page.setViewport({ 'width': 1200, 'height': 800 });
    await this._goto(this.startPage, { 'waitUntil': 'load' });

    await this._randomWait(this.page, 3, 5);
  }

  /**
   * Entry point used by the throttled `run` wrapper created in the constructor.
   *
   * @returns {Promise<void>}
   */
  async __run() {
    await this.start();
  }

}
|
|
|
|
// CommonJS export: the module's only export is the LUScrape class.
module.exports = LUScrape;
|
|
|