839 lines
22 KiB
JavaScript
839 lines
22 KiB
JavaScript
const Scraper = require('../helpers/scraper');
|
|
const cheerio = require('cheerio');
|
|
const path = require('path');
|
|
const jsonfile = require('jsonfile');
|
|
const logger = require('log4js').getLogger('(SK)');
|
|
const url = require('url');
|
|
const camelCase = require('camelcase');
|
|
|
|
// Log threshold is taken from the LOGGER_LEVEL environment variable; when it is
// unset or empty the logger falls back to 'warn'.
logger.level = process.env.LOGGER_LEVEL || 'warn';
|
|
|
|
/**
 * Scraper for the subject register served at https://subjekty.nbs.sk/
 * (artefacts are written below `artefacts/SK/NBS`).
 *
 * The crawl is event driven: every `domcontentloaded` event of the browser page
 * is routed through `processNewPage()`, which inspects the `aa` query parameter
 * and dispatches to the handler for that page type. Three groups of sectors are
 * walked one after the other, selected by `this.mode`:
 *   0 - payment services, 1 - e-money services, 2 - credit services.
 *
 * Helpers prefixed with `_` that are not defined in this class (`_throttle`,
 * `_goto`, `_randomWait`, `_findAndClick`, `_cleanUp`, ...) are expected to come
 * from the `Scraper` base class.
 */
class SKScrape extends Scraper {

  /**
   * Registers the `done` listener, builds the throttled `run` entry point and,
   * in production, starts a run as soon as the lock check resolves truthy.
   */
  constructor() {
    super();
    this.setID('SK');

    this.on('done', () => {
      this._done();
    });

    // 5000 is handed to `_throttle` - presumably a window in milliseconds
    // (NOTE(review): confirm against the base class).
    this.run = this._throttle(async () => {
      await this.__run();
    }, 5000);

    if (process.env.NODE_ENV === 'production') {
      this._checkLock()
        .then((l) => {
          if (l) {
            this.run();
          }
        })
        .catch((err) => {
          // Without a handler a failing lock check surfaced as an unhandled rejection.
          logger.error('Lock check failed', err);
        });
    }
  }

  /**
   * Returns the state object of the service group selected by `this.mode`.
   * Any mode other than 1 or 2 maps to the payment services, mirroring the
   * `case 0: default:` fall-through the handlers have always used.
   *
   * @returns {Object} `emoneyServices`, `creditServices` or `paymentServices`
   */
  _serviceForCurrentMode() {
    switch (this.mode) {
      case 1:
        return this.emoneyServices;
      case 2:
        return this.creditServices;
      case 0:
      default:
        return this.paymentServices;
    }
  }

  /**
   * Shared body of the `paymentServicesDone` / `emoneyServicesDone` /
   * `creditServicesDone` listeners.
   *
   * Moves the named service to its next section and navigates to the section
   * index; once every section has been handled it marks the service as done,
   * writes the link list and the full state as JSON, bumps `this.mode` and
   * runs `onFinished`. Errors are logged, never rethrown.
   *
   * @param {string} name property name of the service state on `this`; also the JSON file name
   * @param {number} sectionUrlIndex index into `service.urls` to open for the next section
   * @param {Function} onFinished called (and awaited) after the last section is complete
   * @returns {Promise<void>}
   */
  async _advanceService(name, sectionUrlIndex, onFinished) {
    try {
      // Looked up at call time: `start()` may replace the state objects between runs.
      const service = this[name];

      service.indexStep++;

      if (service.indexStep < service.sections.length) {
        service.visited = false;
        service.step = 0;
        await this._goto(service.urls[sectionUrlIndex]);

        return;
      }

      service.done = true;
      jsonfile.writeFileSync(`${this.path}/${name}.json`, { 'links': service.links });
      jsonfile.writeFileSync(`${this.debugPath}/${name}.json`, service);

      this.mode++;
      this.inProgress = false;

      await onFinished();
    }
    catch (e) {
      logger.error(e);
    }
  }

  /**
   * Clicks the language link when its icon is `/static/icon/ico_en.gif`
   * (presumably meaning the page is not yet shown in English).
   *
   * @returns {Promise<boolean>} true when the language link was clicked, false otherwise
   */
  async checkChangeLanguage() {
    const languageIcon = await this.page.$$('#SubjectForm > div > div.panel-heading.sufit > table > tbody > tr > td:nth-child(2) > h3 > span > a > img');

    if (languageIcon.length === 0) {
      return false;
    }

    const value = await this.page.evaluate(el => el.getAttribute('src'), languageIcon[0]);

    if (value !== '/static/icon/ico_en.gif') {
      return false;
    }

    // this needs a click
    logger.info('Changing language to English..');
    await this._findAndClick('#SubjectForm > div > div.panel-heading.sufit > table > tbody > tr > td:nth-child(2) > h3 > span > a ');

    return true;
  }

  /**
   * Handles the landing page: dismisses the cookie bar when present, then -
   * only when no crawl is in progress and the URL has no query string - makes
   * sure the language is switched and otherwise clicks through to the register.
   *
   * @returns {Promise<void>}
   */
  async handleIntroPage() {
    const pageUrl = url.parse(await this.page.url());

    // Clear cookie bar (best effort: a missing bar is not an error)
    try {
      const cookieButton = await this.page.waitForSelector('a.btnCookieAccept', { 'visible':true, 'timeout':7500 });

      await cookieButton.click({ 'delay':Scraper.notARobot() });
    }
    catch (err) {
      logger.info('No cookie bar');
    }

    if (this.inProgress || pageUrl.query !== null) {
      return;
    }

    // fix language before going on; a language click triggers its own page load
    const changedLanguage = await this.checkChangeLanguage();

    if (!changedLanguage) {
      await this._randomWait(this.page, 3, 5, 'handleIntroPage');
      await this._findAndClick(' body > div.container > div:nth-child(5) > div:nth-child(1) > div > div');
    }
  }

  /**
   * Expands the four top-level category rows of the sector menu and clicks the
   * row whose `data-sector` matches the section currently being processed.
   *
   * @param {Object} serviceObject service state; reads `sections` and `indexStep`
   * @returns {Promise<void>}
   */
  async processMainMenu(serviceObject) {
    const wantedItem = serviceObject.sections[serviceObject.indexStep];

    for (const n of [1, 2, 3, 4]) {
      // DOM `click()` takes no arguments, so no delay option is passed here.
      // `$eval` rejects when the row does not exist; that still propagates.
      await this.page.$eval(`#Categories > tbody:nth-child(4) > tr.level0.categctrl.categctrl${n}`, e => e.click());
    }

    await this._randomWait(this.page, 3, 5);

    const wantedRow = `[data-sector="${wantedItem}"]`;

    logger.debug('Looking for', wantedRow);

    try {
      const row = await this.page.waitForSelector(wantedRow, { 'visible':true, 'timeout':7500 });

      await row.click({ 'delay':Scraper.notARobot() });
    }
    catch (err) {
      logger.warn('processMainMenu did not find what it was looking for!');
    }
  }

  /**
   * First visit of an entity index page: reads the pager text
   * (`Showing 1 to 10 of 12 entries`) and either records the paging state on
   * `serviceObject` or steps back one page when the listing is not positioned
   * on the row the crawl expects next.
   *
   * @param {Object} serviceObject service state; reads `step`, writes paging fields and `visited`
   * @returns {Promise<void>}
   */
  async entityIndexFirstPass(serviceObject) {
    // breaks up `Showing 1 to 10 of 12 entries`
    const breaker = /(\d+)/g;

    const body = await this.page.content();
    const $ = cheerio.load(body);
    const subjectsInfo = $('#Subjects_info').text();
    const brokenString = subjectsInfo.match(breaker);

    // `match` yields null when the text holds no digits (e.g. element missing);
    // bail out with a clear message instead of a TypeError on `brokenString[0]`.
    if (brokenString === null || brokenString.length < 3) {
      logger.warn('entityIndexFirstPass could not parse paging info:', subjectsInfo);

      return;
    }

    // first and last row number shown on this page
    const currentPageIndex = parseInt(brokenString[0], 10);
    const currentPageMax = parseInt(brokenString[1], 10);

    // The site returns the index from the last page when you select a different view.
    // This should be watched and can cause a problem

    logger.debug('subjectsInfo', subjectsInfo);
    logger.debug('Step', serviceObject.step);
    logger.debug('currentPageIndex', currentPageIndex);

    const onExpectedPage = (currentPageIndex <= currentPageMax) && (currentPageIndex === (serviceObject.step + 1));
    const emptyListing = currentPageIndex === 0 && currentPageMax === 0;

    if (onExpectedPage || emptyListing) {
      serviceObject.currentIndexLength = parseInt(brokenString[2], 10);
      serviceObject.currentPageMax = currentPageMax;

      serviceObject.visited = true;
      serviceObject.currentIndex = url.parse(await this.page.url());
      serviceObject.currentMetaIndex = 0;

      return;
    }

    logger.info('Need to click previous');

    const previousButton = await this.page.$$('#Subjects_previous');
    const buttonClasses = await this.page.$eval('#Subjects_previous', e => e.getAttribute('class'));

    if (!buttonClasses.split(' ').includes('disabled')) {
      // we need a click.. (awaited so a failing click is reported, not lost)
      await previousButton[0].click({ 'delay':90 });

      await this._randomWait(this.page, 3, 5);

      serviceObject.visited = false;
      this.emit('entityIndex');
    }
  }

  /**
   * Processes an entity index page: prepares paging state on the first visit,
   * then either pages forward, signals `serviceDone`, or records the row for
   * the current step in `serviceObject.current` and clicks it.
   *
   * @param {Object} serviceObject service state for the active mode
   * @returns {Promise<void>}
   * @throws {Error} when the row for the current step is not in the table
   */
  async processEntityIndex(serviceObject) {
    const fields = ['referenceNumber', 'businessName', 'address', 'start', 'end', 'reason'];
    const mouseDownDuration = Scraper.notARobot();

    if (serviceObject.visited === false) {
      logger.debug('Preparing...');

      try {
        await this.page.waitForSelector('table#Subjects', { 'visible':true });
        await this.entityIndexFirstPass(serviceObject);
      }
      catch (err) {
        // Also reached when the first pass itself fails, hence the error is logged too.
        logger.error('Table failed to render', err);
      }
    }

    if (serviceObject.visited !== true) {
      return;
    }

    // row position inside the current page; the listing is assumed to show 10 rows per page
    serviceObject.currentMetaIndex = serviceObject.step % 10;

    if (serviceObject.step >= serviceObject.currentPageMax) {
      const nextButton = await this.page.$$('#Subjects_next');
      const buttonClasses = await this.page.$eval('#Subjects_next', e => e.getAttribute('class'));

      if (!buttonClasses.split(' ').includes('disabled')) {
        // we need a click.. (awaited so a failing click is reported, not lost)
        await nextButton[0].click({ 'delay':mouseDownDuration });

        await this._randomWait(this.page, 3, 5);

        serviceObject.visited = false;
        this.emit('entityIndex');
      }
      else {
        logger.debug('I think we are done here...');
        this.emit('serviceDone');
      }

      return;
    }

    await this.page.waitForSelector('#Subjects > tbody');

    const rowSelector = `#Subjects > tbody > tr:nth-child(${serviceObject.currentMetaIndex + 1})`;
    const wantedRow = await this.page.$$(rowSelector);

    if (wantedRow.length === 0) {
      throw new Error(`processEntityIndex: no row matches ${rowSelector}`);
    }

    const htmlRow = await this.page.evaluate(el => el.outerHTML, wantedRow[0]);

    // a bare <tr> needs a <table> wrapper to be parsed as table markup
    const $ = cheerio.load(`<table>${htmlRow}</table>`);

    serviceObject.current = {};

    $('td').each((index, item) => {
      serviceObject.current[fields[index]] = $(item).text();
    });

    await this._randomWait(this.page, 3, 5);

    await wantedRow[0].click({ 'delay':mouseDownDuration });
  }

  /**
   * Turns a two-column label/value table into an object keyed by the
   * camel-cased label (first `:` removed).
   *
   * @param {Function} $ cheerio instance loaded with the table markup
   * @returns {Promise<Object>} label -> cleaned value
   */
  async processEntityDetailBasicDetails($) {
    const newObj = {};

    $('tr').each((index, elm) => {
      const children = $(elm).children();

      const preLabel = children.eq(0).text();
      const label = camelCase(this._cleanUp(preLabel.replace(':', '')));

      newObj[label] = this._cleanUp(children.eq(1).text());
    });

    return newObj;
  }

  /**
   * Decodes the `table.details` rows found below `elm` into an object. Only
   * rows whose first cell has exactly the class `dlabel` contribute; a leading
   * `Hide`/`View` marker is stripped from the value.
   *
   * @param {Function} $ cheerio instance
   * @param {*} elm element (or cheerio selection) containing the details table
   * @returns {Object} camel-cased label -> cleaned value
   */
  decodeTable($, elm) {
    const obj = {};

    $(elm).find('table.details tr').each((index, row) => {
      const children = $(row).children();

      const labelClass = $(children[0]).attr('class');

      if (labelClass !== 'dlabel') {
        return;
      }

      const label = camelCase(this._cleanUp($(children[0]).text().replace(':', '').replace(',', '')));

      obj[label] = this._cleanUp($(children[1]).text().replace(/(Hide|View)\s*/, ''));
    });

    return obj;
  }

  /**
   * Walks the rows of the first `<tbody>` of the licence table and rebuilds its
   * hierarchy. Rows are classified by their CSS classes:
   *   `level0 sublicctrl` - starts a new top-level entry,
   *   `level0 details`    - detail table of the current top-level entry,
   *   `level1`            - starts a named group inside the entry,
   *   `level1 details`    - detail table of the current group,
   *   `level2`            - licence row (its details come from the next sibling row).
   *
   * @param {Function} $ cheerio instance loaded with the `#Licenses` markup
   * @returns {Promise<Array>} one object per top-level entry, keyed by its camel-cased name
   */
  async processEntityDetailTableV2($) {
    const fields = ['license', 'start', 'end', 'reason'];
    const outData = [];
    let newObj = {};

    let topLevel = '';
    // NOTE(review): level1ID is not reset when a new top-level entry starts, so
    // level2 rows may be filed under the previous entry's group name. Kept as is.
    let level1ID = '';

    const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);

    // maps the cells of a row onto `fields` by position
    const readCells = (cells) => {
      const out = {};

      cells.each((ci, celm) => {
        out[fields[ci]] = this._cleanUp($(celm).text());
      });

      return out;
    };

    // take the first tbody as this is the main one...
    const tbody = $('tbody')[0];

    $(tbody).children().each((index, item) => {
      // rows without a class attribute are treated as having no classes
      const itemClasses = ($(item).attr('class') || '').split(' ');
      const isDetails = itemClasses.includes('details');

      if (itemClasses.includes('level0') && itemClasses.includes('sublicctrl')) {
        // TOP LEVEL
        const itemChildren = $(item).children();

        if (Object.keys(newObj).length !== 0) {
          // push the finished entry into the list
          outData.push(newObj);
          newObj = {};
        }

        topLevel = camelCase(this._cleanUp($(itemChildren[0]).text().replace(',', '')));

        const midLevel = readCells(itemChildren);

        midLevel.detail = [];
        newObj[topLevel] = midLevel;
      }

      const entry = newObj[topLevel];

      if (entry === undefined) {
        // Row seen before any top-level row: there is nothing to attach it to.
        return;
      }

      if (itemClasses.includes('level0') && isDetails) {
        // TOP LEVEL - DETAILS
        entry.detail.push(this.decodeTable($, item));
      }

      if (itemClasses.includes('level1') && !isDetails) {
        // LEVEL 1
        const itemChildren = $(item).children();

        level1ID = camelCase(this._cleanUp($(itemChildren[0]).text()));
        entry[level1ID] = [];
      }

      if (itemClasses.includes('level1') && isDetails) {
        // LEVEL 1 - DETAIL (same missing-group guard as the level 2 branch)
        if (!hasOwn(entry, level1ID)) {
          entry[level1ID] = [];
        }

        entry[level1ID].push(this.decodeTable($, item));
      }

      if (itemClasses.includes('level2') && !isDetails) {
        // LEVEL 2
        const obj = readCells($(item).children());

        obj.details = this.decodeTable($, $(item).next());

        if (level1ID === '') {
          // no group seen yet: file the licence under its own name
          const newID = camelCase(this._cleanUp(obj.license.replace(',', '')));

          entry[newID] = [obj];
        }
        else {
          if (!hasOwn(entry, level1ID)) {
            entry[level1ID] = [];
          }

          entry[level1ID].push(obj);
        }
      }
    });

    // insert final obj
    if (Object.keys(newObj).length !== 0) {
      outData.push(newObj);
    }

    return outData;
  }

  /**
   * Processes an entity detail page: expands the collapsed sections, takes a
   * screenshot, extracts the basic details and the licence hierarchy into
   * `serviceObject.current`, then hands over to `entityCompleter`.
   *
   * @param {Object} serviceObject service state; `current` must hold the index row data
   * @returns {Promise<void>}
   */
  async processEntityDetail(serviceObject) {
    // class lists seen on the accordion rows:
    //   level0 sublicctrl sublicctrl1 odd
    //   level0 sublicctrl sublicctrl1 odd sublicshow shown

    // expand all accordions that are not already shown
    const rows = await this.page.$$('tr.sublicctrl');

    for (const item of rows) {
      const cls = await this.page.evaluate(el => el.getAttribute('class'), item);

      if (!cls.includes('shown')) {
        await item.click({ 'delay':Scraper.notARobot() });
      }
    }

    try {
      const licenceRow = await this.page.waitForSelector('#Licenses > tbody > tr.level1.shown.sublichide1.sllhidectrl.sllhidectrl1', { 'timeout':7500 });

      await licenceRow.click({ 'delay':Scraper.notARobot() });
    }
    catch (err) {
      logger.debug('No License information');
    }

    await this._microWait(this.page, 5);

    // expand all viewable anchors
    const wantedAnchors = await this.page.$$('.row a');

    for (const item of wantedAnchors) {
      const exItem = this._cleanUp(await this.page.evaluate(el => el.text, item));

      if (exItem !== 'View') {
        continue;
      }

      await item.hover().catch((e) => {
        logger.warn('Hover failed', e.name);
      });

      await item.click({ 'delay': Scraper.notARobot() }).catch((e) => {
        logger.debug('View click failed', e.name);
      });
    }

    const entityName = `${serviceObject.current.businessName}_${serviceObject.current.referenceNumber}`;
    const fileName = this._makeFileName(entityName);
    const filePath = await this._makeFilePath(entityName);

    serviceObject.current.fileName = fileName;

    await this._randomWait(this.page, 2, 2);
    await this.page.focus('h3.page-header');
    await this._makeScreenshotV2(this.page, `${filePath}_main`, null);

    // A timeout here is not caught locally; it rejects this method.
    const basicTable = await this.page.waitForSelector('body > div.container > form.form-horizontal > table', { 'timeout':7500 });

    logger.debug('prep for processEntityDetailBasicDetails');

    const basicHtml = await this.page.evaluate(el => el.outerHTML, basicTable);

    serviceObject.current.basicDetails = await this.processEntityDetailBasicDetails(cheerio.load(basicHtml));

    const licenceTable = await this.page.waitForSelector('#Licenses');

    logger.debug('prep for processEntityDetailTableV2');

    const licenceHtml = await this.page.evaluate(el => el.outerHTML, licenceTable);

    serviceObject.current.entityDetails = await this.processEntityDetailTableV2(cheerio.load(licenceHtml));

    // Awaited so that a failed write or navigation rejects this method instead
    // of becoming an unhandled rejection.
    await this.entityCompleter(serviceObject);
  }

  /**
   * Persists the current entity as JSON, records it in `serviceObject.links`,
   * advances `step` and either navigates back to the index or emits
   * `serviceDone` when the last entity of the listing has been stored.
   *
   * @param {Object} serviceObject service state; `current.fileName` must be set
   * @returns {Promise<void>}
   */
  async entityCompleter(serviceObject) {
    const current = serviceObject.current;
    const filename = current.fileName;

    // path (without extension) is cut to 240 characters
    const filePath = `${this.path}/${filename}`.substring(0, 240);

    logger.info(`Saving: ${filename}.json`);

    serviceObject.links.push({
      'referenceNumber': current.referenceNumber,
      'businessName': current.businessName,
      'fileName': `${filename}.json`
    });

    await jsonfile.writeFile(`${filePath}.json`, current);

    await this._randomWait(this.page, 3, 5);

    serviceObject.step++;

    if (serviceObject.step < serviceObject.currentIndexLength) {
      serviceObject.current = {};

      await this.page.goBack({ 'waitUntil':'networkidle0' });
    }
    else {
      this.emit('serviceDone');
    }
  }

  /**
   * Runs `processMainMenu` for the service group of the current mode.
   *
   * @returns {Promise<void>}
   */
  async handleMainIndex() {
    await this.processMainMenu(this._serviceForCurrentMode());
  }

  /**
   * Runs `processEntityIndex` for the service group of the current mode.
   *
   * @returns {Promise<void>}
   */
  async handleEntityIndex() {
    await this.processEntityIndex(this._serviceForCurrentMode());
  }

  /**
   * Runs `processEntityDetail` for the service group of the current mode.
   *
   * @returns {Promise<void>}
   */
  async handleEntityDetail() {
    await this.processEntityDetail(this._serviceForCurrentMode());
  }

  /**
   * Entry point for every loaded page: waits briefly, then dispatches on the
   * `aa` query parameter of the page URL. A Chrome error page emits `recover`.
   *
   * @returns {Promise<void>}
   * @throws {Error} for an unknown page type when NODE_ENV is set
   */
  async processNewPage() {
    // give the page a few seconds to settle
    await this._randomWait(this.page, 3, 5);

    const pageUrl = url.parse(await this.page.url());

    if (pageUrl.href === 'chrome-error://chromewebdata/') {
      logger.warn('Directed to: chrome-error://chromewebdata/');
      this.emit('recover');

      return;
    }

    // default `aa` to '' so a URL without the parameter counts as the intro page
    const params = Object.assign({ 'aa': '' }, this._getParamsFromUrl(pageUrl.search));

    switch (params.aa) {

      case '':
        await this.handleIntroPage();
        break;

      case 'select_sector':
        await this.handleMainIndex();
        break;

      case 'select_categ':
        await this.handleEntityIndex();
        break;

      case 'select_subject':
        await this.handleEntityDetail();
        break;

      default:
        if (process.env.NODE_ENV) {
          await this._uploadError();

          // `pageUrl` is a parsed Url object; interpolate its href so the
          // message names the page.
          throw new Error(`Unknown page: ${pageUrl.href}`);
        }

        logger.warn('processNewPage Fell through');
        logger.warn('currentPage.location', pageUrl);
        break;

    }
  }

  /**
   * Wires the listeners that drive the crawl from one service group to the next.
   *
   * @returns {Promise<void>}
   */
  async attachEvents() {
    // `handleEntityComplete` is not defined in this class - presumably inherited.
    this.on('entityComplete', () => {
      this.handleEntityComplete();
    });

    // translate the generic signal into the one for the active mode
    this.on('serviceDone', async () => {
      switch (this.mode) {

        case 0:
          this.emit('paymentServicesDone');
          break;

        case 1:
          this.emit('emoneyServicesDone');
          break;

        case 2:
          this.emit('creditServicesDone');
          break;

      }
    });

    this.on('entityIndex', async () => {
      try {
        await this.handleEntityIndex();
      }
      catch (err) {
        // listener results are not awaited by the emitter, so report failures here
        logger.error('handleEntityIndex fail', err);
      }
    });

    // Payment sections reopen the sector menu (`urls[1]`); afterwards the mode
    // becomes 1, so continue with the e-money start URL.
    this.on('paymentServicesDone', () => this._advanceService('paymentServices', 1, () => this._goto(this.emoneyServices.urls[0])));

    // After e-money the mode becomes 2, so continue with the credit start URL.
    this.on('emoneyServicesDone', () => this._advanceService('emoneyServices', 0, () => this._goto(this.creditServices.urls[0])));

    // Credit services are the last group: finishing them finishes the crawl.
    this.on('creditServicesDone', () => this._advanceService('creditServices', 0, () => this.emit('done')));
  }

  /**
   * Initiate the process: resets the crawl state, prepares the browser page,
   * attaches the listeners and opens the start page.
   *
   * @returns {Promise<void>}
   * @throws {Error} any failure during set-up or the first navigation
   */
  async start() {
    super._start();

    try {
      this.mode = 0;

      this.inProgress = false;

      /*

       Swapping sections from text to
       data-sector ids.
       document.querySelector('[data-sector="156"]')

       Payment Services:
       Payment Institutions and Branches of Foreign Payment Institutions // 9
       Providing Payment Services in Limited Scope // 11
       Account information service providers // 156

       eMoney Services:
       E-Money Institutions and Branches of Foreign E-Money Institutions // 12
       E-Money Institutions Based in Slovakia // 37

       credit Services:
       Banks Authorised to Provide Investment Services // 5
       Banks Based in Slovakia // 19

       */
      this.paymentServices = {
        'items': 0,
        'links': [],
        'step': 0,
        'indexStep': 0,
        'visited': false,
        'done' : false,
        'urls': ['https://subjekty.nbs.sk/', 'https://subjekty.nbs.sk/?aa=select_sector&bb=2&cc=&qq='],
        'sections' : [9, 11, 156],
        'sectionStep': 0,
        'currentIndexLength' : 0,
        'sectionLinks' : [],
        'currentIndex' :'',
        'currentMetaIndex' : 0
      };

      this.emoneyServices = {
        'items': 0,
        'links': [],
        'step': 0,
        'indexStep': 0,
        'visited': false,
        'done' : false,
        'urls': ['https://subjekty.nbs.sk/?aa=select_sector&bb=2&cc=&qq='],
        'sections' : [12, 37],
        'sectionStep': 0,
        'currentIndexLength' : 0,
        'sectionLinks' : [],
        'currentIndex' :'',
        'currentMetaIndex' : 0
      };

      this.creditServices = {
        'items': 0,
        'links': [],
        'step': 0,
        'indexStep': 0,
        'visited': false,
        'done' : false,
        'searchDone' : false,
        'started': false,
        'urls': ['https://subjekty.nbs.sk/?aa=select_sector&bb=2&cc=&qq='],
        'sections' : [5, 19],
        'sectionStep': 0,
        'currentIndexLength' : 0,
        'sectionLinks' : [],
        'currentIndex' :'',
        'currentMetaIndex' : 0
      };

      this.startPage = this.paymentServices.urls[0];
      this.emoneyUrl = this.emoneyServices.urls[0];
      this.credit = this.creditServices.urls[0];

      this.setPath(path.resolve(`${__dirname}/../artefacts/SK/NBS`));

      // best effort: a failure here is logged and the run continues
      await this._doNonRepudiation().catch((err) => {
        logger.warn(err);
      });

      await this._initBrowser();
      await this._createBrowserPage();

      this.page.on('domcontentloaded', this._throttle(async () => {
        this.processNewPage().catch((err) => {
          logger.error('processNewPage fail', err);
        });
      }, 2500));

      // Attach the crawl listeners only once; two registered event names is
      // presumably the state before `attachEvents` has run (TODO confirm).
      if (this.eventNames().length === 2) {
        await this.attachEvents();
      }

      await this.page.setViewport({ 'width': 1200, 'height': 800 });
      await this._goto(this.startPage, { 'waitUntil':'networkidle0' });

      await this._randomWait(this.page, 3, 5);
    }
    catch (e) {
      // Rethrow real errors untouched so their stack and type survive; only
      // non-Error throwables are wrapped.
      throw e instanceof Error ? e : new Error(e);
    }
  }

  /**
   * Body of the throttled `run` entry point created in the constructor.
   *
   * @returns {Promise<void>}
   */
  async __run() {
    await this.start();
  }

}
|
|
|
|
// The scraper class is this module's only export.
module.exports = SKScrape;
|