890 lines
26 KiB
JavaScript
890 lines
26 KiB
JavaScript
const Scraper = require('../helpers/scraper');
|
|
const cheerio = require('cheerio');
|
|
const path = require('path');
|
|
const jsonfile = require('jsonfile');
|
|
const logger = require('log4js').getLogger('IT');
|
|
const url = require('url');
|
|
|
|
logger.level = process.env.LOGGER_LEVEL || 'warn';
|
|
|
|
class ITscrape extends Scraper {
|
|
|
|
constructor() {
|
|
super();
|
|
this.setID('IT');
|
|
|
|
this.on('done', () => {
|
|
// this._saveLocalStorage(this.page, `${this.path}/${this.id}_localstorage.json`);
|
|
this._done();
|
|
});
|
|
|
|
this.run = this._debounce(async () => {
|
|
await this.__run();
|
|
}, 5000);
|
|
|
|
if (process.env.NODE_ENV === 'production')
|
|
this._checkLock().then((l) => {
|
|
if(l)
|
|
this.run();
|
|
});
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async forceScrollToTop() {
|
|
// Force the scroll
|
|
|
|
await this.page.evaluate(() => {
|
|
window.scrollBy(0, window.innerHeight);
|
|
});
|
|
|
|
// Force the hover
|
|
|
|
await this.page.hover('#sub-navbar > giava-breadcrumb > ol > li:nth-child(3) > a').catch((err) => {
|
|
logger.warn(err);
|
|
});
|
|
|
|
// Force the focus
|
|
|
|
await this.page.focus('#sub-navbar > giava-breadcrumb > ol > li:nth-child(3) > a');
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async forceEnglish() {
|
|
await this._randomWait(this.page, 2, 2, 'Force English');
|
|
|
|
await this.page.waitForSelector('#bs-example-navbar-collapse-1 > ul > li.dropdown > a', { 'visible':true, 'timeout':7500 }).then(async (elm) => {
|
|
await elm.click({ 'delay':Scraper.notARobot() });
|
|
await this._randomWait(this.page, 2, 2);
|
|
}).catch(() => {
|
|
logger.debug('No Language button');
|
|
});
|
|
|
|
await this._findAndClick('#bs-example-navbar-collapse-1 > ul > li.dropdown.open > ul > li:nth-child(2) > a');
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async handleFrontPage() {
|
|
let pageReturned = false;
|
|
await this._randomWait(this.page, 3, 5, 'handleFrontPage');
|
|
|
|
await this.page.waitFor('ul.linkgroup a', { 'visible':true }).then(async (elm) => {
|
|
await elm.click({ 'delay':Scraper.notARobot() });
|
|
}).catch(async (err) => {
|
|
logger.info('handleFrontPage: ul.linkgroup a Not found', err);
|
|
});
|
|
|
|
do{
|
|
await this.page.waitFor('#my-container > div.container > div', { 'visible':true, 'timeout':7500 }).then(() => {
|
|
pageReturned = true;
|
|
}).catch(async () => {
|
|
logger.info('We didnt transition back correctly, forcing another click..\n');
|
|
});
|
|
|
|
if (!pageReturned) {
|
|
await this.page.hover('ul.linkgroup a').catch((err) => {
|
|
logger.debug(err.name);
|
|
});
|
|
|
|
await this.page.focus('ul.linkgroup a').catch((err) => {
|
|
logger.debug(err.name);
|
|
});
|
|
|
|
await this.page.waitFor('ul.linkgroup a', { 'visible':true }).then(async (elm) => {
|
|
await elm.click({ 'delay':Scraper.notARobot() });
|
|
}).catch(async (err) => {
|
|
logger.info('handleFrontPage: ul.linkgroup a still not found', err.name);
|
|
});
|
|
}
|
|
}
|
|
|
|
while(!pageReturned);
|
|
|
|
// Supervisory registers and lists
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async handleSecondPage() {
|
|
try{
|
|
// sometimes this page takes a while to load...
|
|
const url = await this.page.evaluate('location.href');
|
|
|
|
await this._randomWait(this.page, 10, 13, 'handleSecondPage');
|
|
|
|
await this.page.waitForSelector('div.loading', { 'visible':false, 'timeout':90000 }).catch((e) => {
|
|
logger.warn('Ajax loading shroud not removed after 90 seconds');
|
|
});
|
|
|
|
await this.page.waitForSelector('ul.nav.navbar-nav.navbar-center li a', { 'visible':false, 'timeout':90000 }).then(async (elm) => {
|
|
await elm.click({ 'delay':Scraper.notARobot() });
|
|
await this._randomWait(this.page, 5, 8, 'await transition');
|
|
}).catch((e) => {
|
|
logger.warn('Page Navigation navigation links failed to load / display');
|
|
});
|
|
|
|
// await this._findAndClick('ul.nav.navbar-nav.navbar-center li a', null, 'https://infostat.bancaditalia.it/GIAVAInquiry-public/ng/int-albi');
|
|
|
|
const newUrl = await this.page.evaluate('location.href');
|
|
|
|
if (url !== newUrl) {
|
|
logger.debug('The page Has changed!');
|
|
this.emit('pageChanged');
|
|
}
|
|
}
|
|
catch( err) {
|
|
logger.error('Failed to progress past second page', err);
|
|
this.emit('recover');
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @param html
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async extractPSRegistry(html) {
|
|
try{
|
|
const registry = {};
|
|
const $ = cheerio.load(html);
|
|
|
|
const rows = $('app-details-anagrafica > div.row');
|
|
rows.each((index, item) => {
|
|
const divs = $(item).find('div');
|
|
|
|
if ($(item).children().length === 2) {
|
|
const name = this._cleanUp(divs.eq(0).text()) ;
|
|
registry[name] = this._cleanUp(divs.eq(1).text());
|
|
}
|
|
});
|
|
|
|
return registry;
|
|
}
|
|
catch (err) {
|
|
if (process.env.NODE_ENV) {
|
|
await this._uploadError();
|
|
throw new Error('extractPSRegistry\n', err);
|
|
}
|
|
else
|
|
logger.error('extractPSRegistry\n', err);
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @param html
|
|
* @returns {Promise<Array>}
|
|
*/
|
|
async extractPSRegisters(html) {
|
|
try {
|
|
const registers = [];
|
|
const $ = cheerio.load(html);
|
|
|
|
const rows = $('app-details-albi div.ag-bl-center.ag-bl-full-height-center > div > div.ag-body > div.ag-body-viewport-wrapper > div > div div[role="row"]');
|
|
|
|
logger.info(`${rows.length} registers item${(rows.length !== 1) ? 's' : ''}`);
|
|
rows.each((index, item) => {
|
|
const divs = $(item).find('div');
|
|
const obj = {};
|
|
|
|
for (let counter = 0; counter < divs.length;counter++) {
|
|
const name = this._cleanUp(divs.eq(counter).attr('col-id'));
|
|
obj[name] = this._cleanUp(divs.eq(counter).text());
|
|
}
|
|
registers.push(obj);
|
|
});
|
|
|
|
return registers;
|
|
}
|
|
catch (err) {
|
|
if (process.env.NODE_ENV) {
|
|
await this._uploadError();
|
|
throw new Error('extractPSRegisters\n', err);
|
|
}
|
|
else
|
|
logger.error('extractPSRegisters\n', err);
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @param html
|
|
* @returns {Promise<Array>}
|
|
*/
|
|
async extractPSAuthority(html) {
|
|
try{
|
|
const authority = [];
|
|
const $ = cheerio.load(html);
|
|
|
|
const rows = $('app-details-att-autorizzate div.ag-bl-center.ag-bl-full-height-center > div > div.ag-body > div.ag-body-viewport-wrapper > div > div div[role="row"]');
|
|
|
|
logger.info(`${rows.length} authority item${(rows.length !== 1) ? 's' : ''}`);
|
|
rows.each((index, item) => {
|
|
const divs = $(item).find('div');
|
|
const obj = {};
|
|
|
|
for (let counter = 0; counter < divs.length;counter++) {
|
|
const name = this._cleanUp(divs.eq(counter).attr('col-id'));
|
|
obj[name] = this._cleanUp(divs.eq(counter).text());
|
|
}
|
|
authority.push(obj);
|
|
});
|
|
|
|
return authority;
|
|
}
|
|
catch (err) {
|
|
if (process.env.NODE_ENV) {
|
|
await this._uploadError();
|
|
throw new Error('extractPSAuthority\n', err);
|
|
}
|
|
else
|
|
logger.error('extractPSAuthority\n', err);
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async preparePSSearch() {
|
|
try{
|
|
await this._randomWait(this.page, 3, 5, `preparePSSearch - ${this.modeTitles[this.mode]}`);
|
|
|
|
// Brute force the selector
|
|
|
|
await this._findAndClick('#sub-navbar > app-int-albi > app-int-albi-search > div > div:nth-child(3) > div > input');
|
|
|
|
await this.page.waitForFunction(
|
|
'document.querySelector("#alboElenco").options.length > 1'
|
|
, { 'timeout':7500 }).then(() => {
|
|
logger.debug('Ajax done');
|
|
}).catch(() => {
|
|
throw new Error('Ajax not done');
|
|
});
|
|
|
|
const options = await this.page.$$('#alboElenco option');
|
|
const optionList = ['ALBO IP ART.114-SEPTIES TUB ', 'ALBO IMEL ITA EX 114-QUATER ', 'ALBO DELLE BANCHE '];
|
|
|
|
const wantedOption = [optionList[this.mode]];
|
|
|
|
for (const item of options) {
|
|
const text = await this.page.evaluate(el => el.innerText, item);
|
|
const value = await this.page.evaluate(el => el.value, item);
|
|
|
|
if (wantedOption.indexOf(text) !== -1) {
|
|
await this.page.select('#alboElenco', value);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// wait for loading shroud to go away
|
|
await this.page.waitForSelector('div.loading', { 'visible':false, 'timeout':25000 });
|
|
|
|
let btnSuccess = false;
|
|
|
|
do {
|
|
await this.page.waitForSelector('button.btn.btn-success', { 'visible':true, 'timeout':2500 }).then(async (elm) => {
|
|
await elm.click({ 'delay':Scraper.notARobot() });
|
|
}).catch(() => {
|
|
btnSuccess = true;
|
|
});
|
|
await this._randomWait(this.page, 1, 1, 'preparePSSearch btnSuccess');
|
|
}
|
|
|
|
while(!btnSuccess);
|
|
|
|
this.page.waitFor('app-int-albi-grid-result').then(async () => {
|
|
//
|
|
await this.forceEnglish();
|
|
await this.emit('processAgTable');
|
|
}).catch(async (err) => {
|
|
if (process.env.NODE_ENV) {
|
|
await this._uploadError();
|
|
throw new Error('No results transition\n', err);
|
|
}
|
|
else
|
|
logger.error('No results transition\n', err);
|
|
});
|
|
}
|
|
catch (err) {
|
|
if (process.env.NODE_ENV) {
|
|
await this._uploadError();
|
|
throw new Error('preparePSSearch\n', err);
|
|
}
|
|
else
|
|
logger.error('preparePSSearch\n', err);
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<{registry, authority, registers}>}
|
|
*/
|
|
async processPSDetail() {
|
|
let registry = {}, registers = {}, authority = {};
|
|
|
|
await this._randomWait(this.page, 3, 3, 'processPSDetail: AJAX');
|
|
|
|
// await this._makeScreenshotV2(this.page, `${filePath}_main`, null);
|
|
|
|
await this.page.waitFor('#sub-navbar > app-int-albi > app-int-albi-details > div > div.card.card-title > span > span', { 'visible': true }).catch((err) => {
|
|
logger.warn('AJAX data has failed to load');
|
|
logger.debug(err);
|
|
|
|
return { registry, registers, authority };
|
|
});
|
|
|
|
await this.page.waitFor('app-int-albi-details').then(async () => {
|
|
await this.forceScrollToTop();
|
|
|
|
const body = await this.page.content();
|
|
|
|
registry = await this.extractPSRegistry(body);
|
|
|
|
await this._randomWait(this.page, 2, 2, 'processPSDetail app-int-albi-details');
|
|
}).catch(async (err) => {
|
|
if (process.env.NODE_ENV) {
|
|
await this._uploadError();
|
|
throw new Error('processPSDetail\n', err);
|
|
}
|
|
else
|
|
logger.error('processPSDetail\n', err);
|
|
});
|
|
|
|
await this._randomWait(this.page, 1, 1, 'processPSDetail after app-int-albi-details');
|
|
|
|
//
|
|
|
|
await this.forceScrollToTop();
|
|
|
|
// wait for Registers Tab
|
|
await this.page.waitFor('#sub-navbar > app-int-albi > app-int-albi-details > div > div:nth-child(2) > ul > li:nth-child(2) > a', { 'visible': true, 'timeout':10000 }).then(async (elm) => {
|
|
logger.debug('** Showing Registers Tab');
|
|
await elm.click({ 'delay':90 });
|
|
|
|
await this.page.waitFor('app-details-albi', { 'visible': true, 'timeout':10000 }).then(async () => {
|
|
const body = await this.page.content();
|
|
|
|
registers = await this.extractPSRegisters(body);
|
|
|
|
await this._randomWait(this.page, 2, 2, 'processPSDetail app-details-albi');
|
|
}).catch(async (err) => {
|
|
if (process.env.NODE_ENV)
|
|
// await this._uploadError();
|
|
throw new Error('No tab transition\n', err);
|
|
|
|
else
|
|
logger.error('No tab transition');
|
|
});
|
|
|
|
await this._randomWait(this.page, 1, 1, 'processPSDetail after app-details-albi');
|
|
}).catch((err) => {
|
|
logger.warn('No "registers" Block...');
|
|
logger.debug(err);
|
|
});
|
|
|
|
// wait for Activity Tab
|
|
await this.forceScrollToTop();
|
|
await this.page.waitFor('#sub-navbar > app-int-albi > app-int-albi-details > div > div:nth-child(2) > ul > li:nth-child(3) > a', { 'visible': true, 'timeout':10000 }).then(async (elm) => {
|
|
logger.debug('** Showing Activity Tab');
|
|
await elm.click({ 'delay':90 });
|
|
let pageReturned = false;
|
|
|
|
do
|
|
await this.page.waitFor('app-details-att-autorizzate', { 'visible': true, 'timeout':10000 }).then(async () => {
|
|
pageReturned = true;
|
|
const body = await this.page.content();
|
|
|
|
authority = await this.extractPSAuthority(body);
|
|
|
|
await this._randomWait(this.page, 2, 2, 'processPSDetail app-details-att-autorizzate');
|
|
}).catch(async (err) => {
|
|
await this.forceScrollToTop();
|
|
await this._findAndClick('#sub-navbar > app-int-albi > app-int-albi-details > div > div:nth-child(2) > ul > li:nth-child(3) > a');
|
|
|
|
if (process.env.NODE_ENV)
|
|
throw new Error('No tab transition\n', err);
|
|
|
|
else
|
|
logger.warn('No tab transition');
|
|
});
|
|
while(!pageReturned);
|
|
}).catch((err) => {
|
|
logger.warn('No "Activity" Block...');
|
|
logger.debug(err);
|
|
});
|
|
|
|
return { registry, registers, authority };
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async returnToPSList() {
|
|
try{
|
|
let pageReturned = false;
|
|
await this.page.hover('#sub-navbar > giava-breadcrumb > ol > li:nth-child(3) > a').catch((err) => {
|
|
logger.warn(err);
|
|
});
|
|
await this._findAndClick('#sub-navbar > giava-breadcrumb > ol > li:nth-child(3) > a');
|
|
|
|
do
|
|
await this.page.waitFor('app-int-albi-grid-result').then(() => {
|
|
pageReturned = true;
|
|
}).catch(async (err) => {
|
|
logger.warn('We didnt transition back correctly, forcing another click..\n', err);
|
|
|
|
await this.forceScrollToTop();
|
|
|
|
await this._findAndClick('#sub-navbar > giava-breadcrumb > ol > li:nth-child(3) > a');
|
|
});
|
|
|
|
while(!pageReturned);
|
|
}
|
|
catch (err) {
|
|
logger.error('returnToPSList\n', err);
|
|
this.emit('recover');
|
|
if (process.env.NODE_ENV)
|
|
await this._uploadError();
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<number>}
|
|
*/
|
|
async psGetMaxRows() {
|
|
const regExNumbersOnly = /\d{1,13}(?:,\d{0,2})?/g;
|
|
|
|
const elm = await this.page.$$('#sub-navbar > app-int-albi > app-int-albi-grid-result > grid-pagination > div > div > div:nth-child(1) > p');
|
|
|
|
const text = await this.page.evaluate(el => el.innerText, elm[0]);
|
|
|
|
const numbers = regExNumbersOnly.exec(text);
|
|
|
|
return (numbers !== null) ? parseInt(numbers[0], 10) : -1;
|
|
}
|
|
|
|
async processDivs($, divs) {
|
|
const entries = {};
|
|
|
|
divs.each((index, item) => {
|
|
const itemText = this._cleanUp($(item).text());
|
|
const itemName = $(item).attr('col-id');
|
|
// logger.info(`>> ${index}`, itemName, itemText);
|
|
entries[itemName] = itemText;
|
|
});
|
|
|
|
return entries;
|
|
}
|
|
|
|
async psSetListCount(count) {
|
|
logger.debug('+ psSetListCount ');
|
|
await this.page.focus('#sub-navbar > app-int-albi > app-int-albi-grid-result > grid-pagination > div > div > div:nth-child(7) > div > input');
|
|
|
|
for(let del = 0;del < 5;del++)
|
|
await this.page.keyboard.press('Backspace');
|
|
|
|
await this.page.keyboard.type(count.toString(), { 'delay': 100 }); // Types slower, like a user
|
|
|
|
await this.page.keyboard.press('Enter');
|
|
|
|
await this._randomWait(this.page, 10, 10, 'ajax refresh');
|
|
logger.debug('- psSetListCount ');
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @param serviceObject
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async processAGTableV3(serviceObject) {
|
|
// this whole thing is ugly but at the moment it works
|
|
|
|
await this._randomWait(this.page, 3, 5, 'processAGTableV3');
|
|
|
|
const _defaultMaxPerPage = 10;
|
|
let workingData;
|
|
let elmStep;
|
|
let item;
|
|
let maxPages = 0;
|
|
let rowsInPass;
|
|
|
|
await this.psSetListCount(_defaultMaxPerPage);
|
|
const maxRows = await this.psGetMaxRows();
|
|
let remainingRows = maxRows;
|
|
|
|
logger.info('Max Rows', maxRows);
|
|
|
|
if (maxRows > _defaultMaxPerPage) {
|
|
maxPages = ~~(maxRows / _defaultMaxPerPage);
|
|
|
|
logger.info('Max pages:', maxPages);
|
|
}
|
|
|
|
for(let pageStep = 0; pageStep <= maxPages; pageStep++) {
|
|
logger.info('Pagestep', pageStep, (pageStep + 1) * _defaultMaxPerPage);
|
|
|
|
if (maxPages > 0)
|
|
if ((maxRows - ((pageStep ) * _defaultMaxPerPage)) > _defaultMaxPerPage)
|
|
rowsInPass = _defaultMaxPerPage;
|
|
else
|
|
|
|
rowsInPass = (maxRows - ((pageStep ) * _defaultMaxPerPage));
|
|
else
|
|
rowsInPass = maxRows;
|
|
|
|
logger.info(`Rows in this pass : ${rowsInPass}`);
|
|
|
|
for (let step = 0; step < rowsInPass; step++) {
|
|
for ( elmStep = 0; elmStep <= step; elmStep++) {
|
|
workingData = await this.page.$$(`div.ag-body-container div.ag-row[row-id="${elmStep}"]`);
|
|
item = workingData[0];
|
|
|
|
if (typeof item !== 'undefined')
|
|
await item.hover().catch((err) => {
|
|
logger.warn(err);
|
|
logger.info(item);
|
|
});
|
|
|
|
await this._microWait(this.page, 1);
|
|
}
|
|
|
|
await this._randomWait(this.page, 2, 2, 'processAGTableV3 after rows');
|
|
|
|
if (typeof item !== 'undefined') {
|
|
const html = await this.page.evaluate(el => el.innerHTML, item);
|
|
const clickable = await item.$('div[col-id="name"]');
|
|
const abiCodeElm = await item.$('div[col-id="abiCode"]');
|
|
const uid = await this.page.evaluate(el => el.innerText, abiCodeElm);
|
|
const clickName = await this.page.evaluate(el => el.innerText, clickable);
|
|
|
|
const $ = cheerio.load(html);
|
|
|
|
const divs = $('div');
|
|
|
|
logger.info(`Processing : ${clickName}, ${remainingRows} remain.`);
|
|
|
|
if (!serviceObject.workingData.has(uid)) {
|
|
// Exract all the data from the cells
|
|
|
|
const newEntry = await this.processDivs($, divs);
|
|
|
|
// Insert it in the map
|
|
serviceObject.workingData.set(uid, newEntry);
|
|
|
|
await this._randomWait(this.page, 2, 2, `Processing : ${clickName}`);
|
|
|
|
const filePath = await this._makeFilePath(clickName);
|
|
const fileName = this._makeFileName(clickName);
|
|
await this._randomWait(this.page, 2, 2, 'processAGTableV3 before ss');
|
|
await this._makeScreenshotV2(this.page, `${filePath}_main`, null);
|
|
|
|
serviceObject.links.push({ uid, 'fileName':`${fileName}.json`, 'name':clickName });
|
|
|
|
// Go into the detail
|
|
await clickable.click();
|
|
|
|
await this._randomWait(this.page, 3, 4, 'processAGTableV3 before next');
|
|
|
|
remainingRows--;
|
|
|
|
await this.page.waitFor('app-int-albi-details').then(
|
|
|
|
await this.doAlbiDetails(filePath, newEntry)
|
|
|
|
).catch(async (err) => {
|
|
logger.error('No detail transition', err);
|
|
this.emit('recover');
|
|
|
|
if (process.env.NODE_ENV)
|
|
await this._uploadError();
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
if (maxPages > 0) {
|
|
logger.info('Clicking to the next page...');
|
|
|
|
const nextButton = await this.page.$$('#sub-navbar > app-int-albi > app-int-albi-grid-result > grid-pagination > div > div > div:nth-child(5) > button');
|
|
const buttonDisabled = await this.page.evaluate(el => el.disabled, nextButton[0]);
|
|
if (!buttonDisabled) {
|
|
this._findAndClick('#sub-navbar > app-int-albi > app-int-albi-grid-result > grid-pagination > div > div > div:nth-child(5) > button');
|
|
await this._randomWait(this.page, 5, 5, 'processAGTableV3 next page click');
|
|
}
|
|
}
|
|
}
|
|
|
|
logger.debug('processAGTableV3 DONE');
|
|
|
|
this.emit('doneProcessingGrid');
|
|
}
|
|
|
|
async doAlbiDetails(filePath, newEntry) {
|
|
try{
|
|
// process the page
|
|
const data = await this.processPSDetail();
|
|
data.details = newEntry;
|
|
|
|
logger.info(`Saving ${filePath}.json`);
|
|
await jsonfile.writeFile(`${filePath}.json`, data);
|
|
|
|
await this._randomWait(this.page, 5, 7, 'doAlbiDetails');
|
|
|
|
// Retun back to list
|
|
|
|
await this.returnToPSList();
|
|
|
|
await this._randomWait(this.page, 2, 2, 'doAlbiDetails after returnToPSList');
|
|
// wArray.push([uid, clickName]);
|
|
}
|
|
catch (err) {
|
|
logger.error('doAlbiDetails\n', err);
|
|
this.emit('recover');
|
|
|
|
if (process.env.NODE_ENV)
|
|
await this._uploadError();
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async processNewPage() {
|
|
// give the page a few seconds to settle
|
|
await this._randomWait(this.page, 3, 5, 'processNewPage');
|
|
|
|
const pageUrl = url.parse(await this.page.url());
|
|
|
|
switch (pageUrl.pathname) {
|
|
|
|
case '/compiti/vigilanza/albi-elenchi/index.html':
|
|
await this.handleFrontPage();
|
|
break;
|
|
|
|
case '/GIAVAInquiry-public/ng/':
|
|
await this.handleSecondPage();
|
|
break;
|
|
|
|
case '/GIAVAInquiry-public/ng/int-albi/search':
|
|
await this.preparePSSearch();
|
|
break;
|
|
case '/en/our-registers/company-register/gransoverskridandehandel/':
|
|
await this.crossBorderRedirector();
|
|
break;
|
|
|
|
default:
|
|
|
|
if (process.env.NODE_ENV) {
|
|
await this._uploadError();
|
|
throw new Error(`Unknown page: ${pageUrl}`);
|
|
}
|
|
else {
|
|
logger.warn('processNewPage Fell through');
|
|
logger.warn('currentPage.location', pageUrl);
|
|
}
|
|
break;
|
|
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async attachEvents() {
|
|
// Need thiss for Angular based sites
|
|
|
|
// clear out stock recover handler
|
|
|
|
this.removeAllListeners('recover');
|
|
|
|
this.on('pageChanged', this._throttle(async () => {
|
|
this.processNewPage().catch((err) => {
|
|
logger.error('processNewPage fail', err);
|
|
});
|
|
}, 2500));
|
|
|
|
this.on('recover', this._debounce(async () => {
|
|
clearTimeout(this.backOffTimer);
|
|
|
|
logger.warn('Backing off for 5 minutes..');
|
|
const timeout = (60 * 1000) * 5;
|
|
|
|
this.backOffTimer = setTimeout(() => {
|
|
this.emit('restart');
|
|
// this.recover();
|
|
}, timeout);
|
|
}, 30000));
|
|
|
|
this.on('restart', this._debounce(async() => {
|
|
clearTimeout(this.backOffTimer);
|
|
|
|
logger.warn('Restarting::');
|
|
|
|
// await this._goto(this.startPage, { 'waitUntil':'networkidle0' });
|
|
|
|
// use the Scraper recovery now to ensure crashed browser is resurrected
|
|
await this.__recover(this.startPage);
|
|
}, 15000));
|
|
|
|
this.on('processAgTable', async () => {
|
|
switch (this.mode) {
|
|
|
|
case 1:
|
|
await this.processAGTableV3(this.emoneyServices);
|
|
break;
|
|
|
|
case 2:
|
|
await this.processAGTableV3(this.creditServices);
|
|
break;
|
|
|
|
case 0:
|
|
default:
|
|
await this.processAGTableV3(this.paymentServices);
|
|
break;
|
|
|
|
}
|
|
});
|
|
|
|
this.on('doneProcessingGrid', async () => {
|
|
let curObj;
|
|
switch (this.mode) {
|
|
|
|
case 1:
|
|
curObj = this.emoneyServices;
|
|
break;
|
|
|
|
case 2:
|
|
curObj = this.creditServices;
|
|
break;
|
|
|
|
case 0:
|
|
default:
|
|
curObj = this.paymentServices;
|
|
break;
|
|
|
|
}
|
|
|
|
curObj.done = true;
|
|
curObj.items = curObj.links.length;
|
|
|
|
jsonfile.writeFileSync(`${this.path}/${this.modeNames[this.mode]}.json`, { 'links':curObj.links });
|
|
jsonfile.writeFileSync(`${this.debugPath}/${this.modeNames[this.mode]}.json`, curObj);
|
|
|
|
this.mode++;
|
|
|
|
if (this.mode < 3) {
|
|
await this._goto(this.startPage, { 'waitUntil':'networkidle0' });
|
|
|
|
await this._randomWait(this.page, 3, 5, 'doneProcessingGrid');
|
|
}
|
|
else
|
|
this.emit('done');
|
|
});
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @returns {Promise<void>}
|
|
*/
|
|
async start() {
|
|
super._start();
|
|
try {
|
|
this.mode = 0;
|
|
|
|
this.modeTitles = ['Payment Service', 'EMoney', 'Credit Services'];
|
|
|
|
this.paymentServices = {
|
|
'items': 0,
|
|
'links': [],
|
|
'step': 0,
|
|
'indexStep': 0,
|
|
'visited': false,
|
|
'done' : false,
|
|
'urls': ['https://www.bancaditalia.it/compiti/vigilanza/albi-elenchi/index.html?com.dotmarketing.htmlpage.language=1'],
|
|
'workingData': new Map([]),
|
|
'workingIndex': 0
|
|
};
|
|
|
|
this.emoneyServices = {
|
|
'items': 0,
|
|
'links': [],
|
|
'step': 0,
|
|
'indexStep': 0,
|
|
'visited': false,
|
|
'done' : false,
|
|
'urls': ['https://www.bancaditalia.it/compiti/vigilanza/albi-elenchi/index.html?com.dotmarketing.htmlpage.language=1'],
|
|
'workingData': new Map([]),
|
|
'workingIndex': 0
|
|
};
|
|
|
|
this.creditServices = {
|
|
'items': 0,
|
|
'links': [],
|
|
'step': 0,
|
|
'indexStep': 0,
|
|
'visited': false,
|
|
'done' : false,
|
|
'searchDone' : false,
|
|
'started': false,
|
|
'urls': ['https://www.bancaditalia.it/compiti/vigilanza/albi-elenchi/index.html?com.dotmarketing.htmlpage.language=1'],
|
|
'workingData': new Map([]),
|
|
'workingIndex': 0
|
|
};
|
|
|
|
this.startPage = this.paymentServices.urls[0];
|
|
this.emoneyUrl = '';
|
|
this.credit = '';
|
|
|
|
this.backOffTimer = 0;
|
|
|
|
this.setPath(path.resolve(`${__dirname }/../artefacts/IT/FSA`));
|
|
|
|
await this._doNonRepudiation(false, { 'sslWithPrefix':true }).catch((err) => {
|
|
logger.warn(err);
|
|
});
|
|
|
|
await this._initBrowser(true);
|
|
await this._createBrowserPage();
|
|
|
|
this.page.on('domcontentloaded', this._throttle(async () => {
|
|
this.processNewPage().catch((err) => {
|
|
logger.error('processNewPage fail', err);
|
|
});
|
|
}, 2500));
|
|
|
|
if (this.eventNames().length === 2)
|
|
await this.attachEvents();
|
|
|
|
await this.page.setViewport({ 'width': 1200, 'height': 800 });
|
|
await this._goto(this.startPage, { 'waitUntil':'networkidle2' });
|
|
|
|
await this._randomWait(this.page, 3, 5, 'After start');
|
|
}
|
|
catch(e) {
|
|
throw new Error(e);
|
|
}
|
|
}
|
|
|
|
async __run() {
|
|
await this.start();
|
|
}
|
|
|
|
}
|
|
|
|
module.exports = ITscrape;
|