87 lines
2.8 KiB
JavaScript
87 lines
2.8 KiB
JavaScript
|
const tape = require('tape');
|
||
|
const _test = require('tape-promise').default; // <---- notice 'default'
|
||
|
const test = _test(tape); // decorate tape
|
||
|
|
||
|
const Cyprus = require('../ncas/cy');
|
||
|
|
||
|
const fs = require('fs');
|
||
|
const jsonfile = require('jsonfile');
|
||
|
|
||
|
/**
 * Manual end-to-end check of the Excel download flow (skipped by default).
 *
 * Starts the scraper's browser, attaches debug listeners, opens the page at
 * http://127.0.0.1:8080/test.html and calls `downloadExcel()`.
 *
 * The final assertion compares two empty objects, i.e. it is a placeholder
 * and does not validate any downloaded data.
 *
 * NOTE(review): the browser started by `_initBrowser()` is never closed in
 * this test — confirm whether `downloadExcel()` takes care of that.
 */
test.skip('CYPRUS::Scraper', async t => {
  const cyScraper = new Cyprus();

  await cyScraper._initBrowser();

  // Debug aid: log whatever the browser object emits as 'response'.
  // NOTE(review): presumably meant for network responses — verify that this
  // event is actually emitted on the browser rather than on a page.
  cyScraper.browser.on('response', async target => {
    console.log('response', target);
  });

  // When a new target shows up, redirect downloads into a timestamped folder.
  cyScraper.browser.on('targetcreated', async target => {
    console.log(target);

    if (target.url() !== 'about:blank') {
      // Arrow functions have no `this` of their own; inside this test file
      // `this` is not the scraper, so the instance is referenced explicitly.
      const tsDirectory = await cyScraper._createTimestampDirectory(cyScraper.path);

      await cyScraper.page._client.send('Page.setDownloadBehavior', { 'behavior': 'allow', 'downloadPath': tsDirectory });
      console.log('Saving into:', tsDirectory);
    }
  });

  cyScraper.page = await cyScraper.browser.newPage();

  await cyScraper.page.goto('http://127.0.0.1:8080/test.html', { 'waitUntil': 'networkidle0' });

  await cyScraper.downloadExcel();

  // Placeholder assertion: always passes.
  t.deepEquals({}, {}, 'Extract the correct data');
  t.end();
});
|
||
|
|
||
|
/**
 * Fixture-driven tests for the Cyprus scraper's extraction methods.
 *
 * Each subtest reads a saved HTML page from tests/data/cy, runs one method of
 * a fresh `Cyprus` instance against it, and deep-compares the result with the
 * matching expected-JSON fixture.
 */
test('CYPRUS:: Scrape a Credit Service', async t => {
  t.test('CYPRUS::Extract Local Credit Institutions from Page', async st => {
    // readFileSync is synchronous and returns a Buffer; no await is needed.
    const contentDetail = fs.readFileSync('tests/data/cy/content.html');
    const expectedJSON = jsonfile.readFileSync('tests/data/cy/local_credit_001.json');

    const cyScraper = new Cyprus();

    const output = await cyScraper.extractLocalCreditInstitutions(contentDetail);

    st.deepEquals(output, expectedJSON, 'Extracted Local Credit Institutions from Page');

    st.end();
  });

  t.test('CYPRUS::Extract Foreign Credit Institutions from Page', async st => {
    const contentDetail = fs.readFileSync('tests/data/cy/content.html');
    const expectedJSON = jsonfile.readFileSync('tests/data/cy/foreign_credit_001.json');

    const cyScraper = new Cyprus();

    const output = await cyScraper.extractForeignCreditInstitutions(contentDetail);

    st.deepEquals(output, expectedJSON, 'Extracted Foreign Credit Institutions from Page');

    st.end();
  });

  // Skipped: drives a real browser instead of parsing the fixture directly.
  t.skip('CYPRUS::Extract ALL Credit Institutions from Page', async st => {
    const contentDetail = fs.readFileSync('tests/data/cy/credit_institutes.html');
    const expectedJSON = jsonfile.readFileSync('tests/data/cy/all_credit_001.json');

    const cyScraper = new Cyprus();

    cyScraper.path = 'tests/sink';
    await cyScraper._initBrowser(false);

    let output;
    try {
      cyScraper.page = await cyScraper.browser.newPage();

      // The fixture is loaded through a data: URL (the Buffer is stringified
      // by the template literal).
      // NOTE(review): the HTML is interpolated unescaped, so a '#' in the
      // fixture would be parsed as a URL fragment — confirm the fixture is safe.
      await cyScraper.page.goto(`data:text/html,${contentDetail}`, { 'waitUntil': 'networkidle0' });

      output = await cyScraper.processCreditInstitute();
    } finally {
      // Always release the browser, even when navigation or processing throws.
      await cyScraper.browser.close();
    }

    st.deepEquals(output, expectedJSON, 'Extracted ALL Credit Institutions from Page');

    st.end();
  });
});
|
||
|
|