obdfcascrape/tests/scrape.cz.js
Martin Donnelly be5d3eae07 init
2019-05-05 20:13:56 +01:00

173 lines
5.4 KiB
JavaScript

const tape = require('tape');
const _test = require('tape-promise').default; // <---- notice 'default'
const test = _test(tape); // decorate tape
const fs = require('fs');
const jsonfile = require('jsonfile');
const cheerio = require('cheerio');
const StaticServer = require('static-server');
const path = require('path');
const Czech = require('../ncas/cz');
const sourcePath = path.join(__dirname, 'data/cz/');
// Local HTTP server that exposes the fixture directory (`sourcePath`) on
// port 7357, so page-driven tests can load fixtures via http://127.0.0.1:7357/.
var server = new StaticServer({
  rootPath: sourcePath, // required: root of the served file tree
  port: 7357, // required: port to listen on
  name: 'my-http-server', // optional: sets the "X-Powered-by" HTTP header
  // host: '10.0.0.100', // optional: defaults to any interface
  cors: '*', // optional: defaults to undefined
  followSymlink: true, // optional: defaults to a 404 error
  templates: {
    index: 'foo.html', // optional: defaults to 'index.html'
    notFound: '404.html' // optional: defaults to undefined
  }
});
// Start the fixture server when this module loads; the callback only reports
// the port and root directory being served.
server.start(() => {
  console.log('Server listening to', server.port);
  console.log('Serving', server.rootPath);
});
/**
 * Captcha handling (currently skipped).
 *
 * Loads the captcha fixture page from the local static server (port 7357),
 * runs the scraper's `captchaTest()` against it and compares the returned
 * text with the known solution.
 */
test.skip('CZECH:: Captcha Handler', async t => {
  t.test('CZECH::Break the captcha', async t => {
    // Known solution for the captcha shown in captcha-001.html.
    const expectedText = '095471';
    const czScraper = new Czech();

    // NOTE(review): the meaning of the `true` flag is defined in ../ncas/cz — confirm.
    await czScraper._initBrowser(true);

    let result;
    try {
      czScraper.page = await czScraper.browser.newPage();
      await czScraper.page.goto('http://127.0.0.1:7357/captcha-001.html', { 'waitUntil': 'networkidle2' });
      // await czScraper.page.goto('https://apl.cnb.cz/apljerrsdad/JERRS.WEB45.LOGIN_A?p_lang=en&p_err=8', { 'waitUntil': 'networkidle0' });
      result = await czScraper.captchaTest();
    } finally {
      // Always release the browser, even when navigation or solving throws.
      // (Previously this referenced an undefined `cz` identifier.)
      await czScraper.browser.close();
    }

    t.equals(result, expectedText, 'Busted captcha');
    t.end();
  });
  t.end();
});
/**
 * Payment-service extraction tests.
 *
 * Each case reads `<fixture>.html` from the fixture directory, passes the raw
 * file contents to one of the scraper's extract methods and deep-compares the
 * result with the parsed `<fixture>.json` stored next to it.
 *
 * Fixtures are resolved through `sourcePath` (derived from `__dirname`) rather
 * than a cwd-relative 'tests/data/cz/...' string, so the outcome does not
 * depend on the directory the test runner is launched from.
 */
test('CZECH:: Scrape a Payment Service', async t => {
  const cases = [
    {
      title: 'CZECH::Extract Basic Payment Services from Page',
      fixture: 'ps001',
      extract: (scraper, html) => scraper.extractEntityDetails(html),
      message: 'Extracted Details from Page'
    },
    {
      title: 'CZECH::Extract More advanced details Payment Services from Page',
      fixture: 'ps002',
      extract: (scraper, html) => scraper.extractEntityDetails(html),
      message: 'Extracted Details from Page'
    },
    {
      title: 'CZECH::Extract More Authorized activities for Payment Services from Page',
      fixture: 'ps003_auth',
      extract: (scraper, html) => scraper.extractEntityAuthority(html),
      message: 'Extracted Authorized activities from Page'
    }
  ];

  // Sub-tests are registered synchronously, in table order.
  for (const { title, fixture, extract, message } of cases) {
    t.test(title, async st => {
      const psDetail = fs.readFileSync(path.join(sourcePath, `${fixture}.html`));
      const expectedJSON = jsonfile.readFileSync(path.join(sourcePath, `${fixture}.json`));

      const czScraper = new Czech();
      // NOTE(review): presumably used to build absolute links in the output — confirm in ../ncas/cz.
      czScraper.urlPrefix = 'https://apl.cnb.cz/apljerrsdad/';

      const output = await extract(czScraper, psDetail);
      st.deepEquals(output, expectedJSON, message);
      st.end();
    });
  }

  t.end();
});
/**
 * Credit-services "authorised activities" extraction tests.
 *
 * Each case reads `<fixture>.html` from the fixture directory, passes the raw
 * file contents to `extractCreditServicesEntityAuthority` and deep-compares
 * the result with the parsed `<fixture>.json` stored next to it.
 *
 * Fixtures are resolved through `sourcePath` (derived from `__dirname`) rather
 * than a cwd-relative 'tests/data/cz/...' string, so the outcome does not
 * depend on the directory the test runner is launched from.
 */
test('CZECH:: Scrape creditServices Authorised activitites', async t => {
  // [sub-test title, fixture base name]; the titles name the page layout each fixture covers.
  const cases = [
    ['CZECH::Extract Authorized activities ( No 7 section, wih Matrix )', 'cs_001'],
    ['CZECH::Extract Authorized activities ( 7 Section no active, no Matrix )', 'cs_002'],
    ['CZECH::Extract Authorized activities ( With 7 Section and Matrix )', 'cs_003'],
    ['CZECH::Extract Authorized activities ( 4 tables including Matrix )', 'cs_004']
  ];

  // Sub-tests are registered synchronously, in table order.
  for (const [title, fixture] of cases) {
    t.test(title, async st => {
      const psDetail = fs.readFileSync(path.join(sourcePath, `${fixture}.html`));
      const expectedJSON = jsonfile.readFileSync(path.join(sourcePath, `${fixture}.json`));

      const czScraper = new Czech();
      // NOTE(review): presumably used to build absolute links in the output — confirm in ../ncas/cz.
      czScraper.urlPrefix = 'https://apl.cnb.cz/apljerrsdad/';

      const output = await czScraper.extractCreditServicesEntityAuthority(psDetail);
      st.deepEquals(output, expectedJSON, 'Extracted Details from Page');
      st.end();
    });
  }

  t.end();
});