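// Fixture-driven tape tests for the NO (Finanstilsynet) scraper:
// index extraction, entity details/license parsing, the content sniffer
// and cross-border data.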
const tape = require('tape');
const _test = require('tape-promise').default; // <---- notice 'default'
const test = _test(tape); // decorate tape

const fs = require('fs');
const jsonfile = require('jsonfile');

const NO = require('../ncas/no');

const noScraper = new NO();

const failure = { 'fail': true };
const empty = {};

test.test('Indexes', async t => {
  test.test('NO:: Process simple index', async t => {
    const htmlFile = 'tests/data/no/index_001.html';
    const psDetail = fs.readFileSync(htmlFile);
    const expectedJSON = jsonfile.readFileSync('tests/data/no/index_001.json');

    noScraper.rootURI = 'https://www.finanstilsynet.no';

    const output = await noScraper.extractIndexItems(psDetail);

    t.deepEquals(output, expectedJSON, 'Extracted index');
    t.end();
  });

  t.end();
});

test.test('Entity', async t => {
  test.test('NO:: Process PS Entity 001', async t => {
    const htmlFile = 'tests/data/no/ent_001.html';

    t.test('NO::Extract entity details', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_001_details.json');

      const output = await noScraper.extractEntityDetails(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted entity details from Page');
      t.end();
    });

    t.test('NO::Extract entity license', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_001_license.json');

      const output = await noScraper.extractEntityDetailLicense(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted entity license from Page');
      t.end();
    });

    t.test('NO::Extract license blocks through sniffer', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_001_sniff.json');

      const output = await noScraper.entityContentSniffer(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted license blocks through sniffer');
      t.end();
    });

    t.end();
  });

  test.test('NO:: Process PS Entity 002', async t => {
    const htmlFile = 'tests/data/no/ent_002.html';

    t.test('NO::Extract entity details', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_002_details.json');

      const output = await noScraper.extractEntityDetails(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted entity details from Page');
      t.end();
    });

    t.test('NO::Extract entity license', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_002_license.json');

      const output = await noScraper.extractEntityDetailLicense(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted entity license from Page');
      t.end();
    });

    t.test('NO::Extract entity Cross Border', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_002_cb.json');

      const output = await noScraper.extractEntityDetailCrossBorder(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted entity cross border from Page');
      t.end();
    });

    t.test('NO::Extract license blocks through sniffer', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_002_sniff.json');

      const output = await noScraper.entityContentSniffer(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted license blocks through sniffer');
      t.end();
    });

    t.end();
  });

  test.test('NO:: Process PS Entity 003', async t => {
    const htmlFile = 'tests/data/no/ent_003.html';

    t.test('NO::Extract license blocks through sniffer', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_003_sniff.json');

      const output = await noScraper.entityContentSniffer(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted license blocks through sniffer');
      t.end();
    });

    t.end();
  });

  test.test('NO:: Process PS Entity 004', async t => {
    const htmlFile = 'tests/data/no/ent_004.html';

    t.test('NO::Extract license blocks through sniffer', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_004_sniff.json');

      const output = await noScraper.entityContentSniffer(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted license blocks through sniffer');
      t.end();
    });

    t.end();
  });

  test.test('NO:: Process PS Entity 005', async t => {
    const htmlFile = 'tests/data/no/ent_005.html';

    t.test('NO::Extract entity Cross Border', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_005_cb.json');

      const output = await noScraper.extractEntityDetailCrossBorder(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted entity cross border from Page');
      t.end();
    });

    t.test('NO::Extract license blocks through sniffer', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_005_sniff.json');

      const output = await noScraper.entityContentSniffer(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted license blocks through sniffer');
      t.end();
    });

    t.end();
  });

  test.test('NO:: Process PS Entity 006', async t => {
    const htmlFile = 'tests/data/no/ent_006.html';

    t.test('NO::CB with two sets of separate data', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_006_cb.json');

      const output = await noScraper.extractEntityDetailCrossBorder(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted CB with two sets of separate data');
      t.end();
    });

    t.test('NO::CB with two sets of separate data through sniffer', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_006_sniff.json');

      const output = await noScraper.entityContentSniffer(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted CB with two sets of separate data through sniffer');
      t.end();
    });

    t.end();
  });

  test.test('NO:: Cross-Border format', async t => {
    t.test('NO:: Extract Cross-Border services 001 (PS)', async t => {
      const crossborderHtml = fs.readFileSync('tests/data/no/cb_001_ps.html').toString();
      const expectedJSON = jsonfile.readFileSync('tests/data/no/cb_001_ps.json');

      const output = await noScraper.recurseCrossborderHtml(crossborderHtml);

      t.deepEquals(output, expectedJSON, 'Extracted cross-border services from html');
      t.end();
    });

    t.test('NO:: Extract Cross-Border services 002 (PS)', async t => {
      const crossborderHtml = fs.readFileSync('tests/data/no/cb_002_ps.html').toString();
      const expectedJSON = jsonfile.readFileSync('tests/data/no/cb_002_ps.json');

      const output = await noScraper.recurseCrossborderHtml(crossborderHtml);

      t.deepEquals(output, expectedJSON, 'Extracted cross-border services from html');
      t.end();
    });

    t.test('NO:: Extract Cross-Border services 003 (PS)', async t => {
      const crossborderHtml = fs.readFileSync('tests/data/no/cb_003_ps.html').toString();
      const expectedJSON = jsonfile.readFileSync('tests/data/no/cb_003_ps.json');

      const output = await noScraper.recurseCrossborderHtml(crossborderHtml);

      t.deepEquals(output, expectedJSON, 'Extracted cross-border services from html');
      t.end();
    });

    t.test('NO:: Extract Cross-Border services 004 (PS)', async t => {
      const crossborderHtml = fs.readFileSync('tests/data/no/cb_004_ps.html').toString();
      const expectedJSON = jsonfile.readFileSync('tests/data/no/cb_004_ps.json');

      const output = await noScraper.recurseCrossborderHtml(crossborderHtml);

      t.deepEquals(output, expectedJSON, 'Extracted cross-border services from html');
      t.end();
    });

    t.test('NO:: Extract Cross-Border services 005 (PS with an empty list item)', async t => {
      const crossborderHtml = fs.readFileSync('tests/data/no/cb_005_ps_with_empty_item.html').toString();
      const expectedJSON = jsonfile.readFileSync('tests/data/no/cb_005_ps_with_empty_item.json');

      const output = await noScraper.recurseCrossborderHtml(crossborderHtml);

      t.deepEquals(output, expectedJSON, 'Extracted cross-border services from html');
      t.end();
    });

    t.test('NO:: Extract Cross-Border services 006 (PS with multiple countries)', async t => {
      const crossborderHtml = fs.readFileSync('tests/data/no/cb_006_em_with_multi_countries.html').toString();
      const expectedJSON = jsonfile.readFileSync('tests/data/no/cb_006_em_with_multi_countries.json');

      const output = await noScraper.recurseCrossborderHtml(crossborderHtml);

      t.deepEquals(output, expectedJSON, 'Extracted cross-border services from html');
      t.end();
    });

    t.end();
  });

  t.end();
});