obdfcascrape/tests/scrape.no.js
Martin Donnelly be5d3eae07 init
2019-05-05 20:13:56 +01:00

231 lines
6.5 KiB
JavaScript

// Test harness: tape, decorated by tape-promise.
const tape = require('tape');
// tape-promise exposes its factory on the `default` property of the CommonJS export.
const _test = require('tape-promise').default; // <---- notice 'default'
const test = _test(tape); // decorate tape
// Fixture loading: raw HTML pages via fs, expected results via jsonfile.
const fs = require('fs');
const jsonfile = require('jsonfile');
// Scraper under test; a single instance is shared by every test in this file.
const NO = require('../ncas/no');
const noScraper = new NO();
// NOTE(review): `failure` and `empty` are not referenced by any test visible in
// this file — presumably placeholders for failure-path tests; confirm before removing.
const failure = { 'fail':true };
const empty = {};
/**
 * Index page tests for the NO scraper.
 *
 * Registers one check that feeds a saved index page to
 * `noScraper.extractIndexItems` and compares the result with the stored
 * JSON fixture.
 */
test.test('Indexes', async function (suite) {
  test.test('NO:: Process simple index', async function (check) {
    const indexHtml = fs.readFileSync('tests/data/no/index_001.html');
    const wanted = jsonfile.readFileSync('tests/data/no/index_001.json');

    // The root URI is set on the shared scraper instance before extraction.
    // NOTE(review): presumably used to build absolute links — confirm in ../ncas/no.
    noScraper.rootURI = 'https://www.finanstilsynet.no';

    const got = await noScraper.extractIndexItems(indexHtml);

    check.deepEquals(got, wanted, 'Extracted index');
    check.end();
  });

  suite.end();
});
/**
 * Entity page tests for the NO scraper.
 *
 * For each saved entity page `tests/data/no/ent_<id>.html`, one group is
 * registered, and inside it one sub-test per extractor. Every sub-test reads
 * the page, runs the named `noScraper` method on it and deep-compares the
 * result with `tests/data/no/ent_<id>_<fixture>.json`.
 *
 * The checks are table-driven: to cover a new page or extractor, add an entry
 * to `entityCases` instead of copying a test body.
 */
test.test('Entity', suite => {
  const dataDir = 'tests/data/no';

  // Reusable check descriptors:
  //   name    - sub-test title
  //   fixture - suffix of the expected-result JSON file
  //   method  - noScraper method under test
  //   message - assertion message reported by tape
  const DETAILS = {
    name: 'NO::Extract entity details',
    fixture: 'details',
    method: 'extractEntityDetails',
    message: 'Extracted entity details from Page'
  };
  const LICENSE = {
    name: 'NO::Extract entity license',
    fixture: 'license',
    method: 'extractEntityDetailLicense',
    message: 'Extracted entity license from Page'
  };
  const CROSS_BORDER = {
    name: 'NO::Extract entity Cross Border',
    fixture: 'cb',
    method: 'extractEntityDetailCrossBorder',
    message: 'Extracted entity cross border from Page'
  };
  // The sniffer checks used to reuse the "entity details" message, which
  // mislabelled sniffer failures in the report; they now have their own.
  const SNIFFER = {
    name: 'NO::Extract license blocks through sniffer',
    fixture: 'sniff',
    method: 'entityContentSniffer',
    message: 'Extracted license blocks through sniffer'
  };

  const entityCases = [
    { id: '001', checks: [DETAILS, LICENSE, SNIFFER] },
    { id: '002', checks: [DETAILS, LICENSE, CROSS_BORDER, SNIFFER] },
    { id: '003', checks: [SNIFFER] },
    { id: '004', checks: [SNIFFER] },
    { id: '005', checks: [CROSS_BORDER, SNIFFER] },
    {
      id: '006',
      checks: [
        {
          name: 'NO::CB with two sets of separate data',
          fixture: 'cb',
          method: 'extractEntityDetailCrossBorder',
          message: 'Extracted CB with two sets of separate data'
        },
        {
          name: 'NO::CB with two sets of separate data through sniffer',
          fixture: 'sniff',
          method: 'entityContentSniffer',
          message: 'Extracted CB with two sets of separate data through sniffer'
        }
      ]
    }
  ];

  for (const entity of entityCases) {
    const htmlFile = `${dataDir}/ent_${entity.id}.html`;

    test.test(`NO:: Process PS Entity ${entity.id}`, group => {
      for (const check of entity.checks) {
        const jsonFile = `${dataDir}/ent_${entity.id}_${check.fixture}.json`;

        group.test(check.name, async st => {
          // NOTE(review): `test.test` / `t.test` look like plain tape entry
          // points rather than the tape-promise wrapper, so a rejected
          // callback may go unreported on older tape versions — confirm.
          // Catching here turns a missing fixture or a throwing extractor
          // into an ordinary failed assertion, and the sub-test always ends.
          try {
            const psDetail = fs.readFileSync(htmlFile);
            const expectedJSON = jsonfile.readFileSync(jsonFile);
            // Called through noScraper so `this` stays bound to the scraper.
            const output = await noScraper[check.method](psDetail);
            st.deepEquals(output, expectedJSON, check.message);
          } catch (err) {
            st.error(err);
          } finally {
            st.end();
          }
        });
      }

      group.end();
    });
  }

  suite.end();
});