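// Fixture-driven tape tests for the NO (Finanstilsynet) scraper:
// index extraction, entity details/license parsing, the content sniffer
// and cross-border data.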
const tape = require('tape');
const _test = require('tape-promise').default; // <---- notice 'default'
const test = _test(tape); // decorate tape

const fs = require('fs');
const jsonfile = require('jsonfile');

const NO = require('../ncas/no');

const noScraper = new NO();

const failure = { 'fail': true };
const empty = {};

test.test('Indexes', async t => {
  test.test('NO:: Process simple index', async t => {
    const htmlFile = 'tests/data/no/index_001.html';
    const psDetail = fs.readFileSync(htmlFile);
    const expectedJSON = jsonfile.readFileSync('tests/data/no/index_001.json');

    noScraper.rootURI = 'https://www.finanstilsynet.no';

    const output = await noScraper.extractIndexItems(psDetail);

    t.deepEquals(output, expectedJSON, 'Extracted index');
    t.end();
  });

  t.end();
});

test.test('Entity', async t => {
  test.test('NO:: Process PS Entity 001', async t => {
    const htmlFile = 'tests/data/no/ent_001.html';

    t.test('NO::Extract entity details', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_001_details.json');

      const output = await noScraper.extractEntityDetails(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted entity details from Page');
      t.end();
    });

    t.test('NO::Extract entity license', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_001_license.json');

      const output = await noScraper.extractEntityDetailLicense(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted entity license from Page');
      t.end();
    });

    t.test('NO::Extract license blocks through sniffer', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_001_sniff.json');

      const output = await noScraper.entityContentSniffer(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted license blocks through sniffer');
      t.end();
    });

    t.end();
  });

  test.test('NO:: Process PS Entity 002', async t => {
    const htmlFile = 'tests/data/no/ent_002.html';

    t.test('NO::Extract entity details', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_002_details.json');

      const output = await noScraper.extractEntityDetails(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted entity details from Page');
      t.end();
    });

    t.test('NO::Extract entity license', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_002_license.json');

      const output = await noScraper.extractEntityDetailLicense(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted entity license from Page');
      t.end();
    });

    t.test('NO::Extract entity Cross Border', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_002_cb.json');

      const output = await noScraper.extractEntityDetailCrossBorder(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted entity cross border from Page');
      t.end();
    });

    t.test('NO::Extract license blocks through sniffer', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_002_sniff.json');

      const output = await noScraper.entityContentSniffer(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted license blocks through sniffer');
      t.end();
    });

    t.end();
  });

  test.test('NO:: Process PS Entity 003', async t => {
    const htmlFile = 'tests/data/no/ent_003.html';

    t.test('NO::Extract license blocks through sniffer', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_003_sniff.json');

      const output = await noScraper.entityContentSniffer(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted license blocks through sniffer');
      t.end();
    });

    t.end();
  });

  test.test('NO:: Process PS Entity 004', async t => {
    const htmlFile = 'tests/data/no/ent_004.html';

    t.test('NO::Extract license blocks through sniffer', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_004_sniff.json');

      const output = await noScraper.entityContentSniffer(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted license blocks through sniffer');
      t.end();
    });

    t.end();
  });

  test.test('NO:: Process PS Entity 005', async t => {
    const htmlFile = 'tests/data/no/ent_005.html';

    t.test('NO::Extract entity Cross Border', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_005_cb.json');

      const output = await noScraper.extractEntityDetailCrossBorder(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted entity cross border from Page');
      t.end();
    });

    t.test('NO::Extract license blocks through sniffer', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_005_sniff.json');

      const output = await noScraper.entityContentSniffer(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted license blocks through sniffer');
      t.end();
    });

    t.end();
  });

  test.test('NO:: Process PS Entity 006', async t => {
    const htmlFile = 'tests/data/no/ent_006.html';

    t.test('NO::CB with two sets of separate data', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_006_cb.json');

      const output = await noScraper.extractEntityDetailCrossBorder(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted CB with two sets of separate data');
      t.end();
    });

    t.test('NO::CB with two sets of separate data through sniffer', async t => {
      const psDetail = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync('tests/data/no/ent_006_sniff.json');

      const output = await noScraper.entityContentSniffer(psDetail);

      t.deepEquals(output, expectedJSON, 'Extracted CB with two sets of separate data through sniffer');
      t.end();
    });

    t.end();
  });

  test.test('NO:: Cross-Border format', async t => {
    t.test('NO:: Extract Cross-Border services 001 (PS)', async t => {
      const crossborderHtml = fs.readFileSync('tests/data/no/cb_001_ps.html').toString();
      const expectedJSON = jsonfile.readFileSync('tests/data/no/cb_001_ps.json');

      const output = await noScraper.recurseCrossborderHtml(crossborderHtml);

      t.deepEquals(output, expectedJSON, 'Extracted cross-border services from html');
      t.end();
    });

    t.test('NO:: Extract Cross-Border services 002 (PS)', async t => {
      const crossborderHtml = fs.readFileSync('tests/data/no/cb_002_ps.html').toString();
      const expectedJSON = jsonfile.readFileSync('tests/data/no/cb_002_ps.json');

      const output = await noScraper.recurseCrossborderHtml(crossborderHtml);

      t.deepEquals(output, expectedJSON, 'Extracted cross-border services from html');
      t.end();
    });

    t.test('NO:: Extract Cross-Border services 003 (PS)', async t => {
      const crossborderHtml = fs.readFileSync('tests/data/no/cb_003_ps.html').toString();
      const expectedJSON = jsonfile.readFileSync('tests/data/no/cb_003_ps.json');

      const output = await noScraper.recurseCrossborderHtml(crossborderHtml);

      t.deepEquals(output, expectedJSON, 'Extracted cross-border services from html');
      t.end();
    });

    t.test('NO:: Extract Cross-Border services 004 (PS)', async t => {
      const crossborderHtml = fs.readFileSync('tests/data/no/cb_004_ps.html').toString();
      const expectedJSON = jsonfile.readFileSync('tests/data/no/cb_004_ps.json');

      const output = await noScraper.recurseCrossborderHtml(crossborderHtml);

      t.deepEquals(output, expectedJSON, 'Extracted cross-border services from html');
      t.end();
    });

    t.test('NO:: Extract Cross-Border services 005 (PS with an empty list item)', async t => {
      const crossborderHtml = fs.readFileSync('tests/data/no/cb_005_ps_with_empty_item.html').toString();
      const expectedJSON = jsonfile.readFileSync('tests/data/no/cb_005_ps_with_empty_item.json');

      const output = await noScraper.recurseCrossborderHtml(crossborderHtml);

      t.deepEquals(output, expectedJSON, 'Extracted cross-border services from html');
      t.end();
    });

    t.test('NO:: Extract Cross-Border services 006 (PS with multiple countries)', async t => {
      const crossborderHtml = fs.readFileSync('tests/data/no/cb_006_em_with_multi_countries.html').toString();
      const expectedJSON = jsonfile.readFileSync('tests/data/no/cb_006_em_with_multi_countries.json');

      const output = await noScraper.recurseCrossborderHtml(crossborderHtml);

      t.deepEquals(output, expectedJSON, 'Extracted cross-border services from html');
      t.end();
    });

    t.end();
  });

  t.end();
});