/**
 * Tape test suite for the Gibraltar scraper (`../ncas/gi`).
 *
 * Three groups of tests are registered:
 *   - 'Unit': selector helpers exercised against small HTML fixtures.
 *   - 'Entities': `extractEntityDetails` output compared with stored JSON.
 *   - 'Permissions, Agents, and other html fragments': `recurseDOM` and
 *     `extractAgents` output compared with stored JSON.
 *
 * All fixture paths are relative to the process working directory.
 */
const tape = require('tape');
const _test = require('tape-promise').default; // <---- notice 'default'
const test = _test(tape); // decorate tape
const cheerio = require('cheerio');
// NOTE(review): `diff`, `failure` and `empty` are not referenced by any test
// visible here; they are left in place in case other code relies on them.
const diff = require('deep-diff');
const fs = require('fs');
const jsonfile = require('jsonfile');
const Gibraltar = require('../ncas/gi');

const giScraper = new Gibraltar();
const failure = { 'fail':true };
const empty = {};

test.test('Unit', async t => {
  test.test('Get uppermost elements by selector', async t => {
    const htmlFile = 'tests/data/gi/unit/get-uppermost-elements-by-selector.html';
    const html = fs.readFileSync(htmlFile);
    const selector = 'div.foo, li.bar';

    const uppermostElements = await giScraper.getUppermostElementsBySelector(html, selector);

    // The fixture is expected to yield exactly 7 elements, none of which
    // carries 'nomatch' in its class attribute.
    t.equal(uppermostElements.length, 7);
    // Index-based on purpose: the return type of the scraper call is not
    // visible here and may be array-like rather than iterable.
    for (let i = 0; i < uppermostElements.length; i++) {
      t.false(uppermostElements[i].attribs['class'].includes('nomatch'));
    }
    t.end();
  });

  test.test('Get text not in matching elements', async t => {
    // Each fixture is an element in the HTML file, looked up by selector,
    // paired with the whitespace-reduced text the scraper should return.
    const fixtures = [
      {
        'selector': '#fixture1',
        'expectedText': 'This text A should match.'
      },
      {
        'selector': '#fixture2',
        'expectedText': 'This text C should match. This text E should match.'
      },
      {
        'selector': '#fixture3',
        'expectedText': 'This text F should match. This text G should match. This text I should match.'
      }
    ];
    const htmlFile = 'tests/data/gi/unit/get-text-not-in-matching-elements.html';
    const html = fs.readFileSync(htmlFile);
    const selector = 'div.foo, li.bar';
    const $ = cheerio.load(html);

    // Sequential on purpose so assertions are emitted in fixture order.
    for (const fixture of fixtures) {
      const fixtureHtml = $(fixture.selector).html();
      const textNotInMatchingElements = await giScraper.getTextNotInMatchingElements(fixtureHtml, selector);
      const reducedText = await giScraper._reduceWhiteSpace(textNotInMatchingElements);
      t.equal(reducedText, fixture.expectedText);
    }
    t.end();
  });

  t.end();
});

test.test('Entities', async t => {
  // One entry per entity page: the test name, the HTML page handed to the
  // scraper and the JSON file holding the expected result.
  const entityCases = [
    {
      name: 'Gibraltar:: Process PS Entity 001',
      htmlFile: 'tests/data/gi/ps_001.html',
      jsonFile: 'tests/data/gi/ps_001.json'
    },
    {
      name: 'Gibraltar:: Process EM Entity 001',
      htmlFile: 'tests/data/gi/em_001.html',
      jsonFile: 'tests/data/gi/em_001.json'
    },
    {
      name: 'Gibraltar:: Process Agent Entity 001',
      htmlFile: 'tests/data/gi/agent_001.html',
      jsonFile: 'tests/data/gi/agent_001.json'
    }
  ];

  for (const { name, htmlFile, jsonFile } of entityCases) {
    test.test(name, async t => {
      t.test('🇬🇮::Extract entity details', async t => {
        const entityDetail = fs.readFileSync(htmlFile);
        const expectedJSON = jsonfile.readFileSync(jsonFile);

        const output = await giScraper.extractEntityDetails(entityDetail);

        // don't compare perms in these tests
        delete output.permissions;
        t.deepEquals(output, expectedJSON, 'Extracted entity details from Page');
        t.end();
      });
      t.end();
    });
  }

  t.end();
});

test.test('Permissions, Agents, and other html fragments', async t => {
  // Permission fragments are run through recurseDOM with the scraper's own
  // block boundaries and compared with the stored JSON.
  const permissionCases = [
    {
      name: 'Gibraltar:: Process permissions 001',
      htmlFile: 'tests/data/gi/perms_001.html',
      jsonFile: 'tests/data/gi/perms_001.json'
    },
    {
      name: 'Gibraltar:: Process permissions 002',
      htmlFile: 'tests/data/gi/perms_002.html',
      jsonFile: 'tests/data/gi/perms_002.json'
    }
  ];

  for (const { name, htmlFile, jsonFile } of permissionCases) {
    test.test(name, async t => {
      const permissionsHtml = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync(jsonFile);

      const output = await giScraper.recurseDOM(permissionsHtml, giScraper.blockBoundaries);

      t.deepEquals(output, expectedJSON, 'Extracted permissions from html');
      t.end();
    });
  }

  test.test('Gibraltar:: Process agents in perms 001', async t => {
    const agentsHtml = fs.readFileSync('tests/data/gi/agents_in_perms_001.html');
    const expectedJSON = jsonfile.readFileSync('tests/data/gi/agents_in_perms_001.json');

    const output = await giScraper.extractAgents(agentsHtml);

    t.deepEquals(output, expectedJSON, 'Extracted agents from html');
    t.end();
  });

  t.end();
});