167 lines
5.0 KiB
JavaScript
167 lines
5.0 KiB
JavaScript
const tape = require('tape');
|
|
const _test = require('tape-promise').default; // <---- notice 'default'
|
|
const test = _test(tape); // decorate tape
|
|
|
|
const cheerio = require('cheerio')
|
|
const diff = require('deep-diff');
|
|
const fs = require('fs');
|
|
const jsonfile = require('jsonfile');
|
|
|
|
const Gibraltar = require('../ncas/gi');
|
|
|
|
// Single scraper instance shared by every test case in this file.
const giScraper = new Gibraltar();

// Sentinel objects; neither is referenced by the test cases visible in this file.
const failure = { fail: true };
const empty = {};
|
|
|
|
// Unit-level checks of the scraper's DOM helper methods.
// NOTE(review): the nested cases are registered through the module-level
// `test.test`, not through this callback's own handle — confirm whether real
// subtests were intended.
test.test('Unit', async unitGroup => {
  // Expects exactly seven elements back from the fixture, none of which has a
  // class attribute containing 'nomatch'.
  test.test('Get uppermost elements by selector', async t => {
    const selector = 'div.foo, li.bar';
    const html = fs.readFileSync('tests/data/gi/unit/get-uppermost-elements-by-selector.html');

    const found = await giScraper.getUppermostElementsBySelector(html, selector);

    t.equal(found.length, 7);

    // Index-based access on purpose: only `.length` and `[i]` are relied upon.
    let idx = 0;
    while (idx < found.length) {
      const classAttr = found[idx].attribs['class'];
      t.false(classAttr.includes('nomatch'));
      idx += 1;
    }

    t.end();
  });

  // For each fixture container, the text returned by
  // getTextNotInMatchingElements, after whitespace reduction, must equal the
  // expected sentence(s).
  test.test('Get text not in matching elements', async t => {
    const excludeSelector = 'div.foo, li.bar';
    const fixtures = [
      {
        selector: '#fixture1',
        expectedText: 'This text A should match.',
      },
      {
        selector: '#fixture2',
        expectedText: 'This text C should match. This text E should match.',
      },
      {
        selector: '#fixture3',
        expectedText: 'This text F should match. This text G should match. This text I should match.',
      },
    ];

    const page = fs.readFileSync('tests/data/gi/unit/get-text-not-in-matching-elements.html');
    const $ = cheerio.load(page);

    // Fixtures are processed one after another; each iteration awaits the scraper.
    for (const { selector: fixtureSelector, expectedText } of fixtures) {
      const fragment = $(fixtureSelector).html();
      const rawText = await giScraper.getTextNotInMatchingElements(fragment, excludeSelector);
      const actualText = await giScraper._reduceWhiteSpace(rawText);

      t.equal(actualText, expectedText);
    }

    t.end();
  });

  unitGroup.end();
});
|
|
|
|
// Entity-page checks: each saved HTML page is run through
// extractEntityDetails and compared against its stored JSON expectation.
test.test('Entities', async entitiesGroup => {
  // One row per entity page: test title, HTML input, expected JSON output.
  const entityCases = [
    {
      title: 'Gibraltar:: Process PS Entity 001',
      htmlFile: 'tests/data/gi/ps_001.html',
      jsonFile: 'tests/data/gi/ps_001.json',
    },
    {
      title: 'Gibraltar:: Process EM Entity 001',
      htmlFile: 'tests/data/gi/em_001.html',
      jsonFile: 'tests/data/gi/em_001.json',
    },
    {
      title: 'Gibraltar:: Process Agent Entity 001',
      htmlFile: 'tests/data/gi/agent_001.html',
      jsonFile: 'tests/data/gi/agent_001.json',
    },
  ];

  // Cases are registered synchronously, in table order.
  for (const { title, htmlFile, jsonFile } of entityCases) {
    test.test(title, async entityCase => {
      entityCase.test('🇬🇮::Extract entity details', async t => {
        const detailHtml = fs.readFileSync(htmlFile);
        const expectedJSON = jsonfile.readFileSync(jsonFile);

        const output = await giScraper.extractEntityDetails(detailHtml);

        // Permissions are deliberately excluded from the comparison here.
        delete output.permissions;

        t.deepEquals(output, expectedJSON, 'Extracted entity details from Page');

        t.end();
      });

      entityCase.end();
    });
  }

  entitiesGroup.end();
});
|
|
|
|
// Fragment-level checks: permission blocks go through recurseDOM and the
// agents fragment through extractAgents, each compared to stored JSON.
test.test('Permissions, Agents, and other html fragments', async fragmentsGroup => {
  // Permission fixtures that share the same recurseDOM-based check.
  const permissionCases = [
    {
      title: 'Gibraltar:: Process permissions 001',
      htmlFile: 'tests/data/gi/perms_001.html',
      jsonFile: 'tests/data/gi/perms_001.json',
    },
    {
      title: 'Gibraltar:: Process permissions 002',
      htmlFile: 'tests/data/gi/perms_002.html',
      jsonFile: 'tests/data/gi/perms_002.json',
    },
  ];

  for (const { title, htmlFile, jsonFile } of permissionCases) {
    test.test(title, async t => {
      const permissionsHtml = fs.readFileSync(htmlFile);
      const expectedJSON = jsonfile.readFileSync(jsonFile);

      const output = await giScraper.recurseDOM(permissionsHtml, giScraper.blockBoundaries);

      t.deepEquals(output, expectedJSON, 'Extracted permissions from html');

      t.end();
    });
  }

  // Registered after the permission cases, so it stays last in this group.
  test.test('Gibraltar:: Process agents in perms 001', async t => {
    const agentsHtml = fs.readFileSync('tests/data/gi/agents_in_perms_001.html');
    const expectedJSON = jsonfile.readFileSync('tests/data/gi/agents_in_perms_001.json');

    const output = await giScraper.extractAgents(agentsHtml);

    t.deepEquals(output, expectedJSON, 'Extracted agents from html');

    t.end();
  });

  fragmentsGroup.end();
});
|