const cheerio = require('cheerio');
const tape = require('tape');
// tape-promise exposes its decorator factory on the `default` property.
const _test = require('tape-promise').default;
const test = _test(tape); // decorate tape
const fs = require('fs');
const jsonfile = require('jsonfile');
const Belgium = require('../ncas/be');

const beScraper = new Belgium();

// HTML fixtures shared by several tests below.
const PS_PAGE = 'tests/data/be/ps_fullpage_001.html';
const EM_PAGE = 'tests/data/be/em_fullpage_001.html';
const CI_PAGE = 'tests/data/be/ci_fullpage_001.html';

/**
 * Read an HTML fixture from disk and parse it with cheerio.
 *
 * @param {string} htmlFile - Path of the fixture file to read.
 * @returns The value returned by `cheerio.load` for that document
 *   (entity decoding is switched off).
 */
function loadFixture(htmlFile) {
  const markup = fs.readFileSync(htmlFile, { encoding: 'utf-8' });
  return cheerio.load(markup, { decodeEntities: false });
}

// Each table row describes one subtest: its name, the fixture page, how to
// pick the container out of the parsed page, and what to compare against.
// `extraArg` is the second argument handed to the scraper method; its meaning
// is defined by ../ncas/be (presumably an entity-category index -- TODO confirm).

const MAIN_DETAILS_CASES = [
  {
    name: '...from td container',
    page: PS_PAGE,
    pick: ($) => $('ul.List1 div.table-responsive tbody tr td').eq(0),
    expected: 'tests/data/be/ps_001_mainDetails.json',
  },
  {
    name: '...from li container',
    page: CI_PAGE,
    pick: ($) => $('ul.List1 ul.List2 > li > ul > li').eq(0),
    expected: 'tests/data/be/ci_001_mainDetails.json',
  },
  {
    name: '...from unusual entity (3-line address and large spacing)',
    page: EM_PAGE,
    pick: ($) => $('ul.List1 div.table-responsive tbody tr').eq(4).children('td').eq(0),
    expected: 'tests/data/be/em_001_mainDetails.json',
  },
];

const FULL_DETAILS_CASES = [
  {
    name: '...from payment service',
    page: PS_PAGE,
    pick: ($) => $('ul.List1 div.table-responsive tbody tr').eq(0),
    extraArg: 0,
    expected: 'tests/data/be/ps_001_fullDetails.json',
  },
  {
    name: '...from emoney service',
    page: EM_PAGE,
    pick: ($) => $('ul.List1 div.table-responsive tbody tr').eq(0),
    extraArg: 0,
    expected: 'tests/data/be/em_001_fullDetails.json',
  },
  {
    name: '...from credit institution',
    page: CI_PAGE,
    pick: ($) => $('ul.List1 ul.List2 > li > ul > li').eq(0),
    extraArg: 2,
    expected: 'tests/data/be/ci_001_fullDetails.json',
  },
];

const CONTAINER_CASES = [
  {
    name: '...of payment services (tbody)',
    page: PS_PAGE,
    pick: ($) => $('ul.List1 tbody'),
    extraArg: 0,
    expectedCount: 24,
  },
  {
    name: '...of credit institutions (ul)',
    page: CI_PAGE,
    // get the first list only for this test
    pick: ($) => $('ul.List1 ul.List2 > li > ul').eq(0),
    extraArg: 2,
    expectedCount: 25,
  },
];

const INDEX_CASES = [
  {
    name: '...of payment services',
    page: PS_PAGE,
    pick: ($) => $('#PrudentialList'),
    extraArg: 0,
  },
  {
    name: '...of credit institutions',
    page: CI_PAGE,
    pick: ($) => $('#PrudentialList'),
    extraArg: 2,
  },
];

test.test('Entities', async (suite) => {
  suite.test('Extract main details...', async (group) => {
    for (const { name, page, pick, expected } of MAIN_DETAILS_CASES) {
      group.test(name, async (t) => {
        const detailsContainer = pick(loadFixture(page));
        const output = await beScraper.extractMainDetails(detailsContainer);
        const expectedJSON = jsonfile.readFileSync(expected);

        t.deepEquals(output, expectedJSON);
        t.end();
      });
    }

    group.end();
  });

  suite.test('Extract full details...', async (group) => {
    for (const { name, page, pick, extraArg, expected } of FULL_DETAILS_CASES) {
      group.test(name, async (t) => {
        const fullDetailsContainer = pick(loadFixture(page));
        const output = await beScraper.extractFullDetails(fullDetailsContainer, extraArg);
        const expectedJSON = jsonfile.readFileSync(expected);

        t.deepEquals(output, expectedJSON);
        t.end();
      });
    }

    group.end();
  });

  suite.test('Extract entities from container...', async (group) => {
    for (const { name, page, pick, extraArg, expectedCount } of CONTAINER_CASES) {
      group.test(name, async (t) => {
        const entitiesContainer = pick(loadFixture(page));
        const output = await beScraper.extractEntitiesFromContainer(entitiesContainer, extraArg);

        t.equals(output.length, expectedCount);
        t.end();
      });
    }

    group.end();
  });

  suite.test('Extract index...', async (group) => {
    for (const { name, page, pick, extraArg } of INDEX_CASES) {
      group.test(name, async (t) => {
        const indexContainer = pick(loadFixture(page));

        // No assertion is made on the result: these subtests only check that
        // the call completes without rejecting.
        await beScraper.extractIndex(indexContainer, extraArg);

        t.end();
      });
    }

    group.end();
  });

  suite.end();
});