// obdfcascrape/tests/scrape.be.js
// Martin Donnelly 534fd67b5d final update
// 2019-08-15 08:48:49 +01:00
//
// 167 lines
// 5.8 KiB
// JavaScript

const cheerio = require('cheerio');
const tape = require('tape');
const _test = require('tape-promise').default; // <---- notice 'default'
const test = _test(tape); // decorate tape
const fs = require('fs');
const jsonfile = require('jsonfile');
const Belgium = require('../ncas/be');
// Scraper instance shared by the sub-tests below; each one calls its extract* methods.
const beScraper = new Belgium();
test.test('Entities', async t => {
// Compares beScraper.extractMainDetails() output with stored JSON fixtures
// for three saved HTML pages.
t.test('Extract main details...', async t => {
  // One entry per sub-test: title, HTML fixture, a function selecting the
  // details node from the loaded page, and the JSON fixture to compare with.
  const scenarios = [
    {
      title: '...from td container',
      page: 'tests/data/be/ps_fullpage_001.html',
      pick: $ => $('ul.List1 div.table-responsive tbody tr td').eq(0),
      expected: 'tests/data/be/ps_001_mainDetails.json'
    },
    {
      title: '...from li container',
      page: 'tests/data/be/ci_fullpage_001.html',
      pick: $ => $('ul.List1 ul.List2 > li > ul > li').eq(0),
      expected: 'tests/data/be/ci_001_mainDetails.json'
    },
    {
      title: '...from unusual entity (3-line address and large spacing)',
      page: 'tests/data/be/em_fullpage_001.html',
      pick: $ => $('ul.List1 div.table-responsive tbody tr').eq(4).children('td').eq(0),
      expected: 'tests/data/be/em_001_mainDetails.json'
    }
  ];

  for (const { title, page, pick, expected } of scenarios) {
    t.test(title, async sub => {
      const markup = fs.readFileSync(page, { 'encoding': 'utf-8' });
      const $ = cheerio.load(markup, { 'decodeEntities': false });
      const actual = await beScraper.extractMainDetails(pick($));
      const wanted = jsonfile.readFileSync(expected);
      sub.deepEquals(actual, wanted);
      sub.end();
    });
  }

  t.end();
});
// Compares beScraper.extractFullDetails() output with stored JSON fixtures
// for three saved HTML pages.
t.test('Extract full details...', async t => {
  // One entry per sub-test. `param` is the second argument handed to
  // extractFullDetails(); its meaning is not visible in this file.
  const scenarios = [
    {
      title: '...from payment service',
      page: 'tests/data/be/ps_fullpage_001.html',
      pick: $ => $('ul.List1 div.table-responsive tbody tr').eq(0),
      param: 0,
      expected: 'tests/data/be/ps_001_fullDetails.json'
    },
    {
      title: '...from emoney service',
      page: 'tests/data/be/em_fullpage_001.html',
      pick: $ => $('ul.List1 div.table-responsive tbody tr').eq(0),
      param: 0,
      expected: 'tests/data/be/em_001_fullDetails.json'
    },
    {
      title: '...from credit institution',
      page: 'tests/data/be/ci_fullpage_001.html',
      pick: $ => $('ul.List1 ul.List2 > li > ul > li').eq(0),
      param: 2,
      expected: 'tests/data/be/ci_001_fullDetails.json'
    }
  ];

  for (const { title, page, pick, param, expected } of scenarios) {
    t.test(title, async sub => {
      const markup = fs.readFileSync(page, { 'encoding': 'utf-8' });
      const $ = cheerio.load(markup, { 'decodeEntities': false });
      const actual = await beScraper.extractFullDetails(pick($), param);
      const wanted = jsonfile.readFileSync(expected);
      sub.deepEquals(actual, wanted);
      sub.end();
    });
  }

  t.end();
});
// Checks how many entities beScraper.extractEntitiesFromContainer() returns
// for two saved HTML pages.
t.test('Extract entities from container...', async t => {
  // One entry per sub-test. `param` is the second argument handed to
  // extractEntitiesFromContainer(); `count` is the expected result length.
  const scenarios = [
    {
      title: '...of payment services (tbody)',
      page: 'tests/data/be/ps_fullpage_001.html',
      pick: $ => $('ul.List1 tbody'),
      param: 0,
      count: 24
    },
    {
      title: '...of credit institutions (ul)',
      page: 'tests/data/be/ci_fullpage_001.html',
      // Only the first matching list is used for this test.
      pick: $ => $('ul.List1 ul.List2 > li > ul').eq(0),
      param: 2,
      count: 25
    }
  ];

  for (const { title, page, pick, param, count } of scenarios) {
    t.test(title, async sub => {
      const markup = fs.readFileSync(page, { 'encoding': 'utf-8' });
      const $ = cheerio.load(markup, { 'decodeEntities': false });
      const entities = await beScraper.extractEntitiesFromContainer(pick($), param);
      sub.equals(entities.length, count);
      sub.end();
    });
  }

  t.end();
});
// Runs beScraper.extractIndex() against the '#PrudentialList' node of two
// saved HTML pages.
// TODO(review): no assertions are made on the returned value, so these
// sub-tests only exercise the call — add expectations once the expected
// index output is pinned down.
t.test('Extract index...', async t => {
  // One entry per sub-test. `param` is the second argument handed to
  // extractIndex(); its meaning is not visible in this file.
  const scenarios = [
    {
      title: '...of payment services',
      page: 'tests/data/be/ps_fullpage_001.html',
      param: 0
    },
    {
      title: '...of credit institutions',
      page: 'tests/data/be/ci_fullpage_001.html',
      param: 2
    }
  ];

  for (const { title, page, param } of scenarios) {
    t.test(title, async sub => {
      const markup = fs.readFileSync(page, { 'encoding': 'utf-8' });
      const $ = cheerio.load(markup, { 'decodeEntities': false });
      await beScraper.extractIndex($('#PrudentialList'), param);
      sub.end();
    });
  }

  t.end();
});
t.end();
});