obdfcascrape/tests/scrape.fr.js
Martin Donnelly be5d3eae07 init
2019-05-05 20:13:56 +01:00

221 lines
6.6 KiB
JavaScript

const tape = require('tape');
const _test = require('tape-promise').default; // <---- notice 'default'
const test = _test(tape); // decorate tape
const fs = require('fs');
const jsonfile = require('jsonfile');
const cheerio = require('cheerio');
const France = require('../ncas/fr');
const path = require('path');

// Fixtures are resolved relative to this file instead of the process's
// working directory, so the suite loads the same files regardless of where
// the test runner is launched from.
const FIXTURE_DIR = path.join(__dirname, 'data', 'fr');

/** Builds the absolute path of a file inside the French fixture directory. */
const fixturePath = name => path.join(FIXTURE_DIR, name);

/** Reads a saved HTML page; no encoding is passed, so the result is a Buffer. */
const readHtml = name => fs.readFileSync(fixturePath(name));

/** Reads and parses a JSON fixture holding an expected result. */
const readJson = name => jsonfile.readFileSync(fixturePath(name));

// *** Index / search result pages ***
const piIndex = readHtml('pi_index.html');
const emoneyIndex = readHtml('emoney_index.html');

// Expected links for the payment institution index page.
const correctLinks = [
  {
    'link': '/spip.php?type=advanced&denomination=&siren=&cib=&bic=&nom=&siren_agent=&num=&cat=21-TBR07&retrait=0&lang=en&id_secteur=3&pg=6&page=af&id=355',
    'title': 'Tempo France'
  },
  {
    'link': '/spip.php?type=advanced&denomination=&siren=&cib=&bic=&nom=&siren_agent=&num=&cat=21-TBR07&retrait=0&lang=en&id_secteur=3&pg=6&page=af&id=364',
    'title': 'U ETABLISSEMENT DE PAIEMENT'
  },
  {
    'link': '/spip.php?type=advanced&denomination=&siren=&cib=&bic=&nom=&siren_agent=&num=&cat=21-TBR07&retrait=0&lang=en&id_secteur=3&pg=6&page=af&id=359',
    'title': 'Webhelp Payment Services France'
  }
];

const creditList = readHtml('credit_list.html');

// *** Individual entity pages ***
const paymentInstitute = readHtml('payment_institute.html');
const eMoney = readHtml('emoney.html');
const creditInstitute = readHtml('credit_institute.html');

// *** Expected results ***
const piDetails = readJson('piDetails.json');
const piFrenchActivity = readJson('piFrenchActivity.json');
const piEuropeActivity = readJson('piEuropeActivity.json');
const emoneyDetails = readJson('emoney_details.json');
const emoneyFrenchActivity = readJson('emoneyFrenchActivity.json');
const emoneyEuropeActivity = readJson('emoneyEuropeActivity.json');
const emoneyListData = readJson('emoney_index.json');
const creditListData = readJson('credit_list.json');
const creditDetails = readJson('creditDetails.json');
const creditData = readJson('creditInstituteData.json');

// *** Breaking Pages ***
const breakingCr001 = readHtml('breaking_CI_001.html');
const breakingCr001Data = readJson('breaking_CI_001.json');
// Index pages: each sub-test loads a saved search-result page, passes the
// rows matched by 'table.table tr' to extractLinks and compares the result
// with the expected link list.
test('FRANCE:: Scrape Indexes', async (t) => {
  const scraper = new France();

  t.test('Extract PI Search links', async (st) => {
    const rows = cheerio.load(piIndex)('table.table tr');
    const found = await scraper.extractLinks(rows);

    st.equal(found.length, 3, 'Scrapes the correct number of links (3)');
    st.deepEquals(found, correctLinks, 'Links match the data');
    st.end();
  });

  t.test('Extract EMoney Search links', async (st) => {
    const rows = cheerio.load(emoneyIndex)('table.table tr');
    const found = await scraper.extractLinks(rows);

    st.equal(found.length, 10, 'Scrapes the correct number of links (10)');
    st.deepEquals(found, emoneyListData, 'Links match the data');
    st.end();
  });

  t.test('Extract Credit List Search links', async (st) => {
    const rows = cheerio.load(creditList)('table.table tr');
    // NOTE(review): the second argument `true` is only passed for the credit
    // list; its meaning is defined by extractLinks - confirm against ncas/fr.
    const found = await scraper.extractLinks(rows, true);

    st.equal(found.length, 1, 'Scrapes the correct number of links (1)');
    st.deepEquals(found, creditListData, 'Credit Links match the data');
    st.end();
  });

  t.end();
});
// Payment institution page: checks the details extraction and the French and
// European activity tables against the stored JSON expectations.
test('FRANCE Scrape a Payment Instititute', async t => {
  const frScraper = new France();

  t.test('FRANCE::Extract Details from Page', async t => {
    const $ = cheerio.load(paymentInstitute);
    const output = await frScraper.extractDetails($);
    t.deepEquals(output, piDetails, 'Extracted Details from Page');
    t.end();
  });

  t.test('FRANCE::Extract French Payment Activity', async t => {
    const $ = cheerio.load(paymentInstitute);
    const frenchTbl = $('#zone_en_france > table tr');
    const output = await frScraper.extractDataFromTable(frenchTbl);
    t.deepEquals(output, piFrenchActivity, 'Extracted French Payment Activity');
    t.end();
  });

  t.test('FRANCE::Extract European Payment Activity', async t => {
    const $ = cheerio.load(paymentInstitute);
    const euroTbls = $('#zone_en_europe');
    // Declared with const: the previous bare assignment created an implicit
    // global shared across tests and would throw under strict mode.
    const output = await frScraper.extractEuroData(euroTbls);
    t.deepEquals(output, piEuropeActivity, 'Extracted European Payment Activity');
    t.end();
  });

  t.end();
});
// E-money institution page: checks the details extraction and the French and
// European activity tables against the stored JSON expectations.
test('FRANCE Scrape EMoney', async t => {
  const frScraper = new France();

  t.test('FRANCE::Extract Details from Page', async t => {
    const $ = cheerio.load(eMoney);
    const output = await frScraper.extractDetails($);
    t.deepEquals(output, emoneyDetails, 'Extracted Details from Page');
    t.end();
  });

  t.test('FRANCE::Extract French Payment Activity', async t => {
    const $ = cheerio.load(eMoney);
    const frenchTbl = $('#zone_en_france > table tr');
    const output = await frScraper.extractDataFromTable(frenchTbl);
    t.deepEquals(output, emoneyFrenchActivity, 'Extracted French Payment Activity');
    t.end();
  });

  t.test('FRANCE::Extract European Payment Activity', async t => {
    const $ = cheerio.load(eMoney);
    const euroTbls = $('#zone_en_europe');
    // Declared with const: the previous bare assignment created an implicit
    // global shared across tests and would throw under strict mode.
    const output = await frScraper.extractEuroData(euroTbls);
    t.deepEquals(output, emoneyEuropeActivity, 'Extracted European Payment Activity');
    t.end();
  });

  t.end();
});
// Credit institution page: checks the details extraction and the investment
// services table against the stored JSON expectations.
test('FRANCE Scrape Credit Insititute', async (t) => {
  const scraper = new France();

  t.test('FRANCE::Extract Details from Page', async (st) => {
    const page = cheerio.load(creditInstitute);
    const details = await scraper.extractDetails(page);

    st.deepEquals(details, creditDetails, 'Extracted Details from Page');
    st.end();
  });

  t.test('FRANCE::Extract French Payment Activity', async (st) => {
    const page = cheerio.load(creditInstitute);
    // Rows of the investment services table inside the French zone.
    const serviceRows = page('#zone_en_france > table.petite-police.services-invest tr');
    const activity = await scraper.extractDataFromInvestmentServicesTable(serviceRows);

    st.deepEquals(activity, creditData, 'Extracted Credit Instituet Activity');
    st.end();
  });

  t.end();
});
// Regression case: runs extractDetails on the saved breaking_CI_001 page and
// compares the result with its stored JSON expectation.
test('FRANCE Breaking CI 001', async (t) => {
  const scraper = new France();

  t.test('FRANCE::Extract Details from Page', async (st) => {
    const page = cheerio.load(breakingCr001);
    const details = await scraper.extractDetails(page);

    st.deepEquals(details, breakingCr001Data, 'Extracted Details from Page');
    st.end();
  });

  t.end();
});