obdfcascrape/tests/scraper.js

var tape = require('tape');
var _test = require('tape-promise').default; // <---- notice 'default'
var test = _test(tape); // decorate tape for Promise handling

const Scraper = require('../helpers/scraper');

// Checks that Scraper#explodeURL returns the expected { tld, domain, sub }
// object for a range of URLs, and null for an empty or missing argument.
test('SCRAPER::Explode URL', t => {
  const scraper = new Scraper();

  // Each row: [input URL, expected result, assertion message].
  // Rows without a message fall back to tape's default message.
  const cases = [
    [
      'https://www.bbc.co.uk/news',
      { tld: 'co.uk', domain: 'bbc.co.uk', sub: 'www' },
      'Check a standard co.uk domain'
    ],
    [
      'https://mail.caliban.io',
      { tld: 'io', domain: 'caliban.io', sub: 'mail' },
      'One with an odd TLD'
    ],
    [
      'https://register.fca.org.uk/ShPo_HomePage',
      { tld: 'org.uk', domain: 'fca.org.uk', sub: 'register' }
    ],
    [
      'http://registers.centralbank.ie/Home.aspx',
      { tld: 'ie', domain: 'centralbank.ie', sub: 'registers' }
    ],
    [
      'http://vut.finanstilsynet.dk/en/Tal-og-fakta/Virksomheder-under-tilsyn/VUT-soegning.aspx?aid=Payment+services+area&ctid=Payment+institutions',
      { tld: 'dk', domain: 'finanstilsynet.dk', sub: 'vut' }
    ]
  ];

  for (const [url, expected, message] of cases) {
    t.deepEqual(scraper.explodeURL(url), expected, message);
  }

  // Degenerate inputs: an empty string and no argument at all.
  t.equal(scraper.explodeURL(''), null, 'Test against an empty string');
  t.equal(scraper.explodeURL(), null, 'Test against a null value');

  t.end();
});

// Exercises the domain WHOIS helpers: the raw and JSON lookups for a known
// domain, and the rejection path when each helper is called without a URL.
test('SCRAPER::Scraper WHOIS', async function(t) {
  const s = new Scraper();
  // Plan all four assertions. The two rejection checks only run inside
  // `.catch`, so without a plan a call that unexpectedly resolved would skip
  // its assertion and the test would still pass.
  t.plan(4);

  await s._getWhoIsRaw('https://www.names.co.uk/').then((r) => {
    const testReg = /Namesco Limited/;
    t.equal(testReg.test(r), true, 'Get Raw WhoIS');
  });

  await s._getWhoIsJSON('https://www.names.co.uk/').then((r) => {
    t.equal(r.domainName, 'names.co.uk', 'Get JSON WhoIS');
  });

  // Calling without a URL is expected to reject with a truthy reason.
  await s._getWhoIsRaw().catch((e) => {
    t.true(e, '_getWhoIsRaw Promise is caught');
  });

  await s._getWhoIsJSON().catch((e) => {
    t.true(e, '_getWhoIsJSON Promise is caught');
  });

  t.end();
});

// Exercises the WHOIS helpers against a .fr URL that carries a long query
// string: domain JSON lookup, raw lookup, and IP-address JSON lookup.
test('SCRAPER::Scraper WHOIS French Test', async function(t) {
  const s = new Scraper();
  t.plan(3);

  const url = 'https://www.regafi.fr/spip.php?page=results&type=advanced&denomination=&siren=&cib=&bic=&nom=&siren_agent=&num=&cat=21-TBR07&retrait=0&lang=en&id_secteur=3';

  await s._getWhoIsJSON(url).then((r) => {
    t.equal(r.domain, 'regafi.fr', 'Get FR TLD');
  });

  await s._getWhoIsRaw(url).then((r) => {
    // Escape the dot so only the literal domain matches; an unescaped `.`
    // would also accept any character in that position.
    const testReg = /regafi\.fr/;

    t.equal(testReg.test(r), true, 'Get Raw FR WhoIS');
  });

  await s._getWhoIsIPJSON(url).then((r) => {
    t.equal(r.origin, 'AS3215', 'Get JSON WhoIS for IP Address');
  });

  t.end();
});

// Exercises the IP-address WHOIS helpers: the raw and JSON lookups for a known
// site, and the rejection path when each helper is called without a URL.
test('SCRAPER::Scraper IP Address WHOIS', async function(t) {
  const s = new Scraper();
  t.plan(4);

  await s._getWhoIsIPRaw('https://www.names.co.uk/').then((r) => {
    // Escape the dots so only the literal address matches; an unescaped `.`
    // would also accept any character in those positions.
    const testReg = /abuse@names\.co\.uk/;

    t.equal(testReg.test(r), true, 'Get Raw WhoIS for IP Address');
  });

  await s._getWhoIsIPJSON('https://www.names.co.uk/').then((r) => {
    t.equal(r.origin, 'AS8622', 'Get JSON WhoIS for IP Address');
  });

  // Calling without a URL is expected to reject with a truthy reason.
  await s._getWhoIsIPRaw().catch((e) => {
    t.true(e, '_getWhoIsIPRaw Promise is caught');
  });

  await s._getWhoIsIPJSON().catch((e) => {
    t.true(e, '_getWhoIsIPJSON Promise is caught');
  });

  t.end();
});

// Checks Scraper#_getSSLCert against two sites with pinned certificate
// fingerprints, and checks that an unresolvable host rejects with an Error.
test('SCRAPER::Scraper SSL Certificate', async function(t) {
  const scraper = new Scraper();
  t.plan(3);

  const namesCert = await scraper._getSSLCert('https://www.names.co.uk/');
  t.equal(
    namesCert.fingerprint,
    '08:0F:E9:A3:BC:61:FD:A4:97:92:C6:23:16:97:5E:B0:A0:A3:4D:2C',
    'Match fingerprint of names.co.uk SSL certificate'
  );

  // The second argument (5000) is passed through unchanged; presumably a
  // timeout in milliseconds — confirm against Scraper#_getSSLCert.
  const regafiUrl = 'https://www.regafi.fr/spip.php?page=results&type=advanced&denomination=&siren=&cib=&bic=&nom=&siren_agent=&num=&cat=21-TBR07&retrait=0&lang=en&id_secteur=3';
  const regafiCert = await scraper._getSSLCert(regafiUrl, 5000);
  t.equal(
    regafiCert.fingerprint,
    '1B:91:7D:B6:D4:34:FF:F7:7A:05:80:8A:B5:94:EF:22:18:61:39:DF',
    'Match fingerprint of regafi.fr SSL certificate'
  );

  // The rejection reason for an unreachable host must be an Error instance.
  const assertIsError = (err) => {
    t.true(err instanceof Error, '_getSSLCert Promise is caught');
  };
  await scraper._getSSLCert('http://does.not.exists').catch(assertIsError);

  t.end();
});

// Checks that Scraper#_getParamsFromUrl returns the query-string parameters as
// a plain object (key case preserved), and an empty object when there is no
// query string.
test('SCRAPER::Scraper _getParamsFromUrl', function(t) {
  const s = new Scraper();
  t.plan(2);
  t.deepEqual(s._getParamsFromUrl('https://www.site.com/page.html?param1=A&param2=B&Param3=C'), { 'param1': 'A', 'param2': 'B', 'Param3': 'C' }, 'Handle a url with params');

  t.deepEqual(s._getParamsFromUrl('https://www.othersite.com/page.html'), { }, 'Handle a url with NO params');

  t.end();
});