/**
 * Tests for the Scraper helper: URL explosion, WHOIS lookups (domain and IP),
 * SSL certificate retrieval and query-string parsing.
 *
 * NOTE(review): the WHOIS / SSL expectations below (registrar text, AS numbers,
 * certificate fingerprints) look like values captured from live services, so
 * they presumably need network access and will drift over time — confirm.
 */
const tape = require('tape');
const _test = require('tape-promise').default; // <---- notice 'default'
const test = _test(tape); // decorate tape for Promise handling

const Scraper = require('../helpers/scraper');

// URLs shared by several tests below.
const NAMES_URL = 'https://www.names.co.uk/';
const REGAFI_URL = 'https://www.regafi.fr/spip.php?page=results&type=advanced&denomination=&siren=&cib=&bic=&nom=&siren_agent=&num=&cat=21-TBR07&retrait=0&lang=en&id_secteur=3';

// explodeURL splits a URL into its TLD, registrable domain and sub-domain,
// and returns null when given nothing usable.
test('SCRAPER::Explode URL', t => {
  const s = new Scraper();

  t.deepEqual(
    s.explodeURL('https://www.bbc.co.uk/news'),
    { tld: 'co.uk', domain: 'bbc.co.uk', sub: 'www' },
    'Check a standard co.uk domain'
  );

  t.deepEqual(
    s.explodeURL('https://mail.caliban.io'),
    { tld: 'io', domain: 'caliban.io', sub: 'mail' },
    'One with an odd TLD'
  );

  t.deepEqual(
    s.explodeURL('https://register.fca.org.uk/ShPo_HomePage'),
    { tld: 'org.uk', domain: 'fca.org.uk', sub: 'register' }
  );

  t.deepEqual(
    s.explodeURL('http://registers.centralbank.ie/Home.aspx'),
    { tld: 'ie', domain: 'centralbank.ie', sub: 'registers' }
  );

  t.deepEqual(
    s.explodeURL('http://vut.finanstilsynet.dk/en/Tal-og-fakta/Virksomheder-under-tilsyn/VUT-soegning.aspx?aid=Payment+services+area&ctid=Payment+institutions'),
    { tld: 'dk', domain: 'finanstilsynet.dk', sub: 'vut' }
  );

  t.equal(s.explodeURL(''), null, 'Test against an empty string');
  t.equal(s.explodeURL(), null, 'Test against a null value');

  t.end();
});

test('SCRAPER::Scraper WHOIS', async function(t) {
  const s = new Scraper();

  // Two of the four assertions only run inside rejection handlers; the plan
  // makes the test fail (instead of silently passing) if a call that is
  // expected to reject resolves instead.
  t.plan(4);

  const raw = await s._getWhoIsRaw(NAMES_URL);
  t.equal(/Namesco Limited/.test(raw), true, 'Get Raw WhoIS');

  const json = await s._getWhoIsJSON(NAMES_URL);
  t.equal(json.domainName, 'names.co.uk', 'Get JSON WhoIS');

  await s._getWhoIsRaw().catch((e) => {
    t.true(e, '_getWhoIsRaw Promise is caught');
  });

  await s._getWhoIsJSON().catch((e) => {
    t.true(e, '_getWhoIsJSON Promise is caught');
  });

  t.end();
});

test('SCRAPER::Scraper WHOIS French Test', async function(t) {
  const s = new Scraper();

  t.plan(3);

  const url = REGAFI_URL;

  const json = await s._getWhoIsJSON(url);
  t.equal(json.domain, 'regafi.fr', 'Get FR TLD');

  const raw = await s._getWhoIsRaw(url);
  // Dot escaped so that only a literal "regafi.fr" matches.
  t.equal(/regafi\.fr/.test(raw), true, 'Get Raw FR WhoIS');

  const ipJson = await s._getWhoIsIPJSON(url);
  t.equal(ipJson.origin, 'AS3215', 'Get JSON WhoIS for IP Address');

  t.end();
});

test('SCRAPER::Scraper IP Address WHOIS', async function(t) {
  const s = new Scraper();

  // The plan guards the two rejection-only assertions at the end.
  t.plan(4);

  const raw = await s._getWhoIsIPRaw(NAMES_URL);
  // Dots escaped so that only the literal address matches.
  t.equal(/abuse@names\.co\.uk/.test(raw), true, 'Get Raw WhoIS for IP Address');

  const json = await s._getWhoIsIPJSON(NAMES_URL);
  t.equal(json.origin, 'AS8622', 'Get JSON WhoIS for IP Address');

  await s._getWhoIsIPRaw().catch((e) => {
    t.true(e, '_getWhoIsIPRaw Promise is caught');
  });

  await s._getWhoIsIPJSON().catch((e) => {
    t.true(e, '_getWhoIsIPJSON Promise is caught');
  });

  t.end();
});

test('SCRAPER::Scraper SSL Certificate', async function(t) {
  const s = new Scraper();

  // The plan guards the rejection-only assertion at the end.
  t.plan(3);

  const namesCert = await s._getSSLCert(NAMES_URL);
  t.equal(
    namesCert.fingerprint,
    '08:0F:E9:A3:BC:61:FD:A4:97:92:C6:23:16:97:5E:B0:A0:A3:4D:2C',
    'Match fingerprint of names.co.uk SSL certificate'
  );

  // NOTE(review): second argument is presumably a timeout in ms — confirm.
  const regafiCert = await s._getSSLCert(REGAFI_URL, 5000);
  t.equal(
    regafiCert.fingerprint,
    '1B:91:7D:B6:D4:34:FF:F7:7A:05:80:8A:B5:94:EF:22:18:61:39:DF',
    'Match fingerprint of regafi.fr SSL certificate'
  );

  await s._getSSLCert('http://does.not.exists').catch((err) => {
    t.true(err instanceof Error, '_getSSLCert Promise is caught');
  });

  t.end();
});

test('SCRAPER::Sraper _getParamsFromUrl', function(t) {
  const s = new Scraper();

  t.plan(2);

  // The query string previously contained a mis-encoded "&param2" (rendered
  // as a pilcrow followed by "m2"), which contradicted the expected object.
  t.deepEqual(
    s._getParamsFromUrl('https://www.site.com/page.html?param1=A&param2=B&Param3=C'),
    { param1: 'A', param2: 'B', Param3: 'C' },
    'Handle a url with params'
  );

  t.deepEqual(
    s._getParamsFromUrl('https://www.othersite.com/page.html'),
    {},
    'Handle a url with NO params'
  );

  t.end();
});