fix for 3741
This commit is contained in:
parent
81ade092b9
commit
ff6985b72f
35
env
35
env
@ -1,35 +0,0 @@
|
||||
# AWS_ACCESS_KEY_ID=AKIAICAGOZCGAQIYCAUA
|
||||
# AWS_SECRET_ACCESS_KEY=RGIasVHVggutezlDpoofqI7BoLln86Ngms9avKTj
|
||||
# S3_BUCKET=obregstoretest
|
||||
|
||||
AWS_ACCESS_KEY_ID=AKIAJWJS75F7WNCGK64A
|
||||
AWS_SECRET_ACCESS_KEY=8irYxThCp4xxyrbr00HzWcODe2qdNrR7X7S5BKup
|
||||
AWS_REGION=eu-west-1
|
||||
S3_BUCKET=obregstoretest-mdtest
|
||||
SQS_ID=https://sqs.eu-west-1.amazonaws.com/115486161803/obdfcascrape
|
||||
SQS_ARN=arn:aws:sqs:eu-west-1:115486161803:obdfcascrape
|
||||
SQS_NAME=obdfcascrape
|
||||
|
||||
TOPIC_ARN=arn:aws:sns:eu-west-1:115486161803:obdfcascrape
|
||||
QUEUE_URL=https://sqs.eu-west-1.amazonaws.com/115486161803/obdfcascrape
|
||||
QUEUE_ARN=arn:aws:sqs:eu-west-1:115486161803:obdfcascrape
|
||||
|
||||
#PROXY_URI=us-wa.proxymesh.com:31280
|
||||
PROXY_URI=uk.proxymesh.com:31280
|
||||
|
||||
FILE_DATE_FORMAT=yyyymmdd
|
||||
|
||||
LOGGER_LEVEL=debug
|
||||
|
||||
#SCRAPE_START=EE
|
||||
CY_CRON=00 12 * * *
|
||||
#DE_CRON=15 12 * * *
|
||||
#DK_CRON=00 12 * * *
|
||||
#FR_CRON=15 12 * * *
|
||||
#IE_CRON=00 12 * * *
|
||||
MT_CRON=15 12 * * *
|
||||
NL_CRON=00 12 * * *
|
||||
#SE_CRON=15 12 * * *
|
||||
#IT_CRON=00 12 * * *
|
||||
#CZ_CRON=15 12 * * *
|
||||
#SK_CRON=00 12 * * *
|
41
ncas/fr.js
41
ncas/fr.js
@ -91,7 +91,7 @@ class FRScrape extends Scraper {
|
||||
const children = cheerio(elm).children();
|
||||
|
||||
if (children.length > 2) {
|
||||
if (children.length === 11)
|
||||
if (children.length === 11)
|
||||
children.each((step, fiElm) => {
|
||||
financialInstruments.push(this._cleanUp(cheerio(fiElm).text()));
|
||||
});
|
||||
@ -105,14 +105,14 @@ class FRScrape extends Scraper {
|
||||
while(offset < financialInstruments.length) {
|
||||
if (children.eq(offset).html().match(unchecked) === null)
|
||||
finInst.push(financialInstruments[offset - fiOffset]);
|
||||
|
||||
|
||||
offset++;
|
||||
}
|
||||
if (finInst.length > 0)
|
||||
if (finInst.length > 0)
|
||||
output.push([rowName, finInst]);
|
||||
}
|
||||
}
|
||||
else if (children.length === 2)
|
||||
else if (children.length === 2)
|
||||
|
||||
if (children.eq(0).html().match(unchecked) === null) {
|
||||
authorised.push(this._cleanUp(children.eq(1).text()));
|
||||
@ -173,13 +173,12 @@ class FRScrape extends Scraper {
|
||||
if (!creditInstFilter)
|
||||
// Default mode
|
||||
links.push({ link, title });
|
||||
else
|
||||
if ($row.children().length === 6) {
|
||||
const status = this._cleanUp($row.children().eq(5).text().toLowerCase());
|
||||
else
|
||||
if ($row.children().length >= 6) {
|
||||
const statusField = $row.children().length - 1;
|
||||
const status = this._cleanUp($row.children().eq(statusField).text().toLowerCase());
|
||||
|
||||
logger.debug(`Status:**${status}** ${title}`);
|
||||
if(wantedCIStatuses.indexOf(status) !== -1) {
|
||||
logger.debug(`Matched:**${status}** ${title}`);
|
||||
links.push({ link, title });
|
||||
}
|
||||
}
|
||||
@ -210,7 +209,7 @@ class FRScrape extends Scraper {
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
return details;
|
||||
}
|
||||
|
||||
@ -244,9 +243,9 @@ class FRScrape extends Scraper {
|
||||
|
||||
const frenchTbl = $('#zone_en_france > table tr');
|
||||
|
||||
if (this.mode < 2)
|
||||
if (this.mode < 2)
|
||||
pageData.frActivities = await this.extractDataFromTable(frenchTbl).output;
|
||||
else
|
||||
else
|
||||
pageData.creditInstituteActivities = await this.extractDataFromInvestmentServicesTable(frenchTbl);
|
||||
|
||||
if (this.mode < 2) {
|
||||
@ -331,6 +330,10 @@ class FRScrape extends Scraper {
|
||||
|
||||
if ($table.length > 1)
|
||||
// The table contains more than just the heading row
|
||||
store.indexcount++;
|
||||
logger.debug(`Processing menu: ${this.modeTitles[this.mode]} // ${store.indexcount}`);
|
||||
|
||||
await this._makeScreenshotV2(this.page, `${this.path}/${this.modePrefix[this.mode]}_menu_${store.indexcount}`, null);
|
||||
|
||||
store.links = store.links.concat(await this.extractLinks($table, (this.mode === 2)));
|
||||
|
||||
@ -415,7 +418,7 @@ class FRScrape extends Scraper {
|
||||
async start() {
|
||||
await super._start();
|
||||
try {
|
||||
this.mode = 0;
|
||||
this.mode = 2;
|
||||
|
||||
this.paymentServices = {
|
||||
'items': 0,
|
||||
@ -423,7 +426,9 @@ class FRScrape extends Scraper {
|
||||
'step': 0,
|
||||
'visited': false,
|
||||
'done' : false,
|
||||
'searchDone' : false
|
||||
'searchDone' : false,
|
||||
'indexcount' :0
|
||||
|
||||
};
|
||||
|
||||
this.emoneyServices = {
|
||||
@ -432,7 +437,8 @@ class FRScrape extends Scraper {
|
||||
'step': 0,
|
||||
'visited': false,
|
||||
'done' : false,
|
||||
'searchDone' : false
|
||||
'searchDone' : false,
|
||||
'indexcount' :0
|
||||
};
|
||||
|
||||
this.creditServices = {
|
||||
@ -441,7 +447,8 @@ class FRScrape extends Scraper {
|
||||
'step': 0,
|
||||
'visited': false,
|
||||
'done' : false,
|
||||
'searchDone' : false
|
||||
'searchDone' : false,
|
||||
'indexcount' :0
|
||||
};
|
||||
|
||||
this.startPage = 'https://www.regafi.fr/spip.php?page=results&type=advanced&denomination=&siren=&cib=&bic=&nom=&siren_agent=&num=&cat=21-TBR07&retrait=0&lang=en&id_secteur=3';
|
||||
@ -464,7 +471,7 @@ class FRScrape extends Scraper {
|
||||
await this.page.tracing.start({ 'path': `${this.path}/trace.json`, 'screenshots':true });
|
||||
|
||||
await this.page.setViewport({ 'width': 1200, 'height': 800 });
|
||||
await this._goto(this.startPage);
|
||||
await this._goto(this.creditUrl);
|
||||
|
||||
await this._randomWait(this.page, 3, 5);
|
||||
}
|
||||
|
41
package-lock.json
generated
41
package-lock.json
generated
@ -2300,8 +2300,7 @@
|
||||
},
|
||||
"ansi-regex": {
|
||||
"version": "2.1.1",
|
||||
"bundled": true,
|
||||
"optional": true
|
||||
"bundled": true
|
||||
},
|
||||
"aproba": {
|
||||
"version": "1.2.0",
|
||||
@ -2319,13 +2318,11 @@
|
||||
},
|
||||
"balanced-match": {
|
||||
"version": "1.0.0",
|
||||
"bundled": true,
|
||||
"optional": true
|
||||
"bundled": true
|
||||
},
|
||||
"brace-expansion": {
|
||||
"version": "1.1.11",
|
||||
"bundled": true,
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
@ -2338,18 +2335,15 @@
|
||||
},
|
||||
"code-point-at": {
|
||||
"version": "1.1.0",
|
||||
"bundled": true,
|
||||
"optional": true
|
||||
"bundled": true
|
||||
},
|
||||
"concat-map": {
|
||||
"version": "0.0.1",
|
||||
"bundled": true,
|
||||
"optional": true
|
||||
"bundled": true
|
||||
},
|
||||
"console-control-strings": {
|
||||
"version": "1.1.0",
|
||||
"bundled": true,
|
||||
"optional": true
|
||||
"bundled": true
|
||||
},
|
||||
"core-util-is": {
|
||||
"version": "1.0.2",
|
||||
@ -2452,8 +2446,7 @@
|
||||
},
|
||||
"inherits": {
|
||||
"version": "2.0.3",
|
||||
"bundled": true,
|
||||
"optional": true
|
||||
"bundled": true
|
||||
},
|
||||
"ini": {
|
||||
"version": "1.3.5",
|
||||
@ -2463,7 +2456,6 @@
|
||||
"is-fullwidth-code-point": {
|
||||
"version": "1.0.0",
|
||||
"bundled": true,
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"number-is-nan": "^1.0.0"
|
||||
}
|
||||
@ -2476,20 +2468,17 @@
|
||||
"minimatch": {
|
||||
"version": "3.0.4",
|
||||
"bundled": true,
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"brace-expansion": "^1.1.7"
|
||||
}
|
||||
},
|
||||
"minimist": {
|
||||
"version": "0.0.8",
|
||||
"bundled": true,
|
||||
"optional": true
|
||||
"bundled": true
|
||||
},
|
||||
"minipass": {
|
||||
"version": "2.3.5",
|
||||
"bundled": true,
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"safe-buffer": "^5.1.2",
|
||||
"yallist": "^3.0.0"
|
||||
@ -2506,7 +2495,6 @@
|
||||
"mkdirp": {
|
||||
"version": "0.5.1",
|
||||
"bundled": true,
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"minimist": "0.0.8"
|
||||
}
|
||||
@ -2579,8 +2567,7 @@
|
||||
},
|
||||
"number-is-nan": {
|
||||
"version": "1.0.1",
|
||||
"bundled": true,
|
||||
"optional": true
|
||||
"bundled": true
|
||||
},
|
||||
"object-assign": {
|
||||
"version": "4.1.1",
|
||||
@ -2590,7 +2577,6 @@
|
||||
"once": {
|
||||
"version": "1.4.0",
|
||||
"bundled": true,
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"wrappy": "1"
|
||||
}
|
||||
@ -2666,8 +2652,7 @@
|
||||
},
|
||||
"safe-buffer": {
|
||||
"version": "5.1.2",
|
||||
"bundled": true,
|
||||
"optional": true
|
||||
"bundled": true
|
||||
},
|
||||
"safer-buffer": {
|
||||
"version": "2.1.2",
|
||||
@ -2697,7 +2682,6 @@
|
||||
"string-width": {
|
||||
"version": "1.0.2",
|
||||
"bundled": true,
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"code-point-at": "^1.0.0",
|
||||
"is-fullwidth-code-point": "^1.0.0",
|
||||
@ -2715,7 +2699,6 @@
|
||||
"strip-ansi": {
|
||||
"version": "3.0.1",
|
||||
"bundled": true,
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"ansi-regex": "^2.0.0"
|
||||
}
|
||||
@ -2754,13 +2737,11 @@
|
||||
},
|
||||
"wrappy": {
|
||||
"version": "1.0.2",
|
||||
"bundled": true,
|
||||
"optional": true
|
||||
"bundled": true
|
||||
},
|
||||
"yallist": {
|
||||
"version": "3.0.3",
|
||||
"bundled": true,
|
||||
"optional": true
|
||||
"bundled": true
|
||||
}
|
||||
}
|
||||
},
|
||||
|
296
tests/data/fr/dir3741_001.html
Normal file
296
tests/data/fr/dir3741_001.html
Normal file
File diff suppressed because one or more lines are too long
1
tests/data/fr/dir3741_001.json
Normal file
1
tests/data/fr/dir3741_001.json
Normal file
@ -0,0 +1 @@
|
||||
[{"link":"/spip.php?type=advanced&id_secteur=3&lang=en&denomination=&siren=&cib=&bic=&nom=&siren_agent=&num=&cat=01-TBR07&retrait=0&pg=2&page=af&id=70","title":"AGENCE FRANCE LOCALE"}]
|
296
tests/data/fr/dir3741_002.html
Normal file
296
tests/data/fr/dir3741_002.html
Normal file
File diff suppressed because one or more lines are too long
1
tests/data/fr/dir3741_002.json
Normal file
1
tests/data/fr/dir3741_002.json
Normal file
@ -0,0 +1 @@
|
||||
[{"link":"/spip.php?type=advanced&id_secteur=3&lang=en&denomination=&siren=&cib=&bic=&nom=&siren_agent=&num=&cat=01-TBR07&retrait=0&pg=4&page=af&id=85","title":"Amundi"},{"link":"/spip.php?type=advanced&id_secteur=3&lang=en&denomination=&siren=&cib=&bic=&nom=&siren_agent=&num=&cat=01-TBR07&retrait=0&pg=4&page=af&id=40","title":"Amundi finance"},{"link":"/spip.php?type=advanced&id_secteur=3&lang=en&denomination=&siren=&cib=&bic=&nom=&siren_agent=&num=&cat=01-TBR07&retrait=0&pg=4&page=af&id=8943","title":"Andbank Monaco S.A.M."}]
|
296
tests/data/fr/dir3741_003.html
Normal file
296
tests/data/fr/dir3741_003.html
Normal file
File diff suppressed because one or more lines are too long
1
tests/data/fr/dir3741_003.json
Normal file
1
tests/data/fr/dir3741_003.json
Normal file
@ -0,0 +1 @@
|
||||
[{"link":"/spip.php?type=advanced&id_secteur=3&lang=en&denomination=&siren=&cib=&bic=&nom=&siren_agent=&num=&cat=01-TBR07&retrait=0&pg=11&page=af&id=8972","title":"Bank Audi France"},{"link":"/spip.php?type=advanced&id_secteur=3&lang=en&denomination=&siren=&cib=&bic=&nom=&siren_agent=&num=&cat=01-TBR07&retrait=0&pg=11&page=af&id=8935","title":"Bank Julius Baer (Monaco) S.A.M."}]
|
@ -54,6 +54,9 @@ const creditData = jsonfile.readFileSync('tests/data/fr/creditInstituteData.json
|
||||
const breakingCr001 = fs.readFileSync('tests/data/fr/breaking_CI_001.html');
|
||||
const breakingCr001Data = jsonfile.readFileSync('tests/data/fr/breaking_CI_001.json');
|
||||
|
||||
const failure = { 'fail':true };
|
||||
const empty = {};
|
||||
|
||||
test('FRANCE:: Scrape Indexes', async t => {
|
||||
const frScraper = new France();
|
||||
t.test('Extract PI Search links', async t => {
|
||||
@ -218,3 +221,62 @@ test('FRANCE Breaking CI 001', async t => {
|
||||
t.end();
|
||||
});
|
||||
|
||||
/**
 * Regression tests for DIR-3741.
 *
 * Each case pairs a saved index page (HTML) with the list of links that
 * `extractLinks` is expected to return for it (JSON). The rows are selected
 * with `table.table tr` and `extractLinks` is called with `true` as its
 * second argument.
 * NOTE(review): `true` presumably enables the credit-institution status
 * filter used by the scraper in mode 2 -- confirm against `extractLinks`.
 */
test('FRANCE:: DIR-3741', async t => {
    // Fixtures are read up front, in the same order for every case:
    // the HTML page first, then its expected JSON.
    const scenarios = [
        {
            title: 'FRANCE::Extract Details from Page 1/103',
            html: fs.readFileSync('tests/data/fr/dir3741_001.html'),
            expectedLinks: jsonfile.readFileSync('tests/data/fr/dir3741_001.json'),
            expectedCount: 1,
            countMessage: 'Scrapes the correct number of links (1)'
        },
        {
            title: 'FRANCE::Extract Details from Page 4/103',
            html: fs.readFileSync('tests/data/fr/dir3741_002.html'),
            expectedLinks: jsonfile.readFileSync('tests/data/fr/dir3741_002.json'),
            expectedCount: 3,
            countMessage: 'Scrapes the correct number of links (3)'
        },
        {
            title: 'FRANCE::Extract Details from Page 11/103',
            html: fs.readFileSync('tests/data/fr/dir3741_003.html'),
            expectedLinks: jsonfile.readFileSync('tests/data/fr/dir3741_003.json'),
            expectedCount: 2,
            countMessage: 'Scrapes the correct number of links (2)'
        }
    ];

    // A single scraper instance is shared by all sub-tests.
    const frScraper = new France();

    for (const scenario of scenarios) {
        t.test(scenario.title, async t => {
            const $ = cheerio.load(scenario.html);
            const $rows = $('table.table tr');

            const extracted = await frScraper.extractLinks($rows, true);

            t.equal(extracted.length, scenario.expectedCount, scenario.countMessage);
            t.deepEquals(extracted, scenario.expectedLinks, 'Links match the data');
            t.end();
        });
    }

    t.end();
});
|
||||
|
Loading…
Reference in New Issue
Block a user