From f0de17cd3b86b813d6dd4bbf72f167ed49f1b78e Mon Sep 17 00:00:00 2001 From: martind2000 Date: Wed, 29 May 2019 16:04:31 +0100 Subject: [PATCH] 2019-05-29 --- .../Note 2019-05-22T10.10.51.md | 20 ++++++++ tasks/DIN-136 Batch2/it.js.md | 20 ++++++++ .../Note 2019-05-29T11.34.48.md | 44 ++++++++++++++++++ tasks/notes.sqlite | Bin 13312 -> 13312 bytes tasks/trash/10 | 3 ++ 5 files changed, 87 insertions(+) create mode 100644 tasks/DIN-136 Batch2/Note 2019-05-22T10.10.51.md create mode 100644 tasks/DIN-136 Batch2/it.js.md create mode 100644 tasks/DIN-329 NL Fixes/Note 2019-05-29T11.34.48.md create mode 100644 tasks/trash/10 diff --git a/tasks/DIN-136 Batch2/Note 2019-05-22T10.10.51.md b/tasks/DIN-136 Batch2/Note 2019-05-22T10.10.51.md new file mode 100644 index 0000000..aaa36f2 --- /dev/null +++ b/tasks/DIN-136 Batch2/Note 2019-05-22T10.10.51.md @@ -0,0 +1,20 @@ +Note 2019-05-22T10.10.51 +======================== + +This has been having random crashes on PPE so it has failed to complete for a while. Logging information wasn't being useful. It was returning that an error had occurred but did not display what the error was or where it was located. + +I have gone through the code and added some additional error catching and triggering for the restart and ran it locally yesterday. It completed with the only issue being when I was moving the laptop between rooms. + +I will check this in and hopefully have it moved to PPE this week. + + +2019-05-23T14:46:30.012] + +2019-05-23T14:48:07.532] + +[2019-05-23T15:07:37.821] + +[2019-05-23T15:09:15.341] [Level { level: 20000, levelStr: 'INFO', colour: 'green' }] (IT) - We didnt transition back correctly, forcing another click.. + +[2019-05-23T15:10:52.845] [Level { level: 20000, levelStr: 'INFO', colour: 'green' }] (IT) - We didnt transition back correctly, forcing another click.. 
+[2019- \ No newline at end of file diff --git a/tasks/DIN-136 Batch2/it.js.md b/tasks/DIN-136 Batch2/it.js.md new file mode 100644 index 0000000..82fb50a --- /dev/null +++ b/tasks/DIN-136 Batch2/it.js.md @@ -0,0 +1,20 @@ +290 + + + + // wait for loading shroud to go away + await this.page.waitForSelector('div.loading', { 'visible':false, 'timeout':25000 }); + + let btnSuccess = false; + let breakCount = 0; + do { + await this.page.waitForSelector('button.btn.btn-success', { 'visible':true, 'timeout':45000 }).then(async (elm) => { + await elm.click({ 'delay':Scraper.notARobot() }); + }).catch(() => { + btnSuccess = true; + }); + await this._randomWait(this.page, 1, 1, 'preparePSSearch btnSuccess'); + breakCount++; + } + + while(!btnSuccess && breakCount < 5); \ No newline at end of file diff --git a/tasks/DIN-329 NL Fixes/Note 2019-05-29T11.34.48.md b/tasks/DIN-329 NL Fixes/Note 2019-05-29T11.34.48.md new file mode 100644 index 0000000..da6efeb --- /dev/null +++ b/tasks/DIN-329 NL Fixes/Note 2019-05-29T11.34.48.md @@ -0,0 +1,44 @@ +Note 2019-05-29T11.34.48 +======================== + +As discussed, I've added all the defects linked to the Netherlands scraper to this ticket: + +Defect 001 - JSON file data and main page screenshot are missing for CI entity: ABN AMRO Groenbank BV +Attached screenshots NL_Defect_001 and NL_Defect_001a are linked to the above issue + +Defect 002 - Not all 'Category' data is captured in the JSON file for some CI. Bank is captured but the rest are ignored. 
+Attached screenshots NL_Defect_002 is linked to the above issue + +Defect 003 - JSON files data and main page screenshots are missing for these PI entities: +detail.jsp?id=4366080d9645e911811b005056b60a9d&locale=en_GB +detail.jsp?id=bf85dc049745e911811b005056b60a9d&locale=en_GB +detail.jsp?id=bf85dc049745e911811b005056b60a9d&locale=en_GB +Attached screenshots NL_Defect_003 and NL_Defect_003a are linked to the above issue + +The entity we currently have on the NCA register website was also not scrapped: +detail.jsp?id=8d41e2ab5948e311b55a005056b672cf +Attached screenshots NL_Defect_003b is linked to the above issue + + + +Defect 1 +--- +The 'CS' ABN Amro Groenbank B V is indexed but not processed + + +Defect 2 +--- +NIBC Bank N.V. has a category with 2 items but one item is logged. + + + + +Defect 3 +--- +Coliding filenames + +**Solution** + +The ID query string value is taken from the href link, and the first 8 characters are extracted as a short hash. + +This short hash is added to the filename when it is created. \ No newline at end of file diff --git a/tasks/notes.sqlite b/tasks/notes.sqlite index d4e22e81e98c883af4ae1633e8836751bbcd23f1..42c462292fb714baa5f8861c4e03ebc93200cf8f 100644 GIT binary patch delta 92 zcmZq3Xvml#%~&>3#+k8fW5PmiX$A%cHl|8urX4`yBy$V1J<~5H8^%5m+}OCAar1Rv tWkx1vuE~!4B3woxh6Z{jW_l(DljHfNxr`JHEUiq;tW1qI+w&h(0RXAD7<~W$ delta 79 zcmZq3Xvml#%~(27#+k8nW5PmiX{PTCtW1>*Ogoq=nNKpeFxxX#GTAWpf#Al*-He;B X^C~kkdQ5CIIA5;MVqN5bq diff --git a/tasks/trash/10 b/tasks/trash/10 new file mode 100644 index 0000000..f57698a --- /dev/null +++ b/tasks/trash/10 @@ -0,0 +1,3 @@ +Note 2019-05-22T10.46.40 +======================== +