lot/libs/scraper.js
2021-01-02 19:30:30 +00:00

127 lines
2.6 KiB
JavaScript

const cheerio = require('cheerio');
const fecha = require('fecha');
// Captures the first run shaped like "NN-NN-NNNN" (two digits, two digits,
// four digits, hyphen-separated). The scrapers below apply it to page URLs and
// parse the match as DD-MM-YYYY. It has no `g` flag, so it carries no
// `lastIndex` state between calls.
const dateRegX = /(\d{2}-\d{2}-\d{4})/;
/**
 * Extract the draw date and the drawn numbers from a results page.
 *
 * The date is taken from the `content` of the page's `og:url` meta tag, which
 * must contain a DD-MM-YYYY date; the numbers are read from the
 * `#ballsAscending li` items.
 *
 * @param {string} html - Page markup, handed to `cheerio.load`.
 * @returns {Array<string|number>} The draw date formatted as `YYYY-MM-DD`,
 *   followed by one integer per matched list item (in the order cheerio yields
 *   them). An item whose text does not start with a number yields `NaN`.
 * @throws {Error} If the `og:url` meta tag is missing, its URL carries no
 *   DD-MM-YYYY date, or that date cannot be parsed.
 */
function scrapeResults(html) {
  const $ = cheerio.load(html);

  const rawUrl = $('meta[property=\'og:url\']').attr('content');
  if (typeof rawUrl !== 'string') {
    throw new Error('scrapeResults: og:url meta tag not found in page');
  }

  const dateMatch = rawUrl.match(dateRegX);
  if (dateMatch === null) {
    throw new Error(`scrapeResults: no DD-MM-YYYY date in og:url "${rawUrl}"`);
  }

  // fecha.parse signals failure with a falsy value rather than throwing.
  const newDate = fecha.parse(dateMatch[0], 'DD-MM-YYYY');
  if (!newDate) {
    throw new Error(`scrapeResults: invalid draw date "${dateMatch[0]}"`);
  }

  const outArray = [fecha.format(newDate, 'YYYY-MM-DD')];

  // `.each` on an empty selection is a no-op, so no length guard is needed.
  $('#ballsAscending li').each((index, cell) => {
    outArray.push(parseInt($(cell).text(), 10));
  });

  return outArray;
}
/*
Sample markup of one archive entry, as parsed by scrapeArchive and scrapeSingle:
<div class="archives">
<a class="title" href="/results/28-12-2018" title="More detailed information about the EuroMillons draw that took place on December 28th 2018 "><span>Friday</span><br>28th December 2018</a>
<ul class="balls small">
<li class="new ball">13</li>
<li class="new ball">16</li>
<li class="new ball">34</li>
<li class="new ball">35</li>
<li class="new ball">45</li>
<li class="new lucky-star">10</li>
<li class="new lucky-star">12</li>
</ul>
<div class="raffle">
<div class="title">Millionaire Maker:</div>
<div class="box">2 Codes</div>
</div>
</div>
*/
/**
 * Extract every draw listed on an archive page.
 *
 * Each `div.archives` element is one draw: the date comes from the `href` of
 * its link (which must contain a DD-MM-YYYY date) and the numbers from its
 * `ul li` items.
 *
 * @param {string} html - Page markup, handed to `cheerio.load`.
 * @returns {Array<Array<string|number>>} One array per archive entry: the draw
 *   date formatted as `YYYY-MM-DD`, followed by one integer per list item. An
 *   item whose text does not start with a number yields `NaN`. Empty when the
 *   page has no `div.archives` elements.
 * @throws {Error} If an entry has no link `href`, the `href` carries no
 *   DD-MM-YYYY date, or that date cannot be parsed.
 */
function scrapeArchive(html) {
  const $ = cheerio.load(html);
  const outArray = [];

  $('div.archives').each((entryIndex, entry) => {
    const $entry = $(entry);

    const rawUrl = $entry.find('a').attr('href');
    if (typeof rawUrl !== 'string') {
      throw new Error(`scrapeArchive: entry ${entryIndex} has no link href`);
    }

    const dateMatch = rawUrl.match(dateRegX);
    if (dateMatch === null) {
      throw new Error(
        `scrapeArchive: entry ${entryIndex} href "${rawUrl}" has no DD-MM-YYYY date`
      );
    }

    // fecha.parse signals failure with a falsy value rather than throwing.
    const newDate = fecha.parse(dateMatch[0], 'DD-MM-YYYY');
    if (!newDate) {
      throw new Error(
        `scrapeArchive: entry ${entryIndex} has invalid draw date "${dateMatch[0]}"`
      );
    }

    const draw = [fecha.format(newDate, 'YYYY-MM-DD')];

    // Distinct callback names avoid shadowing the outer entry variables.
    $entry.find('ul li').each((cellIndex, cell) => {
      draw.push(parseInt($(cell).text(), 10));
    });

    outArray.push(draw);
  });

  return outArray;
}
/**
 * Extract only the first draw listed on an archive page.
 *
 * Reads the first `div.archives` element: the date comes from the `href` of
 * its link (which must contain a DD-MM-YYYY date) and the numbers from its
 * `ul li` items.
 *
 * @param {string} html - Page markup, handed to `cheerio.load`.
 * @returns {Array<Array<string|number>>} A one-element array holding the draw:
 *   its date formatted as `YYYY-MM-DD`, followed by one integer per list item.
 *   An item whose text does not start with a number yields `NaN`.
 * @throws {Error} If the page has no `div.archives` element, the entry has no
 *   link `href`, the `href` carries no DD-MM-YYYY date, or that date cannot be
 *   parsed.
 */
function scrapeSingle(html) {
  const $ = cheerio.load(html);

  const $entry = $('div.archives').first();
  if ($entry.length === 0) {
    throw new Error('scrapeSingle: no div.archives entry found in page');
  }

  const rawUrl = $entry.find('a').attr('href');
  if (typeof rawUrl !== 'string') {
    throw new Error('scrapeSingle: archive entry has no link href');
  }

  const dateMatch = rawUrl.match(dateRegX);
  if (dateMatch === null) {
    throw new Error(`scrapeSingle: href "${rawUrl}" has no DD-MM-YYYY date`);
  }

  // fecha.parse signals failure with a falsy value rather than throwing.
  const newDate = fecha.parse(dateMatch[0], 'DD-MM-YYYY');
  if (!newDate) {
    throw new Error(`scrapeSingle: invalid draw date "${dateMatch[0]}"`);
  }

  const draw = [fecha.format(newDate, 'YYYY-MM-DD')];

  $entry.find('ul li').each((cellIndex, cell) => {
    draw.push(parseInt($(cell).text(), 10));
  });

  // Wrapped in an outer array to keep the same shape scrapeArchive returns.
  return [draw];
}
// Public API: the three HTML scrapers defined in this module.
module.exports = { scrapeResults, scrapeArchive, scrapeSingle };