127 lines
2.6 KiB
JavaScript
127 lines
2.6 KiB
JavaScript
const cheerio = require('cheerio');
|
|
const fecha = require('fecha');
|
|
|
|
// Finds the first hyphen-separated 2-2-4 digit run in a string, e.g. "28-12-2018".
// The scrapers below feed the match to fecha as a 'DD-MM-YYYY' date. The pattern is
// unanchored, so it matches anywhere inside a URL or path.
const dateRegX = /(\d{2}-\d{2}-\d{4})/;
|
|
|
|
/**
 * Extracts the draw date and ball numbers from a single results page.
 *
 * The date is taken from the page's `og:url` meta tag (DD-MM-YYYY inside the
 * URL) and re-emitted as YYYY-MM-DD. The numbers are the text of every
 * `#ballsAscending li` element, parsed as base-10 integers in document order.
 *
 * @param {string} html - Markup of the results page.
 * @returns {Array<string|number>} `[isoDate, n1, n2, ...]`; only the date when
 *   no ball elements are present.
 * @throws {TypeError} If the `og:url` meta tag is missing or its content holds
 *   no DD-MM-YYYY date.
 */
function scrapeResults(html) {
  const $ = cheerio.load(html);

  const pageUrl = $('meta[property=\'og:url\']').attr('content');
  const [drawDate] = pageUrl.match(dateRegX);
  const isoDate = fecha.format(fecha.parse(drawDate, 'DD-MM-YYYY'), 'YYYY-MM-DD');

  // Non-numeric cell text yields NaN, which is kept in the result as-is.
  const numbers = $('#ballsAscending li')
    .toArray()
    .map((li) => parseInt($(li).text(), 10));

  return [isoDate, ...numbers];
}
|
|
|
|
/*
Sample markup of one archive entry, matching the selectors used by
scrapeArchive and scrapeSingle ('div.archives', its 'a' href, and 'ul li'):

<div class="archives">
  <a class="title" href="/results/28-12-2018" title="More detailed information about the EuroMillons draw that took place on December 28th 2018 "><span>Friday</span><br>28th December 2018</a>

  <ul class="balls small">
    <li class="new ball">13</li>
    <li class="new ball">16</li>
    <li class="new ball">34</li>
    <li class="new ball">35</li>
    <li class="new ball">45</li>
    <li class="new lucky-star">10</li>
    <li class="new lucky-star">12</li>
  </ul>

  <div class="raffle">
    <div class="title">Millionaire Maker:</div>
    <div class="box">2 Codes</div>
  </div>
</div>
*/
|
|
|
|
/**
 * Extracts every draw listed on an archive page.
 *
 * Each `div.archives` element contributes one row: the date found in the
 * `href` of its link (DD-MM-YYYY, re-emitted as YYYY-MM-DD) followed by the
 * text of each `ul li` inside it, parsed as base-10 integers.
 *
 * @param {string} html - Markup of the archive page.
 * @returns {Array<Array<string|number>>} One `[isoDate, n1, n2, ...]` row per
 *   archive entry, in document order; empty when the page has no entries.
 * @throws {TypeError} If an entry has no link `href` or the `href` holds no
 *   DD-MM-YYYY date.
 */
function scrapeArchive(html) {
  const $ = cheerio.load(html);

  return $('div.archives')
    .toArray()
    .map((entry) => {
      const $entry = $(entry);

      const href = $entry.find('a').attr('href');
      const [drawDate] = href.match(dateRegX);
      const parsedDate = fecha.parse(drawDate, 'DD-MM-YYYY');

      const numbers = $entry
        .find('ul li')
        .toArray()
        .map((li) => parseInt($(li).text(), 10));

      return [fecha.format(parsedDate, 'YYYY-MM-DD'), ...numbers];
    });
}
|
|
|
|
/**
 * Extracts only the first draw listed on an archive page.
 *
 * Reads the first `div.archives` element: the date comes from the `href` of
 * its link (DD-MM-YYYY, re-emitted as YYYY-MM-DD) and the numbers from the
 * text of each `ul li` inside it, parsed as base-10 integers.
 *
 * The result is wrapped in an outer array so it has the same shape as the
 * rows returned by scrapeArchive.
 *
 * @param {string} html - Markup of the archive page.
 * @returns {Array<Array<string|number>>} A one-element array holding
 *   `[isoDate, n1, n2, ...]`.
 * @throws {TypeError} If there is no archive entry, the entry has no link
 *   `href`, or the `href` holds no DD-MM-YYYY date.
 */
function scrapeSingle(html) {
  const $ = cheerio.load(html);
  const firstArchive = $('div.archives').first();

  const rawUrl = firstArchive.find('a').attr('href');
  const rawDate = rawUrl.match(dateRegX)[0];
  const newDate = fecha.parse(rawDate, 'DD-MM-YYYY');

  const wArray = [fecha.format(newDate, 'YYYY-MM-DD')];

  // `each` over an empty selection is a no-op, so no length check is needed.
  firstArchive.find('ul li').each((i, cell) => {
    wArray.push(parseInt($(cell).text(), 10));
  });

  return [wArray];
}
|
|
|
|
// Public API: the three HTML scrapers defined in this module.
module.exports = { scrapeResults, scrapeArchive, scrapeSingle };
|