Added event scraping

This commit is contained in:
martind2000 2016-01-08 14:53:49 +00:00
parent d9e8f91fac
commit 6af81e1885
7 changed files with 224 additions and 1 deletions

72
lib/events.js Normal file
View File

@ -0,0 +1,72 @@
var http = require('http'), request = require('request'), cheerio = require('cheerio');
// Module-level cache for the scraped event list, shared by every request.
//   last   - timestamp of the most recent scrape; starts at 0 so the very
//            first request always counts as expired and triggers a scrape
//   data   - payload handed to the events view
//   expire - maximum age of the cache in milliseconds (12 hours)
var eventCache = {
  last: 0,
  data: {},
  expire: 12 * 60 * 60 * 1000
};
module.exports = {
getEvents: function (req, res) {
console.log('Getting events...');
var j=[], url = 'https://www.list.co.uk/events/days-out/when:this%20weekend/location:Dumbarton(55.9460,-4.5556)/distance:20/';
var now = new Date();
if ((now - eventCache.last) > eventCache.expire)
{
request(url, function(err, resp, body) {
if (err)
throw err;
$ = cheerio.load(body);
// console.log($);
// TODO: scraping goes here!
$('.resultsRow').each( function(div)
{
var item={};
var eventSummary = $(this).find('.eventSummary').first();
var byDate = $(this).find('.byDate').first();
var title = eventSummary.find('.head').first();
var description = eventSummary.find('P').first();
var link = ' https://www.list.co.uk' + eventSummary.find('A').first().attr('href');
var price = byDate.find('.price').first();
var dt = byDate.find('.dtstart').first().attr('title');
item.title = title.text();
item.description = description.text();
item.link = link;
item.price = price.text();
item.date = dt;
j.push(item);
});
eventCache.last = now;
eventCache.data = j;
res.render('pages/events',eventCache );
});
} else
{
console.log('Using event cache...');
res.render('pages/events',eventCache );
}
}
};

View File

@ -2,12 +2,16 @@
"name": "silvrtree",
"version": "0.1.1",
"devDependencies": {
"cheerio": "^0.19.0",
"ejs": "^2.3.4",
"htmlparser": "^1.7.7",
"mammoth": "^0.3.25-pre.1",
"request": "^2.67.0",
"wordsoap": "^0.2.0"
},
"dependencies": {
"express": "3.x",
"scrape": "^0.2.3",
"unstyler": "^0.2.2"
}
}

57
scrapetest.js Normal file
View File

@ -0,0 +1,57 @@
var request = require('request');
var cheerio = require('cheerio');
// Stand-alone scraping experiment: fetches the list.co.uk "days out this
// weekend" results page, extracts one record per result row and prints them.
var url = 'https://www.list.co.uk/events/days-out/when:this%20weekend/location:Dumbarton(55.9460,-4.5556)/distance:20/';
var j = [];

request(url, function (err, resp, body) {
  if (err) {
    throw err;
  }

  // Declared locally so the cheerio handle does not become an implicit global.
  var $ = cheerio.load(body);
  console.log($);

  $('.resultsRow').each(function () {
    var row = $(this);
    var eventSummary = row.find('.eventSummary').first();
    var byDate = row.find('.byDate').first();
    var title = eventSummary.find('.head').first();
    var description = eventSummary.find('P').first();
    var href = eventSummary.find('A').first().attr('href');
    // Build the absolute link only when the row has an href; no leading space.
    var link = href ? 'https://www.list.co.uk' + href : '';
    var price = byDate.find('.price').first();
    var dt = byDate.find('.dtstart').first().attr('title');

    // Debug markers around each row's output.
    console.log('+++');
    // console.log(row.html());
    console.log('###');
    console.log(description.text());
    console.log(link);
    console.log('---');

    j.push({
      title: title.text(),
      description: description.text(),
      link: link,
      price: price.text(),
      date: dt
    });
  });

  console.log(j);
});
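/*
Sample markup for a single result, kept for reference (it shows the
.eventSummary / a / .head / p structure the selectors above look for):
https://www.list.co.uk/event/351218-pollock-parkrun/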
<div class="eventSummary clearfix noImage">
<a href="/event/351218-pollock-parkrun/">
<h2 class="head">Pollock parkrun</h2>
</a>
<p>An informal weekly 5k run in Pollok Country Park. Everyone is welcome, no matter how fast or slow (you&apos;re welcome to walk the route, bring your dog or push a buggy), so you can use it as a one-off fitness test, a chance to get some fresh air or come every week to try to beat your personal best time. It&apos;s friendly and&#x2026;</p>
</div>*/

25
views/pages/events.ejs Normal file
View File

@ -0,0 +1,25 @@
<%# Events page: wraps the event list between the shared head and footer partials. %>
<%# Reads `data` from the render locals and iterates it by index; each entry is read for link, title, description and date. %>
<% include ../partials/head %>
<div class="mui-container">
<div class="mui-panel">
<div class="mui-text-headline mui-text-accent">Events</div>
</div>
<div id="container" class="mui-panel">
<%# One row per event; values are written with the escaping output tag. No price field is rendered here. %>
<%
for (var i = 0; i < data.length; i++) { %>
<div class="mui-row">
<div><a href='<%= data[i].link %>'> <%= data[i].title %> </a></div>
<p><%= data[i].description %></p>
<p><%= data[i].date %></p>
</div>
<% } %>
</div>
</div>
<% include ../partials/footer %>

View File

@ -0,0 +1,3 @@
</body>
</html>

57
views/partials/head.ejs Normal file
View File

@ -0,0 +1,57 @@
<!DOCTYPE html>
<%# Shared page header: opens the document, declares metadata and inline styles, and opens <body>. %>
<%# The <body> and <html> elements opened here are left open for the including page to close. %>
<html lang="en">
<head>
  <%# Single charset declaration, placed first; the former iso-8859-1 http-equiv tag contradicted it. %>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=360, initial-scale=1">
  <title>Events</title>
  <meta name="Author" content="" />
  <link rel="stylesheet" type="text/css" href="css/mui.css">
  <style>
    /* Inline lists: no bullets' indent, items flow horizontally. */
    ul {
      margin: 0;
      padding: 0;
    }
    li {
      display: inline;
      margin: 0;
      padding: 0 4px 0 0;
    }
    .dates {
      padding: 2px;
      border: solid 1px #80007e;
      background-color: #ffffff;
    }
    #btc, #fx {
      font-size: 75%;
    }
    /* Status colours. */
    .up, .ontime {
      color: darkgreen;
    }
    .down, .delayed {
      color: darkred;
    }
    .nochange {
      color: #000000;
    }
    /* Monospaced box that preserves whitespace exactly. */
    .password {
      border: 1px solid #cccccc;
      background-color: #efefef;
      font-family: monospace;
      white-space: pre;
    }
  </style>
  <script src="//cdnjs.cloudflare.com/ajax/libs/zepto/1.1.4/zepto.min.js"></script>
</head>
<body>

View File

@ -1,6 +1,6 @@
var express = require('express'), path = require('path'), http = require('http'),
fx = require('./lib/fx'), btc = require('./lib/btc'), train = require('./lib/train'),
password = require('./lib/password') , clean = require('./lib/clean')
password = require('./lib/password') , clean = require('./lib/clean'), events = require('./lib/events')
//train = require('lib/train')
/* ,submit = require('./routes/mongo/submit') */
;
@ -10,6 +10,7 @@ var btcCache = {}, fxCache = {} , trainCache = {};
app.configure(function () {
app.set('port', process.env.PORT || 4545);
app.set('view engine', 'ejs');
app.use(express.logger('dev'));
app.use(express.cookieParser());
app.use(express.session({secret: '1234567890QWERTY'}));
@ -40,6 +41,10 @@ app.configure(function () {
app.use('/cleanit', clean.cleanit);
app.use('/events', events.getEvents);
app.use('/lot', function (req, res) {
var pg = require('pg');