From 6af81e1885f98411ba9793f0daa825495e35ad55 Mon Sep 17 00:00:00 2001 From: martind2000 Date: Fri, 8 Jan 2016 14:53:49 +0000 Subject: [PATCH] Added event scraping --- lib/events.js | 72 +++++++++++++++++++++++++++++++++++++++ package.json | 4 +++ scrapetest.js | 57 +++++++++++++++++++++++++++++++ views/pages/events.ejs | 25 ++++++++++++++ views/partials/footer.ejs | 3 ++ views/partials/head.ejs | 57 +++++++++++++++++++++++++++++++ web-server.js | 7 +++- 7 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 lib/events.js create mode 100644 scrapetest.js create mode 100644 views/pages/events.ejs create mode 100644 views/partials/footer.ejs create mode 100644 views/partials/head.ejs diff --git a/lib/events.js b/lib/events.js new file mode 100644 index 0000000..ba8ec93 --- /dev/null +++ b/lib/events.js @@ -0,0 +1,72 @@ +var http = require('http'), request = require('request'), cheerio = require('cheerio'); + +var eventCache = { + last: 0, + data: {}, + expire: ((60 * 60) * 12) * 1000 +}; + + + +module.exports = { + getEvents: function (req, res) { + + console.log('Getting events...'); + var j=[], url = 'https://www.list.co.uk/events/days-out/when:this%20weekend/location:Dumbarton(55.9460,-4.5556)/distance:20/'; + + var now = new Date(); + + if ((now - eventCache.last) > eventCache.expire) + { + request(url, function(err, resp, body) { + if (err) + throw err; + $ = cheerio.load(body); + // console.log($); + // TODO: scraping goes here! + + $('.resultsRow').each( function(div) + { + var item={}; + var eventSummary = $(this).find('.eventSummary').first(); + var byDate = $(this).find('.byDate').first(); + + var title = eventSummary.find('.head').first(); + var description = eventSummary.find('P').first(); + var link = ' https://www.list.co.uk' + eventSummary.find('A').first().attr('href'); + + var price = byDate.find('.price').first(); + var dt = byDate.find('.dtstart').first().attr('title'); + + + item.title = title.text(); + item.description = description.text(); + item.link = link; + item.price = price.text(); + item.date = dt; + + j.push(item); + }); + + eventCache.last = now; + eventCache.data = j; + + res.render('pages/events',eventCache ); + + }); + } else + { + console.log('Using event cache...'); + + res.render('pages/events',eventCache ); + } + + + + + + } + +}; + + diff --git a/package.json b/package.json index c240d07..30331a6 100644 --- a/package.json +++ b/package.json @@ -2,12 +2,16 @@ "name": "silvrtree", "version": "0.1.1", "devDependencies": { + "cheerio": "^0.19.0", + "ejs": "^2.3.4", "htmlparser": "^1.7.7", "mammoth": "^0.3.25-pre.1", + "request": "^2.67.0", "wordsoap": "^0.2.0" }, "dependencies": { "express": "3.x", + "scrape": "^0.2.3", "unstyler": "^0.2.2" } } diff --git a/scrapetest.js b/scrapetest.js new file mode 100644 index 0000000..dfd3435 --- /dev/null +++ b/scrapetest.js @@ -0,0 +1,57 @@ +var request = require('request'); +var cheerio = require('cheerio'); + +var url = 'https://www.list.co.uk/events/days-out/when:this%20weekend/location:Dumbarton(55.9460,-4.5556)/distance:20/'; + + +var j=[]; + + +request(url, function(err, resp, body) { + if (err) + throw err; + $ = cheerio.load(body); + console.log($); + // TODO: scraping goes here! + + $('.resultsRow').each( function(div) + { + var item={}; + var eventSummary = $(this).find('.eventSummary').first(); + var byDate = $(this).find('.byDate').first(); + + var title = eventSummary.find('.head').first(); + var description = eventSummary.find('P').first(); + var link = ' https://www.list.co.uk' + eventSummary.find('A').first().attr('href'); + + var price = byDate.find('.price').first(); + var dt = byDate.find('.dtstart').first().attr('title'); + console.log('+++'); + // console.log($(this).html()); + console.log('###'); + console.log(description.text()); + console.log(link); + console.log('---'); + + item.title = title.text(); + item.description = description.text(); + item.link = link; + item.price = price.text(); + item.date = dt; + + j.push(item); + }); + + console.log(j); +}); + +/* + + https://www.list.co.uk/event/351218-pollock-parkrun/ + +
+ +

Pollock parkrun

+
+

An informal weekly 5k run in Pollok Country Park. Everyone is welcome, no matter how fast or slow (you're welcome to walk the route, bring your dog or push a buggy), so you can use it as a one-off fitness test, a chance to get some fresh air or come every week to try to beat your personal best time. It's friendly and…

+
*/ diff --git a/views/pages/events.ejs b/views/pages/events.ejs new file mode 100644 index 0000000..4a5e9d4 --- /dev/null +++ b/views/pages/events.ejs @@ -0,0 +1,25 @@ +<% include ../partials/head %> + +
+
+
Events
+
+ + +
+ + <% + for (var i = 0; i < data.length; i++) { %> +
+
<%= data[i].title %>
+

<%= data[i].description %>

+

<%= data[i].date %>

+ +
+ + <% } %> + +
+
+ +<% include ../partials/footer %> diff --git a/views/partials/footer.ejs b/views/partials/footer.ejs new file mode 100644 index 0000000..9870926 --- /dev/null +++ b/views/partials/footer.ejs @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/views/partials/head.ejs b/views/partials/head.ejs new file mode 100644 index 0000000..afcde36 --- /dev/null +++ b/views/partials/head.ejs @@ -0,0 +1,57 @@ + + + + + + + + Events + + + + + + + + \ No newline at end of file diff --git a/web-server.js b/web-server.js index bcd4028..f72cd60 100644 --- a/web-server.js +++ b/web-server.js @@ -1,6 +1,6 @@ var express = require('express'), path = require('path'), http = require('http'), fx = require('./lib/fx'), btc = require('./lib/btc'), train = require('./lib/train'), - password = require('./lib/password') , clean = require('./lib/clean') + password = require('./lib/password') , clean = require('./lib/clean'), events = require('./lib/events') //train = require('lib/train') /* ,submit = require('./routes/mongo/submit') */ ; @@ -10,6 +10,7 @@ var btcCache = {}, fxCache = {} , trainCache = {}; app.configure(function () { app.set('port', process.env.PORT || 4545); + app.set('view engine', 'ejs'); app.use(express.logger('dev')); app.use(express.cookieParser()); app.use(express.session({secret: '1234567890QWERTY'})); @@ -40,6 +41,10 @@ app.configure(function () { app.use('/cleanit', clean.cleanit); + + + app.use('/events', events.getEvents); + app.use('/lot', function (req, res) { var pg = require('pg');