mirror of
https://gitlab.silvrtree.co.uk/martind2000/recipes.git
synced 2025-01-10 23:45:07 +00:00
273 lines
6.4 KiB
JavaScript
273 lines
6.4 KiB
JavaScript
/**
 *
 * User: Martin Donnelly
 * Date: 2016-07-05
 * Time: 15:01
 *
 */
|
|
'use strict';
|
|
|
|
var cheerio = require('cheerio');
|
|
var log4js = require('log4js');
|
|
var logger = log4js.getLogger();
|
|
var STRING = require('string');
|
|
var URL = require('url');
|
|
var converter = require('html-to-markdown');
|
|
|
|
/**
 * Strips page chrome (navigation, adverts, share widgets, comments, related
 * content and similar) out of a selection.
 *
 * Each selector in the list below is looked up with `b.find(...)` and every
 * match is removed. The selection is modified in place and the very same
 * object is handed back to the caller.
 *
 * @param {Object} b - selection exposing `find(selector)` whose result
 *   exposes `remove()` (the callers in this file pass cheerio selections).
 * @returns {Object} the same `b` that was passed in.
 */
function cleaner(b) {
  // Selectors are processed one at a time, in exactly this order.
  const unwantedSelectors = [
    'LINK',
    'META',
    'TITLE',
    'div#disqus_thread',
    'SCRIPT',
    'FOOTER',
    'div.ssba',
    '.shareaholic-canvas',
    '.yarpp-related',
    'div.dfad',
    'div.postFooterShare',
    'div#nextPrevLinks',
    '.post-comments',
    'HEADER',
    '.post-title',
    '#side-menu',
    '.footer-container',
    '#pre-footer',
    '#cakephp-global-navigation',
    '.masthead',
    '.breadcrumb-header',
    '.single-recipe-sidebar',
    '#recipe-related-videos',
    '#tnav',
    '.footer',
    '#tb-wrapper',
    '#comments',
    '#menu',
    'aside',
    '#ad-mpu-premium-1-mobile',
    '#recipetools',
    '.adsense-ads-separator',
    '.comments',
    '.related-content',
    '.tip-wrapper',
    '#recipe-related-video-mobile',
    '.float-wrapper',
    '.source-jamie',
    '.ad.mobile',
    '.foodity-wrapper',
    '#ad-most-watched-mobile',
    '#sticky',
    '.nutrition-expand',
    '.grid-list-wrapper',
    '#recipe-finder__box',
    '.browser-upgrade-alert-message',
    '.main-menu',
    '.recipe-media',
    '.method-mobile-prompt-wrapper',
    '.sharebox-wrapper',
    '.jumbotron',
    '.slideshow',
    '.top-cat-recipe',
    '.analytic-box',
    '.recipe-reviews',
    '.recipe-tools',
    '.promo-module',
    '.widgettitle',
    '.post_related'
  ];

  for (const selector of unwantedSelectors) {
    b.find(selector).remove();
  }

  return b;
}
|
|
|
|
|
|
// Every rewritten <img> src is routed through this image proxy prefix.
const IMAGE_PROXY_PREFIX = 'http://image.silvrtree.co.uk/900,fit/';

/**
 * Points every IMG inside `container` at the image proxy and records the
 * first usable image URL as `obj.thumbnail`.
 *
 * Any error raised while walking the images is logged and swallowed, so a
 * broken image never prevents the rest of the page from being captured.
 *
 * @param {Function} $ - cheerio instance the container belongs to.
 * @param {Object} container - cheerio selection whose images are rewritten.
 * @param {string} url - URL of the page; base for resolving relative sources.
 * @param {Object} obj - result object; gains `thumbnail` if it has none yet.
 */
function rewriteImageSources($, container, url, obj) {
  try {
    container.find('IMG').each(function(index, elem) {
      const img = $(elem);
      const rawSrc = img.attr('src');

      // cheerio's attr() yields undefined (not null) when the attribute is
      // missing, so a `!== null` test lets such images through. Skip images
      // without a usable src instead of inventing a URL for them.
      if (typeof rawSrc !== 'string') {
        return;
      }

      let src = rawSrc.trim();

      if (src === '') {
        return;
      }

      if (!STRING(src).startsWith('http')) {
        logger.debug('Stripping:' + src);
        // Resolve against the page URL so that root-relative ('/a.jpg'),
        // document-relative ('a.jpg') and protocol-relative ('//cdn/a.jpg')
        // sources all end up as the absolute URL a browser would request.
        src = URL.resolve(url, src);
      }

      if (typeof obj.thumbnail === 'undefined') {
        obj.thumbnail = src;
      }

      img.attr('src', IMAGE_PROXY_PREFIX + src);
    });
  }
  catch (e) {
    logger.error(e);
  }
}

/**
 * Fills in the fields shared by every grabber and returns the result object.
 *
 * Must run after the container has been cleaned (and its images rewritten),
 * because `html`, `reduced`, `nib` and `markdown` are all read from the
 * current state of the document.
 *
 * NOTE(review): when the container selection is empty, `container.html()` is
 * not a string; how the `string` package copes with that has not been
 * verified here — confirm before relying on it.
 *
 * @param {Function} $ - cheerio instance for the whole document.
 * @param {Object} container - cleaned cheerio selection holding the content.
 * @param {string} title - raw text of the document's TITLE element.
 * @param {string} url - URL the page was fetched from.
 * @param {Object} obj - result object to populate.
 * @returns {Object} `obj`, with `url`, `html`, `reduced`, `nib` (at most the
 *   first 300 characters of the whitespace-collapsed text), `title` and
 *   `markdown` set.
 */
function assembleResult($, container, title, url, obj) {
  obj.url = STRING(url).trim().s;
  obj.html = $.html();
  obj.reduced = STRING(container.html()).trim().s;
  obj.nib = STRING(container.text()).collapseWhitespace().trim().left(300).s;
  obj.title = STRING(title).collapseWhitespace().s;
  obj.markdown = converter.convert(obj.reduced);

  return obj;
}

exports = module.exports = {
  /**
   * Grabs the content found under `DIV#main-content`.
   *
   * @param {string} body - HTML of the page.
   * @param {string} url - URL the page was fetched from.
   * @returns {Object} result with `url`, `html`, `reduced`, `nib`, `title`,
   *   `markdown` and, when the content holds an image, `thumbnail`.
   */
  doBBCGoodFood: function(body, url) {
    logger.info('GRABBING BBCGoodFood');

    const obj = {};
    const $ = cheerio.load(body);
    const title = $('TITLE').text();
    let content = $('DIV#main-content');

    logger.debug('Length:' , content.length);
    content = cleaner(content);
    logger.debug('Title: ', title);

    rewriteImageSources($, content, url, obj);

    return assembleResult($, content, title, url, obj);
  },

  /**
   * Grabs the content found under `DIV#page_body`.
   *
   * @param {string} body - HTML of the page.
   * @param {string} url - URL the page was fetched from.
   * @returns {Object} result with `url`, `html`, `reduced`, `nib`, `title`,
   *   `markdown` and, when the content holds an image, `thumbnail`.
   */
  doEngadget: function(body, url) {
    logger.info('GRABBING Engadget');

    const obj = {};
    const $ = cheerio.load(body);
    const title = $('TITLE').text();
    let content = $('DIV#page_body');

    logger.debug('Length:' , content.length);
    content = cleaner(content);
    logger.debug('Title: ', title);

    rewriteImageSources($, content, url, obj);

    return assembleResult($, content, title, url, obj);
  },

  /**
   * Grabs the content found under `DIV.jd-descr`.
   *
   * @param {string} body - HTML of the page.
   * @param {string} url - URL the page was fetched from.
   * @returns {Object} result with `url`, `html`, `reduced`, `nib`, `title`,
   *   `markdown` and, when the content holds an image, `thumbnail`.
   */
  doAndroidDeveloper: function(body, url) {
    logger.info('GRABBING AndroidDeveloper');

    const obj = {};
    const $ = cheerio.load(body);
    const title = $('TITLE').text();
    let content = $('DIV.jd-descr');

    logger.debug(content.length);
    content = cleaner(content);
    logger.debug(title);

    rewriteImageSources($, content, url, obj);

    return assembleResult($, content, title, url, obj);
  },

  /**
   * Grabs the content found under `DIV.entry`. Unlike the other grabbers,
   * image sources are left untouched and no thumbnail is recorded; thumbnail
   * links are only written to the log.
   *
   * @param {string} body - HTML of the page.
   * @param {string} url - URL the page was fetched from.
   * @returns {Object} result with `url`, `html`, `reduced`, `nib`, `title`
   *   and `markdown`.
   */
  doReddit: function(body, url) {
    logger.info('GRABBING REDDIT');

    const obj = {};
    const $ = cheerio.load(body);
    const title = $('TITLE').text();
    let content = $('DIV.entry');

    content.find('A.thumbnail').each(function(index, elem) {
      logger.warn($(elem));
    });

    logger.info('++++++');

    logger.debug(content.length);
    content = cleaner(content);
    logger.debug(title);

    return assembleResult($, content, title, url, obj);
  }
};
|