const cheerio = require('cheerio');
const logger = require('log4js').getLogger('Euronews 🔧');
const { get, isEmpty } = require('lodash');

logger.level = 'debug';

// Tag-stripping pattern referenced only by the (currently disabled) tidy step
// in reduceArticle; kept so that step can be re-enabled without re-deriving it.
const htmlTidy = /<(\/*?)(?!(em|p|br\s*\/|strong|h1|h2|h3))\w+?.+?>/gim;

// Prefix of the image proxy; the original absolute URL is appended to it.
const IMAGE_PROXY = 'https://image.silvrtree.co.uk/640,fit,q80/';

/**
 * Serialises one child element of the article body to an HTML string.
 *
 * @param {Function} $ - The cheerio instance the element belongs to.
 * @param {Object} child - A child node of a `div.article__content` element.
 * @returns {string} The HTML for the child, with AMP image tags converted to
 *   `img` tags and the first absolute `src` routed through the image proxy.
 */
function renderChild($, child) {
  // AMP tweet embeds are replaced by a single-space placeholder.
  // NOTE(review): the element's data-tweetid is not used here — confirm
  // whether an embed was meant to be generated from it.
  if (child.name === 'amp-twitter') return ' ';

  // Rewrite both the opening and the closing amp-img tag. A plain string
  // pattern would only touch the first occurrence and leave `</amp-img>`
  // behind, producing mismatched markup.
  const markup = $.html($(child)).replace(/<(\/?)amp-img\b/g, '<$1img');

  // Only the first absolute http(s) src in the child is proxied; the original
  // quote character and scheme are preserved around the proxy prefix.
  return markup.replace(
    /src=(['"])(http[s]?:\/\/)/,
    (match, quote, scheme) => `src=${quote}${IMAGE_PROXY}${scheme}`,
  );
}

/**
 * Reduces an article page to its title, lead image and body HTML.
 *
 * The title and image are read from the `og:title` / `og:image` meta tags.
 * The body is built from the direct children of every `div.article__content`
 * element, in document order, followed by a trailing newline.
 *
 * @param {string} [body=''] - Raw HTML of the article page.
 * @returns {{title: (string|undefined), image: string, html: string}|{}}
 *   An empty object when `body` is the empty string; otherwise the reduced
 *   article.
 */
function reduceArticle(body = '') {
  if (body === '') return {};

  const $ = cheerio.load(body);

  const title = $('meta[property="og:title"]').attr('content');
  // NOTE(review): when the page has no og:image meta tag the interpolated
  // value is undefined, so the URL ends in the text "undefined" — confirm
  // whether callers need a different fallback.
  const image = `${IMAGE_PROXY}${$('meta[property="og:image"]').attr('content')}`;

  const html = [];
  for (const container of $('div.article__content').toArray()) {
    for (const child of $(container).children().toArray()) {
      html.push(renderChild($, child));
    }
  }
  html.push('\n');

  // The htmlTidy clean-up pass is intentionally not applied to the output.
  const outputHTML = html.join('');

  const obj = {};
  obj.title = title;
  obj.image = image;
  obj.html = outputHTML;

  logger.debug(JSON.stringify(obj));

  return obj;
}

module.exports = { reduceArticle };