135 lines
3.7 KiB
JavaScript
135 lines
3.7 KiB
JavaScript
const cheerio = require('cheerio');
|
|
|
|
const logger = require('log4js').getLogger('Euronews 🔧');
|
|
|
|
const { get, isEmpty } = require('lodash');
|
|
// Set this module's logger threshold to 'debug' as soon as the module is loaded.
logger.level = 'debug';
|
|
|
|
// Tag-stripping pattern: matches an opening or closing tag whose name does NOT
// start with one of em, p, "br /", strong, h1, h2 or h3 (the negative lookahead
// is a prefix check, so names merely beginning with e.g. "p" are skipped too).
// NOTE(review): no live code path visible here applies this pattern — confirm
// whether it is still needed.
const htmlTidy = /<(\/*?)(?!(em|p|br\s*\/|strong|h1|h2|h3))\w+?.+?>/gim;
|
|
|
|
/** Prefix of the resizing image proxy; the original absolute URL is appended to it. */
const IMAGE_PROXY_PREFIX = 'https://image.silvrtree.co.uk/640,fit,q80/';

/**
 * Serialises one direct child of the article container to the markup that is
 * emitted for it.
 *
 * - `amp-twitter` elements are replaced by a fixed-size embed that carries only
 *   the tweet id.
 * - For everything else, every `<amp-img>` / `</amp-img>` tag is renamed to
 *   `img` and every `src` attribute holding an absolute http(s) URL is routed
 *   through the image proxy. The `src` rewrite is not restricted to image tags.
 *
 * @param {Function} $ - The cheerio instance the element belongs to.
 * @param {Object} child - A DOM node taken from `$(...).children()`.
 * @returns {string} Markup for the child.
 */
function renderArticleChild($, child) {
  if (child.name === 'amp-twitter') {
    const tweetid = $(child).data('tweetid');

    return [
      '<amp-twitter width="375"',
      'height="472"',
      `layout="responsive" data-tweetid="${tweetid}" > </amp-twitter>`,
    ].join('\n');
  }

  return $.html($(child))
    // Global, tag-anchored rename: a plain string pattern would only touch the
    // first occurrence and leave the closing tag (and later images) as amp-img.
    .replace(/<(\/?)amp-img(?=[\s/>])/g, '<$1img')
    // Global so that every src in the child is proxied, not just the first one.
    .replace(/src=(['"])(https?:\/\/)/g, `src=$1${IMAGE_PROXY_PREFIX}$2`);
}

/**
 * Shared implementation behind `reduceArticle` and `reduceArticleV2`.
 *
 * Loads the page, strips the requested selectors, reads the Open Graph title
 * and image, and concatenates the rendered direct children of every
 * `div.c-article-content` element, followed by an end-bumper div.
 *
 * @param {string} body - Raw HTML of the article page (non-empty).
 * @param {Object} strip - Selectors to remove before rendering.
 * @param {string[]} [strip.documentWide] - Selectors removed from the whole document.
 * @param {string[]} [strip.withinContent] - Selectors removed only inside the article container.
 * @returns {{title: (string|undefined), image: string, html: string}} The reduced article.
 */
function extractArticle(body, { documentWide = [], withinContent = [] } = {}) {
  const $ = cheerio.load(body);

  for (const selector of documentWide) {
    $(selector).remove();
  }

  const title = $('meta[property="og:title"]').attr('content');
  const ogImage = $('meta[property="og:image"]').attr('content');
  // Without an og:image there is nothing to proxy; avoid emitting ".../undefined".
  const image = ogImage ? `${IMAGE_PROXY_PREFIX}${ogImage}` : '';

  const content = $('div.c-article-content');

  for (const selector of withinContent) {
    content.find(selector).remove();
  }

  const html = [];

  for (const container of content.toArray()) {
    for (const child of $(container).children().toArray()) {
      html.push(renderArticleChild($, child));
    }
  }

  html.push('<div class="endbumper"></div>');

  const article = { title, image, html: html.join('') };

  logger.debug(JSON.stringify(article));

  return article;
}

/**
 * Reduces an article page to its title, proxied lead image and body markup.
 *
 * Ads (`amp-ad`), `div.widget__wrapper` and `div.widget` elements are removed
 * from inside the article container before it is rendered.
 *
 * @param {string} [body=''] - Raw HTML of the article page.
 * @returns {Object} `{}` when `body` is empty, `null` or `undefined`; otherwise
 *   `{ title, image, html }`, where `image` is `''` if the page has no og:image.
 */
function reduceArticle(body = '') {
  if (body == null || body === '') return {};

  return extractArticle(body, {
    withinContent: ['amp-ad', 'div.widget__wrapper', 'div.widget'],
  });
}

/**
 * Variant of `reduceArticle` that removes `amp-ad` elements from the whole
 * document (not only the article container) and keeps `div.widget` elements;
 * `div.widget__wrapper` elements inside the container are still removed.
 *
 * @param {string} [body=''] - Raw HTML of the article page.
 * @returns {Object} `{}` when `body` is empty, `null` or `undefined`; otherwise
 *   `{ title, image, html }`, where `image` is `''` if the page has no og:image.
 */
function reduceArticleV2(body = '') {
  if (body == null || body === '') return {};

  return extractArticle(body, {
    documentWide: ['amp-ad'],
    withinContent: ['amp-ad', 'div.widget__wrapper'],
  });
}
|
|
|
|
// Public API of this module: both article reducers.
module.exports = { reduceArticle, reduceArticleV2 };
|
|
|