old-silvrgit/lib/clean.js

49 lines
2.4 KiB
JavaScript
Raw Normal View History

2016-01-08 11:48:30 +00:00
var http = require('http'), sys = require('sys');
module.exports = {
cleanit: function (req, res) {
var r = {
// from http://tim.mackey.ie/CleanWordHTMLUsingRegularExpressions.aspx
msoTags: /<[\/]?(font|span|xml|del|ins|[ovwxp]:\w+)[^>]*?>/g,
msoAttributes: /<([^>]*)(?:class|lang|style|size|face|[ovwxp]:\w+)=(?:'[^']*'|""[^""]*""|[^\s>]+)([^>]*)>/,
msoParagraphs: /<([^>]*)(?:|[p]:\w+)=(?:'[^']*'|""[^""]*""|[^\s>]+)([^>]*)>/g,
crlf: /(\\r\\n)/g
};
var front = '<?xml version="1.0" encoding="utf-8"?>\r\n <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\r\n <html xmlns="http://www.w3.org/1999/xhtml">\r\n <head>\r\n <title>Spellbinder - Chapter </title>\r\n <link rel="stylesheet" type="text/css" href="imperaWeb.css"/>\r\n <link rel="stylesheet" type= "application/vnd.adobe-page-template+xml" href= "page-template.xpgt"/>\r\n </head>\r\n <body>\r\n <div id="text">\r\n <div class="section" id="xhtmldocuments">\r\n';
var back = ' </div> </div> </body> </html> ';
var source = req.body.source;
// console.log(source);
var output = source.replace(r.msoTags, "");
output = output.replace(r.msoParagraphs, '<p>');
output = output.replace(/(\r\n)/g, " ");
output = output.replace(/(\\r\\n)/g, " ");
output = output.replace(/<i><\/i>/g, "");
output = output.replace(/[“|”]/g, '"');
output = output.replace(//g, "'");
output = output.replace(/…/g, "&#8230;");
output = output.replace(/<i>(.*?)<\/i>/g, "<em>$1</em>");
output = output.replace(/<b>(.*?)<\/b>/g, "<strong>$1</strong>");
output = output.replace(/<p>\*\*\*<\/p>/g, "<p class='break'>* * *</p>");
output = output.replace(/<p>CHAPTER\s(\d.?)<\/p>/, "<h1>$1</h1>");
output = output.replace(/<p>(&nbsp;|\s|<em>\s<\/em>)<\/p>/g, "");
output = output.replace(/&nbsp;/g, " ");
output = output.replace(/<p><em>\s<\/em><\/p>/g, "");
output = output.replace(/<p>\s<\/p>/g, "");
output = output.replace(/\s+/g, " ");
output = output.replace(/<\/p>/g, "</p>\r\n");
// sys.puts(sys.inspect(output, false, null));
res.setHeader('Content-Type', 'application/html');
res.end(front + output + back);
}
};