var http = require('http'), sys = require('sys'); module.exports = { 'cleanit': function (req, res) { var r = { // from http://tim.mackey.ie/CleanWordHTMLUsingRegularExpressions.aspx 'msoTags': /<[\/]?(font|span|xml|del|ins|[ovwxp]:\w+)[^>]*?>/g, 'msoAttributes': /<([^>]*)(?:class|lang|style|size|face|[ovwxp]:\w+)=(?:'[^']*'|""[^""]*""|[^\s>]+)([^>]*)>/, 'msoParagraphs': /<([^>]*)(?:|[p]:\w+)=(?:'[^']*'|""[^""]*""|[^\s>]+)([^>]*)>/g, 'crlf': /(\\r\\n)/g }; var front = '\r\n \r\n \r\n
\r\n'); output = output.replace(/(\r\n)/g, ' '); output = output.replace(/(\\r\\n)/g, ' '); output = output.replace(/<\/i>/g, ''); output = output.replace(/[“|”]/g, '"'); output = output.replace(/’/g, '\''); output = output.replace(/…/g, '…'); output = output.replace(/(.*?)<\/i>/g, '$1'); output = output.replace(/(.*?)<\/b>/g, '$1'); output = output.replace(/
\*\*\*<\/p>/g, '
* * *
'); output = output.replace(/CHAPTER\s(\d.?)<\/p>/, '
( |\s|\s<\/em>)<\/p>/g, '');
output = output.replace(/ /g, ' ');
output = output.replace(/ \s<\/em><\/p>/g, '');
output = output.replace(/ \s<\/p>/g, '');
output = output.replace(/\s+/g, ' ');
output = output.replace(/<\/p>/g, '