var http = require('http'), sys = require('sys'); module.exports = { cleanit: function (req, res) { var r = { // from http://tim.mackey.ie/CleanWordHTMLUsingRegularExpressions.aspx msoTags: /<[\/]?(font|span|xml|del|ins|[ovwxp]:\w+)[^>]*?>/g, msoAttributes: /<([^>]*)(?:class|lang|style|size|face|[ovwxp]:\w+)=(?:'[^']*'|""[^""]*""|[^\s>]+)([^>]*)>/, msoParagraphs: /<([^>]*)(?:|[p]:\w+)=(?:'[^']*'|""[^""]*""|[^\s>]+)([^>]*)>/g, crlf: /(\\r\\n)/g }; var front = '\r\n \r\n \r\n
\r\n'); output = output.replace(/(\r\n)/g, " "); output = output.replace(/(\\r\\n)/g, " "); output = output.replace(/<\/i>/g, ""); output = output.replace(/[“|”]/g, '"'); output = output.replace(/’/g, "'"); output = output.replace(/…/g, "…"); output = output.replace(/(.*?)<\/i>/g, "$1"); output = output.replace(/(.*?)<\/b>/g, "$1"); output = output.replace(/
\*\*\*<\/p>/g, "
* * *
"); output = output.replace(/CHAPTER\s(\d.?)<\/p>/, "
( |\s|\s<\/em>)<\/p>/g, "");
output = output.replace(/ /g, " ");
output = output.replace(/ \s<\/em><\/p>/g, "");
output = output.replace(/ \s<\/p>/g, "");
output = output.replace(/\s+/g, " ");
output = output.replace(/<\/p>/g, "