var http = require('http'), sys = require('sys'); module.exports = { cleanit: function (req, res) { var r = { // from http://tim.mackey.ie/CleanWordHTMLUsingRegularExpressions.aspx msoTags: /<[\/]?(font|span|xml|del|ins|[ovwxp]:\w+)[^>]*?>/g, msoAttributes: /<([^>]*)(?:class|lang|style|size|face|[ovwxp]:\w+)=(?:'[^']*'|""[^""]*""|[^\s>]+)([^>]*)>/, msoParagraphs: /<([^>]*)(?:|[p]:\w+)=(?:'[^']*'|""[^""]*""|[^\s>]+)([^>]*)>/g, crlf: /(\\r\\n)/g }; var front = '\r\n \r\n \r\n \r\n Spellbinder - Chapter \r\n \r\n \r\n \r\n \r\n
\r\n
\r\n'; var back = '
'; var source = req.body.source; // console.log(source); var output = source.replace(r.msoTags, ""); output = output.replace(r.msoParagraphs, '

'); output = output.replace(/(\r\n)/g, " "); output = output.replace(/(\\r\\n)/g, " "); output = output.replace(/<\/i>/g, ""); output = output.replace(/[“|”]/g, '"'); output = output.replace(/’/g, "'"); output = output.replace(/…/g, "…"); output = output.replace(/(.*?)<\/i>/g, "$1"); output = output.replace(/(.*?)<\/b>/g, "$1"); output = output.replace(/

\*\*\*<\/p>/g, "

* * *

"); output = output.replace(/

CHAPTER\s(\d.?)<\/p>/, "

$1

"); output = output.replace(/

( |\s|\s<\/em>)<\/p>/g, ""); output = output.replace(/ /g, " "); output = output.replace(/

\s<\/em><\/p>/g, ""); output = output.replace(/

\s<\/p>/g, ""); output = output.replace(/\s+/g, " "); output = output.replace(/<\/p>/g, "

\r\n"); // sys.puts(sys.inspect(output, false, null)); res.setHeader('Content-Type', 'application/html'); res.end(front + output + back); } };