// Word-HTML cleanup handler.
//
// Exports an object with a single request handler, `cleanit(req, res)`.
// The handler reads `req.body.source`, applies a fixed sequence of
// regular-expression replacements to it, sets the `Content-Type` response
// header to `application/html`, and ends the response with
// `front + output + back`.
//
// The `r` table defines four patterns. Only `msoTags` (matches are replaced
// with '') and `msoParagraphs` are referenced afterwards; `msoAttributes` and
// `crlf` are never used (the `crlf` pattern is repeated inline instead).
//
// NOTE(review): the text below looks damaged. Several string and regex
//   literals span raw line breaks, patterns such as /(.*?)<\/i>/g keep a
//   closing tag but no opening tag, and the inline `// from http://...`
//   comment swallows the rest of its physical line. Presumably embedded HTML
//   markup and the original line structure were lost; restore the file from
//   version control before changing any logic.
// NOTE(review): the character class [“|”] also contains a literal `|`, so
//   pipe characters are turned into `"` as well. Probably unintended.
// NOTE(review): `""[^""]*""` looks like C# verbatim-string quoting carried
//   over from the referenced article; as a JavaScript regex it requires two
//   consecutive double quotes on each side. Confirm the intent.
// NOTE(review): `req.body.source` is used without any check; a missing body
//   or a non-string `source` throws before a response is sent. Presumably a
//   body-parsing middleware populates `req.body`; verify against the caller.
// NOTE(review): `application/html` is not a registered media type
//   (`text/html` is); confirm whether this value is deliberate.
// NOTE(review): replace(/…/g, '…') is a no-op as written.
// NOTE(review): in the code visible here, `http` is never referenced and
//   `sys` appears only in a commented-out debug call.
var http = require('http'), sys = require('sys'); module.exports = { 'cleanit': function (req, res) { var r = { // from http://tim.mackey.ie/CleanWordHTMLUsingRegularExpressions.aspx 'msoTags': /<[\/]?(font|span|xml|del|ins|[ovwxp]:\w+)[^>]*?>/g, 'msoAttributes': /<([^>]*)(?:class|lang|style|size|face|[ovwxp]:\w+)=(?:'[^']*'|""[^""]*""|[^\s>]+)([^>]*)>/, 'msoParagraphs': /<([^>]*)(?:|[p]:\w+)=(?:'[^']*'|""[^""]*""|[^\s>]+)([^>]*)>/g, 'crlf': /(\\r\\n)/g }; var front = '\r\n \r\n \r\n \r\n Spellbinder - Chapter \r\n \r\n \r\n \r\n \r\n
\r\n
\r\n'; var back = '
'; var source = req.body.source; // console.log(source); var output = source.replace(r.msoTags, ''); output = output.replace(r.msoParagraphs, '

'); output = output.replace(/(\r\n)/g, ' '); output = output.replace(/(\\r\\n)/g, ' '); output = output.replace(/<\/i>/g, ''); output = output.replace(/[“|”]/g, '"'); output = output.replace(/’/g, '\''); output = output.replace(/…/g, '…'); output = output.replace(/(.*?)<\/i>/g, '$1'); output = output.replace(/(.*?)<\/b>/g, '$1'); output = output.replace(/

\*\*\*<\/p>/g, '

* * *

'); output = output.replace(/

CHAPTER\s(\d.?)<\/p>/, '

$1

'); output = output.replace(/

( |\s|\s<\/em>)<\/p>/g, ''); output = output.replace(/ /g, ' '); output = output.replace(/

\s<\/em><\/p>/g, ''); output = output.replace(/

\s<\/p>/g, ''); output = output.replace(/\s+/g, ' '); output = output.replace(/<\/p>/g, '

\r\n'); // sys.puts(sys.inspect(output, false, null)); res.setHeader('Content-Type', 'application/html'); res.end(front + output + back); } };