Rinser/bayes.js

116 lines
2.3 KiB
JavaScript
Raw Normal View History

2017-05-30 08:23:57 +00:00
/**
* Created by mdonnel on 18/05/2017.
*/
// Base name (without extension) of the JSON input file that go() reads from .\dist\.
let outputFile = 'jobs-special';
// A default log4js logger is created here; none of the functions visible in this file call it.
let log4js = require('log4js');
let logger = log4js.getLogger();
// Stop-word list handed out by getStopWords() and used to filter tokens.
const stopwords = require('stopwords-en');
// JSON reader/writer used for every .\dist\*.json file touched below.
let jsonfile = require('jsonfile');
// Applied to each item description in processFile() before tokenising.
var striptags = require('striptags');
// One entry per processed item: the array of non-stop-words from its description.
// Appended to by processFile() and read by buildCorpus().
let strippedArray = [];
2017-05-31 14:47:53 +00:00
// Word table keyed by token; buildCorpus() stores {score, common} per token and
// increments `common` on every further occurrence. loadCorpus() replaces this
// object with the contents of .\dist\corpus.json before buildCorpus() runs.
let corpus = {};
2017-05-30 08:23:57 +00:00
/**
 * Splits a sentence into lower-cased word tokens and removes every stop word.
 *
 * Tokens are runs of `\w` characters (letters, digits, underscore). Stop words
 * come from getStopWords() and are trimmed before comparison.
 *
 * @param {string} sentence - Text to tokenise.
 * @returns {string[]} Lower-cased tokens, in order of appearance, that are not
 *     stop words. Empty when the input is not a string or has no tokens.
 */
function getNoneStopWords(sentence) {
  // A Set is used instead of a plain object so that tokens which collide with
  // inherited Object.prototype keys (e.g. "constructor") are not mistaken for
  // stop words and silently dropped.
  const stopSet = new Set(getStopWords().map((stopWord) => stopWord.trim()));

  // String.prototype.match returns null (not an empty array) when nothing
  // matches, so fall back to an empty token list.
  const tokens =
    (typeof sentence === 'string' && sentence.match(/\w+/g)) || [];

  return tokens
    .map((token) => token.toLowerCase())
    .filter((token) => !stopSet.has(token));
}
/**
 * Accessor for the stop-word list loaded from the `stopwords-en` package.
 *
 * @returns {*} The module-level `stopwords` value itself (same reference, not a copy).
 */
function getStopWords() {
  const stopWordList = stopwords;
  return stopWordList;
}
/**
 * Reduces every item description to its non-stop-words and writes the
 * accumulated result to ./dist/stripped.json.
 *
 * For each item, HTML is removed with striptags(), the text is tokenised with
 * getNoneStopWords(), and the resulting word array is appended to the
 * module-level `strippedArray`. The whole `strippedArray` is then written
 * asynchronously; this function returns before the write completes.
 *
 * @param {{items: Iterable<{description: string}>}} data - Parsed input whose
 *     `items` each carry a `description`.
 * @throws {TypeError} If `data` or `data.items` is missing.
 */
function processFile(data) {
  if (!data || data.items == null) {
    // Fail with a message that names the problem instead of the engine's
    // generic "cannot read properties of undefined".
    throw new TypeError('processFile: expected data.items to be iterable');
  }

  // Forward slashes are accepted by Node on Windows as well as POSIX; the
  // previous backslash form only resolved to the dist directory on Windows.
  const fileName = './dist/stripped.json';
  console.log(fileName);

  for (const item of data.items) {
    const description = striptags(item.description);
    strippedArray.push(getNoneStopWords(description));
  }

  jsonfile.writeFile(fileName, strippedArray, (err) => {
    // Only report real failures; on success `err` is null and there is no
    // second callback argument worth printing.
    if (err) {
      console.error(err);
    }
  });
}
2017-05-31 14:47:53 +00:00
/**
 * Folds every word in `strippedArray` into the module-level `corpus` and
 * writes the result to ./dist/corpus.json.
 *
 * A word seen for the first time gets `{score: 0, common: 1}`; every further
 * occurrence increments its `common` counter. Counts are added on top of
 * whatever `corpus` already holds. The write is asynchronous; this function
 * returns before it completes.
 */
function buildCorpus() {
  console.log('buildCorpus');
  console.log(strippedArray.length);

  // Forward slashes work on both Windows and POSIX.
  const fileName = './dist/corpus.json';

  // `corpus` can be left undefined/null by a failed load; start from an empty
  // table rather than crashing on the first lookup.
  if (corpus === null || typeof corpus !== 'object') {
    corpus = {};
  }

  for (const words of strippedArray) {
    for (const word of words) {
      // Keys come from free text, so call hasOwnProperty via the prototype
      // instead of through the data object itself.
      if (Object.prototype.hasOwnProperty.call(corpus, word)) {
        corpus[word].common++;
      } else {
        corpus[word] = { score: 0, common: 1 };
      }
    }
  }

  jsonfile.writeFile(fileName, corpus, (err) => {
    // Only report real failures; `err` is null on success.
    if (err) {
      console.error(err);
    }
  });
}
/**
 * Loads ./dist/corpus.json into the module-level `corpus` and then runs
 * buildCorpus() to fold the current `strippedArray` into it.
 *
 * - File missing (ENOENT): treated as a first run; `corpus` starts empty.
 * - Any other read/parse error: logged, and buildCorpus() is NOT run so an
 *   unreadable corpus file is not overwritten.
 *
 * The read is asynchronous; this function returns before it completes.
 */
function loadCorpus() {
  // Forward slashes work on both Windows and POSIX.
  const fileName = './dist/corpus.json';
  console.log(fileName);

  jsonfile.readFile(fileName, (err, obj) => {
    if (err && err.code !== 'ENOENT') {
      console.error(err);
      return;
    }

    // Accept only a plain key/value table; anything else (including the
    // undefined delivered alongside ENOENT) falls back to an empty corpus.
    const isTable = !err && obj !== null && typeof obj === 'object' && !Array.isArray(obj);
    corpus = isTable ? obj : {};
    buildCorpus();
  });
}
2017-05-30 08:23:57 +00:00
/**
 * Entry point: reads ./dist/<outputFile>.json, passes the parsed content to
 * processFile(), then calls loadCorpus().
 *
 * If the input file cannot be read or parsed, the error is logged and nothing
 * else runs. The read is asynchronous; this function returns before it
 * completes.
 */
function go() {
  // Forward slashes work on both Windows and POSIX.
  const fileName = `./dist/${outputFile}.json`;
  console.log(fileName);

  jsonfile.readFile(fileName, (err, obj) => {
    if (err) {
      // Without input there is nothing to strip or count.
      console.error(err);
      return;
    }
    processFile(obj);
    loadCorpus();
  });
}
// Run the pipeline as soon as the module is loaded: read the input file,
// strip it via processFile(), then load and rebuild the corpus.
go();