diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 3bb7154..663b50e 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -2,27 +2,11 @@ - - + + + - - - - - - - - - - - - - - - - - @@ -46,16 +30,109 @@ - + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -79,29 +156,11 @@ - - - - - - - - - - - - - - - - - - - - - - - + + + + + @@ -109,46 +168,20 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + @@ -164,7 +197,18 @@ logg 6DA9769BA89834AA + es6 + nodejs + node + getstop + stripped + scot + html + processJson + + C:\dev\md\Rinser + + + + + + - - - - - @@ -522,6 +553,7 @@ + 1458043875334 @@ -870,25 +902,24 @@ - - + - - - + + - - + + - + @@ -897,7 +928,7 @@ - + @@ -926,6 +957,32 @@ + + + - - - - - - - - - - - - - - - - @@ -1179,7 +1220,6 @@ - @@ -1188,7 +1228,6 @@ - @@ -1196,7 +1235,6 @@ - @@ -1204,7 +1242,6 @@ - @@ -1212,7 +1249,6 @@ - @@ -1220,18 +1256,63 @@ - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -1240,76 +1321,93 @@ - + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - + - - + + + + + + + + + + + + - - + + - + - - - + + + + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - + + diff --git a/bayes.js b/bayes.js index 261e9c2..2fd3d89 100644 --- a/bayes.js +++ b/bayes.js @@ -10,6 +10,7 @@ let jsonfile = require('jsonfile'); var striptags = require('striptags'); let strippedArray = []; +let corpus = {}; function getNoneStopWords(sentence) { var common = getStopWords(); @@ -58,6 +59,43 @@ function processFile(data) { } +function buildCorpus() { + console.log('buildCorpus'); + console.log(strippedArray.length); + let fileName = ".\\dist\\corpus.json"; + + for(let i of strippedArray) { + //console.log(i); + for(let t of i) { + //console.log(t); + + if (!corpus.hasOwnProperty(t)) { + corpus[t] = {score:0, common:1}; + } else { + corpus[t].common++; + } + } + + } + + jsonfile.writeFile(fileName, corpus,function(err, obj) { + console.error(err); + console.log(obj); + }); + +} + +function loadCorpus() { + let fileName = ".\\dist\\corpus.json"; + console.log(fileName); + + let data = jsonfile.readFile(fileName, function(err, obj) { + corpus = obj; + buildCorpus(); + }); +} + + function go() { //let fileName = __dirname + "\\dist\\" + outputFile + ".json"; @@ -66,6 +104,7 @@ console.log(fileName); let data = jsonfile.readFile(fileName, function(err, obj) { processFile(obj); + loadCorpus(); }); } diff --git a/corpus.json b/corpus.json new file mode 100644 index 0000000..bd19721 --- /dev/null +++ b/corpus.json @@ -0,0 +1,2650 @@ +{ + "3": { + "score": 0, + "common": 2 + }, + "4": { + "score": 0, + "common": 2 + }, + "5": { + "score": 0, + "common": 6 + }, + "6": { + "score": 0, + "common": 16 + }, + "12": { + "score": 0, + "common": 4 + }, + "29": { + "score": 0, + "common": 2 + }, + "50": { + "score": 0, + "common": 2 + }, + "100": { + "score": 0, + "common": 2 + }, + "225": { + "score": 0, + "common": 1 + }, + "250": { + "score": 0, + "common": 4 + }, + "300": { + "score": 0, + "common": 5 + }, + "400": { + "score": 0, + "common": 26 + }, + "425": { + "score": 0, + "common": 4 + }, + "450": { + "score": 0, + "common": 2 + }, + "467": { + "score": 0, + "common": 4 + }, + "485": { + "score": 0, + "common": 3 + }, + "500": { + "score": 0, + "common": 14 + }, + "1105": { + "score": 0, + "common": 2 + }, + "2009": { + "score": 0, + "common": 2 + }, + "2013": { + "score": 0, + "common": 2 + }, + "2014": { + "score": 0, + "common": 2 + }, + "2016": { + "score": 0, + "common": 3 + }, + "3105": { + "score": 0, + "common": 1 + }, + "9800": { + "score": 0, + "common": 1 + }, + "13981": { + "score": 0, + "common": 2 + }, + "21299": { + "score": 0, + "common": 1 + }, + "89825": { + "score": 0, + "common": 2 + }, + "helping": { + "score": 0, + "common": 6 + }, + "recruit": { + "score": 0, + "common": 4 + }, + "cutting": { + "score": 0, + "common": 4 + }, + "edge": { + "score": 0, + "common": 4 + }, + "project": { + "score": 0, + "common": 14 + }, + "require": { + "score": 0, + "common": 5 + }, + "experienced": { + "score": 0, + "common": 10 + }, + "postgresql": { + "score": 1, + "common": 8 + }, + "dba": { + "score": 0, + "common": 26 + }, + "edinburgh": { + "score": 1, + "common": 10 + }, + "team": { + "score": 0, + "common": 20 + }, + "dbas": { + "score": 0, + "common": 4 + }, + "minimum": { + "score": 0, + "common": 5 + }, + "months": { + "score": 0, + "common": 9 + }, + "strong": { + "score": 0, + "common": 27 + }, + "chance": { + "score": 0, + "common": 6 + }, + "extension": { + "score": 0, + "common": 6 + }, + "contract": { + "score": 0, + "common": 23 + }, + "offer": { + "score": 0, + "common": 4 + }, + "daily": { + "score": 0, + "common": 8 + }, + "rate": { + "score": 0, + "common": 43 + }, + "organisation": { + "score": 0, + "common": 5 + }, + "based": { + "score": 0, + "common": 7 + }, + "city": { + "score": 0, + "common": 4 + }, + "centre": { + "score": 0, + "common": 4 + }, + "easily": { + "score": 0, + "common": 4 + }, + "commutable": { + "score": 0, + "common": 4 + }, + "public": { + "score": 0, + "common": 8 + }, + "transport": { + "score": 0, + "common": 4 + }, + "car": { + "score": 0, + "common": 4 + }, + "ideal": { + "score": 0, + "common": 8 + }, + "candidate": { + "score": 0, + "common": 10 + }, + "role": { + "score": 0, + "common": 30 + }, + "experience": { + "score": 0, + "common": 56 + }, + "sql": { + "score": 1, + "common": 51 + }, + "comm": { + "score": 0, + "common": 2 + }, + "day": { + "score": 0, + "common": 27 + }, + "nbsp": { + "score": 0, + "common": 283 + }, + "location": { + "score": 0, + "common": 22 + }, + "commercial": { + "score": 0, + "common": 4 + }, + "involve": { + "score": 0, + "common": 2 + }, + "multiple": { + "score": 0, + "common": 2 + }, + "projects": { + "score": 0, + "common": 7 + }, + "candidates": { + "score": 0, + "common": 4 + }, + "exceptional": { + "score": 0, + "common": 2 + }, + "communication": { + "score": 0, + "common": 9 + }, + "skills": { + "score": 0, + "common": 46 + }, + "client": { + "score": 0, + "common": 21 + }, + "senior": { + "score": 0, + "common": 5 + }, + "level": { + "score": 0, + "common": 3 + }, + "hit": { + "score": 0, + "common": 2 + }, + "running": { + "score": 0, + "common": 2 + }, + "key": { + "score": 0, + "common": 12 + }, + "database": { + "score": 1, + "common": 6 + }, + "administration": { + "score": 0, + "common": 4 + }, + "exposure": { + "score": 0, + "common": 2 + }, + "knowledge": { + "score": 0, + "common": 16 + }, + "etl": { + "score": 0, + "common": 7 + }, + "tools": { + "score": 0, + "common": 18 + }, + "ssis": { + "score": 0, + "common": 4 + }, + "programming": { + "score": 0, + "common": 4 + }, + "ssrs": { + "score": 0, + "common": 4 + }, + "ssas": { + "score": 0, + "common": 4 + }, + "server": { + "score": 0, + "common": 10 + }, + "excellent": { + "score": 0, + "common": 6 + }, + "opportunity": { + "score": 0, + "common": 5 + }, + "involved": { + "score": 0, + "common": 2 + }, + "largest": { + "score": 0, + "common": 2 + }, + "scotland": { + "score": 1, + "common": 5 + }, + "moment": { + "score": 0, + "common": 2 + }, + "prospect": { + "score": 0, + "common": 2 + }, + "term": { + "score": 0, + "common": 8 + }, + "apply": { + "score": 0, + "common": 3 + }, + "andy": { + "score": 0, + "common": 2 + }, + "cathcart": { + "score": 0, + "common": 4 + }, + "associates": { + "score": 0, + "common": 4 + }, + "daytype": { + "score": 0, + "common": 13 + }, + "contractlocation": { + "score": 0, + "common": 19 + }, + "edinburghcountry": { + "score": 0, + "common": 2 + }, + "ukcontact": { + "score": 0, + "common": 16 + }, + "andrew": { + "score": 0, + "common": 2 + }, + "weiradvertiser": { + "score": 0, + "common": 2 + }, + "limitedstart": { + "score": 0, + "common": 2 + }, + "asapreference": { + "score": 0, + "common": 15 + }, + "js": { + "score": 2, + "common": 15 + }, + "vr": { + "score": 0, + "common": 2 + }, + "london": { + "score": 1, + "common": 38 + }, + "data": { + "score": 0, + "common": 49 + }, + "analyst": { + "score": 0, + "common": 15 + }, + "required": { + "score": 0, + "common": 13 + }, + "hands": { + "score": 0, + "common": 4 + }, + "analysis": { + "score": 0, + "common": 7 + }, + "technical": { + "score": 0, + "common": 23 + }, + "sas": { + "score": 0, + "common": 18 + }, + "excel": { + "score": 0, + "common": 8 + }, + "specific": { + "score": 0, + "common": 2 + }, + "internal": { + "score": 0, + "common": 8 + }, + "create": { + "score": 0, + "common": 3 + }, + "timely": { + "score": 0, + "common": 2 + }, + "relevant": { + "score": 0, + "common": 6 + }, + "actionable": { + "score": 0, + "common": 2 + }, + "insights": { + "score": 0, + "common": 2 + }, + "functional": { + "score": 0, + "common": 2 + }, + "interpretation": { + "score": 0, + "common": 2 + }, + "insight": { + "score": 0, + "common": 4 + }, + "solution": { + "score": 0, + "common": 3 + }, + "design": { + "score": 0, + "common": 17 + }, + "development": { + "score": 0, + "common": 16 + }, + "statistical": { + "score": 0, + "common": 4 + }, + "modeling": { + "score": 0, + "common": 2 + }, + "quality": { + "score": 0, + "common": 7 + }, + "assurance": { + "score": 0, + "common": 2 + }, + "testing": { + "score": 0, + "common": 6 + }, + "visualisation": { + "score": 0, + "common": 2 + }, + "understanding": { + "score": 0, + "common": 9 + }, + "secondary": { + "score": 0, + "common": 2 + }, + "market": { + "score": 0, + "common": 6 + }, + "desk": { + "score": 0, + "common": 3 + }, + "story": { + "score": 0, + "common": 2 + }, + "telling": { + "score": 0, + "common": 2 + }, + "solving": { + "score": 0, + "common": 2 + }, + "quantitative": { + "score": 0, + "common": 2 + }, + "qualitative": { + "score": 0, + "common": 2 + }, + "techniques": { + "score": 0, + "common": 4 + }, + "successful": { + "score": 0, + "common": 4 + }, + "ideally": { + "score": 0, + "common": 6 + }, + "degree": { + "score": 0, + "common": 2 + }, + "subject": { + "score": 0, + "common": 6 + }, + "contact": { + "score": 0, + "common": 6 + }, + "chris": { + "score": 0, + "common": 6 + }, + "bumpstead": { + "score": 0, + "common": 4 + }, + "square": { + "score": 0, + "common": 4 + }, + "resources": { + "score": 0, + "common": 2 + }, + "londoncountry": { + "score": 0, + "common": 11 + }, + "bumpsteadadvertiser": { + "score": 0, + "common": 2 + }, + "resourcesemail": { + "score": 0, + "common": 2 + }, + "c994e": { + "score": 0, + "common": 2 + }, + "63c0a": { + "score": 0, + "common": 2 + }, + "apps": { + "score": 0, + "common": 12 + }, + "jobserve": { + "score": 0, + "common": 12 + }, + "comstart": { + "score": 0, + "common": 10 + }, + "05": { + "score": 0, + "common": 2 + }, + "2017reference": { + "score": 0, + "common": 2 + }, + "jsdataanalyst": { + "score": 0, + "common": 2 + }, + "highly": { + "score": 0, + "common": 12 + }, + "competitve": { + "score": 0, + "common": 4 + }, + "euro": { + "score": 0, + "common": 4 + }, + "sterling": { + "score": 0, + "common": 2 + }, + "frankfurt": { + "score": 0, + "common": 6 + }, + "germany": { + "score": 0, + "common": 2 + }, + "appdynamics": { + "score": -5, + "common": 5 + }, + "monitoring": { + "score": 0, + "common": 12 + }, + "performance": { + "score": 0, + "common": 8 + }, + "management": { + "score": 0, + "common": 13 + }, + "analytical": { + "score": 0, + "common": 2 + }, + "unix": { + "score": 0, + "common": 4 + }, + "linux": { + "score": 0, + "common": 4 + }, + "perl": { + "score": 0, + "common": 4 + }, + "scripts": { + "score": 0, + "common": 4 + }, + "queries": { + "score": 0, + "common": 4 + }, + "oracle": { + "score": 0, + "common": 4 + }, + "sybase": { + "score": 0, + "common": 4 + }, + "java": { + "score": 0, + "common": 6 + }, + "jvm": { + "score": 0, + "common": 4 + }, + "parameters": { + "score": 0, + "common": 4 + }, + "application": { + "score": 0, + "common": 22 + }, + "support": { + "score": 0, + "common": 12 + }, + "training": { + "score": 0, + "common": 6 + }, + "mentoring": { + "score": 0, + "common": 4 + }, + "installation": { + "score": 0, + "common": 4 + }, + "deployment": { + "score": 0, + "common": 4 + }, + "lifecycle": { + "score": 0, + "common": 2 + }, + "documentation": { + "score": 0, + "common": 4 + }, + "mansion": { + "score": 0, + "common": 4 + }, + "house": { + "score": 0, + "common": 4 + }, + "leading": { + "score": 0, + "common": 9 + }, + "international": { + "score": 0, + "common": 5 + }, + "business": { + "score": 0, + "common": 26 + }, + "technology": { + "score": 0, + "common": 3 + }, + "consultancy": { + "score": 0, + "common": 3 + }, + "focused": { + "score": 0, + "common": 2 + }, + "exclusively": { + "score": 0, + "common": 2 + }, + "financial": { + "score": 1, + "common": 2 + }, + "services": { + "score": 0, + "common": 12 + }, + "sector": { + "score": 0, + "common": 8 + }, + "practical": { + "score": 0, + "common": 2 + }, + "robust": { + "score": 0, + "common": 2 + }, + "solutions": { + "score": 0, + "common": 10 + }, + "industry": { + "score": 0, + "common": 2 + }, + "teams": { + "score": 0, + "common": 14 + }, + "qualified": { + "score": 0, + "common": 2 + }, + "consultants": { + "score": 0, + "common": 2 + }, + "matter": { + "score": 0, + "common": 4 + }, + "experts": { + "score": 0, + "common": 2 + }, + "specialising": { + "score": 0, + "common": 3 + }, + "change": { + "score": 0, + "common": 2 + }, + "mhc": { + "score": 0, + "common": 4 + }, + "delivers": { + "score": 0, + "common": 2 + }, + "base": { + "score": 0, + "common": 2 + }, + "tier": { + "score": 0, + "common": 2 + }, + "clients": { + "score": 0, + "common": 5 + }, + "europe": { + "score": 0, + "common": 5 + }, + "usa": { + "score": 0, + "common": 2 + }, + "asia": { + "score": 0, + "common": 2 + }, + "pac": { + "score": 0, + "common": 2 + }, + "including": { + "score": 0, + "common": 3 + }, + "names": { + "score": 0, + "common": 2 + }, + "banking": { + "score": 0, + "common": 3 + }, + "arena": { + "score": 0, + "common": 2 + }, + "established": { + "score": 0, + "common": 2 + }, + "headquartered": { + "score": 0, + "common": 2 + }, + "global": { + "score": 0, + "common": 3 + }, + "presence": { + "score": 0, + "common": 4 + }, + "offices": { + "score": 0, + "common": 2 + }, + "singapore": { + "score": 0, + "common": 2 + }, + "york": { + "score": 0, + "common": 2 + }, + "growing": { + "score": 0, + "common": 4 + }, + "india": { + "score": 0, + "common": 2 + }, + "firm": { + "score": 0, + "common": 4 + }, + "included": { + "score": 0, + "common": 2 + }, + "sunday": { + "score": 0, + "common": 2 + }, + "times": { + "score": 0, + "common": 2 + }, + "hiscox": { + "score": 0, + "common": 2 + }, + "tech": { + "score": 0, + "common": 2 + }, + "track": { + "score": 0, + "common": 2 + }, + "listing": { + "score": 0, + "common": 2 + }, + "meaning": { + "score": 0, + "common": 2 + }, + "fastest": { + "score": 0, + "common": 2 + }, + "consulting": { + "score": 0, + "common": 2 + }, + "firms": { + "score": 0, + "common": 2 + }, + "urgently": { + "score": 0, + "common": 2 + }, + "seek": { + "score": 0, + "common": 2 + }, + "hire": { + "score": 0, + "common": 3 + }, + "basis": { + "score": 0, + "common": 3 + }, + "enthusiastic": { + "score": 0, + "common": 2 + }, + "driven": { + "score": 0, + "common": 2 + }, + "consultant": { + "score": 0, + "common": 7 + }, + "engineer": { + "score": 0, + "common": 4 + }, + "central": { + "score": 0, + "common": 10 + }, + "provide": { + "score": 0, + "common": 6 + }, + "globe": { + "score": 0, + "common": 2 + }, + "platform": { + "score": 0, + "common": 4 + }, + "engineering": { + "score": 0, + "common": 4 + }, + "designing": { + "score": 0, + "common": 3 + }, + "implementing": { + "score": 0, + "common": 3 + }, + "wide": { + "score": 0, + "common": 2 + }, + "deployments": { + "score": 0, + "common": 2 + }, + "strategic": { + "score": 0, + "common": 2 + }, + "infrastructure": { + "score": 0, + "common": 2 + }, + "applications": { + "score": 0, + "common": 6 + }, + "expertise": { + "score": 0, + "common": 2 + }, + "target": { + "score": 0, + "common": 2 + }, + "improve": { + "score": 0, + "common": 4 + }, + "efficiency": { + "score": 0, + "common": 2 + }, + "stages": { + "score": 0, + "common": 2 + }, + "configure": { + "score": 0, + "common": 2 + }, + "alerting": { + "score": 0, + "common": 2 + }, + "production": { + "score": 0, + "common": 9 + }, + "additionally": { + "score": 0, + "common": 2 + }, + "responsible": { + "score": 0, + "common": 3 + }, + "software": { + "score": 0, + "common": 6 + }, + "maintenance": { + "score": 0, + "common": 2 + }, + "activities": { + "score": 0, + "common": 2 + }, + "nature": { + "score": 0, + "common": 2 + }, + "sme": { + "score": 0, + "common": 2 + }, + "offers": { + "score": 0, + "common": 2 + }, + "facing": { + "score": 0, + "common": 5 + }, + "happy": { + "score": 0, + "common": 2 + }, + "analytics": { + "score": 0, + "common": 7 + }, + "distinct": { + "score": 0, + "common": 2 + }, + "advantage": { + "score": 0, + "common": 2 + }, + "junior": { + "score": 0, + "common": 2 + }, + "proven": { + "score": 0, + "common": 4 + }, + "background": { + "score": 0, + "common": 14 + }, + "scripting": { + "score": 0, + "common": 2 + }, + "j2ee": { + "score": 0, + "common": 2 + }, + "memory": { + "score": 0, + "common": 2 + }, + "life": { + "score": 0, + "common": 2 + }, + "cycles": { + "score": 0, + "common": 2 + }, + "ability": { + "score": 0, + "common": 4 + }, + "communicate": { + "score": 0, + "common": 6 + }, + "people": { + "score": 0, + "common": 2 + }, + "levels": { + "score": 0, + "common": 2 + }, + "influence": { + "score": 0, + "common": 2 + }, + "progress": { + "score": 0, + "common": 2 + }, + "tasks": { + "score": 0, + "common": 2 + }, + "delivering": { + "score": 0, + "common": 2 + }, + "presentations": { + "score": 0, + "common": 2 + }, + "courses": { + "score": 0, + "common": 2 + }, + "writing": { + "score": 0, + "common": 2 + }, + "sterlingtype": { + "score": 0, + "common": 2 + }, + "germanycountry": { + "score": 0, + "common": 2 + }, + "germanycontact": { + "score": 0, + "common": 2 + }, + "beverley": { + "score": 0, + "common": 4 + }, + "thomasadvertiser": { + "score": 0, + "common": 2 + }, + "consultingemail": { + "score": 0, + "common": 2 + }, + "thomas": { + "score": 0, + "common": 2 + }, + "e3bc7": { + "score": 0, + "common": 2 + }, + "23e58": { + "score": 0, + "common": 2 + }, + "local": { + "score": 0, + "common": 9 + }, + "authority": { + "score": 0, + "common": 8 + }, + "manage": { + "score": 0, + "common": 6 + }, + "product": { + "score": 0, + "common": 4 + }, + "troubled": { + "score": 0, + "common": 4 + }, + "families": { + "score": 0, + "common": 4 + }, + "program": { + "score": 0, + "common": 4 + }, + "solid": { + "score": 0, + "common": 8 + }, + "social": { + "score": 0, + "common": 4 + }, + "care": { + "score": 0, + "common": 4 + }, + "master": { + "score": 0, + "common": 4 + }, + "month": { + "score": 0, + "common": 7 + }, + "scientist": { + "score": 0, + "common": 6 + }, + "advanced": { + "score": 0, + "common": 7 + }, + "machine": { + "score": 0, + "common": 4 + }, + "learning": { + "score": 0, + "common": 4 + }, + "mining": { + "score": 0, + "common": 4 + }, + "develop": { + "score": 0, + "common": 4 + }, + "requirements": { + "score": 0, + "common": 9 + }, + "expert": { + "score": 0, + "common": 4 + }, + "pyhton": { + "score": 0, + "common": 2 + }, + "ownership": { + "score": 0, + "common": 2 + }, + "investigating": { + "score": 0, + "common": 2 + }, + "aqcuiring": { + "score": 0, + "common": 2 + }, + "creating": { + "score": 0, + "common": 2 + }, + "pipelines": { + "score": 0, + "common": 2 + }, + "deploying": { + "score": 0, + "common": 2 + }, + "models": { + "score": 0, + "common": 2 + }, + "quantitive": { + "score": 0, + "common": 2 + }, + "insurance": { + "score": 0, + "common": 2 + }, + "advantageous": { + "score": 0, + "common": 4 + }, + "initial": { + "score": 0, + "common": 3 + }, + "extensions": { + "score": 0, + "common": 2 + }, + "paying": { + "score": 0, + "common": 2 + }, + "competitive": { + "score": 0, + "common": 2 + }, + "rates": { + "score": 0, + "common": 2 + }, + "type": { + "score": 0, + "common": 3 + }, + "charlie": { + "score": 0, + "common": 2 + }, + "daveyadvertiser": { + "score": 0, + "common": 2 + }, + "austin": { + "score": 0, + "common": 2 + }, + "fraser": { + "score": 0, + "common": 2 + }, + "ltdstart": { + "score": 0, + "common": 2 + }, + "ds18": { + "score": 0, + "common": 2 + }, + "reporting": { + "score": 0, + "common": 8 + }, + "vba": { + "score": -1, + "common": 4 + }, + "requires": { + "score": 0, + "common": 2 + }, + "supervise": { + "score": 0, + "common": 2 + }, + "analysts": { + "score": 0, + "common": 2 + }, + "conduct": { + "score": 0, + "common": 2 + }, + "updates": { + "score": 0, + "common": 2 + }, + "distribution": { + "score": 0, + "common": 2 + }, + "regular": { + "score": 0, + "common": 3 + }, + "reports": { + "score": 0, + "common": 8 + }, + "enterprise": { + "score": 0, + "common": 7 + }, + "guide": { + "score": 0, + "common": 2 + }, + "objects": { + "score": 0, + "common": 4 + }, + "optimisation": { + "score": 0, + "common": 2 + }, + "generate": { + "score": 0, + "common": 2 + }, + "dashboard": { + "score": 0, + "common": 2 + }, + "ensure": { + "score": 0, + "common": 2 + }, + "ease": { + "score": 0, + "common": 2 + }, + "consistency": { + "score": 0, + "common": 2 + }, + "approach": { + "score": 0, + "common": 2 + }, + "definite": { + "score": 0, + "common": 2 + }, + "build": { + "score": 0, + "common": 5 + }, + "db2": { + "score": 0, + "common": 2 + }, + "extraction": { + "score": 0, + "common": 3 + }, + "coding": { + "score": 0, + "common": 2 + }, + "efficiencies": { + "score": 0, + "common": 2 + }, + "submit": { + "score": 0, + "common": 2 + }, + "james": { + "score": 0, + "common": 5 + }, + "grahamadvertiser": { + "score": 0, + "common": 2 + }, + "trilogy": { + "score": 0, + "common": 2 + }, + "internationalstart": { + "score": 0, + "common": 2 + }, + "jg": { + "score": 0, + "common": 2 + }, + "sassqlrepb": { + "score": 0, + "common": 2 + }, + "glasgow": { + "score": 2, + "common": 7 + }, + "fantastic": { + "score": 0, + "common": 2 + }, + "exciting": { + "score": 0, + "common": 3 + }, + "profile": { + "score": 0, + "common": 5 + }, + "ux": { + "score": 0, + "common": 13 + }, + "designer": { + "score": 0, + "common": 7 + }, + "joining": { + "score": 0, + "common": 3 + }, + "existing": { + "score": 0, + "common": 6 + }, + "skilled": { + "score": 0, + "common": 3 + }, + "designers": { + "score": 0, + "common": 6 + }, + "core": { + "score": 0, + "common": 3 + }, + "sketching": { + "score": 0, + "common": 3 + }, + "hand": { + "score": 0, + "common": 3 + }, + "coded": { + "score": 0, + "common": 3 + }, + "css": { + "score": 0, + "common": 3 + }, + "rapid": { + "score": 0, + "common": 3 + }, + "prototyping": { + "score": 0, + "common": 3 + }, + "mobile": { + "score": 0, + "common": 3 + }, + "interface": { + "score": 0, + "common": 3 + }, + "agile": { + "score": 0, + "common": 6 + }, + "position": { + "score": 0, + "common": 3 + }, + "delivery": { + "score": 0, + "common": 4 + }, + "produce": { + "score": 0, + "common": 5 + }, + "implement": { + "score": 0, + "common": 5 + }, + "concepts": { + "score": 0, + "common": 3 + }, + "digital": { + "score": 0, + "common": 3 + }, + "scotlandcountry": { + "score": 0, + "common": 3 + }, + "david": { + "score": 0, + "common": 4 + }, + "gillespieadvertiser": { + "score": 0, + "common": 2 + }, + "bridge": { + "score": 0, + "common": 3 + }, + "ltdemail": { + "score": 0, + "common": 6 + }, + "gillespie": { + "score": 0, + "common": 2 + }, + "2fd70": { + "score": 0, + "common": 2 + }, + "bca98": { + "score": 0, + "common": 2 + }, + "jsdgux": { + "score": 0, + "common": 2 + }, + "time": { + "score": 0, + "common": 2 + }, + "check": { + "score": 0, + "common": 2 + }, + "hearing": { + "score": 0, + "common": 2 + }, + "opportunities": { + "score": 0, + "common": 5 + }, + "couple": { + "score": 0, + "common": 2 + }, + "datawarehouse": { + "score": 0, + "common": 7 + }, + "architect": { + "score": 0, + "common": 7 + }, + "accounting": { + "score": 0, + "common": 2 + }, + "details": { + "score": 0, + "common": 6 + }, + "start": { + "score": 0, + "common": 2 + }, + "asap": { + "score": 0, + "common": 3 + }, + "duration": { + "score": 0, + "common": 3 + }, + "renewable": { + "score": 0, + "common": 2 + }, + "salary": { + "score": 0, + "common": 2 + }, + "power": { + "score": 0, + "common": 2 + }, + "proceed": { + "score": 0, + "common": 2 + }, + "send": { + "score": 0, + "common": 3 + }, + "email": { + "score": 0, + "common": 3 + }, + "share": { + "score": 0, + "common": 2 + }, + "quick": { + "score": 0, + "common": 2 + }, + "chat": { + "score": 0, + "common": 2 + }, + "mary": { + "score": 0, + "common": 6 + }, + "priscilina": { + "score": 0, + "common": 4 + }, + "accion": { + "score": 0, + "common": 4 + }, + "labs": { + "score": 0, + "common": 4 + }, + "limited": { + "score": 0, + "common": 2 + }, + "priscilinaadvertiser": { + "score": 0, + "common": 2 + }, + "06dd5": { + "score": 0, + "common": 2 + }, + "_": { + "score": 0, + "common": 2 + }, + "dw": { + "score": 0, + "common": 2 + }, + "html": { + "score": 1, + "common": 1 + }, + "html5": { + "score": 1, + "common": 1 + }, + "node": { + "score": 1, + "common": 3 + }, + "es6": { + "score": 1, + "common": 1 + }, + "nodejs": { + "score": 1, + "common": 1 + }, + "node.js": { + "score": 0, + "common": 1 + }, + "emea": { + "score": 0, + "common": 2 + }, + "company": { + "score": 0, + "common": 1 + }, + "seeking": { + "score": 0, + "common": 1 + }, + "travel": { + "score": 0, + "common": 4 + }, + "split": { + "score": 0, + "common": 1 + }, + "facilitate": { + "score": 0, + "common": 1 + }, + "personal": { + "score": 0, + "common": 1 + }, + "circumstances": { + "score": 0, + "common": 1 + }, + "visa": { + "score": 0, + "common": 1 + }, + "passport": { + "score": 0, + "common": 1 + }, + "considered": { + "score": 0, + "common": 1 + }, + "l2": { + "score": 0, + "common": 1 + }, + "l3": { + "score": 0, + "common": 1 + }, + "bespoke": { + "score": 0, + "common": 1 + }, + "technologies": { + "score": 0, + "common": 1 + }, + "main": { + "score": 0, + "common": 1 + }, + "responsibilities": { + "score": 0, + "common": 1 + }, + "perform": { + "score": 0, + "common": 3 + }, + "manual": { + "score": 0, + "common": 1 + }, + "lab": { + "score": 0, + "common": 1 + }, + "desktop": { + "score": 0, + "common": 1 + }, + "load": { + "score": 0, + "common": 1 + }, + "configured": { + "score": 0, + "common": 1 + }, + "xml": { + "score": 0, + "common": 4 + }, + "files": { + "score": 0, + "common": 1 + }, + "javascript": { + "score": 5, + "common": 3 + }, + "desired": { + "score": 0, + "common": 1 + }, + "presentation": { + "score": 0, + "common": 2 + }, + "documents": { + "score": 0, + "common": 1 + }, + "feature": { + "score": 0, + "common": 1 + }, + "enabling": { + "score": 0, + "common": 1 + }, + "guides": { + "score": 0, + "common": 1 + }, + "forge": { + "score": 0, + "common": 1 + }, + "maintain": { + "score": 0, + "common": 1 + }, + "professional": { + "score": 0, + "common": 1 + }, + "relationships": { + "score": 0, + "common": 2 + }, + "customer": { + "score": 0, + "common": 1 + }, + "organisations": { + "score": 0, + "common": 1 + }, + "departments": { + "score": 0, + "common": 2 + }, + "document": { + "score": 0, + "common": 1 + }, + "procedures": { + "score": 0, + "common": 1 + }, + "recommendations": { + "score": 0, + "common": 1 + }, + "markets": { + "score": 0, + "common": 1 + }, + "skill": { + "score": 0, + "common": 1 + }, + "desirable": { + "score": 0, + "common": 2 + }, + "interpersonal": { + "score": 0, + "common": 1 + }, + "fluency": { + "score": 0, + "common": 1 + }, + "english": { + "score": 0, + "common": 1 + }, + "spoken": { + "score": 0, + "common": 1 + }, + "written": { + "score": 0, + "common": 1 + }, + "reading": { + "score": 0, + "common": 1 + }, + "comprehension": { + "score": 0, + "common": 1 + }, + "previous": { + "score": 0, + "common": 2 + }, + "service": { + "score": 0, + "common": 2 + }, + "jira": { + "score": 0, + "common": 1 + }, + "expenses": { + "score": 0, + "common": 1 + }, + "meal": { + "score": 0, + "common": 1 + }, + "allowance": { + "score": 0, + "common": 1 + }, + "discuss": { + "score": 0, + "common": 2 + }, + "turley": { + "score": 0, + "common": 2 + }, + "recruitment": { + "score": 0, + "common": 2 + }, + "relatedtype": { + "score": 0, + "common": 1 + }, + "englandcontact": { + "score": 0, + "common": 1 + }, + "turleyadvertiser": { + "score": 0, + "common": 1 + }, + "concept": { + "score": 0, + "common": 1 + }, + "resourcingemail": { + "score": 0, + "common": 1 + }, + "1dc3d": { + "score": 0, + "common": 1 + }, + "25d2d": { + "score": 0, + "common": 1 + }, + "comreference": { + "score": 0, + "common": 2 + }, + "js21895": { + "score": 0, + "common": 1 + }, + "tableau": { + "score": -5, + "common": 12 + }, + "nigel": { + "score": 0, + "common": 3 + }, + "frank": { + "score": 0, + "common": 3 + }, + "recruiting": { + "score": 0, + "common": 1 + }, + "behalf": { + "score": 0, + "common": 1 + }, + "implementations": { + "score": 0, + "common": 1 + }, + "perspective": { + "score": 0, + "common": 1 + }, + "essential": { + "score": 0, + "common": 1 + }, + "translate": { + "score": 0, + "common": 2 + }, + "audience": { + "score": 0, + "common": 2 + }, + "dashboards": { + "score": 0, + "common": 2 + }, + "branded": { + "score": 0, + "common": 2 + }, + "suite": { + "score": 0, + "common": 2 + }, + "entire": { + "score": 0, + "common": 2 + }, + "alteryx": { + "score": -5, + "common": 4 + }, + "providing": { + "score": 0, + "common": 3 + }, + "guidance": { + "score": 0, + "common": 2 + }, + "structure": { + "score": 0, + "common": 1 + }, + "engage": { + "score": 0, + "common": 1 + }, + "users": { + "score": 0, + "common": 1 + }, + "stakeholder": { + "score": 0, + "common": 2 + }, + "gathering": { + "score": 0, + "common": 2 + }, + "implementation": { + "score": 0, + "common": 5 + }, + "callum": { + "score": 0, + "common": 3 + }, + "runnegar": { + "score": 0, + "common": 3 + }, + "mundy": { + "score": 0, + "common": 1 + }, + "alternatively": { + "score": 0, + "common": 1 + }, + "leader": { + "score": 0, + "common": 1 + }, + "solely": { + "score": 0, + "common": 1 + }, + "placing": { + "score": 0, + "common": 1 + }, + "built": { + "score": 0, + "common": 1 + }, + "employers": { + "score": 0, + "common": 1 + }, + "unrivaled": { + "score": 0, + "common": 1 + }, + "jobs": { + "score": 0, + "common": 1 + }, + "mundyadvertiser": { + "score": 0, + "common": 1 + }, + "internationalemail": { + "score": 0, + "common": 1 + }, + "3a2fa": { + "score": 0, + "common": 1 + }, + "40ea2": { + "score": 0, + "common": 1 + }, + "class": { + "score": 0, + "common": 1 + }, + "manipulation": { + "score": 0, + "common": 1 + }, + "remediation": { + "score": 0, + "common": 1 + }, + "prestigious": { + "score": 0, + "common": 1 + }, + "extend": { + "score": 0, + "common": 1 + }, + "retail": { + "score": 0, + "common": 1 + }, + "thrive": { + "score": 0, + "common": 1 + }, + "environment": { + "score": 0, + "common": 1 + }, + "tasked": { + "score": 0, + "common": 1 + }, + "analysing": { + "score": 0, + "common": 2 + }, + "complex": { + "score": 0, + "common": 1 + }, + "datasets": { + "score": 0, + "common": 1 + }, + "determine": { + "score": 0, + "common": 1 + }, + "accurate": { + "score": 0, + "common": 1 + }, + "populations": { + "score": 0, + "common": 1 + }, + "understand": { + "score": 0, + "common": 2 + }, + "clarify": { + "score": 0, + "common": 1 + }, + "birmingham": { + "score": 0, + "common": 1 + }, + "sab": { + "score": 0, + "common": 2 + }, + "choudhury": { + "score": 0, + "common": 1 + }, + "implemented": { + "score": 0, + "common": 1 + }, + "complexities": { + "score": 0, + "common": 1 + }, + "technically": { + "score": 0, + "common": 2 + }, + "liaise": { + "score": 0, + "common": 1 + }, + "deliver": { + "score": 0, + "common": 1 + }, + "wider": { + "score": 0, + "common": 1 + }, + "scale": { + "score": 0, + "common": 1 + }, + "plan": { + "score": 0, + "common": 1 + }, + "objective": { + "score": 0, + "common": 1 + }, + "improvement": { + "score": 0, + "common": 1 + }, + "programme": { + "score": 0, + "common": 1 + }, + "structures": { + "score": 0, + "common": 2 + }, + "capability": { + "score": 0, + "common": 1 + }, + "choudhuryadvertiser": { + "score": 0, + "common": 1 + }, + "networkersstart": { + "score": 0, + "common": 1 + }, + "emp391496": { + "score": 0, + "common": 1 + }, + "united": { + "score": 0, + "common": 2 + }, + "kingdom": { + "score": 0, + "common": 1 + }, + "intelligence": { + "score": 0, + "common": 1 + }, + "hadoop": { + "score": 0, + "common": 3 + }, + "aws": { + "score": 1, + "common": 2 + }, + "description": { + "score": 0, + "common": 1 + }, + "centers": { + "score": 0, + "common": 1 + }, + "oversight": { + "score": 0, + "common": 1 + }, + "creation": { + "score": 0, + "common": 1 + }, + "bigdata": { + "score": 0, + "common": 1 + }, + "cloud": { + "score": 0, + "common": 2 + }, + "prior": { + "score": 0, + "common": 1 + }, + "evaluation": { + "score": 0, + "common": 1 + }, + "selection": { + "score": 0, + "common": 1 + }, + "framework": { + "score": 0, + "common": 1 + }, + "architecture": { + "score": 0, + "common": 2 + }, + "proposed": { + "score": 0, + "common": 1 + }, + "transition": { + "score": 0, + "common": 1 + }, + "operational": { + "score": 0, + "common": 1 + }, + "include": { + "score": 0, + "common": 1 + }, + "covering": { + "score": 0, + "common": 1 + }, + "flows": { + "score": 0, + "common": 1 + }, + "nosql": { + "score": 1, + "common": 1 + }, + "apis": { + "score": 1, + "common": 1 + }, + "json": { + "score": 1, + "common": 1 + }, + "rdbms": { + "score": 0, + "common": 1 + }, + "sources": { + "score": 0, + "common": 1 + }, + "sap": { + "score": 0, + "common": 1 + }, + "generic": { + "score": 0, + "common": 1 + }, + "extractors": { + "score": 0, + "common": 1 + }, + "function": { + "score": 0, + "common": 1 + }, + "modules": { + "score": 0, + "common": 1 + }, + "kingdomcountry": { + "score": 0, + "common": 1 + }, + "ravindraadvertiser": { + "score": 0, + "common": 1 + }, + "veda": { + "score": 0, + "common": 1 + }, + "ravindra": { + "score": 0, + "common": 1 + }, + "71ea7": { + "score": 0, + "common": 1 + }, + "blue": { + "score": 0, + "common": 1 + }, + "chip": { + "score": 0, + "common": 1 + }, + "rosie": { + "score": 0, + "common": 2 + }, + "pickersgilladvertiser": { + "score": 0, + "common": 1 + }, + "pickersgill": { + "score": 0, + "common": 1 + }, + "31fc9": { + "score": 0, + "common": 1 + }, + "be575": { + "score": 0, + "common": 1 + }, + "jsdguxc": { + "score": 0, + "common": 1 + } +} diff --git a/jobs-corpus.js b/jobs-corpus.js new file mode 100644 index 0000000..01c62a3 --- /dev/null +++ b/jobs-corpus.js @@ -0,0 +1,351 @@ +var outputFile = 'jobs-special', RssBraider = require('rss-braider'), fs = require( + 'fs'), ejs = require('ejs'), read = require('fs').readFileSync, join = require( + 'path').join, str = read(join(__dirname, '/templates/rss.ejs'), + 'utf8'), feeds = {}; + +var log4js = require('log4js'); +var logger = log4js.getLogger(); +const stopwords = require('stopwords-en'); +var striptags = require('striptags'); +let jsonfile = require('jsonfile'); +let strippedArray = []; +let corpus = {}; + +// Pull feeds from config files: +// feeds.simple_test_feed = require("./config/feed").feed; +// Or define in-line +feeds.simple_test_feed = { + "feed_name": "feed", "default_count": 1, "no_cdata_fields": [], // Don't wrap these fields in CDATA tags + "plugins": [ + 'filter_location', 'filter_reject', 'filter_md_jobs', 'filter_today_only' + /*'filter_location', 'filter_today_only'*/ + ], "meta": { + "title": "Jobs", + "description": "Combined Jobs Feed", + 'site_url': 'http://pipes.silvrtree.co.uk/jobs-special.xml' + }, "sources": [ + + { + "count": 100, + "feed_url": "http://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss", + "page": "https://www.jobserve.com/gb/en/JobSearch.aspx?shid=A7ACEE7915E274717C" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=47820652" + }, + + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.jobserve.com/MySearch/6DA9769BA89834AA.rss" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.JobServe.com/MySearch/EDF47BEA6B31EF.rss" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.JobServe.com/MySearch/3CAD044BEF2BFA.rss" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.JobServe.com/MySearch/C7B25D86D0844A.rss" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.JobServe.com/MySearch/64A3EEF615FA4C.rss" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21564698" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21564712" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=21942123" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33166238" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34888173" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/m7dp711z2r.xml" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/pfvf7o7z2r.xml" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/lluqnt8z2r.xml" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/tu33qt8z2r.xml" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/u3btnz8z2r.xml" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33256062" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=33450169" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34517029" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.purelyit.co.uk/cgi-bin/advsearch?rss_feed=1&daysback=1&jbe_id=34888105" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.technojobs.co.uk/rss.php/glasgow/searchtypeand/locationScotland/sortbyrelevant/jobtypeall" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=London&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=CO_LONDON&compare_search=London&search_emp_mkt_cd=ALL" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=Glasgow&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=TO_G1_GLASGOW&compare_search=Glasgow&search_emp_mkt_cd=ALL" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&search_emp_mkt_cd=ALL" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html%20,%20asp%20,%20web%20,%20sql%20,%20delphi%20,%20vb%20,%20vbscript%20,%20php%20,%20ajax%20,%20mysql%20,%20sqlserver%20,%20javascript%20,%20intranet%20,%20vmware%20,%20virtulization&location_include=Germany&location_within=10&reqd_salary=ANY|&daysback=7&scc=UK&compare_resolved=CY_GERMANY&compare_search=Germany&search_emp_mkt_cd=ALL" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/b1d7e6c3a9a11964z3r.xml" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.jobserve.com/MySearch/CA49421A86CA3F74.rss" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/ddeded091b6f6d33z3r.xml" + }, + { + /* "name" : "JobServe",*/ + "count": 100, + "feed_url": "http://www.s1jobs.com/xml/3eafc1ea20f1ca02z3r.xml" + } + + ] +}; + +var braider_options = { + feeds: feeds, + indent: " ", + date_sort_order: "desc", // Newest first + log_level: "warn", + dedupe_fields: ['link', 'guid'], + plugins_directories: [__dirname + "/plugins/"] +}; +console.log('Working..'); +var rss_braider = RssBraider.createClient(braider_options); + +// Override logging level (debug, info, warn, err, off) +rss_braider.logger.level('error'); + +rss_braider.processFeed('simple_test_feed', 'json', function(err, data) { + if (err) { + return console.log(err); + } + + var j = JSON.parse(data); + + j = processJson(j); + /*for (let mm in j) { + console.log(mm); + }*/ + var ejsOutput = ejs.compile(str)(j); + + fs.writeFile(__dirname + "/dist/" + outputFile + ".html", + ejsOutput, + function(err) { + + if (err) { + return console.log(err); + } + + console.log("The file was saved!"); + }); + + fs.writeFile(__dirname + "/dist/" + outputFile + ".json", + data, + function(err) { + if (err) { + logger.error(err); + return console.log(err); + } + + console.log("The file was saved!"); + }); +}); + +function scoreEntry(s) { + let score = 0; + let used = []; + for(let t of s) { + //console.log(t); + + if (!corpus.hasOwnProperty(t)) { + corpus[t] = {score:0, common:1}; + } else { + corpus[t].common++; + if (used.indexOf(t) === -1) { + score = score + corpus[t].score; + used.push(t); + } + + } + } + + return score; +} +function processJson(j) { + console.log(j); + + for (let item of j.items) { + console.log(item.title); + let description = striptags(item.description); + let stripped = getNoneStopWords(description); + //console.log(stripped); + let score = scoreEntry(stripped); + console.log(`Score ${score}`); + item.description = `
Score ${score}
` + item.description; + } + + return j; +} +function getNoneStopWords(sentence) { + let common = getStopWords(); + let wordArr = sentence.match(/\w+/g), + commonObj = {}, + uncommonArr = [], + word, i; + + for (i = 0; i < common.length; i++) { + commonObj[ common[i].trim() ] = true; + } + + for (i = 0; i < wordArr.length; i++) { + word = wordArr[i].trim().toLowerCase(); + if (!commonObj[word]) { + uncommonArr.push(word); + } + } + return uncommonArr; + } + + function getStopWords() { + return stopwords; + } + + + + + function doFeeds() { + rss_braider.processFeed('simple_test_feed', 'rss', function(err, data) { + if (err) { + return console.log(err); + } + + + + console.log('Saving', __dirname + "/dist/" + outputFile + ".xml"); + + fs.writeFile(__dirname + "/dist/" + outputFile + ".xml", data, function(err) { + if (err) { + return console.log(err); + } + + console.log("The file was saved!"); + }); + }); + + } + + +function loadCorpus() { + let fileName = ".\\dist\\corpus.json"; + console.log(fileName); + + jsonfile.readFile(fileName, function(err, obj) { + corpus = obj; + doFeeds(); + }); +} + +function saveCorpus() { + let fileName = ".\\dist\\corpus.json"; + + jsonfile.writeFile(fileName, corpus,function(err, obj) { + console.error(err); + console.log(obj); + }); +} + + +function go() { + loadCorpus(); +} + +go(); diff --git a/jobs-special.js b/jobs-special.js index e940b6c..4aad8ad 100644 --- a/jobs-special.js +++ b/jobs-special.js @@ -137,11 +137,6 @@ feeds.simple_test_feed = { "count": 100, "feed_url": "http://www.jobserve.com/MySearch/6FC7E9ED5F042ECB.rss" }, - { - /* "name" : "JobServe",*/ - "count": 100, - "feed_url": "http://www.jobsite.co.uk/cgi-bin/advsearch?rss_feed=1&skill_atleast=html,%20asp,%20web,%20sql,%20delphi,%20vb,%20vbscript,%20php,%20ajax,%20mysql,%20sqlserver,%20javascript,%20intranet,%20vmware,%20virtulization&location_include=Abu%20Dhabi&compare_resolved=RE_ABUDHABI_UNITEDARABEMIRATES&compare_search=Abu%20Dhabi&jobtype=X&search_emp_mkt_cd=ALL" - }, { /* "name" : "JobServe",*/ "count": 100, @@ -237,8 +232,8 @@ rss_braider.processFeed('simple_test_feed', 'json', function(err, data) { }); function getNoneStopWords(sentence) { - var common = getStopWords(); - var wordArr = sentence.match(/\w+/g), + let common = getStopWords(); + let wordArr = sentence.match(/\w+/g), commonObj = {}, uncommonArr = [], word, i;