Break each new anagram of an array item to a new line - javascript

I am working an anagram generator and am trying to break off each new item in the array into a new line. The way this works is it slices each array item and loops through each character.
The output needs to be:
cat, cta, act, atc, tca, tac,
bat, bta, abt, atb, tba, tab,
rat, rta, art, atr, tra, tar,
But it is:
cat, cta, act, atc, tca, tac, bat, bta, abt, atb, tba, tab, rat, rta, art, atr, tra, tar, splat, splta, spalt, spatl,...
So far the code I have is this:
HTML:
<div id="anagrams"></div>
JS:
var arr = ['cat', 'bat', 'rat', 'splat'];
var allAnagrams = function(arr) {
var anagrams = {};
arr.forEach(function(str) {
var recurse = function(ana, str) {
if (str === '')
anagrams[ana] = 1;
for (var i = 0; i < str.length; i++)
recurse(ana + str[i], str.slice(0, i) + str.slice(i + 1));
};
recurse(' ', str);
});
return Object.keys(anagrams);
}
document.getElementById("anagrams").innerHTML = (allAnagrams(arr));
To accomplish a new line per array item I basically want to check if the amount of the characters exceeds the amount of characters in the string/array item and if it does, insert a break into the HTML. I tried doing that by:
var arr = ['cat', 'bat', 'rat', 'splat'];
var allAnagrams = function(arr) {
var anagrams = {};
arr.forEach(function(str) {
var recurse = function(ana, str) {
if (str === '')
anagrams[ana] = 1;
for (var i = 0; i < str.length; i++)
recurse(ana + str[i], str.slice(0, i) + str.slice(i + 1));
// check if string length is greater than the count and
// if it is, insert a break between the string
if (i > str.length) {
recurse(' <br>', str);
}
};
recurse(' ', str);
});
return Object.keys(anagrams);
}
document.getElementById("anagrams").innerHTML = (allAnagrams(arr));
However it still prints across a single line. Am I approaching this the correct way? I also tried using ana in place of i but I think I need to use i since that's the actual count - is that correct?
A jsfiddle can be seen here: https://jsfiddle.net/4eqhd1m4/1/

I would slightly restructure the anagram creation.
Anagrams is now a string.
Recurse no longer takes care of adding break lines. Considering you want a break per element, it's cleaner to add it in the Array.forEach
jsfiddle
Edit
Adding a second jsfiddle to demonstrate the same behavior, except instead of using strings directly it returns an array (which gets split and rejoined using breaklines). It may be preferable to have the anagrams returned as an array.
jsfiddle

Is this is what you need ? All anagrams in different lines
var arr = ['cat', 'bat', 'rat', 'splat'];
var allAnagrams = function(arr) {
var anagrams = {};
arr.forEach(function(str) {
var recurse = function(ana, str) {
if (str === '')
anagrams[ana] = 1;
for (var i = 0; i < str.length; i++)
recurse(ana + str[i], str.slice(0, i) + str.slice(i + 1));
// check if string length is greater than the count and
// if it is, insert a break between the string
if (i > str.length) {
recurse(' <br \/>', str);
}
};
recurse(' <br \/>', str);
});
return Object.keys(anagrams);
}
document.getElementById("anagrams").innerHTML = (allAnagrams(arr));
<div id="anagrams"></div>

Related

Implement the .split method

I need to implement the .split method in my own way without using prebuilt functions. The method should receive a string divided into 2 sentences by a dot and divide them through a separator.
For example, there is this string:
'word wooord wooooooooord wooooooord. wooooooooord woooooord woooord wooooooooord', separator in this case: '. '
The result should be:
['word wooord wooooooooord wooooooord", "wooooooooord woooooord woooord wooooooooord']
I tried to implement it myself, the first problem I encountered is that the words from the string are added character by character to the new array. The second problem is that the output is still a string even though I declared an array earlier.
function split(str, splitter){
let arrSent = []
for (let i = 0; i < str.length; i++){
if (str[i] != splitter){
arrSent += str[i]
}
}
return arrSent
}
console.log(split('word wooord wooooooooord wooooooord. wooooooooord woooooord woooord wooooooooord', '. '))
Since the delimiter can have more than one character, you need a system to upfront collect a sample of characters (of the same length as the delimiter) to be then compared with the delimiter:
const split = (str, delimiter) => {
// If delimiter is empty string just return an array of characters
if (delimiter === "") return [...str];
const len = delimiter.length;
const iter = str.length - len + 1; // max needed iterations
const arr = [""]; // Prefill it with empty string
let idx = 0; // arr insertion pointer
for (let i = 0; i < iter; i++) {
// Collect len chars from str as a sample
// to compare with the delimiter:
let sample = "";
for (let x = i; x < i + len; x++) {
sample += str[x];
}
const isSplit = sample === delimiter;
const isEnded = i === iter - 1;
if (isSplit) {
i += len - 1; // Consume splitted characters
idx += 1; // Increment arr pointer
arr[idx] = ""; // Prepare the new array key as empty string
} else {
// If loop ended append the entire sample.
// Otherwise, append a single character:
arr[idx] += isEnded ? sample : str[i];
}
}
return arr
}
console.log(split("word. etc", ". "));
console.log(split("word. etc. ", ". "));
console.log(split(". word yep. . etc. ", ". "));
console.log(split("word", ". "));
console.log(split("word", "word"));
console.log(split("word", ""));
console.log(split("", ""));
above, idx (starting at 0) is used as the output's arr insertion pointer. The idx is incremented if the sample matches the delimiter. Also, if there's a match, we need to skip iterations i += len, to not include the delimiter in the output array.
To test, create many examples and right before return arr; use console.log(JSON.stringify(arr) === JSON.stringify(str.split(delimiter))); - it should return true for all the submitted tests.

How to get odd and even position characters from a string?

I'm trying to figure out how to remove every second character (starting from the first one) from a string in Javascript.
For example, the string "This is a test!" should become "hsi etTi sats!"
I also want to save every deleted character into another array.
I have tried using replace method and splice method, but wasn't able to get them to work properly. Mostly because replace only replaces the first character.
function encrypt(text, n) {
if (text === "NULL") return n;
if (n <= 0) return text;
var encArr = [];
var newString = text.split("");
var j = 0;
for (var i = 0; i < text.length; i += 2) {
encArr[j++] = text[i];
newString.splice(i, 1); // this line doesn't work properly
}
}
You could reduce the characters of the string and group them to separate arrays using the % operator. Use destructuring to get the 2D array returned to separate variables
let str = "This is a test!";
const [even, odd] = [...str].reduce((r,char,i) => (r[i%2].push(char), r), [[],[]])
console.log(odd.join(''))
console.log(even.join(''))
Using a for loop:
let str = "This is a test!",
odd = [],
even = [];
for (var i = 0; i < str.length; i++) {
i % 2 === 0
? even.push(str[i])
: odd.push(str[i])
}
console.log(odd.join(''))
console.log(even.join(''))
It would probably be easier to use a regular expression and .replace: capture two characters in separate capturing groups, add the first character to a string, and replace with the second character. Then, you'll have first half of the output you need in one string, and the second in another: just concatenate them together and return:
function encrypt(text) {
let removedText = '';
const replacedText1 = text.replace(/(.)(.)?/g, (_, firstChar, secondChar) => {
// in case the match was at the end of the string,
// and the string has an odd number of characters:
if (!secondChar) secondChar = '';
// remove the firstChar from the string, while adding it to removedText:
removedText += firstChar;
return secondChar;
});
return replacedText1 + removedText;
}
console.log(encrypt('This is a test!'));
Pretty simple with .reduce() to create the two arrays you seem to want.
function encrypt(text) {
return text.split("")
.reduce(({odd, even}, c, i) =>
i % 2 ? {odd: [...odd, c], even} : {odd, even: [...even, c]}
, {odd: [], even: []})
}
console.log(encrypt("This is a test!"));
They can be converted to strings by using .join("") if you desire.
I think you were on the right track. What you missed is replace is using either a string or RegExp.
The replace() method returns a new string with some or all matches of a pattern replaced by a replacement. The pattern can be a string or a RegExp, and the replacement can be a string or a function to be called for each match. If pattern is a string, only the first occurrence will be replaced.
Source: String.prototype.replace()
If you are replacing a value (and not a regular expression), only the first instance of the value will be replaced. To replace all occurrences of a specified value, use the global (g) modifier
Source: JavaScript String replace() Method
So my suggestion would be to continue still with replace and pass the right RegExp to the function, I guess you can figure out from this example - this removes every second occurrence for char 't':
let count = 0;
let testString = 'test test test test';
console.log('original', testString);
// global modifier in RegExp
let result = testString.replace(/t/g, function (match) {
count++;
return (count % 2 === 0) ? '' : match;
});
console.log('removed', result);
like this?
var text = "This is a test!"
var result = ""
var rest = ""
for(var i = 0; i < text.length; i++){
if( (i%2) != 0 ){
result += text[i]
} else{
rest += text[i]
}
}
console.log(result+rest)
Maybe with split, filter and join:
const remaining = myString.split('').filter((char, i) => i % 2 !== 0).join('');
const deleted = myString.split('').filter((char, i) => i % 2 === 0).join('');
You could take an array and splice and push each second item to the end of the array.
function encrypt(string) {
var array = [...string],
i = 0,
l = array.length >> 1;
while (i <= l) array.push(array.splice(i++, 1)[0]);
return array.join('');
}
console.log(encrypt("This is a test!"));
function encrypt(text) {
text = text.split("");
var removed = []
var encrypted = text.filter((letter, index) => {
if(index % 2 == 0){
removed.push(letter)
return false;
}
return true
}).join("")
return {
full: encrypted + removed.join(""),
encrypted: encrypted,
removed: removed
}
}
console.log(encrypt("This is a test!"))
Splice does not work, because if you remove an element from an array in for loop indexes most probably will be wrong when removing another element.
I don't know how much you care about performance, but using regex is not very efficient.
Simple test for quite a long string shows that using filter function is on average about 3 times faster, which can make quite a difference when performed on very long strings or on many, many shorts ones.
function test(func, n){
var text = "";
for(var i = 0; i < n; ++i){
text += "a";
}
var start = new Date().getTime();
func(text);
var end = new Date().getTime();
var time = (end-start) / 1000.0;
console.log(func.name, " took ", time, " seconds")
return time;
}
function encryptREGEX(text) {
let removedText = '';
const replacedText1 = text.replace(/(.)(.)?/g, (_, firstChar, secondChar) => {
// in case the match was at the end of the string,
// and the string has an odd number of characters:
if (!secondChar) secondChar = '';
// remove the firstChar from the string, while adding it to removedText:
removedText += firstChar;
return secondChar;
});
return replacedText1 + removedText;
}
function encrypt(text) {
text = text.split("");
var removed = "";
var encrypted = text.filter((letter, index) => {
if(index % 2 == 0){
removed += letter;
return false;
}
return true
}).join("")
return encrypted + removed
}
var timeREGEX = test(encryptREGEX, 10000000);
var timeFilter = test(encrypt, 10000000);
console.log("Using filter is faster ", timeREGEX/timeFilter, " times")
Using actually an array for storing removed letters and then joining them is much more efficient, than using a string and concatenating letters to it.
I changed an array to string in filter solution to make it the same like in regex solution, so they are more comparable.

word frequency in javascript

How can I implement javascript function to calculate frequency of each word in a given sentence.
this is my code:
function search () {
var data = document.getElementById('txt').value;
var temp = data;
var words = new Array();
words = temp.split(" ");
var uniqueWords = new Array();
var count = new Array();
for (var i = 0; i < words.length; i++) {
//var count=0;
var f = 0;
for (j = 0; j < uniqueWords.length; j++) {
if (words[i] == uniqueWords[j]) {
count[j] = count[j] + 1;
//uniqueWords[j]=words[i];
f = 1;
}
}
if (f == 0) {
count[i] = 1;
uniqueWords[i] = words[i];
}
console.log("count of " + uniqueWords[i] + " - " + count[i]);
}
}
am unable to trace out the problem ..any help is greatly appriciated.
output in this format:
count of is - 1
count of the - 2..
input: this is anil is kum the anil
Here is a JavaScript function to get the frequency of each word in a sentence:
function wordFreq(string) {
var words = string.replace(/[.]/g, '').split(/\s/);
var freqMap = {};
words.forEach(function(w) {
if (!freqMap[w]) {
freqMap[w] = 0;
}
freqMap[w] += 1;
});
return freqMap;
}
It will return a hash of word to word count. So for example, if we run it like so:
console.log(wordFreq("I am the big the big bull."));
> Object {I: 1, am: 1, the: 2, big: 2, bull: 1}
You can iterate over the words with Object.keys(result).sort().forEach(result) {...}. So we could hook that up like so:
var freq = wordFreq("I am the big the big bull.");
Object.keys(freq).sort().forEach(function(word) {
console.log("count of " + word + " is " + freq[word]);
});
Which would output:
count of I is 1
count of am is 1
count of big is 2
count of bull is 1
count of the is 2
JSFiddle: http://jsfiddle.net/ah6wsbs6/
And here is wordFreq function in ES6:
function wordFreq(string) {
return string.replace(/[.]/g, '')
.split(/\s/)
.reduce((map, word) =>
Object.assign(map, {
[word]: (map[word])
? map[word] + 1
: 1,
}),
{}
);
}
JSFiddle: http://jsfiddle.net/r1Lo79us/
I feel you have over-complicated things by having multiple arrays, strings, and engaging in frequent (and hard to follow) context-switching between loops, and nested loops.
Below is the approach I would encourage you to consider taking. I've inlined comments to explain each step along the way. If any of this is unclear, please let me know in the comments and I'll revisit to improve clarity.
(function () {
/* Below is a regular expression that finds alphanumeric characters
Next is a string that could easily be replaced with a reference to a form control
Lastly, we have an array that will hold any words matching our pattern */
var pattern = /\w+/g,
string = "I I am am am yes yes.",
matchedWords = string.match( pattern );
/* The Array.prototype.reduce method assists us in producing a single value from an
array. In this case, we're going to use it to output an object with results. */
var counts = matchedWords.reduce(function ( stats, word ) {
/* `stats` is the object that we'll be building up over time.
`word` is each individual entry in the `matchedWords` array */
if ( stats.hasOwnProperty( word ) ) {
/* `stats` already has an entry for the current `word`.
As a result, let's increment the count for that `word`. */
stats[ word ] = stats[ word ] + 1;
} else {
/* `stats` does not yet have an entry for the current `word`.
As a result, let's add a new entry, and set count to 1. */
stats[ word ] = 1;
}
/* Because we are building up `stats` over numerous iterations,
we need to return it for the next pass to modify it. */
return stats;
}, {} );
/* Now that `counts` has our object, we can log it. */
console.log( counts );
}());
const sentence = 'Hi my friend how are you my friend';
const countWords = (sentence) => {
const convertToObject = sentence.split(" ").map( (i, k) => {
return {
element: {
word: i,
nr: sentence.split(" ").filter(j => j === i).length + ' occurrence',
}
}
});
return Array.from(new Set(convertToObject.map(JSON.stringify))).map(JSON.parse)
};
console.log(countWords(sentence));
Here is an updated version of your own code...
<!DOCTYPE html>
<html>
<head>
<title>string frequency</title>
<style type="text/css">
#text{
width:250px;
}
</style>
</head>
<body >
<textarea id="txt" cols="25" rows="3" placeholder="add your text here"> </textarea></br>
<button type="button" onclick="search()">search</button>
<script >
function search()
{
var data=document.getElementById('txt').value;
var temp=data;
var words=new Array();
words=temp.split(" ");
var unique = {};
for (var i = 0; i < words.length; i++) {
var word = words[i];
console.log(word);
if (word in unique)
{
console.log("word found");
var count = unique[word];
count ++;
unique[word]=count;
}
else
{
console.log("word NOT found");
unique[word]=1;
}
}
console.log(unique);
}
</script>
</body>
I think your loop was overly complicated. Also, trying to produce the final count while still doing your first pass over the array of words is bound to fail because you can't test for uniqueness until you have checked each word in the array.
Instead of all your counters, I've used a Javascript object to work as an associative array, so we can store each unique word, and the count of how many times it occurs.
Then, once we exit the loop, we can see the final result.
Also, this solution uses no regex ;)
I'll also add that it's very hard to count words just based on spaces. In this code, "one, two, one" will results in "one," and "one" as being different, unique words.
While both of the answers here are correct maybe are better but none of them address OP's question (what is wrong with the his code).
The problem with OP's code is here:
if(f==0){
count[i]=1;
uniqueWords[i]=words[i];
}
On every new word (unique word) the code adds it to uniqueWords at index at which the word was in words. Hence there are gaps in uniqueWords array. This is the reason for some undefined values.
Try printing uniqueWords. It should give something like:
["this", "is", "anil", 4: "kum", 5: "the"]
Note there no element for index 3.
Also the printing of final count should be after processing all the words in the words array.
Here's corrected version:
function search()
{
var data=document.getElementById('txt').value;
var temp=data;
var words=new Array();
words=temp.split(" ");
var uniqueWords=new Array();
var count=new Array();
for (var i = 0; i < words.length; i++) {
//var count=0;
var f=0;
for(j=0;j<uniqueWords.length;j++){
if(words[i]==uniqueWords[j]){
count[j]=count[j]+1;
//uniqueWords[j]=words[i];
f=1;
}
}
if(f==0){
count[i]=1;
uniqueWords[i]=words[i];
}
}
for ( i = 0; i < uniqueWords.length; i++) {
if (typeof uniqueWords[i] !== 'undefined')
console.log("count of "+uniqueWords[i]+" - "+count[i]);
}
}
I have just moved the printing of count out of the processing loop into a new loop and added a if not undefined check.
Fiddle: https://jsfiddle.net/cdLgaq3a/
I had a similar assignment. This is what I did:
Assignment : Clean the following text and find the most frequent word (hint, use replace and regular expressions).
const sentence = '%I $am#% a %tea#cher%, &and& I lo%#ve %te#a#ching%;. The#re $is no#th#ing; &as& mo#re rewarding as educa#ting &and& #emp%o#weri#ng peo#ple. ;I found tea#ching m%o#re interesting tha#n any ot#her %jo#bs. %Do#es thi%s mo#tiv#ate yo#u to be a tea#cher!? %Th#is 30#Days&OfJavaScript &is al#so $the $resu#lt of &love& of tea&ching'
console.log(`\n\n 03.Clean the following text and find the most frequent word (hint, use replace and regular expressions) \n\n ${sentence} \n\n`)
console.log(`Cleared sentence : ${sentence.replace(/[.,\/#!$%\^&\*;:{}=\-_`~()#]/g, "")}`)
console.log(mostFrequentWord(sentence))
function mostFrequentWord(sentence) {
sentence = sentence.replace(/[.,\/#!$%\^&\*;:{}=\-_`~()#]/g, "").trim().toLowerCase()
let sentenceArray = sentence.split(" ")
let word = null
let count = 0
for (i = 0; i < sentenceArray.length; i++) {
word = sentenceArray[i]
count = sentence.match(RegExp(sentenceArray[i], 'gi')).length
if (count > count) {
count = count
word = word
}
}
return `\n Count of most frequent word "${word}" is ${count}`
}
I'd go with Sampson's match-reduce method for slightly better efficiency. Here's a modified version of it that is more production-ready. It's not perfect, but it should cover the vast majority of scenarios (i.e., "good enough").
function calcWordFreq(s) {
// Normalize
s = s.toLowerCase();
// Strip quotes and brackets
s = s.replace(/["“”(\[{}\])]|\B['‘]([^'’]+)['’]/g, '$1');
// Strip dashes and ellipses
s = s.replace(/[‒–—―…]|--|\.\.\./g, ' ');
// Strip punctuation marks
s = s.replace(/[!?;:.,]\B/g, '');
return s.match(/\S+/g).reduce(function(oFreq, sWord) {
if (oFreq.hasOwnProperty(sWord)) ++oFreq[sWord];
else oFreq[sWord] = 1;
return oFreq;
}, {});
}
calcWordFreq('A ‘bad’, “BAD” wolf-man...a good ol\' spook -- I\'m frightened!') returns
{
"a": 2
"bad": 2
"frightened": 1
"good": 1
"i'm": 1
"ol'": 1
"spook": 1
"wolf-man": 1
}

How do I add specific requirements to this recursive hashtag generator?

I'm trying to build a hashtag generator with one specific requirement. The function below works well but I want to exclude all hashtags that are comprised of just one word. This is a brain teaser I just can't wrap my mind around.
function hashtag(string) {
var words = string.split(" ");
var result = [];
var f = function(prefix, words) {
for (var i = 0; i < words.length; i++) {
result.push(prefix + words[i]);
f(prefix + words[i], words.slice(i + 1));
}
};
f('#', words);
return result;
}
Running hashtag("Golden Gate Bridge"); will produce...
[ '#Golden', '#GoldenGate', '#GoldenGateBridge', '#GoldenBridge', '#Gate', '#GateBridge', '#Bridge' ]
I would like it to produce...
['#GoldenGate', '#GoldenGateBridge', '#GoldenBridge', '#GateBridge']
Note that all the single word origin hashtags are gone.
I tried adding in a "depth" variable that incremented with each additional level of recursion and tried some "if" statements but couldn't find the right combination.
Thanks for your help! I really appreciate it.
You were on the right path adding a depth counter.
function hashtag(string) {
var words = string.split(" ");
var result = [];
var f = function(prefix, words, depth) {
for (var i = 0; i < words.length; i++) {
// Only add this hashtag if our prefix depth is > 1
if (depth > 1) {
result.push(prefix + words[i]);
}
// Increase the prefix depth with each iteration.
f(prefix + words[i], words.slice(i + 1), depth + 1);
}
};
// Pass the default depth
f('#', words, 1);
return result;
}
You can see this working here: http://jsfiddle.net/3Kgzf/

What's the best way to count keywords in JavaScript?

What's the best and most efficient way to count keywords in JavaScript? Basically, I'd like to take a string and get the top N words or phrases that occur in the string, mainly for the use of suggesting tags. I'm looking more for conceptual hints or links to real-life examples than actual code, but I certainly wouldn't mind if you'd like to share code as well. If there are particular functions that would help, I'd also appreciate that.
Right now I think I'm at using the split() function to separate the string by spaces and then cleaning punctuation out with a regular expression. I'd also want it to be case-insensitive.
Cut, paste + execute demo:
var text = "Text to be examined to determine which n words are used the most";
// Find 'em!
var wordRegExp = /\w+(?:'\w{1,2})?/g;
var words = {};
var matches;
while ((matches = wordRegExp.exec(text)) != null)
{
var word = matches[0].toLowerCase();
if (typeof words[word] == "undefined")
{
words[word] = 1;
}
else
{
words[word]++;
}
}
// Sort 'em!
var wordList = [];
for (var word in words)
{
if (words.hasOwnProperty(word))
{
wordList.push([word, words[word]]);
}
}
wordList.sort(function(a, b) { return b[1] - a[1]; });
// Come back any time, straaanger!
var n = 10;
var message = ["The top " + n + " words are:"];
for (var i = 0; i < n; i++)
{
message.push(wordList[i][0] + " - " + wordList[i][1] + " occurance" +
(wordList[i][1] == 1 ? "" : "s"));
}
alert(message.join("\n"));
Reusable function:
function getTopNWords(text, n)
{
var wordRegExp = /\w+(?:'\w{1,2})?/g;
var words = {};
var matches;
while ((matches = wordRegExp.exec(text)) != null)
{
var word = matches[0].toLowerCase();
if (typeof words[word] == "undefined")
{
words[word] = 1;
}
else
{
words[word]++;
}
}
var wordList = [];
for (var word in words)
{
if (words.hasOwnProperty(word))
{
wordList.push([word, words[word]]);
}
}
wordList.sort(function(a, b) { return b[1] - a[1]; });
var topWords = [];
for (var i = 0; i < n; i++)
{
topWords.push(wordList[i][0]);
}
return topWords;
}
Once you have that array of words cleaned up, and let's say you call it wordArray:
var keywordRegistry = {};
for(var i = 0; i < wordArray.length; i++) {
if(keywordRegistry.hasOwnProperty(wordArray[i]) == false) {
keywordRegistry[wordArray[i]] = 0;
}
keywordRegistry[wordArray[i]] = keywordRegistry[wordArray[i]] + 1;
}
// now keywordRegistry will have, as properties, all of the
// words in your word array with their respective counts
// this will alert (choose something better than alert) all words and their counts
for(var keyword in keywordRegistry) {
alert("The keyword '" + keyword + "' occurred " + keywordRegistry[keyword] + " times");
}
That should give you the basics of doing this part of the work.
Try to split you string on words and count the resulting words, then sort on the counts.
This builds upon a previous answer by insin by only having one loop:
function top_words(text, n) {
// Split text on non word characters
var words = text.toLowerCase().split(/\W+/)
var positions = new Array()
var word_counts = new Array()
for (var i=0; i<words.length; i++) {
var word = words[i]
if (!word) {
continue
}
if (typeof positions[word] == 'undefined') {
positions[word] = word_counts.length
word_counts.push([word, 1])
} else {
word_counts[positions[word]][1]++
}
}
// Put most frequent words at the beginning.
word_counts.sort(function (a, b) {return b[1] - a[1]})
// Return the first n items
return word_counts.slice(0, n)
}
// Let's see if it works.
var text = "Words in here are repeated. Are repeated, repeated!"
alert(top_words(text, 3))
The result of the example is: [['repeated',3], ['are',2], ['words', 1]]
I would do exactly what you have mentioned above to isolate each word. I would then probably add each word as the index of an array with the number of occurrences as the value.
For example:
var a = new Array;
a[word] = a[word]?a[word]+1:1;
Now you know how many unique words there are (a.length) and how many occurrences of each word existed (a[word]).

Categories

Resources