What's the best way to count keywords in JavaScript? - javascript

What's the best and most efficient way to count keywords in JavaScript? Basically, I'd like to take a string and get the top N words or phrases that occur in the string, mainly for the use of suggesting tags. I'm looking more for conceptual hints or links to real-life examples than actual code, but I certainly wouldn't mind if you'd like to share code as well. If there are particular functions that would help, I'd also appreciate that.
Right now I think I'm at using the split() function to separate the string by spaces and then cleaning punctuation out with a regular expression. I'd also want it to be case-insensitive.

Cut, paste + execute demo:
var text = "Text to be examined to determine which n words are used the most";
// Find 'em!
// A word is a run of word characters, optionally followed by an apostrophe
// and one or two more word characters ("don't", "we've").
var wordRegExp = /\w+(?:'\w{1,2})?/g;
// Counts live in a Map so that words such as "constructor" or "toString"
// cannot collide with members inherited from Object.prototype.
var words = new Map();
var matches;
while ((matches = wordRegExp.exec(text)) !== null)
{
  // Lower-casing makes the tally case-insensitive.
  var word = matches[0].toLowerCase();
  words.set(word, (words.get(word) || 0) + 1);
}
// Sort 'em!
// Array.from(Map) yields [word, count] pairs in first-seen order.
var wordList = Array.from(words);
wordList.sort(function(a, b) { return b[1] - a[1]; });
// Come back any time, straaanger!
var n = 10;
// Never read past the end of the list when the text has fewer than n distinct words.
var shown = Math.min(n, wordList.length);
var message = ["The top " + shown + " words are:"];
for (var i = 0; i < shown; i++)
{
  message.push(wordList[i][0] + " - " + wordList[i][1] + " occurrence" +
    (wordList[i][1] === 1 ? "" : "s"));
}
alert(message.join("\n"));
Reusable function:
/**
 * Returns the most frequent words of `text`, most frequent first.
 *
 * Words are matched case-insensitively; a word is a run of word characters
 * optionally followed by an apostrophe and one or two more word characters.
 *
 * @param {string} text The text to examine.
 * @param {number} n Maximum number of words to return.
 * @returns {string[]} At most `n` lower-cased words; fewer when the text
 *   contains fewer distinct words.
 */
function getTopNWords(text, n)
{
  var wordRegExp = /\w+(?:'\w{1,2})?/g;
  // A Map keeps words such as "constructor" from colliding with members
  // inherited from Object.prototype.
  var counts = new Map();
  var matches;
  while ((matches = wordRegExp.exec(text)) !== null)
  {
    var word = matches[0].toLowerCase();
    counts.set(word, (counts.get(word) || 0) + 1);
  }
  // [word, count] pairs in first-seen order, then sorted by descending count.
  var wordList = Array.from(counts);
  wordList.sort(function(a, b) { return b[1] - a[1]; });
  // Stop at the end of the list when fewer than n distinct words exist.
  var limit = Math.min(n, wordList.length);
  var topWords = [];
  for (var i = 0; i < limit; i++)
  {
    topWords.push(wordList[i][0]);
  }
  return topWords;
}

Once you have that array of words cleaned up, and let's say you call it wordArray:
// Maps each word of wordArray to the number of times it occurs.
var keywordRegistry = {};
for(var i = 0; i < wordArray.length; i++) {
  // hasOwnProperty is called through Object.prototype so the check keeps
  // working even after the word "hasOwnProperty" itself has been stored
  // as a key of the registry.
  if(!Object.prototype.hasOwnProperty.call(keywordRegistry, wordArray[i])) {
    keywordRegistry[wordArray[i]] = 0;
  }
  keywordRegistry[wordArray[i]] = keywordRegistry[wordArray[i]] + 1;
}
// now keywordRegistry will have, as properties, all of the
// words in your word array with their respective counts
// this will alert (choose something better than alert) all words and their counts
for(var keyword in keywordRegistry) {
  alert("The keyword '" + keyword + "' occurred " + keywordRegistry[keyword] + " times");
}
That should give you the basics of doing this part of the work.

Try to split your string into words, count the resulting words, and then sort by the counts.

This builds upon a previous answer by insin by only having one loop:
/**
 * Counts the words of `text` in a single pass and returns the most frequent.
 *
 * @param {string} text The text to examine (compared case-insensitively).
 * @param {number} n Maximum number of entries to return.
 * @returns {Array} Up to `n` [word, count] pairs, most frequent first.
 */
function top_words(text, n) {
  // Split text on non word characters
  var words = text.toLowerCase().split(/\W+/);
  // word -> index of its [word, count] pair inside word_counts. A Map is
  // used because an Array has built-in members ("length", "push", ...)
  // that would be mistaken for already-seen words.
  var positions = new Map();
  var word_counts = [];
  for (var i = 0; i < words.length; i++) {
    var word = words[i];
    // split() yields empty strings at the edges of the text; skip them.
    if (!word) {
      continue;
    }
    var position = positions.get(word);
    if (position === undefined) {
      positions.set(word, word_counts.length);
      word_counts.push([word, 1]);
    } else {
      word_counts[position][1]++;
    }
  }
  // Put most frequent words at the beginning.
  word_counts.sort(function (a, b) { return b[1] - a[1]; });
  // Return the first n items
  return word_counts.slice(0, n);
}
// Let's see if it works.
var text = "Words in here are repeated. Are repeated, repeated!"
alert(top_words(text, 3))
The result of the example is: [['repeated',3], ['are',2], ['words', 1]]

I would do exactly what you have mentioned above to isolate each word. I would then probably add each word as the index of an array with the number of occurrences as the value.
For example:
// `a` is created as an Array but is used as a dictionary keyed by word.
var a = new Array;
// First sighting of `word` stores 1; every later sighting adds 1 to the stored count.
// NOTE(review): string keys do not change a.length, and a word equal to an
// inherited Array member name (e.g. "push") makes a[word] truthy before it has
// been counted — a plain object or Map looks safer; confirm before reuse.
a[word] = a[word]?a[word]+1:1;
Now you know how many occurrences of each word existed (a[word]). Note that a.length does not count string keys, so use Object.keys(a).length to find out how many unique words there are.

Related

How can I extract all contained characters in a String? [duplicate]

I have a string with repeated letters. I want letters that are repeated more than once to show only once.
Example input: aaabbbccc
Expected output: abc
I've tried to create the code myself, but so far my function has the following problems:
if the letter doesn't repeat, it's not shown (it should be)
if it's repeated once, it's show only once (i.e. aa shows a - correct)
if it's repeated twice, shows all (i.e. aaa shows aaa - should be a)
if it's repeated 3 times, it shows 6 (if aaaa it shows aaaaaa - should be a)
/**
 * The asker's original attempt, kept behaviourally intact: for every position
 * it appends the character once per later position holding the same
 * character. A character therefore shows up once for every pair of equal
 * positions ("aa" -> "a", "aaa" -> "aaa", "aaaa" -> "aaaaaa"), and a
 * character that never repeats is not emitted at all.
 *
 * @param {string} string Text to scan.
 * @returns {string} The concatenated matches described above.
 */
function unique_char(string) {
  var collected = '';
  var outer = 0;
  while (outer < string.length) {
    var current = string[outer];
    // Compare the current character with everything to its right.
    for (var inner = outer + 1; inner < string.length; inner += 1) {
      if (current == string[inner]) {
        collected += current;
      }
    }
    outer += 1;
  }
  return collected;
}
document.write(unique_char('aaabbbccc'));
The function must be with loop inside a loop; that's why the second for is inside the first.
Fill a Set with the characters and concatenate its unique entries:
/**
 * Returns the distinct characters of `str` in order of first appearance.
 *
 * @param {string} str Text to de-duplicate.
 * @returns {string} Each character of `str` exactly once; "" for "".
 */
function unique(str) {
  // Joining the spread Set (rather than String.prototype.concat.call(...set))
  // also works for the empty string, where there is no first element to use
  // as the receiver of concat.
  return [...new Set(str)].join('');
}
console.log(unique('abc')); // "abc"
console.log(unique('abcabc')); // "abc"
Convert it to an array first, then use Josh Mc’s answer at How to get unique values in an array, and rejoin, like so:
var nonUnique = "ababdefegg";
// Keep a character only at the position where it first occurs, then glue
// the survivors back into a string.
var unique = Array.from(nonUnique)
  .filter((ch, position, chars) => chars.indexOf(ch) === position)
  .join('');
All in one line. :-)
Too late may be but still my version of answer to this post:
/**
 * Returns every distinct character of `str` once, in order of first appearance.
 *
 * @param {string} str Text to scan.
 * @returns {string} The distinct characters of `str`.
 */
function extractUniqCharacters(str){
  // A Set remembers insertion order for every key. Object keys do not:
  // digit characters ("0".."9") are listed first, in numeric order, which
  // scrambled results for inputs such as "b1a0".
  var seen = new Set();
  for(var oindex=0;oindex<str.length;oindex++){
    seen.add(str.charAt(oindex));
  }
  return Array.from(seen).join("");
}
You can use a regular expression with a custom replacement function:
/**
 * Reproduces, rule by rule, the behaviour described in the question: each run
 * of one repeated character is replaced according to its length.
 *
 * @param {string} string Text to transform.
 * @returns {string} The transformed text.
 */
function unique_char(string) {
  return string.replace(/(.)\1*/g, function(run, letter) {
    switch (run.length) {
      case 1: // the letter doesn't repeat: it is not shown
        return "";
      case 2: // repeated once: shown only once ("aa" -> "a")
        return letter;
      case 4: // repeated 3 times: shown 6 times ("aaaa" -> "aaaaaa")
        return letter.repeat(6);
      default: // length 3 shows all ("aaa" -> "aaa"); longer runs: unspecified, left as-is
        return run;
    }
  });
}
Using lodash:
_.uniq('aaabbbccc').join(''); // gives 'abc'
Per the actual question: "if the letter doesn't repeat its not shown"
/**
 * Returns the characters that occur more than once in `str`, each listed
 * once, in order of first appearance.
 *
 * @param {string} str Text to scan.
 * @returns {string} The repeated characters; "" when nothing repeats.
 */
function unique_char(str)
{
  // A Map keeps first-seen order for every key (object keys would list digit
  // characters first).
  var counts = new Map();
  for (var i = 0; i < str.length; i++)
  {
    var chr = str[i];
    counts.set(chr, (counts.get(chr) || 0) + 1);
  }
  var multiples = [];
  counts.forEach(function (count, key)
  {
    // Remove this test if you just want unique chars
    // But still keep the multiples.push(key)
    if (count > 1)
    {
      multiples.push(key);
    }
  });
  return multiples.join("");
}
var str = "aaabbbccc";
document.write(unique_char(str));
Your problem is that you are adding to unique every time you find the character in string. Really you should probably do something like this (since you specified the answer must be a nested for loop):
/**
 * Collects each character of `string` the first time it is met, using a
 * nested loop as the question requires.
 *
 * @param {string} string Text to de-duplicate.
 * @returns {string} The distinct characters in order of first appearance.
 */
function unique_char(string){
  var unique='';
  nextChar:
  for(var i=0, total=string.length; i<total; i++){
    // Inner loop: has this character already been collected?
    for(var j=0; j<unique.length; j++){
      if(string[i]==unique[j]){
        continue nextChar;
      }
    }
    unique+=string[i];
  }
  return unique;
}
document.write( unique_char('aaabbbccc'))
In this we only add the character found in string to unique if it isn't already there. This is really not an efficient way to do this at all ... but based on your requirements it should work.
I can't run this since I don't have anything handy to run JavaScript in ... but the theory in this method should work.
Try this if duplicate characters have to be displayed once, i.e.,
for i/p: aaabbbccc o/p: abc
var str="aaabbbccc";
// Keep a character only at its last position in str (nothing equal to it
// follows), then join what is left. The value of the expression is what a
// console shows.
Array.prototype.filter.call(str, (ch, idx) => str.indexOf(ch, idx + 1) == -1).join("");
//output: "abc"
And try this if only unique characters(String Bombarding Algo) have to be displayed, add another "and" condition to remove the characters which came more than once and display only unique characters, i.e.,
for i/p: aabbbkaha o/p: kh
var str="aabbbkaha";
/**
 * Returns the characters that occur exactly once in `source`, in order.
 *
 * @param {string} source Text to scan.
 * @returns {string} Characters with no other occurrence in `source`.
 */
function charsOccurringOnce(source) {
  return Array.prototype.map.call(source,
    (obj,i)=>{
      // No later occurrence AND (the other "and" condition) no earlier one.
      // The earlier-occurrence test asks whether the first occurrence is i
      // itself; lastIndexOf(obj, i-1) cannot be used because at i = 0 the
      // start index -1 is clamped to 0 and the character finds itself.
      if(source.indexOf(obj,i+1)==-1 && source.indexOf(obj)==i){
        return obj;
      }
    }
  ).join("");
}
charsOccurringOnce(str);
//output: "kh"
<script>
uniqueString = "";
alert("Displays the number of a specific character in user entered string and then finds the number of unique characters:");
/**
 * Writes `testString` to the document with every occurrence of `lookFor`
 * wrapped in <B> tags, and counts those occurrences.
 *
 * NOTE(review): both arguments are written to the page unescaped; if they can
 * contain markup they should be HTML-escaped first.
 *
 * @param {string} testString Text to scan.
 * @param {string} lookFor Single character to look for.
 * @returns {number} How many characters of `testString` equal `lookFor`.
 */
function countChar(testString, lookFor) {
  var charCounter = 0;
  document.write("Looking at this string:<br>");
  // `pos` is declared locally so the loop no longer creates (or clobbers) a
  // global variable, which also fails outright in strict mode.
  for (var pos = 0; pos < testString.length; pos++) {
    if (testString.charAt(pos) == lookFor) {
      charCounter += 1;
      document.write("<B>" + lookFor + "</B>");
    } else {
      document.write(testString.charAt(pos));
    }
  }
  document.write("<br><br>");
  return charCounter;
}
/**
 * Counts the distinct characters of `testString` and appends each of them,
 * preceded by a space, to the file-level `uniqueString` accumulator.
 *
 * @param {string} testString Text to scan.
 * @returns {number} Number of distinct characters in `testString`.
 */
function findNumberOfUniqueChar(testString) {
  var uniqueChar = 0;
  // The loop variable is declared locally instead of leaking as a global.
  for (var pos = 0; pos < testString.length; pos++) {
    var current = testString.charAt(pos);
    // A character is counted at its first occurrence only, i.e. when no
    // earlier position holds the same character.
    if (testString.indexOf(current) === pos) {
      uniqueChar += 1;
      uniqueString = uniqueString + " " + current;
    }
  }
  return uniqueChar;
}
// Ask for the text to analyse.
var testString = prompt("Give me a string of characters to check", "");
// Sentinel longer than one character so the loop below runs at least once.
var lookFor = "startvalue";
// Keep asking while the answer is longer than one character; the warning is
// skipped on the first pass, when lookFor still holds the sentinel.
// NOTE(review): an empty answer (length 0) also ends the loop — confirm that is intended.
while (lookFor.length > 1) {
if (lookFor != "startvalue")
alert("Please select only one character");
lookFor = prompt(testString + "\n\nWhat should character should I look for?", "");
}
// Report the occurrences of the chosen character, then the distinct characters.
// findNumberOfUniqueChar is called before uniqueString is written on the last
// line, so the statement order matters.
document.write("I found " + countChar(testString, lookFor) + " of the<b> " + lookFor + "</B> character");
document.write("<br><br>I counted the following " + findNumberOfUniqueChar(testString) + " unique character(s):");
document.write("<br>" + uniqueString)
</script>
Here is the simplest function to do that
/**
 * Removes repeated characters from `text`, keeping the first occurrence of each.
 *
 * @param {string} text Text to de-duplicate.
 * @returns {string} `text` with every later duplicate character dropped.
 */
function remove(text)
{
  return text.split("").reduce(function (kept, ch)
  {
    // Append only characters that have not been kept yet.
    return kept.indexOf(ch) < 0 ? kept + ch : kept;
  }, "");
}
The one line solution will be to use Set. const chars = [...new Set(s.split(''))];
If you want to return values in an array, you can use this function below.
// Returns an array holding each character of str once. A character is kept at
// its LAST position (nothing equal to it follows), so the result is ordered by
// last occurrence.
const getUniqueChar = (str) => {
  const chars = Array.from(str);
  return chars.filter((ch, position) => chars.indexOf(ch, position + 1) === -1);
};
console.log(getUniqueChar("aaabbbccc"));
Alternatively, you can use the Set constructor.
// Returns a Set (not an array) containing each character of str once.
const getUniqueChar = (str) => {
  const distinctChars = new Set(str);
  return distinctChars;
};
console.log(getUniqueChar("aaabbbccc"));
Here is the simplest function to do that pt. 2
// Builds a string containing each character of text once, in order of first
// appearance.
const showUniqChars = (text) =>
  [...text].reduce((kept, char) => (kept.includes(char) ? kept : kept + char), "");
// Number of distinct characters found in the concatenation of s1 and s2.
const countUnique = (s1, s2) => {
  const combined = s1 + s2;
  return new Set(combined).size;
};
A shorter way, based on @le_m's answer:
let unique=myArray.filter((item,index,array)=>array.indexOf(item)===index)

Counting Words Between Two Variable Strings

Total newbie + first time poster here with very little experience though I feel this problem is one I could solve with the help of some generous strangers.
I am querying a GDoc and attempting to create a function to count words between two strings for two possible end strings, for example:
Example #1
Definitive Title
*Count these words*
===============
OR
Example #2
Definitive Title
*Count these words*
Other words that are in a table
Definitive Title
*Count these other different words*
===============
In both of the above examples I am looking to count the words between a pre-defined start string and an end string.
If I ran the function that I am trying to create on Example #1 I am hoping it'd return 3 words. For Example #2 I'd hope that my function returns 8 words.
So far my function looks like this:
/**
 * Web-app POST handler: opens the document named by the `docUrl` request
 * parameter and answers with the number of space-separated pieces in the
 * whole body text, as plain text.
 *
 * @param {Object} e Request event; `e.parameter.docUrl` is the document URL.
 * @returns The text output produced by ContentService.
 */
function doPost(e) {
  var body = DocumentApp.openByUrl(e.parameter.docUrl).getBody();
  var pieces = body.getText().split(" ");
  var output = ContentService.createTextOutput(pieces.length.toString());
  return output.setMimeType(ContentService.MimeType.TEXT);
}
This returns a word count for the entire document. Any advice to point me in the right direction?
For a more dynamic and accurate solution, run the following snippet before the split() call. Regular expressions are often used for this kind of clean-up and are a must-have skill.
text = text.replace(/(^\s*)|(\s*$)/g, ""); // remove the start and end whitespace of the string (like trim())
text = text.replace(/[ ]{2,}/g, " "); // collapse runs of two or more spaces into a single space
text = text.replace(/\n /g, "\n"); // drop the space that starts a line; the g flag applies this to every line, not just the first
// Split on any whitespace so that a line break separates words too, and
// report 0 for empty text ("".split(...) would otherwise yield one empty piece).
wordCount = text === "" ? 0 : text.split(/\s+/).length;
Here is a solution to your problem: you can log the difference in characters and words, or the total number of words or characters, in the two sentences. You will also want to pass the longer sentence first, otherwise the difference will be a negative number.
var x = "count these words";
var y = "count words";
/**
 * Logs the difference between, and then the sum of, the lengths of two strings.
 *
 * @param {string} word1 First string (its length is the minuend).
 * @param {string} word2 Second string.
 */
function findCharDif(word1, word2) {
  var lengths = [word1.length, word2.length];
  console.log(lengths[0] - lengths[1]);
  console.log(lengths[0] + lengths[1]);
}
/**
 * Logs the difference between, and then the sum of, the word counts of two
 * sentences. A sentence's word count is its number of space characters plus one.
 *
 * @param {string} sentence1 First sentence (its count is the minuend).
 * @param {string} sentence2 Second sentence.
 */
function findWordDif(sentence1, sentence2) {
  // Splitting on a single space yields exactly (number of spaces + 1) pieces.
  var wordTotal1 = sentence1.split(" ").length;
  var wordTotal2 = sentence2.split(" ").length;
  console.log(wordTotal1 - wordTotal2); // difference of words between the sentences
  console.log(wordTotal1 + wordTotal2); // total amount of words
}
findCharDif(x, y);
findWordDif(x, y);
The below code seems to have worked! Was able to sit down with someone and solve it with them:
/**
 * Web-app POST handler: answers, as plain text, with the word count that
 * countScenario2 computes for the document named by the `docUrl` parameter.
 *
 * @param {Object} e Request event; `e.parameter.docUrl` is the document URL.
 * @returns The text output produced by ContentService.
 */
function doPost(e) {
  var total = countScenario2(e.parameter.docUrl);
  return ContentService
    .createTextOutput(total.toString())
    .setMimeType(ContentService.MimeType.TEXT);
}
/**
* Counts the words of every section that starts at a 'Start Text' marker.
* The sections before the last one end at a table (tables[1], tables[2], ...);
* the last section ends at the '=====' line.
*
* @param {string} docUrl URL of the document to open.
* @return {number} Total number of \w+ words counted in all sections.
*/
function countScenario2(docUrl) {
//var docUrl = 'https://docs.google.com/document/d/';
var doc = DocumentApp.openByUrl(docUrl);
var body = doc.getBody();
// `reference` remembers the most recent 'Start Text' match so the search can
// be continued from it further down.
var reference = body.findText('Start Text');
// Child index of the body element that holds the first 'Start Text'.
var start = getIndex('Start Text', body);
var tables = body.getTables();
var count = 0;
// NOTE(review): the loop starts at j = 1, so tables[0] never closes a section —
// presumably the first table sits before the first marker; confirm against the document layout.
for(var j = 1; j < tables.length ; j ++) {
var end = body.getChildIndex(tables[j]);
// Count the words of every child strictly between the marker and the table.
for (var i = start + 1; i < end; i++) {
var element = body.getChild(i);
var text = element.getText();
//if(text.length > 0) count += text.split(" ").filter(word => word !== ' ' && word !== '' && word !== ' ').length;
// match() returns null when the text contains no word at all.
var match = text.match(/\b(\w+)\b/g);
count += (match) ? match.length : 0;
}
console.log(count);
// Move on to the next 'Start Text' marker; it becomes the start of the next section.
// NOTE(review): findText presumably returns null when no further marker exists,
// in which case getElement() below would throw — confirm.
var reference = body.findText('Start Text', reference);
var element = reference.getElement();
var start = body.getChildIndex(element.getParent());
}
// Last section: from the most recent marker up to the '=====' line.
var end = getIndex('=========================================================', body);
for (var i = start + 1; i < end; i++) {
var element = body.getChild(i);
var text = element.getText();
//if(text.length > 0) count += text.split(" ").filter(word => word !== ' ' && word !== '' && word !== ' ').length;
var match = text.match(/\b(\w+)\b/g);
count += (match) ? match.length : 0;
}
console.log(count);
return count ;
}
/**
 * Finds `keyword` in the body and returns the child index, within the body,
 * of the parent of the element that contains the match.
 *
 * @param {string} keyword The text to be found
 * @param {Body} body This is the body of the document
 * @param {*} [previous] Passed through as the second argument of
 *   body.findText — presumably an earlier match to continue searching from.
 * @return {number} The index reported by body.getChildIndex.
 */
function getIndex(keyword, body, previous) {
  var owner = body.findText(keyword, previous).getElement().getParent();
  return body.getChildIndex(owner);
}
/**
 * Manual check: calls doPost with a hand-built request event and logs the
 * JSON-encoded content of the response.
 */
function testPost(){
  var response = doPost({parameter:{docUrl:'https://docs.google.com/document/d/'}});
  console.log(JSON.stringify(response.getContent()));
}
/**
 * Counts the \w+ words of every body child that lies strictly between the
 * 'Start Text' marker and the '=====' line, logs the total and returns it.
 *
 * @param {string} docUrl URL of the document to open.
 * @return {number} The number of words counted.
 */
function countScenario1(docUrl) {
  var body = DocumentApp.openByUrl(docUrl).getBody();
  var first = getIndex('Start Text', body) + 1;
  var end = getIndex('=========================================================', body);
  var count = 0;
  for (var i = first; i < end; i++) {
    // match() returns null for a child whose text contains no word.
    var found = body.getChild(i).getText().match(/\b(\w+)\b/g);
    if (found) {
      count += found.length;
    }
  }
  console.log(count);
  return count;
}
/**
 * Manual check: runs countScenario2 on a fixed document URL and logs the result.
 */
function test(){
  console.log(countScenario2('https://docs.google.com/document/d/'));
}
As @Rishabh K said in his answer, you should definitely strip trailing spaces and collapse multiple spaces to avoid inaccurate results.
However on the other hand, I don't think it answers the OP's question. Correct me if I'm wrong but I think this is what you want:
var sample1 = `This is the start identifier
These words should be included
As well As these ones
Even this
Until it ends
now
Ending identifier
These words shouldn't be included
If any of these appears, the logic is wrong`;
var sample2 = sample1 + `
This is the start identifier
These some few words
should also be included in the result set
Ending identifier`;
var sample3 = sample2 + `
This is the start identifier
Although we have the start identifier above
These words shouldn't be included
because there is no corresponding end identifier`;
/**
 * Collects the words that lie between each occurrence of a start identifier
 * and the first end identifier that follows it. A run of two or more "="
 * characters also counts as an end identifier. A start identifier with no
 * end identifier after it contributes nothing.
 *
 * @param {string} source Text to search.
 * @param {string} str1 Start identifier (matched literally).
 * @param {string} str2 End identifier (matched literally).
 * @returns {string[]} The words found between the identifiers, in order.
 * @throws {TypeError} If any of the three arguments is not a string.
 */
function getWordDiffBetween(source, str1, str2) {
  // make sure source, str1 and str2 are all strings
  [source, str1, str2].forEach(function(str, idx) {
    if (typeof str !== 'string') {
      throw new TypeError(`Argument ${idx + 1} is not a string.`);
    }
  });
  var startId = '<==start==>',
    endId = '<==end==>';
  // The identifiers are plain text, so neutralise regex metacharacters.
  var escapeRegExp = function(literal) {
    return literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  };
  // All markers are replaced in ONE pass. Doing it in two passes let the
  // "={2,}" end pattern match the "==" inside the freshly inserted start
  // marker, destroying every start marker.
  var markers = new RegExp(
    '(' + escapeRegExp(str1) + ')|' + escapeRegExp(str2) + '|={2,}', 'g');
  var newSource = source
    .replace(markers, function(match, startMatch) {
      // Pad with spaces so the marker always becomes a word of its own.
      return ' ' + (startMatch !== undefined ? startId : endId) + ' ';
    })
    .trim() // remove the start and end spaces of the string
    .replace(/\s+/g, ' '); // replace every run of spaces/newlines with a single space
  // separate text into words, which are now separated by exactly one space
  var words = newSource.split(' ');
  // get the indexes where the start and end identifiers occurred
  var strOneIdx = getAllIndexes(words, startId);
  var strTwoIdx = getAllIndexes(words, endId);
  var results = []; // we will store our results here
  strOneIdx.forEach(function(idxOne) {
    var idxTwo = strTwoIdx.find(x => x > idxOne);
    // make sure that idxOne has a partner
    if (idxTwo !== undefined) {
      results = results.concat(words.slice(idxOne + 1, idxTwo)); // range between idxOne and idxTwo
    }
  });
  return results;
}
/**
 * Lists every index at which `val` occurs in `arr` (strict equality).
 *
 * @param {Array} arr Array to scan.
 * @param {*} val Value to look for.
 * @returns {number[]} The matching indexes in ascending order.
 */
function getAllIndexes(arr, val) {
  // Map each position to itself on a match and to -1 otherwise, then drop the misses.
  return Array.from(arr, function(item, position) {
    return item === val ? position : -1;
  }).filter(function(position) {
    return position !== -1;
  });
}
var startIdentifier = 'This is the start identifier',
endIdentifier = 'Ending identifier',
wordResults = {
sample1: getWordDiffBetween(sample1, startIdentifier, endIdentifier),
sample2: getWordDiffBetween(sample2, startIdentifier, endIdentifier),
sample3: getWordDiffBetween(sample3, startIdentifier, endIdentifier) //should be equal to sample2
};
console.log(wordResults);
We have 2 functions - getWordDiffBetween and getAllIndexes. For explanation, check the comments I added in noteworthy lines.
Edit (updated snippet above):
It seems like you also want "====================" included as your end identifier. This can be done by changing the code:
.replace(new RegExp(str2, 'g'), endId) // replace the end identifier with our own
into
.replace(new RegExp(str2 + '|={2,}', 'g'), endId) // replace the end identifier with our own
which means: match an occurrence of your <end string>, or any run of 2 or more = characters. You can also change the number 2 in {2,} to your desired count.

Perform a merge on two strings

I'm trying to build a collaborative doc editor and implement operational transformation. Imagine we have a string that is manipulated simultaneously by 2 users. They can only add characters, not remove them. We want to incorporate both of their changes.
The original string is: catspider
The first user does this: cat<span id>spider</span>
The second user does this: c<span id>atspi</span>der
I'm trying to write a function that will produce: c<span id>at<span id>spi</span>der</span>
The function I've written is close, but it produces c<span id>at<span i</span>d>spider</span> codepen here
// Adds String#splice(start, newSubStr): returns a new string with newSubStr
// inserted before position `start`; nothing is removed from the original.
String.prototype.splice = function(start, newSubStr) {
  var head = this.slice(0, start);
  var tail = this.slice(start);
  return head + newSubStr + tail;
};
/**
 * Merges the insertions of two edited copies (`saved` and `requested`) of the
 * common base string `working` into a single string.
 *
 * Relies on String.prototype.splice, createDiff and insertNatX defined in this file.
 *
 * @param {string} saved First edited copy of `working`.
 * @param {string} working The common base string.
 * @param {string} requested Second edited copy of `working`.
 * @returns {string|boolean} The merged string, or false when any argument is
 *   falsy (which includes the empty string).
 */
function merge(saved, working, requested) {
if (!saved || !working || !requested) {
return false;
}
// Each diff is an array indexed by position: entry i holds the characters
// that the edited copy inserted at index i.
var diffSavedWorking = createDiff(working, saved);
var diffRequestedWorking = createDiff(working, requested);
var newStr = working;
// The bound is re-evaluated on every pass because insertNatX lengthens both diff arrays.
for (var i = 0; i < Math.max(diffRequestedWorking.length, diffSavedWorking.length); i++) {
//splice does an insert `before` -- we will assume that the saved document characters
//should always appear before the requested document characters in this merger operation
//so we first insert requested and then saved, which means that the final string will have the
//original characters first.
if (diffRequestedWorking[i]) {
newStr = newStr.splice(i, diffRequestedWorking[i]);
//we need to update the merge arrays by the number of
//inserted characters.
var length = diffRequestedWorking[i].length;
// Pad both diffs with `length` empty slots after position i so that later
// entries line up with positions in the lengthened newStr.
insertNatX(diffSavedWorking, length, i + 1);
insertNatX(diffRequestedWorking, length, i + 1);
}
if (diffSavedWorking[i]) {
newStr = newStr.splice(i, diffSavedWorking[i]);
//we need to update the merge arrays by the number of
//inserted characters.
var length = diffSavedWorking[i].length;
insertNatX(diffSavedWorking, length, i + 1);
insertNatX(diffRequestedWorking, length, i + 1);
}
}
return newStr;
}
/**
 * Lists, per position of `arr1`, the characters that `arr2` inserted there.
 *
 * `arr1` should be the shorter string and must be a subsequence of `arr2`
 * (characters were only added, never removed). Each character of `arr1` is
 * paired with the next equal character of `arr2`; everything skipped on the
 * way is recorded as an insertion at that position.
 *
 * @param {string} arr1 The base string.
 * @param {string} arr2 The base string with extra characters inserted.
 * @returns {string[]} diff[i] holds the characters inserted before arr1[i];
 *   an extra entry at index arr1.length holds a trailing insertion, if any.
 * @throws {Error} If a character of `arr1` cannot be found in the rest of `arr2`.
 */
function createDiff(arr1, arr2) {
  var diff = [];
  var j = 0;
  for (var i = 0; i < arr1.length; i++) {
    diff[i] = "";
    // Bounded by arr2.length: without the bound a missing character made the
    // loop run forever, appending "undefined" to diff[i].
    while (j < arr2.length && arr2[j] !== arr1[i]) {
      diff[i] += arr2[j];
      j++;
    }
    if (j >= arr2.length) {
      throw new Error("createDiff: character '" + arr1[i] + "' at index " + i +
        " of the base string is missing from the edited string");
    }
    j++; // step over the character that matched arr1[i]
  }
  // Whatever is left of arr2 was appended after the last base character.
  var remainder = arr2.slice(j);
  if (remainder) diff[arr1.length] = remainder;
  return diff;
}
/**
 * Inserts `length` empty strings into `arr` at index `pos`, in place.
 *
 * @param {Array} arr Array to modify.
 * @param {number} length How many empty strings to insert.
 * @param {number} pos Index at which they are inserted.
 */
function insertNatX(arr, length, pos) {
  var remaining = length;
  while (remaining > 0) {
    arr.splice(pos, 0, "");
    remaining -= 1;
  }
}
var saved = 'cat<span id>spider</span>';
var working = 'catspider';
var requested = 'c<span id>atspi</span>der';
console.log(merge(saved, working, requested));
Would appreciate any thoughts on a better / simpler way to achieve this.

Return the first word with the greatest number of repeated letters

This is a question from coderbyte’s easy set. Many people asked about it already, but I’m really curious about what’s wrong with my particular solution (I know it’s a pretty dumb and inefficient one..)
Original question:
Have the function LetterCountI(str) take the str parameter being passed and return the first word with the greatest number of repeated letters. For example: "Today, is the greatest day ever!" should return greatest because it has 2 e's (and 2 t's) and it comes before ever which also has 2 e's. If there are no words with repeating letters return -1. Words will be separated by spaces.
My solution works most of the time, but it seems that the last word of the input isn't evaluated by my code. For example, for “a bb ccc”, “bb” will be returned instead of “ccc”. But the funny thing here is if the string only contains one word, the result is correct. For example, “ccc” returns “ccc”.
Please tell me where I was wrong. Thank you in advance!
// The asker's attempt: meant to return the first word with the most repeated
// letters, or "-1" when no word has a repeated letter.
function LetterCountI(str) {
// NOTE: the lower-cased copy is discarded here; str itself is left unchanged.
str.toLowerCase();
var arr = str.split(" ");
// Highest repeat count seen so far, and the word that produced it.
var count = 0;
var word = "-1";
for (var i = 0; i < arr.length; i++) {
for (var a = 0; a < arr[i].length; a++) {
// countNew = how many later letters of this word equal the letter at position a.
var countNew = 0;
for (var b = a + 1; b < arr[i].length; b++) {
if(arr[i][a] === arr[i][b])
countNew += 1;
}
if (countNew > count) {
count = countNew;
word = arr[i];
}
}
// NOTE: this return sits inside the outer loop, so the function exits after
// examining only the first word (arr[0]); later words are never considered.
return word;
}
}
Please find below the workable version of your code:
/**
 * Returns the first word (lower-cased) whose most frequent letter repeats more
 * often than in any earlier word, or "-1" when no word has a repeated letter.
 * Words are separated by single spaces.
 *
 * @param {string} str The sentence to examine.
 * @returns {string} The winning word, or "-1".
 */
function LetterCountI(str) {
  var best = { word: "-1", repeats: 0 };
  str.toLowerCase().split(" ").forEach(function (candidate) {
    // repeats of a letter = its occurrences beyond the first one
    var occurrences = new Map();
    var mostRepeats = 0;
    for (var k = 0; k < candidate.length; k++) {
      var seen = (occurrences.get(candidate[k]) || 0) + 1;
      occurrences.set(candidate[k], seen);
      mostRepeats = Math.max(mostRepeats, seen - 1);
    }
    // Strictly greater, so the earliest word wins a tie.
    if (mostRepeats > best.repeats) {
      best = { word: candidate, repeats: mostRepeats };
    }
  });
  return best.word;
}
Here is the Java code soln for your problem.
You have returned your answer incorrectly. You should have returned word/Answer/res out of "for loops".
Check my code here.
public static String StringChallenge( String str) {
String[] arr = str.split(" ");
int count = 0; String res = "-1";
for (int i = 0; i < arr.length ; i++) {
for (int j = 0; j < arr[i].length() ; j++) {
int counter = 0;
for (int k = j + 1; k < arr[i].length() ; k++) {
if(arr[i].charAt(j) === arr[i].charAt(k) )
counter ++;
}
if (counter > count) {
count = counter; res = arr[i];
}
}
return res;
}
}
I think the problem is that you're placing the return statement inside your outermost loop. It should be inside your inner loop.
So you have to place the return statement within the inner loop.
Correct use of return
if (countNew > count) {
count = countNew;
word = arr[i];
}
return word;
}
}
}
You need to move the return word; statement outside of the loop to fix your version.
I also put together another take on the algorithm that relies on a few built in javascript methods like Array.map and Math.max, just for reference. I ran a few tests and it seems to be a few milliseconds faster, but not by much.
/**
 * Returns the first word (lower-cased) in which some letter occurs more often
 * than any letter of the earlier words, or '-1' when no word contains a
 * repeated letter. Words are separated by single spaces.
 *
 * @param {string} str The sentence to examine.
 * @returns {string} The winning word, or '-1'.
 */
function LetterCountI(str) {
  // A letter only counts as repeated from its second occurrence on, so a word
  // must beat an occurrence count of 1 to qualify. Starting from 0 made the
  // first word win even when nothing repeated.
  var maxCount = 1;
  var word = '-1';
  //split string into words based on spaces and count repeated characters
  str.toLowerCase().split(" ").forEach(function(currentWord){
    var hash = {};
    //split word into characters and increment a hash map for repeated values
    currentWord.split('').forEach(function(letter){
      if (hash.hasOwnProperty(letter)) {
        hash[letter]++;
      } else {
        hash[letter] = 1;
      }
    });
    //convert the hash map to an array of character counts
    var characterCounts = Object.keys(hash).map(function(key){ return hash[key]; });
    //find the maximum value in the squashed array (-Infinity for an empty word)
    var currentMaxRepeatedCount = Math.max.apply(null, characterCounts);
    //if the current word has a higher repeat count than previous max, replace it
    if (currentMaxRepeatedCount > maxCount) {
      maxCount = currentMaxRepeatedCount;
      word = currentWord;
    }
  });
  return word;
}
Yet another solution in a more functional programming style:
JavaScript
/**
 * Returns the first word containing the greatest number of DISTINCT letters
 * that occur more than once in it, or -1 when no word has such a letter.
 * Words are separated by single spaces; letters are compared case-sensitively.
 *
 * @param {string} str The sentence to examine.
 * @returns {string|number} The winning word, or the number -1.
 */
function LetterCountI(str) {
  var best = null;
  str.split(' ').forEach(function(word) {
    // letter -> number of occurrences in the word
    var occurrences = new Map();
    word.split('').forEach(function(letter) {
      occurrences.set(letter, (occurrences.get(letter) || 0) + 1);
    });
    // number of repeated letters
    var repeated = 0;
    occurrences.forEach(function(total) {
      if (total > 1) {
        repeated += 1;
      }
    });
    // Strictly greater, so the earliest word wins a tie.
    if (best === null || repeated > best.count) {
      best = { word: word, count: repeated };
    }
  });
  // first word with maximum repeated letters, or -1 when nothing repeats
  return best.count ? best.word : -1;
}
console.log(LetterCountI('Today, is the greatest day ever!')); // => greatest
Plunker
http://plnkr.co/edit/BRywasUkQ3KYdhRpBfU2?p=preview
I recommend use regular expression: /a+/g to find a list of letter with a key word a.
My example :
var str = aa yyyyy bb cccc cc dd bbb;
First, find the list of distinct letters:
>>> ["a", "y", "b", "c", "d"]
Use regular expression for each word in list of different word :
var word = lstDiffWord[1];
var
wordcount = str.match(new RegExp(word+'+','g'));
console.log(wordcount);
>>>>["yyyyy"]
Here is full example: http://jsfiddle.net/sxro0sLq/4/

word frequency in javascript

How can I implement javascript function to calculate frequency of each word in a given sentence.
this is my code:
// The asker's attempt: reads the text of the element with id 'txt', splits it
// on spaces and tries to log how often each word occurs.
function search () {
var data = document.getElementById('txt').value;
var temp = data;
var words = new Array();
words = temp.split(" ");
// Parallel arrays: count[k] is meant to be the tally of uniqueWords[k].
var uniqueWords = new Array();
var count = new Array();
for (var i = 0; i < words.length; i++) {
//var count=0;
// f is set to 1 when words[i] is already present in uniqueWords.
var f = 0;
// NOTE: j is never declared, so it is created as a global variable.
for (j = 0; j < uniqueWords.length; j++) {
if (words[i] == uniqueWords[j]) {
count[j] = count[j] + 1;
//uniqueWords[j]=words[i];
f = 1;
}
}
if (f == 0) {
// NOTE: a new word is stored at index i (its position in `words`), not at the
// end of uniqueWords, so both arrays get gaps wherever a repeated word sat.
count[i] = 1;
uniqueWords[i] = words[i];
}
// NOTE: this logs inside the loop, before all words have been counted; for a
// repeated word uniqueWords[i] and count[i] were never assigned, so both are undefined.
console.log("count of " + uniqueWords[i] + " - " + count[i]);
}
}
I am unable to trace the problem. Any help is greatly appreciated.
output in this format:
count of is - 1
count of the - 2..
input: this is anil is kum the anil
Here is a JavaScript function to get the frequency of each word in a sentence:
/**
 * Counts how often each word occurs in `string`. Full stops are removed and
 * the remainder is split on whitespace.
 *
 * @param {string} string The sentence to analyse.
 * @returns {Object} A plain object mapping each word to its number of occurrences.
 */
function wordFreq(string) {
  // Tally in a Map: on a plain object a word such as "constructor" would find
  // the inherited member and corrupt its count.
  var counts = new Map();
  string.replace(/[.]/g, '').split(/\s+/).forEach(function(w) {
    // Leading, trailing or repeated whitespace yields empty pieces; they are not words.
    if (w === '') {
      return;
    }
    counts.set(w, (counts.get(w) || 0) + 1);
  });
  return Object.fromEntries(counts);
}
It will return a hash of word to word count. So for example, if we run it like so:
console.log(wordFreq("I am the big the big bull."));
> Object {I: 1, am: 1, the: 2, big: 2, bull: 1}
You can iterate over the words in sorted order with Object.keys(result).sort().forEach(function (word) {...}). We could hook that up like so:
var freq = wordFreq("I am the big the big bull.");
Object.keys(freq).sort().forEach(function(word) {
console.log("count of " + word + " is " + freq[word]);
});
Which would output:
count of I is 1
count of am is 1
count of big is 2
count of bull is 1
count of the is 2
JSFiddle: http://jsfiddle.net/ah6wsbs6/
And here is wordFreq function in ES6:
/**
 * Counts how often each word occurs in `string`. Full stops are removed and
 * the remainder is split on whitespace.
 *
 * @param {string} string The sentence to analyse.
 * @returns {Object} A plain object mapping each word to its number of occurrences.
 */
function wordFreq(string) {
  // Accumulate in a Map (no inherited keys such as "constructor", and no
  // Object.assign copy on every word), then convert to a plain object once.
  const counts = string.replace(/[.]/g, '')
    .split(/\s+/)
    .filter((word) => word !== '') // repeated or edge whitespace yields empty pieces
    .reduce((map, word) => map.set(word, (map.get(word) || 0) + 1), new Map());
  return Object.fromEntries(counts);
}
JSFiddle: http://jsfiddle.net/r1Lo79us/
I feel you have over-complicated things by having multiple arrays, strings, and engaging in frequent (and hard to follow) context-switching between loops, and nested loops.
Below is the approach I would encourage you to consider taking. I've inlined comments to explain each step along the way. If any of this is unclear, please let me know in the comments and I'll revisit to improve clarity.
(function () {
  /* The sample sentence; it could easily be replaced with a reference to a
     form control. */
  var string = "I I am am am yes yes.";
  /* Every run of alphanumeric characters in the sentence, as an array. */
  var matchedWords = string.match( /\w+/g );
  /* Array.prototype.reduce folds the array into a single value: here, an
     object that maps each word to the number of times it was seen. */
  var counts = matchedWords.reduce(function ( stats, word ) {
    /* `stats` is the object being built up; `word` is the current entry of
       `matchedWords`. A word already present is incremented; a new word
       starts at 1. */
    stats[ word ] = stats.hasOwnProperty( word ) ? stats[ word ] + 1 : 1;
    /* Hand `stats` back so the next pass keeps modifying the same object. */
    return stats;
  }, {} );
  /* `counts` now holds the finished tally; log it. */
  console.log( counts );
}());
const sentence = 'Hi my friend how are you my friend';
/**
 * Lists every distinct space-separated word of `sentence`, in order of first
 * appearance, together with how many times it occurs.
 *
 * @param {string} sentence The sentence to analyse.
 * @returns {Array} One `{ element: { word, nr } }` object per distinct word,
 *   where `nr` is a string such as "2 occurrence".
 */
const countWords = (sentence) => {
  // One pass over the words; previously the sentence was re-split and
  // re-filtered for every single word and duplicates were removed afterwards
  // through a JSON round trip.
  const tally = new Map();
  for (const word of sentence.split(" ")) {
    tally.set(word, (tally.get(word) || 0) + 1);
  }
  return Array.from(tally, ([word, total]) => ({
    element: {
      word,
      nr: total + ' occurrence',
    },
  }));
};
console.log(countWords(sentence));
Here is an updated version of your own code...
<!DOCTYPE html>
<html>
<head>
<title>string frequency</title>
<style type="text/css">
#txt{
width:250px;
}
</style>
</head>
<body >
<textarea id="txt" cols="25" rows="3" placeholder="add your text here"> </textarea><br>
<button type="button" onclick="search()">search</button>
<script >
/**
 * Reads the text of the element with id "txt", counts how many times each
 * space-separated word occurs, and logs the resulting word -> count object.
 *
 * Progress messages are logged for every word so the counting can be followed
 * in the console.
 */
function search()
{
    const data = document.getElementById('txt').value;
    const words = data.split(" ");
    const unique = {};
    for (let i = 0; i < words.length; i++) {
        const word = words[i];
        console.log(word);
        // Test for an OWN property. The `in` operator also sees inherited
        // members, so words such as "constructor" or "toString" would be
        // treated as already counted and end up with a NaN count.
        if (Object.prototype.hasOwnProperty.call(unique, word))
        {
            console.log("word found");
            unique[word] = unique[word] + 1;
        }
        else
        {
            console.log("word NOT found");
            unique[word] = 1;
        }
    }
    console.log(unique);
}
</script>
</body>
I think your loop was overly complicated. Also, trying to produce the final count while still doing your first pass over the array of words is bound to fail because you can't test for uniqueness until you have checked each word in the array.
Instead of all your counters, I've used a Javascript object to work as an associative array, so we can store each unique word, and the count of how many times it occurs.
Then, once we exit the loop, we can see the final result.
Also, this solution uses no regex ;)
I'll also add that it's very hard to count words just based on spaces. In this code, "one, two, one" will result in "one," and "one" being counted as different, unique words.
While both of the answers here are correct, and maybe better, neither of them addresses the OP's question (what is wrong with his code).
The problem with OP's code is here:
// Excerpt from the original (buggy) code: runs when the current word was not
// found among the unique words collected so far (f is still 0).
if(f==0){
// Both writes use i, the word's position in `words`, instead of the next free
// slot, so `count` and `uniqueWords` end up with gaps.
count[i]=1;
uniqueWords[i]=words[i];
}
On every new word (unique word) the code adds it to uniqueWords at index at which the word was in words. Hence there are gaps in uniqueWords array. This is the reason for some undefined values.
Try printing uniqueWords. It should give something like:
["this", "is", "anil", 4: "kum", 5: "the"]
Note that there is no element at index 3.
Also the printing of final count should be after processing all the words in the words array.
Here's corrected version:
/**
 * Reads the text of the element with id "txt", counts the occurrences of each
 * space-separated word, and logs one "count of <word> - <n>" line per distinct
 * word, in order of first appearance.
 *
 * `uniqueWords` and `count` are parallel arrays: a new word is stored at the
 * index it has in `words`, so both arrays may contain gaps. The gaps are
 * skipped when the results are printed after all words have been processed.
 */
function search()
{
    const data = document.getElementById('txt').value;
    const words = data.split(" ");
    const uniqueWords = [];
    const count = [];
    for (let i = 0; i < words.length; i++) {
        let found = false;
        // `j` is declared locally; leaving it undeclared creates an implicit
        // global and throws a ReferenceError in strict mode / ES modules.
        for (let j = 0; j < uniqueWords.length; j++) {
            if (words[i] === uniqueWords[j]) {
                count[j] = count[j] + 1;
                found = true;
                // Each word is stored only once, so no later slot can match.
                break;
            }
        }
        if (!found) {
            count[i] = 1;
            uniqueWords[i] = words[i];
        }
    }
    // Print only after every word has been processed, skipping the gaps.
    for (let i = 0; i < uniqueWords.length; i++) {
        if (typeof uniqueWords[i] !== 'undefined') {
            console.log("count of " + uniqueWords[i] + " - " + count[i]);
        }
    }
}
I have just moved the printing of the counts out of the processing loop into a new loop and added an "if not undefined" check.
Fiddle: https://jsfiddle.net/cdLgaq3a/
I had a similar assignment. This is what I did:
Assignment : Clean the following text and find the most frequent word (hint, use replace and regular expressions).
// Sample input: a sentence deliberately littered with punctuation and symbol noise.
const sentence = '%I $am#% a %tea#cher%, &and& I lo%#ve %te#a#ching%;. The#re $is no#th#ing; &as& mo#re rewarding as educa#ting &and& #emp%o#weri#ng peo#ple. ;I found tea#ching m%o#re interesting tha#n any ot#her %jo#bs. %Do#es thi%s mo#tiv#ate yo#u to be a tea#cher!? %Th#is 30#Days&OfJavaScript &is al#so $the $resu#lt of &love& of tea&ching'
// Print the assignment text followed by the raw, uncleaned sentence.
console.log(`\n\n 03.Clean the following text and find the most frequent word (hint, use replace and regular expressions) \n\n ${sentence} \n\n`)
// Print the sentence with every character of the punctuation class removed.
console.log(`Cleared sentence : ${sentence.replace(/[.,\/#!$%\^&\*;:{}=\-_`~()#]/g, "")}`)
// Print the report built by mostFrequentWord for the same sentence.
console.log(mostFrequentWord(sentence))
/**
 * Removes punctuation noise from a sentence and reports its most frequent word.
 *
 * The text is lowercased, so counting is case-insensitive. Whole words are
 * tallied (not substring matches), and when several words share the highest
 * count the one that appears first in the text wins.
 *
 * NOTE: the punctuation class does not include "?" or quote characters, so a
 * token such as "teacher?" is counted separately from "teacher".
 *
 * @param {string} sentence - Raw text to clean and analyse.
 * @returns {string} A message of the form
 *   `\n Count of most frequent word "<word>" is <count>`; for input without
 *   any words, <word> is `null` and <count> is 0.
 */
function mostFrequentWord(sentence) {
    const cleaned = sentence
        .replace(/[.,\/#!$%\^&\*;:{}=\-_`~()#]/g, "")
        .trim()
        .toLowerCase();

    // Split on runs of whitespace and drop empty tokens, so double spaces
    // cannot produce an empty "word".
    const words = cleaned.split(/\s+/).filter(Boolean);

    // Tally whole words. Building a RegExp from each word would count
    // substring hits and misread regex metacharacters left in the text.
    const tally = new Map();
    for (const current of words) {
        tally.set(current, (tally.get(current) ?? 0) + 1);
    }

    // A Map iterates in insertion order, so with a strict ">" the earliest
    // word wins a tie.
    let word = null;
    let count = 0;
    for (const [candidate, occurrences] of tally) {
        if (occurrences > count) {
            word = candidate;
            count = occurrences;
        }
    }

    return `\n Count of most frequent word "${word}" is ${count}`;
}
I'd go with Sampson's match-reduce method for slightly better efficiency. Here's a modified version of it that is more production-ready. It's not perfect, but it should cover the vast majority of scenarios (i.e., "good enough").
/**
 * Computes a case-insensitive word-frequency table for a piece of text.
 *
 * The text is lowercased, stripped of quotes, brackets, dashes, ellipses and
 * trailing punctuation marks, then split on whitespace. Apostrophes and
 * hyphens inside words (e.g. "i'm", "wolf-man") are kept.
 *
 * @param {string} s - Text to analyse.
 * @returns {Object<string, number>} Plain object mapping each word to its
 *   number of occurrences; empty when the text contains no words.
 */
function calcWordFreq(s) {
    const normalized = s
        // Normalize
        .toLowerCase()
        // Strip quotes and brackets (the text inside paired single quotes is kept via $1)
        .replace(/["“”(\[{}\])]|\B['‘]([^'’]+)['’]/g, '$1')
        // Strip dashes and ellipses
        .replace(/[‒–—―…]|--|\.\.\./g, ' ')
        // Strip punctuation marks
        .replace(/[!?;:.,]\B/g, '');

    // `match` returns null (not an empty array) when nothing matches, which
    // happens for empty or whitespace-only input.
    const words = normalized.match(/\S+/g) ?? [];

    // Tally in a Map so that no word can collide with Object.prototype
    // members, then convert to a plain object for the caller.
    const tally = new Map();
    for (const word of words) {
        tally.set(word, (tally.get(word) ?? 0) + 1);
    }
    return Object.fromEntries(tally);
}
calcWordFreq('A ‘bad’, “BAD” wolf-man...a good ol\' spook -- I\'m frightened!') returns
{
"a": 2
"bad": 2
"frightened": 1
"good": 1
"i'm": 1
"ol'": 1
"spook": 1
"wolf-man": 1
}

Categories

Resources