Word frequency for array of key/values on javascript - javascript

I'm trying to implement a piece of code on javascript to analyse word/frequency on a given string. My objective is to return a array as the following:
[{text: firstword, size:3 },{text:secondword , size:5 },{text: nword, size: 1},...]
I implemented the following code but I'm running out of memory, so I don't really know if its ok or not.
function wordFrequency(txt){
var wordArray = txt.split(/[ .?!,*'"]/);
var newArray = [];
$.each(wordArray, function (ix, word) {
if (newArray.length >= 1){
newArray.some(function (w){
if (w.text === word){
w.size++;
} else {
newArray.push({text: word, size: 1});
}
});
} else {
newArray.push({text: word, size: 1});
}
});
return newArray;
}

Array.prototype.some expects the given callback to return true or false and returns true as soon as your callback returns true for a given element, otherwise it returns false.
So some iterates over all elements, with your given callback, and your callback checks if the given element text equals the search word and if not adds a new object. Introducing a new element the some function can iterate over.
So to make this clear, for every word thats in the newArray before the word you're searching, you're adding a new object containing your word.
Suppose your newArray looks like this:
[{word:"test"},{word:"another"},{word:"one"},{word:"more"}]
after calling your function for the word even it looks like this:
[{word:"test"},{word:"another"},{word:"one"},{word:"more"},{word:"even"},{word:"even"},{word:"even"},{word:"even"}]
Using Array.prototype.filter would be the better approach here, finding you the matching element, note that I also replaced $.each with Array.prototype.forEach:
function wordFrequency(txt){
var wordArray = txt.split(/[ .?!,*'"]/);
var newArray = [], wordObj;
wordArray.forEach(function (word) {
wordObj = newArray.filter(function (w){
return w.text == word;
});
if (wordObj.length) {
wordObj[0].size += 1;
} else {
newArray.push({text: word, size: 1});
}
});
return newArray;
}
document.write(JSON.stringify(wordFrequency("count everything, count all the words, count all the words!").sort(function(a,b){return a.size<b.size})).split("},").join("}<br/>"));

It would be simpler and far more efficient to create a direct map from word to frequency, and only afterwards convert that to your array structure. Given an array words create a map of the words:
var freq = words.reduce(function(p, c) {
p[c] = (p[c] || 0) + 1;
return p;
}, {});
and the convert that map into your array:
var array = Object.keys(freq).map(function(key) {
return { text: key, size: freq[key] };
});

To tell the frequency all you need is a hash map approach. Your algorithm is quadratic, since the some method is nested in the each method, so you're always looping over the newArray just to find an entry and increment the size.
A map approach is easily achievable using a JavaScript object. It also gives you constant look-up time, which is better performance than the nested loops approach.
Try this approach instead:
function wordFrequency(txt){
var wordArray = txt.split(/[ .?!,*'"]/);
var map = {};
$.each(wordArray, function(ix, word) {
// skip empty results
if (!word.length) {
return;
}
// add word to map
if (!map[word]) {
map[word] = 0;
}
map[word]++;
});
return map;
}
To use the function:
var text = "hello!world*hello foo 'bar'foo";
var result = wordFrequency(text);
// iterate over results
Object.keys(result).forEach(function(w) {
console.log(w + ": " + result[w]);
});
// or use for...in
for (var w in result) {
console.log(w + ": " + result[w]);
}
If you really wanted to, you could then map the result into your desired array format with text and size properties:
var mappedResult = Object.keys(result).map(function(w) {
return { text: w, size: result[w] };
});
console.log(mappedResult);
Also, depending on your target browsers, you might consider using the array forEach instead of the jQuery $.each, similar to what I did with the Object.keys portion.
Here's the JSBin example.

You would probably want to avoid any iterations on duplicate elements and keep your results array unique. Since any of the iterators of Array.prototype will include each of the elements, they might not be the ideal solution for this. Sometimes plain old loops do the job best ...
(You may also want to expressively escape any special characters in your regular expression).
function wordFrequency(txt) {
var words = txt.split(/[ \.\?!,\*'"]+/),
seen = [];
for (var i = 0; i < words.length; i++) {
var w = words[i],
found = false;
for (var j = 0; j < seen.length; j++) {
if (w === seen[j].text) {
seen[j].size++;
found = true;
break;
}
}
if (!found) seen.push( { text: w, size: 1 } );
}
return seen;
}
(Note that the inner for-loop isn't visited for the first word, so the first word will be pushed to the seen-stack and the inner for-loop will start with the second word compared to the first one. Only words that we haven't seen already are added to the seen-stack, making it an array of unique elements.)
And here is the equivalent using Array.prototype.forEach() and Array.prototype.indexOf(), but we have to add another intermediate results stack for the latter one. So we'll have to add another iteration to produce the final result. (We wouldn't have to do this using Array.prototype.findIndex(), but this is not a standard method.)
function wordFrequency2(txt) {
var words = txt.split(/[ \.\?!,\*'"]+/),
seen = [],
freq = [];
// get frequencies
words.forEach(function (w) {
var idx = seen.indexOf(w);
if (idx >= 0) {
freq[idx]++;
}
else {
seen.push(w);
freq.push(1);
}
});
// produce the results array
var r = [];
seen.forEach(function (w, idx) {
r.push( { text: w, size: freq[idx] } );
});
return r;
}
Putting optimization into account, the first version using explicit loops will be probably performing faster ...

var words = (function(){
var sWords = document.body.innerText.toLowerCase().trim().replace(/[,;.]/g,'').split(/[\s\/]+/g).sort();
var iWordsCount = sWords.length; // count w/ duplicates
// array of words to ignore
var ignore = ['and','the','to','a','of','for','as','i','with','it','is','on','that','this','can','in','be','has','if'];
ignore = (function(){
var o = {}; // object prop checking > in array checking
var iCount = ignore.length;
for (var i=0;i<iCount;i++){
o[ignore[i]] = true;
}
return o;
}());
var counts = {}; // object for math
for (var i=0; i<iWordsCount; i++) {
var sWord = sWords[i];
if (!ignore[sWord]) {
counts[sWord] = counts[sWord] || 0;
counts[sWord]++;
}
}
var arr = []; // an array of objects to return
for (sWord in counts) {
arr.push({
text: sWord,
frequency: counts[sWord]
});
}
// sort array by descending frequency | http://stackoverflow.com/a/8837505
return arr.sort(function(a,b){
return (a.frequency > b.frequency) ? -1 : ((a.frequency < b.frequency) ? 1 : 0);
});
}());
(function(){
var iWordsCount = words.length; // count w/o duplicates
for (var i=0; i<iWordsCount; i++) {
var word = words[i];
console.log(word.frequency, word.text);
}
}());

Related

Remove elements from array by type of element

I have an array and I want to remove all the string elements from it.
This is what I have so far. The result is not what I want since it returns only "bicycle"
Also, I am doing this in Test Complete so I need to have a main function that logs the result.
function ex06(){
var mailBox = "mailbox";
var twenty = 20;
var isItRaining = true;
var goat = "";
var stringsArray = ["bicycle", "pocket", 3, mailBox, twenty, isItRaining, goat];
var result = removeStrings();
Log.Message("stringsArray looks like this after the removal of all the string elements: " + result);
function removeStrings(){
var i;
var x
for(i = 0; i < stringsArray.length; i++){
if (typeof(stringsArray[i]) === 'string'){
x = stringsArray.splice(i, 1);
return x;
}
}
}
}
Version 1, with Array#filter
var a = [1, 2, "3", "4", true];
a = a.filter(function (e) {
return typeof e !== 'string';
});
document.write('<pre>' + JSON.stringify(a, 0, 4) + '</pre>');
Version 2, with Array#splice and running backwards.
var a = [1, 2, "3", "4", true],
i = a.length;
while (i--) {
if (typeof a[i] === 'string') {
a.splice(i, 1);
}
}
document.write('<pre>' + JSON.stringify(a, 0, 4) + '</pre>');
The Array.prototype.filter method is what you need:
var stringsArray = ["bicycle", "pocket", 3, mailBox, twenty, isItRaining, goat];
var result = stringsArray.filter(function(element) {
return typeof element !== 'string';
});
you need to reduce the counter variable and check the original array
try this simple example
var a = [1,2,"3", "4", true];
for( var counter = 0; counter < a.length; counter++)
{
if ( (typeof a[ counter ] ) == "string" )
{
a.splice(counter,1); counter--;
}
}
console.log(a); //output [1, 2, true]
try this code:
function ex06(){
var mailBox = "mailbox";
var twenty = 20;
var isItRaining = true;
var goat = "";
var stringsArray = ["bicycle", "pocket", 3, mailBox, twenty, isItRaining, goat];
var result = removeStrings();
Log.Message("stringsArray looks like this after the removal of all the string elements: " + result);
function removeStrings(){
var newarray = [];
var i;
var x
for(i = 0; i < stringsArray.length; i++){
if (typeof(stringsArray[i]) !== 'string'){
newarray.push(stringsArray[i]);
}
}
return newarray
}
}
JavaScript offers native methods to filter arrays, so that you can remove string elements more easily: Array.prototype.filter can make the process a lot easier (and prevents strange behaviours when using splice inside a loop).
function ex06(){
var mailBox = "mailbox";
var twenty = 20;
var isItRaining = true;
var goat = "";
var stringsArray = ["bicycle", "pocket", 3, mailBox, twenty, isItRaining, goat];
var result = removeStrings(stringsArray);
Log.Message("stringsArray looks like this after the removal of all the string elements: " + result);
function removeStrings(arrayWithString){
return arrayWithString.filter(function(item) {
return typeof item !== 'string'; // returns only items which are not strings
});
}
}
A small piece of advice: Pass in the array into your function instead of referencing it from the parent scope. This way you have a pure, reusable function (and no strange side effects you might not want).
I assume this is an exercise, and that's why you're not using Array#filter.
The problem is that you have your return x inside your for loop, so you return the first string you find.
You have at least three options:
Don't return anything, since removeStrings is modifying the original array. That one's easy: Just remove the return x; line.
Don't modify the original array; instead, create and return a new array with the strings left out. In that case, you'd start with x = [] before the loop, remove the splice call, and instead push any non-string onto x.
Modify the original array, and create and return a new array containing the strings you've removed. In that case, you'd remove return x from inside the loop, have x = [] before the loop, and push the entries you remove onto x. Then return x at the end.
In any of the places where you're modifying the original, note gurvinder372's point that when you remove an entry, you need to not increase the index counter, as you'll end up skipping the next entry.
I wouldn't do it the way he suggests, though; when I'm looping through an array modifying it, for isn't what I reach for, I reach for while:
i = 0;
while (i < stringsArray.length) {
if (typeof stringsArray[i] === 'string'){
stringsArray.splice(i, 1);
// We leave `i` alone here, because we need to process
// the new `stringsArray[i]` on the next pass
} else {
// Didn't remove this entry, move past it
++i;
}
}
Side note: typeof isn't a function, it's an operator, there's no need to put its operand in ():if (typeof stringsArray[i] === 'string'){

Get full value from array using partial value

I have an array like this:
var array = ["xs-1", "sm-10", "md-4"];
Now I want to get the number at the end of a particular value. For example I want to search the array for "md-" and see what number is at the end of that string (should return 4).
I can't do array.indexOf("xs-") because that isn't the whole value. Is there a way to do this?
Using a for loop:
var array = ["xs-1", "sm-10", "md-4"];
var search = "md-";
var found = null;
for (var i = 0; i < array.length; i++) {
if (array[i].indexOf(search) === 0) {
found = array[i];
break; // Note: this is assuming only one match exists - or at least you are
// only interested in the first match
}
}
if (found) {
alert(found);
} else {
alert("Not found");
}
Using .filter:
var array = ["xs-1", "sm-10", "md-4"];
var search = "md-";
var filtered = array.filter(function(item) {
return item.indexOf(search) === 0;
});
// note that here filtered will contain all matched elements, so it might be more than
// one match.
alert(filtered);
Building from #János Weisz's suggestion, you can easily transform your array into an object using .reduce:
var array = ["xs-1", "sm-10", "md-4"];
var search = "md";
var obj = array.reduce(function(prev, item) {
var cells = item.split("-");
prev[cells[0]] = cells[1];
return prev;
}, {});
// note: at this point we have an object that looks like this:
// { xs:1, sm:10, md: 4 }
// if we save this object, we can do lookups much faster than looping
// through an array
// now to find "md", we simply do:
alert(obj[search]);
If you need to do multiple look ups from the same source array, then transforming it into an object may be the most efficient approach overall. You pay the initial price of the transformation, but after than lookups are O(1) versus O(n) for each time you have to search your array. Of course, if you only ever need one item, then probably don't bother.
I recommend using objects for this:
var array = [{'type': 'xs', 'value': 1}, {'type' : 'sm', 'value': '10'}, {'type' : 'md', 'value': '4'}];
This way you can search the array as:
function searchMyArrayByType(array, type) {
var items[];
for (var i = 0; i < array.length; i++)
{
if (array[i].type == type) items.push(array[i].value);
}
return items;
}
var valuesWithMd = searchMyArrayByType(array, 'md');
For more information regarding the structure and use of objects, please refer to https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Working_with_Objects
You can create a method that takes the prefix you're looking for, the array, and the split character and returns all the numbers in an array:
function findNumberFromPrefix(prefix, arr, splitChar) {
var values = [];
for (var i = 0; i < arr.length; i++) {
if (arr[i].indexOf(prefix) === 0) {
values.push(arr[i].split(splitChar)[1]);
}
}
return values;
}
And call it:
var array = ["xs-1", "sm-10", "md-4"];
var values = findNumberFromPrefix("md-", array, "-");
console.log(values); //["4"]
Demo: http://jsfiddle.net/rn4h9msh/
A more functional approach and assuming you can have have more than one element with the same prefix:
function findPrefix(array, prefix) {
return array.filter(function (a) { return a.indexOf(prefix) === 0; })
.map(function (e) { return e.slice(prefix.length); })
}
If you have only one matching element, do a loop like this:
var array = ["xs-1", "sm-10", "md-4"];
var needle = "md-";
for(i=0;i<array.length;i++) {
if(array[i].indexOf(needle) == 0)
alert(array[i].substr(needle.length, array[i].length));
}
Demo: http://jsfiddle.net/kg0c43ov/
You can do it like this...
var array = ["xs-1", "sm-10", "md-4"];
getValue("md-");
function getValue(search) {
for(var key in array) {
if(array[key].indexOf(search) > -1) {
alert("Array key is: " + key);
alert("Array value is: " + array[key].replace(search, ""));
}
}
}
JSFiddle here.

javascript array with numeric index without undefineds

suppose I do..
var arr = Array();
var i = 3333;
arr[i] = "something";
if you do a stringify of this array it will return a string with a whole bunch of undefined numeric entries for those entries whose index is less than 3333...
is there a way to make javascript not do this?
I know that I can use an object {} but I would rather not since I want to do array operations such as shift() etc which are not available for objects
If you create an array per the OP, it only has one member with a property name of "333" and a length of 334 because length is always set to be at least one greater than the highest index. e.g.
var a = new Array(1000);
has a length of 1000 and no members,
var a = [];
var a[999] = 'foo';
has a length of 1000 and one member with a property name of "999".
The speedy way to only get defined members is to use for..in:
function myStringifyArray(a) {
var s = [];
var re = /^\d+$/;
for (var p in a) {
if (a.hasOwnProperty(p) && re.test(p)) {
s.push(a[p]);
}
}
return '' + s;
}
Note that the members may be returned out of order. If that is an issue, you can use a for loop instead, but it will be slower for very sparse arrays:
function myStringifyArray(a) {
var s = [];
var re = /^\d+$/;
for (var i=0, iLen=a.length; i<iLen; i++) {
if (a.hasOwnProperty(i)) {
s.push(a[i]);
}
}
return '' + s;
}
In some older browsers, iterating over the array actually created the missing members, but I don't think that's in issue in modern browsers.
Please test the above thoroughly.
The literal representation of an array has to have all the items of the array, otherwise the 3334th item would not end up at index 3333.
You can replace all undefined values in the array with something else that you want to use as empty items:
for (var i = 0; i < arr.length; i++) {
if (typeof arr[i] == 'undefined') arr[i] = '';
}
Another alternative would be to build your own stringify method, that would create assignments instead of an array literal. I.e. instead of a format like this:
[0,undefined,undefined,undefined,4,undefined,6,7,undefined,9]
your method would create something like:
(function(){
var result = [];
result[0] = 0;
result[4] = 4;
result[6] = 6;
result[7] = 7;
result[9] = 9;
return result;
}())
However, a format like that is of course not compatible with JSON, if that is what you need.

Push objects to array so long as object property is not NaN

I am aware that I could use .filter to achieve this, however I am not sure how to implement it.
I have objects as follows within an array
item {
title : g.attributes.title,
category : g.attributes.categoryid,
possible: g.attributes.possible
}
however, some items in the array have a possible property of NaN.
I need to make sure that only items whose possible property is not NaN, are pushed into the array.
Here is an excerpt of my complete code:
function load(id){
itemPath = lev1.lev2.lev3;
items = [];
for (var i = 0; i<itemPath.length; i++) {
if(itemPath[i].attributes.id==id) {
return itemPath[i].attributes.grades.models.map(function(g) {
items.push(
{
title : g.attributes.title,
category : g.attributes.categoryid,
possible: g.attributes.possible
});
});
}
}
}
function load(id){
itemPath = lev1.lev2.lev3;
items = [];
for (var i = 0; i<itemPath.length; i++) {
if(itemPath[i].attributes.id==id) {
return itemPath[i].attributes.grades.models.map(function(g) {
if(g.attributes.possible !== g.attributes.possible){
return;
}
items.push(
{
title : g.attributes.title,
category : g.attributes.categoryid,
possible: g.attributes.possible
});
});
}
}
}
NaN is the only property in javascript that does not equal itself. Just loop over the properties and check them for this, or use the built in NaN() function within the loop as suggested elsewhere.
Update
Since you're only worried about the possible property, just check that one as part of the if statement using === self, or isNaN()
Just change your test line from
if(itemPath[i].attributes.id==id)
to use isNaN function on the properties you want to check
var attr = itemPath[i].attributes;
if (attr.id==id && !isNaN(attr.title) && !isNaN(attr.categoryid) && !isNaN(attr.possible))
You can use the isNaN() and test it before adding it...
function load(id){
itemPath = lev1.lev2.lev3;
items = [];
for (var i = 0; i<itemPath.length; i++) {
if(itemPath[i].attributes.id==id) {
return itemPath[i].attributes.grades.models.map(function(g) {
if( isNaN(g.attributes.title) || isNaN(g.attributes.categoryid) || isNaN(g.attributes.possible) ){
items.push(
{
title : g.attributes.title,
category : g.attributes.categoryid,
possible: g.attributes.possible
});
}
});
}
}
}
You're code is a little confusing
function load(id){
itemPath = lev1.lev2.lev3;
items = [];
for (var i = 0; i<itemPath.length; i++) {
if(itemPath[i].attributes.id==id) {
return itemPath[i].attributes.grades.models.map(function(g) {
items.push(
{
title : g.attributes.title,
category : g.attributes.categoryid,
possible: g.attributes.possible
});
});
}
}
}
It doesn't look like you are using map right. Map works like list compresions in the sense that it iterates over a sequence to preform some kind of operation on each element and returns a new sequence
var arr = [1,2,3];
var complexMagic = arr.map( function(n) { return n + 10; } );
// complexMagic === [11,12,13]
FYI, this is how filter, works. Filter takes in a predicate function( aka, Boolean function) to build a new sequence. If the predicate returns true, then the element will be stored in the new sequence.
var arr = [1, 123, 42, 1001, 1100];
var oddNumbers = arr.filter( function(n) {
return 1 === (n & (-n) );
} );
// oddNumbers === [1, 123, 1001] );
// Bit hacks are fun ;P
It looks like you don't need items array or to even push new elements onto it.
function load(id){
itemPath = lev1.lev2.lev3;
items = [];
for (var i = 0; i<itemPath.length; i++) {
if(itemPath[i].attributes.id==id) {
return itemPath[i].attributes.grades.models.map(function(g) {
// You don't have to return anything.
// This might be an ok place for the NaN check.
return ({
title : g.attributes.title,
category : g.attributes.categoryid,
possible: g.attributes.possible
});
});
}
}
}
I'm lazy and didn't testing any of my code so reader beware. Also avoid the push method if possible. It can be a inefficient approach to append new elements onto an array.

Check for a value in Array

I'm trying to check for match in an array with PURE JAVASCRIPT. I don't know how to do this, I would appreciate your help.
var sites = new Array ("site1.com", "site2.com", "site3.com" ...);
// Sites array contains 100 values
var imgs = document.getElementsByTagName("img");
for (var i = 0; i < imgs.length; i++) {
img = imgs[i].src;
// I'm trying to check if is in array,
// and don't waste a lot of size in code
if(img.match(sites)){
notHere(imgs[i]);
}
// This is the working way.
// Even if location is a.site1.com/b/, it will match
if (img.match("site1.com")) {
heReload(imgs[i]);
}
// Repeat this piece of code 100 times
}
}
NOTE: I don't want to check for an exact value. I want to simulate the match() function so if img = "http://a.b.c/d/" and in array is "b.c/", it executes function().
Your "sites" variable should be a regular expression rather than an array:
var sites = /\b(site1\.com|site2\.com|etc|etc)\b/
later:
if (img.match(sites))
......
If for some reason you prefer to keep "sites" in an array, you also can create a regular expression "on the fly":
var sites = ["site1.com", "site2.com", "site3.com"]
var sitesRegexp = new RegExp("\\b(" + sites.join("|").replace(".", "\\.") + ")\\b")
....
if (img.match(sitesRegexp)
......
Good use case for filter.
If you want to have it working on "old" browser :
var nativeFilter = Array.prototype.filter;
_.filter = _.select = function(obj, iterator, context) {
var results = [];
if (obj == null) return results;
if (nativeFilter && obj.filter === nativeFilter) return obj.filter(iterator, context);
each(obj, function(value, index, list) {
if (iterator.call(context, value, index, list)) results[results.length] = value;
});
return results;
};
It will return an empty array if nothing is found, otherwise, return an array containing the result(s)
> [1,2,3,4].filter(function(item) { return item == 4 } );
[ 4 ]
Source : Underscore.js
So now your code will look like this :
var sites = new Array ("site1.com", "site2.com", "site3.com" ...);
// Sites array contains 100 values
var imgs = document.getElementsByTagName( "img" );
for ( var i = 0; i < imgs.length; i++ ) {
var img = imgs[ i ].src;
var result = sites._filter( function( site ) {
return img.match( site )
});
// result is an array of matching sites
}
you can extend the Array prototype , so it supports all browsers ...
try this :
Array.prototype.myContains = function(obj) {
var i = this.length;
while (i--) {if (this[i] .indexOf(obj)>-1) return true; }
return false;
}
usage : var t=myArr.myContains(3);

Categories

Resources