Best way to group elements in an array with least complexity - javascript

I have a JSON array which looks like this:
var map_results = [{"Type":"Flat","Price":100.9},
{"Type":"Room","Price":23.5},
{"Type":"Flat","Price":67.5},
{"Type":"Flat","Price":100.9}
{"Type":"Plot","Price":89.8}]
This array contains about 100,000 records. I want the output to be grouped by "Type" and "Price". It should look like this:
var expected_output = [{"Type":"Flat", "Data":[{"Price":100.9, "Total":2},
{"Price":67.5, "Total":1}] },
{"Type":"Room","Data":[{"Price":23.5,"Total":1}]},
{"Type":"Plot","Data":[{"Price":89.8, "Total:1"}]}]
This has to be done in pure javascript and I cannot use libraries like undersore.js. I tried solving the problem but it had like 3 nested for loops which made the complexity as n^4. What could be a better solution for this problem??
The function I have looks like this:
var reduce = function (map_results) {
var results = [];
for (var i in map_results) {
var type_found = 0;
for(var result in results){
if (map_results[i]["Type"] == results[result]["Type"]){
type_found = 1;
var price_found = 0;
for(var data in results[result]["Data"]){
if(map_results[i]["Price"] == results[result]["Data"][data]["Price"]){
price_found = 1;
results[result]["Data"][data]["Total"] +=1;
}
}
if(price_found == 0){
results[result]["Data"].push({"Price":map_results[i]["Price"], "Total":1});
}
}
}
if(type_found == 0){
results.push({"Type":map_results[i]["Type"], "Data":[{"Price":map_results[i]["Price"],"Total":1}]});
}
}
return results;
};

I have a short function that handles the first part of the requested functionality: It maps the map_results to the desired format:
var map_results = [{"Type":"Flat","Price":100.9},
{"Type":"Room","Price":23.5},
{"Type":"Flat","Price":67.5},
{"Type":"Flat","Price":100.9},
{"Type":"Plot","Price":89.8}]
var expected_output = map_results.reduce(function(obj, current){
if(!obj[current.Type]){
obj[current.Type] = {'Type':current.Type, 'Data':[]};
}
obj[current.Type].Data.push({'Price':current.Price, 'Total':1});
return obj;
},{})
Then this piece of code is required to calculate the totals, I'm afraid:
for(var type in expected_output){
var d = {};
for(var item in expected_output[type].Data){
d[expected_output[type].Data[item].Price] = (d[expected_output[type].Data[item].Price] || 0) + 1;
}
expected_output[type].Data = [];
for(var i in d){
expected_output[type].Data.push({
'Price':i,
'Total':d[i]
})
}
}
Output:
{
"Flat":{
"Type":"Flat",
"Data":[{"Price":"100.9","Total":2},
{"Price":"67.5","Total":1}]
},
"Room":{
"Type":"Room",
"Data":[{"Price":"23.5","Total":1}]
},
"Plot":{
"Type":"Plot",
"Data":[{"Price":"89.8","Total":1}]
}
}

As the Types and the Prices are unique after grouping I think a structure like {"Flat": {"100.9":2,"67.5":1}, {"Room": {"23.5": 1}}} would be easier to handle. So could do the grouping the following way:
var output = {};
map_results.map(function(el, i) {
output[el["Type"]] = output[el["Type"]] || [];
output[el["Type"]][el["Price"] = (output[el["Type"]][el["Price"]+1) || 1;
});
If you can not handle this structure you could do another mapping to your structure.
As you are iterating the Array one time this should have a complexity of n.
Look here for a working fiddle.
EDIT: So remap everything to your structure. The order of the remapping is far less then the first mapping, because the grouping is already done.
var expected_output = [];
for(type in output) {
var prices = [];
for(price in output[type]) {
prices.push({"Price": price, "Total": output[type][price]);
}
expected_output.push({"Type": type, "Data": prices});
}

Below is yet another effort. Here's a FIDDLE
For performance testing, I also mocked up a JSPerf test with 163840 elements. On Chrome(OSX) original solution is 90% slower than this one.
Few notes:
Feel free to optimize for your case (e.g. take out the hasOwnProperty check on object cloning).
Also, if you need the latest Total as the first element use unshift instead of push to add the obj the beginning of the array.
function groupBy(arr, key, key2) {
var retArr = [];
arr.reduce(function(previousValue, currentValue, index, array){
if(currentValue.hasOwnProperty(key)) {
var kVal = currentValue[key];
if(!previousValue.hasOwnProperty(kVal)) {
previousValue[kVal] = {};
retArr.push(previousValue[kVal]);
previousValue[kVal][key] = kVal;
previousValue[kVal]["Data"] = [];
}
var prevNode = previousValue[kVal];
if(currentValue.hasOwnProperty(key2)) {
var obj = {};
for(var k in currentValue) {
if(currentValue.hasOwnProperty(k) && k!=key)
obj[k] = currentValue[k];
}
obj["Total"] = prevNode["Data"].length + 1;
prevNode["Data"].push(obj);
}
}
return previousValue;
}, {});
return retArr;
}
var map_results = [{"Type":"Flat","Price":100.9},
{"Type":"Room","Price":23.5},
{"Type":"Flat","Price":67.5},
{"Type":"Flat","Price":100.9},
{"Type":"Plot","Price":89.8}];
var expected_output = groupBy(map_results, "Type", "Price");
console.dir(expected_output);

Tried something like this:
var reduce_func = function (previous, current) {
if(previous.length == 0){
previous.push({Type: current.Type, Data:[{Price:current.Price,Total:1}]});
return previous;
}
var type_found = 0;
for (var one in previous) {
if (current.Type == previous[one].Type){
type_found = 1;
var price_found = 0;
for(var data in previous[one].Data){
if(current.Price == previous[one].Data[data].Price){
price_found = 1;
previous[one].Data[data].Total += 1;
}
}
if(price_found == 0){
previous[one].Data.push({Price:current.Price, Total:1});
}
}
}
if(type_found == 0){
previous.push({Type:current.Type, Data:[{Price : current.Price ,Total:1}]});
}
return previous;
}
map_results.reduce(reduce_func,[]);

Related

Push different object in an array with a for loop

I have an element structured like this:
Element ->
[{values: arrayOfObject, key:'name1'}, ... ,{values: arrayOfObjectN, key:'nameN'}]
arrayDiObject -> [Object1, Object2, ... , ObjectN] //N = number of lines in my CSV
Object1 -> {x,y}
I have to take data from a big string:
cityX#substanceX#cityY#substanceY#
I thought to make it this way, but it seems like it pushes always in the same array of objects. If I put oggetto = {values: arrayDateValue, key: key}; inside the d3.csv function, instead if I put outside the function it add me only empty objects.
Here is my code:
var final = new Array();
var oggetto;
var key;
function creaDati() {
var newdate;
var arrayDateValue = new Array();
var selString = aggiungiElemento().split("#");
//selString is an array with selString[0]: city, selString[1]: substance and so on..
var citySelected = "";
var substanceSelected = "";
for (var i = 0; i < selString.length - 1; i++) {
if (i % 2 === 0) {
citySelected = selString[i];
} else if (i % 2 !== 0) {
substanceSelected = selString[i];
key = citySelected + "#" + substanceSelected;
d3.csv("/CSV/" + citySelected + ".csv", function(error, dataset) {
dataset.forEach(function(d) {
arrayDateValue.push({
x: d.newdate,
y: d[substanceSelected]
});
});
});
oggetto = {
values: arrayDateValue,
key: key
};
arrayDateValue = [];
final.push(oggetto);
}
}
}
Any idea ?
First you should make the if statement for the city and then for the key, which you seem to be doing wrong since you want the pair indexes to be the keys and the not pair to be the city, and you are doing the opposite. And then you need to have the d3.csv and push the objects outside of the if statement, otherwise in your case you are just adding elements with citySelected="".
Try something like :
for(var i = 0; i < selString.length -1; i+=2){
cittySelected = selString[i];
substanceSelected = selString[i+1];
key = citySelected + "#" + substanceSelected;
d3.csv("/CSV/"+citySelected+".csv", function(error, dataset){
dataset.forEach(function(d){
arrayDateValue.push({x: d.newdate, y: d[substanceSelected]});
});
});
oggetto = {values: arrayDateValue, key: key};
arrayDateValue = [];
final.push(oggetto);
}
It's is not the best way to do it, but it is clearer that what you are following, i think.
In the if(i % 2 == 0) { citySelected = ... } and else if(i % 2 !== 0) { substanceSelected = ... } citySelected and substanceSelected will never come together.
The values should be in one statement:
if(...) { citySelected = ...; substanceSelected = ...; }
The string can be splitted into pairs
city1#substance1, city2#substance2, ...
with a regex (\w{1,}#\w{1,}#).
Empty the arrayDateValue after the if-statement.
Hint:
var str = "cityX#substanceX#cityY#substanceY#";
function createArr(str) {
var obj = {};
var result = [];
var key = "";
// '', cityX#substanceX, '', cityYsubstanceY
var pairs = str.split(/(\w{1,}#\w{1,}#)/g);
for (var i = 0; i < pairs.length; i++) {
if(i % 2 !== 0) {
key = pairs[i];
// d3 stuff to create values
obj = {
// Values created with d3 placeholder
values: [{x: "x", y: "y"}],
// Pair
key: key
};
result.push(obj);
}
// Here should be values = [];
}
return result;
}
var r = createArr(str);
console.log(r);
May be you can do like this;
var str = "cityX#substanceX#cityY#substanceY",
arr = str.split("#").reduce((p,c,i,a) => i%2 === 0 ? p.concat({city:c, key:a[i+1]}) : p,[]);
console.log(JSON.stringify(arr));
RESOLVED-
The problem is about d3.csv which is a asynchronous function, it add in the array when it finish to run all the other code.
I make an XMLHttpRequest for each csv file and it works.
Hope it helps.

Recursion: How can I remove

here's my code:
var asset = ['1234_12', '1234_34', '1234_33', '4321_22', '4321_90'];
var largest = removeElements(asset);
function removeElements(asset) {
var retVal = [];
for (i = 0; i < asset.length; i++) {
for (var j = 0; j < asset.length; j++) {
if (asset[i].split('_')[0] == asset[j].split('_')[0]) {
if (asset[i].split('_')[1].split('.')[0] > asset[j].split('_')[1].split('.')[0]) {
retVal = removeElements(asset, asset[j]);
for (var k = 0; k < retVal.length; k++) {
for (var l = 0; l < retVal.length; l++) {
if (retVal[k].split('_')[0] == retVal[l].split('_')[0]) {
removeElements(retVal);
} else {
return retVal;
}
}
}
}
}
}
}
return retVal;
}
Here's the structure of array:
var asset = ['1234_12', '1234_34', '1234_33', '4321_22', '4321_90'];
What I want is to get largest in '1234' or '4321' series. For example, in this case, I need to grab '1234_34' and '4321_90'.
RangeError: Maximum call stack size exceeded
What am I doing wrong?
You are making it harder than it is for yourself. You can just iterate over each item and store the matched values in an object:
var asset = ['1234_12', '1234_34', '1234_33', '4321_22', '4321_90'];
var intermediate = {};
asset.forEach(function(v) {
var parts = v.split('_');
var key = parts[0];
var val = parts[1];
if (!intermediate[key] || intermediate[key] < val) {
intermediate[key] = val;
}
});
This will produce an object like:
{"1234": "34", "4321": "90"}
Which you can then be turned into the expected array:
var output = Object.keys(intermediate).map(function(key) {
return key + '_' + intermediate[key];
});
console.log(output); // ["1234_34", "4321_90"]
Take a look at .forEach, .map and Object.keys
Here's an example of something that will grab those values (see the jsbin):
var asset = ['1234_12', '1234_34', '1234_33', '4321_22', '4321_90'];
var ids = _.values(_.mapValues(asset.reduce(function(agg, curr) {
var parts = curr.split('_');
agg[parts[0]] = agg[parts[0]] || [];
agg[parts[0]].push(parts[1]);
return agg;
}, {}), function(value, key) {
return [key, Math.max.apply(Math, value)].join('_');
}));
console.log(ids); // => ["1234_34", "4321_90"]
It uses lodash for convenience, but the principles are the same without it.
First you split each string into a key-value pair of the prefix and suffix (so 1234_12 and 1234_34, etc., becomes like { 1234: ['12', '34'] }). Then you just find the max value in that array and join it back with its key.

How to do I unshift/shift single value and multiple values using custom methods?

I have prototypes to recreate how array methods work, pop/push/shift/etc, and I would like to extend the functionality to do the following:
Push/Pop/shift/unshift multiple values
array.push(0);
array.push(1);
array.push(2);
expect(array.pop()).to.be(2);
expect(array.pop()).to.be(1);
expect(array.pop()).to.be(0);
Push/Pop/unshift/etc single values
array.push(0);
array.push(1);
expect([0,1]);
array.pop(1);
expect([0]);
My assumption is that I would need a global array variable to store the elements. Is that the right?
Here is my code:
var mainArray = []; // array no longer destroyed after fn() runs
function YourArray(value) {
this.arr = mainArray; // looks to global for elements | function?
this.index = 0;
var l = mainArray.length;
if(this.arr === 'undefined')
mainArray += value; // add value if array is empty
else
for(var i = 0; i < l ; i++) // check array length
mainArray += mainArray[i] = value; // create array index & val
return this.arr;
}
YourArray.prototype.push = function( value ) {
this.arr[ this.index++ ] = value;
return this;
};
YourArray.prototype.pop = function( value ) {
this.arr[ this.index-- ] = value;
return this;
};
var arr = new YourArray();
arr.push(2);
console.log(mainArray);
My assumption is that I would need a global array variable to store
the elements. Is that the right?
No. That is not right.
You want each array object to have its own, independent set of data. Otherwise, how can you have multiple arrays in your program?
function YourArray(value) {
this.arr = []; // This is the data belonging to this instance.
this.index = 0;
if(typeof(value) != 'undefined')) {
this.arr = [value];
this.index = 1;
}
}
////////////////////////////////////
// Add prototype methods here
///////////////////////////////////
var array1 = new YourArray();
var array2 = new YourArray();
array1.push(2);
array1.push(4);
array2.push(3);
array2.push(9);
// Demonstrate that the values of one array
// are unaffected by the values of a different array
expect(array1.pop()).to.be(4);
expect(array2.pop()).to.be(9);
It's a bit late for this party, admitted but it nagged me. Is there no easy (for some larger values of "easy") way to do it in one global array?
The standard array functions work as in the following rough(!) sketch:
function AnotherArray() {
this.arr = [];
// points to end of array
this.index = 0;
if(arguments.length > 0) {
for(var i=0;i<arguments.length;i++){
// adapt if you want deep copies of objects
// and/or take a given array's elements as
// individual elements
this.arr[i] = arguments[i];
this.index++;
}
}
}
AnotherArray.prototype.push = function() {
// checks and balances ommitted
for(var i=0;i<arguments.length;i++){
this.arr[ this.index++ ] = arguments[i];
}
return this;
};
AnotherArray.prototype.pop = function() {
this.index--;
return this;
};
AnotherArray.prototype.unshift = function() {
// checks and balances ommitted
var tmp = [];
var alen = arguments.length;
for(var i=0;i<this.index;i++){
tmp[i] = this.arr[i];
}
for(var i=0;i<alen;i++){
this.arr[i] = arguments[i];
this.index++;
}
for(var i=0;i<tmp.length + alen;i++){
this.arr[i + alen] = tmp[i];
}
return this;
};
AnotherArray.prototype.shift = function() {
var tmp = [];
for(var i=1;i<this.index;i++){
tmp[i - 1] = this.arr[i];
}
this.arr = tmp;
this.index--;
return this;
};
AnotherArray.prototype.isAnotherArray = function() {
return true;
}
AnotherArray.prototype.clear = function() {
this.arr = [];
this.index = 0;
}
AnotherArray.prototype.fill = function(value,length) {
var len = 0;
if(arguments.length > 1)
len = length;
for(var i=0;i<this.index + len;i++){
this.arr[i] = value;
}
if(len != 0)
this.index += len;
return this;
}
// to simplify this example
AnotherArray.prototype.toString = function() {
var delimiter = arguments.length > 0 ? arguments[0] : ",";
var output = "";
for(var i=0;i<this.index;i++){
output += this.arr[i];
if(i < this.index - 1)
output += delimiter;
}
return output;
}
var yaa = new AnotherArray(1,2,3);
yaa.toString(); // 1,2,3
yaa.push(4,5,6).toString(); // 1,2,3,4,5,6
yaa.pop().toString(); // 1,2,3,4,5
yaa.unshift(-1,0).toString(); // -1,0,1,2,3,4,5
yaa.shift().toString(); // 0,1,2,3,4,5
var yaa2 = new AnotherArray();
yaa2.fill(1,10).toString(); // 1,1,1,1,1,1,1,1,1,1
Quite simple and forward and took only about 20 minutes to write (yes, I'm a slow typist). I would exchange the native JavaScript array in this.arr with a double-linked list if the content can be arbitrary JavaScript objects which would make shift and unshift a bit less memory hungry but that is obviously more complex and slower, too.
But to the main problem, the global array. If we want to use several individual chunks of the same array we need to have information about the starts and ends of the individual parts. Example:
var globalArray = [];
var globalIndex = [[0,0]];
function YetAnotherArry(){
// starts at the end of the last one
this.start = globalIndex[globalIndex.length-1][1];
this.index = this.start;
// position of the information in the global index
this.pos = globalIndex.length;
globalIndex[globalIndex.length] = [this.start,this.index];
}
So far, so well. We can handle the first array without any problems. We can even make a second one but the moment the first one wants to expand its array we get in trouble: there is no space for that. The start of the second array is the end of the first one, without any gap.
One simple solution is to use an array of arrays
globalArray = [
["first subarray"],
["second subarray"],
...
];
We can than reuse what we already wrote in that case
var globalArray = [];
function YetAnotherArray(){
// open a new array
globalArray[globalArray.length] = [];
// point to that array
this.arr = globalArray[globalArray.length - 1];
this.index = 0;
}
YetAnotherArray.prototype.push = function() {
for(var i=0;i<arguments.length;i++){
this.arr[ this.index++ ] = arguments[i];
}
return this;
};
// and so on
But for every new YetAnotherArray you add another array to the global array pool and every array you abandon is still there and uses memory. You need to manage your arrays and delete every YetAnotherArray you don't need anymore and you have to delete it fully to allow the GC to do its thing.
That will leave nothing but gaps in the global array. You can leave it as it is but if you want to use and delete thousands you are left with a very sparse global array at the end. Or you can clean up. Problem:
var globalArray = [];
function YetAnotherArray(){
// add a new subarray to the end of the global array
globalArray[globalArray.length] = [];
this.arr = globalArray[globalArray.length - 1];
this.index = 0;
this.pos = globalArray.length - 1;
}
YetAnotherArray.prototype.push = function() {
for(var i=0;i<arguments.length;i++){
this.arr[ this.index++ ] = arguments[i];
}
return this;
};
YetAnotherArray.prototype.toString = function() {
var delimiter = arguments.length > 0 ? arguments[0] : ",";
var output = "";
for(var i=0;i<this.index;i++){
output += this.arr[i];
if(i < this.index - 1)
output += delimiter;
}
return output;
}
// we need a method to delete an instance
YetAnotherArray.prototype.clear = function() {
globalArray[this.pos] = null;
this.arr = null;
this.index = null;
};
YetAnotherArray.delete = function(arr){
arr.clear();
delete(arr);
};
// probably won't work, just a hint in case of asynch. use
var mutex = false;
YetAnotherArray.gc = function() {
var glen, indexof, next_index, sub_len;
indexof = function(arr,start){
for(var i = start;i<arr.length;i++){
if (arr[i] == null || arr[i] == undefined)
return i;
}
return -1;
};
mutex = true;
glen = globalArray.length;
sublen = 0;
for(var i = 0;i<glen;i++){
if(globalArray[i] == null || globalArray[i] == undefined){
next_index = indexof(globalArray,i);
if(next_index == -1){
break;
}
else {
globalArray[i] = globalArray[next_index + 1];
globalArray[next_index + 1] = null;
sublen++;
}
}
}
globalArray.length -= sublen - 1;
mutex = false;
};
var yaa_1 = new YetAnotherArray();
var yaa_2 = new YetAnotherArray();
var yaa_3 = new YetAnotherArray();
var yaa_4 = new YetAnotherArray();
yaa_1.push(1,2,3,4,5,6,7,8,9).toString(); // 1,2,3,4,5,6,7,8,9
yaa_2.push(11,12,13,14,15,16).toString(); // 11,12,13,14,15,16
yaa_3.push(21,22,23,24,25,26,27,28,29).toString();// 21,22,23,24,25,26,27,28,29
yaa_4.push(311,312,313,314,315,316).toString(); // 311,312,313,314,315,316
globalArray.join("\n");
/*
1,2,3,4,5,6,7,8,9
11,12,13,14,15,16
21,22,23,24,25,26,27,28,29
311,312,313,314,315,316
*/
YetAnotherArray.delete(yaa_2);
globalArray.join("\n");
/*
1,2,3,4,5,6,7,8,9
21,22,23,24,25,26,27,28,29
311,312,313,314,315,316
*/
YetAnotherArray.gc();
globalArray.join("\n");
/*
1,2,3,4,5,6,7,8,9
21,22,23,24,25,26,27,28,29
311,312,313,314,315,316
*/
But, as you might have guessed already: it doesn't work.
YetAnotherArray.delete(yaa_3); // yaa_3 was 21,22,23,24,25,26,27,28,29
globalArray.join("\n");
/*
1,2,3,4,5,6,7,8,9
21,22,23,24,25,26,27,28,29
*/
We would need another array to keep all positions. Actual implementation as an exercise for the reader but if you want to implement a JavaScript like array, that is for arbitrary content you really, really, really should use a doubly-linked list. Or a b-tree. A b+-tree maybe?
Oh, btw: yes, you can do it quite easily with a {key:value} object, but that would have squeezed all the fun out of the job, wouldn't it? ;-)

Javascript: Improve four nested loops?

I have a complex array of objects with nested arrays. The following works to extract certain objects, but it's one of the ugliest things I've written.
Is there some javascript dark magic to do this elegantly?
function getEighthInsertionBlocks() {
var struct = Lifestyle.Pagination.structure;
var blocks = [];
for (var i = 0; i<struct.length; i++) {
var page = struct[i];
var layers = page.children;
for (var j=0; j<layers.length; j++) {
var layer = layers[j];
if (layer.className === 'EighthPageLayer' ) {
var rows = layer.children;
for (var k=0; k<rows.length; k++) {
var row = rows[k];
eBlocks = row.children;
for (var l=0; l<eBlocks.length; l++) {
blocks.push(eBlocks[l]);
}
}
}
}
}
return blocks;
}
Not that I'm a big fan of code golf, but ... this is horrible.
You could write a generic iterator, which would reduce the code into sequential blocks:
var iterator = function(collection, callback){
var length = collection.length;
var results = [];
var result;
for (var i = 0; i < collection.length; i++){
result = callback(colleciton[i], i);
if (result){
results = results.concat(result);
}
}
return results;
};
function getEighthInsertionBlocks() {
var struct = Lifestyle.Pagination.structure;
var layers = iterator(struct, function(page){ return page.children; });
var rows = iterator(layers, function(layer){
return layer.className === 'EighthPageLayer' ? layer.children : null;
});
return iterator(rows, function(eBlocks, index){ return eblocks[index]; });
}
I usually tend to like using forEach for the readability but this is subjective.
function isEighthPageLayer(layer){
return layer.className === "EighthPageLayer"
}
function getEighthInsertionBlocks(struct) {
var blocks = [];
struct.forEach(function(page){
page.layers
.filter(isEighthPageLayer)
.forEach( function(layer) {
layer.children.forEach(function(row){
row.children.forEach(function(eBlocks){
blocks.push(eBlocks);
});
});
});
});
});
return blocks;
}
This is an interesting challenge. To avoid deep nesting, you need a generic iterator that you can use recursively, yet there are a few special cases in your iteration. So, I tried to create a generic iterator that you can pass an options object to in order to specify the special conditions. Here's what I came up with. Since I don't have a sample data set, this is untested, but hopefully you see the idea:
function iterateLevel(data, options, level, output) {
console.log("level:" + level);
console.log(data);
var fn = options[level] && options[level].fn;
for (var i = 0; i < data.length; i++) {
if (!fn || (fn(data[i]) === true)) {
if (level === options.endLevel) {
output.push(data[i]);
} else {
iterateLevel(data[i].children, options, level + 1, output);
}
}
}
}
var iterateOptions = {
"1": {
fn: function(arg) {return arg.className === 'EighthPageLayer'}
},
"endLevel": 3
}
var blocks = [];
iterateLevel(Lifestyle.Pagination.structure, iterateOptions, 0, blocks);
The idea is that the options object can have an optional filter function for each level and it tells you how many levels to go down.
Working demo: http://jsfiddle.net/jfriend00/aQs6h/

fastest way to detect if duplicate entry exists in javascript array?

var arr = ['test0','test2','test0'];
Like the above,there are two identical entries with value "test0",how to check it most efficiently?
If you sort the array, the duplicates are next to each other so that they are easy to find:
arr.sort();
var last = arr[0];
for (var i=1; i<arr.length; i++) {
if (arr[i] == last) alert('Duplicate : '+last);
last = arr[i];
}
This will do the job on any array and is probably about as optimized as possible for handling the general case (finding a duplicate in any possible array). For more specific cases (e.g. arrays containing only strings) you could do better than this.
function hasDuplicate(arr) {
var i = arr.length, j, val;
while (i--) {
val = arr[i];
j = i;
while (j--) {
if (arr[j] === val) {
return true;
}
}
}
return false;
}
There are lots of answers here but not all of them "feel" nice... So I'll throw my hat in.
If you are using lodash:
function containsDuplicates(array) {
return _.uniq(array).length !== array.length;
}
If you can use ES6 Sets, it simply becomes:
function containsDuplicates(array) {
return array.length !== new Set(array).size
}
With vanilla javascript:
function containsDuplicates(array) {
return array
.sort()
.some(function (item, i, items) {
return item === items[i + 1]
})
}
However, sometimes you may want to check if the items are duplicated on a certain field.
This is how I'd handle that:
containsDuplicates([{country: 'AU'}, {country: 'UK'}, {country: 'AU'}], 'country')
function containsDuplicates(array, attribute) {
return array
.map(function (item) { return item[attribute] })
.sort()
.some(function (item, i, items) {
return item === items[i + 1]
})
}
Loop stops when found first duplicate:
function has_duplicates(arr) {
var x = {}, len = arr.length;
for (var i = 0; i < len; i++) {
if (x[arr[i]]) {
return true;
}
x[arr[i]] = true;
}
return false;
}
Edit (fix 'toString' issue):
function has_duplicates(arr) {
var x = {}, len = arr.length;
for (var i = 0; i < len; i++) {
if (x[arr[i]] === true) {
return true;
}
x[arr[i]] = true;
}
return false;
}
this will correct for case has_duplicates(['toString']); etc..
var index = myArray.indexOf(strElement);
if (index < 0) {
myArray.push(strElement);
console.log("Added Into Array" + strElement);
} else {
console.log("Already Exists at " + index);
}
You can convert the array to to a Set instance, then convert to an array and check if the length is same before and after the conversion.
const hasDuplicates = (array) => {
const arr = ['test0','test2','test0'];
const uniqueItems = new Set(array);
return array.length !== uniqueItems.size();
};
console.log(`Has duplicates : ${hasDuplicates(['test0','test2','test0'])}`);
console.log(`Has duplicates : ${hasDuplicates(['test0','test2','test3'])}`);
Sorting is O(n log n) and not O(n). Building a hash map is O(n). It costs more memory than an in-place sort but you asked for the "fastest." (I'm positive this can be optimized but it is optimal up to a constant factor.)
function hasDuplicate(arr) {
var hash = {};
var hasDuplicate = false;
arr.forEach(function(val) {
if (hash[val]) {
hasDuplicate = true;
return;
}
hash[val] = true;
});
return hasDuplicate;
}
It depends on the input array size. I've done some performance tests with Node.js performance hooks and found out that for really small arrays (1,000 to 10,000 entries) Set solution might be faster. But if your array is bigger (like 100,000 elements) plain Object (i. e. hash) solution becomes faster. Here's the code so you can try it out for yourself:
const { performance } = require('perf_hooks');
function objectSolution(nums) {
let testObj = {};
for (var i = 0; i < nums.length; i++) {
let aNum = nums[i];
if (testObj[aNum]) {
return true;
} else {
testObj[aNum] = true;
}
}
return false;
}
function setSolution(nums) {
let testSet = new Set(nums);
return testSet.size !== nums.length;
}
function sortSomeSolution(nums) {
return nums
.sort()
.some(function (item, i, items) {
return item === items[i + 1]
})
}
function runTest(testFunction, testArray) {
console.log(' Running test:', testFunction.name);
let start = performance.now();
let result = testFunction(testArray);
let end = performance.now();
console.log(' Duration:', end - start, 'ms');
}
let arr = [];
let setSize = 100000;
for (var i = 0; i < setSize; i++) {
arr.push(i);
}
console.log('Set size:', setSize);
runTest(objectSolution, arr);
runTest(setSolution, arr);
runTest(sortSomeSolution, arr);
On my Lenovo IdeaPad with i3-8130U Node.js v. 16.6.2 gives me following results for the array of 1,000:
results for the array of 100,000:
Assuming all you want is to detect how many duplicates of 'test0' are in the array. I guess an easy way to do that is to use the join method to transform the array in a string, and then use the match method.
var arr= ['test0','test2','test0'];
var str = arr.join();
console.log(str) //"test0,test2,test0"
var duplicates = str.match(/test0/g);
var duplicateNumber = duplicates.length;
console.log(duplicateNumber); //2

Categories

Resources