How to remove repeating entries in a massive array (JavaScript)

I'm trying to graph a huge data set (about 1.6 million points) using Kendo UI. This number is too large, but I have figured out that many of these points are repeating. The data is currently stored in this format:
[ [x,y], [x,y], [x,y]...]
with each x and y being a number, thus each subarray is a point.
The approach I have in mind is to create a second empty array, and then loop through the very long original array, and only push each point to the new one if it isn't already found there.
I tried to use jQuery.inArray(), but it does not seem to work with the 2D array I have here.
This is what I am currently trying:
var datMinified = [];
for (var z = 2; z < dat1.length; z++) // I start at 2 because the first 2 elements are strings, disregard this
{
    if (!testContains(datMinified, dat1[z]))
    {
        datMinified.push(dat1[z]);
    }
}
with the helper functions defined as:
function testContains(arr, val)
{
    // note: in the original, i was undeclared (an implicit global) shared with
    // arraysEqual below, so the two loops clobbered each other's counter
    for (var i = 0; i < arr.length; i++)
    {
        if (arraysEqual(arr[i], val))
        {
            return true;
        }
    }
    return false;
}
and:
function arraysEqual(arr1, arr2)
{
    if (arr1.length != arr2.length)
    {
        return false;
    }
    for (var i = 0; i < arr1.length; i++) // i declared locally here as well
    {
        if (arr1[i] != arr2[i])
        {
            return false;
        }
    }
    return true;
}
When I run this script, even with a smaller array of length six thousand, it still hangs.
Maybe jQuery is a good solution?
Edit: I was also thinking that there might be some way to tell the browser not to time out and just sit and work through the data?

You have a non-trivial problem but I'm gonna blast right through so ask questions if I lose you somewhere along the line. This solution does not cast the coordinate into a String or serialise it using other techniques like JSON.stringify -
Start with a way to create coordinates -
const Coord = (x, y) =>
[ x, y ]
To demonstrate the solution, I need to construct many random coordinates -
const rand = x =>
Math.floor(Math.random() * x)
const randCoord = x =>
Coord(rand(x), rand(x))
console.log(randCoord(1e3))
// [ 655, 89 ]
Now we make an array of 1 million random coordinates -
const million =
Array.from(Array(1e6), _ => randCoord(1e3))
Now we make a function to filter all of the unique values using DeepMap, a tiny module I developed in a linked Q&A.
const uniq = (coords = []) =>
{ const m = new Map
const r = []
for (const c of coords)
if (!DeepMap.has(m, c))
{ DeepMap.set(m, c, true)
r.push(c)
}
return r
}
Because the for loop and DeepMap lookups are fast, uniq can identify all of the unique values in less than one second -
console.time("uniq")
const result = uniq(million)
console.timeEnd("uniq")
console.log("uniq length:", result.length)
console.log("sample:", result.slice(0,10))
// uniq: 535 ms
// uniq length: 631970
// sample:
// [ [ 908, 719 ]
// , [ 532, 967 ]
// , [ 228, 689 ]
// , [ 942, 546 ]
// , [ 716, 180 ]
// , [ 456, 427 ]
// , [ 714, 79 ]
// , [ 315, 480 ]
// , [ 985, 499 ]
// , [ 212, 407 ]
// ]
By generating smaller random coordinates, we can verify that uniq produces correct output. Below we generate coordinates up to [ 100, 100 ] for a maximum of 10,000 possible unique coordinates. When you run the program below, because the coordinates are generated at random, it's possible that result.length will be under 10,000, but it should never exceed it - if it did, we'd know an invalid (duplicate) coordinate was added -
const million =
Array.from(Array(1e6), _ => randCoord(1e2))
console.time("uniq")
const result = uniq(million)
console.timeEnd("uniq")
console.log("uniq length:", result.length)
console.log("sample:", result.slice(0,10))
// uniq: 173 ms
// uniq length: 10000
// sample:
// [ [ 50, 60 ]
// , [ 18, 69 ]
// , [ 87, 10 ]
// , [ 8, 7 ]
// , [ 91, 41 ]
// , [ 48, 47 ]
// , [ 78, 28 ]
// , [ 39, 12 ]
// , [ 18, 84 ]
// , [ 0, 71 ]
// ]
Lastly, I'll include the DeepMap module used here -
const DeepMap =
{ has: (map, [ k, ...ks ]) =>
ks.length === 0
? map.has(k)
: map.has(k)
? DeepMap.has(map.get(k), ks)
: false
, set: (map, [ k, ...ks ], value) =>
ks.length === 0
? map.set(k, value)
: map.has(k)
? (DeepMap.set(map.get(k), ks, value), map)
: map.set(k, DeepMap.set(new Map, ks, value))
, get: (map, [ k, ...ks ]) =>
{ /* ... */ }
, entries: function* (map, fields = [])
{ /* ... */ }
}
For a complete implementation, see the linked Q&A. Fwiw, I do think you will find the link interesting as it provides more context for the complexity of this problem.
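To make the nesting concrete, here is a small usage sketch (my illustration, not part of the original answer) that uses only the has and set methods shown above -
const m = new Map
DeepMap.set(m, [ 908, 719 ], true)
// m is now Map { 908 => Map { 719 => true } }
console.log(DeepMap.has(m, [ 908, 719 ]))
// true
console.log(DeepMap.has(m, [ 908, 720 ]))
// false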

You could try something like this. It would probably be helpful to do some benchmarking, or to consider doing it server-side. That is a lot of data, and you are probably going to see most browsers hang:
points = ["test", "string", [1,1], [1,2],[1,3],[1,4],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7],[1,8],[2,1],[2,1],[2,2],[1,1],[1,1],[1,1],[1,1],[1,1]];
t={};
unique = points.filter(e=>!(t[e]=e in t));
console.log(unique);

UPDATE
In short: You can use a Set to automatically create a collection of unique values (which is what differentiates Set from Map), if these values are in a suitable (e.g. comparable) format:
let collection = new Set(data.map((point) => point.toString()));
collection = [...collection].map((val) => val.split(','));
These two lines are enough to filter your 1-million-plus array down to unique values in about 1 second. For a lengthier explanation, see the third example =)...
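One caveat (my addition, not part of the original answer): after split(','), each point is a pair of strings, not numbers. If the charting library needs numeric points, the second line can convert them back, e.g.:
collection = [...collection].map((val) => val.split(',').map(Number));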
Original Answer
jQuery is mainly for DOM manipulation and helping with (older) browser quirks, not for dealing with big data! So, no, I would not recommend that; plus, it will slow down your processing even more. The question is: can you use modern JS (e.g. generator functions) in your app, or does it have to work in older browsers as well?
I'm not sure how this will go performance wise with 1+ million entries, but let me know how this works (where data is your datMinified of course):
const data = [
'string',
'string',
[1, 2],
[1, 2],
[2, 3],
[3, 4],
[3, 4],
[4, 5],
];
data.splice(0, 2); // remove your 2 strings at the beginning
console.time('filtering with reduce');
let collection = data.reduce((acc, val) => {
const pointstr = val.toString();
if ( !acc.includes(pointstr) ) {
acc.push(pointstr);
}
return acc;
}, []);
collection = collection.map((point) => point.split(',')); // assign the result back (the original snippet discarded it)
console.timeEnd('filtering with reduce');
console.log(`filtered data has ${collection.length} entries!`);
a generator function could help you to keep memory consumption down (maybe?) =), and it would spare you the .map() part at the end of the above example:
console.time('filtering with generator');
function* filter(arr) {
let filtered = [];
for (var i = 0, l = arr.length; i < l; i++ ) {
const pointstr = arr[i].toString();
if ( !filtered.includes(pointstr) ) {
filtered.push(pointstr);
yield arr[i];
}
}
}
let collection = [];
for (let point of filter(data)) {
collection.push(point);
}
console.timeEnd('filtering with generator');
console.log(`filtered data has ${collection.length} entries!`);
EDIT
Both of the above are horrible in terms of performance. Here is a realistic scenario for your use case with 1'000'000 data points, and a significant improvement based on @user633183's suggestion to use a Set or Map. I chose a Set because it represents a collection of unique values, which is exactly what we want; it automatically takes care of the filtering for us (if the data is in the right form to identify duplicates, of course):
const randomBetween = (min,max) => Math.floor(Math.random()*(max-min+1)+min);
var data = Array(1000000);
for (var i = data.length; i; data[--i] = [randomBetween(1,1000), randomBetween(1, 1000)]);
console.log(`unfiltered data has ${data.length} entries!`);
console.time('filtering');
// create the Set with unique values by adding them as strings
// like that the Set will automatically filter duplicates
let collection = new Set(data.map((point) => point.toString()));
console.log(`filtered data has ${collection.size} entries!`);
// we still have to revert the toString() process here
// but we operate on the automatically filtered collection of points
// and this is fast!
collection = [...collection].map((val) => val.split(','));
console.log(`resulting data has ${collection.length} entries!`);
console.timeEnd('filtering');
thanks again @user633183, learned something today =)!
another option would be to combine the generator function with a Set like this:
console.time('filtering with generator and Set');
function* filterSet(arr) {
let filtered = new Set();
for (var i = 0, l = arr.length; i < l; i++ ) {
const pointstr = arr[i].toString();
if ( !filtered.has(pointstr) ) {
filtered.add(pointstr);
yield arr[i];
}
}
}
let collection = [];
for (let point of filterSet(data)) {
collection.push(point);
}
console.timeEnd('filtering with generator and Set');
console.log(`filtered data has ${collection.length} entries!`);
this again spares you from having to reverse the .toString() and is just slightly faster than the "direct" new Set() approach.
To finish this up, here is a completely subjective benchmark on my machine with 100'000 data points:
unfiltered data has 100000 entries!
filtering with reduce: 31946.634ms
filtered data has 95232 entries!
filtering with generator: 39533.802ms
filtered data has 95232 entries!
filtering with generator and Set: 107.893ms
filtered data has 95232 entries!
filtering with Set: 159.894ms
filtered data has 95232 entries!

Related

How to include zero counts in a javascript frequency counting function for histograms

Need some help please - I am using the excellent response to this histogram bucketing question, as per the code below. My question is: what is the most elegant way to get my buckets with zero counts represented in the function output?
Current Output is:
[ [ '0', 2 ],
[ '32', 1 ],
[ '64', 2 ],
[ '80', 1 ],
[ '96', 1 ],
[ '112', 1 ] ]
which omits buckets 16 and 48, which have zero counts.
My desired output is:
[ [ '0', 2 ],
[ '16', 0 ],
[ '32', 1 ],
[ '48', 0 ],
[ '64', 2 ],
[ '80', 1 ],
[ '96', 1 ],
[ '112', 1 ] ]
All help much appreciated.
function defArrs() {
  var arr = [1, 16, 38, 65, 78, 94, 105, 124];
  var binsize = 16;
  var check = Object.entries(frequencies(arr, binsize));
  console.log(check);
}
function frequencies(values, binsize) {
  var mapped = values.map(function (val) {
    return Math.ceil(val / binsize) - 1;
  });
  console.log(mapped);
  return mapped.reduce(function (freqs, val, i) {
    var bin = (binsize * val);
    freqs[bin] ? freqs[bin]++ : freqs[bin] = 1;
    return freqs;
  }, {});
}
Instead of starting with the empty object for .reduce:
}, {});
// ^^
construct one with all the properties you'll want to be included at the end (starting with a count of 0, of course).
const initialObj = Object.fromEntries(
  Array.from(
    { length: 8 }, // 8 bins (0 through 112) for the sample data: Math.ceil(124 / 16)
    (_, i) => [i * binsize, 0]
  )
);
And pass that as the second parameter to .reduce.
That approach also means that this
freqs[bin] ? freqs[bin]++ : freqs[bin] = 1;
will simplify to
freqs[bin]++;
Or, even better:
const frequencies = (nums, binsize) => {
  const mapped = nums.map(num => Math.ceil(num / binsize) - 1);
  const grouped = Object.fromEntries(
    Array.from(
      { length: Math.max(...mapped) + 1 }, // one entry per bin, up to the highest occupied bin
      (_, i) => [i * binsize, 0]
    )
  );
  for (const val of mapped) {
    grouped[binsize * val]++;
  }
  return grouped;
};
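A quick check against the question's sample data (my addition; it assumes the corrected bin count above):
const arr = [1, 16, 38, 65, 78, 94, 105, 124];
console.log(Object.entries(frequencies(arr, 16)));
// [['0',2],['16',0],['32',1],['48',0],['64',2],['80',1],['96',1],['112',1]]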

Functional way to get previous element during map

I have an array which I map over. I need to compare the current element with the previous. I am detecting if the current element is the same as the previous element by comparing their ids and doing something different based on this condition. Is there any purely functional way to do it without doing index math?
items.map((item, index) => {
if(item.id === items[index - 1 > 0 ? index - 1 : 0].id) {
// do something
} else {
// do something else
}
})
The code works but I would like to avoid doing math on the index. Is there any way to do it?
The reduce() function provides a functional way to do what you need:
items.reduce((previousValue, currentValue) => {
  if (currentValue.id === previousValue.id) {
    // do something
  } else {
    // do something else
  }
  return currentValue; // required: otherwise previousValue is undefined after the first iteration
});
Are you sure that you want a map? This sounds like an XY problem. If you want to map over adjacent elements of an array then you'd have to define your own function.
const mapAdjacent = (mapping, array) => {
const {length} = array, size = length - 1, result = new Array(size);
for (let i = 0; i < size; i++) result[i] = mapping(array[i], array[i + 1]);
return result;
};
const items = [1, 2, 3, 4, 5];
const result = mapAdjacent((x, y) => [x, y], items);
console.log(result); // [[1, 2], [2, 3], [3, 4], [4, 5]]
Note that this will throw a RangeError if you give it an empty array as input.
const mapAdjacent = (mapping, array) => {
const {length} = array, size = length - 1, result = new Array(size);
for (let i = 0; i < size; i++) result[i] = mapping(array[i], array[i + 1]);
return result;
};
const items = [];
const result = mapAdjacent((x, y) => [x, y], items); // RangeError: Invalid array length
console.log(result);
I think this is good behaviour because you shouldn't be giving mapAdjacent an empty array to begin with.
Here's a purely functional implementation of mapAdjacent which uses reduceRight. As an added bonus, it works for any iterable object.
const mapAdjacent = (mapping, [head, ...tail]) =>
tail.reduceRight((recur, item) => prev =>
[mapping(prev, item), ...recur(item)]
, _ => [])(head);
const items = "hello";
const result = mapAdjacent((x, y) => [x, y], items);
console.log(result); // [['h', 'e'], ['e', 'l'], ['l', 'l'], ['l', 'o']]
Unlike the iterative version, it returns an empty array instead of throwing an error if you give it an empty array as input.
const mapAdjacent = (mapping, [head, ...tail]) =>
tail.reduceRight((recur, item) => prev =>
[mapping(prev, item), ...recur(item)]
, _ => [])(head);
const items = "";
const result = mapAdjacent((x, y) => [x, y], items);
console.log(result); // []
Note that this is an unintended side effect of array destructuring with rest elements in JavaScript. The equivalent Haskell version does raise an exception.
mapAdjacent :: (a -> a -> b) -> [a] -> [b]
mapAdjacent f (x:xs) = foldr (\y g x -> f x y : g y) (const []) xs x
main :: IO ()
main = do
print $ mapAdjacent (,) "hello" -- [('h','e'),('e','l'),('l','l'),('l','o')]
print $ mapAdjacent (,) "" -- Exception: Non-exhaustive patterns in function mapAdjacent
However, returning an empty array might be desirable for this function. It's equivalent to adding the mapAdjacent f [] = [] case in Haskell.
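To make the JavaScript behaviour concrete (my illustration, not from the original answer): destructuring an empty array or string yields undefined for the head and an empty rest array, so the reduceRight above runs zero times and simply returns its seed function, which then returns [] -
const [head, ...tail] = [];
console.log(head); // undefined
console.log(tail); // []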
Not a particularly fast implementation, but destructuring assignment makes it quite elegant -
const None =
Symbol ()
const mapAdjacent = (f, [ a = None, b = None, ...more ] = []) =>
a === None || b === None
? []
: [ f (a, b), ...mapAdjacent (f, [ b, ...more ]) ]
const pair = (a, b) =>
[ a, b ]
console.log(mapAdjacent(pair, [ 1, 2, 3 ]))
// [ [ 1, 2 ], [ 2, 3 ] ]
console.log(mapAdjacent(pair, "hello"))
// [ [ h, e ], [ e, l ], [ l, l ], [ l, o ] ]
console.log(mapAdjacent(pair, [ 1 ]))
// []
console.log(mapAdjacent(pair, []))
// []
Or write it as a generator -
const mapAdjacent = function* (f, iter = [])
{ while (iter.length > 1)
{ yield f (...iter.slice(0,2))
iter = iter.slice(1)
}
}
const pair = (a, b) =>
[ a, b ]
console.log(Array.from(mapAdjacent(pair, [ 1, 2, 3 ])))
// [ [ 1, 2 ], [ 2, 3 ] ]
console.log(Array.from(mapAdjacent(pair, "hello")))
// [ [ h, e ], [ e, l ], [ l, l ], [ l, o ] ]
console.log(Array.from(mapAdjacent(pair, [ 1 ])))
// []
console.log(Array.from(mapAdjacent(pair, [])))
// []
As I mentioned in a comment, I would suggest using reduce. Here is an example:
const input = [
{id: 1, value: "Apple Turnover"},
{id: 1, value: "Apple Turnover"},
{id: 2, value: "Banana Bread"},
{id: 3, value: "Chocolate"},
{id: 3, value: "Chocolate"},
{id: 3, value: "Chocolate"},
{id: 1, value: "Apple"},
{id: 4, value: "Danish"},
];
// Desired output: Array of strings equal to values in the above array,
// but with a prefix string of "New: " or "Repeated: " depending on whether
// the id is repeated or not
const reducer = (accumulator, currentValue) => {
let previousValue, descriptions, isRepeatedFromPrevious;
if (accumulator) {
previousValue = accumulator.previousValue;
descriptions = accumulator.descriptions;
isRepeatedFromPrevious = previousValue.id === currentValue.id;
} else {
descriptions = [];
isRepeatedFromPrevious = false;
}
if (isRepeatedFromPrevious) {
// The following line is not purely functional and performs a mutation,
// but maybe we do not care because the mutated object did not exist
// before this reducer ran.
descriptions.push("Repeated: " + currentValue.value);
} else {
// Again, this line is mutative
descriptions.push("New: " + currentValue.value);
}
return { previousValue: currentValue, descriptions }
};
const output = input.reduce(reducer, null).descriptions;
document.getElementById('output').innerText = JSON.stringify(output);
<output id=output></output>

create a dictionary( MAP ) from 2 arrays

Let's assume I have arrays of strings (but it should also work with arrays of numbers).
I would like to create a Map object over them, with the values of one array as keys and the values of the other as values; basically, to establish a relationship between them. The next step would be to create a Map from 2 arrays of objects, but that is a bit more complicated.
Unfortunately my approach so far isn't working (I've been trying for a few hours): for the second value I get a Map Iterator, for the arrays of objects as well.
let pairsMap = new Map();
let productsMap = new Map();
let engWords = ['house','gift','zoo','tidy','flat','to play',' to see','boy','ice cream']
let itaWords = ['casa','regalo','zoo','ordinato','appartamento','giocare','guardare','ragazzo','gelato']
let products = [
{
name: "chair",
inventory: 5,
unit_price: 45.99,
client:'MG Gmbh'
},
{
name: "table",
inventory: 10,
unit_price: 123.75,
client : "XYZ"
},
{
name: "sofa",
inventory: 2,
unit_price: 399.50,
client : "MongoDB"
}];
let clients =[
{
name:"MG Gmbh",
address: 'Linen street',
country: 'Germany'
},
{
name:'XYZ',
address:'Mongomery street',
country: 'USA'
},
{
name:'MongoDB',
address: 'NoSQL road',
country: 'UK'
},
{
name:'Zeppelin',
address: 'lienestraße',
country: 'Germany'
}];
for( val in engWords){
const nk = engWords[val];
engWords.forEach(function(element) {
const v = pairsMap.values();
pairsMap.set(nk,v);
} )
}
for (let [b, z] of pairsMap){
console.log(b, " -> ", z)
}
function groupBy(objectArray, property) {
return objectArray.reduce(function (acc, obj) {
var key = obj[property];
if (!acc[key]) {
acc[key] = [];
}
acc[key].push(obj);
return acc;
}, {});
}
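// presumably intended here (groupedClients is never defined in the post):
// const groupedClients = groupBy(clients, 'name');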
let clients_name = console.log(groupedClients);
for( val in groupedClients){
const nk = groupedClients[val];
products.forEach(function(element) {
const v = productsMap.values();
productsMap.set(nk,v);
} )
}
This is the result I would like to get (from engWords and itaWords):
house -> casa
gift -> regalo
zoo -> zoo
tidy -> ordinato
flat -> appartamento
to play -> giocare
to see -> guardare
boy -> ragazzo
ice cream -> gelato
I have found what I was missing, at least for the 2 arrays: I was not using the right "index" for the "value" of the Map. This way it worked:
for( val in engWords){
const nk = engWords[val];
engWords.forEach(function(element) {
pairsMap.set(nk,itaWords[val]);
} )
}
for (let [b, z] of pairsMap){
console.log(b, " -> ", z)
}
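As an aside (my addition, not from the original post): the inner forEach is redundant, since it just sets the same pair once per word. The whole Map can be built in one step from the index-aligned arrays:
// minimal sketch: zip two index-aligned arrays into a Map
const pairsMap = new Map(engWords.map((word, i) => [word, itaWords[i]]));
for (const [eng, ita] of pairsMap) {
  console.log(eng, " -> ", ita);
}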
and making that change also worked for the arrays of objects, but not when using the version grouped by name:
for( val in clients){
const nk = clients[val];
products.forEach(function(element) {
productsMap.set(nk,products[val]);
} )
}
Though the order of the relationship remains the order of the original arrays, so the order is still not dynamic.
[UPDATE]
I was able to check whether an element of an array was also in the Map (another goal) with a slight modification of the above, but it seems to eat far too much CPU: Chrome's Task Manager shows my page taking more than 109% of GPU (browser), which seems very strange to me.
let m = ['casa','house'];
Array.from(newMap.keys()).forEach((k, i) => {
var values = newMap.get(k);
for(let j = 0;j < m.length ; i++){
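// note: this loop increments i instead of j (and tests m[i], not m[j]),
// so it never terminates; that is what is burning the CPU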
if(m[i] == values){
console.log(m[i]);
}
}
})

Add together an array of objects

I am trying to add together an array of objects using reduce, however I can't get it to work.
const testArray = [
{
"T1": 1
},
{
"T2": 12
},
{
"T3": 20
}
]
reduce function
const q = testArray.reduce((count, x) => count + x.P1Count);
The desired outcome is 33.
You could get the values of each object and reduce those as well.
const
add = (a, b) => a + b,
array = [{ "T1": 1 }, { "T2": 12 }, { "T3": 20 }],
total = array.reduce(
(s, o) => Object.values(o).reduce(add, s),
0
);
console.log(total);
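A shorter variant (my sketch, assuming each object holds exactly one numeric value):
const total = array.reduce((s, o) => s + Object.values(o)[0], 0);
console.log(total); // 33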
The 2nd argument passed to the reduce() callback is the current member of the array reduce is being called on. In your case, that will be { T[i]: ... } where i = 1, 2, 3.
You can try this:
const testArray = [
{
"T1": 1
},
{
"T2": 12
},
{
"T3": 20
}
]
const x = testArray.reduce((count, x, index) => {
const key = `T${index+1}`; // prepare the dynamic key T1, T2,...
return count + x[key];
}, 0); // <-- 0 is the initial value of sum
console.log(x)

find unique entries inside a multidimensional javascript array, order important dependent on level

What would be the most elegant solution to find all unique first level entries inside a multidimensional javascript array? There is only one important rule: the order of the entries is important only on the first level, but not important on the second level
For example, for the following array the script should return 4 unique entries (the first, the third, the fourth and the fifth):
[
[ [],[22],[1,13,17],[12],[] ],
[ [],[22],[17,13,1],[12],[] ],
[ [],[12],[1,13,17],[22],[] ],
[ [11],[12],[13],[14],[15] ],
[ [15],[14],[13],[12],[11] ]
]
PS. jQuery can be used as well.
First of all, here is a working JSFiddle for you to play around with: http://jsfiddle.net/missyalyssi/ro8o94nk/
Given an input array the function findUnique will return an array containing items that are unique according to your definition. So, for example:
[[8],[1,2,3],[9]] is a duplicate of [[8], [3,1,2], [9]] but it is not a duplicate of [[9], [3,1,2], [8]]
My main focus when writing this was to make it easy to read and understand.
function findUnique(input) {
let found = [];
let uniqueEls = new Set();
let hasDup = true;
for (let element of input) {
hasDup = found.length &&
found.some((el) => {return deepEqualsNaive(el, element)}); // some, not every: an element is a duplicate if it matches any previously found element
if (hasDup) {
uniqueEls.delete(element);
continue;
}
found.push(element);
uniqueEls.add(element);
}
return [...uniqueEls];
}
This function uses deepEqualsNaive to determine if two arrays are equal. Since object equality in JavaScript would mean that the arrays point to the same memory location, we need to build our own function that returns true for what we are calling equal. Here, by equal we mean that they have the same elements, even though they are not pointing to the same memory location or appearing in the same order.
I have written this function recursively for readability; I do not know the context that you are using this in. If you could overflow the stack, then use an iterative version.
Here are some example inputs and what we would expect:
deepEqualsNaive([ [],[22],[1,13,17],[12],[] ], [ [],[22],[17,13,1],[12],[] ]) => true
deepEqualsNaive([ [],[22],[17,13,1],[12],[] ], [ [],[12],[1,13,17],[22],[] ]) => false
deepEqualsNaive([ [],[22],[1,13,17],[12],[] ], [ [],22,[17,13,1],[12],[] ]) => false
The function:
function deepEqualsNaive (input, clone) {
if (!Array.isArray(input) || !Array.isArray(clone)) return false;
if (input.length !== clone.length) return false;
var result = 0;
for (let elIdx = 0; elIdx < input.length; elIdx++) {
var tryDeep = true;
if (Array.isArray(input[elIdx])) tryDeep = deepEqualsNaive(input[elIdx], clone[elIdx]);
if (!tryDeep) return false;
result ^= input[elIdx];
result ^= clone[elIdx];
}
return result === 0;
}
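One caveat worth knowing (my addition, and part of why the function is called "naive"): the XOR accumulation can report false positives for different values that happen to XOR to the same result -
console.log(deepEqualsNaive([1, 2], [3, 0])); // true, since 1 ^ 2 === 3 ^ 0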
If you're not all that worried about performance and just need something that works, you could use the constant depth you mentioned along with the string representation as a "fingerprint" of sorts (akin to Java's hashcode).
Then you use a Set to keep track of items you've not seen before, and add only those that are new.
function getUnique(rows) {
let unique = new Set();
let results = [];
for (let row of rows) {
// Fingerprint is the string representation of the row,
// with the inner-level sorted (as order doesn't matter).
// E.g., fingerprint of [ [8], [3, 2, 1], [9] ] is '[[8],[1,2,3],[9]]'
let fingerprint = JSON.stringify(row.map((cells) => {
return cells.concat().sort(); // Use concat to avoid sorting in place.
}));
// If we haven't seen this fingerprint before,
// add to the filter and the results list.
if (!unique.has(fingerprint)) {
unique.add(fingerprint);
results.push(row);
}
}
return results;
}
This, for example, will come up with...
> x = [
... [ [8], [3, 2, 1], [9] ],
... [ [7], [8, 3, 9], [1, 2] ],
... [ [8], [1, 2, 3], [9] ],
... ];
> getUnique(x);
[ [ [ 8 ], [ 3, 2, 1 ], [ 9 ] ],
[ [ 7 ], [ 8, 3, 9 ], [ 1, 2 ] ] ]
Obviously if your inner values are non-primitives (objects, arrays, etc) then this will fall over, but if you're dealing with numbers like your example, it should be fine.
If it's ok to have reference to the original array 'records' and inner arrays (that is, no deep copy), you can use something like:
function distinct(arr){
const res =[], //array with results
cmpArr = (a1,a2) => a1.length===a2.length && a1.every((i,ind) => a2[ind] === i),
cmpRec = (a1,a2) => [1,2,3].every(i=> cmpArr(a1[i],a2[i])); //compare 'records' for indices 1,2 and 3
for(let subarr of arr){
subarr[2].sort(); //NB, this alters the source array. If this is not allowed, a work around can be created
if(!res.some(r => cmpRec(r,subarr))) //check if 'res' doesn't have an entry , based on the cmpRec function
res.push(subarr);
}
return res;
}
//test:
let input = [
[ [],[22],[1,13,17],[12],[] ],
[ [],[22],[17,13,1],[12],[] ],
[ [],[12],[1,13,17],[22],[] ],
[ [11],[12],[13],[14],[15] ],
[ [15],[14],[13],[12],[11] ]
];
console.log(distinct(input).map(JSON.stringify));
