Related
I would like to have multiple arrays of objects like this.
E.g:
const pets = [
{
name: "cat",
age: 4
},
{
name: "dog",
age: 6
}
]
But I want to create it using a map. So I was trying something like this.
let pets = [];
pets.map((item) => {
return (
item.push({
name: "cat",
age: 4
}, {
name: "dog",
age: 6
})
)
})
By this method, I'm getting an empty array.
So assuming this is incorrect, how would I go on and make this through a map.
Please any help would be appreciated.
first of all map works by looping through an array but you have empty array let pets = []; so the loop doesn't even start ! that's why you are getting empty array
Secondly map essentially is a method through which we can create a new array with the help of an existing array so you have chosen a wrong way!
example of map
const fruits = ["Mango", "Apple", "Banana", "Pineapple", "Orange"];
console.log(fruits);
const uppercaseFruits = fruits.map((fruit)=>{
return fruit.toUpperCase(); // this thing will be added to new array in every iteration
});
console.log(uppercaseFruits);
but still ....
let pets = [""]; // an item so that loop can start
const myPets = pets.map((item) => {
return (
([{
name: "cat",
age: 4
},{
name: "dog",
age: 6
}])
)
})
console.log(myPets)
//Usage of map: for example
let array = [1, 2, 3, 4, 5];
let newArray = array.map((item) => {
return item * item;
})
console.log(newArray) // [1, 4, 9, 16, 25]
map will not change the original array, if you don't assign a value to it, the original array will never be affected
And if you want to get what you want you use RANDOM like this
//random String
function randomString(e) {
e = e || 32;
var t = "ABCDEFGHJKMNPQRSTWXYZabcdefhijkmnprstwxyz2345678",
a = t.length,
n = "";
for (i = 0; i < e; i++) n += t.charAt(Math.floor(Math.random() * a));
return n
}
//random Number
function GetRandomNum(Min,Max)
{
var Range = Max - Min;
var Rand = Math.random();
return(Min + Math.round(Rand * Range));
}
var num = GetRandomNum(10000,999999);
alert(num);
Then you can combine random strings and random numbers into a new Object through a function
I have two arrays of objects say 1- variants and 2- inventoryLevels. Objects in both arrays share a property which is the id. So I want to search for each variant if it's id is matched with any inventoryLevel I want to change its property named shopify_inventory_quantity with matched inventoryLevel's property available ? My words are little but confusing but take a look at code below basically it's doing properly whats needed I just want to know can it be optimized right now it's nested for loop. So any help to make it efficient would be appreciated ?
for (let i = 0; i < variants.length; i++) {
for (let j = 0; j < inventorylevels.length; j++) {
if (variants[i].id === inventorylevels[j].variant_id) {
variants[i].shopify_inventory_quantity = inventorylevels[j].available;
}
}
}
I understand you have a solution in O(n²). Assuming your ids are unique, you can reduce the time complexity to O(n) (basically what #Alireza commented):
var variants = [
{id: 0, shopify_inventory_quantity: 0},
{id: 1, shopify_inventory_quantity: 0},
{id: 2, shopify_inventory_quantity: 0}
];
var inventoryLevels = [
{id: 0, available: 10},
{id: 1, available: 2},
{id: 2, available: 3}
];
// O(n) + O(n) = O(n)
function getAvailableVariants(v, i) {
// O(n)
var inventoryLevels = i.reduce(function(inventoryLevels, inventoryLevel) {
inventoryLevels[inventoryLevel.id] = inventoryLevel;
return inventoryLevels;
}, {});
// O(n)
return v.map(variant => Object.assign(variant, {shopify_inventory_quantity: inventoryLevels[variant.id].available}));
}
var results = document.createElement('pre');
results.textContent = JSON.stringify(getAvailableVariants(variants, inventoryLevels), null, '\t');
document.body.appendChild(results);
For example, if I have these arrays:
var name = ["Bob","Tom","Larry"];
var age = ["10", "20", "30"];
And I use name.sort() the order of the "name" array becomes:
var name = ["Bob","Larry","Tom"];
But, how can I sort the "name" array and have the "age" array keep the same order? Like this:
var name = ["Bob","Larry","Tom"];
var age = ["10", "30", "20"];
You can sort the existing arrays, or reorganize the data.
Method 1:
To use the existing arrays, you can combine, sort, and separate them:
(Assuming equal length arrays)
var names = ["Bob","Tom","Larry"];
var ages = ["10", "20", "30"];
//1) combine the arrays:
var list = [];
for (var j = 0; j < names.length; j++)
list.push({'name': names[j], 'age': ages[j]});
//2) sort:
list.sort(function(a, b) {
return ((a.name < b.name) ? -1 : ((a.name == b.name) ? 0 : 1));
//Sort could be modified to, for example, sort on the age
// if the name is the same. See Bonus section below
});
//3) separate them back out:
for (var k = 0; k < list.length; k++) {
names[k] = list[k].name;
ages[k] = list[k].age;
}
This has the advantage of not relying on string parsing techniques, and could be used on any number of arrays that need to be sorted together.
Method 2: Or you can reorganize the data a bit, and just sort a collection of objects:
var list = [
{name: "Bob", age: 10},
{name: "Tom", age: 20},
{name: "Larry", age: 30}
];
list.sort(function(a, b) {
return ((a.name < b.name) ? -1 : ((a.name == b.name) ? 0 : 1));
});
for (var i = 0; i<list.length; i++) {
alert(list[i].name + ", " + list[i].age);
}
For the comparisons,-1 means lower index, 0 means equal, and 1 means higher index. And it is worth noting that sort() actually changes the underlying array.
Also worth noting, method 2 is more efficient as you do not have to loop through the entire list twice in addition to the sort.
http://jsfiddle.net/ghBn7/38/
Bonus Here is a generic sort method that takes one or more property names.
function sort_by_property(list, property_name_list) {
list.sort((a, b) => {
for (var p = 0; p < property_name_list.length; p++) {
prop = property_name_list[p];
if (a[prop] < b[prop]) {
return -1;
} else if (a[prop] !== a[prop]) {
return 1;
}
}
return 0;
});
}
Usage:
var list = [
{name: "Bob", age: 10},
{name: "Tom", age: 20},
{name: "Larry", age: 30},
{name: "Larry", age: 25}
];
sort_by_property(list, ["name", "age"]);
for (var i = 0; i<list.length; i++) {
console.log(list[i].name + ", " + list[i].age);
}
Output:
Bob, 10
Larry, 25
Larry, 30
Tom, 20
You could get the indices of name array using Array.from(name.keys()) or [...name.keys()]. Sort the indices based on their value. Then use map to get the value for the corresponding indices in any number of related arrays
const indices = Array.from(name.keys())
indices.sort( (a,b) => name[a].localeCompare(name[b]) )
const sortedName = indices.map(i => name[i]),
const sortedAge = indices.map(i => age[i])
Here's a snippet:
const name = ["Bob","Tom","Larry"],
age = ["10", "20", "30"],
indices = Array.from(name.keys())
.sort( (a,b) => name[a].localeCompare(name[b]) ),
sortedName = indices.map(i => name[i]),
sortedAge = indices.map(i => age[i])
console.log(indices)
console.log(sortedName)
console.log(sortedAge)
This solution (my work) sorts multiple arrays, without transforming the data to an intermediary structure, and works on large arrays efficiently. It allows passing arrays as a list, or object, and supports a custom compareFunction.
Usage:
let people = ["john", "benny", "sally", "george"];
let peopleIds = [10, 20, 30, 40];
sortArrays([people, peopleIds]);
[["benny", "george", "john", "sally"], [20, 40, 10, 30]] // output
sortArrays({people, peopleIds});
{"people": ["benny", "george", "john", "sally"], "peopleIds": [20, 40, 10, 30]} // output
Algorithm:
Create a list of indexes of the main array (sortableArray)
Sort the indexes with a custom compareFunction that compares the values, looked up with the index
For each input array, map each index, in order, to its value
Implementation:
/**
* Sorts all arrays together with the first. Pass either a list of arrays, or a map. Any key is accepted.
* Array|Object arrays [sortableArray, ...otherArrays]; {sortableArray: [], secondaryArray: [], ...}
* Function comparator(?,?) -> int optional compareFunction, compatible with Array.sort(compareFunction)
*/
function sortArrays(arrays, comparator = (a, b) => (a < b) ? -1 : (a > b) ? 1 : 0) {
let arrayKeys = Object.keys(arrays);
let sortableArray = Object.values(arrays)[0];
let indexes = Object.keys(sortableArray);
let sortedIndexes = indexes.sort((a, b) => comparator(sortableArray[a], sortableArray[b]));
let sortByIndexes = (array, sortedIndexes) => sortedIndexes.map(sortedIndex => array[sortedIndex]);
if (Array.isArray(arrays)) {
return arrayKeys.map(arrayIndex => sortByIndexes(arrays[arrayIndex], sortedIndexes));
} else {
let sortedArrays = {};
arrayKeys.forEach((arrayKey) => {
sortedArrays[arrayKey] = sortByIndexes(arrays[arrayKey], sortedIndexes);
});
return sortedArrays;
}
}
See also https://gist.github.com/boukeversteegh/3219ffb912ac6ef7282b1f5ce7a379ad
If performance matters, there is sort-ids package for that purpose:
var sortIds = require('sort-ids')
var reorder = require('array-rearrange')
var name = ["Bob","Larry","Tom"];
var age = [30, 20, 10];
var ids = sortIds(age)
reorder(age, ids)
reorder(name, ids)
That is ~5 times faster than the comparator function.
It is very similar to jwatts1980's answer (Update 2).
Consider reading Sorting with map.
name.map(function (v, i) {
return {
value1 : v,
value2 : age[i]
};
}).sort(function (a, b) {
return ((a.value1 < b.value1) ? -1 : ((a.value1 == b.value1) ? 0 : 1));
}).forEach(function (v, i) {
name[i] = v.value1;
age[i] = v.value2;
});
You are trying to sort 2 independet arrays by only calling sort() on one of them.
One way of achieving this would be writing your own sorting methd which would take care of this, meaning when it swaps 2 elements in-place in the "original" array, it should swap 2 elements in-place in the "attribute" array.
Here is a pseudocode on how you might try it.
function mySort(originals, attributes) {
// Start of your sorting code here
swap(originals, i, j);
swap(attributes, i, j);
// Rest of your sorting code here
}
inspired from #jwatts1980's answer, and #Alexander's answer here I merged both answer's into a quick and dirty solution;
The main array is the one to be sorted, the rest just follows its indexes
NOTE: Not very efficient for very very large arrays
/* #sort argument is the array that has the values to sort
#followers argument is an array of arrays which are all same length of 'sort'
all will be sorted accordingly
example:
sortMutipleArrays(
[0, 6, 7, 8, 3, 4, 9],
[ ["zr", "sx", "sv", "et", "th", "fr", "nn"],
["zero", "six", "seven", "eight", "three", "four", "nine"]
]
);
// Will return
{
sorted: [0, 3, 4, 6, 7, 8, 9],
followed: [
["zr", th, "fr", "sx", "sv", "et", "nn"],
["zero", "three", "four", "six", "seven", "eight", "nine"]
]
}
*/
You probably want to change the method signature/return structure, but that should be easy though. I did it this way because I needed it
var sortMultipleArrays = function (sort, followers) {
var index = this.getSortedIndex(sort)
, followed = [];
followers.unshift(sort);
followers.forEach(function(arr){
var _arr = [];
for(var i = 0; i < arr.length; i++)
_arr[i] = arr[index[i]];
followed.push(_arr);
});
var result = {sorted: followed[0]};
followed.shift();
result.followed = followed;
return result;
};
var getSortedIndex = function (arr) {
var index = [];
for (var i = 0; i < arr.length; i++) {
index.push(i);
}
index = index.sort((function(arr){
/* this will sort ints in descending order, change it based on your needs */
return function (a, b) {return ((arr[a] > arr[b]) ? -1 : ((arr[a] < arr[b]) ? 1 : 0));
};
})(arr));
return index;
};
I was looking for something more generic and functional than the current answers.
Here's what I came up with: an es6 implementation (with no mutations!) that lets you sort as many arrays as you want given a "source" array
/**
* Given multiple arrays of the same length, sort one (the "source" array), and
* sort all other arrays to reorder the same way the source array does.
*
* Usage:
*
* sortMultipleArrays( objectWithArrays, sortFunctionToApplyToSource )
*
* sortMultipleArrays(
* {
* source: [...],
* other1: [...],
* other2: [...]
* },
* (a, b) => { return a - b })
* )
*
* Returns:
* {
* source: [..sorted source array]
* other1: [...other1 sorted in same order as source],
* other2: [...other2 sorted in same order as source]
* }
*/
export function sortMultipleArrays( namedArrays, sortFn ) {
const { source } = namedArrays;
if( !source ) {
throw new Error('You must pass in an object containing a key named "source" pointing to an array');
}
const arrayNames = Object.keys( namedArrays );
// First build an array combining all arrays into one, eg
// [{ source: 'source1', other: 'other1' }, { source: 'source2', other: 'other2' } ...]
return source.map(( value, index ) =>
arrayNames.reduce((memo, name) => ({
...memo,
[ name ]: namedArrays[ name ][ index ]
}), {})
)
// Then have user defined sort function sort the single array, but only
// pass in the source value
.sort(( a, b ) => sortFn( a.source, b.source ))
// Then turn the source array back into an object with the values being the
// sorted arrays, eg
// { source: [ 'source1', 'source2' ], other: [ 'other1', 'other2' ] ... }
.reduce(( memo, group ) =>
arrayNames.reduce((ongoingMemo, arrayName) => ({
...ongoingMemo,
[ arrayName ]: [
...( ongoingMemo[ arrayName ] || [] ),
group[ arrayName ]
]
}), memo), {});
}
You could append the original index of each member to the value, sort the array, then remove the index and use it to re-order the other array. It will only work where the contents are strings or can be converted to and from strings successfuly.
Another solution is keep a copy of the original array, then after sorting, find where each member is now and adjust the other array appropriately.
I was having the same issue and came up with this incredibly simple solution. First combine the associated ellements into strings in a seperate array then use parseInt in your sort comparison function like this:
<html>
<body>
<div id="outPut"></div>
<script>
var theNums = [13,12,14];
var theStrs = ["a","b","c"];
var theCombine = [];
for (var x in theNums)
{
theCombine[x] = theNums[x] + "," + theStrs;
}
var theSorted = theAr.sort(function(a,b)
{
var c = parseInt(a,10);
var d = parseInt(b,10);
return c-d;
});
document.getElementById("outPut").innerHTML = theS;
</script>
</body>
</html>
How about:
var names = ["Bob","Tom","Larry"];
var ages = ["10", "20", "30"];
var n = names.slice(0).sort()
var a = [];
for (x in n)
{
i = names.indexOf(n[x]);
a.push(ages[i]);
names[i] = null;
}
names = n
ages = a
Simplest explantion is the best, merge the arrays, and then extract after sorting:
create an array
name_age=["bob#10","Tom#20","Larry#30"];
sort the array as before, then extract the name and the age, you can use # to reconise where
name ends and age begins. Maybe not a method for the purist, but I have the same issue and this my approach.
I have an array of objects that is currently like this, in which entries are ordered by date and time:
var checkin_data = [
{id: 430, date: "2013-05-05", time: "08:24"},
{id: 435, date: "2013-05-06", time: "04:22"},
{id: 436, date: "2013-05-06", time: "05:36"},
{id: 437, date: "2013-05-06", time: "07:51"},
{id: 488, date: "2013-05-06", time: "08:08"},
{id: 489, date: "2013-05-06", time: "10:12"},
{id: 492, date: "2013-05-06", time: "13:18"},
{id: 493, date: "2013-05-06", time: "15:55"},
{id: 494, date: "2013-05-06", time: "18:55"},
{id: 498, date: "2013-05-06", time: "22:15"},
{id: 501, date: "2013-05-07", time: "11:40"},
{id: 508, date: "2013-05-07", time: "18:00"},
{id: 520, date: "2013-05-08", time: "04:48"},
{id: 532, date: "2013-05-09", time: "21:11"},
{id: 492, date: "2013-05-10", time: "11:45"},
{id: 601, date: "2013-05-11", time: "18:12"}
];
The dates represent a date in a particular week: I'd like to sort this array in order to lay it out in "rows", so the data needs to be re-sorted to lay out like this (note the order of the dates):
var checkin_data = [
{id: 430, date: "2013-05-05", time: "08:24"},
{id: 435, date: "2013-05-06", time: "04:22"},
{id: 501, date: "2013-05-07", time: "11:40"},
{id: 520, date: "2013-05-08", time: "04:48"},
{id: 532, date: "2013-05-09", time: "21:11"},
{id: 492, date: "2013-05-10", time: "11:45"},
{id: 601, date: "2013-05-11", time: "18:12"},
{id: 436, date: "2013-05-06", time: "05:36"},
{id: 508, date: "2013-05-07", time: "18:00"},
{id: 437, date: "2013-05-06", time: "07:51"},
{id: 488, date: "2013-05-06", time: "08:08"},
{id: 489, date: "2013-05-06", time: "10:12"},
{id: 492, date: "2013-05-06", time: "13:18"},
{id: 493, date: "2013-05-06", time: "15:55"},
{id: 494, date: "2013-05-06", time: "18:55"},
{id: 498, date: "2013-05-06", time: "22:15"}
];
Getting the data in that order would allow me to lay out a table like this:
Thanks, any help would be appreciated.
Here is a suggestion using functional methods:
Reduce the list into arrays of buckets based on day, and sort that list (this is like reading the table you've got on rows)
Iterate through the rows in order, clear out unused ones.
Here:
//first, we collapse the array into an array of buckets by day
half_sorted = checkin_data.reduce(function(accum,cur){
var bucket = new Date(cur.date).getDay();
accum[bucket].push(cur);
return accum;
},[[],[],[],[],[],[],[]]).map(function(day){
return day.sort(function(x,y){ // now we sort each bucket
return new Date("01-01-1990 "+x.time) - new Date("01-01-1990 "+y.time);
});
});
// At this point, we have an array with 7 cells looking like your table
// if we look at its columns.
// finally, we push to the result table.
var result = [];
var daysToClear = 7;
for(var i=0;daysToClear>0;i=(i+1)%7){
if(half_sorted[i] && half_sorted[i].length > 0){
result.push(half_sorted[i].pop());
}else if(half_sorted[i] && half_sorted[i].length === 0){
half_sorted[i] = null;
daysToClear--;
}
}
Working fiddle
First of all, I think you're going about this in the wrong way. Please see my note below the following code.
To do exactly as you've asked, here's one way:
// parsing the date strings ourselves avoids time zone problems
function dateFromString(string) {
var parts = string.split('-');
return new Date(parseInt(parts[0], 10),
parseInt(parts[1], 10) - 1,
parseInt(parts[2], 10));
}
The above is a utility function.
var i, l, dates = [[], [], [], [], [], [], []], item;
// place the objects into dow-sorted buckets
for (i = 0, l = checkin_data.length; i < l; i += 1) {
item = checkin_data[i];
dates[dateFromString(item.date).getDay()].push(item);
}
i = 0;
l = 0;
checkin_data = [];
while (true) { // instead of a for loop to handle the row-wrap manually
if (dates[i][l]) {
item = dates[i][l];
checkin_data.push(item);
}
i += 1;
if (i === 7) {
if (!item) {
break; // we had a complete row with no data
}
item = undefined;
l += 1;
i = 0;
}
}
checkin_data is now sorted in the order you requested.
Note: you really don't need the second loop, because it is doing most of the work you'll have to do again to use the provided array. So in your routine for writing out the table, instead just use the data structure that the first loop creates. You would of course need a slightly different bailout strategy since you don't want to create an extra blank row, but I'll leave that up to you.
After a bit of thought, though, I came up with another way to do it, if you don't mind adding a new key to your objects:
function dateFromString(string) {
var parts = string.split('-');
return new Date(parseInt(parts[0], 10),
parseInt(parts[1], 10) - 1,
parseInt(parts[2], 10));
}
var i, l, counts = [0, 0, 0, 0, 0, 0, 0], item, dow;
for (i = 0, l = checkin_data.length; i < l; i += 1) {
item = checkin_data[i];
dow = dateFromString(item.date).getDay();
item.sortKey = ++counts[dow] * 7 + dow;
}
checkin_data.sort(function(a, b) {
return a.sortKey - b.sortKey;
});
I've come up with a solution, maybe not the most elegant, but it's working:
var sorted_data = [], elements_to_dump = [], i, j, tmp;
while (checkin_data.length > 0) {
for (i = 0; i < checkin_data.length; i++) {
if (checkin_data[i-1]) {
if (checkin_data[i-1].date === checkin_data[i].date) {
continue;
}
}
sorted_data.push(checkin_data[i]);
elements_to_dump.push(checkin_data[i].id);
}
for (j = 0; j < elements_to_dump.length; j++) {
for (i = 0; i < checkin_data.length; i++) {
if (checkin_data[i].id === elements_to_dump[j]) {
tmp = checkin_data.splice(i, 1);
break;
}
}
}
}
I'd like to sort this array in order to lay it out in "rows", so the data needs to be re-sorted [into this linear representation]. Getting the data in that order would allow me to lay out a table
No, it does not need to be. Actually, that's one step too much, and the order of your intermediate result makes absolutely no sense. What you should do instead is construct a (weekday-) list of (entries-per-day-) lists:
var days = [];
for (var i=0, date=null, day; i<checkin_data.length; i++) {
var entry = checkin_data[i];
if (entry.date !== date)
days.push(day = []);
day.push(entry);
}
That's it, you have you two-dimensional format now. Well, maybe you will need to transpose it to get it into the table you wanted, but that's not too complicated either:
var header = [],
table = [header]; // or create a DOM or a HTML string or whatever
for (var i=0; i<days.length; i++)
header.push(days[i][0].date /* or the weekday name? */);
for (var r=0; !done; r++) {
var row = [],
done = true;
// create cells:
for (var i=0; i<days.length; i++)
if (days[i].length > r) {
row[i] = days[i][r].time;
done = false;
} else
row[i] = "";
}
if (!done)
table.push(row);
}
What you're trying to do is very simple. This is what I would do:
var groups = groupBy(checkin_data, "date"); // groups items based on date
var table = zipAll.apply(null, toArray(groups)); // zips corresponding elements
// of each group into an array
After this you may create your table as follows:
var header = getHeader(groups), rows = map(table, getRow);
document.body.appendChild(getTable(header, rows));
Of course the actual code would be much bigger (a little more than 100 lines of code) since you need to write the logic for groupBy, toArray, zipAll, map, getHeader, getRow, getTable, etc.
Luckily for you I had the time to go and write all this stuff. Hence you now have a working demo: http://jsfiddle.net/hZFJw/
I would suggest that you browse through my code and try to understand how it works. It's too much to explain in one answer.
Note: My solution may be more than 100 lines of code. However it's still a simple solution because:
The actual code which generates the table is just 4 lines long and is very simple to understand.
The rest of the code is composed of reusable functions like groupBy, zipAll and map. These functions are very small and simple to understand.
Overall by abstracting the program into reusable functions the size of the program has increased. However the complexity of the program has considerably decreased.
You could achieve the same result by tackling the problem in an imperative style like most other answers do. However doing so makes the program harder to understand. Compare my code with other answers and see for yourself.
You can sort array with Alasql JavaScript library.
alasql.fn.WEEKDAY = function(d) { // User-defined function
return (new Date(d)).getDay();
};
var res = alasql('SELECT *, WEEKDAY(date) AS dow FROM ? ORDER BY dow', [checkin_data]);
Try this example at jsFiddle.
I have available to me hundreds of JSON strings. Each of these contains an array of 15-20 words sorted by some predetermined weight. This weight, if it's worth noting, is the amount of times these words are found in some chunk of text. What's the best way of finding similarity between arrays of words that are structured like this?
First idea that came to my head was to create a numerical hash of all the words together and basically compare these values to determine similarity. I wasn't very successful with this, since the resulting hash values of very similar strings were not very close. After some research regarding string comparison algorithms, I come to Stackoverflow in hopes of receiving more guidance. Thanks in advance, and please let me know if you need more details of the problem.
Edit 1: Clarifying what I'm trying to do: I want to determine how similar two arrays are according to the words each of these have. I would also like to take into consideration the weight each word carries in each array. For example:
var array1 = [{"word":"hill","count":5},{"word":"head","count":5}];
var array2 = [{"word":"valley","count":7},{"word":"head","count":5}];
var array3 = [{"word":"head", "count": 6}, {"word": "valley", "count": 5}];
var array4 = [{"word": "valley", "count": 7}, {"word":"head", "count": 5}];
In that example, array 4 and array 2 are more similar than array 2 and array 3 because, even though both have the same words, the weight is the same for both of them in array 4 and 2. I hope that makes it a little bit easier to understand. Thanks in advance.
I think that what you want is "cosine similarity", and you might also want to look at vector space models. If you are coding In Java, you can use the open source S-space package.
(added on 31 Oct) Each element of the vector is the count of one particular string. You just need to transform your arrays of strings into such vectors. In your example, you have three words - "hill", "head", "valley". If your vector is in that order, the vectors corresponding to the arrays would be
// array: #hill, #head, #valley
array1: {5, 5, 0}
array2: {0, 5, 7}
array3: {0, 6, 5}
array4: {0, 5, 7}
Given that each array has to be compared to every other array, you are looking at a serious amount of processing along the lines of ∑(n-1) times the average number of "words" in each array. You'll need to store the score for each comparison, then make some sense of it.
e.g.
var array1 = [{"word":"hill","count":5},{"word":"head","count":5}];
var array2 = [{"word":"valley","count":7},{"word":"head","count":5}];
var array3 = [{"word":"head", "count": 6}, {"word": "valley", "count": 5}];
var array4 = [{"word": "valley", "count": 7}, {"word":"head", "count": 5}];
// Comparison score is summed product of matching word counts
function compareThings() {
var a, b, i = arguments.length,
j, m, mLen, n, nLen;
var word, score, result = [];
if (i < 2) return;
// For each array
while (i--) {
a = arguments[i];
j = i;
// Compare with every other array
while (j--) {
b = arguments[j];
score = 0;
// For each word in array
for (m=0, mLen = b.length; m<mLen; m++) {
word = b[m].word
// Compare with each word in other array
for (n=0, nLen=a.length; n<nLen; n++) {
// Add to score
if (a[n].word == word) {
score += a[n].count * b[m].count;
}
}
}
// Put score in result
result.push(i + '-' + j + ':' + score);
}
}
return result;
}
var results = compareThings(array1, array2, array3, array4);
alert('Raw results:\n' + results.join('\n'));
/*
Raw results:
3-2:65
3-1:74
3-0:25
2-1:65
2-0:30
1-0:25
*/
results.sort(function(a, b) {
a = a.split(':')[1];
b = b.split(':')[1];
return b - a;
});
alert('Sorted results:\n' + results.join('\n'));
/*
Sorted results:
3-1:74
3-2:65
2-1:65
2-0:30
3-0:25
1-0:25
*/
So 3-1 (array4 and array2) have the highest score. Fortunately the comparison need only be one way, you don't have to compare a to b and b to a.
Here is an attempt. The algorithm is not very smart (a difference > 20 is the same as not having the same words), but could be a useful start:
var wordArrays = [
[{"word":"hill","count":5},{"word":"head","count":5}]
, [{"word":"valley","count":7},{"word":"head","count":5}]
, [{"word":"head", "count": 6}, {"word": "valley", "count": 5}]
, [{"word": "valley", "count": 7}, {"word":"head", "count": 5}]
]
function getSimilarTo(index){
var src = wordArrays[index]
, values
if (!src) return null;
// compare with other arrays
weighted = wordArrays.map(function(arr, i){
var diff = 0
src.forEach(function(item){
arr.forEach(function(other){
if (other.word === item.word){
// add the absolute distance in count
diff += Math.abs(item.count - other.count)
} else {
// mismatches
diff += 20
}
})
})
return {
arr : JSON.stringify(arr)
, index : i
, diff : diff
}
})
return weighted.sort(function(a,b){
if (a.diff > b.diff) return 1
if (a.diff < b.diff) return -1
return 0
})
}
/*
getSimilarTo(3)
[ { arr: '[{"word":"valley","count":7},{"word":"head","count":5}]',
index: 1,
diff: 100 },
{ arr: '[{"word":"valley","count":7},{"word":"head","count":5}]',
index: 3,
diff: 100 },
{ arr: '[{"word":"head","count":6},{"word":"valley","count":5}]',
index: 2,
diff: 103 },
{ arr: '[{"word":"hill","count":5},{"word":"head","count":5}]',
index: 0,
diff: 150 } ]
*/
Sort the arrays by word before attempting comparison. Once this is complete, comparing two arrays will require exactly 1 pass through each array.
After sorting the arrays, here is a compare algorithm (psuedo-java):
int compare(array1, array2)
{
returnValue = 0;
array1Index = 0
array2Index = 0;
while (array1Index < array1.length)
{
if (array2Index < array2.length)
{
if (array1[array1Index].word == array2[array2Index].word) // words match.
{
returnValue += abs(array1[array1Index].count - array2[array2Index].count);
++array1Index;
++array2Index;
}
else // account for the unmatched array2 word.
{
// 100 is just a number to give xtra weight to unmatched numbers.
returnValue += 100 + array2[array2Index].count;
++array2Index;
}
}
else // array2 empty and array1 is not empty.
{
// 100 is just a number to give xtra weight to unmatched numbers.
returnValue += 100 + array1[array1Index].count;
}
}
// account for any extra unmatched array 2 values.
while (array2Index < array2.length)
{
// 100 is just a number to give xtra weight to unmatched numbers.
returnValue += 100 + array2[array2Index].count;
}
return returnValue;
}