best combination algorithm of complex data with multiple contraints - javascript

I guess we can say that this is very similar to an already asked question here (Optimisation/knapsack algorithm with multiple contraints in JavaScript), which hasn't yet an answer.
Let say we like javascript, C, C++, java. Any of this languages work for me. Anyone know algorithms to solve the problem?
PROBLEM:
find the best subset of items which grants minimum cost and maximum number of objects, knowing that there's a limitation of resource:
var items = [
{name: "Rome", cost: 1000, hours: 5, peoples: 5},
{name: "Venice", cost: 200, hours: 1, peoples: 10},
{name: "Torin", cost: 500, hours: 3, peoples: 2},
{name: "Genova", cost: 700, hours: 7, peoples: 8},
{name: "Rome2", cost: 1020, hours: 5, peoples: 6},
{name: "Venice2", cost: 220, hours: 1, peoples: 10},
{name: "Torin2", cost: 520, hours: 3, peoples: 2},
{name: "Genova2", cost: 720, hours: 7, peoples: 4},
{name: "Rome3", cost: 1050, hours: 5, peoples: 5},
{name: "Venice3", cost: 250, hours: 1, peoples: 8},
{name: "Torin3", cost: 550, hours: 3, peoples: 8},
{name: "Genova3", cost: 750, hours: 7, peoples: 8}
];
var maxCost = 10000, maxHours = 100, maxPeoples = 50;
// find subset of items that minimize cost, hours and peoples
// and maximize number of items
// do not exceed max values!!!
IDEAS I HAD: I imagined I could do a solution to knapsack problem for each couple of cost (let call them "KPcost-hours", "KPhours-cost", "KPcost-peoples" etc.), which grants me the solution to optimize single costs. Then, if I'm lucky, take the common parts of this subsets and work from there... but i don't think it's a good path...
If you can give a script sample, or a pseudo-script sample, you're welcome! Thank you!

General solution
PROBLEM: find the best subset of items which grants minimum cost and
maximum number of objects, knowing that there's a limitation of
resource.
I see two optimization criteria here (I'll talk about the case where you want to minimize people, hours and cost below as well).
A possible approach is to build a program that will return a maximum Pareto-optimal set of solutions.
A Pareto set is a set of non-dominating solutions, meaning that for any two solutions S1 and S2, S1 does not dominate S2, and vice versa. A solution S1 dominates a solution S2 if it is better or equal than S2 regarding all criterias, and strictly better regarding at least one criteria.
For example, in your case, we can consider the following solutions:
S1: cost = 10, nb_objects = 4
S2: cost = 10, nb_objects = 7
S3: cost = 0, nb_objects = 0
S4: cost = 14, nb_objects = 6
Then our Pareto-optimal set of solutions is {S1, S3, S4}. That is because they do not dominate each other (for example, S1 does not dominate S4 because S4 is better regarding the number of objects). S2 is not part of the Pareto-optimal solution because it is dominated by both S1 and S4.
In the general case, Pareto-set are very hard to calculate, and can be extremely big. In your particular case, 4 criteria seem somehow reasonable, but it always can be surprising.
Here is a pseudocode on how to compute such a set of solutions:
Result = array of size nb_objects, initialized with empty sets
for i from 0 to total_nb_of_objects:
for each feasible solution 'new_solution' to the problem with fixed number of objects:
for each solution of Result[i]:
if hours(new_solution) >= hours(solution) and \
cost(new_solution) >= cost(solution) and \
people(new_solution) >= people(solution):
dominated = true
break
if not dominated:
add new_solution to Result[i]
return Result
This little pseudocode here has more a value to try and understand the concept of Pareto Efficiency, I would not advice looping on all the feasible solutions of a variation to a knapsack problem (too costy).

A brute force approach by checking all combinations.
function getItems(array, constraints, [optimum, equal]) {
function iter(index = 0, right = [], add) {
function update() {
if (!result.length || optimum(right, result[0])) return result = [right];
if (equal(right, result[0])) result.push(right);
}
if (index >= array.length || !constraints.every(fn => fn(right))) return;
if (add && right.length) update();
var temp = right.find(({ ref }) => ref === array[index]),
old = JSON.parse(JSON.stringify(right));
if (temp) {
temp.count++;
} else {
right.push({ count: 1, ref: array[index] });
}
iter(index, right, true);
iter(index + 1, old);
}
var result = [];
iter();
return result;
}
const
addBy = k => (s, { count, ref: { [k]: value } }) => s + count * value,
addCount = (s, { count }) => s + count;
// find subset of items that minimize cost, hours and peoples
// and maximize number of items
// do not exceed max values!!!
var items = [{ name: "Rome", cost: 1000, hours: 5, peoples: 5 }, { name: "Venice", cost: 200, hours: 1, peoples: 10 }, { name: "Torin", cost: 500, hours: 3, peoples: 2 }, { name: "Genova", cost: 700, hours: 7, peoples: 8 }],
maxCost = 10000,
maxHours = 100,
maxPeoples = 50,
result = getItems(
items,
[
array => array.reduce(addBy('cost'), 0) <= maxCost,
array => array.reduce(addBy('hours'), 0) <= maxHours,
array => array.reduce(addBy('peoples'), 0) <= maxPeoples
],
[
(a, b) => a.reduce(addCount, 0) > b.reduce(addCount, 0),
(a, b) => a.reduce(addCount, 0) === b.reduce(addCount, 0)
]
);
console.log(result);
.as-console-wrapper { max-height: 100% !important; top: 0; }

Assuming one can only select 0 or 1 of each item, there are 2^12=4096 combinations possible. The number of feasible solutions is 3473. The number of non-dominated (or Pareto optimal) solutions is 83.
I used two different approaches:
Enumerate all feasible solutions. Then filter out all dominated solutions (each solution must be better in at least one objective than all other solutions).
Write a Mixed Integer Programming. It finds a solution, and adds a constraint that says: it should be better in at least one of the objectives than previous solutions. (Along the lines of this model).
Both methods find these 83 solutions. For this problem complete enumeration is faster.
Note that the number of Pareto optimal solutions can grow quickly. Here are some pictures of such a Pareto optimal set of a real-world design problem.
Note that there is no "single" best solution. All Pareto optimal solutions are optimal. Only when you make assumptions on the trade-offs between objectives, you can reduce the number of optimal solutions further.

I elaborate a working solution, but it's really bruteforce, however a bit optimized. I didn't went thru the Pareto solution which I believe is probably a better solution. Unfortunately the script from Nina Sholz didn't work (at least for me), so I came up with this one.
Just to leave here a working sample (read: don't use for BIG data).
PS - if anyone can write any phrase in a better english, comment below, I'll correct my bad writing.
/**
* Brute Force approach
* Problem: find combination of data objects to minimize sum of object properties and maximize number of objects
* Costraint: sum of object properties has upper limit (for each property)
* Solution used: do every combination, starting with the max number of objects, then lower by 1 each time, until a (or more) combination satisfy every criteria.
*/
// combination
// e.g. combination of 3 numbers with value from 0 to 4 -> combination(3,5)
// see https://rosettacode.org/wiki/Combinations#JavaScript
function combination(n, length) {
// n -> [a] -> [[a]]
function comb(n, lst) {
if (!n) return [[]];
if (!lst.length) return [];
var x = lst[0],
xs = lst.slice(1);
return comb(n - 1, xs).map(function (t) {
return [x].concat(t);
}).concat(comb(n, xs));
}
// f -> f
function memoized(fn) {
m = {};
return function (x) {
var args = [].slice.call(arguments),
strKey = args.join('-');
v = m[strKey];
if ('u' === (typeof v)[0])
m[strKey] = v = fn.apply(null, args);
return v;
}
}
// [m..n]
function range(m, n) {
return Array.apply(null, Array(n - m + 1)).map(function (x, i) {
return m + i;
});
}
var fnMemoized = memoized(comb),
lstRange = range(0, length-1);
return fnMemoized(n, lstRange)
}
// just some math calculation ------
// obviously n & r in N; r < n
function _factor(n){
var f = 1;
while (n > 1){ f *= n--; }
return f;
}
function _factor2(n,to){
var f = 1;
while (n > 1 && n >= to){ f *= n--; }
return f;
}
function _factorFraction(sup,inf){
return (sup > inf) ? _factor2(sup,inf+1) : 1/_factor2(inf,sup+1)
}
function _combination(n,r){
return (r > n/2) ? _factorFraction(n,r)/_factor(n-r) : _factorFraction(n,n-r)/_factor(r); // namely _factor(n)/_factor(n-r)/_factor(r)
}
// just some math calculation ------
var minr = 2, // set inferior limit (r) of combination search. 2 <= minr < datas.length
datas = [], // to be set. matrix to be filled with array of data
limits = [0], // to be set. contains limit for each datas column
comboKeep = [], // will contain all solutions found
columns,
sums,
timer;
function combineCheck(r){
if (r < minr) return;
console.log("Expected number of combination C(",datas.length,",",r,") = ",_combination(datas.length,r));
var metconditions = 0;
var CNR = combination(r,datas.length);
CNR.forEach(combo => {
sums = new Array(columns).fill(0);
// calculate sum for each column
for (var j=0; j<combo.length; j++){
for (var i=0; i<columns; i++){
sums[i] += datas[combo[j]][i];
};
}
// check if conditions are met
for (var i=0; i<columns; i++){
if (sums[i] > limits[i]){
//console.log("sum of column",i,"exceeds limit (",sums[i]," > ",limits[i],")");
return;
}
};
comboKeep.push(combo);
metconditions++;
});
console.log("Condition met in ",metconditions,"combos.");
if (metconditions == CNR.length){
console.log("No need to go further, all combo have been checked.");
return;
}
//------------
// OPTIONAL...
//------------
if (metconditions) return; // remove this line if you want all possible combination, even with less objects
combineCheck(r-1); // for delayed call: setTimeout( () => combineCheck(r-1), 250 );
}
function combineCheckStarter(){
comboKeep = [];
columns = datas[0].length;
timer = Date.now();
combineCheck(datas.length-1);
timer = Date.now() - timer;
}
//-----------------------------------------
var items = [
{name: "Rome", cost: 1000, hours: 5, peoples: 5},
{name: "Venice", cost: 200, hours: 1, peoples: 10},
{name: "Torin", cost: 500, hours: 3, peoples: 2},
{name: "Genova", cost: 700, hours: 7, peoples: 8},
{name: "Rome2", cost: 1020, hours: 5, peoples: 6},
{name: "Venice2", cost: 220, hours: 1, peoples: 10},
{name: "Torin2", cost: 520, hours: 3, peoples: 2},
{name: "Genova2", cost: 720, hours: 7, peoples: 4},
{name: "Rome3", cost: 1050, hours: 5, peoples: 5},
{name: "Venice3", cost: 250, hours: 1, peoples: 8},
{name: "Torin3", cost: 550, hours: 3, peoples: 8},
{name: "Genova3", cost: 750, hours: 7, peoples: 8}
];
var datas = Array.from(items, e => [e.cost, e.hours, e.peoples]);
var limits = [2500, 8, 20];
//-----------------------------------------
// test ;)
combineCheckStarter();
console.log("Combination found in ",timer,"ms:",comboKeep);
// pretty print results
var prettier = new Array(comboKeep.length),
unifier = new Array(columns).fill(0);
comboKeep.forEach( (combo, k) => {
var answer = new Array(combo.length);
sums = new Array(columns).fill(0);
combo.forEach((itm,i) => {
answer[i] = items[itm].name;
for (var j=0; j<columns; j++){
sums[j] += datas[itm][j];
};
});
prettier[k] = {items: answer.join(","), cost: sums[0], hours: sums[1], peoples: sums[2]};
for (var j=0; j<columns; j++){
if (unifier[j]<sums[j]) unifier[j] = sums[j];
};
});
// normalize
prettier.forEach( e => {
e.total = e.cost/unifier[0] + e.hours/unifier[1] + e.peoples/unifier[2];
});
//find the best (sum of all resource is lower)
prettier.sort( (b,a) => b.total-a.total);
console.log("sorted solutions:",prettier);
console.log("Best solution should be ",prettier[0].items,prettier[0]);

Related

Split randomly Array of Object in two equal arrays

I have this array of object;
let persons = [
{id: 1, name: "..."},
{id: 2, name: "..."},
{id: 3, name: "..."},
{id: 4, name: "..."},
{id: 5, name: "..."},
{id: 6, name: "..."},
{id: 7, name: "..."},
{id: 8, name: "..."}
]
I would like to split this array in two array of equal length. each time a execute the function which split the array It should return a random data in each array not the same list of object.
I try with this function
function splitArr(data, part) {
let list1 = [];
let list2 = [];
for(let i = 0; i < data.length ; i++) {
let random = Math.floor(Math.random() * data.length);
if(random % 2 === 0) {
list1.push(data[i]);
} else {
list2.push(data[i]);
}
}
return [list1, list2];
}
It isn't obvious that the function will return exactly array of equal length each time. Some time it return array of 2 and 6 element not equal.
Just shuffle the array randomly and then splice the array in half.
For shuffling an array take the solution provided here.
function shuffle(a) {
var j, x, i;
for (i = a.length - 1; i > 0; i--) {
j = Math.floor(Math.random() * (i + 1));
x = a[i];
a[i] = a[j];
a[j] = x;
}
return a;
}
For getting the two lists from that, do:
let list2 = shuffle([...data]); // spread to avoid mutating the original
let list1 = list2.splice(0, data.length >> 1);
The shift operator >> is used to get the truncated half of the array length.
I think that fastest and more reliable would be to use native array methods in this case.
I would recommend to go with slice method like below:
function splitArr(data) {
const arrLength = data.length;
const firstArr = data.slice(0, arrLength/2);
const secArr = data.slice(arrLength / 2, arrLength);
return [firstArr, secArr];
}
This way you got an universal function that will always return two arrays of same length.
You can experiment with Math.min() and Math.ceil in edge cases (like with arrays of uneven lenght).
You can do this with randojs.com really easily using the randoSequence function, which does not affect the original array. Then, use the slice function to split the arrays, and the bitwise operator >> to handle original arrays of odd length.
function shuffleAndSplit(arr){
var shuffled = randoSequence(arr);
shuffled.forEach((item, i) => {shuffled[i] = item.value;});
return [shuffled.slice(0, shuffled.length >> 1), shuffled.slice(shuffled.length >> 1)];
}
console.log(shuffleAndSplit(["a", "b", "c", "d", "e", "f", "g"]));
<script src="https://randojs.com/1.0.0.js"></script>
This could be even simpler if you used the array map function, but that has some issues in Internet Explorer. If you don't care about IE, here's how you'd do this with map:
function shuffleAndSplit(arr){
var shuffled = randoSequence(arr).map(item => item.value);
return [shuffled.slice(0, shuffled.length >> 1), shuffled.slice(shuffled.length >> 1)];
}
console.log(shuffleAndSplit(["a", "b", "c", "d", "e", "f", "g"]));
<script src="https://randojs.com/1.0.0.js"></script>
If you only wish to split it into two seperate part, you could use splice.
It takes two parameters, three if you wish to replace elements, the first one is the starting splice index. The second is the number of element to remove. The function will returns the removed element, spliting your array in half. And since the splice function is removing element from the original array, you will be left with two arrays of equals length ( if you have an even number of element ).
As for the randomess of your array, you could simply shuffle it before splitting it. Here i've used Jeff's answer
/**
* https://stackoverflow.com/a/6274381/5784924
* Shuffles array in place. ES6 version
* #param {Array} a items An array containing the items.
*/
function shuffle(a) {
for (let i = a.length - 1; i > 0; i--) {
const j = Math.floor(Math.random() * (i + 1));
[a[i], a[j]] = [a[j], a[i]];
}
return a;
}
let persons = shuffle([
{id: 1, name: "..."},
{id: 2, name: "..."},
{id: 3, name: "..."},
{id: 4, name: "..."},
{id: 5, name: "..."},
{id: 6, name: "..."},
{id: 7, name: "..."},
{id: 8, name: "..."}
]);
let firstArray = persons.splice(0, persons.length / 2);
console.log(firstArray.map((item) => item.id), persons.map((item) => item.id));
the problem with your approach is this:
if(random % 2 === 0) {
list1.push(data[i]);
} else {
list2.push(data[i]);
}
you are trying to insert in a random array, but you are not handling if that array has the same length than the other ( and it will be hard to do, you will lose your random intentions)
it is better to insert random items in each one of the arrays, for each iteration.
let persons = [
{id: 1, name: "..."},
{id: 2, name: "..."},
{id: 3, name: "..."},
{id: 4, name: "..."},
{id: 5, name: "..."},
{id: 6, name: "..."},
{id: 7, name: "..."},
{id: 8, name: "..."}
]
function splitArr(data, part) {
let list1 = [];
let list2 = [];
let isPair = false;
while(data.length > 0){
const randomEntry = Math.floor(Math.random() * data.length);
const arrayToPush = isPair?list1:list2;
arrayToPush.push(data[randomEntry]);
data.splice(randomEntry, 1);
isPair = !isPair;
}
console.log(list1.length, list2.length)
return [list1, list2];
}
splitArr(persons)
Does this helps you ?
function splitArr(data, count_decks) {
data = data.slice()
const decks = []
let i = 0
while (data.length) {
if (!Array.isArray(decks[i])) decks[i] = []
decks[i].push(data.splice(Math.random()*data.length, 1)[0])
i = (i+1) % count_decks
}
return decks
}
splitArr(persons, 2) // here 2 for 2 decks

substract an element of an array after multiply two elements on the same array

I'm getting info from an api, and i multiply two elements from the response, and then, sum the result. This is my code so far:
function forTest(){
$http.get('/api/DaApi?di=' + '2018-11-01' + '&df=' + '2018-11-01')
.then(function(data){
$scope.result = data.data.Response;
$scope.multiplyResult = $scope.result.map(x => x.Cost * x.Quantity).reduce((a, b) => a + b, 0);
}
Everything works perfect, but now i have to make another operation before the total sum. Let's say now i have 3 elements: Cost, Quantity and Discount:
[{Quantity: 2, Cost: 1000, Discount: -100},
{Quantity: 3, Cost: 2000, Discount: -500},
{Quantity: 2, Costo: 3130, Discount: -120}]
And now i need to multiply Quantity per Cost, and substract the Discount(which is already a negative number). After multiply and substract, i have to sum all the results. How can i make the substraction? Inside of map i have to make the subsraction? Something like:
$scope.multiplyResult = $scope.result.map(x => (x.Cost * x.Quantity) + x.Discount)).reduce((a, b) => a + b, 0);
Or must be inside of the reduce?
Someone can help me please, i'm a little far to understand the use of map and reduce. I'm using javascript and AngularJs.
Thanx in advance.
I think it was a typo.
let arr = [{Quantity: 2, Cost: 1000, Discount: -100},
{Quantity: 3, Cost: 2000, Discount: -500},
{Quantity: 2, Cost: 3130, Discount: -120}];
let sum = arr.map(x => (x.Cost * x.Quantity) + x.Discount).reduce((a,b)=>a+b,0)
console.log(sum);
You could calculate and sum in a single loop by taking a destructuring assignment for the needed properties and then add to sum the price.
$scope.multiplyResult = $scope.result
.reduce((sum, { Cost, Quantity, Discount }) => sum + Cost * Quantity + Discount, 0);

Calculate the average of points in a array - Javascript

I have an array with infos about a group of people : name, current status, new points, last event points
Example:
var group = new Array();
group[0] = "John Doe,beginer,14,7";
group[1] = "Lois Lane,advanced,13,9";
group[2] = "Bruce Waine,advanced,17,10";
I need a function that calculates the average of the new points.
For the previous example the average would be (14+13+17)/3 = 14.66666666666667
It'd be a heck of a lot easier if you convert the data in the array from strings to objects This will benefit you in two ways: 1) the code will be more readable, understandable, and easier to maintain, and 2) you won't have to do a bunch of string gymnastics to pull out the relevant data.
Do something like this:
var group = [
{ name: 'John Doe', status: 'beginner', newPoints: 14, eventPoints: 7 },
{ name: 'Lois Lane', status: 'advanced', newPoints: 13, eventPoints: 9 },
{ name: 'Bruce Waine', status: 'advanced', newPoints: 17, eventPoints: 10 }
];
function getAverageNewPoints(people) {
var count = people.length || 0,
average = 0;
for (var i = 0; i < count; i++) {
average += people[i].newPoints;
}
return average / count;
}
alert('The average of new points in the group is: ' + getAverageNewPoints(group));
Try the following:
function groupAverage(group) {
var sum = 0;
var count = group.length;
for (var i in group) {
sum += parseInt(group[i].split(',')[2], 10);
}
return sum / count;
}
Split the String at , and get the values and convert them to Number.
var group = new Array();
group[0] = "John Doe,beginer,14,7";
group[1] = "Lois Lane,advanced,13,9";
group[2] = "Bruce Waine,advanced,17,10";
sum=0;
for(var i in group)
{
sum=sum+Number(group[i].split(",")[2]);
}
console.log(sum/group.length);
You have a bad data structure for this. You don't want to use strings. You also should not use the Array constructor. Start with:
var group = [
{name: "John Doe", rank: "beginner", points: 14, lastScore: 7},
{name: "Lois Lane", rank: "advanced", points: 13, lastScore: 9},
{name: "Bruce Wayne", rank: "advanced", points: 17, lastScore: 10},
],
length = group.length,
sum = 0,
i;
for ( i = 0; i < length; ++i) {
sum += group[i].points;
}
return sum / length; // Or do something else with the result.
// I did no error checking.
You could use an object constructor instead of the inline Object I used, but that's not really necessary. I'm just curious; did you use strings as a default, or was using a string interface part of a textbook assignment?
Oh, one reason to use [] instead of new Array() is that when you construct an Array, the value is always truthy, while [] is falsy.
I did take the liberty of correcting Bruce's last name.

Sort array into "rows" in JavaScript

I have an array of objects that is currently like this, in which entries are ordered by date and time:
var checkin_data = [
{id: 430, date: "2013-05-05", time: "08:24"},
{id: 435, date: "2013-05-06", time: "04:22"},
{id: 436, date: "2013-05-06", time: "05:36"},
{id: 437, date: "2013-05-06", time: "07:51"},
{id: 488, date: "2013-05-06", time: "08:08"},
{id: 489, date: "2013-05-06", time: "10:12"},
{id: 492, date: "2013-05-06", time: "13:18"},
{id: 493, date: "2013-05-06", time: "15:55"},
{id: 494, date: "2013-05-06", time: "18:55"},
{id: 498, date: "2013-05-06", time: "22:15"},
{id: 501, date: "2013-05-07", time: "11:40"},
{id: 508, date: "2013-05-07", time: "18:00"},
{id: 520, date: "2013-05-08", time: "04:48"},
{id: 532, date: "2013-05-09", time: "21:11"},
{id: 492, date: "2013-05-10", time: "11:45"},
{id: 601, date: "2013-05-11", time: "18:12"}
];
The dates represent a date in a particular week: I'd like to sort this array in order to lay it out in "rows", so the data needs to be re-sorted to lay out like this (note the order of the dates):
var checkin_data = [
{id: 430, date: "2013-05-05", time: "08:24"},
{id: 435, date: "2013-05-06", time: "04:22"},
{id: 501, date: "2013-05-07", time: "11:40"},
{id: 520, date: "2013-05-08", time: "04:48"},
{id: 532, date: "2013-05-09", time: "21:11"},
{id: 492, date: "2013-05-10", time: "11:45"},
{id: 601, date: "2013-05-11", time: "18:12"},
{id: 436, date: "2013-05-06", time: "05:36"},
{id: 508, date: "2013-05-07", time: "18:00"},
{id: 437, date: "2013-05-06", time: "07:51"},
{id: 488, date: "2013-05-06", time: "08:08"},
{id: 489, date: "2013-05-06", time: "10:12"},
{id: 492, date: "2013-05-06", time: "13:18"},
{id: 493, date: "2013-05-06", time: "15:55"},
{id: 494, date: "2013-05-06", time: "18:55"},
{id: 498, date: "2013-05-06", time: "22:15"}
];
Getting the data in that order would allow me to lay out a table like this:
Thanks, any help would be appreciated.
Here is a suggestion using functional methods:
Reduce the list into arrays of buckets based on day, and sort that list (this is like reading the table you've got on rows)
Iterate through the rows in order, clear out unused ones.
Here:
//first, we collapse the array into an array of buckets by day
half_sorted = checkin_data.reduce(function(accum,cur){
var bucket = new Date(cur.date).getDay();
accum[bucket].push(cur);
return accum;
},[[],[],[],[],[],[],[]]).map(function(day){
return day.sort(function(x,y){ // now we sort each bucket
return new Date("01-01-1990 "+x.time) - new Date("01-01-1990 "+y.time);
});
});
// At this point, we have an array with 7 cells looking like your table
// if we look at its columns.
// finally, we push to the result table.
var result = [];
var daysToClear = 7;
for(var i=0;daysToClear>0;i=(i+1)%7){
if(half_sorted[i] && half_sorted[i].length > 0){
result.push(half_sorted[i].pop());
}else if(half_sorted[i] && half_sorted[i].length === 0){
half_sorted[i] = null;
daysToClear--;
}
}
Working fiddle
First of all, I think you're going about this in the wrong way. Please see my note below the following code.
To do exactly as you've asked, here's one way:
// parsing the date strings ourselves avoids time zone problems
function dateFromString(string) {
var parts = string.split('-');
return new Date(parseInt(parts[0], 10),
parseInt(parts[1], 10) - 1,
parseInt(parts[2], 10));
}
The above is a utility function.
var i, l, dates = [[], [], [], [], [], [], []], item;
// place the objects into dow-sorted buckets
for (i = 0, l = checkin_data.length; i < l; i += 1) {
item = checkin_data[i];
dates[dateFromString(item.date).getDay()].push(item);
}
i = 0;
l = 0;
checkin_data = [];
while (true) { // instead of a for loop to handle the row-wrap manually
if (dates[i][l]) {
item = dates[i][l];
checkin_data.push(item);
}
i += 1;
if (i === 7) {
if (!item) {
break; // we had a complete row with no data
}
item = undefined;
l += 1;
i = 0;
}
}
checkin_data is now sorted in the order you requested.
Note: you really don't need the second loop, because it is doing most of the work you'll have to do again to use the provided array. So in your routine for writing out the table, instead just use the data structure that the first loop creates. You would of course need a slightly different bailout strategy since you don't want to create an extra blank row, but I'll leave that up to you.
After a bit of thought, though, I came up with another way to do it, if you don't mind adding a new key to your objects:
function dateFromString(string) {
var parts = string.split('-');
return new Date(parseInt(parts[0], 10),
parseInt(parts[1], 10) - 1,
parseInt(parts[2], 10));
}
var i, l, counts = [0, 0, 0, 0, 0, 0, 0], item, dow;
for (i = 0, l = checkin_data.length; i < l; i += 1) {
item = checkin_data[i];
dow = dateFromString(item.date).getDay();
item.sortKey = ++counts[dow] * 7 + dow;
}
checkin_data.sort(function(a, b) {
return a.sortKey - b.sortKey;
});
I've come up with a solution, maybe not the most elegant, but it's working:
var sorted_data = [], elements_to_dump = [], i, j, tmp;
while (checkin_data.length > 0) {
for (i = 0; i < checkin_data.length; i++) {
if (checkin_data[i-1]) {
if (checkin_data[i-1].date === checkin_data[i].date) {
continue;
}
}
sorted_data.push(checkin_data[i]);
elements_to_dump.push(checkin_data[i].id);
}
for (j = 0; j < elements_to_dump.length; j++) {
for (i = 0; i < checkin_data.length; i++) {
if (checkin_data[i].id === elements_to_dump[j]) {
tmp = checkin_data.splice(i, 1);
break;
}
}
}
}
I'd like to sort this array in order to lay it out in "rows", so the data needs to be re-sorted [into this linear representation]. Getting the data in that order would allow me to lay out a table
No, it does not need to be. Actually, that's one step too much, and the order of your intermediate result makes absolutely no sense. What you should do instead is construct a (weekday-) list of (entries-per-day-) lists:
var days = [];
for (var i=0, date=null, day; i<checkin_data.length; i++) {
var entry = checkin_data[i];
if (entry.date !== date)
days.push(day = []);
day.push(entry);
}
That's it, you have you two-dimensional format now. Well, maybe you will need to transpose it to get it into the table you wanted, but that's not too complicated either:
var header = [],
table = [header]; // or create a DOM or a HTML string or whatever
for (var i=0; i<days.length; i++)
header.push(days[i][0].date /* or the weekday name? */);
for (var r=0; !done; r++) {
var row = [],
done = true;
// create cells:
for (var i=0; i<days.length; i++)
if (days[i].length > r) {
row[i] = days[i][r].time;
done = false;
} else
row[i] = "";
}
if (!done)
table.push(row);
}
What you're trying to do is very simple. This is what I would do:
var groups = groupBy(checkin_data, "date"); // groups items based on date
var table = zipAll.apply(null, toArray(groups)); // zips corresponding elements
// of each group into an array
After this you may create your table as follows:
var header = getHeader(groups), rows = map(table, getRow);
document.body.appendChild(getTable(header, rows));
Of course the actual code would be much bigger (a little more than 100 lines of code) since you need to write the logic for groupBy, toArray, zipAll, map, getHeader, getRow, getTable, etc.
Luckily for you I had the time to go and write all this stuff. Hence you now have a working demo: http://jsfiddle.net/hZFJw/
I would suggest that you browse through my code and try to understand how it works. It's too much to explain in one answer.
Note: My solution may be more than 100 lines of code. However it's still a simple solution because:
The actual code which generates the table is just 4 lines long and is very simple to understand.
The rest of the code is composed of reusable functions like groupBy, zipAll and map. These functions are very small and simple to understand.
Overall by abstracting the program into reusable functions the size of the program has increased. However the complexity of the program has considerably decreased.
You could achieve the same result by tackling the problem in an imperative style like most other answers do. However doing so makes the program harder to understand. Compare my code with other answers and see for yourself.
You can sort array with Alasql JavaScript library.
alasql.fn.WEEKDAY = function(d) { // User-defined function
return (new Date(d)).getDay();
};
var res = alasql('SELECT *, WEEKDAY(date) AS dow FROM ? ORDER BY dow', [checkin_data]);
Try this example at jsFiddle.

Comparing arrays of strings for similarity

I have available to me hundreds of JSON strings. Each of these contains an array of 15-20 words sorted by some predetermined weight. This weight, if it's worth noting, is the amount of times these words are found in some chunk of text. What's the best way of finding similarity between arrays of words that are structured like this?
First idea that came to my head was to create a numerical hash of all the words together and basically compare these values to determine similarity. I wasn't very successful with this, since the resulting hash values of very similar strings were not very close. After some research regarding string comparison algorithms, I come to Stackoverflow in hopes of receiving more guidance. Thanks in advance, and please let me know if you need more details of the problem.
Edit 1: Clarifying what I'm trying to do: I want to determine how similar two arrays are according to the words each of these have. I would also like to take into consideration the weight each word carries in each array. For example:
var array1 = [{"word":"hill","count":5},{"word":"head","count":5}];
var array2 = [{"word":"valley","count":7},{"word":"head","count":5}];
var array3 = [{"word":"head", "count": 6}, {"word": "valley", "count": 5}];
var array4 = [{"word": "valley", "count": 7}, {"word":"head", "count": 5}];
In that example, array 4 and array 2 are more similar than array 2 and array 3 because, even though both have the same words, the weight is the same for both of them in array 4 and 2. I hope that makes it a little bit easier to understand. Thanks in advance.
I think that what you want is "cosine similarity", and you might also want to look at vector space models. If you are coding In Java, you can use the open source S-space package.
(added on 31 Oct) Each element of the vector is the count of one particular string. You just need to transform your arrays of strings into such vectors. In your example, you have three words - "hill", "head", "valley". If your vector is in that order, the vectors corresponding to the arrays would be
// array: #hill, #head, #valley
array1: {5, 5, 0}
array2: {0, 5, 7}
array3: {0, 6, 5}
array4: {0, 5, 7}
Given that each array has to be compared to every other array, you are looking at a serious amount of processing along the lines of ∑(n-1) times the average number of "words" in each array. You'll need to store the score for each comparison, then make some sense of it.
e.g.
var array1 = [{"word":"hill","count":5},{"word":"head","count":5}];
var array2 = [{"word":"valley","count":7},{"word":"head","count":5}];
var array3 = [{"word":"head", "count": 6}, {"word": "valley", "count": 5}];
var array4 = [{"word": "valley", "count": 7}, {"word":"head", "count": 5}];
// Comparison score is summed product of matching word counts
function compareThings() {
var a, b, i = arguments.length,
j, m, mLen, n, nLen;
var word, score, result = [];
if (i < 2) return;
// For each array
while (i--) {
a = arguments[i];
j = i;
// Compare with every other array
while (j--) {
b = arguments[j];
score = 0;
// For each word in array
for (m=0, mLen = b.length; m<mLen; m++) {
word = b[m].word
// Compare with each word in other array
for (n=0, nLen=a.length; n<nLen; n++) {
// Add to score
if (a[n].word == word) {
score += a[n].count * b[m].count;
}
}
}
// Put score in result
result.push(i + '-' + j + ':' + score);
}
}
return result;
}
var results = compareThings(array1, array2, array3, array4);
alert('Raw results:\n' + results.join('\n'));
/*
Raw results:
3-2:65
3-1:74
3-0:25
2-1:65
2-0:30
1-0:25
*/
results.sort(function(a, b) {
a = a.split(':')[1];
b = b.split(':')[1];
return b - a;
});
alert('Sorted results:\n' + results.join('\n'));
/*
Sorted results:
3-1:74
3-2:65
2-1:65
2-0:30
3-0:25
1-0:25
*/
So 3-1 (array4 and array2) have the highest score. Fortunately the comparison need only be one way, you don't have to compare a to b and b to a.
Here is an attempt. The algorithm is not very smart (a difference > 20 is the same as not having the same words), but could be a useful start:
var wordArrays = [
[{"word":"hill","count":5},{"word":"head","count":5}]
, [{"word":"valley","count":7},{"word":"head","count":5}]
, [{"word":"head", "count": 6}, {"word": "valley", "count": 5}]
, [{"word": "valley", "count": 7}, {"word":"head", "count": 5}]
]
function getSimilarTo(index){
var src = wordArrays[index]
, values
if (!src) return null;
// compare with other arrays
weighted = wordArrays.map(function(arr, i){
var diff = 0
src.forEach(function(item){
arr.forEach(function(other){
if (other.word === item.word){
// add the absolute distance in count
diff += Math.abs(item.count - other.count)
} else {
// mismatches
diff += 20
}
})
})
return {
arr : JSON.stringify(arr)
, index : i
, diff : diff
}
})
return weighted.sort(function(a,b){
if (a.diff > b.diff) return 1
if (a.diff < b.diff) return -1
return 0
})
}
/*
getSimilarTo(3)
[ { arr: '[{"word":"valley","count":7},{"word":"head","count":5}]',
index: 1,
diff: 100 },
{ arr: '[{"word":"valley","count":7},{"word":"head","count":5}]',
index: 3,
diff: 100 },
{ arr: '[{"word":"head","count":6},{"word":"valley","count":5}]',
index: 2,
diff: 103 },
{ arr: '[{"word":"hill","count":5},{"word":"head","count":5}]',
index: 0,
diff: 150 } ]
*/
Sort the arrays by word before attempting comparison. Once this is complete, comparing two arrays will require exactly 1 pass through each array.
After sorting the arrays, here is a compare algorithm (psuedo-java):
int compare(array1, array2)
{
returnValue = 0;
array1Index = 0
array2Index = 0;
while (array1Index < array1.length)
{
if (array2Index < array2.length)
{
if (array1[array1Index].word == array2[array2Index].word) // words match.
{
returnValue += abs(array1[array1Index].count - array2[array2Index].count);
++array1Index;
++array2Index;
}
else // account for the unmatched array2 word.
{
// 100 is just a number to give xtra weight to unmatched numbers.
returnValue += 100 + array2[array2Index].count;
++array2Index;
}
}
else // array2 empty and array1 is not empty.
{
// 100 is just a number to give xtra weight to unmatched numbers.
returnValue += 100 + array1[array1Index].count;
}
}
// account for any extra unmatched array 2 values.
while (array2Index < array2.length)
{
// 100 is just a number to give xtra weight to unmatched numbers.
returnValue += 100 + array2[array2Index].count;
}
return returnValue;
}

Categories

Resources