Grouping multiple documents in mongodb - javascript

I am trying to create a golf leaderboard based on multiple rounds, which means that a player can have played 2 rounds with the same matchid. It is pretty simple if there is just one round, I do it like this:
db.roundScores.find( { "matchId": matchId } ).sort({"data.scoreTotals.toPar": 1});
But if a match has more than one round, I am a little in the dark about how to group the scores by player, add up each player's scores across the rounds, and then sort the result.
I am walking down this way, but not sure if this is the right way?
var allRounds = db.rounds.find( { "matchId": matchId } );
var playerScores = [];
while( allRounds.hasNext() ) {
var thisRound = allRounds.next();
var allScores = db.roundScores.find( { "roundId": thisRound._id.$oid } );
var i = 0;
while( allScores.hasNext() ) {
var thisScore = allScores.next();
if(i > 0){
// Should be updating the playerScores array finding the object for the player, But how???
} else {
// Creating a new object and adding it to the playerScores array //
playerScores.push({
"id": thisScore.playerId,
"toPar": thisScore.scoretotals.toPar,
"points": thisScore.scoretotals.points
});
}
i++;
}
}
I really hope that someone can guide me in the right direction on this.
Thanks in advance :-)
------------ EDIT -----------
Here are examples of the documents:
{"roundId": "8745362738", "playerId": "12653426518", "toPar": 3, "points": 27}
{"roundId": "8745362738", "playerId": "54354245566", "toPar": -1, "points": 31}
{"roundId": "7635452678", "playerId": "12653426518", "toPar": 1, "points": 29}
{"roundId": "7635452678", "playerId": "54354245566", "toPar": 2, "points": 23}
The result should be:
1 playerId 54354245566 toPar 1 points 10
2 playerId 12653426518 toPar 4 points 2

Here is something to get you going:
db.roundScores.aggregate([{
$group: {
_id: "$playerId", // group by playerId
toPar: { $sum: "$toPar" }, // sum up all scores
points: { $sum: { $max: [ 0, { $subtract: [ "$points", 25 ] } ] } }, // sum up all points above 25
}
}, {
$sort: { "toPar": 1 } // sort by toPar ascending
}])

You can go with MongoDB aggregation for this. A sample code below
// Build a leaderboard: total each player's toPar over all of their round
// documents, then list the players from the lowest total to the highest.
db.roundScores.aggregate([
  {
    // Stage 1: collapse the round documents into one document per player.
    $group: {
      _id: "$playerId",
      toPar: { $sum: "$toPar" }
    }
  },
  {
    // Stage 2: $sort is a pipeline stage of its own; it cannot be placed
    // inside the $group specification.
    $sort: { "toPar": 1 }
  }
])
This will work

Related

JavaScript: Creating 2 dimensional array of objects based on condition

I'm working on a project that involves two dimensional arrays of objects. I've been working to try to figure out this answer for a while now, and I have some ideas of how to solve it, but I'm a bit stumped.
Let's assume there are animal shelters in City A and that each can hold 50 animals. Animals are coming in from different parts of the state, but the amount of animals from one place can never be more than 50. Here's an example of the animals coming in.
// Upper bound on the number of animals a single shelter can hold
// (a shelter may hold at most 50, i.e. total <= 50).
const animal_shelter_capacity = 50;

// Incoming animals per region, in arrival order.
const array1 = [
  { "region": "NE", quantity: 25 },
  { "region": "NW", quantity: 21 },
  { "region": "SE", quantity: 43 },
  { "region": "SW", quantity: 18 },
  { "region": "Central", quantity: 20 }
];
In this example, the animals from NE (25) and NW (21) would go to one shelter (46 animals in total), the animals from SE (43) would go to another shelter (43 animals in total), and the animals from SW (18) and Central (20) would go to a third shelter (38 animals in total). The number of animals in one shelter can never be greater than 50.
So, I need to produce an array that looks like this:
let array2 = [
[ { "region": "NE", quantity: 25 }, { "region": "NW", quantity: 21 }],
[ { "region": "SE", quantity: 43 } ],
[ { "region": "SW", quantity: 18 }, { "region": "Central", quantity: 20} ]
]
I'm able to loop through array1 using forEach, but when it comes to adding until a certain value is reached, then creating a new array of arrays, I'm a little stumped on how to proceed to do this.
Here's what I have so far:
let array2 = [] //instantiate array
array1.forEach(function(element, index, array)
{
let sum = 0;
let capacity = 50;
for (let j = 0; j < array1.length; j++)
{
sum += array1[j].quantity;
if (sum >= capacity)
{
//create the new array consisting of the regions, push it into the larger array2
}
}
})
I'm not sure how to continue doing this. I know I need to do the following:
1. find a way to cut off the addition sequence once the quantity reaches 50
2. reset the sequence
3. form the new arrays and push them into a larger array
Can anyone provide any advice on how to proceed?
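Try this. Loop through the locations: if a location's animals still fit, add that location to the current shelter's list. If they do not fit, start a new shelter list containing that location. Each new list is pushed onto the roster as soon as it is created, so the last shelter is already part of the result when the loop ends.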
// Incoming animals per region, in arrival order.
const locations = [
  { "region": "NE", "quantity": 25 },
  { "region": "NW", "quantity": 21 },
  { "region": "SE", "quantity": 43 },
  { "region": "SW", "quantity": 18 },
  { "region": "Central", "quantity": 20 }
];

// Each entry of shelterRoster is the list of locations assigned to one shelter.
const shelterRoster = [];
const capacity = 50;
// Number of animals already placed in the most recently opened shelter.
let count = 0;

locations.forEach((incoming) => {
  const openShelter = shelterRoster[shelterRoster.length - 1];
  const fits = Boolean(openShelter) && !(count + incoming.quantity > capacity);

  if (fits) {
    // Still room: keep filling the shelter that is currently open.
    openShelter.push(incoming);
    count += incoming.quantity;
    return;
  }

  // No shelter yet, or this group would overflow it: open a new shelter.
  shelterRoster.push([incoming]);
  count = incoming.quantity;
});

console.log(shelterRoster);
You can approach this with reduce(), using a custom object as the initial accumulator. When the reduce finishes, you need one extra line of code to get your final result.
const animal_shelter_capacity = 50;
const array1 = [
  { "region": "NE", quantity: 25 },
  { "region": "NW", quantity: 21 },
  { "region": "SE", quantity: 43 },
  { "region": "SW", quantity: 18 },
  { "region": "Central", quantity: 20 }
];

/**
 * Packs `items` (in order) into shelters whose total quantity never exceeds
 * `capacity`.
 *
 * @param {Array<{quantity: number}>} items - incoming groups, in arrival order
 * @param {number} capacity - maximum total quantity per shelter (inclusive)
 * @returns {{r: Array<Array<object>>, c: number, a: Array<object>}}
 *   r: the shelters that are already closed,
 *   c: the number of animals in the shelter currently being filled,
 *   a: the shelter currently being filled (not yet part of r).
 */
const packShelters = (items, capacity) =>
  items.reduce((res, curr) => {
    // Close the current shelter only when it already holds something AND the
    // next group would push it strictly past the capacity. A total of exactly
    // `capacity` is allowed, and an empty shelter is never closed.
    const overflow = res.a.length > 0 && res.c + curr.quantity > capacity;
    return {
      r: overflow ? [...res.r, res.a] : res.r,
      c: overflow ? curr.quantity : res.c + curr.quantity,
      a: overflow ? [curr] : [...res.a, curr]
    };
  }, { r: [], c: 0, a: [] });

let obj = packShelters(array1, animal_shelter_capacity);
// The shelter still being filled is not in obj.r yet, so append it here
// (unless there was no input at all).
let newArr = obj.a.length > 0 ? [...obj.r, obj.a] : obj.r;
console.log(newArr);
In the previous code, the accumulator object has the following keys:
r: The array of shelters generated so far.
c: The count of animals in the current shelter (see next).
a: The current shelter, i.e. the one still being filled.
When the reduce finishes, the last shelter (the one in property a) is not yet in the array of shelters, so we have to add it manually (this is what the extra line does).
The first point I've come up with is that you need to sort the input data first, because the input given in the question is not the only possible ordering of the data.
You can have some data like:
let array1 = [
{ "region": "NE", quantity: 25 },
{ "region": "NW", quantity: 21 },
{ "region": "Central", quantity: 20 },
{ "region": "SE", quantity: 43 },
{ "region": "SW", quantity: 18 },
]
In this example Central and SW should be placed together, but without sorting the input first they end up in different arrays.
So, in conclusion, I think this is going to work:
// Maximum total quantity a single group (shelter) may hold, inclusive.
const capacity = 50;

/**
 * Splits `arr`, in its current order, into consecutive groups whose summed
 * `quantity` does not exceed `limit`.
 *
 * The running total is local to each call, so the function can be called
 * repeatedly and always starts from zero.
 *
 * @param {Array<{quantity: number}>} arr - items to group, in the order given
 * @param {number} [limit=capacity] - maximum total quantity per group
 * @returns {Array<Array<object>>} the groups; an empty array for empty input
 */
function make(arr, limit = capacity) {
  const res = [];
  let currentArr = [];
  let sum = 0;

  arr.forEach((v) => {
    if (currentArr.length > 0 && sum + v.quantity > limit) {
      // The next item would overflow the open group: close it, start anew.
      res.push(currentArr);
      currentArr = [v];
      sum = v.quantity;
    } else {
      currentArr.push(v);
      sum += v.quantity;
    }
  });

  // Flush the group still being filled; skip it when there was no input.
  if (currentArr.length > 0) {
    res.push(currentArr);
  }
  return res;
}

const array1 = [
  { "region": "NE", quantity: 25 },
  { "region": "NW", quantity: 21 },
  { "region": "Central", quantity: 20 },
  { "region": "SE", quantity: 43 },
  { "region": "SW", quantity: 18 }
];

// Sort ascending by quantity (in place) before grouping.
array1.sort((a, b) => a.quantity - b.quantity);
console.log(array1);

const result = make(array1);
console.log(result);

lodash: aggregating and reducing array of objects based on date

I am new to Lodash and functional programming concepts. I have an array of objects with day-wise data like this:
[
{
"date": '1-Jan-2015',
"count": 4
},
{
"date": '4-Jan-2015',
"count": 3
},
{
"date": '1-Feb-2015',
"count": 4
},
{
"date": '18-Feb-2015',
"count": 10
}
]
and I want to reduce and aggregate it in such a way that I get an array of objects where each object has monthly data instead of day-wise data like this:
[
{
"date": 'Jan, 2015',
"count": 7 // aggregating the count of January
},
{
"date": 'Feb, 2015',
"count": 14 //aggregating the count of February
}
]
Currently, I have written some very unreadable and convoluted code full of ifs and fors, which works. However, I want to refactor it using lodash. Is that possible? I looked around and found _.reduce and _.groupBy, which I can probably use, but I am stumped right now and can't figure out a good, clean implementation.
We can use _.reduce & _.values
// Day-wise input: dates are formatted as 'D-Mon-YYYY'.
var arr = [
  { "date": '1-Jan-2015', "count": 4 },
  { "date": '4-Jan-2015', "count": 3 },
  { "date": '1-Feb-2015', "count": 4 },
  { "date": '18-Feb-2015', "count": 10 }
];

// Accumulate one { date, count } entry per 'Mon, YYYY' label in a lookup
// object, then take the object's values to get the monthly array.
_.values(_.reduce(arr, function (totals, entry) {
  var parts = entry.date.split('-');
  var label = `${parts[1]}, ${parts[2]}`;
  var prior = totals[label];

  totals[label] = {
    date: label,
    count: entry.count + (prior ? prior.count : 0)
  };
  return totals;
}, {}));
You don't need lodash to achieve what you want, you could use plain old Javascript:
// Day-wise input: dates are formatted as 'D-Mon-YYYY'.
var array = [{
  "date": '1-Jan-2015',
  "count": 4
}, {
  "date": '4-Jan-2015',
  "count": 3
}, {
  "date": '1-Feb-2015',
  "count": 4
}, {
  "date": '18-Feb-2015',
  "count": 10
}];

// Fold the daily entries into one { date: 'Mon, YYYY', count } entry per
// month, keeping the months in order of first appearance.
var result = array.reduce(function (ar, item) {
  // '1-Jan-2015' -> 'Jan, 2015'
  var monthLabel = item.date.split('-').slice(1, 3).join(', ');

  // Look for an entry already created for this month.
  var existing = ar.find(function (a) {
    return a.date === monthLabel;
  });

  if (existing) {
    // Month already present: add this day's count to it.
    existing.count += item.count;
  } else {
    // First time this month is seen: start a new entry.
    ar.push({ date: monthLabel, count: item.count });
  }
  return ar; // reduce requires the accumulator to be returned
}, []); // start from an empty array

console.log(result); // the array with counts aggregated per month
And for the fun, a lodash version:
// Same aggregation, keyed by 'Mon, YYYY' in a lookup object whose values
// are then extracted with lodash.
_.values(array.reduce(function (byMonth, entry) {
  var label = entry.date.split('-').slice(1, 3).join(', ');
  var seen = byMonth[label];
  var running = (seen && seen.count) || 0;

  byMonth[label] = { date: label, count: running + entry.count };
  return byMonth;
}, {}));
See jsfiddle here

How to retrieve documents with conditioning an array of nested objects?

The structure of the objects stored in mongodb is the following:
obj = {_id: "55c898787c2ab821e23e4661", ingredients: [{name: "ingredient1", value: "70.2"}, {name: "ingredient2", value: "34"}, {name: "ingredient3", value: "15.2"}, ...]}
What I would like to do is retrieve all documents in which the value of a specific ingredient is greater than an arbitrary number.
To be more specific, suppose we want to retrieve all the documents which contain ingredient with name "ingredient1" and its value is greater than 50.
Trying the following I couldn't retrieve desired results:
var collection = db.get('docs');
var queryTest = collection.find({$where: 'this.ingredients.name == "ingredient1" && parseFloat(this.ingredients.value) > 50'}, function(e, docs) {
console.log(docs);
});
Does anyone know what is the correct query to condition upon specific array element names and values?
Thanks!
You really don't need the JavaScript evaluation of $where here, just use basic query operators with an $elemMatch query for the array. While true that the "value" elements here are in fact strings, this is not really the point ( as I explain at the end of this ). The main point is to get it right the first time:
collection.find(
{
"ingredients": {
"$elemMatch": {
"name": "ingredient1",
"value": { "$gt": 50 }
}
}
},
{ "ingredients.$": 1 }
)
The $ in the second part is the positional operator, which projects only the matched element of the array from the query conditions.
This is also considerably faster than the JavaScript evaluation, in both that the evaluation code does not need to be compiled and uses native coded operators, as well as that an "index" can be used on the "name" and even "value" elements of the array to aid in filtering the matches.
If you expect more than one match in the array, then the .aggregate() command is the best option. With modern MongoDB versions this is quite simple:
collection.aggregate([
{ "$match": {
"ingredients": {
"$elemMatch": {
"name": "ingredient1",
"value": { "$gt": 50 }
}
}
}},
{ "$redact": {
"$cond": {
"if": {
"$and": [
{ "$eq": [ { "$ifNull": [ "$name", "ingredient1" ] }, "ingredient1" ] },
{ "$gt": [ { "$ifNull": [ "$value", 60 ] }, 50 ] }
]
},
"then": "$$DESCEND",
"else": "$$PRUNE"
}
}}
])
And it is even simpler in forthcoming releases, which introduce the $filter operator:
collection.aggregate([
{ "$match": {
"ingredients": {
"$elemMatch": {
"name": "ingredient1",
"value": { "$gt": 50 }
}
}
}},
{ "$project": {
"ingredients": {
"$filter": {
"input": "$ingredients",
"as": "ingredient",
"cond": {
"$and": [
{ "$eq": [ "$$ingredient.name", "ingredient1" ] },
{ "$gt": [ "$$ingredient.value", 50 ] }
]
}
}
}
}}
])
Where in both cases you are effectively "filtering" the array elements that do not match the conditions after the initial document match.
Also, since your "values" are actually "strings" right now, you really should change them to be numeric. Here is a basic process:
// Convert every ingredients[i].value to a number, sending the updates to the
// server in batches of 1000 operations.
var bulk = collection.initializeOrderedBulkOp(),
    count = 0;

collection.find().forEach(function(doc) {
  doc.ingredients.forEach(function(ingredient, idx) {
    // Address the array element by its index: "ingredients.<idx>.value".
    var update = { "$set": {} };
    update["$set"]["ingredients." + idx + ".value"] = parseFloat(ingredient.value);
    bulk.find({ "_id": doc._id }).updateOne(update);
    count++;

    // Flush once a full batch of 1000 operations has been queued, then
    // start a fresh bulk object for the next batch.
    if ( count % 1000 === 0 ) {
      bulk.execute();
      bulk = collection.initializeOrderedBulkOp();
    }
  });
});

// Flush whatever is left over from the last, partially filled batch.
if ( count % 1000 !== 0 )
  bulk.execute();
And that will fix the data so the query forms here work.
This is much better than processing with JavaScript $where which needs to evaluate every document in the collection without the benefit of an index to filter. Where the correct form is:
collection.find(function() {
return this.ingredients.some(function(ingredient) {
return (
( ingredient.name === "ingredient1" ) &&
( parseFloat(ingredient.value) > 50 )
);
});
})
And that can also not "project" the matched value(s) in the results as the other forms can.
Try using $elemMatch:
// Match documents that contain at least one ingredient element which is both
// named "ingredient1" and has a value strictly greater than 50.
// NOTE: the sample documents store "value" as a string; a numeric $gt only
// matches elements whose value is stored as a number.
var queryTest = collection.find(
  { ingredients: { $elemMatch: { name: "ingredient1", value: { $gt: 50 } } } }
);

How to Sort by Weighted Values

I have this problem that I want to sort the result of a query based on the field values from another collection,
Problem: I want to first get the user 123 friends and then get their posts and then sort the post with the friends strength value,
I have this :
POST COLLECTION:
{
user_id: 8976,
post_text: 'example working',
}
{
user_id: 673,
post_text: 'something',
}
USER COLLECTION:
{
user_id: 123,
friends: {
{user_id: 673,strength:4}
{user_id: 8976,strength:1}
}
}
Based on the information you have retrieved from your user you essentially want to come out to an aggregation framework query that looks like this:
db.posts.aggregate([
{ "$match": { "user_id": { "$in": [ 673, 8976 ] } } },
{ "$project": {
"user_id": 1,
"post_text": 1,
"weight": {
"$cond": [
{ "$eq": [ "$user_id", 8976 ] },
1,
{ "$cond": [
{ "$eq": [ "$user_id", 673 ] },
4,
0
]}
]
}
}},
{ "$sort": { "weight": -1 } }
])
So why aggregation when this does not aggregate? As you can see, the aggregation framework does more than just aggregate. Here it is being used to "project" a new field into the document and populate it with a "weight" to sort on. This allows you to get the results back ordered by the value you want them to be sorted on.
Of course, you need to get from your initial data to this form in a "generated" way that works for any data. This takes a few steps, but here I'll present the JavaScript way to do it, which should be easy to convert to most languages.
Also presuming your actual "user" looks more like this, which would be valid:
{
"user_id": 123,
"friends": [
{ "user_id": 673, "strength": 4 },
{ "user_id": 8976, "strength": 1 }
]
}
From an object like this you then construct the aggregation pipeline:
// user is the structure shown above
// user is the structure shown above
var stack = [],   // holds the (single) nested $cond expression built so far
    args = [];    // user_id of every friend, used for the $in match

user.friends.forEach(function(friend) {
  args.push( friend.user_id );

  // "If the post's user_id is this friend, weight = friend.strength, ..."
  // The "$" prefix makes "$user_id" a reference to the document's field;
  // without it the expression compares against the literal string.
  var rec = {
    "$cond": [
      { "$eq": [ "$user_id", friend.user_id ] },
      friend.strength
    ]
  };

  // "... otherwise": 0 for the innermost condition, or the expression built
  // for the previous friends, which nests the conditions one inside another.
  if ( stack.length === 0 ) {
    rec["$cond"].push(0);
  } else {
    var last = stack.pop();
    rec["$cond"].push( last );
  }
  stack.push( rec );
});

var pipeline = [
  // Only posts written by one of the friends.
  { "$match": { "user_id": { "$in": args } } },
  // Keep the post fields and add the computed "weight".
  { "$project": {
    "user_id": 1,
    "post_text": 1,
    "weight": stack[0]
  }},
  // Strongest friends first.
  { "$sort": { "weight": -1 } }
];

db.posts.aggregate(pipeline);
And that is all there is to it. Now you have some code to go through the list of "friends" for a user and construct another query to get all posts from those friends weighted by the "strength" value for each.
Of course you could do much the same things with a query for all posts by just removing or changing the $match, but keeping the "weight" projection you can "float" all of the "friends" posts to the top.

Aggregation or Map Reduce to create normalized 'Unique Paying Users Per Vendor'

I am trying to create a report for Unique Paying Users Per Vendor using Map Reduce or the Aggregation Framework in MongoDB. The only catch is that the totals need to be normalized such that each user contributes a total of 1 across all of the vendors he/she has purchased from. For example
{
"account": "abc",
"vendor": "amazon",
},
{
"account": "abc",
"vendor": "overstock",
},
{
"account": "ccc",
"vendor": "overstock",
}
would produce
{
"vendor": "amazon",
"total" : 0.5
},
{
"vendor": "overstock",
"total": 1.5
}
Here we see the user 'abc' made two purchases and contributes equally to both vendors. We also see that summing up vendors totals would be equal to our unique paying users.
My naive approach performs this aggregation in three steps.
1. For each user, store number of purchases by vendor in a map.
2. For each user, sum up total purchases and divide each vendor purchases by total.
3. Perform an additive merge of each users normalized purchase map into a final vendor map.
This approach works with smaller data sets but is slow and runs out of memory on larger sets.
Using the Aggregation framework, I've figured out how to calculate the total users per vendor, but not in a normalized way.
agg = this.db.aggregate(
[
{
$group :
{
_id :
{
vendor : '$vendor',
user : '$account'
},
total :
{
$sum : 1
}
}
}
]);
var transformed = {};
for( var index in agg.result)
{
var entry = agg.result[index];
var vendor= entry._id.vendor;
if(!transformed[vendor])
{
transformed[vendor] = 0;
}
transformed[vendor] += 1;
}
How can I restructure this query to normalize the users totals?
There are a couple of approaches to this which can apply to either the .aggregate() or the .mapReduce() methods respectively, and they of course vary in efficiency which is relative to the overall size of your data.
Firstly using aggregate, you would need to get the totals per "vendor" much as you have done, but then you would need the overall total per user in order to work out your percentages. So mileage may vary on how efficient the grouping operation is considering we are going to have to create and $unwind arrays:
db.collection.aggregate([
{ "$group": {
"_id": { "account": "$account", "vendor": "$vendor" },
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.account",
"purch": { "$push": { "vendor": "$_id.vendor", "count": "$count" } },
"total": { "$sum": "$count" },
}},
{ "$unwind": "$purch" },
{ "$project": {
"vendor": "$purch.vendor",
"total": {
"$divide": [ "$purch.count", "$total" ]
}
}},
{ "$group": {
"_id": "$vendor",
"total": { "$sum": "$total" }
}}
])
The mapReduce approach would have to be run in two steps, first reducing the response to vendors by user and then afterwards down to the vendor:
db.collection.mapReduce(
function () {
emit(
this.account,
{
"data": [{
"vendor": this.vendor,
"count": 1,
}],
"total": 1,
"seen": false
}
);
},
function (key,values) {
var reduced = { data: [], total: 0, seen: true };
values.forEach(function(value) {
value.data.forEach(function(data) {
var index = -1;
for (var i = 0; i <=reduced.data.length-1; i++) {
if ( reduced.data[i].vendor == data.vendor ) {
index = i;
break;
}
}
if ( index == -1 ) {
reduced.data.push(data);
} else {
if (!value.seen)
reduced.data[index].count += data.count;
}
});
});
reduced.data.map(function(x) {
reduced.total += x.count;
});
return reduced;
},
{
"out": { "replace": "output" },
"finalize": function (key,value) {
var result = {
data: []
};
result.data = value.data.map(function(x) {
var res = { };
res["vendor"] = x.vendor;
res["total"] = x.count / value.total;
return res;
});
return result;
}
}
)
And the second part on the output:
db.output.mapReduce(
function () {
this.value.data.forEach(function(data){
emit( data.vendor, data.total );
});
},
function(key,values) {
return Array.sum( values );
},
{ "out": { "inline": 1 } }
)
So it depends on the size of your data. The mapReduce approach will be slower and requires the output to a collection and then running the aggregation again.
On the other hand the aggregation framework approach should run faster in general but depending on how large that vendor array can get per user it can slow things down.
This is in response to Neil Lunn's answer above. After some thought yesterday, I came to the same realization as you did: that the aggregation would have to be a multi-step process if done in map reduce. I like your answer as it uses map reduce to write to a collection, which would be required for a larger data set. I'll also try out the .aggregate() method for performance. It is interesting to note that the new aggregation framework in Mongo 2.6 also has this 'out' feature.
The solution I ended up with is the following (which works with our data set).
1. use aggregation framework to calculate purchases per account.
2. convert this result into a map for fast access
3. perform map reduce on the collection, making use of the 'scope' field to pass in the account total map we built in step 2.
The code looks similar to this.
// Step 1: count the purchases made by each account.
var agg = this.db.aggregate(
[
  {
    $group :
    {
      _id :
      {
        user : '$account'
      },
      total :
      {
        $sum : 1
      }
    }
  }
]);

// Step 2: turn the aggregation result into a lookup map for fast access.
// addToMap is a helper defined outside this snippet; presumably it stores
// entry.total under the account key -- confirm against its definition.
var accountMap = {};
for (var i = 0; i < agg.result.length; i++)
{
  var entry = agg.result[i];
  addToMap(accountMap, entry._id.user, entry.total);
}
// Drop the reference so the aggregation result can be garbage collected
// (the delete operator has no effect on a variable declared with var).
agg = null;

// Step 3: map/reduce over the collection. accountMap is made available to
// the map function through the 'scope' option below.
var mapFunction = function()
{
  var key = this.vendor;
  // create normalized total for the vendor based on the users purchases.
  var value = 1 / accountMap[this.account];
  emit(key, value);
};
var reduceFunction = function(key, values)
{
  return(Array.sum(values));
};
var res = this.db.mapReduce(mapFunction, reduceFunction,
{
  out :
  {
    inline : 1
  },
  scope :
  {
    'accountMap' : accountMap
  }
});
accountMap = null;

// Collect the inline results into a plain vendor -> normalized total map.
// Each row must be read from res.results itself.
var transformed = {};
for (var j = 0; j < res.results.length; j++)
{
  var row = res.results[j];
  transformed[row._id] = row.value;
}

Categories

Resources