I have this example items collection:
{
"_id": "1",
"field1": "value1",
"field2": "value2",
"category": "phones",
"user": "1",
"tags": [
"tag1",
"tag3"
]
},
{
"_id": "2",
"field1": "value1",
"field2": "value2",
"category": "phones",
"user": "1",
"tags": [
"tag2",
"tag3"
]
},
{
"_id": "3",
"field1": "value1",
"field2": "value2",
"category": "bikes",
"user": "1",
"tags": [
"tag3",
"tag4"
]
},
{
"_id": "4",
"field1": "value1",
"field2": "value2",
"category": "cars",
"user": "2",
"tags": [
"tag1",
"tag2"
]
}
I would to search items created by specific user (ie user: 1) and display them by category field. Result:
{
"phones": [
{
"_id": "1",
"field1": "value1",
"field2": "value2",
"tags": [
"tag1",
"tag3"
]
},
{
"_id": "2",
"field1": "value1",
"field2": "value2",
"tags": [
"tag2",
"tag3"
]
}
],
"bikes" : [
{
"_id": "3",
"field1": "value1",
"field2": "value2",
"tags": [
"tag3",
"tag4"
]
}
]
}
Is it possible to obtain this scheme with aggregation-group functions?
Thanks you
It is possible to group by the category, but not in the way you present it. This is really a good thing because your "category" is actually data and you should really not be representing "data" as a "key", in either your storage or your output.
So it would really be recommended to transform like this:
db.collection.aggregate([
{ "$match": { "user": 1 } },
{ "$group": {
"_id": "$category",
"items": {
"$push": {
"field1": "$field1",
"field2": "$field2",
"tags": "$tags"
}
}
}},
{ "$group": {
"_id": null,
"categories": {
"$push": {
"_id": "$_id",
"items": "$items"
}
}
}}
])
You get output like this:
{
"_id" : null,
"categories" : [
{
"_id" : "bikes",
"items" : [
{
"_id": 3,
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag3",
"tag4"
]
}
]
},
{
"_id" : "phones",
"items" : [
{
"_id": 1,
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag1",
"tag3"
]
},
{
"_id": 2,
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag2",
"tag3"
]
}
]
}
]
}
It really is better to have generic keys names that do not alter with the changing data. This is in fact the object oriented pattern.
If you really think you need the "data as keys" here, for the aggregation framework you either live with knowing the "categories" that you are expecting, or are otherwise prepared to generate the pipeline stages:
db.utest.aggregate([
{ "$match": { "user": "1" } },
{ "$group": {
"_id": null,
"phones": {
"$push": {
"$cond": [
{ "$eq": ["$category","phones"] },
{
"_id": "$_id",
"field1": "$field1",
"field2": "$field2",
"tags": "$tags"
},
false
]
}
},
"bikes": {
"$push": {
"$cond": [
{ "$eq": ["$category","bikes"] },
{
"_id": "$_id",
"field1": "$field1",
"field2": "$field2",
"tags": "$tags"
},
false
]
}
}
}},
{ "$unwind": "$phones" },
{ "$match": { "phones": { "$ne": false } }},
{ "$group": {
"_id": "$_id",
"phones": { "$push": "$phones" },
"bikes": { "$first": "$bikes" }
}},
{ "$unwind": "$bikes" },
{ "$match": { "bikes": { "$ne": false } }},
{ "$group": {
"_id": "$_id",
"phones": { "$first": "$phones" },
"bikes": { "$push": "$bikes" }
}},
{ "$project": {
"_id": 0,
"phones": 1,
"bikes": 1
}}
])
You can shorten that a bit with MongoDB 2.6, since you can just filter out the false values with the $setDifference operator:
db.collection.aggregate([
{ "$match": { "user": "1" } },
{ "$group": {
"_id": null,
"phones": {
"$push": {
"$cond": [
{ "$eq": ["$category","phones"] },
{
"_id": "$_id",
"field1": "$field1",
"field2": "$field2",
"tags": "$tags"
},
false
]
}
},
"bikes": {
"$push": {
"$cond": [
{ "$eq": ["$category","bikes"] },
{
"_id": "$_id",
"field1": "$field1",
"field2": "$field2",
"tags": "$tags"
},
false
]
}
}
}},
{ "$project": {
"_id": 0,
"phones": { "$setDifference": ["$phones",[false]] },
"bikes": { "$setDifference": ["$bikes",[false]] }
}}
])
Both produce output just how you want it:
{
"phones" : [
{
"_id" : "1",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag1",
"tag3"
]
},
{
"_id" : "2",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag2",
"tag3"
]
}
],
"bikes" : [
{
"_id" : "3",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag3",
"tag4"
]
}
]
}
The general case here is that aggregation framework just won't allow field data to be used as a key, so you need to either just group on data or specify the key names yourself.
The only way you get "dynamic" key names is by using mapReduce instead:
db.collection.mapReduce(
function () {
var obj = { };
var category = this.category;
delete this.user;
delete this.category;
obj[category] = [this];
emit(null,obj);
},
function (key,values) {
var reduced = {};
values.forEach(function(value) {
Object.keys(value).forEach(function(key) {
if ( !reduced.hasOwnProperty(key) )
reduced[key] = [];
value[key].forEach(function(item) {
reduced[key].push(item);
});
});
});
return reduced;
},
{
"query": { "user": "1" },
"out": { "inline": 1 }
}
)
So now the key generation is dynamic, but the output is done in a very mapReduce way:
{
"_id" : null,
"value" : {
"phones" : [
{
"_id" : "1",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag1",
"tag3"
]
},
{
"_id" : "2",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag2",
"tag3"
]
}
],
"bikes" : [
{
"_id" : "3",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag3",
"tag4"
]
}
]
}
}
So the output is constrained by how mapReduce directs outut and evaluating the JavaScript here will be slower than the native operations of the aggregation framework. More power in manipulation, but that is the trade-off.
To conclude this, if you stick with the pattern then the first way with the aggregation framework is the fastest and best way to do this, plus you could always restructure the result once returned from the server. If you insist on breaking the pattern and need dynamic keys to come from the server then mapReduce will do it where the other aggregation framework is deemed impractical.
Related
I'm having an issue with making count for items returned from an array without assuming or using those fields in my aggregration.
Data structure looks like this:
[
{
"_id": "1",
"title": "Vanella Icream",
"contain": "sugar",
"details": [
{
"flavour": "Vanella"
},
{
"weight": "10KG"
},
{
"sugar": "15KG"
}
]
},
{
"_id": "2",
"title": "Pretzels",
"contain": "salt",
"details": [
{
"flavour": "Wheat"
},
{
"weight": "10KG"
},
{
"sugar": "15KG"
}
]
},
{
"_id": "3",
"title": "Rasmalai Icream",
"contain": "sugar",
"details": [
{
"flavour": "Vanella"
},
{
"weight": "15KG"
},
{
"sugar": "12KG"
}
]
},
{
"_id": "4",
"title": "Vanella Icream",
"contain": "sugar",
"details": [
{
"flavour": "Vanella"
},
{
"weight": "15KG"
},
{
"sugar": "12KG"
}
]
}
]
Output I want:
[
{
"details": {
"flavour": {
"Vanella": 3, //Number of times Vanella present in each document.
"Wheat": 1,
},
"weight": {
"10KG": 2,
"15KG": 2
},
"sugar": {
"12KG": 2,
"15KG": 2
}
}
}
]
Query:
db.collection.aggregate([
{
"$unwind": {
"path": "$details"
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$details",
"$$ROOT"
]
}
}
},
{
"$facet": {
"flavour": [
{
"$group": {
"_id": "$flavour",
"sum": {
"$sum": 1
}
}
},
{
"$addFields": {
"flavour": "$_id"
}
},
{
"$project": {
"_id": 0
}
}
],
"weight": [
{
"$group": {
"_id": "$weight",
"sum": {
"$sum": 1
}
}
},
{
"$addFields": {
"weight": "$_id"
}
},
{
"$project": {
"_id": 0
}
}
]
}
},
{
"$addFields": {
"flavour": {
"$reduce": {
"input": {
"$filter": {
"input": {
"$map": {
"input": "$flavour",
"as": "w",
"in": {
"$cond": [
{
"$ne": [
"$$w.flavour",
null
]
},
{
"$let": {
"vars": {
"o": [
[
"$$w.flavour",
"$$w.sum"
]
]
},
"in": {
"$arrayToObject": "$$o"
}
}
},
null
]
}
}
},
"as": "f",
"cond": {
"$ne": [
"$$f",
null
]
}
}
},
"initialValue": {},
"in": {
"$let": {
"vars": {
"d": "$$value",
"p": "$$this"
},
"in": {
"$mergeObjects": [
"$$d",
"$$p"
]
}
}
}
}
},
"weight": {
"$reduce": {
"input": {
"$filter": {
"input": {
"$map": {
"input": "$weight",
"as": "w",
"in": {
"$cond": [
{
"$ne": [
"$$w.weight",
null
]
},
{
"$let": {
"vars": {
"o": [
[
"$$w.weight",
"$$w.sum"
]
]
},
"in": {
"$arrayToObject": "$$o"
}
}
},
null
]
}
}
},
"as": "f",
"cond": {
"$ne": [
"$$f",
null
]
}
}
},
"initialValue": {},
"in": {
"$let": {
"vars": {
"d": "$$value",
"p": "$$this"
},
"in": {
"$mergeObjects": [
"$$d",
"$$p"
]
}
}
}
}
}
}
},
{
"$project": {
"details": "$$ROOT"
}
}
])
Here I'm trying to get the flavour and weight with their count, with manually adding those fields in $filter stage. I want to do it without assuming those keys. So, even if there is 20 items present in array details it will map those items and shows me output with their counts respectively.
I hope you guys understand.
Playground:https://mongoplayground.net/p/j1mzgWvcmvd
You need to change the schema, the thing you want to do is easy, and both those queries are so complicated and slow, even the second that is much smaller has 2 $unwind and 3 $group with 3 $arrayToObject and 8 stages total because of the schema and the schema of the answer.
Don't store data in the keys of the documents, people that are new to MongoDB do those, i was doing it also, but it makes all things harder.(i can't say like never do it but you dont need it here)
Your schema should be something like
{
"_id": "2",
"title": "Pretzels",
"contain": "salt",
"details": [
{
"type" : "flavour",
"value" : "Wheat"
},
{
"type" : "weight",
"value" : "10KG"
},
{
"type" : "sugar",
"value" : "15KG"
}
]
}
See this example
Converts your schema, to the new schema and produce the results you
want but without data in keys (the first part you wouldnt need it you would need only the bellow query if you had that schema from start)
Query with the new Schema (no data in keys)
[{"$unwind": { "path": "$details"}},
{"$replaceRoot": {"newRoot": "$details"}},
{
"$group": {
"_id": {
"type": "$type",
"value": "$value"
},
"sum": {"$sum": 1}
}
},
{
"$replaceRoot": {
"newRoot": {"$mergeObjects": ["$_id","$$ROOT"]}
}
},
{"$project": {"_id": 0}},
{
"$group": {
"_id": "$type",
"values": {
"$push": {
"value": "$value",
"sum": "$sum"
}
}
}
},
{"$addFields": {"type": "$_id"}},
{"$project": {"_id": 0}}
]
MongoDB operators are not made to support for data in keys or dynamic keys(uknown keys) (to do it you do complicated things like the above)
If you want to change your schema, either do it with update in the database,
Or take the documents to the application and do it with javascript, and re-insert.
Even if you solve this question in the next one, you will have again problems.
I'm the guy from Mongodb Forum:
Try this out https://mongoplayground.net/p/tfyfpIkHilQ
I try to learn aggregation concept in MongoDB. I create an object like this for training.
"_id": "601c4bb56e018211b02abbf8",
"isDeleted": false,
"name": "TeacherName1",
"class": "7",
"students": [
{ "_id": "601c4bb56e018211b02abbf9", isDeleted:true, "name": "student-1", "studentGroup": "A", "avgResult": 36},
{ "_id": "601c4bb56e018211b02abbfa", isDeleted:false, "name": "student-2", "studentGroup": "A", "avgResult": 55},
{ "_id": "601c4bb56e018211b02abbfb", isDeleted:false, "name": "student-3", "studentGroup": "B", "avgResult": 44.66},
{ "_id": "601c4bb56e018211b02abbfc", isDeleted:false, "name": "student-4", "studentGroup": "C", "avgResult": 83.66},
{ "_id": "601c4bb56e018211b02abbfd", isDeleted:true, "name": "student-5", "studentGroup": "B", "avgResult": 37},
{ "_id": "601c4bb56e018211b02abbfe", isDeleted:true, "name": "student-6", "studentGroup": "C", "avgResult": 39.66},
]
I want to get teacher information and deleted students (isDeleted=true). So I try to get this result.
"_id": "601c4bb56e018211b02abbf8",
"isDeleted": false,
"name": "TeacherName1",
"class": "7",
"students": [
{ "_id": "601c4bb56e018211b02abbf9", isDeleted:true, ...},
{ "_id": "601c4bb56e018211b02abbfd", isDeleted:true, ...},
{ "_id": "601c4bb56e018211b02abbfe", isDeleted:true, ...},
]
I get result with use $unwind and $filter. But can I get this result with only $elemMatch?
If I use this query
this.aggregate([
{
$match: {
_id: mongoose.Types.ObjectId("601c4bb56e018211b02abbf8"),
isDeleted: false,
"students.isDeleted":true
},
},
]);
It returns all object.
If I try this
this.aggregate([
{
$match: {
_id: mongoose.Types.ObjectId("601c4bb56e018211b02abbf8"),
isDeleted: false,
students:{
$elemMatch:{
isDeleted:true
}
}
},
},
]);
It returns all object.
$match will just give you the whole doc should you match
however you may use $project with $filter using another stage
given
db.dummy.insert({studs:[{isDeleted:true, name:'a'},{isDeleted: true, name:'b'},{name:'c'}]})
db.dummy.insert({studs:[{name:'c'}]})
> match = {$match:{studs:{$elemMatch: {isDeleted:true}}}}
> project = {$project: { deletedStuds: {$filter:{input: '$studs', as:'stud', cond:{ $eq: ['$$stud.isDeleted', true]} } } }}
{
"$project" : {
"deletedStuds" : {
"$filter" : {
"input" : "$studs",
"as" : "stud",
"cond" : {
"$eq" : [
"$$stud.isDeleted",
true
]
}
}
}
}
}
> db.dummy.aggregate(match, project)
{ "_id" : ObjectId("6020351eb965951ac8a1eb62"), "deletedStuds" : [ { "isDeleted" : true, "name" : "a" }, { "isDeleted" : true, "name" : "b" } ] }
In my collection I have a category array as below.
I receive another array to my API like below
array = ['Chess','Rugby'];
I want to add a condition to my database query such that catName field from category objects exists in array.
currently I'm using the below code to get the results:
postSchemaModel.aggregate([{
"$geoNear": {
"near": { "type": "Point", "coordinates": [parseFloat(long), parseFloat(lat), ] },
"distanceField": "dist.calculated",
"maxDistance": parseInt(maxDistance),
"includeLocs": "dist.location",
"spherical": true
}
},
{ "$match": { "$or": [{ "typology": "post" }, { "typology": "chat_group" }] } },
{
"$match": {
"createdAt": {
"$gte": '2020-07-15 23:54:38.673665',
"$lt": '2020-06-15 23:54:38.673665'
}
}
},
{ "$limit": limit },
{ "$skip": startIndex },
{ "$sort": { "createdAt": -1 } },
{
"$lookup": {
"from": userSchemaModel.collection.name,
"localField": "user_id",
"foreignField": "_id",
"as": "user_id"
}
},
{
"$project": {
"post_data": 1,
"likes": 1,
"commentsCount": 1,
"post_img": 1,
"isUserLiked": 1,
"usersLiked": 1,
'exp_date': 1,
"has_img": 1,
"user_id": {
"img": "$user_id.img",
"_id": "$user_id._id",
"user_name": "$user_id.user_name",
"bday": "$user_id.bday",
"imagesource": "$user_id.imagesource",
"fb_url": "$user_id.fb_url",
},
"typology": 1,
"geometry": 1,
"category": 1,
"created": 1,
"createdAt": 1,
"updatedAt": 1,
}
},
]).then(async function(posts) {
//some code here
}
});
UPDATE : Sample Output
{
"_id": "5f0bd1b7d6ed4f0017e5177c",
"post_data": "bitch boy sudesh",
"likes": 2,
"commentsCount": 1,
"post_img": null,
"isUserLiked": true,
"usersLiked": [
"5f0bfa296ee76f0017f13787",
"5ef60bba10e9090017e2c935"
],
"exp_date": "2020-07-16T00:00:00.000Z",
"has_img": false,
"user_id": [
{
"img": [
"default-user-profile-image.png"
],
"_id": [
"5ef9a7a2922eba0017ce47e0"
],
"user_name": [
"Sudesh"
],
"bday": [
"1997-05-02T00:00:00.000Z"
],
"imagesource": [
"fb"
],
"fb_url": [
"https://platform-lookaside.fbsbx.com/platform/profilepic/?asid=1846836948784193&width=400&ext=1596011605&hash=AeRsB0QJQH7edpRT"
]
}
],
"typology": "post",
"geometry": {
"pintype": "Point",
"_id": "5f0bd1b7d6ed4f0017e5177d",
"coordinates": [
79.9200017,
6.7088167
]
},
"category": [
{
"_id": "5f0bd1b7d6ed4f0017e5177e",
"catID": "5eef80cc5de48230887f3aa8",
"catName": "Chess"
},
{
"_id": "5f0bd1b7d6ed4f0017e5177e",
"catID": "5eef80cc5de48230887f3aa8",
"catName": "Rugby"
}
],
"created": 1594610103626,
"createdAt": "2020-07-13T03:15:03.629Z",
"updatedAt": "2020-07-18T14:02:35.080Z"
}
You can use some method if you only want to get true/false result:
category.some(element => array.includes(element.catName))
If you want to get an array of all the category objects with cat names that also exist in the array then you can filter method:
category.filter(element => array.includes(element.catName))
If you have an object called array in your code and you want to find at array of categories where cat names are in the array then you can add the condition to your $match stage:
{ "$match": { "$or": [{ "typology": "post" }, { "typology": "chat_group" }] }, "category.catName": { $in: array } }
Using another $match with "$elemMatch" solved the problem
"$match": {
"category": { "$elemMatch": { "catName": "Rugby", "catName": "Carrom" } },
}
I have a lot of documents with many attributes. After a specific $match pass, I end up with a subsection. Here it is simplified:
[
{"name": "foo", "code": "bbb"},
{"name": "foo", "code": "aaa"},
{"name": "foo", "code": "aaa"},
{"name": "foo", "code": "aaa"},
{"name": "bar", "code": "aaa"},
{"name": "bar", "code": "aaa"},
{"name": "bar", "code": "aaa"},
{"name": "baz", "code": "aaa"},
{"name": "baz", "code": "aaa"}
]
I would like to count the occurances of certain attributes so I end up with this (simplified):
{
"name": {
"foo": 4,
"bar": 3,
"baz": 2
},
"code": {
"bbb": 1,
"aaa": 8
}
}
(Or something close that I can 'translate' afterwards with Node.js)
I already do a $group stage to count other attributes (differently). Ideally I would $addToSet and also count how many times a similar value was added to the set. But I cannot figure out how.
Alternatively I was thinking to $push to end up with this (simplified):
{
"name": ["foo", "foo", "foo", "foo", "bar", "bar", "bar", "baz", "baz"],
"code": ["bbb", "aaa", "aaa", "aaa", "aaa", "aaa", "aaa", "aaa", "aaa", ]
}
But I can't figure out how to turn it into (something close to) the above hypothetical result either.
For single fields alone, the closest I can come is by using the above $push and then I can use $group:
"$group": {
"_id": {"_id": "$_id", "name": "$name"},
"nameCount": {"$sum": 1}
}
Now I have _id.name and nameCount. But I have lost all the previously counted attributes, 20 or so.
Is there a way to do (something close to) what I want?
Note: Using MongoDB 3.2
For MongoDB 3.2 you are pretty much limited to mapReduce if you want to return the "data" values as "keys" in a returned document. There is however the case to consider that you actually "do not need" MongoDB to do that part for you. But to consider the approaches:
Map Reduce
db.stuff.mapReduce(
function() {
emit(null, {
name: { [this.name]: 1 },
code: { [this.code]: 1 }
})
},
function(key,values) {
let obj = { name: {}, code: {} };
values.forEach(value => {
['name','code'].forEach(key => {
Object.keys(value[key]).forEach(k => {
if (!obj[key].hasOwnProperty(k))
obj[key][k] = 0;
obj[key][k] += value[key][k];
})
})
});
return obj;
},
{ "out": { "inline": 1 } }
)
Returns:
{
"_id" : null,
"value" : {
"name" : {
"foo" : 4.0,
"bar" : 3.0,
"baz" : 2.0
},
"code" : {
"bbb" : 1.0,
"aaa" : 8.0
}
}
}
Aggregate
For MongoDB 3.4 and upwards, you can use $arrayToObject to reshape as "key/value" objects. And a bit more efficiently than simply using $push to make two large arrays which would almost certainly break the BSON limit in real world cases.
This "more or less" mirrors the mapReduce() operations:
db.stuff.aggregate([
{ "$project": {
"_id": 0,
"data": [
{ "k": "name", "v": { "k": "$name", "count": 1 } },
{ "k": "code", "v": { "k": "$code", "count": 1 } }
]
}},
{ "$unwind": "$data" },
{ "$group": {
"_id": { "k": "$data.k", "v": "$data.v.k" },
"count": { "$sum": "$data.v.count" }
}},
{ "$group": {
"_id": "$_id.k",
"v": { "$push": { "k": "$_id.v", "v": "$count" } }
}},
{ "$group": {
"_id": null,
"data": { "$push": { "k": "$_id", "v": "$v" } }
}},
{ "$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$map": {
"input": "$data",
"in": {
"k": "$$this.k",
"v": { "$arrayToObject": "$$this.v" }
}
}
}
}
}}
])
Which has similar output ( without forcing ordering of keys by applying $sort ):
{
"code" : {
"bbb" : 1.0,
"aaa" : 8.0
},
"name" : {
"baz" : 2.0,
"foo" : 4.0,
"bar" : 3.0
}
}
So it's only really in the final stage where we actually use the new features, and the output up to that point is pretty similar, and would be easy to reshape in code:
{
"_id" : null,
"data" : [
{
"k" : "code",
"v" : [
{
"k" : "bbb",
"v" : 1.0
},
{
"k" : "aaa",
"v" : 8.0
}
]
},
{
"k" : "name",
"v" : [
{
"k" : "baz",
"v" : 2.0
},
{
"k" : "foo",
"v" : 4.0
},
{
"k" : "bar",
"v" : 3.0
}
]
}
]
}
So in fact we can do just that:
db.stuff.aggregate([
{ "$project": {
"_id": 0,
"data": [
{ "k": "name", "v": { "k": "$name", "count": 1 } },
{ "k": "code", "v": { "k": "$code", "count": 1 } }
]
}},
{ "$unwind": "$data" },
{ "$group": {
"_id": { "k": "$data.k", "v": "$data.v.k" },
"count": { "$sum": "$data.v.count" }
}},
{ "$group": {
"_id": "$_id.k",
"v": { "$push": { "k": "$_id.v", "v": "$count" } }
}},
{ "$group": {
"_id": null,
"data": { "$push": { "k": "$_id", "v": "$v" } }
}},
/*
{ "$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$map": {
"input": "$data",
"in": {
"k": "$$this.k",
"v": { "$arrayToObject": "$$this.v" }
}
}
}
}
}}
*/
]).map( doc =>
doc.data.map( d => ({
k: d.k,
v: d.v.reduce((acc,curr) =>
Object.assign(acc,{ [curr.k]: curr.v })
,{}
)
})).reduce((acc,curr) =>
Object.assign(acc,{ [curr.k]: curr.v })
,{}
)
)
Which just goes to show that simply because the aggregation framework does not have the features to use "named keys" in output for earlier versions, you generally do not need them. Since the only place we actually used the new features was in the "final" stage, but we can easily do the same by simply reshaping the final output in client code.
And of course, it's the same result:
[
{
"code" : {
"bbb" : 1.0,
"aaa" : 8.0
},
"name" : {
"baz" : 2.0,
"foo" : 4.0,
"bar" : 3.0
}
}
]
So it helps to learn the lesson of exactly "where" you actually need to apply such transformations. Here it's at the "end" since we do not need that during any "aggregation" stage, and thus you simply reshape the results that can be optimally provided from the aggregation framework itself.
The Bad Ways
As noted, your attempt so far may be fine for small data, but in most real world cases "pushing" all the items in a collection into a single document without reduction is going to break the 16MB BSON Limit.
Where it would actually stay under, then you can use something like this monster with $reduce:
db.stuff.aggregate([
{ "$group": {
"_id": null,
"name": { "$push": "$name" },
"code": { "$push": "$code" }
}},
{ "$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$map": {
"input": [
{ "k": "name", "v": "$name" },
{ "k": "code", "v": "$code" }
],
"as": "m",
"in": {
"k": "$$m.k",
"v": {
"$arrayToObject": {
"$reduce": {
"input": "$$m.v",
"initialValue": [],
"in": {
"$cond": {
"if": {
"$in": [
"$$this",
{ "$map": {
"input": "$$value",
"as": "v",
"in": "$$v.k"
}}
]
},
"then": {
"$concatArrays": [
{ "$filter": {
"input": "$$value",
"as": "v",
"cond": { "$ne": [ "$$v.k", "$$this" ] }
}},
[{
"k": "$$this",
"v": {
"$sum": [
{ "$arrayElemAt": [
"$$value.v",
{ "$indexOfArray": [ "$$value.k", "$$this" ] }
]},
1
]
}
}]
]
},
"else": {
"$concatArrays": [
"$$value",
[{ "k": "$$this", "v": 1 }]
]
}
}
}
}
}
}
}
}
}
}
}}
])
Which produces:
{
"name" : {
"foo" : 4.0,
"bar" : 3.0,
"baz" : 2.0
},
"code" : {
"bbb" : 1.0,
"aaa" : 8.0
}
}
Or indeed the same reduction process in client code:
db.stuff.aggregate([
{ "$group": {
"_id": null,
"name": { "$push": "$name" },
"code": { "$push": "$code" }
}},
]).map( doc =>
["name","code"].reduce((acc,curr) =>
Object.assign(
acc,
{ [curr]: doc[curr].reduce((acc,curr) =>
Object.assign(acc,
(acc.hasOwnProperty(curr))
? { [curr]: acc[curr] += 1 }
: { [curr]: 1 }
),{}
)
}
),
{}
)
)
Which again has the same result:
{
"name" : {
"foo" : 4.0,
"bar" : 3.0,
"baz" : 2.0
},
"code" : {
"bbb" : 1.0,
"aaa" : 8.0
}
}
I have the following document in my collection.
{
"_id" : ObjectId("55961a28bffebcb8058b4570"),
"title" : "BackOffice 2",
"cts" : NumberLong(1435900456),
"todo_items" : [
{
"id" : "55961a42bffebcb7058b4570",
"task_desc" : "test 1",
"completed_by" : "557fccb5bffebcf7048b457c",
"completed_date" : NumberLong(1436161096)
},
{
"id" : "559639afbffebcc7098b45a6",
"task_desc" : "test 2",
"completed_by" : "557fccb5bffebcf7048b457c",
"completed_date" : NumberLong(1435911809)
},
{
"id" : "559a22f5bffebcb0048b476c",
"task_desc" : "test 3",
}
],
"uts" : NumberLong(1436164853)
}
I need an aggregation query to perform following, if there is field "completed_by" and "completed_date" and if there is a value which is not null push in to the "completed" array field, otherwise push them into the "incomplete" field.
Following is a sample result I want.
{
"_id" : ObjectId("55961a28bffebcb8058b4570"),
"completed" : [
{
"id":"557fccb5bffebcf7048b457c",
"title":"test 1",
"completed_by" : "557fccb5bffebcf7048b457c",
"completed_date" : NumberLong(1436161096)
},
{
"id":"557fccb5bffebcf7048b457c",
"title":"test 1",
"completed_by" : "557fccb5bffebcf7048b457c",
"completed_date" : NumberLong(1436161096)
}
],
"incomplete":[
{
"id" : "559a22f5bffebcb0048b476c",
"title" : "test 3"
}
]
}
As long as your "array" items have "distinct" identifiers ( which they have ) there are a couple of approaches to this;
Firstly, without actually "aggregating accross documents":
db.collection.aggregate([
{ "$project": {
"title": 1,
"cts": 1,
"completed": { "$setDifference": [
{ "$map": {
"input": "$todo_items",
"as": "i",
"in": {
"$cond": [
"$$i.completed_date",
"$$i",
false
]
}
}},
[false]
]},
"incomplete": { "$setDifference": [
{ "$map": {
"input": "$todo_items",
"as": "i",
"in": {
"$cond": [
"$$i.completed_date",
false,
"$$i"
]
}
}},
[false]
]}
}}
])
That requires that you at least have MongoDB 2.6 available on the server in order to use the required $map and $setDifference operators. It's pretty fast considering that all the work is done in a single $project stage.
The alternative, which you should only use when "aggregating across documents", is available to all versions supporting the aggregation framework post MongoDB 2.2:
db.collection.aggregate([
{ "$unwind": "$todo_items" },
{ "$group": {
"_id": "$_id",
"title": { "$first": "$title" },
"cts": { "$first": "$cts" },
"completed": {
"$addToSet": {
"$cond": [
"$todo_items.completed_date",
"$todo_items",
null
]
}
},
"incomplete": {
"$addToSet": {
"$cond": [
"$todo_items.completed_date",
null,
"$todo_items",
]
}
}
}},
{ "$unwind": "$completed" },
{ "$match": { "completed": { "$ne": null } } },
{ "$group": {
"_id": "$_id",
"title": { "$first": "$title" },
"cts": { "$first": "$cts" },
"completed": { "$push": "$completed" },
"incomplete": { "$first": "$incomplete" }
}}
{ "$unwind": "$incomplete" },
{ "$match": { "incomplete": { "$ne": null } } },
{ "$group": {
"_id": "$_id",
"title": { "$first": "$title" },
"cts": { "$first": "$cts" },
"completed": { "$first": "$completed" },
"incomplete": { "$push": "$incomplete" }
}}
])
Which isn't entirely all there since you need to cater for conditions where an array may end up empty. But that is not the real lesson here since MongoDB 2.6 is already a couple of years in circulation.
In aggregation, you cannot really exclude the "null/false" results, but you can "filter" them.
Also, unless you are actually "aggregating accross documents" as mentioned already, then the second form with $unwind to process the arrays comes with a "lot" of overhead. So you really should be altering the array contents in your client code as each document is read.
Can you please check the below :
db.collection.aggregate([
{$unwind : "$todo_items"},
{$group: {_id : "$_id" , completed : {{$cond :
{
if : { $and : [ {"todo_items.completed_by" : {$exists: true, $ne : null }},
{"todo_items.completed_date" : {$exists : true, $ne : null}} ] } },
then : {$push : {"old_completed" : "$todo_items"}},
else: {$push : {"old_incompleted" : "$todo_items"}}
} } } },
{$project: {_id : "$_id", completed : "$completed.old_completed" ,
incompleted : "$completed.old_incompleted"}}
]);