I'm trying to learn the aggregation concept in MongoDB. I created an object like this for practice:
"_id": "601c4bb56e018211b02abbf8",
"isDeleted": false,
"name": "TeacherName1",
"class": "7",
"students": [
{ "_id": "601c4bb56e018211b02abbf9", isDeleted:true, "name": "student-1", "studentGroup": "A", "avgResult": 36},
{ "_id": "601c4bb56e018211b02abbfa", isDeleted:false, "name": "student-2", "studentGroup": "A", "avgResult": 55},
{ "_id": "601c4bb56e018211b02abbfb", isDeleted:false, "name": "student-3", "studentGroup": "B", "avgResult": 44.66},
{ "_id": "601c4bb56e018211b02abbfc", isDeleted:false, "name": "student-4", "studentGroup": "C", "avgResult": 83.66},
{ "_id": "601c4bb56e018211b02abbfd", isDeleted:true, "name": "student-5", "studentGroup": "B", "avgResult": 37},
{ "_id": "601c4bb56e018211b02abbfe", isDeleted:true, "name": "student-6", "studentGroup": "C", "avgResult": 39.66},
]
I want to get the teacher information together with only the deleted students (isDeleted=true). So I want to get this result:
"_id": "601c4bb56e018211b02abbf8",
"isDeleted": false,
"name": "TeacherName1",
"class": "7",
"students": [
{ "_id": "601c4bb56e018211b02abbf9", isDeleted:true, ...},
{ "_id": "601c4bb56e018211b02abbfd", isDeleted:true, ...},
{ "_id": "601c4bb56e018211b02abbfe", isDeleted:true, ...},
]
I can get this result using $unwind and $filter, but can I get it with only $elemMatch?
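For context, a rough sketch of the kind of $unwind-based pipeline I mean (simplified, not my exact code):

this.aggregate([
  {
    $match: {
      _id: mongoose.Types.ObjectId("601c4bb56e018211b02abbf8"),
      isDeleted: false
    }
  },
  // flatten the students array so each student can be matched on its own
  { $unwind: "$students" },
  { $match: { "students.isDeleted": true } },
  // rebuild the document with only the deleted students
  {
    $group: {
      _id: "$_id",
      isDeleted: { $first: "$isDeleted" },
      name: { $first: "$name" },
      class: { $first: "$class" },
      students: { $push: "$students" }
    }
  }
]);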
If I use this query:
this.aggregate([
{
$match: {
_id: mongoose.Types.ObjectId("601c4bb56e018211b02abbf8"),
isDeleted: false,
"students.isDeleted":true
},
},
]);
It returns the whole object; the students array is not filtered.
If I try this:
this.aggregate([
{
$match: {
_id: mongoose.Types.ObjectId("601c4bb56e018211b02abbf8"),
isDeleted: false,
students:{
$elemMatch:{
isDeleted:true
}
}
},
},
]);
It also returns the whole object, with all students.
$match will just give you the whole document should it match; it does not filter the array elements.
However, you may use $project with $filter in another stage.
Given:
db.dummy.insert({studs:[{isDeleted:true, name:'a'},{isDeleted: true, name:'b'},{name:'c'}]})
db.dummy.insert({studs:[{name:'c'}]})
> match = {$match:{studs:{$elemMatch: {isDeleted:true}}}}
> project = {$project: { deletedStuds: {$filter:{input: '$studs', as:'stud', cond:{ $eq: ['$$stud.isDeleted', true]} } } }}
{
"$project" : {
"deletedStuds" : {
"$filter" : {
"input" : "$studs",
"as" : "stud",
"cond" : {
"$eq" : [
"$$stud.isDeleted",
true
]
}
}
}
}
}
> db.dummy.aggregate(match, project)
{ "_id" : ObjectId("6020351eb965951ac8a1eb62"), "deletedStuds" : [ { "isDeleted" : true, "name" : "a" }, { "isDeleted" : true, "name" : "b" } ] }
In my API I'm receiving JSON with two arrays, one with employees and another with managers. The goal is to add the managers to each employee, knowing that the 1st index of the managers corresponds to the 1st index of the employees, and so on.
This is an example of the request to the API:
{
"employees" : [ {
"code" : "111111",
"name" : "Zé"
},
{
"code" : "222222",
"name" : "João"
},
{
"code" : "444444",
"name" : "António"
}],
"managers" : [
[
{
"name": "vitor",
"level" : "1"
},
{
"name": "Antonio",
"level" : "2"
}
],
[
{
"name": "Jose",
"level" : "1"
},
{
"name": "Ines",
"level" : "2"
}
],
[
{
"name": "Luis",
"level" : "1"
},
{
"name": "Ana",
"level" : "2"
}
]
]
}
and my goal is to get something like this:
[
{
"code": "111111",
"name": "Zé",
"managers": [
{
"name": "vitor",
"level": "1"
},
{
"name": "Antonio",
"level": "2"
}
]
},
{
"code": "222222",
"name": "João",
"managers": [
{
"name": "Jose",
"level": "1"
},
{
"name": "Ines",
"level": "2"
}
]
},
{
"code": "444444",
"name": "António",
"managers": [
{
"name": "Jose",
"level": "1"
},
{
"name": "Ines",
"level": "2"
}
]
}
]
I already tried some things but I can't get the expected result.
Hope you can help me, thanks!
function associate_employee_managers({ employees, managers }) {
return employees && employees.length
&& employees
.map((emp, index) => {
return {
...emp,
["managers"]: managers && managers[index] || []
};
}) || {};
}
Considerations
There is a 1-1 correspondence (based on index) between entries in the employees array and the managers array
Illustration
function associate_employee_managers({
employees,
managers
}) {
return employees && employees.length &&
employees
.map((emp, index) => {
return {
...emp,
["managers"]: managers && managers[index] || []
};
}) || {};
}
const jsonResponse = {
"employees": [{
"code": "111111",
"name": "Zé"
},
{
"code": "222222",
"name": "João"
},
{
"code": "444444",
"name": "António"
}
],
"managers": [
[{
"name": "vitor",
"level": "1"
},
{
"name": "Antonio",
"level": "2"
}
],
[{
"name": "Jose",
"level": "1"
},
{
"name": "Ines",
"level": "2"
}
],
[{
"name": "Luis",
"level": "1"
},
{
"name": "Ana",
"level": "2"
}
]
]
}
console.log(associate_employee_managers(jsonResponse));
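Running the snippet, the log should show each employee paired with the managers entry at the same index, roughly:

// console output (abridged)
[
  { code: "111111", name: "Zé", managers: [{ name: "vitor", level: "1" }, { name: "Antonio", level: "2" }] },
  { code: "222222", name: "João", managers: [{ name: "Jose", level: "1" }, { name: "Ines", level: "2" }] },
  { code: "444444", name: "António", managers: [{ name: "Luis", level: "1" }, { name: "Ana", level: "2" }] }
]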
WYSIWYG => WHAT YOU SHOW IS WHAT YOU GET
I'm having an issue producing counts for the items returned from an array without assuming or hard-coding those field names in my aggregation.
Data structure looks like this:
[
{
"_id": "1",
"title": "Vanella Icream",
"contain": "sugar",
"details": [
{
"flavour": "Vanella"
},
{
"weight": "10KG"
},
{
"sugar": "15KG"
}
]
},
{
"_id": "2",
"title": "Pretzels",
"contain": "salt",
"details": [
{
"flavour": "Wheat"
},
{
"weight": "10KG"
},
{
"sugar": "15KG"
}
]
},
{
"_id": "3",
"title": "Rasmalai Icream",
"contain": "sugar",
"details": [
{
"flavour": "Vanella"
},
{
"weight": "15KG"
},
{
"sugar": "12KG"
}
]
},
{
"_id": "4",
"title": "Vanella Icream",
"contain": "sugar",
"details": [
{
"flavour": "Vanella"
},
{
"weight": "15KG"
},
{
"sugar": "12KG"
}
]
}
]
Output I want:
[
{
"details": {
"flavour": {
"Vanella": 3, //Number of times Vanella present in each document.
"Wheat": 1,
},
"weight": {
"10KG": 2,
"15KG": 2
},
"sugar": {
"12KG": 2,
"15KG": 2
}
}
}
]
Query:
db.collection.aggregate([
{
"$unwind": {
"path": "$details"
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$details",
"$$ROOT"
]
}
}
},
{
"$facet": {
"flavour": [
{
"$group": {
"_id": "$flavour",
"sum": {
"$sum": 1
}
}
},
{
"$addFields": {
"flavour": "$_id"
}
},
{
"$project": {
"_id": 0
}
}
],
"weight": [
{
"$group": {
"_id": "$weight",
"sum": {
"$sum": 1
}
}
},
{
"$addFields": {
"weight": "$_id"
}
},
{
"$project": {
"_id": 0
}
}
]
}
},
{
"$addFields": {
"flavour": {
"$reduce": {
"input": {
"$filter": {
"input": {
"$map": {
"input": "$flavour",
"as": "w",
"in": {
"$cond": [
{
"$ne": [
"$$w.flavour",
null
]
},
{
"$let": {
"vars": {
"o": [
[
"$$w.flavour",
"$$w.sum"
]
]
},
"in": {
"$arrayToObject": "$$o"
}
}
},
null
]
}
}
},
"as": "f",
"cond": {
"$ne": [
"$$f",
null
]
}
}
},
"initialValue": {},
"in": {
"$let": {
"vars": {
"d": "$$value",
"p": "$$this"
},
"in": {
"$mergeObjects": [
"$$d",
"$$p"
]
}
}
}
}
},
"weight": {
"$reduce": {
"input": {
"$filter": {
"input": {
"$map": {
"input": "$weight",
"as": "w",
"in": {
"$cond": [
{
"$ne": [
"$$w.weight",
null
]
},
{
"$let": {
"vars": {
"o": [
[
"$$w.weight",
"$$w.sum"
]
]
},
"in": {
"$arrayToObject": "$$o"
}
}
},
null
]
}
}
},
"as": "f",
"cond": {
"$ne": [
"$$f",
null
]
}
}
},
"initialValue": {},
"in": {
"$let": {
"vars": {
"d": "$$value",
"p": "$$this"
},
"in": {
"$mergeObjects": [
"$$d",
"$$p"
]
}
}
}
}
}
}
},
{
"$project": {
"details": "$$ROOT"
}
}
])
Here I'm trying to get the flavour and weight with their counts by manually adding those field names in the $facet and $filter stages. I want to do it without assuming those keys, so that even if there are 20 items present in the details array, it will map those items and show me their counts.
I hope you guys understand.
Playground: https://mongoplayground.net/p/j1mzgWvcmvd
You need to change the schema. The thing you want to do is easy, but both of those queries are complicated and slow because of the schema of the data and the schema of the desired answer; even the second one, which is much smaller, has 2 $unwind and 3 $group with 3 $arrayToObject, 8 stages in total.
Don't store data in the keys of your documents. People who are new to MongoDB do this (I was doing it too), but it makes everything harder. (I can't say never do it, but you don't need it here.)
Your schema should be something like
{
"_id": "2",
"title": "Pretzels",
"contain": "salt",
"details": [
{
"type" : "flavour",
"value" : "Wheat"
},
{
"type" : "weight",
"value" : "10KG"
},
{
"type" : "sugar",
"value" : "15KG"
}
]
}
See this example.
It converts your schema to the new schema and produces the results you want, but without data in keys. (You wouldn't need the first part; you would only need the query below if you had that schema from the start.)
Query with the new schema (no data in keys):
[{"$unwind": { "path": "$details"}},
{"$replaceRoot": {"newRoot": "$details"}},
{
"$group": {
"_id": {
"type": "$type",
"value": "$value"
},
"sum": {"$sum": 1}
}
},
{
"$replaceRoot": {
"newRoot": {"$mergeObjects": ["$_id","$$ROOT"]}
}
},
{"$project": {"_id": 0}},
{
"$group": {
"_id": "$type",
"values": {
"$push": {
"value": "$value",
"sum": "$sum"
}
}
}
},
{"$addFields": {"type": "$_id"}},
{"$project": {"_id": 0}}
]
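With your four sample documents converted to the new schema, that pipeline should return something like the following (document order may vary):

[
  { "values": [{ "value": "Vanella", "sum": 3 }, { "value": "Wheat", "sum": 1 }], "type": "flavour" },
  { "values": [{ "value": "10KG", "sum": 2 }, { "value": "15KG", "sum": 2 }], "type": "weight" },
  { "values": [{ "value": "15KG", "sum": 2 }, { "value": "12KG", "sum": 2 }], "type": "sugar" }
]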
MongoDB operators are not made to support data in keys or dynamic (unknown) keys; to work with them you end up writing complicated things like the above.
If you want to change your schema, either do it with an update in the database, or take the documents into the application, convert them with JavaScript, and re-insert them.
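A rough sketch of that shell/JavaScript conversion (this assumes your collection is called collection and that every entry in details has exactly one key; treat it as illustrative, not production code):

db.collection.find().forEach(function (doc) {
  // turn { "flavour": "Vanella" } style entries into { "type": "flavour", "value": "Vanella" }
  var newDetails = doc.details.map(function (entry) {
    var key = Object.keys(entry)[0];          // the single dynamic key, e.g. "flavour"
    return { type: key, value: entry[key] };  // the data is moved out of the key
  });
  db.collection.updateOne({ _id: doc._id }, { $set: { details: newDetails } });
});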
Even if you solve this question, you will run into problems again with the next one.
I'm the guy from the MongoDB forum:
Try this out: https://mongoplayground.net/p/tfyfpIkHilQ
In my project I have user objects like this.
{
"_id": "1",
"username": "RAggro",
"name": "Vardan Tadevosyan"
},
{
"_id": "2",
"username": "XACHIK",
"name": "XACHIK"
},
{
"_id": "3",
"username": "vardar",
"name": "Vardan Gukoyan"
},
{
"_id": "4",
"username": "Gordey",
"name": "Gordey Gordeev"
},
{
"_id": "5",
"username": "id220107973",
"name": "Vardan Ayvazyan"
},
{
"_id": "6",
"username": "vvardanyan4",
"name": "Vardan Vardanyan"
},
{
"_id": "7",
"username": "svardan",
"name": "Vardan Sargsyan"
}
And I have a list of _id-s, like [51, 3, 9, 11, 6, 2].
I want to query users by 'name' and 'username', ordered so that users whose _id is in the ids array come first, followed by the others. My query:
query: {
multi_match: {
query: "vardan",
fields: ["name", "username"],
operator: "or"
},
boosting: {
positive: {
term: {
_id: [51,3,9,11,6, 2]
}
},
positive_boost: 2.0
}
}
So the expected result is:
{
"_id": "3",
"username": "vardar",
"name": "Vardan Gukoyan"
},
{
"_id": "6",
"username": "vvardanyan4",
"name": "Vardan Vardanyan"
},
{
"_id": "1",
"username": "RAggro",
"name": "Vardan Tadevosyan"
},
{
"_id": "5",
"username": "id220107973",
"name": "Vardan Ayvazyan"
},
{
"_id": "7",
"username": "svardan",
"name": "Vardan Sargsyan"
}
But I'm getting an empty array.
Please help: how can I modify my query to get the expected ordered result?
You can do it easily using a bool/should clause that will boost the documents whose IDs are within the specified group:
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "vardan",
"fields": [
"name",
"username"
],
"operator": "or"
}
}
],
"should": [
{
"ids": {
"values": ["51","3","9","11","6","2"]
}
}
]
}
}
}
It works using this query:
{
query:{
"bool": {
"must": [{
"multi_match": {
"query": "vardan",
"fields": [
"name",
"username"
],
"operator": "or"
}
}],
"should": [{
"terms": {
"_id": ["51","3","9","11","6","2"],
"boost": 100
}
}]
}
}
}
I have a dataset that looks something like this:
{
"id": "02741544",
"items": [{
"item": "A"
}]
}, {
"id": "02472691",
"items": [{
"item": "A"
}, {
"item": "B"
}, {
"item": "C"
}]
}, {
"id": "01316523",
"items": [{
"item": "A"
}, {
"item": "B"
}]
}, {
"id": "01316526",
"items": [{
"item": "A"
}, {
"item": "B"
}]
}, {
"id": "01316529",
"items": [{
"item": "A"
}, {
"item": "D"
}]
},
I'm trying to craft a query that will give me an output that looks like this:
{
"item": "A",
"ids": [{
"id": "02741544"
}, {
"id": "02472691"
}, {
"id": "01316523"
}, {
"id": "01316526"
}, {
"id": "01316529"
}]
}, {
"item": "B",
"ids": [{
"id": "02472691"
}, {
"id": "01316523"
}, {
"id": "01316526"
}]
}, {
"item": "C",
"ids": [{
"id": "02472691"
}]
}, {
"item": "D",
"ids": [{
"id": "02472691"
}]
},
Basically, I'm trying to get the distinct items from the items array in each object, and then return an array of ids for each object that has that item in its items array.
Better to use the aggregation framework, in which you need to run an operation that consists of the following pipeline steps (in the given order):
$unwind - This initial step will flatten the items array i.e. it produces a copy of each document per array entry. This is necessary for processing the documents further down the pipeline as "denormalised" documents which you can aggregate as groups.
$group - This will group the flattened documents by the item subdocument key and create the ids list by using the $push accumulator operator.
-- UPDATE --
As #AminJ pointed out in the comments, if items can have duplicate item values and you don't want duplicate ids in the result you can use $addToSet instead of $push
The following example demonstrates this:
db.collection.aggregate([
{ "$unwind": "$items" },
{
"$group": {
"_id": "$items.item",
"ids": {
"$push": { "id": "$id" } /* or use
"$addToSet": { "id": "$id" } if you don't want duplicate ids */
}
}
}
])
Sample Output
{
"_id" : "A",
"ids" : [
{ "id" : "02741544" },
{ "id" : "02472691" },
{ "id" : "01316523" },
{ "id" : "01316526" },
{ "id" : "01316529" }
]
}
/* 2 */
{
"_id" : "B",
"ids" : [
{ "id" : "02472691" },
{ "id" : "01316523" },
{ "id" : "01316526" }
]
}
/* 3 */
{
"_id" : "C",
"ids" : [
{ "id" : "02472691" }
]
}
/* 4 */
{
"_id" : "D",
"ids" : [
{ "id" : "01316529" }
]
}
The result from an aggregate() function is a cursor to the documents produced by the final stage of the aggregation pipeline operation. So if you want the results in an array you can use the cursor's toArray() method which returns an array that contains all the documents from it.
For example:
var pipeline = [
{ "$unwind": "$items" },
{
"$group": {
"_id": "$items.item",
"ids": {
"$push": { "id": "$id" } /* or use
"$addToSet": { "id": "$id" } if you don't want duplicate ids */
}
}
}
],
results = db.collection.aggregate(pipeline).toArray();
printjson(results);
Here's a solution using an aggregation pipeline:
db.col.aggregate([
{
$unwind: "$items"
},
{
$project: {
id: 1,
item: "$items.item"
}
},
{
$group: {
_id: "$item",
ids: {
$push: "$id"
}
}
}
])
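Note that because this pipeline pushes the bare "$id" value, the ids come back as plain strings instead of { "id": ... } sub-documents; the output should look something like:

{ "_id" : "A", "ids" : [ "02741544", "02472691", "01316523", "01316526", "01316529" ] }
{ "_id" : "B", "ids" : [ "02472691", "01316523", "01316526" ] }
{ "_id" : "C", "ids" : [ "02472691" ] }
{ "_id" : "D", "ids" : [ "01316529" ] }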
I have this example items collection:
{
"_id": "1",
"field1": "value1",
"field2": "value2",
"category": "phones",
"user": "1",
"tags": [
"tag1",
"tag3"
]
},
{
"_id": "2",
"field1": "value1",
"field2": "value2",
"category": "phones",
"user": "1",
"tags": [
"tag2",
"tag3"
]
},
{
"_id": "3",
"field1": "value1",
"field2": "value2",
"category": "bikes",
"user": "1",
"tags": [
"tag3",
"tag4"
]
},
{
"_id": "4",
"field1": "value1",
"field2": "value2",
"category": "cars",
"user": "2",
"tags": [
"tag1",
"tag2"
]
}
I would like to search for the items created by a specific user (i.e. user: 1) and group them by the category field. Result:
{
"phones": [
{
"_id": "1",
"field1": "value1",
"field2": "value2",
"tags": [
"tag1",
"tag3"
]
},
{
"_id": "2",
"field1": "value1",
"field2": "value2",
"tags": [
"tag2",
"tag3"
]
}
],
"bikes" : [
{
"_id": "3",
"field1": "value1",
"field2": "value2",
"tags": [
"tag3",
"tag4"
]
}
]
}
Is it possible to obtain this shape with aggregation/group functions?
Thank you
It is possible to group by the category, but not in the way you present it. This is really a good thing because your "category" is actually data and you should really not be representing "data" as a "key", in either your storage or your output.
So it would really be recommended to transform like this:
db.collection.aggregate([
{ "$match": { "user": 1 } },
{ "$group": {
"_id": "$category",
"items": {
"$push": {
"field1": "$field1",
"field2": "$field2",
"tags": "$tags"
}
}
}},
{ "$group": {
"_id": null,
"categories": {
"$push": {
"_id": "$_id",
"items": "$items"
}
}
}}
])
You get output like this:
{
"_id" : null,
"categories" : [
{
"_id" : "bikes",
"items" : [
{
"_id": 3,
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag3",
"tag4"
]
}
]
},
{
"_id" : "phones",
"items" : [
{
"_id": 1,
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag1",
"tag3"
]
},
{
"_id": 2,
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag2",
"tag3"
]
}
]
}
]
}
It really is better to have generic key names that do not change with the data. This is in fact the object-oriented pattern.
If you really think you need the "data as keys" here, for the aggregation framework you either live with knowing the "categories" that you are expecting, or are otherwise prepared to generate the pipeline stages:
db.utest.aggregate([
{ "$match": { "user": "1" } },
{ "$group": {
"_id": null,
"phones": {
"$push": {
"$cond": [
{ "$eq": ["$category","phones"] },
{
"_id": "$_id",
"field1": "$field1",
"field2": "$field2",
"tags": "$tags"
},
false
]
}
},
"bikes": {
"$push": {
"$cond": [
{ "$eq": ["$category","bikes"] },
{
"_id": "$_id",
"field1": "$field1",
"field2": "$field2",
"tags": "$tags"
},
false
]
}
}
}},
{ "$unwind": "$phones" },
{ "$match": { "phones": { "$ne": false } }},
{ "$group": {
"_id": "$_id",
"phones": { "$push": "$phones" },
"bikes": { "$first": "$bikes" }
}},
{ "$unwind": "$bikes" },
{ "$match": { "bikes": { "$ne": false } }},
{ "$group": {
"_id": "$_id",
"phones": { "$first": "$phones" },
"bikes": { "$push": "$bikes" }
}},
{ "$project": {
"_id": 0,
"phones": 1,
"bikes": 1
}}
])
You can shorten that a bit with MongoDB 2.6, since you can just filter out the false values with the $setDifference operator:
db.collection.aggregate([
{ "$match": { "user": "1" } },
{ "$group": {
"_id": null,
"phones": {
"$push": {
"$cond": [
{ "$eq": ["$category","phones"] },
{
"_id": "$_id",
"field1": "$field1",
"field2": "$field2",
"tags": "$tags"
},
false
]
}
},
"bikes": {
"$push": {
"$cond": [
{ "$eq": ["$category","bikes"] },
{
"_id": "$_id",
"field1": "$field1",
"field2": "$field2",
"tags": "$tags"
},
false
]
}
}
}},
{ "$project": {
"_id": 0,
"phones": { "$setDifference": ["$phones",[false]] },
"bikes": { "$setDifference": ["$bikes",[false]] }
}}
])
Both produce output just how you want it:
{
"phones" : [
{
"_id" : "1",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag1",
"tag3"
]
},
{
"_id" : "2",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag2",
"tag3"
]
}
],
"bikes" : [
{
"_id" : "3",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag3",
"tag4"
]
}
]
}
The general case here is that the aggregation framework just won't allow field data to be used as a key, so you need to either just group on the data or specify the key names yourself.
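If the category names are known to the application (even if only at runtime), those repetitive $group/$project entries can be generated rather than hand-written. A minimal sketch, assuming a categories list you maintain yourself, building the MongoDB 2.6 $setDifference version shown above:

var categories = ["phones", "bikes"];  // assumed to be known in advance

var groupStage = { "$group": { "_id": null } };
var projectStage = { "$project": { "_id": 0 } };

categories.forEach(function (cat) {
  // push the matching document, or false when the category does not match
  groupStage["$group"][cat] = {
    "$push": {
      "$cond": [
        { "$eq": ["$category", cat] },
        { "_id": "$_id", "field1": "$field1", "field2": "$field2", "tags": "$tags" },
        false
      ]
    }
  };
  // strip the false placeholders again
  projectStage["$project"][cat] = { "$setDifference": ["$" + cat, [false]] };
});

db.collection.aggregate([{ "$match": { "user": "1" } }, groupStage, projectStage]);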
The only way you get "dynamic" key names is by using mapReduce instead:
db.collection.mapReduce(
function () {
var obj = { };
var category = this.category;
delete this.user;
delete this.category;
obj[category] = [this];
emit(null,obj);
},
function (key,values) {
var reduced = {};
values.forEach(function(value) {
Object.keys(value).forEach(function(key) {
if ( !reduced.hasOwnProperty(key) )
reduced[key] = [];
value[key].forEach(function(item) {
reduced[key].push(item);
});
});
});
return reduced;
},
{
"query": { "user": "1" },
"out": { "inline": 1 }
}
)
So now the key generation is dynamic, but the output is done in a very mapReduce way:
{
"_id" : null,
"value" : {
"phones" : [
{
"_id" : "1",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag1",
"tag3"
]
},
{
"_id" : "2",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag2",
"tag3"
]
}
],
"bikes" : [
{
"_id" : "3",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag3",
"tag4"
]
}
]
}
}
So the output is constrained by how mapReduce structures its output, and evaluating the JavaScript here will be slower than the native operations of the aggregation framework. More power in manipulation, but that is the trade-off.
To conclude: if you stick with the pattern, then the first approach with the aggregation framework is the fastest and best way to do this, plus you can always restructure the result once it is returned from the server. If you insist on breaking the pattern and need dynamic keys to come from the server, then mapReduce will do it where the aggregation framework is impractical.
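For completeness, reshaping the first pipeline's result on the client takes only a few lines of JavaScript (a sketch; doc is assumed to be the single document that aggregation returns):

// turn [{ _id: "phones", items: [...] }, { _id: "bikes", items: [...] }] into { phones: [...], bikes: [...] }
var keyed = {};
doc.categories.forEach(function (category) {
  keyed[category._id] = category.items;   // e.g. keyed.phones, keyed.bikes
});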