MongoDB Aggregate Group and Summarise - javascript

I have an aggregate function that works well by grouping data from 2 collections by Month.
// Monthly statement for one merchant: contract totals grouped by purchase
// month, joined with the payments recorded for the same month, plus the
// derived amount due, outstanding balance and payment status.
var pipeline = [
  // Restrict the input to a single merchant's contracts.
  {
    "$match": {
      "merchant": new ObjectID("5f2a4e4efb740d9c6e810d67")
    }
  },
  // One document per purchase month ("YYYY-MM") carrying the summed amounts.
  {
    "$group": {
      "_id": { "$dateToString": { "format": "%Y-%m", "date": "$purchaseDate" } },
      "totalWarrantyWholesalePrice": { "$sum": "$warrantyWholesalePrice" },
      "totalWarrantyPrice": { "$sum": "$warrantyPrice" },
      "totalSaleAmount": { "$sum": "$purchasePrice" },
      "totalCoverAmount": { "$sum": "$suggestedRetailPrice" },
      "totalContracts": { "$sum": 1 }
    }
  },
  // Attach every transaction whose date falls in the same "YYYY-MM" month.
  // NOTE(review): the sub-pipeline does not filter by merchant, so payments
  // of any merchant in that month are joined - confirm this is intended.
  {
    "$lookup": {
      "from": "transactions",
      "let": { "statementMonth": "$_id" },
      "pipeline": [
        {
          "$addFields": {
            "transMonth": { "$dateToString": { "format": "%Y-%m", "date": "$date" } }
          }
        },
        {
          "$match": {
            "$expr": { "$eq": ["$$statementMonth", "$transMonth"] }
          }
        }
      ],
      "as": "transactions"
    }
  },
  // Flatten the joined payments; months without any payment are kept.
  {
    "$unwind": {
      "path": "$transactions",
      "preserveNullAndEmptyArrays": true
    }
  },
  // Collapse back to one document per month. The monthly totals are repeated
  // on every unwound row, so $first restores them; only amountPaid is summed.
  // (The former "$sum": "$totalAmountDue" accumulator was removed: that field
  // does not exist at this point, so it was always 0 and then overwritten.)
  {
    "$group": {
      "_id": "$_id",
      "totalContracts": { "$first": "$totalContracts" },
      "totalWarrantyWholesalePrice": { "$first": "$totalWarrantyWholesalePrice" },
      "totalWarrantyPrice": { "$first": "$totalWarrantyPrice" },
      "totalSaleAmount": { "$first": "$totalSaleAmount" },
      "totalCoverAmount": { "$first": "$totalCoverAmount" },
      "totalAmountPaid": { "$sum": "$transactions.amountPaid" }
    }
  },
  // Amount due = retail warranty price minus wholesale price, to 2 decimals.
  {
    "$addFields": {
      "totalAmountDue": {
        "$round": [
          { "$subtract": ["$totalWarrantyPrice", "$totalWarrantyWholesalePrice"] },
          2
        ]
      }
    }
  },
  // Balance is derived from the rounded amount due so that the reported
  // figures always add up. It needs its own stage because $addFields cannot
  // read a field that is defined in the same stage.
  {
    "$addFields": {
      "balance": {
        "$round": [
          { "$subtract": ["$totalAmountDue", "$totalAmountPaid"] },
          2
        ]
      }
    }
  },
  // Status is taken from the rounded balance, so floating-point noise such as
  // 0.000000000001 can no longer show a zero balance that is not "PAID".
  // A negative balance (overpayment) still falls through to "DUE", as before.
  {
    "$addFields": {
      "status": {
        "$switch": {
          "branches": [
            {
              "case": { "$eq": ["$balance", 0] },
              "then": "PAID"
            },
            {
              "case": {
                "$and": [
                  { "$gt": ["$balance", 0] },
                  { "$gt": ["$totalAmountPaid", 0] }
                ]
              },
              "then": "PARTIAL"
            }
          ],
          "default": "DUE"
        }
      }
    }
  },
  // Newest month first.
  {
    "$sort": {
      "_id": -1
    }
  }
];
This produces the following output:
{
"_id" : "2020-08",
"totalContracts" : 10.0,
"totalWarrantyWholesalePrice" : NumberInt(109),
"totalWarrantyPrice" : 163.55,
"totalSaleAmount" : NumberInt(9000),
"totalCoverAmount" : NumberInt(10000),
"totalAmountPaid" : 47.0,
"totalAmountDue" : 54.55,
"balance" : 7.55,
"status" : "PARTIAL"
}
{
"_id" : "2020-07",
"totalContracts" : 1.0,
"totalWarrantyWholesalePrice" : NumberInt(23),
"totalWarrantyPrice" : NumberInt(40),
"totalSaleAmount" : NumberInt(900),
"totalCoverAmount" : NumberInt(1000),
"totalAmountPaid" : NumberInt(0),
"totalAmountDue" : NumberInt(17),
"balance" : NumberInt(17),
"status" : "DUE"
}
{
"_id" : "2020-06",
"totalContracts" : 1.0,
"totalWarrantyWholesalePrice" : NumberInt(0),
"totalWarrantyPrice" : NumberInt(0),
"totalSaleAmount" : NumberInt(900),
"totalCoverAmount" : NumberInt(1000),
"totalAmountPaid" : NumberInt(0),
"totalAmountDue" : NumberInt(0),
"balance" : NumberInt(0),
"status" : "PAID"
}
However, I would like to summarise the monthly groups, while also displaying the monthly summaries. I can work out how to complete either with two aggregators, however I would like to consolidate this into one. How can I achieve the following?
{
lifetimeTotals: {
"totalContracts" : 12.0,
"totalWarrantyWholesalePrice" : NumberInt(132),
"totalWarrantyPrice" : 203.55,
"totalSaleAmount" : NumberInt(9900),
"totalCoverAmount" : NumberInt(11000),
"totalAmountPaid" : 47.0,
"totalAmountDue" : 71.55,
"balance" : 24.55,
},
monthTotals: {
"2020-08": {
"totalContracts" : 10.0,
"totalWarrantyWholesalePrice" : NumberInt(109),
"totalWarrantyPrice" : 163.55,
"totalSaleAmount" : NumberInt(9000),
"totalCoverAmount" : NumberInt(10000),
"totalAmountPaid" : 47.0,
"totalAmountDue" : 54.55,
"balance" : 7.55,
"status" : "PARTIAL"
},
"2020-07": {
"totalContracts" : 1.0,
"totalWarrantyWholesalePrice" : NumberInt(23),
"totalWarrantyPrice" : NumberInt(40),
"totalSaleAmount" : NumberInt(900),
"totalCoverAmount" : NumberInt(1000),
"totalAmountPaid" : NumberInt(0),
"totalAmountDue" : NumberInt(17),
"balance" : NumberInt(17),
"status" : "DUE"
}
}
},

You can achieve this with $facet. I wrote a Mongo query based on your last output.
$facet lets you run several sub-pipelines over the same incoming documents. By using $group inside the lifetimeTotals sub-pipeline, we get every sum collected into the lifetimeTotals[] field.
$map lets you iterate over an array and reshape each of its elements.
$arrayToObject always expects an array of key/value pairs (k, v). We already produce those with $map: the key (k) is the "_id" and the value (v) is an object holding the rest of the fields.
Here is the code
// Stages to append to the monthly pipeline. They turn the per-month documents
// into one document holding the lifetime totals and a month-keyed map.
[
  {
    $facet: {
      // Grand totals across every monthly document.
      lifetimeTotals: [
        {
          $group: {
            _id: null,
            totalContracts: { $sum: "$totalContracts" },
            totalWarrantyWholesalePrice: { $sum: "$totalWarrantyWholesalePrice" },
            totalWarrantyPrice: { $sum: "$totalWarrantyPrice" },
            totalSaleAmount: { $sum: "$totalSaleAmount" },
            totalCoverAmount: { $sum: "$totalCoverAmount" },
            totalAmountPaid: { $sum: "$totalAmountPaid" },
            totalAmountDue: { $sum: "$totalAmountDue" },
            balance: { $sum: "$balance" }
          }
        },
        // Summing already-rounded decimals can reintroduce float noise.
        {
          $addFields: {
            totalAmountDue: { $round: ["$totalAmountDue", 2] },
            balance: { $round: ["$balance", 2] }
          }
        },
        // The grouping key (null) carries no information in the result.
        { $project: { _id: 0 } }
      ],
      // Pass-through stage: forwards every monthly document unchanged.
      months: [
        {
          $match: {
            _id: {
              $exists: true
            }
          }
        }
      ]
    }
  },
  {
    $project: {
      // $facet always yields arrays; unwrap the single totals document so
      // lifetimeTotals is an object. With no input months the field is omitted.
      lifetimeTotals: { $arrayElemAt: ["$lifetimeTotals", 0] },
      // Build { "<YYYY-MM>": { ...month fields } } from the months array.
      monthTotals: {
        $arrayToObject: {
          $map: {
            input: "$months",
            in: {
              k: "$$this._id",
              v: {
                totalContracts: "$$this.totalContracts",
                totalWarrantyWholesalePrice: "$$this.totalWarrantyWholesalePrice",
                totalWarrantyPrice: "$$this.totalWarrantyPrice",
                totalSaleAmount: "$$this.totalSaleAmount",
                totalCoverAmount: "$$this.totalCoverAmount",
                totalAmountPaid: "$$this.totalAmountPaid",
                totalAmountDue: "$$this.totalAmountDue",
                balance: "$$this.balance",
                status: "$$this.status"
              }
            }
          }
        }
      }
    }
  }
]
Working Mongo playground

Related

Modify the query to get the expected result

I am trying to modify a query to get the expected output. I am able to write the query, but the output is not in the shape I need in order to bind it in the front end.
Actual output:-
{
"_id" : null,
"first" : 3571.0,
"second" : 24.0
}
Expected output:-
{ "_id" : null,
"operation" : "edit",
"count" : 3571.0
}
{ "_id" : null,
"operation" : "read",
"count" : 24
}
{ "_id" : null,
"operation" : "update",
"count" : 9000
}
Myquery:-
db.getCollection('blog').aggregate([
{ "$group": {
"_id": null,
"first": {
"$sum": {
"$cond": [{ "$in": ["$Operation", ["edit1", "edit2"]] }, 1, 0]
}
},
"second": {
"$sum": {
"$cond": [{ "$in": ["$Operation", ["read1", "read2"]] }, 1, 0]
}
}
},
},
])
If you have a collection like the one below:
[
{
"_id" : 1,
"operation" : "edit1" // some extra fields
},
{
"_id" : 2,
"operation" : "read1"
},
{
"_id" : 3,
"operation" : "update1"
}
]
By using $project and $cond you can rename "read1" and "read2" to read, the update values to update, and the edit values to edit; then, by grouping on the new operation field, you can get the count of each operation.
you can use this query:
// Count documents per operation family ("edit", "read", "update").
// The call targets the collection itself: db.aggregate() is the database-level
// helper and does not read the documents of a collection.
db.getCollection('blog').aggregate([
  // Map each raw operation value onto its family name.
  {
    "$project": {
      "new_operation": {
        "$cond": [
          // Both branches read the same field; field names are case-sensitive,
          // so this must match the stored key ("operation" in the sample data).
          { "$in": ["$operation", ["edit1", "edit2"]] },
          "edit",
          {
            "$cond": [
              { "$in": ["$operation", ["read1", "read2"]] },
              "read",
              // Everything that is neither an edit nor a read counts as update.
              "update"
            ]
          }
        ]
      }
    }
  },
  // One result document per family with the number of matching documents.
  {
    "$group": {
      "_id": "$new_operation",
      "count": { "$sum": 1 }
    }
  }
])

mongodb - aggregating and unwinding foreign ref documents

So for my example database set up:
db.lists.insertMany([
{ _id: "1", name: "list1", included_lists: ["2"], items: ["i1"] },
{ _id: "2", name: "list2", included_lists: [], items: ["i2", "i3"] }
])
db.items.insertMany([
{ _id: "i1", name: "item1", details: [{}, {}, {}] },
{ _id: "i2", name: "item2", details: [{}, {}, {}] },
{ _id: "i3", name: "item3", details: [{}, {}, {}] }
])
I'm currently getting my items data via:
db.lists.aggregate([
{ "$match": { "_id": { "$in": ["1", "2"] } } },
{
"$lookup": {
"from": "items",
"localField": "items",
"foreignField": "_id",
"as": "item"
}
},
{ "$unwind": "$item" },
{
"$facet": {
"results": [
{ "$skip": 0 },
{ "$limit": 10 },
{
"$project": {
name: 1,
item: 1
}
}
],
"total": [
{ "$count": "total" },
]
}
}
]).pretty()
which returns:
{
"results" : [
{
"_id" : "1",
"name" : "list1",
"item" : {
"_id" : "i1",
"name" : "item1",
"details" : [
{
},
{
},
{
}
]
}
},
{
"_id" : "2",
"name" : "list2",
"item" : {
"_id" : "i2",
"name" : "item2",
"details" : [
{
},
{
},
{
}
]
}
},
{
"_id" : "2",
"name" : "list2",
"item" : {
"_id" : "i3",
"name" : "item3",
"details" : [
{
},
{
},
{
}
]
}
}
],
"total" : [
{
"total" : 3
}
]
}
What I'm trying to do is remove the { "$match": { "_id": { "$in": ["1", "2"] } } } stage, as I want to avoid the extra query needed to get the array of ids, and instead just collect all the ids from a list's _id and its included_lists ids. The query should then return all the items in the same shape as my result above.
This question is similar to: mongodb - unwinding nested subdocuments but I've reasked due to ambiguity and lack of db documents.
You can do it with $graphLookup followed by $group:
// Return the items of list "1" and of every list reachable through its
// included_lists, paginated, together with the total item count.
db.lists.aggregate([
  // Start from the root list only; included lists are discovered below.
  { "$match": { "_id": { "$in": ["1"] } } },
  // Follow included_lists -> _id recursively. Because the walk starts at the
  // list's own _id, the root list itself is part of "connected" as well.
  {
    $graphLookup: {
      from: "lists",
      startWith: "$_id",
      connectFromField: "included_lists",
      connectToField: "_id",
      as: "connected"
    }
  },
  { $unwind: "$connected" },
  // One document per distinct connected list, keeping its name and item ids.
  {
    $group: {
      _id: "$connected._id",
      items: { $first: "$connected.items" },
      name: { $first: "$connected.name" }
    }
  },
  // Resolve the item ids into item documents, one output row per item.
  {
    "$lookup": {
      "from": "items",
      "localField": "items",
      "foreignField": "_id",
      "as": "item"
    }
  },
  { "$unwind": "$item" },
  // $group emits documents in no guaranteed order; sort explicitly so that
  // $skip/$limit return stable, non-overlapping pages.
  { "$sort": { "_id": 1, "item._id": 1 } },
  {
    "$facet": {
      "results": [
        { "$skip": 0 },
        { "$limit": 10 },
        {
          "$project": {
            name: 1,
            item: 1
          }
        }
      ],
      "total": [
        { "$count": "total" }
      ]
    }
  }
]).pretty()

Group by Day and Item Total, but Output Item Names as Keys

I've been trying these examples : https://docs.mongodb.com/manual/reference/operator/aggregation/push/ and
https://docs.mongodb.com/manual/reference/operator/aggregation/addToSet/
Sample documents:
{ "_id" : 1, "item" : "abc", "price" : 10, "quantity" : 2, "date" : ISODate("2014-01-01T08:00:00Z") }
{ "_id" : 2, "item" : "jkl", "price" : 20, "quantity" : 1, "date" : ISODate("2014-02-03T09:00:00Z") }
{ "_id" : 3, "item" : "xyz", "price" : 5, "quantity" : 5, "date" : ISODate("2014-02-03T09:05:00Z") }
{ "_id" : 4, "item" : "abc", "price" : 10, "quantity" : 10, "date" : ISODate("2014-02-15T08:00:00Z") }
{ "_id" : 5, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-02-15T09:05:00Z") }
{ "_id" : 6, "item" : "xyz", "price" : 5, "quantity" : 5, "date" : ISODate("2014-02-15T12:05:10Z") }
{ "_id" : 7, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-02-15T14:12:12Z") }
But my need is kind of mixes of them. In push example, the results look like:
{
"_id" : { "day" : 46, "year" : 2014 },
"itemsSold" : [
{ "item" : "abc", "quantity" : 10 },
{ "item" : "xyz", "quantity" : 10 },
{ "item" : "xyz", "quantity" : 5 },
{ "item" : "xyz", "quantity" : 10 }
]
}
{
"_id" : { "day" : 34, "year" : 2014 },
"itemsSold" : [
{ "item" : "jkl", "quantity" : 1 },
{ "item" : "xyz", "quantity" : 5 }
]
}
{
"_id" : { "day" : 1, "year" : 2014 },
"itemsSold" : [ { "item" : "abc", "quantity" : 2 } ]
}
And in $addToSet example, results look like:
{ "_id" : { "day" : 46, "year" : 2014 }, "itemsSold" : [ "xyz", "abc" ] }
{ "_id" : { "day" : 34, "year" : 2014 }, "itemsSold" : [ "xyz", "jkl" ] }
{ "_id" : { "day" : 1, "year" : 2014 }, "itemsSold" : [ "abc" ] }
What I want is going to be like:
{ "_id" : { "day" : 46, "year" : 2014 }, "itemsSold" : { "xyz": 25, "abc": 10 } }
{ "_id" : { "day" : 34, "year" : 2014 }, "itemsSold" : { "xyz": 5, "jkl": 1 } }
{ "_id" : { "day" : 1, "year" : 2014 }, "itemsSold" : { "abc": 2 } }
Is this possible? If it is, any guide, direction would be helpful.
Based on your data you want two $group stages, in order to first collect per "item" and then to add those item details to an array.
How you process the rest depends on the MongoDB version you have available. For MongoDB 3.6 ( or from 3.4.7 ) you can use $arrayToObject in order to reshape the data:
// Per-day totals with item names as keys, reshaped on the server.
db.collection.aggregate([
// Stage 1: total quantity per (year, dayOfYear, item).
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"dayOfYear": { "$dayOfYear": "$date" },
"item": "$item"
},
"total": { "$sum": "$quantity" }
}},
// Stage 2: regroup per day and collect { k: item, v: total } pairs,
// which is the input shape $arrayToObject accepts.
{ "$group": {
"_id": {
"year": "$_id.year",
"dayOfYear": "$_id.dayOfYear"
},
"itemsSold": { "$push": { "k": "$_id.item", "v": "$total" } }
}},
// Descending on the compound _id (year, then dayOfYear).
{ "$sort": { "_id": -1 } },
// Turn the k/v pair array into a single object keyed by item name.
{ "$addFields": {
"itemsSold": { "$arrayToObject": "$itemsSold" }
}}
])
Or with earlier versions, you can simply post process the results. All the "aggregation" work is done before the last stage anyway:
// Same aggregation, but the final reshape happens in the client instead of
// in an $addFields/$arrayToObject stage.
db.collection.aggregate([
// Stage 1: total quantity per (year, dayOfYear, item).
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"dayOfYear": { "$dayOfYear": "$date" },
"item": "$item"
},
"total": { "$sum": "$quantity" }
}},
// Stage 2: regroup per day and collect { k: item, v: total } pairs.
{ "$group": {
"_id": {
"year": "$_id.year",
"dayOfYear": "$_id.dayOfYear"
},
"itemsSold": { "$push": { "k": "$_id.item", "v": "$total" } }
}},
{ "$sort": { "_id": -1 } },
// Server-side reshape left disabled on purpose; the cursor .map() below
// performs the equivalent conversion.
/*
{ "$addFields": {
"itemsSold": { "$arrayToObject": "$itemsSold" }
}}
*/
// For each result document, replace the k/v pair array with an object.
]).map( d => Object.assign( d,
{
// Fold the pairs into one object: { [k]: v, ... }.
itemsSold: d.itemsSold.reduce((acc,curr) =>
Object.assign(acc, { [curr.k]: curr.v }),
{}
)
}
))
Either way produces the same desired result:
{
"_id" : {
"year" : 2014,
"dayOfYear" : 46
},
"itemsSold" : {
"xyz" : 25,
"abc" : 10
}
}
{
"_id" : {
"year" : 2014,
"dayOfYear" : 34
},
"itemsSold" : {
"jkl" : 1,
"xyz" : 5
}
}
{
"_id" : {
"year" : 2014,
"dayOfYear" : 1
},
"itemsSold" : {
"abc" : 2
}
}
So you can do things with new aggregation features, but really that end result is just "reshaping" which is usually best left to client processing instead.

MongoDB nested query - BadValue error

Here's the query I'm trying to run:
{ ownerId: 14,
'$or':
[ { '$or':
[ { '$and':
[ { provider: 'INSTAGRAM' },
{ tags:
{ '$or':
[ { '$in': [ 'skyhotel' ] },
{ '$or': [ { '$all': ["skyhotel","excellent"] } ] } ] } } ] },
{ '$and':
[ { provider: 'VKONTAKTE' },
{ tags:
{ '$or':
[ { '$in': [ 'skyhotel' ] },
{ '$or': [ { '$all': ["skyhotel","excellent"] } ] } ] } } ] } ] },
{ '$or':
[ { '$and':
[ { provider: 'INSTAGRAM' },
{ authorLogin: { '$in': [ 'valera92', 'petyan' ] } } ] },
{ '$and':
[ { provider: 'VKONTAKTE' },
{ authorLogin: { '$in': [ 'valera92' ] } } ] } ] },
{ '$or':
[ { '$and':
[ { provider: 'INSTAGRAM' },
{ locationId: { '$in': [ '32454234' ] } } ] } ] },
{ '$or':
[ { '$and':
[ { provider: 'INSTAGRAM' },
{ location: { '$or': [ { '$geoWithin': { '$centerSphere': [ [ '56.829782', '60.593162' ], 0.000012629451881788331 ] } } ] } } ] } ] } ] }
It seems to be malformed according to mongo standards. The error I'm getting is:
Can't canonicalize query: BadValue unknown operator: $or
What would be the proper way in which I could reformat this query?
EDIT: Document example
{
"_id" : ObjectId("570362332ee1a7ab1ecb9899"),
"proextid" : "INSTAGRAM_fdfsfsdfsdfwefwef2r3232",
"updatedAt" : ISODate("2016-04-05T06:58:59.683Z"),
"createdAt" : ISODate("2016-04-05T06:58:59.683Z"),
"ownerId" : 7,
"authorId" : "390599885",
"authorName" : "name",
"authorLogin" : "login",
"authorDetail" : {
"authorPicture" : "url",
"authorLink" : "url"
},
"externalCreatedAt" : ISODate("2015-08-29T22:42:04.000Z"),
"externalId" : "fdsfsdfsdfsdfwer2342342423423r23r",
"detailType" : "PHOTO",
"location" : [
0,
0
],
"locationId" : "",
"locationTitle" : "",
"provider" : "INSTAGRAM",
"detail" : {},
"description" : "hello",
"tags" : [
"ленинградскийпроспект",
"москва",
"архитектура",
"историческоенаследие",
"петровскийдворец"
],
"commentsCount" : 1,
"likesCount" : 40,
"groupName" : "",
"__v" : 0
}
Your query could be simplified as something like:
{
ownerId:14,
provider: { $in: [ 'INSTAGRAM', 'VKONTAKTE' ] },
'$or': [
{
tags: { $in: [ 'skyhotel', 'excellent' ] }
},
{
authorLogin: { $in: [ 'valera92', 'petyan' ] },
},
{
locationId:{ '$in':[ '32454234' ] }
},
{
location:{
{
'$geoWithin':{
'$centerSphere':[ [ '56.829782', '60.593162' ], 0.000012629451881788331 ]
}
}
}
}
]
}
Now this may not be exactly what you're looking for. But from what you asked, and the information you provided, this is the best suggestion I can give.

How can I put null values to separate field and others to different to field in MongoDB aggregation?

I have the following document in my collection.
{
"_id" : ObjectId("55961a28bffebcb8058b4570"),
"title" : "BackOffice 2",
"cts" : NumberLong(1435900456),
"todo_items" : [
{
"id" : "55961a42bffebcb7058b4570",
"task_desc" : "test 1",
"completed_by" : "557fccb5bffebcf7048b457c",
"completed_date" : NumberLong(1436161096)
},
{
"id" : "559639afbffebcc7098b45a6",
"task_desc" : "test 2",
"completed_by" : "557fccb5bffebcf7048b457c",
"completed_date" : NumberLong(1435911809)
},
{
"id" : "559a22f5bffebcb0048b476c",
"task_desc" : "test 3",
}
],
"uts" : NumberLong(1436164853)
}
I need an aggregation query to perform following, if there is field "completed_by" and "completed_date" and if there is a value which is not null push in to the "completed" array field, otherwise push them into the "incomplete" field.
Following is a sample result I want.
{
"_id" : ObjectId("55961a28bffebcb8058b4570"),
"completed" : [
{
"id":"557fccb5bffebcf7048b457c",
"title":"test 1",
"completed_by" : "557fccb5bffebcf7048b457c",
"completed_date" : NumberLong(1436161096)
},
{
"id":"557fccb5bffebcf7048b457c",
"title":"test 1",
"completed_by" : "557fccb5bffebcf7048b457c",
"completed_date" : NumberLong(1436161096)
}
],
"incomplete":[
{
"id" : "559a22f5bffebcb0048b476c",
"title" : "test 3"
}
]
}
As long as your "array" items have "distinct" identifiers ( which they have ), there are a couple of approaches to this:
Firstly, without actually "aggregating across documents":
// Split todo_items into "completed" and "incomplete" arrays within a single
// $project stage, without unwinding the array.
db.collection.aggregate([
{ "$project": {
"title": 1,
"cts": 1,
// Keep each item whose completed_date is truthy; every other item becomes
// the placeholder false, which $setDifference then removes. As a set
// operator it also collapses duplicates, hence the need for distinct items.
"completed": { "$setDifference": [
{ "$map": {
"input": "$todo_items",
"as": "i",
"in": {
"$cond": [
"$$i.completed_date",
"$$i",
false
]
}
}},
[false]
]},
// Mirror image of the above: items with a truthy completed_date become
// false and are removed, leaving only the incomplete ones.
"incomplete": { "$setDifference": [
{ "$map": {
"input": "$todo_items",
"as": "i",
"in": {
"$cond": [
"$$i.completed_date",
false,
"$$i"
]
}
}},
[false]
]}
}}
])
That requires that you at least have MongoDB 2.6 available on the server in order to use the required $map and $setDifference operators. It's pretty fast considering that all the work is done in a single $project stage.
The alternative, which you should only use when "aggregating across documents", is available to all versions supporting the aggregation framework post MongoDB 2.2:
// Split todo_items into "completed" and "incomplete" using only $unwind and
// $group, for servers without $map / $setDifference.
db.collection.aggregate([
  { "$unwind": "$todo_items" },
  // Sort each item into one of two sets; the set it does not belong to
  // receives a null placeholder instead.
  { "$group": {
    "_id": "$_id",
    "title": { "$first": "$title" },
    "cts": { "$first": "$cts" },
    "completed": {
      "$addToSet": {
        "$cond": [
          "$todo_items.completed_date",
          "$todo_items",
          null
        ]
      }
    },
    "incomplete": {
      "$addToSet": {
        "$cond": [
          "$todo_items.completed_date",
          null,
          "$todo_items"
        ]
      }
    }
  }},
  // Strip the null placeholder from "completed", then rebuild the array.
  // A document whose set holds nothing but null is dropped by the $match;
  // this is the empty-array case this form does not cater for.
  { "$unwind": "$completed" },
  { "$match": { "completed": { "$ne": null } } },
  { "$group": {
    "_id": "$_id",
    "title": { "$first": "$title" },
    "cts": { "$first": "$cts" },
    "completed": { "$push": "$completed" },
    "incomplete": { "$first": "$incomplete" }
  }},
  // The comma after the stage above was missing, which made the whole
  // pipeline a syntax error.
  // Same clean-up for "incomplete".
  { "$unwind": "$incomplete" },
  { "$match": { "incomplete": { "$ne": null } } },
  { "$group": {
    "_id": "$_id",
    "title": { "$first": "$title" },
    "cts": { "$first": "$cts" },
    "completed": { "$first": "$completed" },
    "incomplete": { "$push": "$incomplete" }
  }}
])
Which isn't entirely all there since you need to cater for conditions where an array may end up empty. But that is not the real lesson here since MongoDB 2.6 is already a couple of years in circulation.
In aggregation, you cannot really exclude the "null/false" results, but you can "filter" them.
Also, unless you are actually "aggregating across documents" as mentioned already, then the second form with $unwind to process the arrays comes with a "lot" of overhead. So you really should be altering the array contents in your client code as each document is read.
Can you please check the below :
// Split todo_items into "completed" (both completed_by and completed_date are
// present and non-null) and "incompleted" (everything else).
// An accumulator such as $push cannot be chosen inside $cond, and
// $exists / { field: { $ne: ... } } are query operators rather than
// aggregation expressions, so the split is done with $filter instead.
db.collection.aggregate([
  {
    $project: {
      _id: "$_id",
      completed: {
        $filter: {
          // A missing or null todo_items is treated as an empty array.
          input: { $ifNull: ["$todo_items", []] },
          as: "item",
          cond: {
            // $ifNull maps a missing field to null, so one comparison covers
            // both "does not exist" and "is null".
            $and: [
              { $ne: [{ $ifNull: ["$$item.completed_by", null] }, null] },
              { $ne: [{ $ifNull: ["$$item.completed_date", null] }, null] }
            ]
          }
        }
      },
      incompleted: {
        $filter: {
          input: { $ifNull: ["$todo_items", []] },
          as: "item",
          cond: {
            // Exact negation of the condition used for "completed".
            $or: [
              { $eq: [{ $ifNull: ["$$item.completed_by", null] }, null] },
              { $eq: [{ $ifNull: ["$$item.completed_date", null] }, null] }
            ]
          }
        }
      }
    }
  }
]);

Categories

Resources