MongoDB aggregate join two $group - javascript

I have a model that looks like this:
{
tokens: [
{
value: {
type: String,
required: true,
unique: true,
},
origin: {
type: String,
default: 'Unknown',
},
grabbedAt: {
type: Date,
default: Date.now,
},
},
], ...
}
Now I want to format the data so that all "tokens" with a date within the past 12 days are returned, grouped by their origin, with a count per day.
So the result would look like this: [{ origin: 'Unknown', data: [0,1,2,...] }, { origin: 'origin2', data: [1,10,...] }]
The data array would hold the count of tokens acquired on past 12 days, beginning with the first day to the 12th day.
I already tried something like this:
Account.aggregate([
{ $unwind: '$tokens' },
{ $match: { 'tokens.grabbedAt': { $gte: beforeDate } } },
{
$group: {
_id: { origin: '$tokens.origin', date: '$tokens.grabbedAt' },
count: { $sum: 1 },
},
},
{ $project: { _id: 0, origin: '$_id.origin', date: '$_id.date', count: '$count' } },
{ $sort: { date: 1 } },
]);
But using this code each date and origin is included multiple times. So how can I "join" or merge these two $groups?

One way to do it is to $unwind and $group the tokens by origin, and then use 3 steps to create a list of the 12 needed dates. A $set step creates the empty date objects for the days that were missing. Now we can $concatArrays our real measurements with the "artificial" ones of the empty days. The last part is just to group and sum it up.
// Counts tokens per origin for each of the 12 days starting at `beforeDate`,
// emitting 0 for days on which an origin has no tokens.
// Result shape: [{ origin: 'Unknown', data: [c1, c2, ...] }, ...] with the
// counts ordered from the first day to the last.
// NOTE(review): assumes `beforeDate` is a Date at the start of the first day;
// $dateToString formats in UTC unless a timezone is given - confirm.
db.collection.aggregate([
  {$unwind: "$tokens"},
  {$match: {"tokens.grabbedAt": {$gte: beforeDate}}},
  // One document per origin, holding one {dateString, count: 1} entry per token.
  {
    $group: {
      _id: "$tokens.origin",
      res: {
        $push: {
          dateString: {
            $dateToString: {date: "$tokens.grabbedAt", format: "%Y-%m-%d"}
          },
          count: 1
        }
      }
    }
  },
  // Day offsets 0..11 relative to the start date.
  {
    $addFields: {startDate: beforeDate, range: {$range: [0, 12, 1]}}
  },
  // Zero-count placeholder for every day so that empty days still appear.
  {
    $set: {
      dateStrings: {
        $map: {
          input: "$range",
          in: {
            dateString: {
              $dateToString: {
                date: {
                  $add: [
                    "$startDate",
                    {$multiply: ["$$this", 24, 60, 60, 1000]}
                  ]
                },
                format: "%Y-%m-%d"
              }
            },
            count: 0
          }
        }
      }
    }
  },
  {$project: {data: {$concatArrays: ["$dateStrings", "$res"]}}},
  {$unwind: "$data"},
  // Sum placeholders and real entries per (origin, day).
  {
    $group: {
      _id: {origin: "$_id", date: "$data.dateString"},
      count: {$sum: "$data.count"}
    }
  },
  // $group does not guarantee output order; sort by day so the pushed counts
  // run from the first day to the last ("%Y-%m-%d" strings sort chronologically).
  {$sort: {"_id.date": 1}},
  {$group: {_id: "$_id.origin", data: {$push: "$count"}}},
  {$project: {_id: 0, origin: "$_id", data: 1}}
])
Playground

Related

Timezone Issue with DayJS React and MongoDB

In each record that I save to MongoDB, I set the creation date with Date.now, for example:
createdAt: {
type: Date,
default: Date.now
}
So in mongoDB I have the record saved as follows:
createdAt: 2023-02-01T01:39:03.377+00:00
In React I get the records and in this way I show the date using DayJS:
dayjs(createdAt).format('DD/MM/YY')
But the problem is that the stored date indicates that the record was created on 02-01-2023 (February 1, 2023), but when formatted with DayJS the date changes to 01-31-2023.
Changing the day decreases the record by one day and this is causing me conflicts since they are about financial records.
On my server I don't have any time stamp, I use MongoDB from Atlas (cloud.mongodb)
This is my query to MongoDB. When requesting the information for the month of February, it correctly returns the records, but in React, if the record is from February 1, 2023, it is shown as January 31, 2023:
db.purchases.aggregate([
{
$match: {
"detail.category._id": ObjectId("63bf4d0b10dfcae061b6eab0")
}
},
{
$unwind: "$detail"
},
{
$group: {
_id: "$detail.category._id",
total: { $sum: "$total" },
totalProductPurchases: { $sum: "$detail.quantity" },
purchasesCount: { $sum: 1 },
purchases: {
$push: {
$cond: [
{
$and: [
{ $eq: [{ $year: "$createdAt" }, 2023] },
{ $eq: [{ $month: "$createdAt" }, 2] },
{ $gte: [{ $dayOfMonth: "$createdAt" }, 1] },
{ $lte: [{ $dayOfMonth: "$createdAt" }, 31] }
]
},
{
purchaseId: "$_id",
month: { $month: "$createdAt" },
isoWeek: { $isoDayOfWeek: "$createdAt" },
dayOfMonth: { $dayOfMonth: "$createdAt" },
createdAt: "$createdAt",
total: "$total",
subtotal: "$subtotal",
tax: "$tax",
discount: "$discount",
totalBeforeTax: "$totalBeforeTax",
createdAt: "$createdAt",
paymentType: "$paymentType"
},
false
]
}
},
product_counts: {
$push: {
product: "$detail.name",
count: "$detail.quantity"
} },
}
},
{
$unwind: "$product_counts"
},
{
$group: {
_id: "$product_counts.product",
ProductPurchasesNumber: { $sum: "$product_counts.count" },
purchases: { $first: "$purchases"},
totalProductPurchases: { $first: "$totalProductPurchases"},
total: { $first: "$total"},
purchasesCount: { $first: "$purchasesCount"}
}
},
{
$sort: { count: -1 }
},
{
$limit: 3
},
{
$group: {
_id: null,
top3Products: { $push: { product: "$_id", ProductPurchasesNumber: "$ProductPurchasesNumber" } },
total: { $first: "$total" },
totalProductPurchases: { $first: "$totalProductPurchases" },
purchasesCount: { $first: "$purchasesCount" },
purchases: { $first: "$purchases" }
}
},
{
$addFields: {
purchasesArrayLength: { $size: "$purchases" },
filteredPurchases: {
$filter: {
input: "$purchases",
as: "purchase",
cond: { $ne: [ "$$purchase", false ] }
}
}
},
},
{
$project: {
purchases: 0
}
}
])
How can I avoid these kinds of errors? Thank you.
If you only want to retrieve records matching dates in the client-side timezone, send the dates in the API call.
For example, on the React side
// These will be local date instances
const startDate = new Date(2023, 1, 1); // Start of day Feb 1st
const endDate = new Date(2023, 2, 1, 0, 0, -1); // End of day Feb 28
const params = new URLSearchParams({
startDate: startDate.toISOString(),
endDate: endDate.toISOString(),
});
fetch(`/api/purchases?${params}`)
.then(...);
and on the server-side, parse the dates from the query string and use them in your MongoDB aggregate query
const startDate = new Date(req.query.startDate);
const endDate = new Date(req.query.endDate);
Your requirements and questions are not really clear to me, but I would write the conditions like this:
{
$cond: [
{
$eq: [
{ $dateTrunc: { date: "$createdAt", unit: 'month', timezone: 'America/New_York' } },
DateTime.local({ zone: "America/New_York" }).startOf('month').toJSDate()
]
},
...
]
}
or
{
$cond: [
{
$and: [
{ $gte: ["$createdAt", DateTime.local({ zone: "America/New_York" }).startOf('month').toJSDate()] },
{ $lte: ["$createdAt", DateTime.local({ zone: "America/New_York" }).endOf('month').toJSDate()] }
]
},
...
]
}
I prefer Luxon over Day.js, but I think the same functions are also available in Day.js.

MongoDB fill missing dates in aggregation pipeline

I have this pipeline :
let pipeline = [
{
$match: {
date: { $gte: new Date("2022-10-19"), $lte: new Date("2022-10-26") },
},
},
{
$group: {
_id: "$date",
tasks: { $push: "$$ROOT" },
},
},
{
$sort: { _id: -1 },
},
];
const aggregationData = await ScheduleTaskModel.aggregate(pipeline);
where i group all "tasks" between a date range by date and i get that result :
[
{
"date": "2022-10-21T00:00:00.000Z",
"tasks": [...tasks with this date]
},
{
"date": "2022-10-20T00:00:00.000Z",
"tasks": [...tasks with this date]
}
]
as you see i have "tasks" only for 2 dates in that range,what if i want all dates to appear even the ones with no tasks so it would be like this with empty arrays ?
[
{
"date": "2022-10-26T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-25T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-24T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-23T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-22T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-21T00:00:00.000Z",
"tasks": [...tasks with this date]
},
{
"date": "2022-10-20T00:00:00.000Z",
"tasks": [...tasks with this date]
},
{
"date": "2022-10-19T00:00:00.000Z",
"tasks": []
},
]
i tried to use $densify but unfortunately it requires upgrading my mongoDb atlas cluster which is not possible..
The answer of @WernfriedDomscheit has the downside of grouping together all the documents in the collection into one large document, while a document has a size limit. A variation on it, without this downside, can be:
$match only the relevant document, same as in your current query
Use $facet to handle the case of no relevant documents at all. This will allow you to group all the relevant documents as you did in your query, but to keep a working document even if there aren't any.
Add the relevant dates inside an array (since we use $facet this will happen even if the first match is empty)
Concatenate the array of matched data with the array of empty entries, use the real-data first.
$unwind to separate the documents by date, and $group again by date to remove the duplicates.
Format the result
// Groups tasks by date for a fixed date range and adds an entry with an empty
// `tasks` array for every day in the range that has no tasks.
db.collection.aggregate([
// Keep only the documents inside the requested date range.
{$match: {date: {$gte: new Date("2022-10-19"), $lte: new Date("2022-10-26")}}},
// $facet keeps a working document even when nothing matched, so the
// placeholder dates below are still produced.
{$facet: {
data: [
{$group: {_id: "$date", tasks: {$push: "$$ROOT"}}},
{$project: {date: "$_id", tasks: 1}}
]
}},
// Build one placeholder entry ({date, tasks: []}) per day, as day offsets from the start date.
{$addFields: {
dates: {$map: {
input: {$range: [0, 8]},
// maybe more dynamic with $dateDiff (+1 because the end date is inclusive) ->
// {$range: [0, {$add: [{$dateDiff: {startDate: new Date("2022-10-19"), endDate: new Date("2022-10-26"), unit: "day"}}, 1]}]}
in: {
date: {$dateAdd: {
startDate: ISODate("2022-10-19T00:00:00.000Z"),
unit: "day",
amount: "$$this"
}},
tasks: []
}
}}
}},
// Real data first, placeholders second: the $first below then prefers real data.
{$project: {data: {$concatArrays: ["$data", "$dates"]}}},
{$unwind: "$data"},
// Collapse duplicates per date, keeping the first (real, if present) tasks array.
{$group: {_id: "$data.date", "tasks": {$first: "$data.tasks"}}},
{$project: { _id: 0, date: "$_id", tasks: 1 }},
{$sort: { date: -1 }},
])
See how it works on the playground example
New function $densify would be the simplest, of course. The manual way of doing it would be this one:
// Manual alternative to $densify: emits one {date, tasks: [...]} document for
// every day of the range, with an empty `tasks` array for days without tasks.
// NOTE: the first stage collects all incoming documents into a single
// document, so the input must stay below the BSON document size limit.
db.collection.aggregate([
  {
    $group: {
      _id: null,
      data: { $push: "$$ROOT" }
    }
  },
  // Generate the list of days as offsets from the start date.
  {
    $set: {
      dates: {
        $map: {
          // maybe more dynamic with $dateDiff (+1 because the end date is inclusive) ->
          // { $range: [ 0, { $add: [ { $dateDiff: { startDate: new Date("2022-10-19"), endDate: new Date("2022-10-26"), unit: "day" } }, 1 ] } ] }
          input: { $range: [ 0, 8 ] },
          in: {
            date: {
              $dateAdd: {
                startDate: ISODate("2022-10-19T00:00:00.000Z"),
                unit: "day",
                amount: "$$this"
              }
            }
          }
        }
      }
    }
  },
  // Attach to every day the documents whose `date` equals that day
  // (an empty array when there are none).
  {
    $set: {
      dates: {
        $map: {
          input: "$dates",
          as: "d",
          in: {
            $mergeObjects: [
              "$$d",
              {
                tasks: {
                  $filter: {
                    input: "$data",
                    cond: { $eq: [ "$$d.date", "$$this.date" ] }
                  }
                }
              }
            ]
          }
        }
      }
    }
  },
  // Emit one document per day, keeping the full tasks array. Taking only
  // the first task of a non-empty day would drop the remaining tasks.
  { $unwind: "$dates" },
  { $replaceWith: "$dates" },
  // Newest day first, as in the expected output.
  { $sort: { date: -1 } }
])
Mongo Playground

MongoDB update value of deeply nested array and sort

I have this document of timestamps sorted by time:
{
_id: '1',
timestamps: [
{
id: '589b32cf-28b3-4a25-8fd1-5e4f86682199',
time: '2022-04-13T19:00:00.122Z'
},
{
id: '781a47de-d21a-4c2c-9814-b46f4a3cfa30',
time: '2022-04-13T20:00:00.498Z'
}
]
};
I want to update a timestamp by id, change the time, and sort
example: update timestamp with id: '589b32cf-28b3-4a25-8fd1-5e4f86682199':
{
id: '589b32cf-28b3-4a25-8fd1-5e4f86682199',
time: '2022-04-13T20:30:00.122Z'
}
the updated document should like this:
{
_id: '1',
timestamps: [
{
id: '32bb3-2222-1111-j878-b4000a3wwa30',
time: '2022-04-13T19:30:00.122Z'
},
{
id: '589b32cf-28b3-4a25-8fd1-5e4f86682199',
time: '2022-04-13T20:30:00.122Z'
}
]
};
I came up with this method to update and sort each element in the array:
const updateResult = await TimestampCollection.updateOne(
{ _id: '1' },
{
$push: {
timestamps: {
$each: [{ id: timestamp.id, time: timestamp.time }],
$sort: { time: 1 }
}
}
}
);
However this pushed a new object to the array with the same id and updated time:
{
_id: '1',
timestamps: [
{
id: '589b32cf-28b3-4a25-8fd1-5e4f86682199',
time: '2022-04-13T19:00:00.122Z'
},
{
id: '32bb3-2222-1111-j878-b4000a3wwa30',
time: '2022-04-13T19:30:00.122Z'
},
{
id: '589b32cf-28b3-4a25-8fd1-5e4f86682199',
time: '2022-04-13T20:30:00.122Z'
}
]
};
Any idea how to fix this?
I didn't find a way to do this in a single query. Sadly combining $set and $push in a single query leads to a conflict...
// Step 1: change the time of the array element whose id matches.
// The positional `$` refers to the element matched by "timestamps.id".
// updateOne replaces the deprecated db.collection.update().
db.collection.updateOne(
  {
    _id: "1",
    "timestamps.id": timestamp.id
  },
  {
    $set: {
      "timestamps.$.time": timestamp.time
    }
  }
);

// Step 2: re-sort the array. Pushing an empty $each adds nothing, so only
// the $sort modifier takes effect.
db.collection.updateOne(
  {
    _id: "1"
  },
  {
    $push: {
      timestamps: {
        $each: [],
        $sort: {
          time: 1
        }
      }
    }
  }
);
This solution involves two queries. One for updating the element in the array and the other for sorting the array.

aggregate data by date interval

This is how I'm doing an aggregation query to get the average time between start and end time (both in the format ISODate("2020-02-24T13:08:00.123Z")).
But I need to split the result data into two groups, as I need to get the average for all datasets with a start time in 04/2019 - 09/2019 and, as a second group, for all datasets with a start time in 10/2019 - 04/2020.
I don't understand how to group by these two intervals for an ISODate value.
const data = await Data.aggregate([
{
$match: {
type: { $exists: true },
statsIgnore: { $exists: false }
}
},
{
$group: {
_id: '$type',
Datasets: { $sum: 1 },
Average: {
$avg: {
$divide: [
{ $subtract: ['$timeEnd', '$timeStart'] },
60000
]
}
}
}
}
]).toArray()
My data structure
[
{
_id: ObjectId("5d9242cf863feb0b8d70d12e"),
timeStart: ISODate("2020-02-24T13:08:00.123Z"),
timeEnd: ISODate("2020-02-24T13:18:00.123Z"),
type: 'type1'
},
{
_id: ObjectId("5d9242cf863feb0b8d70d12f"),
timeStart: ISODate("2019-08-29T17:05:00.123Z"),
timeEnd: ISODate("2019-08-29T17:25:00.123Z"),
type: 'type1'
}
]
In this simple data example there is only one type with a single dataset for summer and a single dataset for winter interval.
So the result should be 10 minutes average for winter and 20 minutes average for summer (for type1 group).
The approach I took is to check that the timeStart is in the range you're looking for in the initial $match stage. Then I added an $addFields stage that checks if the season is summer based on the start date. Then I grouped by my new summer field.
// Average duration in minutes, split into the summer interval (start time in
// 04/2019 - 09/2019) and the winter interval (start time in 10/2019 - 04/2020).
// The result _id is true for summer and false for winter.
[
  {$match: {
    type: {
      $exists: true
    },
    statsIgnore: {
      $exists: false
    },
    // Both intervals include their last month, so the exclusive upper bound
    // is the first day of the month after April 2020.
    timeStart: {
      $gte: ISODate("2019-04-01T00:00:00Z"),
      $lt: ISODate("2020-05-01T00:00:00Z")
    }
  }},
  // September still counts as summer; winter starts on October 1st.
  {$addFields: {
    summer: { $lt: ["$timeStart", ISODate("2019-10-01T00:00:00Z")]}
  }},
  {$group: {
    _id: "$summer",
    Average: {
      $avg: {
        // Subtracting two dates yields milliseconds; 60000 ms = 1 minute.
        $divide: [
          { $subtract: ['$timeEnd', '$timeStart'] },
          60000
        ]
      }
    }
  }}]
Check if this meets your requirements:
// Season names indexed by the value of $month (1-12); index 0 is a filler
// entry so that the month number can be used directly as the array index.
const seasonByMonth = [
  "None",
  "Winter", "Winter",
  "Spring", "Spring", "Spring",
  "Summer", "Summer", "Summer",
  "Autumn", "Autumn", "Autumn",
  "Winter"
];

// (timeEnd - timeStart) divided by 60000.
const durationExpr = {
  $divide: [{ $subtract: ["$timeEnd", "$timeStart"] }, 60000]
};

// Number of datasets and average duration per (type, season of timeStart).
db.data.aggregate([
  {
    $match: {
      type: { $exists: true },
      statsIgnore: { $exists: false }
    }
  },
  {
    $group: {
      _id: {
        type: "$type",
        season: { $arrayElemAt: [seasonByMonth, { $month: "$timeStart" }] }
      },
      Datasets: { $sum: 1 },
      Average: { $avg: durationExpr }
    }
  }
])
MongoPlayground

Sort nested array using push in MongoDB?

Consider the query
EightWeekGamePlan.aggregate(
[
{
$group: {
_id: {
LeadId: "$LeadId",
BusinessName: "$BusinessName",
PhoneNumberMasque: "$PhoneNumberMasque",
City: "$City",
Rooms: "$Rooms",
dateToString: { format: "%Y-%m-%d", date: "$InserDate" }
},
Weeks: {
$push: {
Week: "$Week",
Status: "$Status",
InsertDate: "$InsertDate"
},
// $sort: { Week: 1 } // doesn't work
}
}
}
]
How can I sort the nested array Weeks by Week (it's a number ranging 1-8) ?
I've tried with $sort: { Week: 1 } but the query didn't work out.
Use $sort before $group stage
// Sort by Week first, so that $push receives the documents of every group in
// ascending Week order and the nested Weeks array comes out sorted.
EightWeekGamePlan.aggregate([
  { $sort: { Week: 1 } },
  {
    $group: {
      _id: {
        LeadId: "$LeadId",
        BusinessName: "$BusinessName",
        PhoneNumberMasque: "$PhoneNumberMasque",
        City: "$City",
        Rooms: "$Rooms",
        // The key keeps its original name, but the value now uses the
        // $dateToString operator on the InsertDate field that $push reads below.
        // A plain `dateToString: {...}` object that reads "$InserDate" never
        // adds the date to the grouping key.
        dateToString: {
          $dateToString: { format: "%Y-%m-%d", date: "$InsertDate" }
        }
      },
      Weeks: {
        $push: {
          Week: "$Week",
          Status: "$Status",
          InsertDate: "$InsertDate"
        }
      }
    }
  }
])

Categories

Resources