aggregate data by date interval - javascript

This is how I'm doing an aggregation query to get the average time between start and end time (both in the format ISODate("2020-02-24T13:08:00.123Z")).
But I need to split the result data into two groups as I need to get the average data for all datasets with start time 04/2019 - 09/2020 and the second group all data with start time 10/2019 - 04/2020.
I don't get it how to group by these two interval for an ISODate value
const data = await Data.aggregate([
{
$match: {
type: { $exists: true },
statsIgnore: { $exists: false }
}
},
{
$group: {
_id: '$type',
Datasets: { $sum: 1 },
Average: {
$avg: {
$divide: [
{ $subtract: ['$timeEnd', '$timeStart'] },
60000
]
}
}
}
}
]).toArray()
My data structure
[
{
_id: ObjectId("5d9242cf863feb0b8d70d12e"),
timeStart: ISODate("2020-02-24T13:08:00.123Z"),
timeEnd: ISODate("2020-02-24T13:18:00.123Z"),
type: 'type1'
},
{
_id: ObjectId("5d9242cf863feb0b8d70d12f"),
timeStart: ISODate("2019-08-29T17:05:00.123Z"),
timeEnd: ISODate("2019-08-29T17:25:00.123Z"),
type: 'type1'
}
]
In this simple data example there is only one type with a single dataset for summer and a single dataset for winter interval.
So the result should be 10 minutes average for winter and 20 minutes average for summer (for type1 group).

The approach I took is to check that the timeStart is in the range you're looking for in the initial $match stage. Then I added an $addFields stage that checks if the season is summer based on the start date. Then I grouped by my new summer field.
[
{$match: {
type: {
$exists: true
},
statsIgnore: {
$exists: false
},
timeStart: {
$gte: ISODate("2019-04-01T00:00:00Z"),
$lt: ISODate("2020-04-01T00:00:00Z")
}
}},
{$addFields: {
summer: { $lt: ["$timeStart", ISODate("2019-09-01T00:00:00Z")]}
}},
{$group: {
_id: "$summer",
Average: {
$avg: {
$divide: [
{ $subtract: ['$timeEnd', '$timeStart'] },
60000
]
}
}
}}]

Check if this meets your requirements:
db.data.aggregate([
{
$match: {
type: {
$exists: true
},
statsIgnore: {
$exists: false
}
}
},
{
$group: {
_id: {
type: "$type",
season: {
$arrayElemAt: [
[
"None",
"Winter",
"Winter",
"Spring",
"Spring",
"Spring",
"Summer",
"Summer",
"Summer",
"Autumn",
"Autumn",
"Autumn",
"Winter"
],
{
$month: "$timeStart"
}
]
}
},
Datasets: {
$sum: 1
},
Average: {
$avg: {
$divide: [
{
$subtract: [
"$timeEnd",
"$timeStart"
]
},
60000
]
}
}
}
}
])
MongoPlayground

Related

Timezone Issue with DayJS React and MongoDB

In each record that I save to MongoDB, I execute it with Date.noe, example:
createdAt: {
type: Date,
default: Date.now
}
So in mongoDB I have the record saved as follows:
createdAt: 2023-02-01T01:39:03.377+00:00
In React I get the records and in this way I show the date using DayJS:
dayjs(createdAt).format('DD/MM/YY')
But the problem is that the date indicates that the registration was made on 02-2023-01 but when using DayJS the date changes to 01-31-2023
Changing the day decreases the record by one day and this is causing me conflicts since they are about financial records.
On my server I don't have any time stamp, I use MongoDB from Atlas (cloud.mongodb)
This is my query to mongoDB where when requesting the information for the month of February, it correctly returns this, but in React if the record is from February 1, 2023, it shows me February 31, 2023:
db.purchases.aggregate([
{
$match: {
"detail.category._id": ObjectId("63bf4d0b10dfcae061b6eab0")
}
},
{
$unwind: "$detail"
},
{
$group: {
_id: "$detail.category._id",
total: { $sum: "$total" },
totalProductPurchases: { $sum: "$detail.quantity" },
purchasesCount: { $sum: 1 },
purchases: {
$push: {
$cond: [
{
$and: [
{ $eq: [{ $year: "$createdAt" }, 2023] },
{ $eq: [{ $month: "$createdAt" }, 2] },
{ $gte: [{ $dayOfMonth: "$createdAt" }, 1] },
{ $lte: [{ $dayOfMonth: "$createdAt" }, 31] }
]
},
{
purchaseId: "$_id",
month: { $month: "$createdAt" },
isoWeek: { $isoDayOfWeek: "$createdAt" },
dayOfMonth: { $dayOfMonth: "$createdAt" },
createdAt: "$createdAt",
total: "$total",
subtotal: "$subtotal",
tax: "$tax",
discount: "$discount",
totalBeforeTax: "$totalBeforeTax",
createdAt: "$createdAt",
paymentType: "$paymentType"
},
false
]
}
},
product_counts: {
$push: {
product: "$detail.name",
count: "$detail.quantity"
} },
}
},
{
$unwind: "$product_counts"
},
{
$group: {
_id: "$product_counts.product",
ProductPurchasesNumber: { $sum: "$product_counts.count" },
purchases: { $first: "$purchases"},
totalProductPurchases: { $first: "$totalProductPurchases"},
total: { $first: "$total"},
purchasesCount: { $first: "$purchasesCount"}
}
},
{
$sort: { count: -1 }
},
{
$limit: 3
},
{
$group: {
_id: null,
top3Products: { $push: { product: "$_id", ProductPurchasesNumber: "$ProductPurchasesNumber" } },
total: { $first: "$total" },
totalProductPurchases: { $first: "$totalProductPurchases" },
purchasesCount: { $first: "$purchasesCount" },
purchases: { $first: "$purchases" }
}
},
{
$addFields: {
purchasesArrayLength: { $size: "$purchases" },
filteredPurchases: {
$filter: {
input: "$purchases",
as: "purchase",
cond: { $ne: [ "$$purchase", false ] }
}
}
},
},
{
$project: {
purchases: 0
}
}
])
How can I avoid these kinds of errors? Thank you.
If you only want to retrieve records matching dates in the client-side timezone, send the dates in the API call.
For example, on the React side
// These will be local date instances
const startDate = new Date(2023, 1, 1); // Start of day Feb 1st
const endDate = new Date(2023, 2, 1, 0, 0, -1); // End of day Feb 28
const params = new URLSearchParams({
startDate: startDate.toISOString(),
endDate: endDate.toISOString(),
});
fetch(`/api/purchases?${params}`)
.then(...);
and on the server-side, parse the dates from the query string and use them in your MongoDB aggregate query
const startDate = new Date(req.query.startDate);
const endDate = new Date(req.query.endDate);
Your requirements and questions are not really clear to me, but I would write the conditions like this:
{
$cond: [
{
$eq: [
{ $dateTrunc: { date: "$createdAt", unit: 'month', timezone: 'America/New_York' } },
DateTime.local({ zone: "America/New_York" }).startOf('month').toJSDate()
]
},
...
]
}
or
{
$cond: [
{
$and: [
{ $gte: ["$createdAt", DateTime.local({ zone: "America/New_York" }).startOf('month').toJSDate()] },
{ $lte: ["$createdAt", DateTime.local({ zone: "America/New_York" }).endOf('month').toJSDate()] }
]
},
...
]
}
I prefer luxon over Day.js, I think the same function are also available in Day.js.

MongoDB fill missing dates in aggregation pipeline

I have this pipeline :
let pipeline = [
{
$match: {
date: { $gte: new Date("2022-10-19"), $lte: new Date("2022-10-26") },
},
},
{
$group: {
_id: "$date",
tasks: { $push: "$$ROOT" },
},
},
{
$sort: { _id: -1 },
},
];
const aggregationData = await ScheduleTaskModel.aggregate(pipeline);
where i group all "tasks" between a date range by date and i get that result :
[
{
"date": "2022-10-21T00:00:00.000Z",
"tasks": [...tasks with this date]
},
{
"date": "2022-10-20T00:00:00.000Z",
"tasks": [...tasks with this date]
}
]
as you see i have "tasks" only for 2 dates in that range,what if i want all dates to appear even the ones with no tasks so it would be like this with empty arrays ?
[
{
"date": "2022-10-26T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-25T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-24T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-23T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-22T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-21T00:00:00.000Z",
"tasks": [...tasks with this date]
},
{
"date": "2022-10-20T00:00:00.000Z",
"tasks": [...tasks with this date]
},
{
"date": "2022-10-19T00:00:00.000Z",
"tasks": []
},
]
i tried to use $densify but unfortunately it requires upgrading my mongoDb atlas cluster which is not possible..
The answer of #WernfriedDomscheitAnother has a downside of grouping together all the documents in the collection, creating one large document, while a document has a size limit. A variation on it, without this downside, can be:
$match only the relevant document, same as in your current query
Use $facet to handle the case of no relevant documents at all. This will allow you to group all the relevant documents as you did in your query, but to keep a working-document even if there are any.
Add the relevant dates inside an array (since we use $facet this will happen even if the first match is empty)
Concatenate the array of matched data with the array of empty entries, use the real-data first.
$unwind the separate the documents by date, and $group again by date to remove the duplicates.
Format the result
db.collection.aggregate([
{$match: {date: {$gte: new Date("2022-10-19"), $lte: new Date("2022-10-26")}}},
{$facet: {
data: [
{$group: {_id: "$date", tasks: {$push: "$$ROOT"}}},
{$project: {date: "$_id", tasks: 1}}
]
}},
{$addFields: {
dates: {$map: {
input: {$range: [0, 8]},
// maybe more dynamic with $dateDiff -> { $dateDiff: { startDate: new Date("2022-10-19"), endDate: new Date("2022-10-26") }, unit: "day" } }
in: {
date: {$dateAdd: {
startDate: ISODate("2022-10-19T00:00:00.000Z"),
unit: "day",
amount: "$$this"
}},
tasks: []
}
}}
}},
{$project: {data: {$concatArrays: ["$data", "$dates"]}}},
{$unwind: "$data"},
{$group: {_id: "$data.date", "tasks": {$first: "$data.tasks"}}},
{$project: { _id: 0, date: "$_id", tasks: 1 }},
{$sort: { date: -1 }},
])
See how it works on the playground example
New function $densify would be the simplest, of course. The manual way of doing it would be this one:
db.collection.aggregate([
{
$group: {
_id: null,
data: { $push: "$$ROOT" }
}
},
{
$set: {
dates: {
$map: {
input: { $range: [ 0, 8 ] }, // maybe more dynamic with $dateDiff -> { $dateDiff: { startDate: new Date("2022-10-19"), endDate: new Date("2022-10-26") }, unit: "day" } }
in: {
date: {
$dateAdd: {
startDate: ISODate("2022-10-19T00:00:00.000Z"),
unit: "day",
amount: "$$this"
}
}
}
}
}
}
},
{
$set: {
dates: {
$map: {
input: "$dates",
as: "d",
in: {
$mergeObjects: [
"$$d",
{
tasks: {
$filter: {
input: "$data",
cond: { $eq: [ "$$d.date", "$$this.date" ] }
}
}
}
]
}
}
}
}
},
{
$project: {
data: {
$map: {
input: "$dates",
in: {
$cond: {
if: { $eq: [ "$$this.tasks", [] ] },
then: "$$this",
else: { $first: "$$this.tasks" }
}
}
}
}
}
},
{ $unwind: "$data" },
{ $replaceWith: "$data" }
])
Mongo Playground

MongoDB aggregate join two $group

I have a model that looks like this:
{
tokens: [
{
value: {
type: String,
required: true,
unique: true,
},
origin: {
type: String,
default: 'Unknown',
},
grabbedAt: {
type: Date,
default: Date.now,
},
},
], ...
}
Now I want to format the data in the following way that all "tokens" with a date of the past 12 days are returned and grouped by their origin with a count per day.
So the result would look like this: [{ origin: 'Unknown', data: [0,1,2,...] }, { origin: 'origin2', data: [1,10,...] }]
The data array would hold the count of tokens acquired on past 12 days, beginning with the first day to the 12th day.
I already tried something like this:
Account.aggregate([
{ $unwind: '$tokens' },
{ $match: { 'tokens.grabbedAt': { $gte: beforeDate } } },
{
$group: {
_id: { origin: '$tokens.origin', date: '$tokens.grabbedAt' },
count: { $sum: 1 },
},
},
{ $project: { _id: 0, origin: '$_id.origin', date: '$_id.date', count: '$count' } },
{ $sort: { date: 1 } },
]);
But using this code each date and origin is included multiple times. So how can I "join" or merge these two $groups?
One way to do it, is $unwind and $group the tokens by origin and then use 3 steps to create a list of the 12 needed dates. Then we can use a $set step to create the empty dates objects that were missing. Now we can $concatArrays our real measurements with the "artificial" ones of the empty days. The last part is just to group and sum it up.
db.collection.aggregate([
{$unwind: "$tokens"},
{$match: {"tokens.grabbedAt": {$gte: beforeDate}}},
{$sort: {"tokens.date": 1}},
{
$group: {
_id: "$tokens.origin",
res: {
$push: {
dateString: {$dateToString: {date: "$tokens.grabbedAt",
format: "%Y-%m-%d"}}, count: 1
}
}
}
},
{
$addFields: {startDate: beforeDate, range: {$range: [0, 12, 1]}}
},
{
$set: {
dateStrings: {
$map: {
input: "$range",
in: {
dateString: {
$dateToString: {
date: {
$add: [
"$startDate",
{$multiply: ["$$this", 24, 60, 60, 1000]}
]
},
format: "%Y-%m-%d"
}
},
count: 0
}
}
}
}
},
{$project: {data: {$concatArrays: ["$dateStrings", "$res"]}}},
{$unwind: "$data"},
{$group: {
_id: {origin: "$_id", date: "$data.dateString"},
count: {$sum: "$data.count"}
}
},
{$group: {_id: "$_id.origin", date: {$push: "$count"}}}
])
Playground

How to group MongoDB records by a certain date range?

I'm a little new to MongoDB and I'm having trouble querying with it.
Suppose I have the following dataset,
[
{
_id: '1',
date: "2020-12-31T22:02:11.257Z",
},
{
_id: '2',
date: "2020-12-31T22:05:11.257Z",
},
{
_id: '3',
date: "2021-01-01T22:02:11.257Z",
},
{
_id: '4',
date: "2021-01-02T12:02:11.257Z",
},
{
_id: '5',
date: "2021-01-02T22:02:11.257Z",
}
]
I'm trying to group all records by day. From my frontend, I send over a month, and then I run the query based on that. So if the user select January, I would run the following query:
router.get('/', async (req, res) => {
const {selectedMonth, selectedYear} = req.query; // january would be '1' here
const data = await db.collection.find({"date": {
"$gt": new Date(selectedYear, parseInt(selectedMonth) - 1, 1),
"$lte": new Date(selectedYear, parseInt(selectedMonth), 1)
}}).sort({ date: -1 })
Here, I'm getting the all records that are within the selected range. So, if a user selected January 2021, I'm fetching all records that are greater than December 31, 2020 and less than or equal to January 31, 2021.
The problem here is that I want to get a count of all records per day. I'm able to fetch all records within the specified date range, but I'm looking for something like the below, to be returned:
[
"2021-01-01": [
{ _id: '3', date: "2021-01-01T22:02:11.257Z" },
],
"2021-01-02": [
{ _id: '4', date: "2021-01-02T12:02:11.257Z" },
{ _id: '5', date: "2021-01-02T22:02:11.257Z" },
]
]
I was thinking of looping through the returned data and building my own response object, but I'm wondering if there's a better way to do this? Here what I'm currently doing,
const result = []
let count = 0;
data.forEach((record, index) => {
// first record will always set the base
if (index === 0) {
result.push({
date: record.date.toLocaleDateString(),
count: 1
})
} else {
// If the record is the same date, then increase counter
if (record.date.toLocaleDateString() === result[count].date) {
result[count].count = result[count].count + 1
} else {
// push a new record and increase count
result.push({
date: record.date.toLocaleDateString(),
count: 1
})
count = count + 1
}
}
});
Which yields,
result [
{ date: '1/2/2021', count: 2 },
{ date: '1/1/2021', count: 1 }
]
You'd need aggregation pipeline for this:
db.collection.aggregate([
// First Stage: filter out dates
{
$match: {
date: { $gte: new ISODate("2020-01-01"), $lt: new ISODate("2020-12-31") },
},
},
// Second Stage: group by day of the year
{
$group: {
_id: { $dateToString: { format: "%d-%m-%Y", date: "$date" } },
count: { $sum: 1 },
},
},
// Third Stage, reshape the output documents
{
$project: {
_id: 0,
date: "$_id",
count: 1
},
},
]);
What you need can be done using the aggregation framework which has a number of operators that you can use
for the different pipelines. The first pipeline step is the filtering where you use $match pipeline stage together with
$expr query operator and the $month and $year date operators:
const pipeline = [
// First pipeline step
{ '$match': {
'$expr': {
'$and': [
{ '$eq': [ { '$month': '$date' }, parseInt(selectedMonth) ] },
{ '$eq': [ { '$year': '$date' }, parseInt(selectedYear) ] }
]
}
} }
];
The next step would be to group all the documents returned after filtering by day with $dateToString within $group as follows:
const pipeline = [
// First pipeline step
{ '$match': {
'$expr': {
'$and': [
{ '$eq': [ { '$month': '$date' }, parseInt(selectedMonth) ] },
{ '$eq': [ { '$year': '$date' }, parseInt(selectedYear) ] }
]
}
} },
// Second pipeline step
{ '$group': {
'_id': { '$dateToString': { 'format': '%Y-%m-%d', 'date': '$date' } },
'data': { '$push': '$$ROOT' },
'count': { '$sum': 1 }
} }
];
The next steps will be to reshape the documents to your desired projection where you can leverage the use of $arrayToObject operator and a $replaceRoot pipeline to get the desired result.
const pipeline = [
// First pipeline step
{ '$match': {
'$expr': {
'$and': [
{ '$eq': [ { '$month': '$date' }, parseInt(selectedMonth) ] },
{ '$eq': [ { '$year': '$date' }, parseInt(selectedYear) ] }
]
}
} },
// Second pipeline step
{ '$group': {
'_id': { '$dateToString': { 'format': '%Y-%m-%d', 'date': '$date' } },
'data': { '$push': '$$ROOT' },
'count': { '$sum': 1 }
} },
// Third pipeline step
{ '$group': {
'_id': null,
'counts': {
'$push': {
'k': '$_id',
'v': {
'data': '$data',
'count': '$count'
}
}
}
} },
// Fourth pipeline step
{ '$replaceRoot': {
'newRoot': { '$arrayToObject': '$counts' }
} }
];
Which can then be combined and ran as follows:
router.get('/', async (req, res) => {
const { selectedMonth, selectedYear } = req.query; // january would be '1' here
const pipeline = [...]; // pipeline above
const data = await db.collection.aggregate(pipeline).toArray();
console.log(data);
}
For a final result of the form:
[
{
"2021-01-01": [
{ _id: '3', date: "2021-01-01T22:02:11.257Z" },
],
"2021-01-02": [
{ _id: '4', date: "2021-01-02T12:02:11.257Z" },
{ _id: '5', date: "2021-01-02T22:02:11.257Z" },
]
}
]
update your third pipeline step to:
// Third pipeline step
{ '$group': {
'_id': null,
'counts': {
'$push': {
'k': '$_id',
'v': '$data'
}
}
} },
And for a final result of the form:
[
{
"2021-01-01": 1,
"2021-01-02": 2
}
]
your third pipeline step should be:
// Third pipeline step
{ '$group': {
'_id': null,
'counts': {
'$push': {
'k': '$_id',
'v': '$count'
}
}
} },

mongo aggregation adding dates and sums

I feel like this could be really complex. I know this is possible in JS by taking each record, determine the difference in months, then increase each model.month by 1 within a for loop that stops once the months difference has been met. I just can't wrap my head around the possibility with aggregation. Any help would be a lifesaver!
Here is my data:
{
model: car
serial: bbbaaa
warranty_start_date: 03/05/2018
warranty_end_date: 06/16/2018
},
{
model: car
serial: jjjwww
warranty_start_date: 03/02/2018
warranty_end_date: 05/20/2018
},
{
model: truck
serial: tttvvv
warranty_start_date: 05/06/2016
warranty_end_date: 07/15/2016
}
This is how I want it to end up:
{
model: car
in_warranty_these_months: { 03/2018: 2, 04/2018: 2, 05/2018: 2, 06/2018: 1 }
},
{
model: truck
in_warranty_these_months: { 05/2016: 1, 06/2016: 1, 07/2016: 1 }
}
UPDATE
with major help from #mickl,
the below code works like a charm:
db.col.aggregate([
{
$addFields: {
monthsRange: {
$range: [
{ $add: [
{ $multiply: [12, {$year: "$warranty_start_date"}] },
{$month: "$warranty_start_date"} ]
},
{ $add: [
{ $multiply: [12, {$year: "$warranty_end_date"}] },
{ $add: [{$month: "$warranty_end_date"}, 1] } ]
}, 1]
}
}
},
{
$unwind: "$monthsRange"
},
{
$group: {
_id: { model: "$model", month: "$monthsRange" },
count: {$sum:1}
}
},
{
$group: {
_id: "$_id.model",
pairs: {
$push: {
k: {
$dateToString: {
format: "%m_%Y",
date: {
$dateFromParts: {
day: 1,
month: {
$cond: [{
$eq: [0, {
$mod: ["$_id.month", 12]
}]
}, {
$trunc: 12
}, {
$mod: ["$_id.month", 12]
}]
},
year: {
$cond: [{
$eq: [0, {
$mod: ["$_id.month", 12]
}]
}, {
$subtract: [{
$trunc: {
$divide: ["$_id.month", 12]
}
}, 1]
}, {
$trunc: {
$divide: ["$_id.month", 12]
}
}]
}
}
}
}
},
v: "$count"
}
}
}
},
{
$project: {
_id: 0,
model: "$_id",
in_warranty_these_months: {
$arrayToObject: "$pairs"
}
}
}
])
You can use following aggrregation:
db.col.aggregate([
{
$addFields: {
monthsRange: {
$range: [
{ $add: [
{ $multiply: [12, {$year: "$warranty_start_date"}] },
{$month: "$warranty_start_date"} ]
},
{ $add: [
{ $multiply: [12, {$year: "$warranty_end_date"}] },
{ $add: [{$month: "$warranty_end_date"}, 1] } ]
}, 1]
}
}
},
{
$unwind: "$monthsRange"
},
{
$group: {
_id: { model: "$model", month: "$monthsRange" },
count: {$sum:1}
}
},
{
$group: {
_id: "$_id.model",
pairs: {
$push: {
k: {
$dateToString: {
format: "%m/%Y",
date: {
$dateFromParts: {
day: 1,
month: { $add: [{ $mod: [ "$_id.month", 12 ] }, 1] },
year: { $trunc: { $divide: [ "$_id.month", 12 ] } }
}
}
}
},
v: "$count"
}
}
}
},
{
$project: {
_id: 0,
model: "$_id",
in_warranty_these_months: {
$arrayToObject: "$pairs"
}
}
}
])
Basically you have to generate ranges (using $range) based on your dates. To do that you can convert your dates to numbers based on following formula: 12 *year + month. This will give you a possibility to use $range to generate four values for first document, three values for second, etc.
Then you can use $group to count each month per model.
In last step we want to use $arrayToObject so we have to transform our data to objects with two properties, k and v. To do that we have to transform our numbers back to desired format using $dateFromParts and $dateToString

Categories

Resources