mongo aggregation adding dates and sums - javascript

I feel like this could be really complex. I know this is possible in JS by taking each record, determine the difference in months, then increase each model.month by 1 within a for loop that stops once the months difference has been met. I just can't wrap my head around the possibility with aggregation. Any help would be a lifesaver!
Here is my data:
{
model: car
serial: bbbaaa
warranty_start_date: 03/05/2018
warranty_end_date: 06/16/2018
},
{
model: car
serial: jjjwww
warranty_start_date: 03/02/2018
warranty_end_date: 05/20/2018
},
{
model: truck
serial: tttvvv
warranty_start_date: 05/06/2016
warranty_end_date: 07/15/2016
}
This is how I want it to end up:
{
model: car
in_warranty_these_months: { 03/2018: 2, 04/2018: 2, 05/2018: 2, 06/2018: 1 }
},
{
model: truck
in_warranty_these_months: { 05/2016: 1, 06/2016: 1, 07/2016: 1 }
}
UPDATE
with major help from #mickl,
the below code works like a charm:
// Per model, count how many units are in warranty during each calendar month.
db.col.aggregate([
  {
    // Encode each month as the integer 12 * year + month and expand the span
    // from the start month through the end month (the end bound gets +1
    // because $range excludes its upper limit).
    $addFields: {
      monthsRange: {
        $range: [
          { $add: [{ $multiply: [12, { $year: "$warranty_start_date" }] }, { $month: "$warranty_start_date" }] },
          { $add: [{ $multiply: [12, { $year: "$warranty_end_date" }] }, { $add: [{ $month: "$warranty_end_date" }, 1] }] },
          1
        ]
      }
    }
  },
  // One document per (unit, encoded month).
  { $unwind: "$monthsRange" },
  // Number of units per model and encoded month.
  { $group: { _id: { model: "$model", month: "$monthsRange" }, count: { $sum: 1 } } },
  {
    // Collect { k, v } pairs per model; k is the month rendered as "MM_YYYY".
    $group: {
      _id: "$_id.model",
      pairs: {
        $push: {
          k: {
            $dateToString: {
              format: "%m_%Y",
              date: {
                $dateFromParts: {
                  day: 1,
                  // A remainder of 0 means December: 12 * year + 12 is a multiple of 12.
                  month: {
                    $cond: [
                      { $eq: [0, { $mod: ["$_id.month", 12] }] },
                      { $trunc: 12 },
                      { $mod: ["$_id.month", 12] }
                    ]
                  },
                  // For December the integer division lands on year + 1, so step back by one.
                  year: {
                    $cond: [
                      { $eq: [0, { $mod: ["$_id.month", 12] }] },
                      { $subtract: [{ $trunc: { $divide: ["$_id.month", 12] } }, 1] },
                      { $trunc: { $divide: ["$_id.month", 12] } }
                    ]
                  }
                }
              }
            }
          },
          v: "$count"
        }
      }
    }
  },
  // Turn the { k, v } pairs into a single object keyed by month.
  { $project: { _id: 0, model: "$_id", in_warranty_these_months: { $arrayToObject: "$pairs" } } }
])

You can use the following aggregation:
// Per model, count how many units are in warranty during each calendar month
// and return the counts as an object keyed by "MM/YYYY".
// NOTE(review): $year / $month need real date values; this assumes
// warranty_start_date and warranty_end_date are stored as dates, not as
// "MM/DD/YYYY" strings -- confirm against the collection.
db.col.aggregate([
  {
    // Encode each month as n = 12 * year + month (month is 1..12) and expand
    // the span from the start month through the end month. The end bound gets
    // +1 because $range excludes its upper limit.
    $addFields: {
      monthsRange: {
        $range: [
          { $add: [
            { $multiply: [12, { $year: "$warranty_start_date" }] },
            { $month: "$warranty_start_date" }
          ] },
          { $add: [
            { $multiply: [12, { $year: "$warranty_end_date" }] },
            { $add: [{ $month: "$warranty_end_date" }, 1] }
          ] },
          1
        ]
      }
    }
  },
  // One document per (unit, encoded month).
  { $unwind: "$monthsRange" },
  {
    // Number of units per model and encoded month.
    $group: {
      _id: { model: "$model", month: "$monthsRange" },
      count: { $sum: 1 }
    }
  },
  // $push keeps the incoming document order, so sort first to get the months
  // of each model in chronological order.
  { $sort: { "_id.month": 1 } },
  {
    $group: {
      _id: "$_id.model",
      pairs: {
        $push: {
          k: {
            $dateToString: {
              format: "%m/%Y",
              date: {
                $dateFromParts: {
                  day: 1,
                  // Decode n = 12 * year + month with month in 1..12:
                  // work on n - 1 so that December (n divisible by 12) stays
                  // in its own year and no month is shifted forward by one.
                  month: { $add: [{ $mod: [{ $subtract: ["$_id.month", 1] }, 12] }, 1] },
                  year: { $trunc: { $divide: [{ $subtract: ["$_id.month", 1] }, 12] } }
                }
              }
            }
          },
          v: "$count"
        }
      }
    }
  },
  {
    // Turn the { k, v } pairs into a single object keyed by month.
    $project: {
      _id: 0,
      model: "$_id",
      in_warranty_these_months: {
        $arrayToObject: "$pairs"
      }
    }
  }
])
Basically you have to generate ranges (using $range) based on your dates. To do that you can convert your dates to numbers based on following formula: 12 *year + month. This will give you a possibility to use $range to generate four values for first document, three values for second, etc.
Then you can use $group to count each month per model.
In last step we want to use $arrayToObject so we have to transform our data to objects with two properties, k and v. To do that we have to transform our numbers back to desired format using $dateFromParts and $dateToString

Related

Mongodb $sort based on bigger value of 2 parameters

Example Documents:
[
{"subdocument": {"value":100,"additionalValue":300}},
{"subdocument": {"value":100} // additionalValue doesn't exist on this one
]
What I want: at the end of my aggregation:
{
"largest": // The entire first item because 300 is the highest overall value
"smallest": // The entire second item because 100 is the smallest "" average
"average": 150 // 1st item average is 200, 2nd item is 100 and their combined average is 150
}
What I did:
// Pipeline stages: order by subdocument.value (descending), gather everything
// into one group, then take the first/last collected item.
// Only subdocument.value is considered here; additionalValue plays no part.
{ $sort: { 'subdocument.value': -1 } },
{
$group: {
_id: null,
// Average of subdocument.value across all documents.
average: { $avg: '$subdocument.value' },
// Whole documents, in the order produced by the preceding $sort.
items: { $push: '$$ROOT' },
},
},
// First item of the sorted array = highest subdocument.value.
{ $set: { largest: { $first: '$items' } } },
// Last item of the sorted array = lowest subdocument.value.
{ $set: { smallest: { $last: '$items' } } },
{ $project: { largest: 1, smallest: 1, average: 1 } },
But this does not include the additionalValue field.
And I don't know of any way to get a "larger one" expression
something like: average: { $max: { $larger: ['$subdocument.value', 'subdocument.additonalValue'] }},
IMPORTANT NOTE: the additionalValue field is optional.
Simply applying the operation twice on the values.
// Phase 1: reduce the collection to a single document holding the overall
// largest, smallest and average figures. Each accumulator wraps the array form
// of the same operator, so the two fields are first combined per document.
// Phase 2: look the collection up again to fetch the documents whose own
// per-document max / min equals the overall figure.
db.collection.aggregate([
  {
    $group: {
      _id: null,
      largest: { $max: { $max: ["$subdocument.value", "$subdocument.additionalValue"] } },
      smallest: { $min: { $min: ["$subdocument.value", "$subdocument.additionalValue"] } },
      average: { $avg: { $avg: ["$subdocument.value", "$subdocument.additionalValue"] } }
    }
  },
  {
    // Documents whose per-document max matches the overall largest value.
    $lookup: {
      from: "collection",
      let: { l: "$largest" },
      pipeline: [
        {
          $match: {
            $expr: {
              $eq: ["$$l", { $max: ["$subdocument.value", "$subdocument.additionalValue"] }]
            }
          }
        }
      ],
      as: "largestItems"
    }
  },
  {
    // Documents whose per-document min matches the overall smallest value.
    $lookup: {
      from: "collection",
      let: { s: "$smallest" },
      pipeline: [
        {
          $match: {
            $expr: {
              $eq: ["$$s", { $min: ["$subdocument.value", "$subdocument.additionalValue"] }]
            }
          }
        }
      ],
      as: "smallestItems"
    }
  }
])
Mongo Playground
Just as reference the solution by #ray combined with a filter instead of lookup that I came up with:
{ $sort: { 'subdoc.value': -1 } },
{
$group: {
_id: null,
min: { $min: '$subdoc.value' },
average: {
$avg: { $avg: ['$subdoc.value', '$subdoc.additionalValue'] },
},
max: {
$max: { $max: ['$subdoc.additionalValue', '$subdoc.value'] },
},
items: { $push: '$$ROOT' },
},
},
{
$set: {
largest: {
$first: {
$filter: {
input: '$items',
as: 'item',
cond: {
$eq: [
{
$max: [
'$$item.subdoc.value',
'$$item.subdoc.additionalValue',
],
},
'$max',
],
},
},
},
},
},
},
{ $set: { smallest: { $last: '$items' } } },
{ $project: { largest: 1, smallest: 1, averaget: 1 } },

Timezone Issue with DayJS React and MongoDB

In each record that I save to MongoDB, I set the creation date with Date.now, for example:
createdAt: {
type: Date,
default: Date.now
}
So in mongoDB I have the record saved as follows:
createdAt: 2023-02-01T01:39:03.377+00:00
In React I get the records and in this way I show the date using DayJS:
dayjs(createdAt).format('DD/MM/YY')
But the problem is that the stored date indicates that the record was created on 02-01-2023, but when it is formatted with DayJS the date changes to 01-31-2023
Changing the day decreases the record by one day and this is causing me conflicts since they are about financial records.
On my server I don't have any time stamp, I use MongoDB from Atlas (cloud.mongodb)
This is my query to MongoDB. When requesting the information for the month of February it correctly returns the record, but in React, if the record is from February 1, 2023, it is shown as January 31, 2023:
// Report for one product category: overall totals, the purchases made in
// February 2023, and the three products with the highest purchased quantity.
// Note: $year / $month / $dayOfMonth evaluate in UTC unless a `timezone`
// option is passed, so the month filter below is a UTC month.
db.purchases.aggregate([
  {
    $match: {
      "detail.category._id": ObjectId("63bf4d0b10dfcae061b6eab0")
    }
  },
  {
    // One document per purchased line item.
    $unwind: "$detail"
  },
  {
    $group: {
      _id: "$detail.category._id",
      total: { $sum: "$total" },
      totalProductPurchases: { $sum: "$detail.quantity" },
      purchasesCount: { $sum: 1 },
      purchases: {
        // Push a summary for purchases created in February 2023 and `false`
        // for everything else; the `false` markers are filtered out at the end.
        $push: {
          $cond: [
            {
              $and: [
                { $eq: [{ $year: "$createdAt" }, 2023] },
                { $eq: [{ $month: "$createdAt" }, 2] },
                { $gte: [{ $dayOfMonth: "$createdAt" }, 1] },
                { $lte: [{ $dayOfMonth: "$createdAt" }, 31] }
              ]
            },
            {
              purchaseId: "$_id",
              month: { $month: "$createdAt" },
              // NOTE(review): despite the name this holds the ISO day of the week (1-7).
              isoWeek: { $isoDayOfWeek: "$createdAt" },
              dayOfMonth: { $dayOfMonth: "$createdAt" },
              createdAt: "$createdAt",
              total: "$total",
              subtotal: "$subtotal",
              tax: "$tax",
              discount: "$discount",
              totalBeforeTax: "$totalBeforeTax",
              paymentType: "$paymentType"
            },
            false
          ]
        }
      },
      product_counts: {
        $push: {
          product: "$detail.name",
          count: "$detail.quantity"
        }
      },
    }
  },
  {
    $unwind: "$product_counts"
  },
  {
    // Total quantity per product; the category-level figures are carried along.
    $group: {
      _id: "$product_counts.product",
      ProductPurchasesNumber: { $sum: "$product_counts.count" },
      purchases: { $first: "$purchases" },
      totalProductPurchases: { $first: "$totalProductPurchases" },
      total: { $first: "$total" },
      purchasesCount: { $first: "$purchasesCount" }
    }
  },
  {
    // Sort on the per-product total. The previous key `count` no longer exists
    // after the $group above, which made the following $limit pick arbitrary
    // products instead of the top three.
    $sort: { ProductPurchasesNumber: -1 }
  },
  {
    $limit: 3
  },
  {
    $group: {
      _id: null,
      top3Products: { $push: { product: "$_id", ProductPurchasesNumber: "$ProductPurchasesNumber" } },
      total: { $first: "$total" },
      totalProductPurchases: { $first: "$totalProductPurchases" },
      purchasesCount: { $first: "$purchasesCount" },
      purchases: { $first: "$purchases" }
    }
  },
  {
    $addFields: {
      // Length of the raw array, including the `false` placeholders.
      purchasesArrayLength: { $size: "$purchases" },
      // Only the real purchase summaries.
      filteredPurchases: {
        $filter: {
          input: "$purchases",
          as: "purchase",
          cond: { $ne: ["$$purchase", false] }
        }
      }
    },
  },
  {
    // Drop the unfiltered array from the result.
    $project: {
      purchases: 0
    }
  }
])
How can I avoid these kinds of errors? Thank you.
If you only want to retrieve records matching dates in the client-side timezone, send the dates in the API call.
For example, on the React side
// These will be local date instances
const startDate = new Date(2023, 1, 1); // Start of day Feb 1st
const endDate = new Date(2023, 2, 1, 0, 0, -1); // End of day Feb 28
const params = new URLSearchParams({
startDate: startDate.toISOString(),
endDate: endDate.toISOString(),
});
fetch(`/api/purchases?${params}`)
.then(...);
and on the server-side, parse the dates from the query string and use them in your MongoDB aggregate query
const startDate = new Date(req.query.startDate);
const endDate = new Date(req.query.endDate);
Your requirements and questions are not really clear to me, but I would write the conditions like this:
{
$cond: [
{
$eq: [
{ $dateTrunc: { date: "$createdAt", unit: 'month', timezone: 'America/New_York' } },
DateTime.local({ zone: "America/New_York" }).startOf('month').toJSDate()
]
},
...
]
}
or
{
$cond: [
{
$and: [
{ $gte: ["$createdAt", DateTime.local({ zone: "America/New_York" }).startOf('month').toJSDate()] },
{ $lte: ["$createdAt", DateTime.local({ zone: "America/New_York" }).endOf('month').toJSDate()] }
]
},
...
]
}
I prefer luxon over Day.js, I think the same function are also available in Day.js.

MongoDB fill missing dates in aggregation pipeline

I have this pipeline :
// Groups the tasks dated 2022-10-19 through 2022-10-26 by their exact `date`
// value, newest date first. Dates without any task produce no group at all.
let pipeline = [
{
$match: {
date: { $gte: new Date("2022-10-19"), $lte: new Date("2022-10-26") },
},
},
{
$group: {
_id: "$date",
// Full task documents that share this date.
tasks: { $push: "$$ROOT" },
},
},
{
// _id holds the date, so this orders the groups from newest to oldest.
$sort: { _id: -1 },
},
];
const aggregationData = await ScheduleTaskModel.aggregate(pipeline);
where i group all "tasks" between a date range by date and i get that result :
[
{
"date": "2022-10-21T00:00:00.000Z",
"tasks": [...tasks with this date]
},
{
"date": "2022-10-20T00:00:00.000Z",
"tasks": [...tasks with this date]
}
]
as you see i have "tasks" only for 2 dates in that range,what if i want all dates to appear even the ones with no tasks so it would be like this with empty arrays ?
[
{
"date": "2022-10-26T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-25T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-24T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-23T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-22T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-21T00:00:00.000Z",
"tasks": [...tasks with this date]
},
{
"date": "2022-10-20T00:00:00.000Z",
"tasks": [...tasks with this date]
},
{
"date": "2022-10-19T00:00:00.000Z",
"tasks": []
},
]
i tried to use $densify but unfortunately it requires upgrading my mongoDb atlas cluster which is not possible..
The answer of #WernfriedDomscheitAnother has a downside of grouping together all the documents in the collection, creating one large document, while a document has a size limit. A variation on it, without this downside, can be:
$match only the relevant document, same as in your current query
Use $facet to handle the case of no relevant documents at all. This will allow you to group all the relevant documents as you did in your query, but to keep a working document even if there are none.
Add the relevant dates inside an array (since we use $facet this will happen even if the first match is empty)
Concatenate the array of matched data with the array of empty entries, use the real-data first.
$unwind to separate the documents by date, and $group again by date to remove the duplicates.
Format the result
// Returns one { date, tasks } document per day from 2022-10-19 through
// 2022-10-26, with an empty tasks array for days that have no task.
db.collection.aggregate([
{$match: {date: {$gte: new Date("2022-10-19"), $lte: new Date("2022-10-26")}}},
// $facet always emits exactly one document, even when nothing matched above.
{$facet: {
data: [
{$group: {_id: "$date", tasks: {$push: "$$ROOT"}}},
{$project: {date: "$_id", tasks: 1}}
]
}},
// Build one placeholder { date, tasks: [] } per day: offsets 0..7 added to the start date.
{$addFields: {
dates: {$map: {
input: {$range: [0, 8]},
// To derive the upper bound instead of hard-coding 8, compute the day count with $dateDiff
// and add 1 because $range excludes its end:
// { $add: [ { $dateDiff: { startDate: new Date("2022-10-19"), endDate: new Date("2022-10-26"), unit: "day" } }, 1 ] }
in: {
date: {$dateAdd: {
startDate: ISODate("2022-10-19T00:00:00.000Z"),
unit: "day",
amount: "$$this"
}},
tasks: []
}
}}
}},
// Real groups first, placeholders second, so that $first below prefers real data.
{$project: {data: {$concatArrays: ["$data", "$dates"]}}},
{$unwind: "$data"},
// Collapse duplicates per date, keeping the first tasks array seen for that date.
{$group: {_id: "$data.date", "tasks": {$first: "$data.tasks"}}},
{$project: { _id: 0, date: "$_id", tasks: 1 }},
{$sort: { date: -1 }},
])
See how it works on the playground example
New function $densify would be the simplest, of course. The manual way of doing it would be this one:
// Manual replacement for $densify: builds the list of days from 2022-10-19
// onwards (8 days) and attaches the matching documents to each day.
db.collection.aggregate([
{
// Collect every document of the collection into a single array.
$group: {
_id: null,
data: { $push: "$$ROOT" }
}
},
{
// One { date } entry per day: offsets 0..7 added to the start date.
$set: {
dates: {
$map: {
input: { $range: [ 0, 8 ] }, // to derive the bound: { $add: [ { $dateDiff: { startDate: new Date("2022-10-19"), endDate: new Date("2022-10-26"), unit: "day" } }, 1 ] } -- +1 because $range excludes its end
in: {
date: {
$dateAdd: {
startDate: ISODate("2022-10-19T00:00:00.000Z"),
unit: "day",
amount: "$$this"
}
}
}
}
}
}
},
{
// Attach to each day the collected documents whose date equals that day.
$set: {
dates: {
$map: {
input: "$dates",
as: "d",
in: {
$mergeObjects: [
"$$d",
{
tasks: {
$filter: {
input: "$data",
cond: { $eq: [ "$$d.date", "$$this.date" ] }
}
}
}
]
}
}
}
}
},
{
// Days without a match stay as { date, tasks: [] }; days with matches are
// replaced by the first matching document only.
$project: {
data: {
$map: {
input: "$dates",
in: {
$cond: {
if: { $eq: [ "$$this.tasks", [] ] },
then: "$$this",
else: { $first: "$$this.tasks" }
}
}
}
}
}
},
// Emit one top-level document per day.
{ $unwind: "$data" },
{ $replaceWith: "$data" }
])
Mongo Playground

Mongodb time difference check

here's my data in a mongodb document
[
{
startTime: "08:00",
endTime: "09:00",
id: 1
},
{
startTime: "08:10",
endTime: "09:00",
id: 2
},
{
startTime: "10:00",
endTime: "11:00",
id: 3
}
]
I need to find a way to get time overlapping elements using mongoose with node.js
I tried using $dateFromString but no luck so far.
for example item id 1 and 2 has time overlap
// Finds groups of documents with overlapping time ranges. Documents are
// walked in startTime order, each one is compared with the endTime of the
// document before it, and consecutive overlapping documents get the same
// running group number.
// NOTE(review): startTime / endTime are compared as given; with "HH:MM"
// strings this relies on zero-padded values sorting chronologically -- confirm.
db.collection.aggregate([
{
$match: {}
},
{
// Attach the previous document's endTime (in startTime order).
$setWindowFields: {
partitionBy: "",
sortBy: { startTime: 1 },
output: {
previous_endTime: {
$shift: {
output: "$endTime",
by: -1,
default: "Not available"
}
}
}
}
},
{
// c = 0 when the previous endTime is at or after both this document's
// endTime and startTime (treated as overlapping); otherwise c = 1.
$set: {
c: {
$cond: {
if: {
$and: [
{ $gte: [ "$previous_endTime", "$endTime" ] },
{ $gte: [ "$previous_endTime", "$startTime" ] }
]
},
then: 0,
else: 1
}
}
}
},
{
// Running sum of c: the value only increases when c = 1, so consecutive
// documents with c = 0 share the number of the document before them.
$setWindowFields: {
partitionBy: "",
sortBy: { startTime: 1 },
output: {
c: {
$sum: "$c",
window: { documents: [ "unbounded", "current" ] }
}
}
}
},
{
// One group per running number, with its size and member ids.
$group: {
_id: "$c",
c: { $sum: 1 },
id_List: { $push: "$id" }
}
},
{
// Keep only groups with more than one member.
$match: { c: { $gt: 1 } }
}
])
mongoplayground

aggregate data by date interval

This is how I'm doing an aggregation query to get the average time between start and end time (both in the format ISODate("2020-02-24T13:08:00.123Z")).
But I need to split the result data into two groups: I need the average for all datasets with a start time in 04/2019 - 09/2019, and, as a second group, for all datasets with a start time in 10/2019 - 04/2020.
I don't understand how to group an ISODate value by these two intervals.
// Per `type`: number of datasets and the average of (timeEnd - timeStart) / 60000.
// Only documents that have a `type` and no `statsIgnore` field are considered.
const data = await Data.aggregate([
{
$match: {
type: { $exists: true },
statsIgnore: { $exists: false }
}
},
{
$group: {
_id: '$type',
Datasets: { $sum: 1 },
Average: {
$avg: {
// Subtracting two dates yields milliseconds; dividing by 60000 converts to minutes.
$divide: [
{ $subtract: ['$timeEnd', '$timeStart'] },
60000
]
}
}
}
}
]).toArray()
My data structure
[
{
_id: ObjectId("5d9242cf863feb0b8d70d12e"),
timeStart: ISODate("2020-02-24T13:08:00.123Z"),
timeEnd: ISODate("2020-02-24T13:18:00.123Z"),
type: 'type1'
},
{
_id: ObjectId("5d9242cf863feb0b8d70d12f"),
timeStart: ISODate("2019-08-29T17:05:00.123Z"),
timeEnd: ISODate("2019-08-29T17:25:00.123Z"),
type: 'type1'
}
]
In this simple data example there is only one type with a single dataset for summer and a single dataset for winter interval.
So the result should be 10 minutes average for winter and 20 minutes average for summer (for type1 group).
The approach I took is to check that the timeStart is in the range you're looking for in the initial $match stage. Then I added an $addFields stage that checks if the season is summer based on the start date. Then I grouped by my new summer field.
// Average duration in minutes, split into a "summer" group (timeStart in
// 04/2019 - 09/2019, _id: true) and a "winter" group (timeStart from 10/2019
// onwards, _id: false).
[
  {
    $match: {
      type: {
        $exists: true
      },
      statsIgnore: {
        $exists: false
      },
      // Restrict to the overall period under analysis.
      // NOTE(review): the upper bound excludes April 2020; if "10/2019 - 04/2020"
      // is meant to include April, raise it to 2020-05-01 -- confirm with the asker.
      timeStart: {
        $gte: ISODate("2019-04-01T00:00:00Z"),
        $lt: ISODate("2020-04-01T00:00:00Z")
      }
    }
  },
  {
    $addFields: {
      // Summer runs through the end of September, so the cutoff is October 1st.
      // The previous cutoff of September 1st counted September as winter.
      summer: { $lt: ["$timeStart", ISODate("2019-10-01T00:00:00Z")] }
    }
  },
  {
    $group: {
      _id: "$summer",
      Average: {
        $avg: {
          // Date difference is in milliseconds; 60000 ms = 1 minute.
          $divide: [
            { $subtract: ['$timeEnd', '$timeStart'] },
            60000
          ]
        }
      }
    }
  }
]
Check if this meets your requirements:
// Per (type, season): number of datasets and the average of
// (timeEnd - timeStart) / 60000. The season is looked up by the month number
// of timeStart in a 13-entry table (index 0 is a "None" filler because
// $month is 1-based).
db.data.aggregate([
  {
    $match: {
      type: { $exists: true },
      statsIgnore: { $exists: false }
    }
  },
  {
    $group: {
      _id: {
        type: "$type",
        season: {
          $arrayElemAt: [
            [
              "None",
              "Winter", "Winter",
              "Spring", "Spring", "Spring",
              "Summer", "Summer", "Summer",
              "Autumn", "Autumn", "Autumn",
              "Winter"
            ],
            { $month: "$timeStart" }
          ]
        }
      },
      Datasets: { $sum: 1 },
      Average: {
        $avg: {
          $divide: [{ $subtract: ["$timeEnd", "$timeStart"] }, 60000]
        }
      }
    }
  }
])
MongoPlayground

Categories

Resources