MongoDB fill missing dates in aggregation pipeline - javascript

I have this pipeline :
let pipeline = [
{
$match: {
date: { $gte: new Date("2022-10-19"), $lte: new Date("2022-10-26") },
},
},
{
$group: {
_id: "$date",
tasks: { $push: "$$ROOT" },
},
},
{
$sort: { _id: -1 },
},
];
const aggregationData = await ScheduleTaskModel.aggregate(pipeline);
where i group all "tasks" between a date range by date and i get that result :
[
{
"date": "2022-10-21T00:00:00.000Z",
"tasks": [...tasks with this date]
},
{
"date": "2022-10-20T00:00:00.000Z",
"tasks": [...tasks with this date]
}
]
as you see i have "tasks" only for 2 dates in that range,what if i want all dates to appear even the ones with no tasks so it would be like this with empty arrays ?
[
{
"date": "2022-10-26T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-25T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-24T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-23T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-22T00:00:00.000Z",
"tasks": []
},
{
"date": "2022-10-21T00:00:00.000Z",
"tasks": [...tasks with this date]
},
{
"date": "2022-10-20T00:00:00.000Z",
"tasks": [...tasks with this date]
},
{
"date": "2022-10-19T00:00:00.000Z",
"tasks": []
},
]
i tried to use $densify but unfortunately it requires upgrading my mongoDb atlas cluster which is not possible..

The answer of @WernfriedDomscheit has a downside: it groups together all the documents in the collection, creating one large document, while a document has a size limit. A variation on it, without this downside, can be:
$match only the relevant document, same as in your current query
Use $facet to handle the case of no relevant documents at all. This will allow you to group all the relevant documents as you did in your query, but to keep a working document even if there aren't any.
Add the relevant dates inside an array (since we use $facet this will happen even if the first match is empty)
Concatenate the array of matched data with the array of empty entries, use the real-data first.
$unwind to separate the documents by date, and $group again by date to remove the duplicates.
Format the result
// Returns one {date, tasks} entry for every day in the range 2022-10-19 .. 2022-10-26,
// with an empty tasks array for days that have no documents. Sorted newest first.
db.collection.aggregate([
  // Keep only the documents inside the requested (inclusive) date range.
  {$match: {date: {$gte: new Date("2022-10-19"), $lte: new Date("2022-10-26")}}},
  // $facet always emits exactly one document, so the later stages still run
  // when the $match above finds nothing.
  {$facet: {
    data: [
      {$group: {_id: "$date", tasks: {$push: "$$ROOT"}}},
      {$project: {date: "$_id", tasks: 1}}
    ]
  }},
  // Build a placeholder {date, tasks: []} entry for each of the 8 days in the range.
  {$addFields: {
    dates: {$map: {
      // 8 = number of days in the inclusive range. A dynamic alternative is:
      // {$range: [0, {$add: [{$dateDiff: {startDate: new Date("2022-10-19"), endDate: new Date("2022-10-26"), unit: "day"}}, 1]}]}
      input: {$range: [0, 8]},
      in: {
        date: {$dateAdd: {
          // new Date (not the shell-only ISODate helper) so the pipeline also runs
          // from Node.js / Mongoose, matching the $match stage above.
          startDate: new Date("2022-10-19T00:00:00.000Z"),
          unit: "day",
          amount: "$$this"
        }},
        tasks: []
      }
    }}
  }},
  // Real data first, placeholders second: $first below then prefers the real entry.
  {$project: {data: {$concatArrays: ["$data", "$dates"]}}},
  {$unwind: "$data"},
  // Collapse the real entry and the placeholder for the same day into one document.
  {$group: {_id: "$data.date", "tasks": {$first: "$data.tasks"}}},
  {$project: { _id: 0, date: "$_id", tasks: 1 }},
  {$sort: { date: -1 }},
])
See how it works on the playground example

New function $densify would be the simplest, of course. The manual way of doing it would be this one:
// Manual replacement for $densify: emits one {date, tasks} document per day in
// 2022-10-19 .. 2022-10-26, where tasks holds every document of that day (or []).
db.collection.aggregate([
  // Collect every input document into a single array.
  // NOTE(review): this builds one large document; consider a leading $match on the
  // date range so it stays below the BSON document size limit.
  {
    $group: {
      _id: null,
      data: { $push: "$$ROOT" }
    }
  },
  // Generate the list of days to report on.
  {
    $set: {
      dates: {
        $map: {
          // 8 = number of days in the inclusive range. A dynamic alternative is:
          // { $range: [ 0, { $add: [ { $dateDiff: { startDate: new Date("2022-10-19"), endDate: new Date("2022-10-26"), unit: "day" } }, 1 ] } ] }
          input: { $range: [ 0, 8 ] },
          in: {
            date: {
              $dateAdd: {
                // new Date instead of the shell-only ISODate helper, so the
                // pipeline also runs from Node.js / Mongoose.
                startDate: new Date("2022-10-19T00:00:00.000Z"),
                unit: "day",
                amount: "$$this"
              }
            }
          }
        }
      }
    }
  },
  // Attach to each day the documents whose date equals that day.
  {
    $set: {
      dates: {
        $map: {
          input: "$dates",
          as: "d",
          in: {
            $mergeObjects: [
              "$$d",
              {
                tasks: {
                  $filter: {
                    input: "$data",
                    cond: { $eq: [ "$$d.date", "$$this.date" ] }
                  }
                }
              }
            ]
          }
        }
      }
    }
  },
  // One output document per day. Every day keeps the same {date, tasks} shape and
  // all of its tasks (previously only the first task of a non-empty day survived).
  { $unwind: "$dates" },
  { $replaceWith: "$dates" },
  // Newest day first, as in the requested output.
  { $sort: { date: -1 } }
])
Mongo Playground

Related

Timezone Issue with DayJS React and MongoDB

In each record that I save to MongoDB, I set the creation date with Date.now, for example:
createdAt: {
type: Date,
default: Date.now
}
So in mongoDB I have the record saved as follows:
createdAt: 2023-02-01T01:39:03.377+00:00
In React I get the records and in this way I show the date using DayJS:
dayjs(createdAt).format('DD/MM/YY')
But the problem is that the date indicates that the registration was made on 02-01-2023, but when using DayJS the date changes to 01-31-2023.
Changing the day decreases the record by one day and this is causing me conflicts since they are about financial records.
On my server I don't have any time stamp, I use MongoDB from Atlas (cloud.mongodb)
This is my query to MongoDB. When requesting the information for the month of February, it correctly returns it, but in React, if the record is from February 1, 2023, it shows me January 31, 2023:
db.purchases.aggregate([
{
$match: {
"detail.category._id": ObjectId("63bf4d0b10dfcae061b6eab0")
}
},
{
$unwind: "$detail"
},
{
$group: {
_id: "$detail.category._id",
total: { $sum: "$total" },
totalProductPurchases: { $sum: "$detail.quantity" },
purchasesCount: { $sum: 1 },
purchases: {
$push: {
$cond: [
{
$and: [
{ $eq: [{ $year: "$createdAt" }, 2023] },
{ $eq: [{ $month: "$createdAt" }, 2] },
{ $gte: [{ $dayOfMonth: "$createdAt" }, 1] },
{ $lte: [{ $dayOfMonth: "$createdAt" }, 31] }
]
},
{
purchaseId: "$_id",
month: { $month: "$createdAt" },
isoWeek: { $isoDayOfWeek: "$createdAt" },
dayOfMonth: { $dayOfMonth: "$createdAt" },
createdAt: "$createdAt",
total: "$total",
subtotal: "$subtotal",
tax: "$tax",
discount: "$discount",
totalBeforeTax: "$totalBeforeTax",
createdAt: "$createdAt",
paymentType: "$paymentType"
},
false
]
}
},
product_counts: {
$push: {
product: "$detail.name",
count: "$detail.quantity"
} },
}
},
{
$unwind: "$product_counts"
},
{
$group: {
_id: "$product_counts.product",
ProductPurchasesNumber: { $sum: "$product_counts.count" },
purchases: { $first: "$purchases"},
totalProductPurchases: { $first: "$totalProductPurchases"},
total: { $first: "$total"},
purchasesCount: { $first: "$purchasesCount"}
}
},
{
$sort: { count: -1 }
},
{
$limit: 3
},
{
$group: {
_id: null,
top3Products: { $push: { product: "$_id", ProductPurchasesNumber: "$ProductPurchasesNumber" } },
total: { $first: "$total" },
totalProductPurchases: { $first: "$totalProductPurchases" },
purchasesCount: { $first: "$purchasesCount" },
purchases: { $first: "$purchases" }
}
},
{
$addFields: {
purchasesArrayLength: { $size: "$purchases" },
filteredPurchases: {
$filter: {
input: "$purchases",
as: "purchase",
cond: { $ne: [ "$$purchase", false ] }
}
}
},
},
{
$project: {
purchases: 0
}
}
])
How can I avoid these kinds of errors? Thank you.
If you only want to retrieve records matching dates in the client-side timezone, send the dates in the API call.
For example, on the React side
// These will be local date instances
const startDate = new Date(2023, 1, 1); // Start of day Feb 1st
const endDate = new Date(2023, 2, 1, 0, 0, -1); // End of day Feb 28
const params = new URLSearchParams({
startDate: startDate.toISOString(),
endDate: endDate.toISOString(),
});
fetch(`/api/purchases?${params}`)
.then(...);
and on the server-side, parse the dates from the query string and use them in your MongoDB aggregate query
const startDate = new Date(req.query.startDate);
const endDate = new Date(req.query.endDate);
Your requirements and questions are not really clear to me, but I would write the conditions like this:
{
$cond: [
{
$eq: [
{ $dateTrunc: { date: "$createdAt", unit: 'month', timezone: 'America/New_York' } },
DateTime.local({ zone: "America/New_York" }).startOf('month').toJSDate()
]
},
...
]
}
or
{
$cond: [
{
$and: [
{ $gte: ["$createdAt", DateTime.local({ zone: "America/New_York" }).startOf('month').toJSDate()] },
{ $lte: ["$createdAt", DateTime.local({ zone: "America/New_York" }).endOf('month').toJSDate()] }
]
},
...
]
}
I prefer luxon over Day.js; I think the same functions are also available in Day.js.

Creating a structure using an aggregation query that groups by 2 ids

I have a collection of various documents similar to what is shown below as 3 objects.
{
comment:{
text_sentiment: "positive",
topic: "A"
}
}, // DOC-1
{
comment:{
text_sentiment: "negative",
topic: "A"
}}, // DOC-2
{
comment:{
text_sentiment: "positive",
topic: "B"
}},..//DOC-3 ..
I want to write an aggregation that returns results in the following structure:
{
topic: "A",
topicOccurance: 2,
sentiment: {
positive: 3,
negative: 2,
neutral: 0
}
},...
I have written an aggregation that is able to group for topic and text_sentiment but I do not know, how can I create a structure similar to the one shown above. Here is the aggregation that I created.
db.MyCollection.aggregate({
$match: {
_id: "xyz",
"comment.topic": {$exists: 1},
}
},{
$group: {
_id: {
topic: "$comment.topic",
text_sentiment: "$comment.text_sentiment"
},
total: {$sum: 1},
}
},{
$project: {
topic: {
name: "$_id.topic",
occurence: "$total"
},
sentiment: "$_id.text_sentiment"
}
},{
$sort: {"topic.occurence": -1}
})
It grouped by topic and sentiment, but the structure does not match the one above. How can I get a similar structure?
Answer 1
You need 2 $group stages.
$match
$group - Group by comment.topic and comment.text_sentiment, and $sum.
$group - Group by _id.topic, $sum; and add text_sentiment and total from previous stage into text_sentiments via $push.
$project - Decorate output documents. Set sentiment with converting from text_sentiments array to key-value pair via $arrayToObject.
$sort
// Per topic: number of matching documents (topicOccurance) and a
// {sentiment: count} object built from the sentiments that actually occur.
db.collection.aggregate([
  {
    $match: {
      _id: "xyz",
      "comment.topic": {
        $exists: 1
      },
    }
  },
  // Stage 1: count documents per (topic, text_sentiment) pair.
  {
    $group: {
      _id: {
        topic: "$comment.topic",
        text_sentiment: "$comment.text_sentiment"
      },
      total: {
        $sum: 1
      },
    }
  },
  // Stage 2: fold the pairs back into one document per topic.
  {
    $group: {
      _id: "$_id.topic",
      // Add up the per-sentiment counts so this is the number of documents for
      // the topic; $sum: 1 here would only count the distinct sentiments.
      total: {
        $sum: "$total"
      },
      // k/v pairs in the shape $arrayToObject expects.
      text_sentiments: {
        $push: {
          k: "$_id.text_sentiment",
          v: "$total"
        }
      }
    }
  },
  {
    $project: {
      topic: "$_id",
      topicOccurance: "$total",
      sentiment: {
        "$arrayToObject": "$text_sentiments"
      }
    }
  },
  {
    $sort: {
      "topicOccurance": -1
    }
  }
])
Sample Mongo Playground (Answer 1)
Answer 2
As mentioned text_sentiment values are fixed, you can use the query below:
db.collection.aggregate([
{
$match: {
_id: "xyz",
"comment.topic": {
$exists: 1
},
}
},
{
$group: {
_id: "$comment.topic",
total: {
$sum: 1
},
text_sentiments: {
$push: "$comment.text_sentiment"
}
}
},
{
$project: {
topic: "$_id",
topicOccurance: "$total",
sentiment: {
"positive": {
$reduce: {
input: "$text_sentiments",
initialValue: 0,
in: {
$sum: [
"$$value",
{
"$cond": {
"if": {
$eq: [
"$$this",
"positive"
]
},
"then": 1,
"else": 0
}
}
]
}
}
},
"negative": {
$reduce: {
input: "$text_sentiments",
initialValue: 0,
in: {
$sum: [
"$$value",
{
"$cond": {
"if": {
$eq: [
"$$this",
"negative"
]
},
"then": 1,
"else": 0
}
}
]
}
}
},
"neutral": {
$reduce: {
input: "$text_sentiments",
initialValue: 0,
in: {
$sum: [
"$$value",
{
"$cond": {
"if": {
$eq: [
"$$this",
"neutral"
]
},
"then": 1,
"else": 0
}
}
]
}
}
}
}
}
},
{
$sort: {
"topicOccurance": -1
}
}
])
Disadvantage: When the text_sentiment value is added/removed, then you have to modify the query.
Sample Mongo Playground (Answer 2)
Answer 3
Another approach similar to Answer 2 is using $size and $filter to replace $reduce.
db.collection.aggregate([
{
$match: {
//_id: "xyz",
"comment.topic": {
$exists: 1
},
}
},
{
$group: {
_id: "$comment.topic",
total: {
$sum: 1
},
text_sentiments: {
$push: "$comment.text_sentiment"
}
}
},
{
$project: {
topic: "$_id",
topicOccurance: "$total",
sentiment: {
"positive": {
$size: {
$filter: {
input: "$text_sentiments",
cond: {
$eq: [
"$$this",
"positive"
]
}
}
}
},
"negative": {
$size: {
$filter: {
input: "$text_sentiments",
cond: {
$eq: [
"$$this",
"negative"
]
}
}
}
},
"neutral": {
$size: {
$filter: {
input: "$text_sentiments",
cond: {
$eq: [
"$$this",
"neutral"
]
}
}
}
},
}
}
},
{
$sort: {
"topicOccurance": -1
}
}
])
Sample Mongo Playground (Answer 3)

Mongodb, Verify if my query range is not overlapping a list of Date Ranges in mongodb

I have the following data:
const Availabilities = new Schema({
id: Number,
reservations: [{ from: Date, to: Date }]
});
const Availability = mongoose.model('Availability', Availabilities);
Basically i want to get a list of products in which the dates do not overlap with the query
[
{
id: 1,
reservations: [
{
from: '2022-01-01',
to: '2022-03-31',
},
{
from: '2022-04-01',
to: '2022-06-01',
},
],
},
{
id: 2,
reservations: [
{
from: '2022-01-01',
to: '2022-12-31',
},
],
},
{
id: 3,
reservations: [
{
from: '2022-02-01',
to: '2022-06-30',
},
],
},
]
and I want to filter all those who are not in a specific range.
Example:
query = { from: "2022-07-01", to: "2022-08-31" }
should return
[
{
id: 1,
reservations: [
{
from: '2022-01-01',
to: '2022-03-31',
},
{
from: '2022-04-01',
to: '2022-06-01',
},
],
},
{
id: 3,
reservations: [
{
from: '2022-02-01',
to: '2022-06-30',
},
],
},
]
You can just check if one of two conditions are met, either:
to is smaller than the start of your input ( meaning the session ended before )
from is bigger than the end of your input ( meaning the session started after )
This is how it'll look in code:
// Returns the products that have NO reservation overlapping the queried range
// 2022-07-01 .. 2022-08-31. Products without any reservation are returned too.
db.collection.aggregate([
  {
    $match: {
      reservations: {
        // A reservation is clear of the range only if it ended before the start
        // (to < start) or began after the end (from > end). Negating that, it
        // overlaps when from <= end AND to >= start. Reject the product if any
        // single reservation overlaps; a plain $elemMatch on the "clear"
        // condition would accept a product as soon as one reservation is clear.
        $not: {
          $elemMatch: {
            from: {
              $lte: "2022-08-31"
            },
            to: {
              $gte: "2022-07-01"
            }
          }
        }
      }
    }
  }
  // No $filter stage is needed: every reservation of a matched product is
  // already outside the queried range.
  // NOTE(review): the bounds are strings like the sample data; if from/to are
  // stored as Date (as the Mongoose schema declares), pass Date objects instead.
])
Mongo Playground

aggregate data by date interval

This is how I'm doing an aggregation query to get the average time between start and end time (both in the format ISODate("2020-02-24T13:08:00.123Z")).
But I need to split the result data into two groups, as I need to get the average for all datasets with start time 04/2019 - 09/2019 and, as the second group, all datasets with start time 10/2019 - 04/2020.
I don't understand how to group by these two intervals for an ISODate value.
const data = await Data.aggregate([
{
$match: {
type: { $exists: true },
statsIgnore: { $exists: false }
}
},
{
$group: {
_id: '$type',
Datasets: { $sum: 1 },
Average: {
$avg: {
$divide: [
{ $subtract: ['$timeEnd', '$timeStart'] },
60000
]
}
}
}
}
]).toArray()
My data structure
[
{
_id: ObjectId("5d9242cf863feb0b8d70d12e"),
timeStart: ISODate("2020-02-24T13:08:00.123Z"),
timeEnd: ISODate("2020-02-24T13:18:00.123Z"),
type: 'type1'
},
{
_id: ObjectId("5d9242cf863feb0b8d70d12f"),
timeStart: ISODate("2019-08-29T17:05:00.123Z"),
timeEnd: ISODate("2019-08-29T17:25:00.123Z"),
type: 'type1'
}
]
In this simple data example there is only one type with a single dataset for summer and a single dataset for winter interval.
So the result should be 10 minutes average for winter and 20 minutes average for summer (for type1 group).
The approach I took is to check that the timeStart is in the range you're looking for in the initial $match stage. Then I added an $addFields stage that checks if the season is summer based on the start date. Then I grouped by my new summer field.
// Average duration in minutes (timeEnd - timeStart) per type, split into the
// summer interval (04/2019 - 09/2019) and the winter interval (10/2019 - 04/2020).
// Assumes both month ranges are inclusive.
[
  {$match: {
    type: {
      $exists: true
    },
    statsIgnore: {
      $exists: false
    },
    // Only datasets that start inside one of the two intervals.
    // new Date (not the shell-only ISODate helper) so this also runs with the Node.js driver.
    timeStart: {
      $gte: new Date("2019-04-01T00:00:00Z"),
      $lt: new Date("2020-05-01T00:00:00Z")
    }
  }},
  {$addFields: {
    // Winter starts in 10/2019, so everything before 1 October (September included) is summer.
    summer: { $lt: ["$timeStart", new Date("2019-10-01T00:00:00Z")]}
  }},
  {$group: {
    // Keep the per-type grouping of the original query and split it by season.
    _id: { type: "$type", summer: "$summer" },
    Datasets: { $sum: 1 },
    Average: {
      $avg: {
        $divide: [
          // Subtracting two dates yields milliseconds; 60000 ms = 1 minute.
          { $subtract: ['$timeEnd', '$timeStart'] },
          60000
        ]
      }
    }
  }}]
Check if this meets your requirements:
db.data.aggregate([
{
$match: {
type: {
$exists: true
},
statsIgnore: {
$exists: false
}
}
},
{
$group: {
_id: {
type: "$type",
season: {
$arrayElemAt: [
[
"None",
"Winter",
"Winter",
"Spring",
"Spring",
"Spring",
"Summer",
"Summer",
"Summer",
"Autumn",
"Autumn",
"Autumn",
"Winter"
],
{
$month: "$timeStart"
}
]
}
},
Datasets: {
$sum: 1
},
Average: {
$avg: {
$divide: [
{
$subtract: [
"$timeEnd",
"$timeStart"
]
},
60000
]
}
}
}
}
])
MongoPlayground

MongoDB $lookup and $match object.key in foreign array

I am doing a $lookup to find 'events' where a customer is an attendee. The list of attendants is an array like this:
attendee: [{customer: <ID>}]
I tried this but it always returns an empty array:
$lookup: {
from: "events",
let: { customer: "$_id" },
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: ['$attendee.customer', '$$customer'] },
]
},
}
},
{ $limit: 1 },
{ $sort: {start: -1} },
{ $project: { id: "$_id", start: 1, end: 1, name: 1, host: 1 } },
],
as: "event"
}
You are matching against a field inside an array, so just replace $eq with $in.
Your new code will be
$lookup: {
from: "events",
let: { customer: "$_id" },
pipeline: [
{
$match: {
$expr: {
$in: [
"$attendee.customer",
"$$customer"
]
},
}
},
{ $limit: 1 },
{ $sort: {start: -1} },
{ $project: { id: "$_id", start: 1, end: 1, name: 1, host: 1 } },
],
as: "event"
}

Categories

Resources