MongoDB Conditional Projection based on existence of query of subdocument in Array - javascript

I have a schema in which properties can have respective "override" documents stored in an Array("overrides")
E.g.
{
foo:'original foo',
overrides: [
{property:'foo', value:'foo override'},
{property:'bar', value:'bar override'},
]
}
I want to project a field for the override value if it exists, otherwise, the original property.
So something like this
project: { overrideOrOriginal: {$cond: fooOverrideExists ? fooOverrideValue : originalFooValue }
So in this example, I would expect overrideOrOriginal to equal 'foo override' . If - {property:'foo', value:'foo override'} subDoc didn't exist in the overrides array (or if overrides array itself didn't even exist)...then I'd expect overrideOrOriginal = 'original foo'
How can I do this?
I was thinking I'd need $exists in tandem with $cond. But the complication here is that I'm searching for a subDoc in an Array based on a query
Thanks!

$ifNull to check if field is null then return empty array
$in to check "foo" is in overrides.property array
$indexOfArray to get index of array element in overrides.property array
$arrayElemAt to get element by specific index return from above operator
let fooOverrideExists = "foo";
db.collection.find({},
{
overrideOrOriginal: {
$cond: [
{
$in: [
fooOverrideExists,
{ $ifNull: ["$overrides.property", []] }
]
},
{
$arrayElemAt: [
"$overrides.value",
{ $indexOfArray: ["$overrides.property", fooOverrideExists] }
]
},
"$foo"
]
}
})
Playground

Query
find the property , key-value(kv) (it works for all property names)
(assumes your schema with only string value the value of that property)
checks if that it exists in the overrides array
if it exists, takes the value from the array
else keeps the original
*checks also cases where override doesnt exists, or its empty array, or property doesn't exist
*in case you want to do it only for a specific "foo" ignore the big first $set and use this code
Test code here
db.collection.aggregate([
{
"$set": {
"kv": {
"$arrayElemAt": [
{
"$filter": {
"input": {
"$objectToArray": "$$ROOT"
},
"cond": {
"$eq": [
{
"$type": "$$this.v"
},
"string"
]
}
}
},
0
]
}
}
},
{
"$set": {
"index": {
"$indexOfArray": [
"$overrides.property",
"$kv.k"
]
}
}
},
{
"$project": {
"_id": 0,
"overrideOrOriginal": {
"$cond": [
{
"$or": [
{
"$eq": [
"$index",
-1
]
},
{
"$not": [
"$overrides"
]
}
]
},
"$kv.v",
{
"$arrayElemAt": [
"$overrides.value",
"$index"
]
}
]
}
}
}
])

Related

Convert $objectToArray map element to String

I have a collection in database that I am trying to retrieve some data from it , the query is working fine when $orderID has string elements , but is failing when $orderID has some numbers in array , and it is throwing
query failed: (Location40395) PlanExecutor error during aggregation :: caused by :: $arrayToObject requires an array of key-value pairs, where the key must be of type string. Found key type: double
I think there must be some old data when we were saving orderID as a number so that is why it is failing from some range of dates
Query
{
"Order_Details": {
"$map": {
"input": {
"$objectToArray": {
"$arrayToObject": {
"$zip": {
"inputs": [
"$orderID",
"$total_value_of_order"
]
}
}
}
},
"as": "el",
"in": {
"orderID": "$$el.k",
"total_value_of_order": "$$el.v"
}
}
}
}
I am trying to typecast el.k to string I am using $toString but can't seem to work , the way I am trying it is
{
"as": "el",
"in": {
"orderID": {
"$toString": "$$el.k"
},
"total_value_of_order": "$$el.v"
}
}
Example collection
[
{
"_id": ObjectId("5e529ee5f8647eb59e5620a2"),
"visitID": "dVmy7flXFHzzkn9HiMt8IoWvthoTZW",
"date": ISODate("2022-02-08T16:29:13.413Z"),
"control": true,
"orderID": [
122343242
],
"target": "test",
"total_value_of_order": [
60
]
}
]
You are close, the approach is fine. you just have a couple of syntax issues.
The major thing that needs to change is the input for $arrayToObject, currently your input looks like this:
[[number, number], [number, number]]
However $arrayToObject expects input in a certain format:
[{k: string, v: value}]
So this it what we'll add, like so:
db.collection.aggregate([
{
$project: {
"Order_Details": {
"$map": {
"input": {
"$objectToArray": {
"$arrayToObject": {
$map: {
input: {
"$zip": {
"inputs": [
"$orderID",
"$total_value_of_order"
]
}
},
in: {
k: {
$toString: {
"$arrayElemAt": [
"$$this",
0
]
}
},
v: {
"$arrayElemAt": [
"$$this",
1
]
}
}
}
}
}
},
"as": "el",
"in": {
"orderID": "$$el.v",
"total_value_of_order": "$$el.k"
}
}
}
}
}
])
Mongo Playground
Notice the "orderid" format changes to string which affects it's structure, I recommend just switching between the k and v in the pipeline, like this

How to avoid time gaps while aggregating OHLC in mongoDB

I am trying to extract ohlc 5 min interval from a 1 min interval DB stored in mongoDB. Below is my current query.
myModel.aggregate([
{"$project":
{
"data":
{
"$let":
{
"vars":
{
"mints":{"$arrayElemAt":[{"$arrayElemAt":["$data",0]},0]},
"maxts":{"$arrayElemAt":[{"$arrayElemAt":["$data",-1]},0]}
},
"in":
{
"$map":
{
"input":{"$range":["$$mints",{"$add":["$$maxts",300]},300]},
"as":"rge",
"in":
{
"$let":
{
"vars":
{
"five":
{
"$filter":
{
"input":"$data",
"as":"fres",
"cond":
{
"$and":
[
{"$gte":[{"$arrayElemAt":["$$fres",0]},"$$rge"]},
{"$lt":[{"$arrayElemAt":["$$fres",0]},{"$add":["$$rge",300]}]}
]
}
}
}
},
"in":
[
{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},0]},
{"$arrayElemAt":[{"$arrayElemAt":["$$five",0]},1]},
{"$max":{"$map":{"input":"$$five","as":"res","in":{"$arrayElemAt":["$$res",2]}}}},
{"$min":{"$map":{"input":"$$five","as":"res","in":{"$arrayElemAt":["$$res",3]}}}},
{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-2]},
{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-1]}
]
}
}
}
}
}
}
}
}
]);
It seem to extract the 5 min but not taking care of gaps in 1 min interval data. Instead for those time instants, I am getting null array. How do we avoid null arrays?
Sample 1 DB data:
https://gist.github.com/parthi2929/36e6898cff7be45ccdd008ec750e70e9
5 min extracted output snapshot is here
As you can see in snapshot, I get lot of null arrays. How do I avoid them?
I tried inserting { "$ne":[{"$arrayElemAt":["$$fres",0]},null] } in $and operator, but it did not help.
Update 14th Feb 2018: As per Veeram's suggestion, below is the modified code incorporated with the suggested changes. However, I still get one empty array (that is apparently many empty arrays in that time gap is now gone, but replaced by single empty array) which should also be fixed.
db.getCollection('ohlc-koinex-1').aggregate(
[
{"$project":
{
"data":
{
"$let":
{
"vars":
{
"mints":{"$arrayElemAt":[{"$arrayElemAt":["$data",0]},0]},
"maxts":{"$arrayElemAt":[{"$arrayElemAt":["$data",-1]},0]}
},
"in":
{
"$setDifference":
[
{
"$map":
{
"input":{"$range":["$$mints",{"$add":["$$maxts",300]},300]},
"as":"rge",
"in":
{
"$let":
{
"vars":
{
"five":
{
"$filter":
{
"input":"$data",
"as":"fres",
"cond":
{
"$and":
[
{"$gte":[{"$arrayElemAt":["$$fres",0]},"$$rge"]},
{"$lt":[{"$arrayElemAt":["$$fres",0]},{"$add":["$$rge",300]}]}
]
}
}
}
},
"in":
{
"$cond":[
{"$eq":["$$five",[]]},
"$$five",
[
{"$arrayElemAt": [{"$arrayElemAt":["$$five",-1]},0]},
{"$arrayElemAt":[{"$arrayElemAt":["$$five",0]},1]},
{"$max":{"$map":{"input":"$$five","as":"res","in":{"$arrayElemAt":["$$res",2]}}}},
{"$min":{"$map":{"input":"$$five","as":"res","in":{"$arrayElemAt":["$$res",3]}}}},
{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-2]},
{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-1]}
]
]
}
}
}
}
},[]
]
}
}
}
}
}
]
)
Here is the snapshot of the result
You can add $cond operator to account for gaps followed by $filter to filter the empty array values.
You require two changes.
The first change to store [] values for gaps instead of array with null values.
Update inner $let expression to below:
{
"$let":{
"vars":{"five":...},
"in":{
"$cond":[
{"$eq":["$$five",[]]},
"$$five",
[{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},0]},
....
{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-1]}]
]
}
}
}
The second change to filter the empty array values from the output.
{
"$project":{
"data":{
"$let":{
"vars":{"mints":...},
"in":{"$filter":{"input":{"$map":...},as:"flr", "cond":{"$ne":["$$flr",[]]}}}
}
}
}
}

How to retrieve documents with conditioning an array of nested objects?

The structure of the objects stored in mongodb is the following:
obj = {_id: "55c898787c2ab821e23e4661", ingredients: [{name: "ingredient1", value: "70.2"}, {name: "ingredient2", value: "34"}, {name: "ingredient3", value: "15.2"}, ...]}
What I would like to do is retrieve all documents, which value of specific ingredient is greater than arbitrary number.
To be more specific, suppose we want to retrieve all the documents which contain ingredient with name "ingredient1" and its value is greater than 50.
Trying the following I couldn't retrieve desired results:
var collection = db.get('docs');
var queryTest = collection.find({$where: 'this.ingredients.name == "ingredient1" && parseFloat(this.ingredients.value) > 50'}, function(e, docs) {
console.log(docs);
});
Does anyone know what is the correct query to condition upon specific array element names and values?
Thanks!
You really don't need the JavaScript evaluation of $where here, just use basic query operators with an $elemMatch query for the array. While true that the "value" elements here are in fact strings, this is not really the point ( as I explain at the end of this ). The main point is to get it right the first time:
collection.find(
{
"ingredients": {
"$elemMatch": {
"name": "ingredient1",
"value": { "$gt": 50 }
}
}
},
{ "ingredients.$": 1 }
)
The $ in the second part is the postional operator, which projects only the matched element of the array from the query conditions.
This is also considerably faster than the JavaScript evaluation, in both that the evaluation code does not need to be compiled and uses native coded operators, as well as that an "index" can be used on the "name" and even "value" elements of the array to aid in filtering the matches.
If you expect more than one match in the array, then the .aggregate() command is the best option. With modern MongoDB versions this is quite simple:
collection.aggregate([
{ "$match": {
"ingredients": {
"$elemMatch": {
"name": "ingredient1",
"value": { "$gt": 50 }
}
}
}},
{ "$redact": {
"$cond": {
"if": {
"$and": [
{ "$eq": [ { "$ifNull": [ "$name", "ingredient1" ] }, "ingredient1" ] },
{ "$gt": [ { "$ifNull": [ "$value", 60 ] }, 50 ] }
]
},
"then": "$$DESCEND",
"else": "$$PRUNE"
}
}}
])
And even simplier in forthcoming releases which introduce the $filter operator:
collection.aggregate([
{ "$match": {
"ingredients": {
"$elemMatch": {
"name": "ingredient1",
"value": { "$gt": 50 }
}
}
}},
{ "$project": {
"ingredients": {
"$filter": {
"input": "$ingredients",
"as": "ingredient",
"cond": {
"$and": [
{ "$eq": [ "$$ingredient.name", "ingredient1" ] },
{ "$gt": [ "$$ingredient.value", 50 ] }
]
}
}
}
}}
])
Where in both cases you are effectively "filtering" the array elements that do not match the conditions after the initial document match.
Also, since your "values" are actually "strings" right now, you reaally should change this to be numeric. Here is a basic process:
var bulk = collection.initializeOrderedBulkOp(),
count = 0;
collection.find().forEach(function(doc) {
doc.ingredients.forEach(function(ingredient,idx) {
var update = { "$set": {} };
update["$set"]["ingredients." + idx + ".value"] = parseFloat(ingredients.value);
bulk.find({ "_id": doc._id }).updateOne(update);
count++;
if ( count % 1000 != 0 ) {
bulk.execute();
bulk = collection.initializeOrderedBulkOp();
}
})
]);
if ( count % 1000 != 0 )
bulk.execute();
And that will fix the data so the query forms here work.
This is much better than processing with JavaScript $where which needs to evaluate every document in the collection without the benefit of an index to filter. Where the correct form is:
collection.find(function() {
return this.ingredients.some(function(ingredient) {
return (
( ingredient.name === "ingredient1" ) &&
( parseFloat(ingredient.value) > 50 )
);
});
})
And that can also not "project" the matched value(s) in the results as the other forms can.
Try using $elemMatch:
var queryTest = collection.find(
{ ingredients: { $elemMatch: { name: "ingredient1", value: { $gte: 50 } } } }
);

How to Sort by Weighted Values

I have this problem that I want to sort the result of a query based on the field values from another collection,
Problem: I want to first get the user 123 friends and then get their posts and then sort the post with the friends strength value,
I have this :
POST COLLECTON:
{
user_id: 8976,
post_text: 'example working',
}
{
user_id: 673,
post_text: 'something',
}
USER COLLECTON:
{
user_id: 123,
friends: {
{user_id: 673,strength:4}
{user_id: 8976,strength:1}
}
}
Based on the information you have retrieved from your user you essentially want to come out to an aggregation framework query that looks like this:
db.posts.aggregate([
{ "$match": { "user_id": { "$in": [ 673, 8976 ] } } },
{ "$project": {
"user_id": 1,
"post_text": 1,
"weight": {
"$cond": [
{ "$eq": [ "$user_id", 8976 ] },
1,
{ "$cond": [
{ "$eq": [ "$user_id", 673 ] },
4,
0
]}
]
}
}},
{ "$sort": { "weight": -1 } }
])
So why aggregation when this does not aggregate? As you can see, the aggregation framework does more than just aggregate. Here it is being used to "project" a new field into the document an populate it with a "weight" to sort on. This allows you to get the results back ordered by the value you want them to be sorted on.
Of course, you need to get from your initial data to this form in a "generated" way that you do do for any data. This takes a few steps, but here I'll present the JavaScript way to do it, which should be easy to convert to most languages
Also presuming your actual "user" looks more like this, which would be valid:
{
"user_id": 123,
"friends": [
{ "user_id": 673, "strength": 4 },
{ "user_id": 8976, "strength": 1 }
]
}
From an object like this you then construct the aggregation pipeline:
// user is the structure shown above
var stack = [];
args = [];
user.friends.forEach(function(friend) {
args.push( friend.user_id );
var rec = {
"$cond": [
{ "$eq": [ "user_id", friend.user_id ] },
friend.strength
]
};
if ( stack.length == 0 ) {
rec["$cond"].push(0);
} else {
var last = stack.pop();
rec["$cond"].push( last );
}
stack.push( rec );
});
var pipeline = [
{ "$match": { "user_id": { "$in": args } } },
{ "$project": {
"user_id": 1,
"post_text": 1,
"weight": stack[0]
}},
{ "$sort": { "weight": -1 } }
];
db.posts.aggregate(pipeline);
And that is all there is to it. Now you have some code to go through the list of "friends" for a user and construct another query to get all posts from those friends weighted by the "strength" value for each.
Of course you could do much the same things with a query for all posts by just removing or changing the $match, but keeping the "weight" projection you can "float" all of the "friends" posts to the top.

Mongodb find() only include non-empty arrays

What I'm trying to achieve with a find query is to only include "someArray"s if it's inner array is not empty. For example the JSON below:
{
"document": "some document",
"someArray": [
{
"innerArray": [
"not empty"
]
},
{
"innerArray": [
[] //empty
]
}
]
}
Would return this:
{
"document": "some document",
"someArray": [
{
"innerArray": [
"not empty"
]
}
]
}
I'm using the following find:
Visit.find({'someArray.innerArray.0': {$exists: true}}, function(err, data){});
However, this returns all data.
Have also tried:
Visit.find({}, {'someArray.innerArray': {$gt: 0}}, function(err, data) {});
But this returns nothing
Any ideas on how to approach this?
Cheers
The general case here to check for a non-empty array is to check to see if the "first" element actually exists. For single matches you can project with the positional $ operator:
Vist.find(
{ "someArray.innerArray.0": { "$exists": true } },
{ "document": 1,"someArray.$": 1},
function(err,data) {
}
);
If you need more than a single match or have arrays nested more deeply than this, then the aggregation framework is what you need to handle the harder projection and/or "filter" the array results for more than one match:
Visit.aggregate(
[
// Match documents that "contain" the match
{ "$match": {
"someArray.innerArray.0": { "$exists": true }
}},
// Unwind the array documents
{ "$unwind": "$someArray" },
// Match the array documents
{ "$match": {
"someArray.innerArray.0": { "$exists": true }
}},
// Group back to form
{ "$group": {
"_id": "$_id",
"document": { "$first": "$document" },
"someArray": { "$push": "$someArray" }
}}
],function(err,data) {
}
)
Worth noting here that you are calling this "empty" but in fact is is not, as it actually contains another empty array. You probably don't want to do that with real data, but if you have then you would need to filter like this:
Visit.aggregate(
[
{ "$match": {
"someArray": { "$elemMatch": { "innerArray.0": { "$ne": [] } } }
}},
{ "$unwind": "$someArray" },
{ "$match": {
"someArray.innerArray.0": { "$ne": [] }
}},
{ "$group": {
"_id": "$_id",
"document": { "$first": "$document" },
"someArray": { "$push": "$someArray" }
}}
],function(err,data) {
}
);

Categories

Resources