How to avoid time gaps while aggregating OHLC in mongoDB - javascript

I am trying to extract ohlc 5 min interval from a 1 min interval DB stored in mongoDB. Below is my current query.
myModel.aggregate([
{"$project":
{
"data":
{
"$let":
{
"vars":
{
"mints":{"$arrayElemAt":[{"$arrayElemAt":["$data",0]},0]},
"maxts":{"$arrayElemAt":[{"$arrayElemAt":["$data",-1]},0]}
},
"in":
{
"$map":
{
"input":{"$range":["$$mints",{"$add":["$$maxts",300]},300]},
"as":"rge",
"in":
{
"$let":
{
"vars":
{
"five":
{
"$filter":
{
"input":"$data",
"as":"fres",
"cond":
{
"$and":
[
{"$gte":[{"$arrayElemAt":["$$fres",0]},"$$rge"]},
{"$lt":[{"$arrayElemAt":["$$fres",0]},{"$add":["$$rge",300]}]}
]
}
}
}
},
"in":
[
{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},0]},
{"$arrayElemAt":[{"$arrayElemAt":["$$five",0]},1]},
{"$max":{"$map":{"input":"$$five","as":"res","in":{"$arrayElemAt":["$$res",2]}}}},
{"$min":{"$map":{"input":"$$five","as":"res","in":{"$arrayElemAt":["$$res",3]}}}},
{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-2]},
{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-1]}
]
}
}
}
}
}
}
}
}
]);
It seem to extract the 5 min but not taking care of gaps in 1 min interval data. Instead for those time instants, I am getting null array. How do we avoid null arrays?
Sample 1 DB data:
https://gist.github.com/parthi2929/36e6898cff7be45ccdd008ec750e70e9
5 min extracted output snapshot is here
As you can see in snapshot, I get lot of null arrays. How do I avoid them?
I tried inserting { "$ne":[{"$arrayElemAt":["$$fres",0]},null] } in $and operator, but it did not help.
Update 14th Feb 2018: As per Veeram's suggestion, below is the modified code incorporated with the suggested changes. However, I still get one empty array (that is apparently many empty arrays in that time gap is now gone, but replaced by single empty array) which should also be fixed.
db.getCollection('ohlc-koinex-1').aggregate(
[
{"$project":
{
"data":
{
"$let":
{
"vars":
{
"mints":{"$arrayElemAt":[{"$arrayElemAt":["$data",0]},0]},
"maxts":{"$arrayElemAt":[{"$arrayElemAt":["$data",-1]},0]}
},
"in":
{
"$setDifference":
[
{
"$map":
{
"input":{"$range":["$$mints",{"$add":["$$maxts",300]},300]},
"as":"rge",
"in":
{
"$let":
{
"vars":
{
"five":
{
"$filter":
{
"input":"$data",
"as":"fres",
"cond":
{
"$and":
[
{"$gte":[{"$arrayElemAt":["$$fres",0]},"$$rge"]},
{"$lt":[{"$arrayElemAt":["$$fres",0]},{"$add":["$$rge",300]}]}
]
}
}
}
},
"in":
{
"$cond":[
{"$eq":["$$five",[]]},
"$$five",
[
{"$arrayElemAt": [{"$arrayElemAt":["$$five",-1]},0]},
{"$arrayElemAt":[{"$arrayElemAt":["$$five",0]},1]},
{"$max":{"$map":{"input":"$$five","as":"res","in":{"$arrayElemAt":["$$res",2]}}}},
{"$min":{"$map":{"input":"$$five","as":"res","in":{"$arrayElemAt":["$$res",3]}}}},
{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-2]},
{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-1]}
]
]
}
}
}
}
},[]
]
}
}
}
}
}
]
)
Here is the snapshot of the result

You can add $cond operator to account for gaps followed by $filter to filter the empty array values.
You require two changes.
The first change to store [] values for gaps instead of array with null values.
Update inner $let expression to below:
{
"$let":{
"vars":{"five":...},
"in":{
"$cond":[
{"$eq":["$$five",[]]},
"$$five",
[{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},0]},
....
{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-1]}]
]
}
}
}
The second change to filter the empty array values from the output.
{
"$project":{
"data":{
"$let":{
"vars":{"mints":...},
"in":{"$filter":{"input":{"$map":...},as:"flr", "cond":{"$ne":["$$flr",[]]}}}
}
}
}
}

Related

Convert $objectToArray map element to String

I have a collection in database that I am trying to retrieve some data from it , the query is working fine when $orderID has string elements , but is failing when $orderID has some numbers in array , and it is throwing
query failed: (Location40395) PlanExecutor error during aggregation :: caused by :: $arrayToObject requires an array of key-value pairs, where the key must be of type string. Found key type: double
I think there must be some old data when we were saving orderID as a number so that is why it is failing from some range of dates
Query
{
"Order_Details": {
"$map": {
"input": {
"$objectToArray": {
"$arrayToObject": {
"$zip": {
"inputs": [
"$orderID",
"$total_value_of_order"
]
}
}
}
},
"as": "el",
"in": {
"orderID": "$$el.k",
"total_value_of_order": "$$el.v"
}
}
}
}
I am trying to typecast el.k to string I am using $toString but can't seem to work , the way I am trying it is
{
"as": "el",
"in": {
"orderID": {
"$toString": "$$el.k"
},
"total_value_of_order": "$$el.v"
}
}
Example collection
[
{
"_id": ObjectId("5e529ee5f8647eb59e5620a2"),
"visitID": "dVmy7flXFHzzkn9HiMt8IoWvthoTZW",
"date": ISODate("2022-02-08T16:29:13.413Z"),
"control": true,
"orderID": [
122343242
],
"target": "test",
"total_value_of_order": [
60
]
}
]
You are close, the approach is fine. you just have a couple of syntax issues.
The major thing that needs to change is the input for $arrayToObject, currently your input looks like this:
[[number, number], [number, number]]
However $arrayToObject expects input in a certain format:
[{k: string, v: value}]
So this it what we'll add, like so:
db.collection.aggregate([
{
$project: {
"Order_Details": {
"$map": {
"input": {
"$objectToArray": {
"$arrayToObject": {
$map: {
input: {
"$zip": {
"inputs": [
"$orderID",
"$total_value_of_order"
]
}
},
in: {
k: {
$toString: {
"$arrayElemAt": [
"$$this",
0
]
}
},
v: {
"$arrayElemAt": [
"$$this",
1
]
}
}
}
}
}
},
"as": "el",
"in": {
"orderID": "$$el.v",
"total_value_of_order": "$$el.k"
}
}
}
}
}
])
Mongo Playground
Notice the "orderid" format changes to string which affects it's structure, I recommend just switching between the k and v in the pipeline, like this

MongoDB Conditional Projection based on existence of query of subdocument in Array

I have a schema in which properties can have respective "override" documents stored in an Array("overrides")
E.g.
{
foo:'original foo',
overrides: [
{property:'foo', value:'foo override'},
{property:'bar', value:'bar override'},
]
}
I want to project a field for the override value if it exists, otherwise, the original property.
So something like this
project: { overrideOrOriginal: {$cond: fooOverrideExists ? fooOverrideValue : originalFooValue }
So in this example, I would expect overrideOrOriginal to equal 'foo override' . If - {property:'foo', value:'foo override'} subDoc didn't exist in the overrides array (or if overrides array itself didn't even exist)...then I'd expect overrideOrOriginal = 'original foo'
How can I do this?
I was thinking I'd need $exists in tandem with $cond. But the complication here is that I'm searching for a subDoc in an Array based on a query
Thanks!
$ifNull to check if field is null then return empty array
$in to check "foo" is in overrides.property array
$indexOfArray to get index of array element in overrides.property array
$arrayElemAt to get element by specific index return from above operator
let fooOverrideExists = "foo";
db.collection.find({},
{
overrideOrOriginal: {
$cond: [
{
$in: [
fooOverrideExists,
{ $ifNull: ["$overrides.property", []] }
]
},
{
$arrayElemAt: [
"$overrides.value",
{ $indexOfArray: ["$overrides.property", fooOverrideExists] }
]
},
"$foo"
]
}
})
Playground
Query
find the property , key-value(kv) (it works for all property names)
(assumes your schema with only string value the value of that property)
checks if that it exists in the overrides array
if it exists, takes the value from the array
else keeps the original
*checks also cases where override doesnt exists, or its empty array, or property doesn't exist
*in case you want to do it only for a specific "foo" ignore the big first $set and use this code
Test code here
db.collection.aggregate([
{
"$set": {
"kv": {
"$arrayElemAt": [
{
"$filter": {
"input": {
"$objectToArray": "$$ROOT"
},
"cond": {
"$eq": [
{
"$type": "$$this.v"
},
"string"
]
}
}
},
0
]
}
}
},
{
"$set": {
"index": {
"$indexOfArray": [
"$overrides.property",
"$kv.k"
]
}
}
},
{
"$project": {
"_id": 0,
"overrideOrOriginal": {
"$cond": [
{
"$or": [
{
"$eq": [
"$index",
-1
]
},
{
"$not": [
"$overrides"
]
}
]
},
"$kv.v",
{
"$arrayElemAt": [
"$overrides.value",
"$index"
]
}
]
}
}
}
])

how to query in mongodb with loop in same collection, until i find null or empty value?

I have stacked in a nested object. here is my collection.
{
"key": 1,
"subKey": ""
},
{
"key": 2,
"subKey": 1
},
{
"key": 3,
"subKey": 2
},
{
"key": 4,
"subKey": 3
}
I want to query Key:4, which gives me result
{
"key": 4,
"subKey": 3
}
after getting result i want to query "subKey": 3 as a key:"$subKey" and i want to run a loop, until i find a empty subKey in our case It is Key:1. and whenever i found an empty subKey i want it document as a parent.
In the end, I want the result
{
"key": 4,
"parent":{"key":1,"subKey":"",....}
}
or similar.
Is it possible by using MongoDB built-in function? if not available how do I achieve this goal?
also, I want an alternative solution for it if there is.
You can achieve using $graphLookup
play
db.collection.aggregate([
{
$graphLookup: {
from: "collection",
startWith: "$key",
connectFromField: "subKey",
connectToField: "key",
as: "keys"
}
}
])
If you want a match filter add it,
play
db.collection.aggregate([
{
$match: {
key: 4
}
},
{
$graphLookup: {
from: "collection",
startWith: "$key",
connectFromField: "subKey",
connectToField: "key",
as: "keys"
}
}
])
Important consideration:
The $graphLookup stage must stay within the 100 MiB memory limit. If allowDiskUse: true is specified for the aggregate() operation, the $graphLookup stage ignores the option
To transform the data, you cannot have duplicate keys in parent object. So parent should be an array
play
db.collection.aggregate([
{
$match: {
key: 4
}
},
{
$graphLookup: {
from: "collection",
startWith: "$key",
connectFromField: "subKey",
connectToField: "key",
as: "keys"
}
},
{
"$addFields": {
"parent": {
"$map": {
"input": "$keys",
"as": "res",
"in": {
"key": "$$res.key",
"subKey": "$$res.subKey"
}
}
},
"key": "$key",
}
},
{
$project: {
keys: 0
}
}
])

mongodb to return object from facet

Is it possible to have facet to return as an object instead of an array? It seems a bit counter intuitive to need to access result[0].total instead of just result.total
code (using mongoose):
Model
.aggregate()
.match({
"name": { "$regex": name },
"user_id": ObjectId(req.session.user.id),
"_id": { "$nin": except }
})
.facet({
"results": [
{ "$skip": start },
{ "$limit": finish },
{
"$project": {
"map_levels": 0,
"template": 0
}
}
],
"total": [
{ "$count": "total" },
]
})
.exec()
Each field you get using $facet represents separate aggregation pipeline and that's why you always get an array. You can use $addFields to overwrite existing total with single element. To get that first item you can use $arrayElemAt
Model
.aggregate()
.match({
"name": { "$regex": name },
"user_id": ObjectId(req.session.user.id),
"_id": { "$nin": except }
})
.facet({
"results": [
{ "$skip": start },
{ "$limit": finish },
{
"$project": {
"map_levels": 0,
"template": 0
}
}
],
"total": [
{ "$count": "total" },
]
})
.addFields({
"total": {
$arrayElemAt: [ "$total", 0 ]
}
})
.exec()
You can try this as well
Model
.aggregate()
.match({
"name": { "$regex": name },
"user_id": ObjectId(req.session.user.id),
"_id": { "$nin": except }
})
.facet({
"results": [
{ "$skip": start },
{ "$limit": finish },
{
"$project": {
"map_levels": 0,
"template": 0
}
}
],
"total": [
{ "$count": "total" },
]
})
.addFields({
"total": {
"$ifNull": [{ "$arrayElemAt": [ "$total.total", 0 ] }, 0]
}
})
.exec()
imagine that you want to pass the result of $facet to the next stage, let's say $match. well $match accepts an array of documents as input and return an array of documents that matched an expression, if the output of $facet was just an element we can't pass its output to $match because the type of output of $facet is not the same as the type of input of $match ($match is just an example). In my opinion it's better to keep the output of $facet as array to avoid handling those types of situations.
PS : nothing official in what i said

How to retrieve documents with conditioning an array of nested objects?

The structure of the objects stored in mongodb is the following:
obj = {_id: "55c898787c2ab821e23e4661", ingredients: [{name: "ingredient1", value: "70.2"}, {name: "ingredient2", value: "34"}, {name: "ingredient3", value: "15.2"}, ...]}
What I would like to do is retrieve all documents, which value of specific ingredient is greater than arbitrary number.
To be more specific, suppose we want to retrieve all the documents which contain ingredient with name "ingredient1" and its value is greater than 50.
Trying the following I couldn't retrieve desired results:
var collection = db.get('docs');
var queryTest = collection.find({$where: 'this.ingredients.name == "ingredient1" && parseFloat(this.ingredients.value) > 50'}, function(e, docs) {
console.log(docs);
});
Does anyone know what is the correct query to condition upon specific array element names and values?
Thanks!
You really don't need the JavaScript evaluation of $where here, just use basic query operators with an $elemMatch query for the array. While true that the "value" elements here are in fact strings, this is not really the point ( as I explain at the end of this ). The main point is to get it right the first time:
collection.find(
{
"ingredients": {
"$elemMatch": {
"name": "ingredient1",
"value": { "$gt": 50 }
}
}
},
{ "ingredients.$": 1 }
)
The $ in the second part is the postional operator, which projects only the matched element of the array from the query conditions.
This is also considerably faster than the JavaScript evaluation, in both that the evaluation code does not need to be compiled and uses native coded operators, as well as that an "index" can be used on the "name" and even "value" elements of the array to aid in filtering the matches.
If you expect more than one match in the array, then the .aggregate() command is the best option. With modern MongoDB versions this is quite simple:
collection.aggregate([
{ "$match": {
"ingredients": {
"$elemMatch": {
"name": "ingredient1",
"value": { "$gt": 50 }
}
}
}},
{ "$redact": {
"$cond": {
"if": {
"$and": [
{ "$eq": [ { "$ifNull": [ "$name", "ingredient1" ] }, "ingredient1" ] },
{ "$gt": [ { "$ifNull": [ "$value", 60 ] }, 50 ] }
]
},
"then": "$$DESCEND",
"else": "$$PRUNE"
}
}}
])
And even simplier in forthcoming releases which introduce the $filter operator:
collection.aggregate([
{ "$match": {
"ingredients": {
"$elemMatch": {
"name": "ingredient1",
"value": { "$gt": 50 }
}
}
}},
{ "$project": {
"ingredients": {
"$filter": {
"input": "$ingredients",
"as": "ingredient",
"cond": {
"$and": [
{ "$eq": [ "$$ingredient.name", "ingredient1" ] },
{ "$gt": [ "$$ingredient.value", 50 ] }
]
}
}
}
}}
])
Where in both cases you are effectively "filtering" the array elements that do not match the conditions after the initial document match.
Also, since your "values" are actually "strings" right now, you reaally should change this to be numeric. Here is a basic process:
var bulk = collection.initializeOrderedBulkOp(),
count = 0;
collection.find().forEach(function(doc) {
doc.ingredients.forEach(function(ingredient,idx) {
var update = { "$set": {} };
update["$set"]["ingredients." + idx + ".value"] = parseFloat(ingredients.value);
bulk.find({ "_id": doc._id }).updateOne(update);
count++;
if ( count % 1000 != 0 ) {
bulk.execute();
bulk = collection.initializeOrderedBulkOp();
}
})
]);
if ( count % 1000 != 0 )
bulk.execute();
And that will fix the data so the query forms here work.
This is much better than processing with JavaScript $where which needs to evaluate every document in the collection without the benefit of an index to filter. Where the correct form is:
collection.find(function() {
return this.ingredients.some(function(ingredient) {
return (
( ingredient.name === "ingredient1" ) &&
( parseFloat(ingredient.value) > 50 )
);
});
})
And that can also not "project" the matched value(s) in the results as the other forms can.
Try using $elemMatch:
var queryTest = collection.find(
{ ingredients: { $elemMatch: { name: "ingredient1", value: { $gte: 50 } } } }
);

Categories

Resources