Unwind Multiple Document Arrays Into New Documents - javascript
Today I ran into a situation where I need to sync a MongoDB collection to Vertica (a SQL database), with my object keys becoming the columns of the SQL table.
I use the MongoDB aggregation framework first to query, manipulate and project the wanted result documents, and then I sync them to Vertica.
The schema I want to aggregate looks like this:
{
userId: 123
firstProperty: {
firstArray: ['x','y','z'],
anotherAttr: 'abc'
},
anotherProperty: {
secondArray: ['a','b','c'],
anotherAttr: 'def'
}
}
Since the values of one array are not related to the values of the other array, what I need is for each value of each nested array to end up in a separate result document.
For that I use the following aggregation pipeline:
// The asker's pipeline, with two field-name typos corrected:
//   - the first $unwind path read '$firstProperty.firstAray'
//   - the last output key read 'seondPropertyAttr'
// The two consecutive $unwind stages are intentionally left as asked: they are
// the subject of the question.
// NOTE(review): the sample schema above names the second sub-document
// `anotherProperty`, while this pipeline reads `secondProperty`; left as
// written - confirm against the real data.
db.collection('myCollection').aggregate([
  {
    // Keep documents where either array has an element at index 1.
    $match: {
      $or: [
        {'firstProperty.firstArray.1': {$exists: true}},
        {'secondProperty.secondArray.1': {$exists: true}}
      ]
    }
  },
  {
    // Carry forward only the fields used by the later stages.
    $project: {
      userId: 1,
      firstProperty: 1,
      secondProperty: 1
    }
  }, {
    // One output document per element of the first array.
    $unwind: {path: '$firstProperty.firstArray'}
  }, {
    // Unwinding a second array repeats every document from the previous stage
    // once per element of this array.
    $unwind: {path: '$secondProperty.secondArray'},
  }, {
    // Flatten the unwound values and their attributes to top-level keys.
    $project: {
      userId: 1,
      firstProperty: '$firstProperty.firstArray',
      firstPropertyAttr: '$firstProperty.anotherAttr',
      secondProperty: '$secondProperty.secondArray',
      secondPropertyAttr: '$secondProperty.anotherAttr'
    }
  }, {
    // Write the result to a collection.
    $out: 'another_collection'
  }
])
What I expect is the following result:
{
userId: 'x1',
firstProperty: 'x',
firstPropertyAttr: 'a'
}
{
userId: 'x1',
firstProperty: 'y',
firstPropertyAttr: 'a'
}
{
userId: 'x1',
firstProperty: 'z',
firstPropertyAttr: 'a'
}
{
userId: 'x1',
secondProperty: 'a',
firstPropertyAttr: 'b'
}
{
userId: 'x1',
secondProperty: 'b',
firstPropertyAttr: 'b'
}
{
userId: 'x1',
secondProperty: 'c',
firstPropertyAttr: 'b'
}
Instead I get something like this:
{
userId: 'x1',
firstProperty: 'x',
firstPropertyAttr: 'b'
secondProperty: 'a',
secondPropertyAttr: 'b'
}
{
userId: 'x1',
firstProperty: 'y',
firstPropertyAttr: 'b'
secondProperty: 'b',
secondPropertyAttr: 'b'
}
{
userId: 'x1',
firstProperty: 'z',
firstPropertyAttr: 'b'
secondProperty: 'c',
secondPropertyAttr: 'b'
}
What exactly am I missing, and how can I fix it?
This is actually a much "curlier" problem than you might think it is, and it all really boils down to "named keys", which are generally a real problem and your data "should" not be using "data points" in the naming of such keys.
The other obvious problem in your attempt is called a "cartesian product". This is where you $unwind one array and then $unwind another, which results in the items from the "first" $unwind being repeated for every value present in the "second".
Addressing that second problem, the basic approach is to "combine the arrays" in order that you only $unwind from a single source. This is pretty common to all remaining approaches.
As for the approaches, these differ in the MongoDB version you have available and the general practicality of application. So let's step through them:
Remove the named keys
The simplest approach here is to not expect named keys in the output, and instead mark each entry with a "name" identifying its source in the final output. So all we want to do is specify each "expected" key within the construction of an initial "combined" array, and then simply $filter that for any null values resulting from named paths not existing in the present document.
// Approach 1: emit a uniform { userId, name, value, attr } shape instead of
// per-source key names, unwinding from a single combined array so the two
// source arrays are never multiplied against each other.
db.getCollection('myCollection').aggregate([
  // Keep documents where at least one of the two arrays has an element at index 0.
  { "$match": {
    "$or": [
      { "firstProperty.firstArray.0": { "$exists": true } },
      { "anotherProperty.secondArray.0": { "$exists": true } }
    ]
  }},
  // Build "combined": one entry per expected source key, tagged with a literal name.
  { "$project": {
    "_id": 0,
    "userId": 1,
    "combined": {
      "$filter": {
        "input": [
          {
            "name": { "$literal": "first" },
            "array": "$firstProperty.firstArray",
            "attr": "$firstProperty.anotherAttr"
          },
          {
            "name": { "$literal": "another" },
            "array": "$anotherProperty.secondArray",
            "attr": "$anotherProperty.anotherAttr"
          }
        ],
        // A path that does not exist evaluates to "missing", which does not
        // compare equal to null. $ifNull maps both missing and null to null,
        // so entries without an array are actually removed here.
        "cond": {
          "$ne": [ { "$ifNull": [ "$$this.array", null ] }, null ]
        }
      }
    }
  }},
  // One document per combined entry ...
  { "$unwind": "$combined" },
  // ... then one document per element of that entry's array.
  { "$unwind": "$combined.array" },
  // Flatten to the final output shape.
  { "$project": {
    "userId": 1,
    "name": "$combined.name",
    "value": "$combined.array",
    "attr": "$combined.attr"
  }}
])
From the data included in your question this would produce:
/* 1 */
{
"userId" : 123.0,
"name" : "first",
"value" : "x",
"attr" : "abc"
}
/* 2 */
{
"userId" : 123.0,
"name" : "first",
"value" : "y",
"attr" : "abc"
}
/* 3 */
{
"userId" : 123.0,
"name" : "first",
"value" : "z",
"attr" : "abc"
}
/* 4 */
{
"userId" : 123.0,
"name" : "another",
"value" : "a",
"attr" : "def"
}
/* 5 */
{
"userId" : 123.0,
"name" : "another",
"value" : "b",
"attr" : "def"
}
/* 6 */
{
"userId" : 123.0,
"name" : "another",
"value" : "c",
"attr" : "def"
}
Merge Objects - Requires MongoDB 3.4.4 minimum
To actually use "named keys" we need the $objectToArray and $arrayToObject operators that were only available since MongoDB 3.4.4. Using these and the $replaceRoot pipeline stage we can simply process to your desired output without explicitly naming the keys to output at any stage:
// Approach 2: emit documents whose key names are taken from the source
// document's own top-level keys, using $objectToArray / $arrayToObject and
// $replaceRoot so no output key is named explicitly.
db.getCollection('myCollection').aggregate([
// Stage 1: keep documents where at least one of the two arrays has an element at index 0.
{ "$match": {
"$or": [
{ "firstProperty.firstArray.0": { "$exists": true } },
{ "anotherProperty.secondArray.0": { "$exists": true } }
]
}},
// Stage 2: build "data", a single flat array holding one small object per
// element of every inner array.
{ "$project": {
"_id": 0,
"userId": 1,
"data": {
// $reduce flattens the array-of-arrays produced by the outer $map below.
"$reduce": {
"input": {
// Outer $map: one iteration per top-level key that survives the $filter.
"$map": {
"input": {
// Turn the whole document into [{ k, v }, ...] and drop "_id" and "userId".
"$filter": {
"input": { "$objectToArray": "$$ROOT" },
"cond": { "$not": { "$in": [ "$$this.k", ["_id", "userId"] ] } }
}
},
"as": "d",
"in": {
"$let": {
"vars": {
// "inner": the sub-document as [{ k, v }, ...] with its keys rewritten.
// A key containing "Array" becomes the outer key name ($$d.k);
// any other key becomes the outer key name with "Attr" appended.
"inner": {
"$map": {
"input": { "$objectToArray": "$$d.v" },
"as": "i",
"in": {
"k": {
"$cond": {
"if": { "$ne": [{ "$indexOfCP": ["$$i.k", "Array"] }, -1] },
"then": "$$d.k",
"else": { "$concat": ["$$d.k", "Attr"] }
}
},
"v": "$$i.v"
}
}
}
},
"in": {
// Iterate the values of the entry renamed to the outer key (the array),
// emitting one object per value.
"$map": {
"input": {
"$arrayElemAt": [
"$$inner.v",
{ "$indexOfArray": ["$$inner.k", "$$d.k"] }
]
},
"as": "v",
"in": {
// Build { <outer key>: <value>, <outer key>Attr: <attr value> }.
"$arrayToObject": [[
{ "k": "$$d.k", "v": "$$v" },
{
"k": { "$concat": ["$$d.k", "Attr"] },
"v": {
// Look up the value of the entry renamed to <outer key>Attr.
"$arrayElemAt": [
"$$inner.v",
{ "$indexOfArray": ["$$inner.k", { "$concat": ["$$d.k", "Attr"] }] }
]
}
}
]]
}
}
}
}
}
}
},
"initialValue": [],
"in": { "$concatArrays": [ "$$value", "$$this" ] }
}
}
}},
// Stage 3: one document per object in "data".
{ "$unwind": "$data" },
// Stage 4: promote each "data" object to the document root, with "userId"
// prepended as the first key.
{ "$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$concatArrays": [
[{ "k": "userId", "v": "$userId" }],
{ "$objectToArray": "$data" }
]
}
}
}}
])
Which gets pretty monstrous from converting the "keys" into an array, then the "sub-keys" into an array and mapping the values from those inner arrays onto the pair of keys in output.
The key parts being $objectToArray is essentially needed to "transform" your "nested key" structures into arrays of "k" and "v" representing the "name" of the key and the "value". This gets called twice, being once for the "outer" parts of the document and excluding the "constant" fields such as "_id" and "userId" into such an array structure. Then the second call is processed on each of those "array" elements in order to make those "inner keys" a similar "array".
Matching is then done using $indexOfCP to work out which "inner key" is the one for the value and which is the "Attr". The keys are then renamed here to the "outer" key value, which we can access because that's a "k" courtesy of $objectToArray.
Then for the "inner value" which is an "array", we want to $map each entry into a combined "array" which basically has the form:
[
{ "k": "firstProperty", "v": "x" },
{ "k": "firstPropertyAttr", "v": "abc" }
]
This happens for each "inner array" element, for which $arrayToObject reverses the process and turns each "k" and "v" into "key" and "value" of an object respectively.
Since the output is still an "array of arrays" of the "inner keys" at this point, the $reduce wraps that output and applies $concatArrays while processing each element in order to "join" into a single array for "data".
All that remains is to simply $unwind the array produced from each source document, and then apply $replaceRoot, which is the part that actually allows "different key names" at the "root" of each document output.
The "merging" here is done by supplying an array of objects of the same "k" and "v" construction notated for "userId", and "concatenating" that with the $objectToArray transform of the "data". Of course this "new array" is then converted to an object via $arrayToObject one final time, which forms the "object" argument to "newRoot" as an expression.
You do something like that when there is a large number of "named keys" that you can't really name explicitly. And it actually gives you the result you want:
/* 1 */
{
"userId" : 123.0,
"firstProperty" : "x",
"firstPropertyAttr" : "abc"
}
/* 2 */
{
"userId" : 123.0,
"firstProperty" : "y",
"firstPropertyAttr" : "abc"
}
/* 3 */
{
"userId" : 123.0,
"firstProperty" : "z",
"firstPropertyAttr" : "abc"
}
/* 4 */
{
"userId" : 123.0,
"anotherProperty" : "a",
"anotherPropertyAttr" : "def"
}
/* 5 */
{
"userId" : 123.0,
"anotherProperty" : "b",
"anotherPropertyAttr" : "def"
}
/* 6 */
{
"userId" : 123.0,
"anotherProperty" : "c",
"anotherPropertyAttr" : "def"
}
Named Keys without MongoDB 3.4.4 or Greater
Without the operator support as shown in the above listing, it's simply not possible for the aggregation framework to output documents with different key names.
So though it's not possible to instruct the "server" to do this via $out, you can of course simply iterate the cursor and write to a new collection:
// Expand one source document into flat output documents: for every top-level
// key other than "_id" and "userId", emit one
// { userId, <key>: <element>, <key>Attr: <attr> } object per element of that
// sub-document's "...Array" field.
function expandDocument(d) {
  return Object.keys(d)
    .filter(k => ['_id','userId'].indexOf(k) === -1)
    .map(k => {
      var inner = d[k];
      // Skip top-level fields that are not sub-documents; indexing into them
      // for an array would otherwise throw.
      if (inner === null || typeof inner !== 'object') return [];

      var innerKeys = Object.keys(inner);
      var arrayKey = innerKeys.find(ki => /Array$/.test(ki));
      // Resolved once per sub-document rather than once per array element.
      var attrKey = innerKeys.find(ki => /Attr$/.test(ki));

      var values = inner[arrayKey];
      // Skip sub-documents that have no "...Array" field holding an array.
      if (!Array.isArray(values)) return [];

      return values.map(v => ({
        userId: d.userId,
        [k]: v,
        [`${k}Attr`]: inner[attrKey]
      }));
    })
    // Flatten the per-key arrays into a single array of output documents.
    .reduce((acc,curr) => acc.concat(curr),[]);
}

// Buffer of output documents waiting to be written.
var ops = [];

db.getCollection('myCollection').find().forEach( d => {
  ops = ops.concat(expandDocument(d));

  // Write in batches once the buffer holds 1000 or more documents.
  if (ops.length >= 1000) {
    db.getCollection("another_collection").insertMany(ops);
    ops = [];
  }
})

// Write whatever is left over after the cursor is exhausted.
if ( ops.length > 0 ) {
  db.getCollection("another_collection").insertMany(ops);
  ops = [];
}
Same sort of thing as is being done in the earlier aggregation, but just "externally". It essentially produces an array of documents for each document matching the "inner" arrays, like so:
[
{
"userId" : 123.0,
"firstProperty" : "x",
"firstPropertyAttr" : "abc"
},
{
"userId" : 123.0,
"firstProperty" : "y",
"firstPropertyAttr" : "abc"
},
{
"userId" : 123.0,
"firstProperty" : "z",
"firstPropertyAttr" : "abc"
},
{
"userId" : 123.0,
"anotherProperty" : "a",
"anotherPropertyAttr" : "def"
},
{
"userId" : 123.0,
"anotherProperty" : "b",
"anotherPropertyAttr" : "def"
},
{
"userId" : 123.0,
"anotherProperty" : "c",
"anotherPropertyAttr" : "def"
}
]
These get "cached" into a big array which, once it reaches a length of 1000 or more, is written to the new collection via .insertMany(). Of course that requires "back and forth" communication with the server, but it does get the job done in the most efficient way possible if you don't have the features available for the previous aggregation.
Conclusion
The overall point here is that unless you actually have a MongoDB that supports it, then you are not going to get documents with "different key names" in the output, solely from the aggregation pipeline.
So when you do not have that support, you either go with the first option and use $out, giving up on having named keys, or you take the final approach and simply manipulate the cursor results and write back to the new collection.
Related
Can i filter one collection that has _id in other collection and have result on in mongodb
I have a collection "A" like this: { "_id" : ObjectId("5c4c1f2a5173562961468b30"), "name" : "first" }, { "_id" : ObjectId("57c162f267045ec439ba2485"), "name" : "second" } As above, I have multiple documents in the collection. Now I have a collection "B", like below, whose documents hold the _id of a document in collection "A": { "_id" : ObjectId("57d14ad36495a197593ab7ab"), "a_id" : ObjectId("5c4c1f2a5173562961468b30"), "name" : "ab" }, { "_id" : ObjectId("57d941503dd051cc04205e1e"), "a_id" : ObjectId("5c4c1f2a5173562961468b30"), "name" : "cd " } Now I need a query that will give me results from collection "A" based on how many documents are associated with them in "B". If "A" has 2 documents in "B", I need those 2 "B" documents.
This simple aggregation gives you the result: db.A.aggregate([ { "$lookup": { "from": "B", "localField": "_id", "foreignField": "a_id", "as": "docuentInB" } }, { $match: { $expr: { $gte: [{ $size: "$docuentInB" }, 2 ] } } }, { $project: { docuentInB: 0 } } ])
Aggregation Accumulate Inner Objects
I'm new with mongo aggregations, and I need a help with creating one, I have a collection of the following document as an example: { "_id" : ObjectId("5afc2f06e1da131c9802071e"), "_class" : "Traveler", "name" : "John Due", "startTimestamp" : 1526476550933, "endTimestamp" : 1526476554823, "source" : "istanbul", "cities" : [ { "_id" : "ef8f6b26328f-0663202f94faeaeb-3981", "name" : "Moscow", "timestamp" : 1526476550940, "timeSpent" : 3180 }, { "_id" : "ef8f6b26328f-0663202f94faeaeb-1122", "name" : "Cairo", "timestamp" : 1625476550940, "timeSpent" : 318000, }, { "_id" : "ef8f6b26328f-0663202f94faeaeb-3981", "name" : "Moscow", "timestamp" : 15211276550940, "timeSpent" : 318011 } ], "variables" : [ { "_id" : "cd4318a83c9b-a8478d76bfd3e4b6-5967", "name" : "Customer Profile", "lastValue" : "", "values" : [], "additionalData" : {} }, { "_id" : "366cb8c07996-c62c37a87a86d526-d3e7", "name" : "Target Telephony Queue", "lastValue" : "", "values" : [], "additionalData" : {} }, { "_id" : "4ed84742da33-d70ba8a809b712f3-bdf4", "name" : "IMEI", "lastValue" : "", "values" : [], "additionalData" : {} }, { "_id" : "c8103687c1c8-97d749e349d785c8-9154", "name" : "Budget", "defaultValue" : "", "lastValue" : "", "values" : [ { "value" : "3000", "timestamp" : NumberLong(1526476550940), "element" : "c8103687c1c8-97d749e349d785c8-9154" } ], "additionalData" : {} } ] } I need to have a resulting document showing the how many times each city have been visited by each traveler in the collection, and the average budget (budget is an element in the variables array so the resulting document will be similar to: { "_id" : ObjectId("5afc2f06e1da131c9802071e"), "_class" : "Traveler", "name" : "John Due", "startTimestamp" : 1526476550933, "endTimestamp" : 1526476554823, "source" : "istanbul", "cities" : [ { "_id" : "ef8f6b26328f-0663202f94faeaeb-3981", "name" : "Moscow", "visited":2 }, { "_id" : "ef8f6b26328f-0663202f94faeaeb-1122", "name" : "Cairo", "visited":1 } ], "variables" : [ { "_id" : 
"c8103687c1c8-97d749e349d785c8-9154", "name" : "Budget", "defaultValue" : "", "lastValue" : "", "values" : [ { "value" : "3000", } ], } ], } Thank you for your help
As a quick note, you need to change your "value" field inside the "values" to be numeric, since it's presently a string. But on to the answer: If you have access to $reduce from MongoDB 3.4, then you can actually do something like this: db.collection.aggregate([ { "$addFields": { "cities": { "$reduce": { "input": "$cities", "initialValue": [], "in": { "$cond": { "if": { "$ne": [{ "$indexOfArray": ["$$value._id", "$$this._id"] }, -1] }, "then": { "$concatArrays": [ { "$filter": { "input": "$$value", "as": "v", "cond": { "$ne": [ "$$this._id", "$$v._id" ] } }}, [{ "_id": "$$this._id", "name": "$$this.name", "visited": { "$add": [ { "$arrayElemAt": [ "$$value.visited", { "$indexOfArray": [ "$$value._id", "$$this._id" ] } ]}, 1 ] } }] ] }, "else": { "$concatArrays": [ "$$value", [{ "_id": "$$this._id", "name": "$$this.name", "visited": 1 }] ] } } } } }, "variables": { "$map": { "input": { "$filter": { "input": "$variables", "cond": { "$eq": ["$$this.name", "Budget"] } } }, "in": { "_id": "$$this._id", "name": "$$this.name", "defaultValue": "$$this.defaultValue", "lastValue": "$$this.lastValue", "value": { "$avg": "$$this.values.value" } } } } }} ]) If you have MongoDB 3.6, you can clean that up a bit with $mergeObjects: db.collection.aggregate([ { "$addFields": { "cities": { "$reduce": { "input": "$cities", "initialValue": [], "in": { "$cond": { "if": { "$ne": [{ "$indexOfArray": ["$$value._id", "$$this._id"] }, -1] }, "then": { "$concatArrays": [ { "$filter": { "input": "$$value", "as": "v", "cond": { "$ne": [ "$$this._id", "$$v._id" ] } }}, [{ "_id": "$$this._id", "name": "$$this.name", "visited": { "$add": [ { "$arrayElemAt": [ "$$value.visited", { "$indexOfArray": [ "$$value._id", "$$this._id" ] } ]}, 1 ] } }] ] }, "else": { "$concatArrays": [ "$$value", [{ "_id": "$$this._id", "name": "$$this.name", "visited": 1 }] ] } } } } }, "variables": { "$map": { "input": { "$filter": { "input": "$variables", "cond": { "$eq": ["$$this.name", "Budget"] } } }, "in": { 
"$mergeObjects": [ "$$this", { "values": { "$avg": "$$this.values.value" } } ] } } } }} ]) But it's more or less the same thing except we keep the additionalData Going back a little before that, then you can always $unwind the "cities" to accumulate: db.collection.aggregate([ { "$unwind": "$cities" }, { "$group": { "_id": { "_id": "$_id", "cities": { "_id": "$cities._id", "name": "$cities.name" } }, "_class": { "$first": "$class" }, "name": { "$first": "$name" }, "startTimestamp": { "$first": "$startTimestamp" }, "endTimestamp" : { "$first": "$endTimestamp" }, "source" : { "$first": "$source" }, "variables": { "$first": "$variables" }, "visited": { "$sum": 1 } }}, { "$group": { "_id": "$_id._id", "_class": { "$first": "$class" }, "name": { "$first": "$name" }, "startTimestamp": { "$first": "$startTimestamp" }, "endTimestamp" : { "$first": "$endTimestamp" }, "source" : { "$first": "$source" }, "cities": { "$push": { "_id": "$_id.cities._id", "name": "$_id.cities.name", "visited": "$visited" } }, "variables": { "$first": "$variables" }, }}, { "$addFields": { "variables": { "$map": { "input": { "$filter": { "input": "$variables", "cond": { "$eq": ["$$this.name", "Budget"] } } }, "in": { "_id": "$$this._id", "name": "$$this.name", "defaultValue": "$$this.defaultValue", "lastValue": "$$this.lastValue", "value": { "$avg": "$$this.values.value" } } } } }} ]) All return (almost) the same thing: { "_id" : ObjectId("5afc2f06e1da131c9802071e"), "_class" : "Traveler", "name" : "John Due", "startTimestamp" : 1526476550933, "endTimestamp" : 1526476554823, "source" : "istanbul", "cities" : [ { "_id" : "ef8f6b26328f-0663202f94faeaeb-1122", "name" : "Cairo", "visited" : 1 }, { "_id" : "ef8f6b26328f-0663202f94faeaeb-3981", "name" : "Moscow", "visited" : 2 } ], "variables" : [ { "_id" : "c8103687c1c8-97d749e349d785c8-9154", "name" : "Budget", "defaultValue" : "", "lastValue" : "", "value" : 3000 } ] } The first two forms are of course the most optimal thing to do since they are 
simply working "within" the same document at all times. Operators like $reduce allow "accumulation" expressions on arrays, so we can use it here to keep a "reduced" array which we test for the unique "_id" value using $indexOfArray in order to see if the there already is an accumulated item that matches. A result of -1 means it's not there. In order construct a "reduced array" we take the "initialValue" of [] as an empty array and then add to it via $concatArrays. All of that process is decided via the "ternary" $cond operator which considers the "if" condition and "then" either "joins" the output of the $filter on the current $$value to exclude the current index _id entry, with of course another "array" representing the singular object. For that "object" we again use the $indexOfArray to actually get the matched index since we know that the item "is there", and use that to extract the current "visited" value from that entry via $arrayElemAt and $add to it in order to increment. In the "else" case we simply add an "array" as an "object" which just has a default "visited" value of 1. Using both those cases effectively accumulates unique values within the array to output. In the latter version, we just $unwind the array and use successive $group stages in order to first "count" on the unique inner entries, and then "re-construct the array" into the similar form. Using $unwind looks far more simple, but since what it actually does is take a copy of the document for every array entry, then this actually adds considerable overhead to processing. In modern versions there are generally array operators which mean you don't need to use this unless your intention is to "accumulate across documents". So if you actually need to $group on a value of a key from "inside" an array, then that is where you actually do need to use it. As for the "variables" then we can simply use the $filter again here to get the matching "Budget" entry. 
We do this as the input to the $map operator which allows "re-shaping" of the array content. We mainly want that so you can take the content of the "values" ( once you make it all numeric ) and use the $avg operator, which is supplied that "field path notation" form directly to the array values because it can in fact return a result from such an input. That generally makes the tour of pretty much ALL of the main "array operators" for the aggregation pipeline ( excluding the "set" operators ) all within a single pipeline stage. Also don't ever forget that you just about always want to $match with regular Query Operators as the "very first stage" of any aggregation pipeline in order to just select the documents you need. Ideally using an index. Alternates Alternates are working through the documents in client code. It generally would not be recommended since all methods above show they actually "reduce" the content as returned from the server, as is generally the point of "server aggregations". It "may" be possible due to the "document based" nature that larger result sets may take considerably more time using $unwind and client processing could be an option, but I would consider it far more likely Below is a listing that demonstrates applying a transform to the cursor stream as results are returned doing the same thing. There are three demonstrated versions of the transform, showing "exactly" the same logic as above, a implementation with lodash methods for accumulation, and a "natural" accumulation on the Map implementation: const { MongoClient } = require('mongodb'); const { chain } = require('lodash'); const uri = 'mongodb://localhost:27017'; const opts = { useNewUrlParser: true }; const log = data => console.log(JSON.stringify(data, undefined, 2)); const transform = ({ cities, variables, ...d }) => ({ ...d, cities: cities.reduce((o,{ _id, name }) => (o.map(i => i._id).indexOf(_id) != -1) ? 
[ ...o.filter(i => i._id != _id), { _id, name, visited: o.find(e => e._id === _id).visited + 1 } ] : [ ...o, { _id, name, visited: 1 } ] , []).sort((a,b) => b.visited - a.visited), variables: variables.filter(v => v.name === "Budget") .map(({ values, additionalData, ...v }) => ({ ...v, values: (values != undefined) ? values.reduce((o,e) => o + e.value, 0) / values.length : 0 })) }); const alternate = ({ cities, variables, ...d }) => ({ ...d, cities: chain(cities) .groupBy("_id") .toPairs() .map(([k,v]) => ({ ...v.reduce((o,{ _id, name }) => ({ ...o, _id, name }),{}), visited: v.length }) ) .sort((a,b) => b.visited - a.visited) .value(), variables: variables.filter(v => v.name === "Budget") .map(({ values, additionalData, ...v }) => ({ ...v, values: (values != undefined) ? values.reduce((o,e) => o + e.value, 0) / values.length : 0 })) }); const natural = ({ cities, variables, ...d }) => ({ ...d, cities: [ ...cities .reduce((o,{ _id, name }) => o.set(_id, [ ...(o.has(_id) ? o.get(_id) : []), { _id, name } ]), new Map()) .entries() ] .map(([k,v]) => ({ ...v.reduce((o,{ _id, name }) => ({ ...o, _id, name }),{}), visited: v.length }) ) .sort((a,b) => b.visited - a.visited), variables: variables.filter(v => v.name === "Budget") .map(({ values, additionalData, ...v }) => ({ ...v, values: (values != undefined) ? values.reduce((o,e) => o + e.value, 0) / values.length : 0 })) }); (async function() { try { const client = await MongoClient.connect(uri, opts); let db = client.db('test'); let coll = db.collection('junk'); let cursor = coll.find().map(natural); while (await cursor.hasNext()) { let doc = await cursor.next(); log(doc); } client.close(); } catch(e) { console.error(e) } finally { process.exit() } })()
Project array of Objects to Key Value
After aggregation I get this result: { "_id" : { "date" : ISODate("2017-08-30T00:00:00.000Z") }, "aggr" : [ { "gender" : "unknown", "count" : 365 }, { "gender" : "male", "count" : 2 } ] } Is it possible to convert this into the format below? { "date" : ISODate("2017-08-30T00:00:00.000Z"), "unknown" : 365, "male" : 2 } I tried using $unwind and $project, but couldn't convert the array objects into key/value pairs.
Yes, using $arrayToObject and $map to convert the existing array to a format it accepts: db.collection.aggregate([ { "$replaceRoot": { "newRoot": { "$arrayToObject": { "$concatArrays": [ [{ "k": "date", "v": "$_id.date" }], { "$map": { "input": "$aggr", "in": { "k": "$$this.gender", "v": "$$this.count" } }} ] } } }} ]) Of course if this is actually only on the "tail" of an existing aggregation and you don't have at least MongoDB 3.4.4 where the operator is introduced, then you can simply reshape the result in the client code: db.collection.aggregate([ // existing pipeline ]).map(d => Object.assign( { date: d._id.date }, d.aggr.reduce((acc,curr) => Object.assign(acc,{ [curr.gender]: curr.count }),{} ) ) )
Aggregation Unwind Document Keys as New Documents
I am having some problems in altering the schema I am using for a time series database I have constructed using Mongo DB. Currently, I have records like the one shown below: { "_id" : 20, "name" : "Bob, "location" : "London", "01/01/1993" : { "height" : "110cm", "weight" : "60kg", }, "02/01/1993" : { "height" : "112cm", "weight" : "61kg", } } I wish to use the aggregation framework to create several records for each "person", one for each "time-value" subdocument in the original record: { "_id" : 20, "name" : "Bob, "date" : "01/01/1993" "location" : "London", "height" : "110cm", "weight" : "60kg", }, { "_id" : 20, "name" : "Bob, "date" : "02/01/1993" "location" : "London", "height" : "112cm", "weight" : "61kg", } The new scheme should be much more efficient when adding a large number of time series values to each record and I shouldn't run into a max document size error! Any help on how to do this using the Mongo DB aggregation pipeline would be greatly appreciated!
Whilst there are functions in modern releases of the Aggregation Framework that can allow you to do this sort of thing, mileage may vary to whether it is actually the best solution for this. In essence you can create an array of entries comprised of the document keys "which do not include" the other top level keys which would then be included in the document. That array can then be processed with $unwind and the whole result reshaped into new documents: db.getCollection('input').aggregate([ { "$project": { "name": 1, "location": 1, "data": { "$filter": { "input": { "$objectToArray": "$$ROOT" }, "as": "d", "cond": { "$not": { "$in": [ "$$d.k", ["_id","name","location"] ] } } } } }}, { "$unwind": "$data" }, { "$replaceRoot": { "newRoot": { "$arrayToObject": { "$concatArrays": [ [{ "k": "id", "v": "$_id" }, { "k": "name", "v": "$name" }, { "k": "location", "v": "$location" }, { "k": "date", "v": "$data.k" }], { "$objectToArray": "$data.v" } ] } } }}, { "$out": "output" } ]) or alternately do all the reshaping in the initial $project within the array elements produced: db.getCollection('input').aggregate([ { "$project": { "_id": 0, "data": { "$map": { "input": { "$filter": { "input": { "$objectToArray": "$$ROOT" }, "as": "d", "cond": { "$not": { "$in": [ "$$d.k", ["_id", "name", "location"] ] } } } }, "as": "d", "in": { "$arrayToObject": { "$concatArrays": [ { "$filter": { "input": { "$objectToArray": "$$ROOT" }, "as": "r", "cond": { "$in": [ "$$r.k", ["_id", "name", "location"] ] } }}, [{ "k": "date", "v": "$$d.k" }], { "$objectToArray": "$$d.v" } ] } } } } }}, { "$unwind": "$data" }, { "$replaceRoot": { "newRoot": "$data" } }, { "$out": "output" } ]) So you use $objectToArray and $filter in order to make an array from the keys which actually contain the data points for each date. 
After $unwind we basically apply $arrayToObject on a set of named keys in the "array format" in order to construct the newRoot for $replaceRoot and then write to the new collection, as one new document for each data key using $out. That may only get you part of the way though, as you really should change the "date"data to a BSON Date. It takes a lot less storage space, and is easier to query as well. var updates = []; db.getCollection('output').find().forEach( d => { updates.push({ "updateOne": { "filter": { "_id": d._id }, "update": { "$set": { "date": new Date( Date.UTC.apply(null, d.date.split('/') .reverse().map((e,i) => (i == 1) ? parseInt(e)-1: parseInt(e) ) ) ) } } } }); if ( updates.length >= 500 ) { db.getCollection('output').bulkWrite(updates); updates = []; } }) if ( updates.length != 0 ) { db.getCollection('output').bulkWrite(updates); updates = []; } Of course, if your MongoDB server lacks those aggregation features then you are better off just writing the output to a new collection by iterating the loop in the first place: var output = []; db.getCollection('input').find().forEach( d => { output = [ ...output, ...Object.keys(d) .filter(k => ['_id','name','location'].indexOf(k) === -1) .map(k => Object.assign( { id: d._id, name: d.name, location: d.location, date: new Date( Date.UTC.apply(null, k.split('/') .reverse().map((e,i) => (i == 1) ? parseInt(e)-1: parseInt(e) ) ) ) }, d[k] )) ]; if ( output.length >= 500 ) { db.getCollection('output').insertMany(output); output = []; } }) if ( output.length != 0 ) { db.getCollection('output').insertMany(output); output = []; } In either of those cases we want to apply Date.UTC to the reversed string elements from the existing "string" based date and get a value than can be cast into a BSON Date. 
The aggregation framework itself does not allow casting of types so the only solution for that part ( and it is a necessary part ) is to actually loop and update, but using the forms at least makes it efficient to loop and update. Either case gives you the same end output: /* 1 */ { "_id" : ObjectId("599275b1e38f41729f1d64fe"), "id" : 20.0, "name" : "Bob", "location" : "London", "date" : ISODate("1993-01-01T00:00:00.000Z"), "height" : "110cm", "weight" : "60kg" } /* 2 */ { "_id" : ObjectId("599275b1e38f41729f1d64ff"), "id" : 20.0, "name" : "Bob", "location" : "London", "date" : ISODate("1993-01-02T00:00:00.000Z"), "height" : "112cm", "weight" : "61kg" }
How to match a document with the value of an object attribute in Mongodb
I'm trying to match some document in mongoDB: My Document model : profile: { languages: [ {"name": "French", "level": 1}, {"name": "English", "level": 2}, {"name": "Spanish", "level": 4} ] } What I (can) have to search my result set: lang = ["French", "English"]; objLang = [ {name: "French"}, {name: "English"} ]; What I need is to db.find() all documents that match at least one of the languages, for example : profile.languages.name = "French" or profile.languages.name = "English" WHat I mean is that if I have French or English in my option set, I need to get all the users who have a element of their languages array where name match one of my options, no matter the level of the languages. So, unless I'm wrong, I can't do db.find({"profile.languages": {$in: [{name: "French"}, {name: "English"}]}); How would you proceed ? Thanks a lot. David
Actually, you were almost right: db.collection.find({"profile.languages.name":{$in: ["French","English"]} }) Since profile.languages is an array of subdocuments, you can call in for one of the subdocument's keys and the subsequent query gets mapped to all subdocuments containing that key. However, without proper indexing, an added .explain() shows something pretty ugly: { "queryPlanner" : { "plannerVersion" : 1, "namespace" : "test.languages", "indexFilterSet" : false, "parsedQuery" : { "profile.languages.name" : { "$in" : [ "English", "French" ] } }, "winningPlan" : { "stage" : "COLLSCAN", "filter" : { "profile.languages.name" : { "$in" : [ "English", "French" ] } }, "direction" : "forward" }, "rejectedPlans" : [ ] } } (serverInfowas ommited). So in order to make this query efficient, you need to create an index over the field you want to query: db.languages.ensureIndex({"profile.languages.name":1}) An added explain now tells us that the matching documents are identified via the index: { "queryPlanner" : { "plannerVersion" : 1, "namespace" : "test.languages", "indexFilterSet" : false, "parsedQuery" : { "profile.languages.name" : { "$in" : [ "English", "French" ] } }, "winningPlan" : { "stage" : "FETCH", "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "profile.languages.name" : 1 }, "indexName" : "profile.languages.name_1", "isMultiKey" : true, "direction" : "forward", "indexBounds" : { "profile.languages.name" : [ "[\"English\", \"English\"]", "[\"French\", \"French\"]" ] } } }, "rejectedPlans" : [ ] }, "ok" : 1 }
You could try using the dot notation to access both the elements of an array and to access the fields of an embedded document: db.find({ "profile.languages.name": { "$in": ["French", "English"] } });
Please try the below query : Solution 1 : db.collection.aggregate([ { $unwind:"$profile.languages" }, { $match:{"profile.languages.name":{"$in":["French", "English"]}} }, { $group:{_id: "$_id", profile:{"$push":"$profile.languages"}} } ]) Solution 2 : db.collection.find( { "profile.languages.name": { "$in": [ "English", "French"] } } );