How to use $regex in mongoDB query with string variable? - javascript

I am trying to do a simple wildcard query in a MongoDB database using a user-generated string variable. For example, if the user searches 'Bu', things like 'Burger' and 'Burger King' should be returned from the database. I have searched and tried several things but nothing seems to work. Any help would be greatly appreciated. Note: This is client-side JS.
var text = document.getElementById("Search_Box").value;
var regex = new RegExp(text + ".*");
client.login().then(() => db.collection('businesses')
.find({name:{$regex:regex}}).then(docs => {
console.log("[MongoDB Stitch] Connected to Stitch");

If you had the following documents:
{
"_id" : ObjectId("5a0f3a464d8a2fe38bec4e92"),
"name" : "Burger"
}
{
"_id" : ObjectId("5a0f3a464d8a2fe38bec4e94"),
"name" : "Burger King"
}
{
"_id" : ObjectId("5a0f3a464d8a2fe38bec4e96"),
"name" : "Booby's"
}
{
"_id" : ObjectId("5a0f3a464d8a2fe38bec4e98"),
"name" : "McDonald"
}
Starting with
To get everything starting with "Bu" you could do
db.collection('businesses').find({name: {$regex: '^Bu'}})
or
db.collection('businesses').find({name: {$regex: /^Bu/}})
In the middle of
If you needed anything that contained "Ki.*g" anywhere in the word you could do:
db.collection('businesses').find({name: {$regex: 'Ki.*g'}})
or
db.collection('businesses').find({name: {$regex: /Ki.*g/}})
Do the effort and go through the documentation. Everything is explained there, with a lot more details. https://docs.mongodb.com/manual/reference/operator/query/regex/

I'm having a similar problem. I'm sending:
async search (term) {
this.suggestions = await this.db.collection(this.collection).find({name: {$regex: term + '.*', $options: 'i'}}).sort({'_id': 1}).execute()
}
And I'm getting this back from Mongo
Stack Trace:
StitchError: $regex requires regular expression
at find (<native code>)
at apply (<native code>)
at executeServiceFunction (<anonymous>:10:10)
{
"service": "mongodb-atlas",
"name": "find",
"arguments": [
{
"database": "monsters",
"collection": "monsters",
"query": {
"name": {
"$regex": "Aart.*",
"$options": "i"
}
},
"project": null,
"sort": {
"_id": {
"$numberInt": "1"
}
}
}
]
}

Related

Count and Sort On Array Intersection

I have this schema
module.exports = function(conn, mongoose) {
// var autoIncrement = require('mongoose-auto-increment');
var UsersSchema = new mongoose.Schema({
first_name: String,
last_name:String,
sex: String,
fk_hobbies: []
}
, {
timestamps: true
}, {collection: 'wt_users'});
return conn.model('wt_users', UsersSchema);
};
And for example I have these users in data base
{
"_id" : ObjectId("5aca2ac25c1d8adeb4a2dab0"),
first_name:"Pierro",
last_name:"pierre",
sex:"H",
fk_hobbies: [
{
"_id" : ObjectId("5ac9f84d5c1f8adeb4a2da97"),
"name" : "Art"
},
{
"_id" : ObjectId("5ac9f84d5c8d8adeb4a2da97"),
"name" : "Sport"
},
{
"_id" : ObjectId("5ac9f84d9c1d8adeb4a2da97"),
"name" : "Fete"
},
{
"_id" : ObjectId("5acaf84d5c1d8adeb4a2da97"),
"name" : "Série"
},
{
"_id" : ObjectId("6ac9f84d5c1d8adeb4a2da97"),
"name" : "Jeux vidéo"
}
]
},
{
"_id" : ObjectId("5ac9fa075c1d8adeb4a2da99"),
first_name:"jean",
last_name:"mark",
sex:"H",
fk_hobbies: [
{
"_id" : ObjectId("5ac7f84d5c1d8adeb4a2da97"),
"name" : "Musique"
},
{
"_id" : ObjectId("5ac9f24d5c1d8adeb4a2da97"),
"name" : "Chiller"
},
{
"_id" : ObjectId("5ac9f84c5c1d8adeb4a2da97"),
"name" : "Papoter"
},
{
"_id" : ObjectId("5ac9f84d2c1d8adeb4a2da97"),
"name" : "Manger"
},
{
"_id" : ObjectId("5ac9f84d5c1d8adeb4a2da97"),
"name" : "Film"
}
]
},
{
"_id" : ObjectId("5aca0a635c1d8adeb4a2da9d"),
first_name:"michael",
last_name:"ferrari",
sex:"H",
fk_hobbies: [
{
"_id" : ObjectId("5ac9f84d5c1d8adeb4a2ea97"),
"name" : "fashion"
},
{
"_id" : ObjectId("5ac9f84d5c1e8adeb4a2da97"),
"name" : "Voyage"
},
{
"_id" : ObjectId("5ac9f84c5c1d8adeb4a2da97"),
"name" : "Papoter"
},
{
"_id" : ObjectId("5ac9f84d2c1d8adeb4a2da97"),
"name" : "Manger"
},
{
"_id" : ObjectId("5ac9f84d5c1d8adeb4a2da97"),
"name" : "Film"
}
]
},
{
"_id" : ObjectId("5ac9fa074c1d8adeb4a2da99"),
first_name:"Philip",
last_name:"roi",
sex:"H",
fk_hobbies:
[
{
"_id" : ObjectId("5ac7f84d5c1d8adeb4a2da97"),
"name" : "Musique"
},
{
"_id" : ObjectId("5ac9f24d5c1d8adeb4a2da97"),
"name" : "Chiller"
},
{
"_id" : ObjectId("5ac9f84c5c1d8adeb4a2da97"),
"name" : "Papoter"
},
{
"_id" : ObjectId("5ac9f84d2c1d8adeb4a2da97"),
"name" : "Manger"
},
{
"_id" : ObjectId("5ac9f84d5c1d8adeb4a2da97"),
"name" : "Film"
}
]
}
I want to create a mongoose query that match user getted by id, with others users in database according this :
the query will return firstly the users that have the max number of the same hobbies, that is 5, then the users that have the same 4 hobbies ...
I create a solution fully Javascipt / node js, Is there any query with mongo ?
this is my solution
//var user : the current user that search other similar users : jean mark : 5ac9fa075c1d8adeb4a2da99
//var users : all other users
var tab = []
async.each(users, function(item, next1){
var j = 0;
var hobbies = item["fk_hobbies"]
for(var i = 0; i < 5; i++)
{
var index = hobbies.findIndex(x => x["_id"] == user[0]["fk_hobbies"][i]["_id"].toString());
if(index != -1)
j++
}
if(j != 0)
tab.push({nbHob:j, user:item})
next1()
}, function ()
{
var tab2 = tab.sort(compare)
res.json({success:true, data:tab2})
})
function compare(a,b) {
if (a.nbHob > b.nbHob)
return -1;
if (a.nbHob < b.nbHob)
return 1;
return 0;
}
the displayed result is like this
nbHob : represents the number of similar hobbies
{"success":true,"data":[{"nbHob":5,"user":{"_id":"5ac9fa074c1d8adeb4a2da99","u_first_name":"Akram","u_last_name":"Cherif","u_email":"","u_login":"","u_password":"","u_user_type":0,"u_date_of_birth":"","u_civility":0,"u_sex":"H","u_phone_number":"","u_facebook_id":"","u_google_id":"","u_twitter_id":"","u_profile_image":"","u_about":"","u_profession":"","u_fk_additional_infos":[null],"u_budget":0,"u_address":{"country":"France","state":"Paris","city":"TM","zip":76001},"u_fk_hobbies":[{"name":"Musique","_id":"5ac7f84d5c1d8adeb4a2da97"},{"name":"Chiller","_id":"5ac9f24d5c1d8adeb4a2da97"},{"name":"Papoter","_id":"5ac9f84c5c1d8adeb4a2da97"},{"name":"Manger","_id":"5ac9f84d2c1d8adeb4a2da97"},{"name":"Film","_id":"5ac9f84d5c1d8adeb4a2da97"}]}},{"nbHob":3,"user":{"_id":"5aca0a635c1d8adeb4a2da9d","u_first_name":"Chawki","u_last_name":"Gasmi","u_email":"","u_login":"","u_password":"","u_user_type":0,"u_date_of_birth":"","u_civility":0,"u_sex":"H","u_phone_number":"","u_facebook_id":"","u_google_id":"","u_twitter_id":"","u_profile_image":"","u_about":"","u_profession":"","u_fk_additional_infos":[null],"u_budget":{"min":500,"max":850},"u_address":{"country":"","state":"","city":"","zip":0},"u_fk_hobbies":[{"name":"fashion","_id":"5ac9f84d5c1d8adeb4a2ea97"},{"name":"Voyage","_id":"5ac9f84d5c1e8adeb4a2da97"},{"name":"Papoter","_id":"5ac9f84c5c1d8adeb4a2da97"},{"name":"Manger","_id":"5ac9f84d2c1d8adeb4a2da97"},{"name":"Film","_id":"5ac9f84d5c1d8adeb4a2da97"}]}}]}
Your question data seems a bit messed up due to probably far to liberal copy/paste since every hobby has the same ObjectId value. But I can correct that with a full self contained example:
const { Schema } = mongoose = require('mongoose');
const uri = 'mongodb://localhost/people';
mongoose.Promise = global.Promise;
mongoose.set('debug', true);
const hobbySchema = new Schema({
name: String
});
const userSchema = new Schema({
first_name: String,
last_name: String,
sex: String,
fk_hobbies: [hobbySchema]
});
const Hobby = mongoose.model('Hobby', hobbySchema)
const User = mongoose.model('User', userSchema);
const userData = [
{
"first_name" : "Pierro",
"last_name" : "pierre",
"sex" : "H",
"fk_hobbies" : [
"Art", "Sport", "Fete", "Série", "Jeux vidéo"
]
},
{
"first_name": "jean",
"last_name" : "mark",
"sex" : "H",
"fk_hobbies" : [
"Musique", "Chiller", "Papoter", "Manger", "Film"
]
},
{
"first_name" : "michael",
"last_name" : "ferrari",
"sex" : "H",
"fk_hobbies" : [
"fashion", "Voyage", "Papoter", "Manger", "Film"
]
},
{
"first_name" : "Philip",
"last_name" : "roi",
"sex" : "H",
"fk_hobbies" : [
"Musique", "Chiller", "Papoter", "Manger", "Film"
]
}
];
const log = data => console.log(JSON.stringify(data, undefined, 2));
(async function() {
try {
const conn = await mongoose.connect(uri);
await Promise.all(
Object.entries(conn.models).map(([k,m]) => m.remove())
);
const hobbies = await Hobby.insertMany(
[
...userData
.reduce((o, u) => [ ...o, ...u.fk_hobbies ], [])
.reduce((o, u) => o.set(u,1) , new Map())
]
.map(([name,v]) => ({ name }))
);
const users = await User.insertMany(userData.map(u =>
({
...u,
fk_hobbies: u.fk_hobbies.map(f => hobbies.find(h => f === h.name))
})
));
let user = await User.findOne({
"first_name" : "Philip",
"last_name" : "roi"
});
let user_hobbies = user.fk_hobbies.map(h => h._id );
let result = await User.aggregate([
{ "$match": {
"_id": { "$ne": user._id },
"fk_hobbies._id": { "$in": user_hobbies }
}},
{ "$addFields": {
"numHobbies": {
"$size": {
"$setIntersection": [
"$fk_hobbies._id",
user_hobbies
]
}
},
"fk_hobbies": {
"$map": {
"input": "$fk_hobbies",
"in": {
"$mergeObjects": [
"$$this",
{
"shared": {
"$cond": {
"if": { "$in": [ "$$this._id", user_hobbies ] },
"then": true,
"else": "$$REMOVE"
}
}
}
]
}
}
}
}},
{ "$sort": { "numHobbies": -1 } }
]);
log(result);
mongoose.disconnect();
} catch(e) {
} finally {
process.exit();
}
})()
Most of that is just "setup" to re-create the data set, but simply put we're just adding the users and their hobbies and keeping a "unique" identifier for each "unique hobby" by name. This is probably what you actually meant in the question, and it's the sort of model you should be following.
The interesting part is all in the .aggregate() statement, which is how we "query" then "count" the matching hobbies and enable the "server" to sort the results before returning to the client.
Given a current user ( and the last one in the list you included has the most interesting matches ), we then focus on this section of the code:
// Simulates getting the current user to compare against
let user = await User.findOne({
"first_name" : "Philip",
"last_name" : "roi"
});
// Just get the list of _id values from the current user for reference
let user_hobbies = user.fk_hobbies.map(h => h._id );
let result = await User.aggregate([
// Find all users not the current user with at least one of the hobbies
{ "$match": {
"_id": { "$ne": user._id },
"fk_hobbies._id": { "$in": user_hobbies }
}},
// Add the count of matches, "optionally" we are marking the matched
// hobbies in the array as well.
{ "$addFields": {
"numHobbies": {
"$size": {
"$setIntersection": [
"$fk_hobbies._id",
user_hobbies
]
}
},
"fk_hobbies": {
"$map": {
"input": "$fk_hobbies",
"in": {
"$mergeObjects": [
"$$this",
{
"shared": {
"$cond": {
"if": { "$in": [ "$$this._id", user_hobbies ] },
"then": true,
"else": "$$REMOVE"
}
}
}
]
}
}
}
}},
// Sort the results by the "most" hobbies, which is "descending" order
{ "$sort": { "numHobbies": -1 } }
]);
I've commented those steps for you but let's expand on that.
Firstly we presume you have the current user already returned from the database by whatever means you have already done. For the purposes of the rest of the operations, all your really need from that user is the _id of the "User" itself and of course the _id values from each of that user's chosen hobbies. We can do a quick .map() operation as it shown here, but we keep a copy for ease of reference and not repeating that through the remaining code.
Then we get to the actual aggregate statement. The first condition there is the $match, this works like a standard query expression with all the same operators. We want two things from these query conditions:
Get all users except the current user for consideration;
AND where those users contain at least one match on the same hobbies, by _id value.
So the condition for "everyone else" is essentially to supply the $ne "not equal to" operator in argument to the _id value, comparing of course to the current user _id. The second condition to get only those with the same hobbies uses the $in operator against the _id field of the fk_hobbies array. In MongoDB query parlance we denote this as "$fk_hobbies._id" in order to match against the "inner" _id property values.
The $in operator itself takes a "list" as it's argument and compares each value in the list supplied to the property the condition is assigned to. MongoDB itself does not care that fk_hobbies is an array or a single value, and will simply look for an match for anything in the provided list. Think of $in as a short way of writing $or, except you don't need to explicitly include the same property name on every condition.
Now you have the correct documents selected and have discarded any users who do not share any of the same hobbies we can move on to the next stage. Note also that the whole $match considers it logical that you only want those "matching" users. If you actually wanted to see "all users" including those with "no matches", then you can simply omit the whole $match pipeline stage. Your code is discarding anything that was not counted, so this code simply doesn't bother to count anything which "must" have a 0 count.
The $addFields stage pipeline stage is a quick way to "add new fields" to the document returned in results. The main output you want here is the "numHobbies" in addition to the other user details, so this pipeline stage operator is the optimal way to do this, but if you're MongoDB server is a bit older then you can simply specify "all" fields you want to include in addition to any new ones using $project instead.
In order to "count" the number of hobbies in common we essentially use two aggregation operators, which are $setIntersection and $size. Both of these should be available in an MongoDB version you really should be using in production.
In respective order the $setIntersection operator "compares sets" which is in this case the list of _id values within fk_hobbies, both from the current selected user we stored earlier and from the present document being considered in the expression. The result from this operator is the list of values which are the "same" between both lists.
Naturally the $size operator looks at the returned list ( or set ) from $setIntersection and returns the number of entries in that list. This of course is the "matched count".
The next part involves projecting a "re-written" form of the fk_hobbies array. This is totally optional and by my own design for demonstration purposes. "If" you wanted to do what I am doing here as well, then what this bit of code does is adds an additional property to the objects of the fk_hobbies array to indicate where that particular hobby was one of those which matched the list.
I'm saying this is "optional" because I'm actually demonstrating two features available for MongoDB 3.6 only. These involve the usage of $mergeObjects on the inner array elements and the usage of Conditionally Exlcuding Fields.
Stepping through that, since fk_hobbies is an array we need to use the $map operator in order to "reshape" the objects inside it. This operator allows us to process each array member and return a new value based on the transformations we include as it's argument. It's usage is much the same as .map() for JavaScript or any other language which implements a similar operation.
Therefore for each object in the array ( $$this ) we apply the $mergeObjects operator which will "merge" the result of it's arguments. These are provided as the $$this for the current object as it already is, and the second argument in the expression which is doing something new and interesting.
Here we use the $cond operator, which is a "ternary" operator ( or if..then..else expression ) which considers a condition if and then returns either the then argument where that expression was true, or the else expression where it was false. The expression here is another form of $in used as an aggregation expression. In this form the first argument is a singular value $$this._id which will be compared to a list expression in the second argument. That second argument is of course the list of the current user hobby id's we kept earlier, and are using again for comparison.
That usage of $in alone would return either true or false where it was a match. But the extra demonstrated action here is that within the $cond expresion, our else condition for false returns the new and special $$REMOVE value. What this means is that with our "shared" property we are adding to each object in the array, rather than assigning it a value of false where there was no match, we actually don't include that property in the output document at all.
That "optional" part is really just there as a "nice touch" to indicate which "hobbies" were matched in the conditions, rather than simply returning the count. If you like it then use it, and if you don't have MongoDB 3.6 with those features you can simply do that same alteration in the returned documents from the aggregation output anyway:
let result = await User.aggregate([
{ "$match": {
"_id": { "$ne": user._id },
"fk_hobbies._id": { "$in": user_hobbies }
}},
{ "$addFields": {
"numHobbies": {
"$size": {
"$setIntersection": [
"$fk_hobbies._id",
user_hobbies
]
}
}
}},
{ "$sort": { "numHobbies": -1 } }
]);
// map each result after return
result = result.map(r =>
({
...r,
fk_hobbies: r.fk_hobbies.map(h =>
({
...h,
...(( user_hobbies.map(i => i.toString() ).indexOf( h._id.toString() ) != -1 )
? { "shared": true } : {} )
})
)
})
)
Either way, the main thing you wanted out of any $addFields or $project statement was the actual "numHobbies" value indicating the count. And the main reason we did that on the server was so that we can also $sort on the server, which would in turn allow you to add things like $limit and $skip to larger result sets for purposes of paging where it simply would not be practical to get all the results from the collection, even if they were filtered in the initial match or regular query.
Anyhow, from the small sample of documents in the question as also generated in the sample listing, we get a result like this:
[
{
"_id": "5ad6bbe63365bc3428feed8a",
"first_name": "jean",
"last_name": "mark",
"sex": "H",
"fk_hobbies": [
{
"_id": "5ad6bbe63365bc3428feed7d",
"name": "Musique",
"__v": 0,
"shared": true
},
{
"_id": "5ad6bbe63365bc3428feed7e",
"name": "Chiller",
"__v": 0,
"shared": true
},
{
"_id": "5ad6bbe63365bc3428feed7f",
"name": "Papoter",
"__v": 0,
"shared": true
},
{
"_id": "5ad6bbe63365bc3428feed80",
"name": "Manger",
"__v": 0,
"shared": true
},
{
"_id": "5ad6bbe63365bc3428feed81",
"name": "Film",
"__v": 0,
"shared": true
}
],
"__v": 0,
"numHobbies": 5
},
{
"_id": "5ad6bbe63365bc3428feed90",
"first_name": "michael",
"last_name": "ferrari",
"sex": "H",
"fk_hobbies": [
{
"_id": "5ad6bbe63365bc3428feed82",
"name": "fashion",
"__v": 0
},
{
"_id": "5ad6bbe63365bc3428feed83",
"name": "Voyage",
"__v": 0
},
{
"_id": "5ad6bbe63365bc3428feed7f",
"name": "Papoter",
"__v": 0,
"shared": true
},
{
"_id": "5ad6bbe63365bc3428feed80",
"name": "Manger",
"__v": 0,
"shared": true
},
{
"_id": "5ad6bbe63365bc3428feed81",
"name": "Film",
"__v": 0,
"shared": true
}
],
"__v": 0,
"numHobbies": 3
}
]
So there are two users that were returned and we counted the matching hobbies as 5 and 3 respectively and returned the one with the most matched first. You can also see the addition of the "shared" property on each of the matched hobbies to indicate which of the hobbies in each of the returned users lists were also shared with the original user they were compared with.
NOTE: You were probably just "trying things" but your usage of async.each() in your question was not really necessary since none of the inner code is actually "async" itself. Even in the listing here, the only thing you actually need to "await" as an async call after you have the current user to compare is the .aggregate() response itself.
So if at any part of this you were presuming you would be "awaiting requests within a loop", then you were mistaken. Simply ask the database for the results and await their return.
One request to the database is all that is required.
N.B It's also 2018, so you really should start to understand Promises and usage of async/await with them. The code is much cleaner that way and surely any newly developed application should be running in an environment with this support. So "callback helper" libraries like "node async", are a little "old hat" and outmoded in a modern context.

Adding Objects to Array of Objects in a Document using Mongoose.js

I've looked in stackoverflow, however I've not found the answer. I'm trying to add Object to a New(empty) Array in my local mongodb that is not a duplicate. I also want to update this Array with other Objects.
I've looked at $push and $addToSet, the examples are using an "id" (_id) which wouldn't be created until I add my first Object.
I'm using Node.js, Mongoose.js, Mongodb, Express.js.
My Schema is:
var mongoose = require('mongoose');
var Schema = mongoose.Schema;
var barSchema = new Schema({
location: [{
name: String,
city: String,
total: Number
}]
});
var Bar = mongoose.model('Bar', barSchema);
module.exports = Bar;
I've tried to use this;
var newBar = bar({
location: [{ "name": req.body.bar, "city": req.body.city, "total": 0 }] });
newBar.save(function(err) {
if (err) throw err;
});
I've also used the $push with success but in this case I've not got an "id"
user.findByIdAndUpdate(req.user._id, { $push: {
barlist: { "name": req.body.bar,
"rsvp": true } } },
function(err, user) { });
Which gives back this;
{
"_id" : ObjectId("######"),
"location" : [
{
"name" : "1st Bar",
"city" : "Boston",
"total" : 0,
"_id" : ObjectId("#########")
}
],
"__v" : 0
}
{
"_id" : ObjectId("######"),
"location" : [
{
"name" : "2nd Bar",
"city" : "Boston",
"total" : 0,
"_id" : ObjectId("#########")
}
],
"__v" : 0
}
However I am trying to get this;
{
"_id" : ObjectId("#######"),
"location" : [
{
"name" : "Biddy Early's",
"city" : "Boston",
"total" : 0
},
{
"name" : "Some Bar Name",
"city" : "Boston",
"total" : 0
}
]
}
Please know there may be better ways to do this, but... The first thing you want to do in this case is create a new instance of your schema or 'model'. Based on what your code looks like you might want to consider something like this;
var newBar = bar({
"city": req.body.city,
location: [{
"bar": req.body.bar,
"total": 0
}]
});
newBar.save(function(err) {
if (err) throw err;
});
Since if you are looking for bars or restaurants in the same city you might want to have a common key/value pair to '.find()' or '.update()' depending on what you want to do with it.
From here, you will want to look into what you mentioned before with '$addToSet' so that you wouldn't be adding duplicate bars to your 'location' array. So for instance;
bar.findOneAndUpdate({'city':req.body.city},
{'$addToSet': { location: {
'bar': req.body.bar, 'total': 0 } } },
function(err, b) {
if (err) throw err;
console.log(b);
});
Consider using a strategy like if/else to determine if the city name exists, if it does then you would utilize the '$addToSet'. Else, if it didn't exist you would utilize the new model like the example I used.

Matching field by omitting spaces - MongoDB

I'm trying to find the mongo document by matching the "Tel" field value,
{
"_id" : ObjectId("54f047aa5b9e5c7c13000000"),
"data" : [
{
"Id" : "1",
"Country" : "India",
"Timezone" : "Europe/Paris",
**"Tel" : "03 20 14 97 70",**
"Prenom" : "ddd",
"Email" : "ddd#gmail.com",
"City" : "Chennai",
"date" : "",
"active" : "true"
}
]
}
how to fetch the above document from mongo collection using the below find method without space in "Tel" field,
>db.test.find({"data.Tel":"0320149770"})
Please can anyone help me !!!
If this is what you really want to do on a regular basis then you are best off adding another field to the document that has the string present without any spaces.
The reason why is though there are functions you can perform to do the search, none of the methods are able to use an index to match the document, so this means scanning everything in the collection in order to find a match.
You can do this with JavaScript evaluation in a $where clause:
db.test.find(function() {
return this.data.some(function(el) {
el.Tel.replace(/ /g,"") == "0320149770"
});
});
But don't do that because it's really bad. you are better off just updating all the data instead:
db.test.find().forEach(function(doc) {
doc.data = doc.data.map(function(el) {
el.TelNum = el.Tel.replace(/ /g,"");
})
db.test.update({ "_id": doc._id },{ "$set": { "data": doc.data } });
})
Or something along those lines to have a field without spaces all ready to search on directly.

Mongodb : Mapreduce query filter positional operator

I have a huge data set (in millions) in the following format :
{
"userid" : "codejammer",
"data" : [
{"type" : "number", "value" : "23748"},
{"type" : "message","value" : "one"}
]
}
I want to get count of message with value one for userid - codejammer
The following is the mapreduce function I am using :
Map :
var map = function(){
emit(this.data[0].value,1);
}
Reduce
var reduce = function(key,values){
return Array.sum(values);
}
Options
var options = {
"query":{"userid" : "codejammer",
"data.type" : "message"},
"out" : "aggregrated"
}
The mapreduce function executes successfully with the following output:
{
"_id" : 23748,
"value" : 1
}
But, I am expecting the following output :
{
"_id" : one,
"value" : 1
}
The query filter in options, is sending the entire array to map function even though I specifically ask for data.type : "message"
Is there any way to use projection operator in query filter to get only the required item in array ?
Thank you very much for your help.
You actually would be better off doing this with aggregate. There is no need for mapReduce in this case and the aggregation framework runs as native code and will be much faster than running through the JavaScript interpreter:
db.collection.aggregate([
// Still makes sense to match the documents to reduce the set
{ "$match": {
"userid": "codejammer",
"data": { "$elemMatch": {
"type": "message", "value": "one"
}}
}},
// Unwind to de-normalize the array content
{ "$unwind": "$data" },
// Filter the content of the array
{ "$match": {
"data.type": "message",
"data.value": "one"
}},
// Count all the matching entries
{ "$group": {
"_id": null,
"count": { "$sum": 1 }
}}
])
Of course if you actually did only ever have one "message" inside your "data" array this becomes very simple:
db.collection.aggregate([
// Match the documents you want
{ "$match": {
"userid": "codejammer",
"data": { "$elemMatch": {
"type": "message", "value": "one"
}}
}},
// Simply count the documents
{ "$group": {
"_id": null,
"count": { "$sum": 1 }
}}
])
But of course that is actually no different to this:
db.collection.find({
"userid": "codejammer",
"data": { "$elemMatch": {
"type": "message", "value": "one"
}}
}).count()
So while there is a way to do this with mapReduce, the other ways shown are much better. Especially in the newly released 2.6 version and upwards. In the newer versions the aggregation pipeline can make use of disk storage to handle very large collections.
But to get the count using mapReduce you were basically going about it the wrong way. The projection will not work as an input, so you need to take the element out of the results. I'm still going to consider that there could possibly be more than one matching value in your array even if that was not the case:
db.collection.mapReduce(
function() {
var userid = this.userid;
this.data.forEach(function(doc) {
if ( doc == condition )
emit( userid, 1 );
});
},
function(key,values) {
return values.length;
},
{
"query": {
"userid": "codejammer",
"data": { "$elemMatch": {
"type": "message", "value": "one"
}}
},
"scope": {
"condition": {"type" : "message", "value" : "one"}
},
"out": { "inline": 1 }
}
)
So in much the same way this "emits" a value for the common key when a document matching your criteria is found inside the data array. So you cannot project just the matching element, you get all of them and you filter in this way.
Since you are only expecting one result there is no point in actually outputting to a collection, so just send it out as one.
But basically, use the aggregation method if you have to do this.

MongoDB Retrieve a subset of an array in a collection by specifying two fields which should match

I'm using this structure to store conversations & messages:
{ "_id" : ObjectId( "4f2952d7ff4b3c36d700000d" ),
"messages" : [
{ "_id" : ObjectId( "4f2952d7ff4b3c36d700000c" ),
"sender" : "4f02f16f0364c024678c0e5f",
"receiver" : "4f02f16f0364c024678c0e61",
"receiver_deleted" : "true",
"sender_deleted" : "true",
"body" : "MSG 1",
"timestamp" : "2012-02-01T14:57:27Z" },
{ "_id" : ObjectId( "4f2952daff4b3c36d700000e" ),
"sender" : "4f02f16f0364c024678c0e61",
"receiver" : "4f02f16f0364c024678c0e5f",
"body" : "MSG 2",
"timestamp" : "2012-02-01T14:57:30Z" },
{ "_id" : ObjectId( "4f295305ff4b3c36d700000f" ),
"sender" : "4f02f16f0364c024678c0e5f",
"receiver" : "4f02f16f0364c024678c0e61",
"body" : "TEST",
"timestamp" : "2012-02-01T14:58:13Z" } ],
"participants" : [
"4f02f16f0364c024678c0e5f",
"4f02f16f0364c024678c0e61" ],
"type" : "chat" }
When one of the sender or receiver does delete a specific message, receiver_deleted or sender_deleted gets added to the message (as you see in the first message).
Now how can I fetch a conversation with only the messages in it which haven't the sender/receiver deleted flag set?
First I tried like this:
db.conversations.find({
"_id": ObjectId("4f2952d7ff4b3c36d700000d"),
"participants": {"$in": ["4f02f16f0364c024678c0e5f"]},
"$or": [
{
"$and": [{"messages.sender": "4f02f16f0364c024678c0e5f"}, {"messages.sender_deleted": {"$exists": false}}]
},
{
"$and": [{"messages.receiver": "4f02f16f0364c024678c0e5f"}, {"messages.receiver_deleted": {"$exists": false}}]
}
]
})
But this doesn't work. I also tried with $elemMatch like this:
db.conversations.find({
"_id": ObjectId("4f2952d7ff4b3c36d700000d"),
"participants": {"$in": ["4f02f16f0364c024678c0e5f"]},
"$or": [
{
"messages": {
"$elemMatch": {"sender": "4f02f16f0364c024678c0e5f", "sender_deleted": {"$exists": False}}
}
},
{
"messages": {
"$elemMatch": {"receiver": "4f02f16f0364c024678c0e5f", "receiver_deleted": {"$exists": False}}
}
}
]
})
And a couple of other options with trying $and instead of $or etc. but it doesn't work.. Either it returns nothing or the whole conversation regardless of the receiver/sender deleted fields.
Thank you,
Michael
It is not currently possible to retrieve a subset of an array with mongodb. You will always get the whole document back if there is a match, or nothing if there is no match. $slice allows you to return a subset, but that's based on a starting and stopping index (which is not what you want - as you want to return only the matching messages in the array).
The feature you are describing has been logged and requested here: https://jira.mongodb.org/browse/SERVER-828
Since version 2.2 Aggregation Framework is available. You could perform your query like this:
db.expose.aggregate(
//Find the Documents which contains the desired criteria (document level)
{
$match: {
$or: [
{
"messages.sender_deleted": "true"
},
{
"messages.receiver_deleted": "true"
}]}},
//Peels off the elements of messages array individually
{
$unwind: "$messages"
},
//Perform the same find method, now in the embed collection level
{
$match: {
$or: [
{
"messages.sender_deleted": "true"
},
{
"messages.receiver_deleted": "true"
}]}},
//Select what to show as the result: in this case the Document id and the messages array
{
$group: {
_id: "$_id",
messages: {
$push: "$messages"
}}});
The first match is not required, but is better to filter out as much as possible in the beginning.

Categories

Resources