I have about 30,000 documents in a MongoDB collection. And have been stuck in developing a node.js script to retrieve only the records with a specific string key-value pair.
this query on MongoDB server returns me the exact results I've been looking for:
db.getCollection('posts').find({authorName: "Ashwin-kumar"})
Returns me about 33 documents instantly. Likewise I've about 40 authors with different names.
Here's my node.js script to retrieve posts by authorName (Yes, it is based on Name, a string, as there is no ID for these authors :( ):
var fs = require('fs'),
request = require('request'),
async = require("async"),
assert = require('assert');
_ = require('lodash'),
MongoClient = require('mongodb').MongoClient;
var db, postsCollection, postCol;
async.series([dbConnect, checkCollection, createMeta, dbClose], function(){
console.log("Executed all calls in series.");
process.exit(0);
});
function dbConnect(callback){
MongoClient.connect("mongodb://localhost:27017/jPosts", function(pErr, pDb) {
if(pErr) {
console.dir(pDb);
return 0;
}
db = pDb;
callback();
});
}
function dbClose(callback){
db.close(true, function (err) {
if (err) console.error(err);
else console.log("close complete");
callback();
});
}
function checkCollection(callback) {
db.collection('posts', function(err, collection) {});
postsCollection = db.collection('posts');
postCol = db.collection('posts');
callback();
}
function createMeta(callback){
var meta = [];
postsCollection.aggregate([
{
$group : {_id : "$authorName"}
}]).toArray(function(err, result) {
assert.equal(err, null);
async.forEachLimit(result, 1, function(pPost, callback) {
getPosts(pPost._id, callback);
}, function(err) {
console.log(err);
callback();
});
});
}
function getPosts(pAuthor, callback){
var cursor = postCol.find({ "authorName": pAuthor});
cursor.toArray(function(err,items){
if(err)
callback(err);
else
callback(null, items);
});
}
This does not seem to work for me. cursor.toArray() does nothing but wait forever. Is it because of too many fields in each document?
I tried to get the count of the documents the cursor fetched and it works well.
function getPosts(pAuthor, callback){
var cursor = postCol.find({ "authourName": pAuthor});
cursor.count().then(function(items_count) {
console.log(items_count);
callback();
});
}
Also, I tried the cursor's .each method to iterate the documents fetched. But no luck yet.
function getPosts(pAuthor, callback){
var cursor = postCol.find({ "authourName": pAuthor});
cursor.each(function(err, doc) {
assert.equal(err, null);
if (doc != null) {
console.dir(doc);
} else {
console.log(err);
}
});
}
Am I missing something here? What else can be done to make this work? Is there any issues with the way I'm using async?
P.S: The idea here is to query the dump and generate the PDF's for authours in the jPost collection.
P.S 2: Here's a sample document
{
"_id" : ObjectId("571d36b55672f713fe346a66"),
"id" : 56517,
"authorName" : "Ashwin-kumar",
"comment_count" : 380,
"tagline" : "... Opinions you don't really need",
"vote_count" : 5152,
"exclusive" : null,
"post": [
],
"post_comments" : [
//comment_count objects
],
"date" : "2016-03-27"
}
(I've omitted post & post_comments parts for brevity.)
try this:
var collection = db.collection("collection_name");
collection.find({authourName: "Ashwin-kumar"}).toArray(function (err,items) {
if (err) {
console.dir(err);
} else {
//do something with items array
console.dir(items);
}
});
Did you check what is the value of pAuthor in getPosts? Because when you do aggregation, you receive a collection of objects with _id field (not authourName), so you should do:
// not sure why you need meta array, at least it's not used in the code you provided
meta.push({
author: pPost._id
});
getPosts(pPost._id, callback);
Related
I have an API which I call three times with three different parameters.
https://api.developer.com/${param1} // param2, and param3
This api returns 30,000+ results each time. (total of around 100,000).
I want to store this data in a single collection. Meaning, I want 100,000+ documents in one collection.
I have a small script that extends the npm request module, which looks like this:
```
let _request = (urls, cb) => {
let results = {}, i = urls.length, c = 0;
handler = (err, response, body) => {
let url = response.request.uri.href;
results[url] = { err, response, body };
if (++c === urls.length) {
cb(results);
}
};
while (i--) {
request(urls[i], handler);
}
};
```
But let's exclude that for now. The function which I use to GET a single endpoint, and update the Database after is this:
function update() {
request(url, (err, response, body) => {
if (err) {
console.log(err);
} else {
let json = {};
try {
json = JSON.parse(body);
} catch (e) {
console.log(e);
}
_.forOwn(json, (price, market_hash_name) => {
Price.update(
{ market_hash_name },
{
$set: { price }
},
{ upsert: true },
err => {
if (err) {
console.log(err);
}
}
);
});
}
});
}
The raw data returned looks like this:
{
market_hash_name: price,
market_hash_name: price.. etc
}
The problem:
Even though, Object.keys(data).length === 30000, my MongoDB collection only writes down ~10,000 documents, and the other ~20000 vanish into thin air.
I've checked a thousand times, with Postman, Browser and even logging the data.keys in the console, and I am sure there are 30k + key:value pairs.
Is it something wrong with my code? Is it bad practice to call Price.update for every key:value pair in the json (probably). But I'm stuck. Any help would be much appreciated.
So right now, I'm working on a service to allow multiple events to store data on MongoDB. They store event data by creating new collections on MongoDB every time a new event comes on. If the same event needs to store a different set of data, a new document in MongoDB should be created.
The code below is the service I created to handle this.
import WhiteBoardEvent from '../model/event.model';
import IEventStore from '../interface/eventStore.interface';
import * as MongoClient from 'mongodb';
export class EventStore implements IEventStore {
private mongoDBEndpoint = "mongodb://192.168.10.10:27017";
public insert(event: WhiteBoardEvent, callback: (err: any) => void): void {
MongoClient.connect(this.mongoDBEndpoint, { connectTimeoutMS: 1000 }, (connErr, db) => {
if (connErr) { db.close(); callback(connErr); return; }
this.getNextSequence(db, event, (err, sequence) => {
if (err) { db.close(); callback(err); return; }
event.sequence = sequence;
db.collection(event.roomId).insert(event, (err) => {
db.close();
callback(err);
});
});
});
}
private createCounterCollection(db: MongoClient.Db, event: WhiteBoardEvent, callback: (err: any) => void): void {
db.collection("counters").insert({
roomId: event.roomId,
sequence: 0
}, callback);
}
private getNextSequence(db: MongoClient.Db, event: WhiteBoardEvent, callback: (err: any, sequence: number) => void): void {
var collection = db.collection("counters");
collection.findOneAndUpdate(
{ roomID: event.roomId },
{
$inc: { sequence: 1 },
// new: true
},
{
upsert: true,
returnOriginal: false
},
(err, r) => {
if (err) {
this.createCounterCollection(db, event, (err) => {
if (err) { callback(err, -1); return; }
callback(null, 0);
});
return;
}
callback(null, r.value.sequence);
console.log("counter : " + r.value.sequence);
}
);
}
}
The following code is a test file I created so that I can see the changes in MongoDB.
import * as timers from 'timers';
import WhiteBoardEvent from './data/model/event.model';
import { EventStore } from './data/service/eventStore.service';
var model = new WhiteBoardEvent();
model.name = "t2";
model.roomId = "testRoom";
model.timestamp = new Date();
model.userId = "";
var model2 = new WhiteBoardEvent();
model2.name = "t1";
model2.roomId = "testRoom2";
model2.timestamp = new Date();
model2.userId = "";
var eventStore = new EventStore();
var timer1 = timers.setInterval(()=>{
eventStore.insert(model, (err)=>{
if(err){
console.log(err);
}else{
console.log("Test Completed!");
}
});
}, 1000);
var timer2 = timers.setInterval(()=>{
eventStore.insert(model2, (err)=>{
if(err){
console.log(err);
}else{
console.log("Test Completed!");
}
});
}, 1000);
This is a snippet of the output I get. Here, "Test Completed" is shown for the first instances, after that, I'm getting the duplicate errors.
counter : 1
counter : 1
Test Completed!
Test Completed!
counter : 2
{ MongoError: E11000 duplicate key error collection: admin.testRoom index:
_id_ dup key: { : ObjectId('59d5da14cedd6f28a5db8c93') }
Can anyone help me with this? Thank you in advance!
You are creating two instances of WhiteBoardEvent without explicitly setting an ID (this is fine, but relevant). Have a look at this excerpt from your code above:
db.collection(event.roomId).insert(event, (err) => {
db.close();
callback(err);
});
After handing event over to MongoDB's insert, it is checked to see if it has an ID - it does not. Because of this, the MongoDB code generates an ID for you (see here). This is all great - it's what you want.
However, what happens the next time your setInterval callback is invoked? Well, model and model2 now have an ID set - it was set according to the rules I just described. In this case, now that there's an ID set on the model going into insert, you are trying to reuse the same ID as the MongoDB code leaves it alone.
In your test code, you could simply clear out the ID in your eventStore.insert callback to ensure that a new ID is generated every time. e.g.:
eventStore.insert(model, (err)=>{
model._id = null;
if(err){
console.log(err);
}else{
console.log("Test Completed!");
}
});
It is likely that in your scheme you have you have a key set on unique: true.
Adding another object with the same key or a key not filled in will result in a duplicate key error. Because, if a field is not filled in it will be filled in with null. So 2 times null is a duplicate key error. To make sure this will not happen.
Use sparse: true instead of unique: true. Also note that a field with unique: true is never able to have two of the same keys. Sparse is only able to have multiple nulls(undefined) inside and works the same as unique: true further.
In your case you have to times the userid on "", this will probably cause the error if its set on unique.model.userId = "";
Hope this will solve your answer. Else please show us your model.
Sven
I have to try to fetch a field value from MongoDB using Node.js. But it shows me undefined in my console. My requirement is to print the data in the console or browser from MongoDB using Node.js.
1). This is my node js
this.levelChange = function(req, res, next){
try{
var query = {'level_num':2};
QuizLevel.find(query,function(err,data){
var a = data.min_score;
console.log(a);
res.send(a);
});
}catch(err){
console.log("Error");
return next(err);
}
};
2). This is my js-schema
{
_id:{type:String},
age:{type:Number},
level_num:{type:String},
min_score:{type:String},
max_questions:{type:String}
}
3).This is my console output
undefined
4). This is my JSON data
{
"age":5,
"level_num":1,
"min_score":10,
"max_questions":30
},
{
"age":5,
"level_num":2,
"min_score":12,
"max_questions":33
}
Simply use findOne(find return an array of document) with a project field(return only desired fields).
And don't forget to check the err field !
try{
var query = {'level_num':2};
QuizLevel.findOne(query,{min_score: 1}, function(err,data){
if(err || !data)
{
console.log(err);
return next(err);
}
else
{
var a = data.min_score;
console.log(a);
res.send(a);
}
});
}catch(err){
console.log("Error");
return next(err);
}
I might be incorrect but it looks like you're trying to access object property while the result is a collection, see:
data.min_score // => [{ ... }, { ... }].min_score
vs
data[0].min_score
What you want to achieve is something like:
var scores = data.map((function (item) {
return item.min_score;
});
console.log(scores);
You can always check the type of result with console.log(typeof data) or simply write console.log(data), sometimes console.log(Object.keys(data)) come in handy as well for simple debugging not to mention node-inspector.
I'm looking for a way to refactor part of my code to be shorter and simpler, but I don't know Mongoose very well and I'm not sure how to proceed.
I am trying to check a collection for the existence of a document and, if it doesn't exist, create it. If it does exist, I need to update it. In either case I need to access the document's contents afterward.
What I've managed to do so far is query the collection for a specific document and, if it's not found, create a new document. If it is found, I update it (currently using dates as dummy data for this). From there I can access either the found document from my initial find operation or the newly saved document and this works, but there must be a better way to accomplish what I'm after.
Here's my working code, sans distracting extras.
var query = Model.find({
/* query */
}).lean().limit(1);
// Find the document
query.exec(function(error, result) {
if (error) { throw error; }
// If the document doesn't exist
if (!result.length) {
// Create a new one
var model = new Model(); //use the defaults in the schema
model.save(function(error) {
if (error) { throw error; }
// do something with the document here
});
}
// If the document does exist
else {
// Update it
var query = { /* query */ },
update = {},
options = {};
Model.update(query, update, options, function(error) {
if (error) { throw error; }
// do the same something with the document here
// in this case, using result[0] from the topmost query
});
}
});
I've looked into findOneAndUpdate and other related methods but I'm not sure if they fit my use case or if I understand how to use them correctly. Can anyone point me in the right direction?
(Probably) Related questions:
How to check if that data already exist in the database during update (Mongoose And Express)
Mongoose.js: how to implement create or update?
NodeJS + Mongo: Insert if not exists, otherwise - update
Return updated collection with Mongoose
Edit
I didn't come across the question pointed out to me in my searching, but after reviewing the answers there I've come up with this. It's certainly prettier, in my opinion, and it works, so unless I'm doing something horribly wrong I think my question can probably be closed.
I would appreciate any additional input on my solution.
// Setup stuff
var query = { /* query */ },
update = { expire: new Date() },
options = { upsert: true };
// Find the document
Model.findOneAndUpdate(query, update, options, function(error, result) {
if (!error) {
// If the document doesn't exist
if (!result) {
// Create it
result = new Model();
}
// Save the document
result.save(function(error) {
if (!error) {
// Do something with the document
} else {
throw error;
}
});
}
});
You are looking for the new option parameter. The new option returns the newly created document(if a new document is created). Use it like this:
var query = {},
update = { expire: new Date() },
options = { upsert: true, new: true, setDefaultsOnInsert: true };
// Find the document
Model.findOneAndUpdate(query, update, options, function(error, result) {
if (error) return;
// do something with the document
});
Since upsert creates a document if not finds a document, you don't need to create another one manually.
Since you wish to refactor parts of your code to be shorter and simpler,
Use async / await
Use .findOneAndUpdate() as suggested in this answer
let query = { /* query */ };
let update = {expire: new Date()};
let options = {upsert: true, new: true, setDefaultsOnInsert: true};
let model = await Model.findOneAndUpdate(query, update, options);
///This is simple example explaining findByIDAndUpdate from my code added with try catch block to catch errors
try{
const options = {
upsert: true,
new: true,
setDefaultsOnInsert: true
};
const query = {
$set: {
description: req.body.description,
title: req.body.title
}
};
const survey = await Survey.findByIdAndUpdate(
req.params.id,
query,
options
).populate("questions");
}catch(e){
console.log(e)
}
Here is an example I am using. I have to return custom responses for UI updates etc. This can be even shorter. User is
const UserScheme = mongoose.Schema({
_id: String,
name: String,
city: String,
address: String,
},{timestamps: true});
const User = mongoose.model('Users', UserScheme);
async function userUpdateAdd(data){
var resp = '{"status": "error"}';
if(data){
var resp = await User.updateOne({ _id: data._id }, data).then(function(err, res){
console.log("database.userUpdateAdd -> Update data saved in database!");
if(err){
var errMessage = err.matchedCount == 0 ? "User Record does not exist, will create new..." : "Record not updated";
// If no match, create new
if(err.matchedCount == 0){
const create_user = new User(data);
resp = create_user.save().then(function(){
console.log("database.userUpdateAdd -> Data saved to database!");
return '{"status":"success", "message": "New User added successfully"}';
});
return resp;
}
// Exists, return success update message
if(err.matchedCount == 1){
return '{"status": "success", "message" : "Update saved successfully"}';
} else {
return '{"status": "error", "code": "' + err.modifiedCount + '", "message": "' + errMessage + '"}';
}
}
})
.catch((error) => {
//When there are errors We handle them here
console.log("database.userUpdateAdd -> Error, data not saved! Server error");
return '{"status": "error", "code": "400", "message": "Server error!"}';
});
}
return resp;
}
Here's an example:
const mongoose = require('mongoose');
mongoose.connect('mongodb://localhost/rsvp', {useNewUrlParser: true, useUnifiedTopology: true});
const db = mongoose.connection;
db.on('error', () => {
console.log('mongoose connection error');
});
db.once('open', () => {
console.log('mongoose connected successfully');
});
const rsvpSchema = mongoose.Schema({
firstName: String,
lastName: String,
email: String,
guests: Number
});
const Rsvp = mongoose.model('Rsvp', rsvpSchema);
// This is the part you will need... In this example, if first and last name match, update email and guest number. Otherwise, create a new document. The key is to learn to put "upsert" as the "options" for the argument.
const findRsvpAndUpdate = (result, callback) => {
Rsvp.findOneAndUpdate({firstName: result.firstName, lastName: result.lastName}, result, { upsert: true }, (err, results) => {
if (err) {
callback(err);
} else {
callback(null, results);
}
})
};
// From your server index.js file, call this...
app.post('/rsvps', (req, res) => {
findRsvpAndUpdate(req.body, (error, result) => {
if (error) {
res.status(500).send(error);
} else {
res.status(200).send(result);
}
})
});
I am trying to use the async module to cut back the 'callback hell' specific to Node.js. Basically, I am trying to use async.series to retrieve some info from the database and display it in my view. Still, I get no results in my view.
This is the code that I have so far:
// Search
exports.search = function(req, res) {
var x = [];
async.series([
function(cb) {
Lang.find({ lang: req.query.keyword }).sort({ verbal: -1 }).exec(function(err, langs) {
cb(null, langs);
});
},
function(cb) {
Human.find({}, function(err, humans) {
cb(null, humans);
});
}],
function(err, results) {
if (err) {
res.send(500);
}
for(var i = 0; i < results[0].length; i++) {
for(var j = 0; j < results[1].length; j++) {
if(results[1][j]._id == results[0][i].human) {
x.push(results[1][j]);
}
}
}
res.render('myView', { title: 'Search Results', humans: x });
}
);
}
I first want to query the Lang model (MongoDB) and find the records that match req.query.keyword. Afterwards, I want to query the Human model and find all the Humans that have that specific language skill. Also, the FOR loop is meant to eliminate duplicates from my array since a Human might have several languages.
If I understand your code correctly, you might want to use async.waterfall instead. It passes the result from one function as an argument to the next. There are also some other optimizations that can be made:
async.waterfall([
function(done) {
Lang
.find({ lang: req.query.keyword })
.sort({ verbal: -1 }) // (not really useful)
.select('human') // not strictly necessary, but saves a bit of space
.exec(done); // short for :
// .exec(function(err, results) {
// done(err, results);
// });
},
function(langs, done) {
// extract the 'human' property from each result
var _ids = langs.map(function(lang) {
return lang.human;
});
// perform a query finding all humans in the list of ids
Human
.find({ _id : { $in : _ids } })
.exec(done);
}
], function(err, humans) {
if (err)
return res.send(500);
res.render('myView', {
title : 'Search Results',
humans: humans
});
});
EDIT: because $in doesn't preserve order and this answer suggests that using $or will, try this as an alternative for the second query:
...
function(langs, done) {
var query = langs.map(function(lang) {
return { _id : lang.human };
});
Human
.find({ $or : query })
.exec(done);
}
...