I have an API which I call three times with three different parameters.
https://api.developer.com/${param1} // param2, and param3
This API returns 30,000+ results each time (around 100,000 in total).
I want to store this data in a single collection, meaning I want 100,000+ documents in one collection.
I have a small script that extends the npm request module, which looks like this:
```
let _request = (urls, cb) => {
  let results = {}, i = urls.length, c = 0;
  // collect each result keyed by URL; call cb once every request has returned
  let handler = (err, response, body) => {
    let url = response.request.uri.href;
    results[url] = { err, response, body };
    if (++c === urls.length) {
      cb(results);
    }
  };
  while (i--) {
    request(urls[i], handler);
  }
};
```
But let's set that aside for now. The function I use to GET a single endpoint and then update the database afterwards is this:
```
function update() {
  request(url, (err, response, body) => {
    if (err) {
      console.log(err);
    } else {
      let json = {};
      try {
        json = JSON.parse(body);
      } catch (e) {
        console.log(e);
      }
      _.forOwn(json, (price, market_hash_name) => {
        Price.update(
          { market_hash_name },
          {
            $set: { price }
          },
          { upsert: true },
          err => {
            if (err) {
              console.log(err);
            }
          }
        );
      });
    }
  });
}
```
The raw data returned looks like this:
```
{
  market_hash_name: price,
  market_hash_name: price, // ...etc
}
```
The problem:
Even though Object.keys(data).length === 30000, my MongoDB collection ends up with only ~10,000 documents, and the other ~20,000 vanish into thin air.
I've checked a thousand times, with Postman, the browser, and even by logging the keys in the console, and I am sure there are 30k+ key:value pairs.
Is something wrong with my code? Is it bad practice to call Price.update for every key:value pair in the JSON (probably)? But I'm stuck. Any help would be much appreciated.
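For what it's worth, one pattern that may be worth trying (a sketch only, assuming Price is a Mongoose model with bulkWrite available and json is the parsed response body) is to batch all of the upserts into a single bulkWrite call per API response, instead of issuing ~30,000 individual Price.update calls:
```
// Sketch: one bulkWrite per API response instead of one update per key:value pair.
// Assumes `Price` is a Mongoose model and `json` is the parsed body.
const ops = Object.entries(json).map(([market_hash_name, price]) => ({
  updateOne: {
    filter: { market_hash_name },
    update: { $set: { price } },
    upsert: true
  }
}));

Price.bulkWrite(ops, { ordered: false })
  .then(result => console.log('bulkWrite done:', result.upsertedCount, 'upserted'))
  .catch(err => console.log(err));
```
With ordered: false, a single failing operation does not abort the rest of the batch, which also makes it easier to see whether some upserts are actually failing rather than vanishing.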
Related
I am writing an API that fetches data from Twitter using the OAuth v1 API and serves it to my webapp.
I have one API endpoint that fetches from Twitter an array of objects representing each of the lists a user has created.
I have another API endpoint that, given a list ID, fetches an array of all the members of that list.
I want to combine these two endpoints into one, so my webapp can request '/api/getAllLists' and receive an array of list objects (API 1), with one of the properties of each object being the full list of its members (API 2).
I have got myself tangled up trying to work out how to do this; I have used promises and async functions before, but I don't know the best way to accomplish this.
```
router.get('/getAllLists', isLoggedIn, (req, res) => {
  // oauth access credentials
  let { oauthAccessToken, oauthAccessTokenSecret, username, user_id } = req.cookies.twitter || '';
  // first api call to get array of list objects
  consumer.get("https://api.twitter.com/1.1/lists/list.json?user_id=" + user_id, oauthAccessToken, oauthAccessTokenSecret, (error, data, response) => {
    if (error) {
      console.log(error)
    } else {
      data = JSON.parse(data).map((list) => {
        let newList = list;
        // second API call, using list.id_str from the first call
        consumer.get("https://api.twitter.com/1.1/lists/members.json?list_id=" + list.id_str, oauthAccessToken, oauthAccessTokenSecret, (error, data, response) => {
          if (error) {
            console.log(error);
          } else {
            newList.contents = data;
            return newList;
          }
        });
        return newList;
      })
      res.send(data);
    }
  });
});
```
I have accomplished what I was aiming for like this:
```
// oauth access credentials
let { oauthAccessToken, oauthAccessTokenSecret, username, user_id } = req.cookies.twitter || '';
consumer.get("https://api.twitter.com/1.1/lists/list.json?user_id=" + user_id, oauthAccessToken, oauthAccessTokenSecret, (error, data, response) => {
  if (error) {
    console.log(error)
  } else {
    let allLists = [];
    let allListLength = JSON.parse(data).length;
    let listCounter = 0;
    const addToAllLists = (list) => {
      allLists.push(list)
      listCounter++;
      if (listCounter === allListLength) {
        res.send(allLists)
      }
    }
    JSON.parse(data).map((list, i) => {
      let newList = list;
      consumer.get("https://api.twitter.com/1.1/lists/members.json?count=4999&list_id=" + list.id_str, oauthAccessToken, oauthAccessTokenSecret, (error, data2, response) => {
        if (error) {
          console.log(error);
        } else {
          newList.users = JSON.parse(data2).users;
          addToAllLists(newList)
          return newList;
        }
      })
    })
  }
});
```
I definitely don't think this is the best-practice method, though, and I'd still love to see anyone else's suggestions or corrections. Thanks.
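For comparison, here is a rough sketch of the same flow using promises and Promise.all. The consumerGet helper is illustrative (it just wraps the callback-style consumer.get in a promise); the names and error handling are assumptions, not part of the original code:
```
// Hypothetical helper: wraps the callback-style consumer.get in a promise and parses the JSON.
const consumerGet = (url, token, secret) =>
  new Promise((resolve, reject) => {
    consumer.get(url, token, secret, (error, data) => {
      if (error) reject(error);
      else resolve(JSON.parse(data));
    });
  });

router.get('/getAllLists', isLoggedIn, async (req, res) => {
  const { oauthAccessToken, oauthAccessTokenSecret, user_id } = req.cookies.twitter || {};
  try {
    const lists = await consumerGet(
      "https://api.twitter.com/1.1/lists/list.json?user_id=" + user_id,
      oauthAccessToken, oauthAccessTokenSecret);
    // fetch all member lists in parallel, then attach each result to its list
    const withMembers = await Promise.all(lists.map(async (list) => {
      const members = await consumerGet(
        "https://api.twitter.com/1.1/lists/members.json?count=4999&list_id=" + list.id_str,
        oauthAccessToken, oauthAccessTokenSecret);
      return { ...list, users: members.users };
    }));
    res.send(withMembers);
  } catch (err) {
    console.log(err);
    res.sendStatus(500);
  }
});
```
The counter-based approach above works, but Promise.all makes the "wait for every members call, then respond once" intent explicit.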
I'm calling three functions; after they complete I want my script to exit on its own, but it just hangs.
I've tried making the functions async/promise-based, closing the database after each MongoDB function, and calling process.exit() from a callback on the last function called.
Connecting to the (local - not Atlas) Database:
```
MongoClient.connect(local, { useNewUrlParser: true, useUnifiedTopology: true }, function(err, db) {
  if (err) {
    console.log(err)
  }
  else {
    console.log('Connected to MongoDB...')
    // Read in data from json files and store each file's contents into the database:
    // this is where the functions are being called, within a successful connect to MongoDB
    insertJSON(db, jsonfiles, 'requests', jsonfilesSource)
    insertJSON(db, issuedfiles, 'issuedLicenses', isssuedfilesSource)
    insertLicenses(db)
  }
  db.close()
})
```
Function 1:
```
function insertJSON(db, dirBuf, collection, sourceFolder) {
  var database = db.db('license-server')
  var collection = database.collection(collection)
  fs.readdir(dirBuf, function(err, files) {
    if (err) {
      console.log(err.message)
    }
    else {
      files.forEach(function(filename) {
        var text = fs.readFileSync(sourceFolder + filename);
        var filecontents = JSON.parse(text)
        //collection.insertOne(filecontents)
        collection.findOne({ "DisplayTitle": filecontents.DisplayTitle, "NodeInformation": filecontents.NodeInformation, "Date": filecontents.Date })
          .then(function(result) {
            if (result) {
              console.log(`An Item could already be in the database: A file is unique if its display title, nodeinformation, and date are different.
the items display title is ${result.DisplayTitle}`)
              return
            }
            else {
              collection.insertOne(filecontents)
              console.log(`Added ${filecontents.DisplayTitle} to database`)
            }
          })
          .catch(function(error) {
            console.log(error)
          })
      })
    }
  })
}
```
Function 2:
```
function insertLicenses(db) {
  // Set up GridFS to import .lic and .licx files into the database
  var database = db.db('license-server')
  var collection = database.collection('fs.files')
  var bucket = new mongodb.GridFSBucket(database);
  var dirBuf = Buffer.from('../license-server/private/licenses')
  fs.readdir(dirBuf, function(err, files) {
    if (err) {
      console.log(err.message)
    }
    else {
      files.forEach(function(filename) {
        collection.findOne({ "filename": filename })
          .then(function(result) {
            if (result) {
              console.log(`The file ${filename} is already in the database`)
              return
            }
            else {
              fs.createReadStream('./private/licenses/' + filename)
                .pipe(bucket.openUploadStream(filename))
                .on('error', function(error) {
                  assert.ifError(error)
                })
                .on('finish', function() {
                  console.log(`Uploaded ${filename}`)
                })
            }
          })
      })
    }
  })
  // I tried calling db.close() here since this is the last function to be called. No luck.
}
```
I'm guessing it has something to do with the MongoDB functions having their own way of closing themselves, but I couldn't find what I was looking for in previous attempts to resolve this issue.
The expected result is the script closing itself; the actual result is a hanging script.
All of these database calls are asynchronous: the result of running this code is that db.close is called immediately, and the work in insertJSON and insertLicenses happens afterwards. If you rewrote this to use async/await (and you'd need to update your other functions as well), the db.close call would only run after the work finished, and that would allow the script to exit:
```
await insertJSON(db, jsonfiles, 'requests', jsonfilesSource)
await insertJSON(db, issuedfiles, 'issuedLicenses', isssuedfilesSource)
await insertLicenses(db)
db.close()
```
https://developer.mozilla.org/en-US/docs/Learn/JavaScript/Asynchronous/Introducing
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/async_function
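As a rough sketch of what updating the other functions could look like (assumptions: the driver's promise API and fs.promises are available, and the helper name insertJSONAsync is illustrative; this is not the original code):
```
// Minimal sketch: each helper returns a promise so the caller can await it.
async function insertJSONAsync(db, dirBuf, collectionName, sourceFolder) {
  const collection = db.db('license-server').collection(collectionName);
  const files = await fs.promises.readdir(dirBuf);
  for (const filename of files) {
    const filecontents = JSON.parse(fs.readFileSync(sourceFolder + filename));
    const existing = await collection.findOne({
      DisplayTitle: filecontents.DisplayTitle,
      NodeInformation: filecontents.NodeInformation,
      Date: filecontents.Date
    });
    if (!existing) {
      await collection.insertOne(filecontents);
      console.log(`Added ${filecontents.DisplayTitle} to database`);
    }
  }
}

MongoClient.connect(local, { useNewUrlParser: true, useUnifiedTopology: true })
  .then(async (db) => {
    await insertJSONAsync(db, jsonfiles, 'requests', jsonfilesSource);
    await insertJSONAsync(db, issuedfiles, 'issuedLicenses', isssuedfilesSource);
    // insertLicenses would need the same treatment (the GridFS streams can be wrapped in a promise)
    db.close(); // runs only after the inserts have finished, so the script can exit
  })
  .catch(console.log);
```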
I want to create an uptime monitor using Node.js and MongoDB. I want to run a cron job in Node.js and store the data in MongoDB. If the website's response status code is not 200, it should be saved in the database. I want a database entry like this:
```
url: "http://www.google.com"
status_code: 500
start_time: <start time>
end_time: <end time>
```
I can run the cron job, but I'm not sure how to save the downtime in the database. I don't want to store every response in the database: only when the response status code is something other than 200 should it start tracking the URL (start_time), and it should record the time when the website is back to 200 as end_time.
cron.js:
```
var async = require('async');
const Entry = require('../models/health.model.js');
var https = require('https');
var request = require('request');

module.exports = function getHttpsRequests () {
  Entry.find({}, function(err, entrys) {
    console.log(err);
    if (!err && entrys) {
      async.each(entrys, function(entry, callback) {
        request(entry.url, function (error, response, body) {
          entry.statuscheck = response.statusCode;
          entry.save();
          callback();
        });
      }, function (error) {
      });
    }
  });
}
```
health.model.js:
```
const mongoose = require('mongoose');

const EntrySchema = mongoose.Schema({
  url: String,
  statuscheck: String
}, {
  timestamps: true
});

module.exports = mongoose.model('Entry', EntrySchema);
```
I would do something like this to handle updating the database. I went ahead and used standard arrow functions, because it was easier for me that way, and I put in some comments that should clear up most questions. It may not be the most elegant solution because I wrote it in five minutes, but if you follow this general logic flow you should be much closer to your solution (it's completely untested, mind you).
```
var async = require('async');
const Entry = require('../models/health.model.js');
var https = require('https');
var request = require('request');

module.exports = function getHttpsRequests () {
  Entry.find({}, (err, entrys) => {
    console.log(err);
    if (!err && entrys) {
      async.each(entrys, (entry, callback) => {
        request(entry.url, (error, response, body) => {
          // first check if the url already has a downtime document in the db.
          Entry.findOne({ url: entry.url }, (err, found) => {
            if (err) {
              // there was an error finding the document in the db, just go to the next one.
              console.log(err);
              callback();
            } else if (!found) {
              // since the document does not exist, check the statusCode.
              if (response.statusCode === 200) {
                // if the statusCode is 200, continue the loop.
                callback();
              } else {
                // if the status code is not 200, let's save this to the db.
                console.log("Saving object: " + entry);
                entry.status_code = response.statusCode;
                entry.start_time = new Date();
                entry.save();
                callback();
              }
            } else {
              // since the document exists, let's check the statusCode.
              if (response.statusCode === 200) {
                // if the statusCode is 200 again, record the end_time.
                found.end_time = new Date();
                // findOneAndUpdate returns the entry after the update, so we can log it for easy debugging.
                Entry.findOneAndUpdate({ url: entry.url }, found, (err, object) => {
                  if (err) {
                    console.log(err);
                    callback();
                  } else {
                    console.log("Object saved: " + object);
                    callback();
                  }
                });
              } else {
                // still down; nothing to update, move on to the next entry.
                callback();
              }
            }
          });
        });
      }, function (error) {
      });
    }
  });
}
```
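One more note: for status_code, start_time, and end_time to actually persist through Mongoose, the schema needs those fields (with strict mode, unknown paths are silently dropped). A sketch of an extended health.model.js, using the field names from the code above:
```
const mongoose = require('mongoose');

const EntrySchema = mongoose.Schema({
  url: String,
  statuscheck: String,
  status_code: Number,   // last non-200 status observed
  start_time: Date,      // when the downtime started
  end_time: Date         // when the site came back to 200
}, {
  timestamps: true
});

module.exports = mongoose.model('Entry', EntrySchema);
```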
Right now I'm working on a service that allows multiple events to store data in MongoDB. It stores event data by creating a new collection in MongoDB every time a new event comes in. If the same event needs to store a different set of data, a new document should be created in MongoDB.
The code below is the service I created to handle this.
```
import WhiteBoardEvent from '../model/event.model';
import IEventStore from '../interface/eventStore.interface';
import * as MongoClient from 'mongodb';

export class EventStore implements IEventStore {
  private mongoDBEndpoint = "mongodb://192.168.10.10:27017";

  public insert(event: WhiteBoardEvent, callback: (err: any) => void): void {
    MongoClient.connect(this.mongoDBEndpoint, { connectTimeoutMS: 1000 }, (connErr, db) => {
      if (connErr) { db.close(); callback(connErr); return; }
      this.getNextSequence(db, event, (err, sequence) => {
        if (err) { db.close(); callback(err); return; }
        event.sequence = sequence;
        db.collection(event.roomId).insert(event, (err) => {
          db.close();
          callback(err);
        });
      });
    });
  }

  private createCounterCollection(db: MongoClient.Db, event: WhiteBoardEvent, callback: (err: any) => void): void {
    db.collection("counters").insert({
      roomId: event.roomId,
      sequence: 0
    }, callback);
  }

  private getNextSequence(db: MongoClient.Db, event: WhiteBoardEvent, callback: (err: any, sequence: number) => void): void {
    var collection = db.collection("counters");
    collection.findOneAndUpdate(
      { roomID: event.roomId },
      {
        $inc: { sequence: 1 },
        // new: true
      },
      {
        upsert: true,
        returnOriginal: false
      },
      (err, r) => {
        if (err) {
          this.createCounterCollection(db, event, (err) => {
            if (err) { callback(err, -1); return; }
            callback(null, 0);
          });
          return;
        }
        callback(null, r.value.sequence);
        console.log("counter : " + r.value.sequence);
      }
    );
  }
}
```
The following code is a test file I created so that I can see the changes in MongoDB.
```
import * as timers from 'timers';
import WhiteBoardEvent from './data/model/event.model';
import { EventStore } from './data/service/eventStore.service';

var model = new WhiteBoardEvent();
model.name = "t2";
model.roomId = "testRoom";
model.timestamp = new Date();
model.userId = "";

var model2 = new WhiteBoardEvent();
model2.name = "t1";
model2.roomId = "testRoom2";
model2.timestamp = new Date();
model2.userId = "";

var eventStore = new EventStore();

var timer1 = timers.setInterval(() => {
  eventStore.insert(model, (err) => {
    if (err) {
      console.log(err);
    } else {
      console.log("Test Completed!");
    }
  });
}, 1000);

var timer2 = timers.setInterval(() => {
  eventStore.insert(model2, (err) => {
    if (err) {
      console.log(err);
    } else {
      console.log("Test Completed!");
    }
  });
}, 1000);
```
This is a snippet of the output I get. "Test Completed!" is shown for the first inserts; after that, I get duplicate key errors.
```
counter : 1
counter : 1
Test Completed!
Test Completed!
counter : 2
{ MongoError: E11000 duplicate key error collection: admin.testRoom index:
_id_ dup key: { : ObjectId('59d5da14cedd6f28a5db8c93') }
```
Can anyone help me with this? Thank you in advance!
You are creating two instances of WhiteBoardEvent without explicitly setting an ID (this is fine, but relevant). Have a look at this excerpt from your code above:
```
db.collection(event.roomId).insert(event, (err) => {
  db.close();
  callback(err);
});
```
After handing event over to MongoDB's insert, it is checked to see if it has an ID - it does not. Because of this, the MongoDB code generates an ID for you (see here). This is all great - it's what you want.
However, what happens the next time your setInterval callback is invoked? Well, model and model2 now have an ID set - it was set according to the rules I just described. Since there is already an ID on the model going into insert, the MongoDB code leaves it alone, and you end up trying to reuse the same ID.
In your test code, you could simply clear out the ID in your eventStore.insert callback to ensure that a new ID is generated every time. e.g.:
```
eventStore.insert(model, (err) => {
  model._id = null;
  if (err) {
    console.log(err);
  } else {
    console.log("Test Completed!");
  }
});
```
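A small aside, not from the original answer: deleting the property has the same effect, since the driver generates a fresh ObjectId whenever the document passed to insert arrives without an _id:
```
eventStore.insert(model, (err) => {
  delete model._id; // the next insert gets a newly generated ObjectId
  if (err) {
    console.log(err);
  } else {
    console.log("Test Completed!");
  }
});
```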
It is likely that in your schema you have a key set to unique: true.
Adding another object with the same key, or with that key not filled in, will result in a duplicate key error, because a field that is not filled in is stored as null, and two nulls count as a duplicate key.
To make sure this does not happen, use sparse: true together with unique: true. Note that a field with unique: true can never hold two identical values; a sparse index simply leaves documents without the field out of the index (so multiple missing/undefined values are allowed) and otherwise works like unique: true, as illustrated in the sketch below.
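As an illustration (a sketch with the Node driver, not part of the original answer; the collection name is borrowed from the test code above), such an index could be declared like this:
```
// Sketch only: a unique + sparse index on userId. Documents that omit userId are
// left out of the index, but any value that is present (including "") must be unique.
db.collection("testRoom").createIndex(
  { userId: 1 },
  { unique: true, sparse: true },
  (err, indexName) => {
    if (err) console.log(err);
    else console.log("created index: " + indexName);
  }
);
```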
In your case you set userId to "" on both models (model.userId = ""); that would probably cause the error if that field is set to unique.
Hope this solves your problem. Otherwise, please show us your model.
Sven
I have about 30,000 documents in a MongoDB collection, and I have been stuck developing a Node.js script to retrieve only the records with a specific string key-value pair.
This query on the MongoDB server returns exactly the results I've been looking for:
db.getCollection('posts').find({authorName: "Ashwin-kumar"})
It returns about 33 documents instantly. Likewise, I have about 40 authors with different names.
Here's my Node.js script to retrieve posts by authorName (yes, it is based on the name, a string, as there is no ID for these authors :( ):
```
var fs = require('fs'),
    request = require('request'),
    async = require("async"),
    assert = require('assert'),
    _ = require('lodash'),
    MongoClient = require('mongodb').MongoClient;

var db, postsCollection, postCol;

async.series([dbConnect, checkCollection, createMeta, dbClose], function() {
  console.log("Executed all calls in series.");
  process.exit(0);
});

function dbConnect(callback) {
  MongoClient.connect("mongodb://localhost:27017/jPosts", function(pErr, pDb) {
    if (pErr) {
      console.dir(pDb);
      return 0;
    }
    db = pDb;
    callback();
  });
}

function dbClose(callback) {
  db.close(true, function (err) {
    if (err) console.error(err);
    else console.log("close complete");
    callback();
  });
}

function checkCollection(callback) {
  db.collection('posts', function(err, collection) {});
  postsCollection = db.collection('posts');
  postCol = db.collection('posts');
  callback();
}

function createMeta(callback) {
  var meta = [];
  postsCollection.aggregate([
    {
      $group: { _id: "$authorName" }
    }]).toArray(function(err, result) {
    assert.equal(err, null);
    async.forEachLimit(result, 1, function(pPost, callback) {
      getPosts(pPost._id, callback);
    }, function(err) {
      console.log(err);
      callback();
    });
  });
}

function getPosts(pAuthor, callback) {
  var cursor = postCol.find({ "authorName": pAuthor });
  cursor.toArray(function(err, items) {
    if (err)
      callback(err);
    else
      callback(null, items);
  });
}
```
This does not seem to work for me: cursor.toArray() does nothing but wait forever. Is it because there are too many fields in each document?
I tried getting the count of the documents the cursor fetched, and that works fine:
```
function getPosts(pAuthor, callback) {
  var cursor = postCol.find({ "authourName": pAuthor });
  cursor.count().then(function(items_count) {
    console.log(items_count);
    callback();
  });
}
```
Also, I tried the cursor's .each method to iterate over the fetched documents, but no luck yet:
```
function getPosts(pAuthor, callback) {
  var cursor = postCol.find({ "authourName": pAuthor });
  cursor.each(function(err, doc) {
    assert.equal(err, null);
    if (doc != null) {
      console.dir(doc);
    } else {
      console.log(err);
    }
  });
}
```
Am I missing something here? What else can be done to make this work? Are there any issues with the way I'm using async?
P.S.: The idea here is to query the dump and generate the PDFs for authors in the jPosts collection.
P.S. 2: Here's a sample document:
```
{
  "_id" : ObjectId("571d36b55672f713fe346a66"),
  "id" : 56517,
  "authorName" : "Ashwin-kumar",
  "comment_count" : 380,
  "tagline" : "... Opinions you don't really need",
  "vote_count" : 5152,
  "exclusive" : null,
  "post": [
  ],
  "post_comments" : [
    // comment_count objects
  ],
  "date" : "2016-03-27"
}
```
(I've omitted the post & post_comments contents for brevity.)
Try this:
```
var collection = db.collection("collection_name");

collection.find({ authorName: "Ashwin-kumar" }).toArray(function (err, items) {
  if (err) {
    console.dir(err);
  } else {
    // do something with items array
    console.dir(items);
  }
});
```
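If the documents are large (the sample document above carries big post and post_comments arrays), it may also help to project away the heavy fields so toArray does not have to materialize them. A sketch, with field names taken from the sample document:
```
// Sketch: return only the light fields, skipping the large arrays.
// Older drivers use { fields: ... }; drivers from 3.x onwards use { projection: ... }.
collection.find(
  { authorName: "Ashwin-kumar" },
  { fields: { post: 0, post_comments: 0 } }
).toArray(function (err, items) {
  if (err) {
    console.dir(err);
  } else {
    console.dir(items.length + " documents without the heavy arrays");
  }
});
```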
Did you check what the value of pAuthor is in getPosts? When you do the aggregation, you receive a collection of objects with an _id field (not authorName), so you should do:
```
// not sure why you need the meta array, at least it's not used in the code you provided
meta.push({
  author: pPost._id
});
getPosts(pPost._id, callback);
```