Currently I need to push a large CSV file into a mongo DB and the order of the values needs to determine the key for the DB entry:
Example CSV file:
Code to parse it into arrays:
var fs = require("fs");
var csv = require("fast-csv");
.on("data", function(data){
.on("end", function(data){
console.log("Read Finished");
Code Output:
[ '9',
'0' ]
[ '9',
'0' ]
How do I insert the arrays into my mongoose schema to go into mongo db?
var mongoose = require("mongoose");
var rankSchema = new mongoose.Schema({
serverid: Number,
resetid: Number,
rank: Number,
number: Number,
name: String,
land: Number,
networth: Number,
tag: String,
gov: String,
gdi: Number,
protection: Number,
vacation: Number,
alive: Number,
deleted: Number
module.exports = mongoose.model("Rank", rankSchema);
The order of the array needs to match the order of the schema; for instance, the first number in the array, 9, always needs to be saved under the key "serverid", and so forth. I'm using Node.js.

You can do it with fast-csv by getting the headers from the schema definition which will return the parsed lines as "objects". You actually have some mismatches, so I've marked them with corrections:
const fs = require('mz/fs');
const csv = require('fast-csv');
const { Schema } = mongoose = require('mongoose');
const uri = 'mongodb://localhost/test';
mongoose.Promise = global.Promise;
mongoose.set('debug', true);
const rankSchema = new Schema({
serverid: Number,
resetid: Number,
rank: Number,
name: String,
land: String, // <-- You have this as Number but it's a string
networth: Number,
tag: String,
stuff: String, // the empty field in the csv
gov: String,
gdi: Number,
protection: Number,
vacation: Number,
alive: Number,
deleted: Number
const Rank = mongoose.model('Rank', rankSchema);
const log = data => console.log(JSON.stringify(data, undefined, 2));
(async function() {
try {
const conn = await mongoose.connect(uri);
await Promise.all(Object.entries(conn.models).map(([k,m]) => m.remove()));
let headers = Object.keys(Rank.schema.paths)
.filter(k => ['_id','__v'].indexOf(k) === -1);
await new Promise((resolve,reject) => {
let buffer = [],
counter = 0;
let stream = fs.createReadStream('input.csv')
.pipe(csv({ headers }))
.on("error", reject)
.on("data", async doc => {
try {
if ( counter > 10000 ) {
await Rank.insertMany(buffer);
buffer = [];
counter = 0;
} catch(e) {
.on("end", async () => {
try {
if ( counter > 0 ) {
await Rank.insertMany(buffer);
buffer = [];
counter = 0;
} catch(e) {
} catch(e) {
} finally {
As long as the schema actually lines up with the provided CSV then it's okay. These are the corrections that I can see, but if you need the actual field names aligned differently then you need to adjust. But there was basically a Number in the position where there is a String and essentially an extra field, which I'm presuming is the blank one in the CSV.
The general things are getting the array of field names from the schema and passing that into the options when making the csv parser instance:
let headers = Object.keys(Rank.schema.paths)
.filter(k => ['_id','__v'].indexOf(k) === -1);
let stream = fs.createReadStream('input.csv')
.pipe(csv({ headers }))
Once you actually do that then you get an "Object" back instead of an array:
"serverid": "9",
"resetid": "1557",
"rank": "358",
"name": "286",
"land": "Mutantville",
"networth": "4368",
"tag": "2358026",
"stuff": "",
"gov": "M",
"gdi": "0",
"protection": "0",
"vacation": "0",
"alive": "1",
"deleted": "0"
Don't worry about the "types" because Mongoose will cast the values according to schema.
The rest happens within the handler for the data event. For maximum efficiency we are using insertMany() to only write to the database once every 10,000 lines. How that actually goes to the server and processes depends on the MongoDB version, but 10,000 should be pretty reasonable based on the average number of fields you would import for a single collection in terms of the "trade-off" for memory usage and writing a reasonable network request. Make the number smaller if necessary.
The important parts are to mark these calls as async functions and await the result of the insertMany() before continuing. Also we need to pause() the stream and resume() on each item otherwise we run the risk of overwriting the buffer of documents to insert before they are actually sent. The pause() and resume() are necessary to put "back-pressure" on the pipe, otherwise items just keep "coming out" and firing the data event.
Naturally the control for the 10,000 entries requires we check that both on each iteration and on stream completion in order to empty the buffer and send any remaining documents to the server.
That's really what you want to do, as you certainly don't want to fire off an async request to the server either on "every" iteration through the data event or without waiting for each request to complete. You'll get away with not checking that for "very small files", but for any real world load you're certain to exceed the call stack due to "in flight" async calls which have not yet completed.
FYI - a package.json used. The mz is optional as it's just a modernized Promise enabled library of standard node "built-in" libraries that I'm simply used to using. The code is of course completely interchangeable with the fs module.
"description": "",
"main": "index.js",
"dependencies": {
"fast-csv": "^2.4.1",
"mongoose": "^5.1.1",
"mz": "^2.7.0"
"keywords": [],
"author": "",
"license": "ISC"
Actually, with Node v8.9.x and above we can even make this much simpler with an implementation of AsyncIterator through the stream-to-iterator module. It's still in Iterator<Promise<T>> mode, but it should do until Node v10.x becomes stable LTS:
const fs = require('mz/fs');
const csv = require('fast-csv');
const streamToIterator = require('stream-to-iterator');
const { Schema } = mongoose = require('mongoose');
const uri = 'mongodb://localhost/test';
mongoose.Promise = global.Promise;
mongoose.set('debug', true);
const rankSchema = new Schema({
serverid: Number,
resetid: Number,
rank: Number,
name: String,
land: String,
networth: Number,
tag: String,
stuff: String, // the empty field
gov: String,
gdi: Number,
protection: Number,
vacation: Number,
alive: Number,
deleted: Number
const Rank = mongoose.model('Rank', rankSchema);
const log = data => console.log(JSON.stringify(data, undefined, 2));
(async function() {
try {
const conn = await mongoose.connect(uri);
await Promise.all(Object.entries(conn.models).map(([k,m]) => m.remove()));
let headers = Object.keys(Rank.schema.paths)
.filter(k => ['_id','__v'].indexOf(k) === -1);
let stream = fs.createReadStream('input.csv')
.pipe(csv({ headers }));
const iterator = await streamToIterator(stream).init();
let buffer = [],
counter = 0;
for ( let docPromise of iterator ) {
let doc = await docPromise;
if ( counter > 10000 ) {
await Rank.insertMany(buffer);
buffer = [];
counter = 0;
if ( counter > 0 ) {
await Rank.insertMany(buffer);
buffer = [];
counter = 0;
} catch(e) {
} finally {
Basically, all of the stream "event" handling and pausing and resuming gets replaced by a simple for loop:
const iterator = await streamToIterator(stream).init();
for ( let docPromise of iterator ) {
let doc = await docPromise;
// ... The things in the loop
Easy! This gets cleaned up in later Node implementations with for..await..of when it becomes more stable. But the above runs fine from the specified version onward.

As @Neil Lunn said, you need a header line within the CSV itself.
Example using csvtojson module.
const csv = require('csvtojson');
const csvArray = [];
.on('json', (jsonObj) => {
csvArray.push({ name:, id: });
.on('done', (error) => {
if (error) {
return res.status(500).json({ error});
.then((result) => {
return res.status(200).json({result});
}).catch((err) => {
return res.status(500).json({ error});


Firestore startAfter method is not working with a document as a reference

I'm having this problem where I can't make startAfter work with my data in Firestore.
I'm giving two examples of the issue: the first example is when it works, filtering with a property (createdAt); the second, passing the whole doc, returns empty and the forEach never loops through the data.
Does someone know why this happens? The documents do not have any complex information: name, creation date, all numbers for testing.
If someone had this problem, please help me with an answer, just started learning Firebase a few days ago.
Thanks in advance :)
// getting the data
const response = await db
const dataSend = [];
response.forEach((document) => {
//triggering the next data load
const getMore = async () => {
const limit = 3;
const last = apis[apis.length - 1]; // last document
console.log(last); // {name: "3", description: "3", createdAt: t, url: "3", authorId: 123123, …}
try {
const response = await db
.startAfter(last.createdAt) // passing createdAt to fix the problem
const dataSend = [];
response.forEach((document) => {
//this is not entering here
} catch .....
// getting the data
const response = await db
const dataSend = [];
response.forEach((document) => {
//triggering the next data load
const getMore = async () => {
const limit = 3;
const last = apis[apis.length - 1]; // last document
console.log(last); // {name: "3", description: "3", createdAt: t, url: "3", authorId: 123123, …}
try {
const response = await db
const dataSend = [];
response.forEach((document) => {
//this is not entering here
} catch .....
The solution to this problem was that I was getting the data and not the doc reference.
To fix something like this, you'll have to add something like this to the code: response.docs[response.docs.length - 1]
// getting the data
const response = await db
const dataSend = [];
const last =[ - 1] // this is the reference to the doc that the documentations says
response.forEach((document) => {
//triggering the next data load
const getMore = async () => {
const limit = 3;
const last =[ - 1] // last document
console.log(last); // {name: "3", description: "3", createdAt: t, url: "3", authorId: 123123, …}
try {
const response = await db
.startAfter(last) //
const dataSend = [];
response.forEach((document) => {
//this is not entering here
} catch .....
So instead of passing the last object from the database you pass the last reference to the doc before it transforms with the data() function that Firebase provides.
ALSO it works better than passing the object.createdAt.
Actually the reason that the first code block is running is because that is the correct usage of startAt().
As you can see on the examples in the Official Documentation, you should use a value in startAt() and never a full document, and that actually makes sense if you consider that you are sorting the data by a specific field and you should also start your results by a specific value on that same field.
So the correct usage is indeed .startAfter(last.createdAt) in your case.

How to handle multiple promises at once

I am creating a program that...
1. Detects all of the drives on any given system.
2. Scans those drives for files of specific file types. For example, it may search all of the drives for any jpeg, png, and svg files.
3. The results are then stored in a JSON file in the following desired format.
"C:": {
"jpeg": [
"path": "C:\\Users\\John\\Pictures\\example.jpeg",
"name": "example",
"type": "jpeg",
"size": 86016
"png": [],
"svg": []
The code...
async function scan(path, exts) {
try {
const stats = await fsp.stat(path)
if (stats.isDirectory()) {
childPaths = await fsp.readdir(path),
promises =
childPath => scan(join(path, childPath), exts)
results = await Promise.all(promises)
// Likely needs to change.
return [].concat(...results)
} else if (stats.isFile()) {
const fileExt = extname(path).replace('.', '')
if (exts.includes(fileExt)){
// Likely needs to change.
return {
"path": path,
"name": basename(path, fileExt).slice(0, -1),
"type": fileExt,
"size": stats.size
return []
catch (error) {
return []
const results = await Promise.all( => scan(drive, exts))
console.log(results) // [ Array(140), Array(0), ... ]
// And I would like to do something like the following...
for (const drive of results) {
root = parse(path).root,
fileExt = extname(path).replace('.', '')
data[root][fileExt] = []
await fsp.writeFile('./data.json', JSON.stringify(config, null, 2))
The global results is of course divided into individual arrays that correspond to each drive. But currently it combines all of the objects into one giant array despite their corresponding file types. There is also currently no way for me to know which array belongs to each drive, especially if the drive's array does not contain any items that I can parse to retrieve the root directory.
I can obviously map or loop through the global results again, and then sort everything out, as illustrated below, but it would be a lot cleaner to have scan() handle everything from the get go.
// Initiate scan sequence.
async function initiateScan(exts) {
[config, data] = await Promise.all([
results = await Promise.all(
// => scan(drive, exts))
['K:', 'D:'].map(drive => scan(drive, exts))
for (const drive of results) {
let root = false
for (const [i, file] of drive.entries()) {
if (!root) root = parse(file.path).root.slice(0,-1)
if (!data[root][file.type] || !i) data[root][file.type] = []
await fsp.writeFile('./data.json', JSON.stringify(config, null, 2))
Due to my lack of experience with asynchronicity and objects in general, I am not quite sure how to best handle the data in map( ... )/scan. I am really not even sure how to best structure the output of scan() so that the structure of the global results is easily manipulable.
Any help would be greatly appreciated.
Mutating an outer object as asynchronously-derived results arrive is not particularly clean, however it can be done fairly simply and safely as follows:
(async function(exts, results) { // async IIFE wrapper
async function scan(path) { // lightly modified version of scan() from the question.
try {
const stats = await fsp.stat(path);
if (stats.isDirectory()) {
const childPaths = await fsp.readdir(path);
const promises = => scan(join(path, childPath)));
return Promise.all(promises);
} else if (stats.isFile()) {
const fileExt = extname(path).replace('.', '');
if (results[path] && results[path][fileExt]) {
'path': path,
'name': basename(path, fileExt).slice(0, -1),
'type': fileExt,
'size': stats.size
catch (error) {
// swallow error by not rethrowing
await Promise.all( => {
// Synchronously seed the results object with the required data structure
results[path] = {};
for (fileExt of exts) {
results[path][fileExt] = []; // array will populated with data, or remain empty if no qualifying data is found.
// Asynchronously populate the results[path] object, and return Promise to the .map() callback
return scan(path);
// Here: whatever else you want to do with the results.
})(exts, {}); // pass `exts` and an empty results object to the IIFE function.
The results object is synchronously seeded with empty data structures, which are then populated asynchronously.
Everything is wrapped in an async Immediately Invoked Function Expression (IIFE), thus:
avoiding the global namespace (if not already avoided)
ensuring availability of await (if not already available)
making a safe closure for the results object.
This still needs some work, and it is iterating through the generated files collection a second time.
// This should get you an object with one property per drive
const results = Object.fromEntries(
(await Promise.all( drive => [drive, await scan(drive, exts)])
([drive, files]) => [
// we reduce each drive's file array to an object with
// one property per file extension
(acc, file) => {
return acc
Object.fromEntries( => [ext, []]))
nodejs supports Object.fromEntries from version 12.0.0, so if you can guarantee your application will always be run in that version or a later one, Object.fromEntries should be fine here.
You can use the glob npm library to get all of the filenames and then just transform that array to your object like this:
import {basename, extname} from 'path';
import {stat} from 'fs/promises'; // Or whichever library you use to promisify fs
import * as glob from "glob";
function searchForFiles() {
return new Promise((resolve, reject) => glob(
"/**/*.{jpeg,jpg,png,svg}", // The files to search for and where
{ silent: true, strict: false}, // No error when eg. something cannot be accessed
(err, files) => err ? reject() : resolve(files)
async function getFileObject() {
const fileNames = await searchForFiles(); // An array containing all file names (eg. ['D:\\my\path\to\file.jpeg', 'C:\\otherfile.svg'])
// An array containing all objects describing your file
const fileObjects = await Promise.all( filename => ({
path: filename,
name: basename(path, fileExt).slice(0, -1),
type: extname(path).replace('.', ''),
size: stat(path).size,
drive: `${filename.split(':\\')[0]}:`
// Create your actual object
return fileObjects.reduce((result, {path, name, type, size, drive}) => {
if (!result[drive]) { // create eg. { C: {} } if it does not already exist = {};
if (!result[drive][type]) { // create eg. {C: { jpeg: [] }} if it does not already exist
result[drive][type] = [];
// Push the object to the correct array
result[drive][type].push({path, name, type, size});
return result;
}, {});
The function must traverse the file system recursively, looking for files that match your criteria. The recursion can be simplified by the fact that the result doesn't need to retain any hierarchy, so we can just carry a flat array (files) as a parameter.
let exts = [...]
async function scan(path, files) {
const stats = await fsp.stat(path)
if (stats.isDirectory()) {
childPaths = await fsp.readdir(path)
let promises = => {
return scan(join(path, childPath), files)
return Promise.all(promises)
} else if (stats.isFile()) {
const fileExt = extname(path).replace('.', '')
if (exts.includes(fileExt)) {
path: path,
name: basename(path, fileExt).slice(0, -1),
type: fileExt,
size: stats.size
let files = []
await scan('/', files)

How to test code when dealing with MongooseArray in NodeJS

I am trying to do some integration tests using Supertest. My object schema contains an array:
const schema = new mongoose.Schema({
name: {
type: String,
required: true,
minlength: 3,
maxlength: 50
tags: {
type: Array,
lowercase: true
I am using Mongoose, and when running my tests I always encounter this issue of getting a MongooseArray instead of an Array, and I am not sure how to deal with this.
- Expected value
+ Received value
- CoreMongooseArray [
+ Array [
What can I do to always get an Array ?
Where do I have to make my changes ? In my code or test ?
Here is an example(test) when I use lean that works:
await exec();
const updatedCategory = await Category.findById(category._id).lean();
And here is one that doesn't work:
const res = await exec();
expect(res.body).toHaveProperty('_id', category._id.toHexString());
expect(res.body).toHaveProperty('tags', category.tags);
In this case I am checking if the property exists, and it doesn't work.
Here is the exec() function for the test that works (PUT route):
const exec = async () => {
return await request(app)
.put('/api/categories/' + id)
.set('x-auth-token', token)
.send({ name: name, tags: tags });
Here is the exec() function for the test that fails (DELETE route):
const exec = async () => {
return await request(app)
.delete('/api/categories/' + id)
.set('x-auth-token', token)
Here is the result when displaying res.body on the console:
"_id": "5db58e63fa9c143794484eea",
"tags": [
"name": "category1",
"__v": 0

Using Model.create() and save() inside for loop

Hey so I'm pretty new to Javascript and Node but I'm running into an issue that's been bothering me for a while.
I have a User model and an Image model. I'm using Multer to upload an array of images, and I'm trying to loop through this array, create a new Image model for each, then unshift that Image into my User's photos. I have Multer set up to successfully fill req.files. Here's the code."/users/:user/photos/upload", middle.isLoggedIn, upload.array("photos", 4), function(req, res) {
User.findById(req.params.user, function(err, foundUser) {
for(var i = 0, len = req.files.length; i < len; i++) {
Image.create(req.files[i], function(err, newImage) {
if(err) {
return console.log(err.message);
newImage.human = foundUser;;
console.log(foundUser); seems to execute and print before console.log(newImage);
User Model
var mongoose = require("mongoose"),
passportLocalMongoose = require("passport-local-mongoose");
var UserSchema = new mongoose.Schema({
username: String,
password: String,
firstName: String,
lastName: String,
city: String,
photos: [
type: mongoose.Schema.Types.ObjectId,
ref: "Image"
module.exports = mongoose.model("User", UserSchema);
Image Model
var mongoose = require("mongoose");
var ImageSchema = new mongoose.Schema({
fieldname: String,
originalname: String,
mimetype: String,
filename: String,
destination: String,
size: Number,
path: String,
human: {
id: {
type: mongoose.Schema.Types.ObjectId,
ref: "Human"
module.exports = mongoose.model("Image", ImageSchema);
This is my first stackoverflow question so let me know if I didn't post enough code.
I think it has something to do with Image.create() being asynchronous, and I'm still trying to learn more about this and promises, but I still don't fully understand how it's relevant in my code.
Use Mongoose's promise support.
Promise.all allows you to resolve an array of promises.
async/await control flow for Promises.
I'm not sure your code as it is structured will work without a serial loop, due to the async code that can execute in any order. I'm not sure that triggering multiple saves on the same object at different times will work out very well. Holding database objects in memory for a long time can open up more concurrency data issues too.
The Bluebird promise library includes some additional helpers like Promise.each which will serially complete promises before the next starts which may be of use here.
const Promise = require('bluebird')"/users/:user/photos/upload", middle.isLoggedIn, upload.array("photos", 4), async function(req, res, next) {
try {
let foundUser = await User.findById(req.params.user)
await Promise.each(req.files, async file => {
let newImage = await Image.create(file)
newImage.human = foundUser;
catch (err) {
Other methods like .map and .reduce help make standard array/loop type operations with Promises easier to do.
Atomic Updates
In regards to the concurrency issue, any updates you can do in MongoDB that are "atomic" are a good thing. So instead of selecting something, modifying it in JS, then saving it back, you send the update to Mongo and let the db server deal with it. No matter what order you send the updates to the database, they will always be updating the latest copy of data.
In this case the array unshift can be completed without the initial select by using findByIdAndUpdate and $push (it can be made to push at position 0, there is no $unshift in mongo).
If you add a method to your User model for adding the photo:
addPhoto(user_id, newImage){
return User.findByIdAndUpdate(
{ $push: { photos: { $each: [newImage], $position: 0 } } } }
{ safe: true, new: true }
So the code would look like
const Promise = require('bluebird')"/users/:user/photos/upload", middle.isLoggedIn, upload.array("photos", 4), async function(req, res, next) {
try {
let foundUser = await User.findById(req.params.user)
if (!foundUser) throw new Error(`No user found: $user`)
let results = await, async file => {
let newImage = await Image.create(file)
newImage.human = foundUser
let user_update = await User.addPhoto(req.params.user, newImage)
catch (err) {

Execute Sequelize queries synchronously

I am building a website using Node.js and Sequelize (with a Postgres backend). I have a query that returns many objects with a foreign key, and I want to pass to the view a list of the objects that the foreign key references.
In the example, Attendances contains Hackathon keys, and I want to return a list of hackathons. Since the code is async, the following thing of course does not work in Node:
where: {
}).then(function (data) {
var hacks = [];
for (var d in data) {
where: {
id: data[d].id
}).then(function (data1) {
res.render('dashboard/index.ejs', {title: 'My Hackathons', user: req.user, hacks: hacks});
Is there any way to do that query in a synchronous way, meaning that I don't return the view until I have the "hacks" list filled with all the objects?
Use Promise.all to execute all of your queries then call the next function.
where: {
}).then(function (data) {
// get an array of the data keys, (not sure if you need to do this)
// it is unclear whether data is an object of users or an array. I assume
// it's an object as you used a `for in` loop
const keys = Object.keys(data)
// map the data keys to [Promise(query), Promise(query), {...}]
const hacks = => {
return models.Hackathon.findOne({
where: {
id: data[d].id
// user Promise.all to resolve all of the promises asynchronously
// this will be called once all promises have resolved so
// you can modify your data. it will be an array of the returned values
.then((users) => {
const [user1, user2, {...}] = users
res.render('dashboard/index.ejs', {
title: 'My Hackathons',
user: req.user,
hacks: users
The Sequelize library has the include parameter which merges models in one call. Adjust your where statement to bring the Hackathons model into Attendance. If this does not work, take the necessary time to setup Sequelize correctly, their documentation is constantly being improved. In the end, you'll save loads of time by reducing error and making your code readable for other programmers.
Look how much cleaner this can be...
include: [{
model: Hackathon,
as: 'hackathon'
where: {
}).then(function (data) {
// hackathon id
// attendance id
sequelize.sync().then(() => {
// this is where we continue ...
Learn more about Sequelize includes here:
Immediately invoke asynchronous function expression
This is one of the techniques mentioned at: How can I use async/await at the top level? Toplevel await is likely coming soon as of 2021, which will be even better.
Minimal runnable example:
const assert = require('assert');
const { Sequelize, DataTypes } = require('sequelize');
const sequelize = new Sequelize({
dialect: 'sqlite',
storage: 'db.sqlite',
const IntegerNames = sequelize.define(
'IntegerNames', {
value: { type: DataTypes.INTEGER, allowNull: false },
name: { type: DataTypes.STRING, },
}, {});
(async () => {
await IntegerNames.sync({force: true})
await IntegerNames.create({value: 2, name: 'two'});
await IntegerNames.create({value: 3, name: 'three'});
await IntegerNames.create({value: 5, name: 'five'});
// Fill array.
let integerNames = [];
integerNames.push(await IntegerNames.findOne({
where: {value: 2}
integerNames.push(await IntegerNames.findOne({
where: {value: 3}
// Use array.
assert(integerNames[0].name === 'two');
assert(integerNames[1].name === 'three');
await sequelize.close();
Tested on Node v14.16.0, sequelize 6.6.2, sqlite3 5.0.2, Ubuntu 20.10.

