Walking a directory with Node.js [duplicate]

I've got a problem with this code in node.js. I want to recursively walk through a directory tree and apply the callback action to every file in the tree. This is my code at the moment:
var fs = require("fs");

// General function
var dive = function (dir, action) {
    // Assert that it's a function
    if (typeof action !== "function")
        action = function (error, file) { };

    // Read the directory
    fs.readdir(dir, function (err, list) {
        // Return the error if something went wrong
        if (err)
            return action(err);

        // For every file in the list
        list.forEach(function (file) {
            // Full path of that file
            path = dir + "/" + file;

            // Get the file's stats
            fs.stat(path, function (err, stat) {
                console.log(stat);
                // If the file is a directory
                if (stat && stat.isDirectory())
                    // Dive into the directory
                    dive(path, action);
                else
                    // Call the action
                    action(null, path);
            });
        });
    });
};
The problem is that inside the forEach loop, fs.stat is called for every file via the shared variable path. By the time each callback runs, path already holds another value, so the function dives into the wrong directories or calls the action on the wrong files.
This could probably be solved easily by using fs.statSync, but that is not the solution I would prefer, since it blocks the process.
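(For reference, a minimal sketch of that rejected blocking variant; the closure problem disappears only because nothing runs asynchronously:)
list.forEach(function (file) {
    var path = dir + "/" + file;
    var stat = fs.statSync(path); // blocks the event loop on every call
    if (stat && stat.isDirectory())
        dive(path, action);
    else
        action(null, path);
});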

var path = dir + "/" + file;
You forgot to make path a local variable. Declared with var inside the forEach callback, it won't be changed behind your back by subsequent iterations of the loop.
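For clarity, here is the question's loop body with just that one change applied:
list.forEach(function (file) {
    // var gives each invocation of this callback its own path binding
    var path = dir + "/" + file;
    fs.stat(path, function (err, stat) {
        if (stat && stat.isDirectory())
            dive(path, action);
        else
            action(null, path);
    });
});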

Use node-dir for this. Because you need a separate action for directories and files, I'll give you 2 simple iterators using node-dir.
Asynchronously iterate the files of a directory and its subdirectories and pass an array of file paths to a callback.
var dir = require('node-dir');

dir.files(__dirname, function(err, files) {
    if (err) throw err;
    console.log(files);
    // We have an array of files now, so we'll iterate over it
    files.forEach(function(filepath) {
        actionOnFile(null, filepath);
    });
});
Asynchronously iterate the subdirectories of a directory and its subdirectories and pass an array of directory paths to a callback.
var dir = require('node-dir');

dir.subdirs(__dirname, function(err, subdirs) {
    if (err) throw err;
    console.log(subdirs);
    // We have an array of subdirs now, so we'll iterate over it
    subdirs.forEach(function(dirpath) {
        actionOnDir(null, dirpath);
    });
});

Another suitable library is filehound. It supports file filtering (if required), callbacks and promises.
For example:
const Filehound = require('filehound');

function action(file) {
    console.log(`process ${file}`);
}

Filehound.create()
    .find((err, files) => {
        if (err) {
            return console.error(`error: ${err}`);
        }
        files.forEach(action);
    });
The library is well documented and provides numerous examples of common use cases.
https://github.com/nspragg/filehound
Disclaimer: I'm the author.
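For instance, filtering by directory and extension might look like the following sketch (method names such as .paths() and .ext() follow the project README; treat them as assumptions and check the docs):
const Filehound = require('filehound');

// Find .json files under /some/dir; find() returns a promise when no callback is given
Filehound.create()
    .paths('/some/dir')
    .ext('json')
    .find()
    .then((files) => files.forEach(action))
    .catch((err) => console.error(`error: ${err}`));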

Not sure if I should really post this as an answer, but for your convenience and that of other users, here is a rewritten version of the OP's code which might prove useful. It provides:
Better error management support
A global completion callback which is called when the exploration is complete
The code:
var fs = require("fs");

/**
 * dir: path to the directory to explore
 * action(file, stat): called on each file, or until an error occurs. file: path to the file. stat: stat of the file (retrieved by fs.stat)
 * done(err): called once when the process is complete. err is undefined if everything was OK, otherwise it is the error that stopped the process
 */
var walk = function(dir, action, done) {
    // This flag indicates whether an error occurred (in that case we don't want to keep walking the tree)
    var dead = false;

    // This counter stores the number of pending async operations
    var pending = 0;

    var fail = function(err) {
        if (!dead) {
            dead = true;
            done(err);
        }
    };

    var checkSuccess = function() {
        if (!dead && pending == 0) {
            done();
        }
    };

    var performAction = function(file, stat) {
        if (!dead) {
            try {
                action(file, stat);
            }
            catch (error) {
                fail(error);
            }
        }
    };

    // This function recursively explores one directory in the context defined by the variables above
    var dive = function(dir) {
        pending++; // async operation starting after this line
        fs.readdir(dir, function(err, list) {
            if (!dead) { // if we are already dead, we don't do anything
                if (err) {
                    fail(err); // if an error occurred, let's fail
                }
                else { // iterate over the files
                    list.forEach(function(file) {
                        if (!dead) { // if we are already dead, we don't do anything
                            var path = dir + "/" + file;
                            pending++; // async operation starting after this line
                            fs.stat(path, function(err, stat) {
                                if (!dead) { // if we are already dead, we don't do anything
                                    if (err) {
                                        fail(err); // if an error occurred, let's fail
                                    }
                                    else {
                                        if (stat && stat.isDirectory()) {
                                            dive(path); // it's a directory, let's explore recursively
                                        }
                                        else {
                                            performAction(path, stat); // it's not a directory, just perform the action
                                        }
                                        pending--; checkSuccess(); // async operation complete
                                    }
                                }
                            });
                        }
                    });
                    pending--; checkSuccess(); // async operation complete
                }
            }
        });
    };

    // Start the exploration
    dive(dir);
};
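A hypothetical usage, with placeholder callbacks:
walk("/some/dir", function (file, stat) {
    // called once per file
    console.log(file, stat.size);
}, function (err) {
    // called exactly once, on completion or on the first error
    if (err) console.error("walk failed:", err);
    else console.log("walk complete");
});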

Don't reinvent the wheel - use and contribute to open source instead. Try one of the following:
https://github.com/pvorb/node-dive
https://github.com/coolaj86/node-walk
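For example, node-walk is event-based; a sketch along the lines of its README (verify against the current docs):
var walk = require('walk');

var walker = walk.walk('/some/dir', { followLinks: false });

walker.on('file', function (root, fileStats, next) {
    // root is the containing directory, fileStats.name the file's name
    console.log(root + '/' + fileStats.name);
    next(); // continue the walk
});

walker.on('end', function () {
    console.log('all done');
});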

There is an npm module for this: dree.
Example:
const dree = require('dree');

const options = {
    depth: 5,                  // To stop after 5 directory levels
    exclude: /dir_to_exclude/, // To exclude some paths with a regexp
    extensions: ['txt', 'jpg'] // To include only some extensions
};

const fileCallback = function (file) {
    action(file.path);
};

let tree;

// Doing it synchronously
tree = dree.scan('./dir', options, fileCallback);

// Doing it asynchronously (returns a promise, so this line must run inside an async function)
tree = await dree.scanAsync('./dir', options, fileCallback);

// Here tree contains an object representing the whole directory tree (filtered with options)

A general pattern for serializing asynchronous work over a list: process one item per call, and recurse from the callback instead of using a loop.
function loop() {
    // Take the next item off the front of the list
    var item = list.shift();
    if (item) {
        // Content of the loop: do the async work,
        // and only continue once its callback fires
        functionWithCallback(loop);
    } else {
        // After the loop has ended
        whatever();
    }
}
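Applied to the question, a hypothetical instance of this pattern (dir, dive and action as in the original code, list from fs.readdir):
function loop() {
    var file = list.shift();
    if (!file) return; // after the loop has ended
    var path = dir + "/" + file;
    fs.stat(path, function (err, stat) {
        if (stat && stat.isDirectory())
            dive(path, action);
        else
            action(null, path);
        loop(); // only move on once this stat has completed
    });
}
loop();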

Related

nodejs - wait for fs.stat to complete

I have a synchronous process I am migrating from C# to nodejs which checks a directory daily for certain files. If those files exist it adds them to a TAR file and writes that TAR to a different directory. While checking for any relevant files using a forEach loop, I am struggling to get my process to wait for the loop to complete before moving on to the next function, which creates the TAR file.
I have tried using the async module as suggested here and promises as suggested here. Without much success.
By making use of the async module I am hoping to halt the execution of commands so that my loop may finish before the fileList array is returned. As it currently stands, I am receiving a TypeError: Cannot read property 'undefined' of undefined.
My question: will async halt execution until my loop completes, if so what am I doing wrong?
Thanks for looking, please see my code below.
var fs = require('fs'),         // access the file system.
    tar = require('tar'),       // archiving tools.
    async = require('async'),   // async tool to wait for the process's loop to finish.
    moment = require('moment'), // date / time tools.
    source = process.env.envA,  // environment variable defining the source directory.
    destination = process.env.envB, // environment variable defining the destination directory.
    archiveName = process.env.envArc, // environment variable defining the static part of the TAR file's name.
    searchParameter = process.env.env1, // environment variable defining a file search parameter.
    date = moment().format('YYYYMMDD'); // Create a date object for file date comparison and the archive file name.

// Change the working directory the process is running in.
process.chdir(source);

// Read the files within that directory.
fs.readdir(source, function (err, files) {
    // If there is an error display that error.
    if (err) {
        console.log('>>> File System Error: ' + err);
    }
    // **** LOOP ENTRY POINT ****
    // Loop through each file that is found,
    // check it matches the search parameter and current date e.g. today's date.
    CheckFiles(files, function (fileList) {
        // If files are present create a new TAR file...
        if (fileList > 0) {
            console.log('>>> File detected. Starting archiveFiles process.');
            archiveFiles(fileList);
        } else { // ...else exit the application.
            console.log('>>> No file detected, terminating process.');
            //process.exit(0);
        }
    });
});

var CheckFiles = function (files, callback) {
    console.log('>>> CheckFiles process starting.');
    var fileList = []; // Create an empty array to hold relevant file names.
    // **** THE LOOP IN QUESTION ****
    // Loop through each file in the source directory...
    async.series(files.forEach(function (item) {
        // ...if the current file's name matches the search parameter...
        if (item.match(searchParameter)) {
            // ...and its modified property is equal to today...
            fs.stat(item, function (err, stats) {
                if (err) {
                    console.log('>>> File Attributes Error: ' + err);
                }
                var fileDate = moment(stats.mtime).format('YYYYMMDD');
                if (fileDate === date) {
                    // ...add to an array of file names.
                    fileList.push(item);
                    console.log('>>> Date match successful: ' + item);
                } else {
                    console.log('>>> Date match not successful: ' + item);
                }
            });
        }
    }), callback(fileList)); // Once all the files have been examined, return the list of relevant files.
    // **** END LOOP ****
    console.log('>>> CheckFiles process finished.');
};

var archiveFiles = function (fileList) {
    console.log('>>> Starting archiveFiles process.');
    if (fileList.length > 0) {
        // Tar the files in the array to another directory.
        tar.c({}, [fileList[0], fileList[1]]).pipe(fs.createWriteStream(destination + archiveName));
        // TODO Slack notification.
        console.log('>>> TAR file written.');
    }
};
Async was unnecessary; the use of Promises as suggested by @I'm Blue Da Ba Dee and fs.statSync as suggested by @Cheloid fulfilled my requirements. For anyone that may benefit from this outcome, please see my code below.
var fs = require('fs'),         // access the file system.
    tar = require('tar'),       // archiving tools.
    moment = require('moment'), // date / time tools.
    source = process.env.envA,  // environment variable defining the source directory.
    destination = process.env.envB, // environment variable defining the destination directory.
    archiveName = process.env.envArc, // environment variable defining the static part of the TAR file's name.
    searchParameter = process.env.env1, // environment variable defining a file search parameter.
    date = moment().format('YYYYMMDD'), // create a date object for file date comparison and the archive file name.
    fileList = [],              // create an empty array to hold relevant file names.
    slack = require('./slack.js'); // import Slack notification functionality.

// Change the working directory the process is running in.
process.chdir(source);

// Read the files within that directory.
fs.readdir(source, function (err, files) {
    // If there is an error display that error.
    if (err) console.log('>>> File System Error: ' + err);

    // Loop through each file that is found...
    checkFilesPromise(files).then(function (response) {
        console.log('>>> File(s) detected. Starting archiveFilesPromise.');
        // Archive any relevant files.
        archiveFilesPromise(fileList).then(function (response) {
            console.log('>>> TAR file written.');
            // Send a Slack notification when complete.
            slack('TAR file written.', 'good', response);
        }, function (error) {
            console.log('>>> archiveFilesPromise error: ' + error);
            slack('archiveFilesPromise error: ' + error, 'Warning', error);
        });
    }, function (error) {
        console.log('>>> checkFilesPromise error: ' + error);
        slack('checkFilesPromise error: ' + error, 'Warning', error);
    });
});

var checkFilesPromise = function (files) {
    return new Promise(function (resolve, reject) {
        files.forEach(function (item) {
            // ...check it matches the search parameter...
            if (item.match(searchParameter)) {
                var stats = fs.statSync(item);
                var fileDate = moment(stats.mtime).format('YYYYMMDD');
                // ...and the current date, e.g. today's date.
                if (fileDate === date) {
                    // Add the file to an array of file names.
                    console.log('>>> Date match successful, pushing: ' + item);
                    fileList.push(item);
                    resolve('Success');
                } else {
                    reject('Failure');
                }
            }
        });
    });
};

var archiveFilesPromise = function (list) {
    return new Promise(function (resolve, reject) {
        if (list.length > 0) {
            // Tar the files in the array to another directory.
            tar.c({}, [list[0], list[1]]).pipe(fs.createWriteStream(destination + date + archiveName));
            resolve('Success');
        } else {
            reject('Failure');
        }
    });
};
You could use a normal for loop and at the last iteration call the callback function.
var CheckFiles = function (files, callback) {
    console.log('>>> CheckFiles process starting.');
    var fileList = []; // Create an empty array to hold relevant file names.
    for (var i = 0, n = files.length; i < n; ++i) {
        var item = files[i];
        // ...if the current file's name matches the search parameter...
        if (item.match(searchParameter)) {
            // ...and its modified property is equal to today...
            fs.stat(item, function (err, stats) {
                if (err) {
                    console.log('>>> File Attributes Error: ' + err);
                }
                var fileDate = moment(stats.mtime).format('YYYYMMDD');
                if (fileDate === date) {
                    // ...add to an array of file names.
                    fileList.push(item);
                    console.log('>>> Date match successful: ' + item);
                } else {
                    console.log('>>> Date match not successful: ' + item);
                }
            });
        }
    }
    // i === n once the loop itself has finished; note the fs.stat
    // callbacks above may not have run yet (see the edit below)
    if (i === n) {
        callback(fileList);
        console.log('>>> CheckFiles process finished.');
    }
};
Edit:
Use recursive callbacks. I'm not sure if this code will work, but I hope you get the idea.
fs.stat is async and therefore the loop does not wait for it... you can use callbacks to "wait" for it.
var CheckFiles = function (files, callback) {
    console.log('>>> CheckFiles process starting.');
    var arrIndex = 0;
    var fileList = [];
    recursiveCallback(fileList, callback); // calling our callback

    function recursiveCallback(array, callback) { // recursive callback inside our function
        var item = files[arrIndex++];
        if (item.match(searchParameter)) {
            // ...and its modified property is equal to today...
            fs.stat(item, function (err, stats) {
                if (err) {
                    console.log('>>> File Attributes Error: ' + err);
                }
                var fileDate = moment(stats.mtime).format('YYYYMMDD');
                if (fileDate === date) {
                    // ...add to an array of file names.
                    array.push(item);
                    console.log('>>> Date match successful: ' + item);
                } else {
                    console.log('>>> Date match not successful: ' + item);
                }
                if (files.length <= arrIndex) // when it was the last item, use the main callback to return the array
                    callback(array);
                else // when not the last item, recurse
                    recursiveCallback(array, callback);
            });
        } else if (files.length <= arrIndex) // when it was the last item, use the main callback to return the array
            callback(array);
        else // when not the last item, recurse
            recursiveCallback(array, callback);
    }
};

How to add many records to mongoDB from directory of JSON files?

I have about a million JSON files saved across many sub-directories of the directory "D:/njs/nodetest1/imports/source1/" and I want to import them into the collection "users" in my mongoDB database.
The following code correctly traverses the file system. As you can see, it reads each item in the directory, and if that item is a directory it reads each item in it. For each item that is not a directory, it performs some operations on it before passing a variable holding an object to a function.
function traverseFS(path) {
    var files = fs.readdirSync(path);
    for (var i in files) {
        var currentFile = path + '/' + files[i];
        var stats = fs.statSync(currentFile);
        if (stats.isFile())
            runOnFile(currentFile);
        else
            traverseFS(currentFile);
    }
}
traverseFS("D:/njs/nodetest1/imports/source1/")
Next, I run a few operations on each file (see below). This reads the file, parses it into a JSON object, reads two attributes of that object into variables, creates an object in the variable "entry", and passes the variable to another function.
function runOnFile(currentFile) {
    var fileText = fs.readFileSync(currentFile, 'utf8');
    var generatedJSON = JSON.parse(fileText);
    var recordID = generatedJSON.recordID;
    var recordText = generatedJSON.recordTexts;
    var entry = {recordID: recordID, recordText: recordText};
    insertRecord(entry);
}
The final function then should be used to insert the data into mongoDB. I think that this is where things go wrong.
function insertRecord(entry) {
    var MongoClient = mongodb.MongoClient;
    var MongoURL = 'mongodb://localhost:27017/my_database_name';
    MongoClient.connect(MongoURL, function (err, db) {
        var collection = db.collection('users');
        collection.insert([entry], function (err, result) {
            db.close();
        });
    });
}
I expected this to run through the file structure, reading the JSON files into objects and then inserting those objects into my mongoDB. Instead it reads the first file into the database and then stops/hangs.
Notes:
I don't want to use mongoimport because I don't want to insert all the data from these files into my MongoDB database. I however am not tied to any aspect of this approach. If some other solution exists I am open to it.
This connects to the database just fine. For each item in the directory this successfully creates an "entry" object and passes it to the insertRecord function. In other words, the problem must be occurring in the insertRecord section. But it obviously could be caused by something earlier in the process.
If I add error handling, no errors are produced. I have left the error handling out of this post because it clutters the readability of the code snippets.
As per the mongodb 2.2 (current latest) driver documentation, insert is deprecated:
DEPRECATED
Use insertOne, insertMany or bulkWrite
So the short answer is probably to change collection.insert([entry], ...) to collection.insertOne(entry, ...) and you're done.
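For clarity, a sketch of the question's insertRecord with just that change (error handling left out, as in the original):
function insertRecord(entry) {
    var MongoClient = mongodb.MongoClient;
    var MongoURL = 'mongodb://localhost:27017/my_database_name';
    MongoClient.connect(MongoURL, function (err, db) {
        var collection = db.collection('users');
        // insertOne takes the document itself, not an array
        collection.insertOne(entry, function (err, result) {
            db.close();
        });
    });
}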
Then for the long answer: you say "about a million JSON files", which typically deserves a fully async approach with the least amount of overhead.
There are two (potential) bottlenecks in the sample code:
fs.readFileSync, this is a blocking operation
the connecting, inserting a record and closing the database connection
Both are executed about a million times. Granted, an import is not usually done over and over again, and (hopefully) not on a machine which needs its performance for other important tasks. Still, the sample code can easily be made more robust.
Consider using the glob module to obtain the list of JSON files.
glob('imports/**/*.json', function(error, files) {...})
This provides you with the full list of files easily in an async fashion.
Then consider connecting to the database just once, insert everything and close once.
Maintaining more or less the same steps you have in the sample, I'd suggest something like:
var glob = require('glob'),
    mongodb = require('mongodb'),
    fs = require('fs'),
    MongoClient = mongodb.MongoClient,
    mongoDSN = 'mongodb://localhost:27017/my_database_name',
    collection; // moved this to the "global" scope so we can do it only once

function insertRecord(json, done) {
    var recordID = json.recordID || null,
        recordText = json.recordText || null;

    // the question implies some kind of validation/sanitation/preparation..
    if (recordID && recordText) {
        // NOTE: insert was changed to insertOne
        return collection.insertOne({recordID: recordID, recordText: recordText}, done);
    }

    done('No recordID and/or recordText');
}

function runOnFile(file, done) {
    // moved to be async
    fs.readFile(file, function(error, data) {
        if (error) {
            return done(error);
        }

        var json = JSON.parse(data);
        if (!json) {
            return done('Unable to parse JSON: ' + file);
        }

        insertRecord(json, done);
    });
}

function processFiles(files, done) {
    var next = files.length ? files.shift() : null;

    if (next) {
        return runOnFile(next, function(error) {
            if (error) {
                console.error(error);
                // you may or may not want to stop here by throwing an Error
            }

            processFiles(files, done);
        });
    }

    done();
}

MongoClient.connect(mongoDSN, function(error, db) {
    if (error) {
        throw new Error(error);
    }

    collection = db.collection('users');

    glob('imports/**/*.json', function(error, files) {
        if (error) {
            throw new Error(error);
        }

        processFiles(files, function() {
            console.log('all done');
            db.close();
        });
    });
});
NOTE: You can collect multiple "entry" records to leverage the performance gain of multiple inserts using insertMany, though I have the feeling the inserted records are more complicated than described, and it might cause some memory issues if not handled correctly.
Just structure your data into one big array of objects, then run db.collection.insertMany.
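A minimal sketch of that approach, assuming an entries array of {recordID, recordText} objects has already been built from the files and db is an open connection (for a million records you would insert in chunks to bound memory use):
var collection = db.collection('users');
collection.insertMany(entries, function (err, result) {
    if (err) throw err;
    console.log('inserted ' + result.insertedCount + ' records');
    db.close();
});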
I suggest doing this using Promises:
const Bluebird = require('bluebird');
const glob = Bluebird.promisify(require('glob'));
const mongodb = require('mongodb');
const fs = Bluebird.promisifyAll(require('fs'));
const Path = require('path');
const MongoClient = mongodb.MongoClient;

const insertMillionsFromPath = Bluebird.coroutine(function *(path, mongoConnString) {
    const db = yield MongoClient.connect(mongoConnString);
    try {
        const collection = db.collection('users');
        const files = yield glob(Path.join(path, "*.json"));
        yield Bluebird.map(
            files,
            Bluebird.coroutine(function *(filename) {
                console.log("reading", filename);
                const fileContent = yield fs.readFileAsync(filename);
                const obj = JSON.parse(fileContent);
                console.log("inserting", filename);
                yield collection.insertOne(obj);
            }),
            {concurrency: 10} // You can increase concurrency here
        );
    } finally {
        yield db.close();
    }
});

insertMillionsFromPath("./myFiles", "mongodb://localhost:27017/database")
    .then(() => console.log("OK"))
    .catch((err) => console.log("ERROR", err));
In order to work, you will need to install the following packages:
npm install --save mongodb bluebird glob
and you will need to use Node.js version 6 or greater; otherwise you will need to transpile your JavaScript (due to the use of function* generators).

Modules with Arguments NodeJS

I have two files, home.js and module.js in the same directory.
What I'm trying to do is, I'm trying to pass the variable named directory as I call the function I exported from module.js.
It gives me this error:
binding.readdir(pathModule._makeLong(path), req);
Type error: path must be a string.
What I can't figure out is why this fails: I pass the directory variable, which is process.argv[2] (it contains the path), from home.js when calling the function in module.js that requires that same argument (the path).
home.js
var fs = require('fs');
var path = require('path');
var module = require('./module.js');

var directory = process.argv[2];
var extensionRequired = process.argv[3];

function printList(err, data) {
    if (err) return err;
    list.forEach(function (file) {
        if (path.extname(file) === '.' + extensionRequired) {
            console.log(file);
        }
    });
}

module(directory, extensionRequired, printList);
module.js
var fs = require('fs');
var path = require('path');

module.exports = function (directory, extensionRequired, callBack) {
    fs.readdir(directory, function(err, list) {
        if (err) return err;
        callBack(err, list)
    });
}
I think you made a mistake, and forgot to rename the list variable:
function printList(err, data) {
    if (err) return err;
    // Here list => data
    data.forEach(function (file) {
        if (path.extname(file) === '.' + extensionRequired) {
            console.log(file);
        }
    });
}
In your callback method, printList, you named the second parameter data. If you want to access that argument's value, you have to use data in your code or reassign it to another variable.
Your method may then look like this:
function printList(err, data) {
    if (err) return err;
    data.forEach(function (file) {
        if (path.extname(file) === '.' + extensionRequired) {
            console.log(file);
        }
    });
}
Additionally, I see two more problems with your code:
In module.js, you're requiring the parameter extensionRequired. If you look closely, you'll find that it isn't even used in this method. This isn't really an error, but it is inelegant. Rather, pass it through to printList as an additional argument (the more node-typical way, IMHO, as sketched after the code below), or keep using it as a global-scope variable as you are currently doing anyway.
In your module.exports anonymous function from module.js, you are using if (err) return err;. I'd highly recommend not doing that. Because this is an asynchronous method, you can't really return something; the caller never sees that return value. Instead, pass the error as the first argument of the callback. If there is no error, pass null instead, so you can easily figure out whether something unexpected happened. Always check that!
Your module.js could then look something like this:
var fs = require('fs');
var path = require('path');

module.exports = function (directory, callback) {
    fs.readdir(directory, function(err, list) {
        if (err)
            // Error happened, pass it to the callback
            callback(err);
        else
            // Everything ran smooth, send null as the error (no error)
            // and the list as the second argument.
            callback(null, list)
    });
}
Your home.js should then be changed accordingly:
var fs = require('fs');
var path = require('path');
var module = require('./module.js');

var directory = process.argv[2];
var extensionRequired = process.argv[3];

function printList(err, data) {
    if (err) {
        console.error("An error occurred:", err);
        // Exit with an error code of one to
        // signal failure to the calling process and
        // prevent printing the probably 'undefined' data variable
        process.exit(1);
    }
    data.forEach(function (file) {
        if (path.extname(file) === '.' + extensionRequired) {
            console.log(file);
        }
    });
}

// extensionRequired removed from the module call, as it is not needed there
module(directory, printList);
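And as a sketch of the alternative mentioned earlier, passing extensionRequired through the module instead of relying on a global (a hypothetical restructuring):
// module.js: forward the extension so printList doesn't need a global
module.exports = function (directory, extensionRequired, callback) {
    fs.readdir(directory, function (err, list) {
        if (err) return callback(err);
        callback(null, list, extensionRequired);
    });
};

// home.js: receive it as a third callback argument
function printList(err, data, extension) {
    if (err) return console.error(err);
    data.forEach(function (file) {
        if (path.extname(file) === '.' + extension) console.log(file);
    });
}
module(directory, extensionRequired, printList);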

How do I get a value nested inside two assign functions and a forEach loop?

I'm writing a NodeJS module that copies a bunch of folders over from Dropbox, and creates a directory based on the folder structure. The part that is giving me a headache is that I need to get the names of all the folders in the main directory, then the names of all the files within a folder before moving on to the next function.
Here is my process right now:
Get the list of folders in main directory using dropboxClient.readdir()
Iterate through the folders and get the names sub-folders (again with dropboxClient.readdir())
Iterate through these sub-folders and get the names of the files.
If the file is a markdown file, add the name to a list
Return the list of all markdown files in the sub-directories
and some pseudocode:
function getListOfFiles() {
    var subfolders = [];
    var fileNames = [];
    dbClient.readdir('', function(error, folders) {
        folders.forEach(function(folder, index) {
            subfolders.push(folder);
            dbClient.readdir('/' + folder, function(error, subfolders) {
                subfolders.forEach(function(subfolder, index) {
                    dbClient.readdir('/' + folder + '/' + subfolder, function(error, files) {
                        files.forEach(function(file, index) {
                            if (isMarkdownFile) {
                                fileNames.push(file)
                            }
                        });
                    });
                });
            });
        });
    });
    return fileNames;
}
I've looked into a handful of packages that seem like they are supposed to solve this scenario, as well as JS generators, but I'm not sure what the simplest solution should be. My code runs fine on Node 0.11.3, so generators are an option, but that's a new concept for me, and I can't seem to find examples that match up to mine.
Utilize the async package. Specifically, use each, eachSeries, or eachLimit for the loops, as well as waterfall and series for control flow.
I'd recommend reading up on... each... of the each functions to figure out which is efficient and consistent/reliable for your situation.
function getListOfFiles(callback) {
    async.waterfall([
        // get a list of the top-level folders
        function (cb) {
            dbClient.readdir('', function (error, topLevelFolders) {
                if (error) return cb(error);
                cb(null, topLevelFolders); // pass the folders to the next function (this is the "waterfall")
            });
        },
        // get an array of all topLevel/subFolder combos
        function (topLevelFolders, cb) {
            var everySubFolder = [];
            async.each(topLevelFolders, function (folder, subFolderCallback) {
                dbClient.readdir(folder, function (error, subFolders) {
                    if (error) return subFolderCallback(error);
                    everySubFolder = everySubFolder.concat(subFolders);
                    subFolderCallback(); // this iteration is done
                });
            }, function (error) {
                if (error) return cb(error);
                cb(null, everySubFolder); // pass all the folder/subfolder combos to the next function
            });
        },
        // get an array of all the files in each folder/subfolder
        function (everySubFolder, cb) {
            var fileNames = [];
            async.each(everySubFolder, function (folder, fileNameCallback) {
                dbClient.readdir(folder, function (error, files) {
                    if (error) return fileNameCallback(error);
                    fileNames = fileNames.concat(files);
                    fileNameCallback();
                });
            }, function (error) {
                if (error) return cb(error);
                cb(null, fileNames); // pass every file combo to the waterfall callback function
            });
        }
    ], function (error, fileNames) {
        if (error) return callback(error);
        callback(null, fileNames); // all done! Every file combo goes to the function's callback!
    });
}
When you use it, you'll do:
getListOfFiles(function (err, files) {
    // Voila! here are all your files
});
DEFINITELY add the .each error handling (the if (error) return ... lines above). If async bumps into an error during the loops and it isn't forwarded, the code will continue looping without it, which, depending on the number of files, could take a little while.

Grunt Plugin- async.each with globbing pattern

I'm working on a grunt plugin that was written (by someone else) to receive hard-coded file names (src and dest), but I'm trying to change it to be able to be pointed to a directory with a globbing pattern and specify an output folder for the "dest". But I'm having trouble with the async.each, because my initial implementation has a nested async.each. Specifically, I think I have a problem with when to call the callback(). I'm getting hung up in some loop somewhere.
This does work as written because the files are created correctly both ways of configuring the Gruntfile.js, but the previously-working tests are now broken.
I'm just wondering about how to structure the second nested loop. Perhaps that doesn't need to use async?
The Gruntfile.js should be able to be config'd as:
myplugin: {
    dev: {
        files: {
            'src/business.html': 'src/test_src/business.test',
            ...
        }
    }
},
or as a globbing pattern (this is what I'm adding)
myplugin: {
    dev: {
        src: ['src/test_src/*.test'],
        dest: 'output'
    }
},
The plugin started out with a single async.each, with each loop handling a specific "files" src/dest. But when we're using a globbing pattern, there's only one outer loop, the pattern, so I need a second async.each to handle the actual files (there are ~11).
grunt.registerMultiTask('myplugin', 'Compiles files using myplugin', function () {
    done = this.async();
    // Iterate over all specified file groups.
    async.each(this.files, function (fileGlob, cb) {
        var destination = fileGlob.dest;
        grunt.log.debug("FileGlob: " + fileGlob);
        async.each(fileGlob.src, function (filepath, callback) {
            if (notAPartial(filepath) && grunt.file.exists(filepath)) {
                if (filepath.match(/\.(test|html)$/)) {
                    grunt.log.debug('test compilation of ' + filepath);
                    compileMyFile(filepath, destination);
                } else {
                    // this callback is from the orig version
                    // i think it's causing problems with the nested async.each calls
                    callback(new Error("No handler for filetype: " + filepath));
                }
            }
        }, function (err) {
            if (err) done(err);
            else done();
        });
        cb();
    }, function (err) {
        if (err) done(err);
        else done();
        grunt.log.ok("Compiled " + count + " files.");
    });
})
It looks like your callbacks are a little out of place. The signature for async.each is: async.each(arrayOfThings, callbackPerThing, callbackWhenWeGotThroughAllThings).
For nesting async.each statements, I like to name the callbacks based on what they do to avoid confusion when nesting, such as:
var done = this.async();
async.each(this.files, function(fileGlob, nextGlob) {
    async.each(fileGlob.src, function(filepath, nextFile) {
        doSomethingAsync(function() {
            // Continue to the next file
            nextFile();
        });
    }, function() {
        // When we are done with all files in this glob,
        // continue to the next glob
        nextGlob();
    });
}, function() {
    // When we are done with all globs,
    // call done to tell the Grunt task we are done
    done();
});
In your case above, you are right about not needing the inner async.each. Nor do you need the outer async.each as none of the operations appear to be asynchronous. You can more simply do the following:
grunt.registerMultiTask('myplugin', 'Compiles files using myplugin', function () {
    this.files.forEach(function(fileGlob) {
        var destination = fileGlob.dest;
        grunt.log.debug("FileGlob: " + fileGlob);
        fileGlob.src.forEach(function(filepath) {
            if (notAPartial(filepath) && grunt.file.exists(filepath)) {
                if (filepath.match(/\.(test|html)$/)) {
                    grunt.log.debug('test compilation of ' + filepath);
                    compileMyFile(filepath, destination);
                } else {
                    grunt.log.error("No handler for filetype: " + filepath);
                }
            }
        });
    });
    grunt.log.ok("Compiled " + count + " files.");
});
