I have a synchronous process I am migrating from C# to nodejs which checks a directory daily for certain files. If those files exist it adds them to a TAR file and writes that TAR to a different directory. Whilst checking for any relevant files using a forEach loop, I am struggling to get my process to wait for the loop to complete before moving onto the next function, to create the TAR file.
I have tried using the async module as suggested here and promises as suggested here. Without much success.
By making use of the async module I am hoping to halt the execution of commands so that my loop may finish before the fileList array is returned. As it currently stands, I am receiving a TypeError: Cannot read property 'undefined' of undefined.
My question: will async halt execution until my loop completes, if so what am I doing wrong?
Thanks for looking, please see my code below.
var fs = require('fs'), // access the file system.
tar = require('tar'), // archiving tools.
async = require('async'), // async tool to wait for the process's loop to finish.
moment = require('moment'), // date / time tools.
source = process.env.envA, // environment variable defining the source directory.
destination = process.env.envB, // environment variable defining the destination directory.
archiveName = process.env.envArc, // environment variable defining the static part of the TAR file's name.
searchParameter = process.env.env1, // environment variable defining a file search parameter.
date = moment().format('YYYYMMDD'); // Create a date object for file date comparison and the archive file name.
// Change working directory the process is running in.
process.chdir(source);
// Read the files within that directory.
fs.readdir(source, function (err, files) {
  // Without a directory listing there is nothing to check: report the error
  // and stop, otherwise CheckFiles would be handed `undefined`.
  if (err) {
    console.log('>>> File System Error: ' + err);
    return;
  }
  // **** LOOP ENTRY POINT ****
  // Loop through each file that is found,
  // check it matches the search parameter and current date e.g. today's date.
  CheckFiles(files, function (fileList) {
    // fileList is an array, so test its length; comparing the array itself
    // with 0 coerces it to a string/number and is almost never true.
    if (fileList.length > 0) {
      // If files are present create a new TAR file...
      console.log('>>> File detected. Starting archiveFiles process.');
      archiveFiles(fileList);
    } else { // ...else exit the application.
      console.log('>>> No file detected, terminating process.');
      //process.exit(0);
    }
  });
});
// Question code, kept as posted. Intended purpose: collect the names of files
// that match searchParameter and were modified today, then pass them to `callback`.
// NOTE: Array.prototype.forEach returns undefined and `callback(fileList)` is an
// ordinary call expression, so both arguments of async.series below are evaluated
// immediately. `callback` therefore receives the still-empty fileList before any
// fs.stat callback has run, and async.series is given undefined as its task list.
var CheckFiles = function (files, callback) {
console.log('>>> CheckFiles process starting.');
var fileList = []; // Create an empty array to hold relevant file names.
// **** THE LOOP IN QUESTION ****
// Loop through each file in the source directory...
async.series(files.forEach(function (item) {
// ...if the current file's name matches the search parameter...
if (item.match(searchParameter)) {
// ...and it's modified property is equal to today...
// fs.stat only schedules the check; its callback runs after forEach has returned.
fs.stat(item, function (err, stats) {
if (err) {
console.log('>>> File Attributes Error: ' + err);
}
// NOTE(review): execution continues after an error is logged; stats is
// presumably undefined in that case, which would make the next line throw.
var fileDate = moment(stats.mtime).format('YYYYMMDD');
if (fileDate === date) {
// ...add to an array of file names.
fileList.push(item);
console.log('>>> Date match successful: ' + item);
} else {
console.log('>>> Date match not successful:' + item);
}
});
}
}), callback(fileList)); // Once all the files have been examined, return the list of relevant files.
// **** END LOOP ****
console.log('>>> CheckFiles process finished.');
};
/**
 * Writes every file named in `fileList` into one TAR archive at
 * `destination + archiveName`. Does nothing when the list is empty.
 *
 * Relies on `fs`, `tar`, `destination` and `archiveName` from the enclosing script.
 *
 * @param {string[]} fileList - Names of the files to archive.
 */
var archiveFiles = function (fileList) {
  console.log('>>> Starting archiveFiles process.');
  if (fileList.length === 0) {
    return;
  }
  var output = fs.createWriteStream(destination + archiveName);
  // The archive is only complete once the write stream has flushed, so the
  // success message is tied to 'finish' rather than printed right after pipe().
  output.on('finish', function () {
    // TODO Slack notification.
    console.log('>>> TAR file written.');
  });
  output.on('error', function (err) {
    console.log('>>> TAR write error: ' + err);
  });
  // Pass the whole list: indexing [0] and [1] dropped every file after the
  // second and passed `undefined` when only one file was found.
  tar.c({}, fileList)
    .on('error', function (err) {
      console.log('>>> TAR creation error: ' + err);
    })
    .pipe(output);
};
Async was unnecessary, the use of Promises as suggested by #I'm Blue Da Ba Dee and fs.statSync as suggested by #Cheloid fulfilled my requirements. For anyone that may benefit from this outcome, please see my code below.
var fs = require('fs'), // access the file system.
tar = require('tar'), // archiving tools.
moment = require('moment'), // date / time tools.
source = process.env.envA, // environment variable defining the source directory.
destination = process.env.envB, // environment variable defining the destination directory.
archiveName = process.env.envArc, // environment variable defining the static part of the TAR file's name.
searchParameter = process.env.env1, // environment variable defining a file search parameter.
date = moment().format('YYYYMMDD'), // create a date object for file date comparison and the archive file name.
fileList = [], // create an empty array to hold relevant file names.
slack = require('./slack.js'); // import Slack notification functionality.
// Change working directory the process is running in.
process.chdir(source);
// Read the files within that directory.
fs.readdir(source, function (err, files) {
  // Without a directory listing there is nothing to check: report the error
  // and stop, otherwise checkFilesPromise would be handed `undefined`.
  if (err) {
    console.log('>>> File System Error: ' + err);
    return;
  }
  // Loop through each file that is found...
  checkFilesPromise(files).then(function (response) {
    console.log('>>> File(s) detected. Starting archiveFilesPromise.');
    // Archive any relevant files.
    archiveFilesPromise(fileList).then(function (response) {
      console.log('>>> TAR file written.');
      // Send a Slack notification when complete.
      slack('TAR file written.', 'good', response);
    }, function (error) {
      console.log('>>> archiveFilesPromise error: ' + error);
      slack('archiveFilesPromise error:' + error, 'Warning', error);
    });
  }, function (error) {
    console.log('>>> CheckFilesPromise error ' + error);
    slack('CheckFilesPromise error: ' + error, 'Warning', error);
  });
});
/**
 * Appends to the script-level `fileList` every entry of `files` whose name
 * matches `searchParameter` and whose modification date (YYYYMMDD) equals `date`.
 *
 * The promise is settled once, after every file has been examined. Settling
 * inside the loop let the first matching-name file decide the outcome and left
 * the promise pending forever when no name matched.
 *
 * Relies on `fs`, `moment`, `searchParameter`, `date` and `fileList` from the
 * enclosing script.
 *
 * @param {string[]} files - File names to examine.
 * @returns {Promise<string>} Resolves with 'Success' when at least one file was
 *   added; rejects with 'Failure' when none was. An exception thrown by
 *   fs.statSync inside the executor also rejects the promise.
 */
var checkFilesPromise = function (files) {
  return new Promise(function (resolve, reject) {
    var added = 0;
    files.forEach(function (item) {
      // ...check it matches the search parameter...
      if (!item.match(searchParameter)) {
        return;
      }
      var stats = fs.statSync(item);
      var fileDate = moment(stats.mtime).format('YYYYMMDD');
      // ...and current date e.g. today's date.
      if (fileDate === date) {
        // Add file to an array of file names.
        console.log('>>> Date match successful, pushing: ' + item);
        fileList.push(item);
        added++;
      }
    });
    if (added > 0) {
      resolve('Success');
    } else {
      reject('Failure');
    }
  });
};
/**
 * Writes every file named in `list` into one TAR archive at
 * `destination + date + archiveName`.
 *
 * Relies on `fs`, `tar`, `destination`, `date` and `archiveName` from the
 * enclosing script.
 *
 * @param {string[]} list - Names of the files to archive.
 * @returns {Promise<string>} Resolves with 'Success' once the archive has been
 *   flushed to disk; rejects with 'Failure' for an empty list, or with the
 *   stream error if creating or writing the archive fails.
 */
var archiveFilesPromise = function (list) {
  return new Promise(function (resolve, reject) {
    if (list.length === 0) {
      reject('Failure');
      return;
    }
    var output = fs.createWriteStream(destination + date + archiveName);
    // Pass the whole list: indexing [0] and [1] dropped every file after the
    // second and passed `undefined` when only one file was found.
    var archive = tar.c({}, list);
    archive.on('error', reject);
    output.on('error', reject);
    // Resolve only when the data has actually been written, not right after pipe().
    output.on('finish', function () {
      resolve('Success');
    });
    archive.pipe(output);
  });
};
You could use a normal for loop and at the last iteration call the callback function.
/**
 * Builds the list of files whose name matches `searchParameter` and whose
 * modification date (formatted YYYYMMDD) equals `date`, then hands it to `callback`.
 *
 * fs.stat is asynchronous, so a counter tracks how many files are still being
 * examined; `callback` runs exactly once, after the last file has been handled
 * (immediately when `files` is empty).
 *
 * Relies on `fs`, `moment`, `searchParameter` and `date` from the enclosing script.
 *
 * @param {string[]} files - File names to examine.
 * @param {function(string[]): void} callback - Receives the matching names, in
 *   the order their fs.stat calls completed.
 */
var CheckFiles = function (files, callback) {
  console.log('>>> CheckFiles process starting.');
  var fileList = []; // Create an empty array to hold relevant file names.
  var pending = files.length; // Files that have not been fully examined yet.

  function finish() {
    callback(fileList);
    console.log('>>> CheckFiles process finished.');
  }

  // Marks one file as examined and reports the result after the last one.
  function settle() {
    pending--;
    if (pending === 0) {
      finish();
    }
  }

  if (pending === 0) {
    finish();
    return;
  }

  files.forEach(function (item) {
    // ...if the current file's name matches the search parameter...
    if (!item.match(searchParameter)) {
      settle();
      return;
    }
    // ...and it's modified property is equal to today...
    fs.stat(item, function (err, stats) {
      if (err) {
        // No stats are available for this file, so it is skipped.
        console.log('>>> File Attributes Error: ' + err);
      } else if (moment(stats.mtime).format('YYYYMMDD') === date) {
        // ...add to an array of file names.
        fileList.push(item);
        console.log('>>> Date match successful: ' + item);
      } else {
        console.log('>>> Date match not successful:' + item);
      }
      settle();
    });
  });
};
Edit:
Use recursive callbacks, I'm not sure if this code will work but I hope you get the idea.
fs.stat is asynchronous, so the loop does not wait for it; you can use callbacks to "wait" for it.
/**
 * Examines `files` one at a time and passes to `callback` the names that match
 * `searchParameter` and whose modification date (formatted YYYYMMDD) equals `date`.
 *
 * Each fs.stat call is started only after the previous one has completed, so
 * the resulting list keeps the order of `files`.
 *
 * Relies on `fs`, `moment`, `searchParameter` and `date` from the enclosing script.
 *
 * @param {string[]} files - File names to examine.
 * @param {function(string[]): void} callback - Receives the matching names.
 */
var CheckFiles = function (files, callback) {
  console.log('>>> CheckFiles process starting.');
  var arrIndex = 0;
  var fileList = [];
  recursiveCallback(fileList, callback); // start with the first file

  // Handles the next file that needs a stat call, or reports the result when
  // none is left. Always called with the same two arguments it receives.
  function recursiveCallback(array, callback) {
    // Names that do not match need no async work; skipping them in a loop
    // keeps the call stack flat for large directories.
    while (arrIndex < files.length && !files[arrIndex].match(searchParameter)) {
      arrIndex++;
    }
    // Every file has been handled (also covers an empty `files`).
    if (arrIndex >= files.length) {
      callback(array);
      return;
    }
    var item = files[arrIndex++];
    // ...and it's modified property is equal to today...
    fs.stat(item, function (err, stats) {
      if (err) {
        // No stats are available for this file, so it is skipped.
        console.log('>>> File Attributes Error: ' + err);
      } else if (moment(stats.mtime).format('YYYYMMDD') === date) {
        // ...add to an array of file names.
        array.push(item);
        console.log('>>> Date match successful: ' + item);
      } else {
        console.log('>>> Date match not successful:' + item);
      }
      recursiveCallback(array, callback);
    });
  }
};
Related
I'm trying to write a script, when a new url is found it will turn the url to a hash. Check if the file already has been written it just ignores it, and if it's not known earlier it should be added.
// Fetches mainUrl and writes one file per link found, named after the MD5 hash
// of the link's href. Every run rewrites every file; nothing is skipped or removed.
needle.get(mainUrl, function (err, res) {
  if (err) throw err;
  // err has already been ruled out above, so only the status needs checking.
  if (res.statusCode == 200) {
    var $ = cheerio.load(res.body);
    $('div div a').each(function (index, element) {
      var url = $(element).attr("href");
      urlList.push(url);
      var hash = crypto.createHash('md5').update(url).digest('hex');
      // The directory and the file name need a separator between them.
      fs.writeFile('./directory/otherdirectory/' + hash, url, (err) => {
        if (err) throw err;
        console.log('Hash created: ' + url + ' saved as ' + hash);
      });
    });
  }
});
This is what I've done so far, but it only writes new files. It doesn't check whether a file has already been added, and it doesn't remove files for URLs that are no longer found.
So what I try to do:
I've written a script that fetches a website for urls.
Hash all the urls.
Make FS check if file already has been written, if it has just ignore it.
If it not is known earlier, add it as a new file.
If url isn't found when fetching anymore, delete it from the list.
I think this might be an X/Y problem and for that I'm still awaiting the answer to my comment.
With that said, you can simply ignore the existing files using fs.existsSync: if it returns true, skip saving the current file; otherwise save it. To remove files that are no longer available, get all the files in the directory using fs.readdir and remove the ones whose URLs are not in the response using fs.unlink:
// Keeps ./directory/otherdirectory in step with the links currently on mainUrl:
// one file per link (named after the MD5 hash of its href), new links are
// added, existing ones are left untouched, and files whose link is gone are removed.
needle.get(mainUrl, (err, res) => {
  if (err) throw err;
  if (res.statusCode === 200) {
    let $ = cheerio.load(res.body);
    let hashes = new Set(); // hashes for this website (used below to keep only the items that are still available)
    $('div div a').each((index, element) => {
      let url = $(element).attr("href");
      if (!url) return; // an anchor without href has nothing to hash; update(undefined) would throw
      let hash = crypto.createHash('md5').update(url).digest('hex');
      hashes.add(hash); // store the hash of the current url
      if (!fs.existsSync('./directory/otherdirectory/' + hash)) { // if this file doesn't exist (notice the "not operator !" before fs.existsSync)
        fs.writeFile('./directory/otherdirectory/' + hash, url, err => { // save it
          if (err) throw err;
          console.log('Hash created: ' + url + ' saved as ' + hash);
        });
      }
    });
    fs.readdir('./directory/otherdirectory', (err, files) => { // get a list of all the files in the directory
      if (err) throw err;
      files.forEach(file => { // and for each file
        if (!hashes.has(file)) { // if it was not encountered above (meaning that it isn't in the hashes set)
          fs.unlink('./directory/otherdirectory/' + file, err => { // remove it
            if (err) throw err;
          });
        }
      });
    });
  } // closes the statusCode check (this brace was missing)
});
Another approach:
Since you only seem to want to store the urls, the best way to do so would be to use one single file to store them all instead of storing each url in its own file. Something like this is more efficient:
// Fetches mainUrl and stores every link's href in the single file
// './directory/list-of-urls', one per line, replacing the previous contents.
needle.get(mainUrl, (err, res) => {
  if (err) throw err;
  if (res.statusCode != 200) return; // only a 200 response is processed

  const $ = cheerio.load(res.body);
  // map each 'a' element to its href attribute and collect them as a plain array
  const hrefOf = (index, element) => $(element).attr("href");
  const urls = $('div div a').map(hrefOf).get();

  const reportSaved = (writeErr) => {
    if (writeErr) throw writeErr;
    console.log('saved all the urls to the file "list-of-urls"');
  };
  // each url goes on its own line, hence the join('\n')
  fs.writeFile('./directory/list-of-urls', urls.join('\n'), reportSaved);
});
That way old urls will be removed automatically as the file gets overwritten each time, and new urls will be added automatically. No need to check whether an url is already encountered or not because it will get re-saved anyway.
And if you want to get the list of urls somewhere else, just read the file and split it by '\n' like so:
// Reads the saved list back as an array with one url per element.
fs.readFile('./directory/list-of-urls', 'utf8', (err, data) => {
  if (err) throw err;
  // Splitting an empty file (or one ending in a newline) yields '' entries;
  // drop them so `urls` contains only real urls.
  let urls = data.split('\n').filter((line) => line !== '');
  // use urls here
});
I have some code that takes an uploaded file, executes it and gets the output. It then compares this output against expected output to check if the script did as expected.
I am now trying to improve this functionality so that an uploaded file will be run several times, each time being checked against a different expected output, or "test case". I then want to push "correct" or "incorrect" onto a results array, so that I can go through that array at the end and check whether there are any "incorrect" (whether the file failed any test case).
I have tried just callbacks within each function.
I have tried using await and async on the getArray as seen below
Using both callbacks and async together.
This is the parent function code that calls for the array to be created, and wants to iterate through it after it has been created.
var resultsArr = await getResults(file.name, files.length, markerDir);
//file.name is the name from the uploaded file object
//files.length is the number of subdirectories (number of testcases to run against)
//markerDir is the str path to where these testcases are stored
if (resultsArr){
  // Scan until the first "incorrect" entry, if there is one.
  var sawIncorrect = false;
  for (var idx = 0; idx < resultsArr.length && !sawIncorrect; idx++) {
    sawIncorrect = resultsArr[idx] == "incorrect";
  }
  if (sawIncorrect) {
    //Calls back to frontend to display result on web app
    checkForMarkerCb('incorrect');
  } else if (resultsArr.length > 0) {
    // Every entry was checked and none was "incorrect".
    checkForMarkerCb('correct');
  }
}
The following is inside the getResults function that is called above
// Body of getResults, kept as posted: for each test case 1..fileLength, read the
// expected output, run the uploaded file, and record 'correct' or 'incorrect'.
for(var i=1; i<=fileLength; i++) {
var sampleOut = markerDir + '/test' + i + '/stdout.txt';
//Grab expected stdout.txt
// NOTE: fs.readFileSync is synchronous and returns the file contents; it takes
// (path, options), so the function passed as third argument is never used.
var markerOut = fs.readFileSync(sampleOut, 'utf-8', function(err){
if (err){
throw err;
};
});
//Run the file and grab the output
// NOTE: executeFile delivers its output through this callback. If the callback
// fires asynchronously (as it does when backed by child_process.execFile), the
// loop moves on without waiting, and because markerOut is declared with `var`
// every callback sees the value from the last iteration.
executeFile(filename, function(fileOut){
//Compare output with sample stdout
if (markerOut == fileOut){
resultsArr.push('correct');
}
else {
resultsArr.push('incorrect');
}
});
}
//If results array has a response for each testcase
// NOTE: this check runs as soon as the loop ends; when the callbacks above have
// not fired yet the lengths differ and nothing is returned.
if (resultsArr.length == fileLength) {
return resultsArr;
}
Implementation of executeFile() as requested:
// Runs `python` on pathToUpload and passes the captured stdout to execFileCb.
// execFileCb is called with exactly one argument (stdout); errors are not
// passed to it.
// NOTE: the `filename` parameter is not used in the body; the script path comes
// from pathToUpload, which is defined outside this function.
function executeFile(filename, execFileCb){
//pathToUpload is a str path to where the upload is stored
const child = execFile('python', [pathToUpload], (err,stdout,stderr) => {
if (err) {
// NOTE: this throw happens inside the execFile callback, so a try/catch
// around the executeFile(...) call cannot intercept it.
throw err;
}
execFileCb(stdout); //Callback with output of file
});
}
// Promise wrapper around the callback-style executeFile.
// NOTE: the callback below expects (err, data), but the callback-style
// executeFile invokes its callback with a single argument, the stdout text.
// That text therefore arrives as `err`, and since a string is never `null`
// the promise is rejected with the program's output.
function executeFileAsync(filename) {
return new Promise(function(resolve,reject){
executeFile(filename, function(err, data){
if (err !== null) reject(err);
else resolve(data);
});
});
}
which was called inside getResults() using
var fileOut = await executeFileAsync(filename)
The initial function that calls getResults().
getResults(): which gets the path to each directory and pushes the results of comparing outputs onto a results array.
executeFile(): uses 'child_process' to run a file and calls back with the output.
I expect the code to wait for getResults to return with the resultsArr so that the for loop can iterate through and check for any "incorrect". Instead, getResults returns before resultsArr is populated.
Using some logging, I see that the code for checking markerOut == fileOut is executed at the end after the getResults() for loop has already completed. I tried setting up the call to executeFile() to also be an async/await similar to how getResults() is called but still no change.
I may not be using async/callbacks correctly, any help is greatly appreciated.
Your executeFileAsync function currently calls executeFile with a callback that is expecting two arguments, but executeFile then does call this execFileCb always with only one argument which is interpreted as an error. It also should not use throw in an asynchronous callback.
Instead, merge them into one function:
/**
 * Runs `python` on pathToUpload and reports the outcome through a promise.
 *
 * @param {string} filename - Accepted for the caller's convenience; the body
 *   reads the script path from pathToUpload instead.
 * @returns {Promise<string>} Resolves with the process's stdout; rejects with
 *   the error reported by execFile.
 */
function executeFile(filename) {
  const runScript = (resolve, reject) => {
    const onExit = (err, stdout, stderr) => {
      if (!err) {
        resolve(stdout); //Callback with output of file
        return;
      }
      reject(err);
    };
    //pathToUpload is a str path to where the upload is stored
    execFile('python', [pathToUpload], onExit);
  };
  return new Promise(runScript);
}
I'm new to promises and I'm sure there's an answer/pattern out there but I just couldn't find one that was obvious enough to me to be the right one. I'm using node.js v4.2.4 and https://www.promisejs.org/
This should be pretty easy I think...I need to do multiple blocks of async in a specific order, and one of the middle blocks will be looping through an array of HTTP GETs.
//New Promise = asyncblock1 - FTP List, resolve the returned list array
//.then(asynchblock2(list)) - loop through list array and HTTP GET needed files
//.then(asynchblock3(list)) - update local log
I tried creating a new Promise, resolving it, passing the list to the .then, doing the GET loop, then the file update. I tried using a nested promise.all inside asynchblock2, but it's actually going in reverse order, 3, 2, and 1 due to the timing of those events. Thanks for any help.
EDIT: Ok, this is the pattern that I'm using which works, I just need a GET loop in the middle one now.
// Returns a promise that logs `label` after `ms` milliseconds and then resolves with 1.
function logAfter(label, ms) {
  return new Promise((resolve) => {
    setTimeout(() => {
      console.log(label);
      resolve(1);
    }, ms);
  });
}

// Three timed steps run strictly one after another: each .then() returns the
// next step's promise, so the chain waits for it before continuing.
var p = logAfter('2 sec', 2000)
  .then(() => {
    // instead of this section, here I'd like to do something like:
    // for(var i = 0; i < dynamicarray.length; i++){
    // globalvar[i] = ftpclient.getfile(dynamicarray[i])
    // }
    // after this loop is done, resolve
    return logAfter('1.5 sec', 1500);
  })
  .then(() => logAfter('1 sec', 1000));
EDIT Here is the almost working code!
// Immediately-invoked routine, kept as posted. It loads the local log file
// an_record_files.json, connects to the FTP server, lists /public_html/test/out,
// collects the names starting with 'AN' that are not in the log, and then issues
// a GET for each name in a list (which the last stage overwrites with fixed names).
// Client, ftpoptions, ferror, mkError, logError and endPORecAlertProgram are not
// defined in this snippet and must come from the surrounding file.
var pORecAlert = (function(){
var pa;
var newans = [];
var anstodownload = [];
var anfound = false;//anfound in log file
var nexttab;
var lastchar;
var po;
var fnar = [];
var antext = '';
//-->> This section works fine; it's just creating a JSON object from a local file
// NOTE: porfile and plogObj are assigned without var/let/const, so they are
// implicit globals (and a ReferenceError in strict mode).
try{
console.log('trying');
porfile = fs.readFileSync('an_record_files.json', 'utf8');
if(porfile == null || porfile == ''){
console.log('No data in log file - uploaded_files_data.json being initialized!');
plogObj = [];
}
else{
plogObj = JSON.parse(porfile);
}
}
catch(jpfp){
console.log('Error parsing log file for PO Receiving Alert: ' + jpfp);
return endPORecAlertProgram();
};
if((typeof plogObj) === 'object'){
console.log('an_record_files.json log file found and parsed for PO Receiving Alert!');
}
else{
return mkError(ferror, 'pORecAlert');
};
//finish creating JSON Object
pa = new Client();
pa.connect(ftpoptions);
console.log('FTP Connection for FTP Check Acknowledgement begun...');
pa.on('greeting', function(msg){
console.log('FTP Received Greeting from Server for ftpCheckAcknowledgement: ' + msg);
});
pa.on('ready', function(){
console.log('on ready');
//START PROMISE LIST
// Stage 1: wrap the directory listing in a promise.
var listpromise = new Promise((reslp, rejlp) => {
pa.list('/public_html/test/out', false, (cerr, clist) => {
if(cerr){
// NOTE: neither reslp nor rejlp is called on this path, so the promise stays
// pending and the .then() stages below never run after a listing error.
return mkError(ferror, 'pORecAlert');
}
else{
console.log('Resolving clist');
reslp(clist);
}
});
});
// Stage 2: pick the 'AN' entries that are not recorded in the local log.
listpromise.then((reclist) => {
ftpplist:
for(var pcl = 0; pcl < reclist.length; pcl++){
console.log('reclist iteration: ' + pcl);
console.log('checking name: ', reclist[pcl].name);
if(reclist[pcl].name.substring(0, 2) !== 'AN'){
console.log('Not AN - skipping');
continue ftpplist;
}
else{//found an AN
for(var plc = 0; plc < plogObj.length; plc++){
if(reclist[pcl].name === plogObj[plc].anname){
//console.log('Found reclist[pcl].name in local log');
// NOTE: anfound is never set back to false, so after the first name found in
// the log every later entry is treated as already logged.
anfound = true;
};
};
if(anfound === false){
console.log('Found AN file to download: ', reclist[pcl].name);
anstodownload.push(reclist[pcl].name);
};
};
};
console.log('anstodownload array:');
console.dir(anstodownload);
return anstodownload;
}).then((fnar) => {
// Stage 3: request each file. The parameter shadows the outer `fnar`.
//for simplicity/transparency, here is the array being overwritten
fnar = new Array('AN_17650_37411.699.txt', 'AN_17650_37411.700', 'AN_17650_37411.701', 'AN_17650_37411.702.txt', 'AN_17650_37411.801', 'AN_17650_37411.802.txt');
// NOTE: the map callback has no return statement, so Promise.all receives an
// array of undefined and resolves at once; it does not wait for any pa.get
// callback, and all GETs are issued back to back.
return Promise.all(fnar.map((gfname) => {
var nsalertnames = [];
console.log('Getting: ', gfname);
debugger;
pa.get(('/public_html/test/out/' + gfname), function(err, anstream){//THE PROBLEM IS THAT THIS GET GETS TRIGGERED AN EXTRA TIME FOR EVERY OTHER FILE!!!
antext = '';
console.log('Get begun for: ', gfname);
debugger;
if(err){
ferror.nsrest_trace = 'Error - could not download new AN file!';
ferror.details = err;
console.log('Error - could not download new AN file!');
console.log('************************* Exiting *************************')
logError(ferror, gfname);
}
else{
// anstream.on('data', (anchunk) => {
// console.log('Receiving data for: ', gfname);
// antext += anchunk;
// });
// anstream.on('end', () => {
// console.log('GET end for: ', gfname);
// //console.log('path to update - gfname ', gfname, '|| end text.');
// fs.appendFileSync(path.resolve('test/from', gfname), antext);
// console.log('Appended file');
// return antext;
// });//end end
};
});//get end
}));//end Promise.all and map
}).then((res99) => {
// Stage 4: res99 is the array produced by Promise.all above.
// pa.end();
// return Promise(() => {
console.log('end all. res99: ', res99);
// //res4(1);
// return 1;
// });
});
});
})();
-->> What happens here:
So I added the almost working code. What is happening is that for every other file, an additional Get request gets made (I don't know how it's being triggered), which fails with an "Unable to make data connection".
So for my iteration over this array of 6, there ends up being 9 Get requests. Element 1 gets requested (works and expected), then 2 (works and expected), then 2 again (fails and unexpected/don't know why it was triggered). Then 3 (works and expected), then 4 (works and expected), then 4 again (fails and unexpected) etc
what you need is Promise.all(), sample code for your app:
...
}).then(() => {
return Promise.all(arry.map(item => ftpclient.getFile(item)))
}).then((resultArray) => {
...
So thanks for the help (and the negative votes with no useful direction!)
I actually reached out to a good nodejs programmer and he said that there seemed to be a bug in the ftp module I was using, and even when trying to use a blackbird .map, the quick succession of requests somehow kicked off an error. I ended up using promise-ftp, blackbird, and promiseTaksQueue - the kicker was that I needed interval. Without it the ftp would end up causing a strange illogical error in the ftp module.
You need the async library. Use the async.eachSeries in situations where you need to use asynchronous operations within a loop, then execute a function when all of those are complete. There are many variations depending on the flow you want but this library does it all.
https://github.com/caolan/async
// Starts the async operation for every item and runs the final function once
// all of them have reported back (or as soon as one reports an error).
async.each(theArrayToLoop, function(item, callback) {
  // Perform async operation on item here.
  doSomethingAsync(item).then(function(){
    callback();
  }, callback); // forward a rejection as the error; otherwise the final function would never run
}, function(err){
  if (err) {
    // One of the operations failed; `err` is the error that was reported.
    console.error(err);
    return;
  }
  //All your async calls are finished continue along here
});
I'm trying to create a synchronous function which creates a random string and checks if there's already a file with that name on Amazon Web Service S3. How could I make the function synchronous as inside of it is the asynchronous web service call to AWS? If the filename already exists, the function should call itself again (recursive) until a available filename is found.
// Question code, kept as posted: tries to return a random key that does not yet
// exist in the 'pics' bucket.
// NOTE: the result of headObject arrives through its callback. Assuming that
// callback is invoked asynchronously, `return unique` below runs first, so the
// caller always gets the first random string, checked or not. The retry inside
// the callback only reassigns a local variable after the function has returned.
var generateUniqueAWSKey = function(prefix) {
var unique = generateRandomString(); // generates a random string
var name = prefix + unique + '.png';
awss3.headObject({ Bucket: 'pics', Key: name }, function(error, result) {
// No error is taken to mean the object already exists, so another key is tried.
if (!error) {
unique = generateUniqueAWSKey(prefix);
}
});
return unique;
};
var filename = generateUniqueAWSKey('prefix_');
// more code below using the filename
You need to start thinking in terms of callbacks. I am not sure which condition should call the same function again, but your logic should be there on the callback function of the asynchronous call that you are making. In other words, your code should look like this:
/**
 * Generates random strings until `prefix + <string> + '.png'` is not found in
 * the 'pics' bucket, then passes that random string to `callback`.
 *
 * Relies on `generateRandomString` and `awss3` from the enclosing script.
 *
 * @param {string} prefix - Prepended to the random string to form the S3 key.
 * @param {function(string): void} callback - Receives the random string whose
 *   key was not found (the string only, without prefix or '.png').
 */
function generateUniqueAWSKey (prefix, callback) {
  var unique = generateRandomString(); // generates a random string
  var name = prefix + unique + '.png';
  awss3.headObject({ Bucket: 'pics', Key: name }, function(error, result) {
    if (!error) {
      // The lookup succeeded, so an object with this key already exists:
      // try again with a new random string.
      return generateUniqueAWSKey(prefix, callback);
    }
    // NOTE(review): every error is treated as "no such object". Other failures
    // (network, permissions) end up here too; confirm whether they need
    // separate handling.
    return callback(unique);
  });
};
// Usage: the generated name is only available inside the callback, so the code
// that needs the filename goes there rather than after the call.
generateUniqueAWSKey('prefix_', function (name) {
var filename = name;
// more code below using the filename
});
This question already has answers here:
What is the purpose of the var keyword and when should I use it (or omit it)?
(19 answers)
JavaScript closure inside loops – simple practical example
(44 answers)
Closed last year.
I've got a problem with this code in node.js. I want to recursively walk through a directory tree and apply the callback action to every file in the tree. This is my code at the moment:
var fs = require("fs");
// General function
// Question code, kept as posted: reads `dir`, and for every entry either
// recurses (directories) or calls action(null, path) (everything else).
// action(err) is called when the directory itself cannot be read.
var dive = function (dir, action) {
// Assert that it's a function
// (a non-function `action` is replaced by a no-op)
if (typeof action !== "function")
action = function (error, file) { };
// Read the directory
fs.readdir(dir, function (err, list) {
// Return the error if something went wrong
if (err)
return action(err);
// For every file in the list
list.forEach(function (file) {
// Full path of that file
// NOTE: `path` is assigned without var/let/const, so it is one shared global
// rather than a per-iteration variable. The fs.stat callbacks below run after
// the loop has moved on and read whatever value `path` holds at that time.
path = dir + "/" + file;
// Get the file's stats
fs.stat(path, function (err, stat) {
console.log(stat);
// NOTE: `err` is not checked here; when stat is missing the entry falls
// through to the else branch and is reported as a file.
// If the file is a directory
if (stat && stat.isDirectory())
// Dive into the directory
dive(path, action);
else
// Call the action
action(null, path);
});
});
});
};
The problem is that in the for each loop stat is called for every file via the variable path. When the callback is called, path already has another value and so it dives into the wrong directories or calls the action for the wrong files.
Probably this problem could easily get solved by using fs.statSync, but this is not the solution I would prefer, since it is blocking the process.
var path = dir + "/" + file;
You forgot to make path a local variable. Now it won't be changed behind your back in the loop.
Use node-dir for this. Because you need a separate action for directories and files, I'll give you 2 simple iterators using node-dir.
Asynchronously iterate the files of a directory and its subdirectories and pass an array of file paths to a callback.
var dir = require('node-dir');
// Collect the paths of all files under __dirname (subdirectories included),
// then hand each path to actionOnFile.
dir.files(__dirname, function(err, files) {
  if (err) throw err;
  console.log(files);
  // `files` is the complete array of file paths at this point
  for (const filepath of files) {
    actionOnFile(null, filepath);
  }
});
Asynchronously iterate the subdirectories of a directory and its subdirectories and pass an array of directory paths to a callback.
var dir = require('node-dir');
// Collect the paths of all directories under __dirname, then hand each path
// to actionOnDir.
dir.subdirs(__dirname, function(err, subdirs) {
  if (err) throw err;
  console.log(subdirs);
  // `subdirs` is the complete array of directory paths at this point
  for (const dirpath of subdirs) {
    actionOnDir(null, dirpath);
  }
});
Another suitable library is filehound. It supports file filtering (if required), callbacks and promises.
For example:
const Filehound = require('filehound');

// Called once for every file that was found.
function action(file) {
  console.log(`process ${file}`)
}

// Receives the outcome of the search: logs the error, or runs `action` on each file.
const onSearchDone = (err, files) => {
  if (!err) {
    files.forEach(action);
    return;
  }
  return console.error(`error: ${err}`);
};

Filehound.create().find(onSearchDone);
The library is well documented and provides numerous examples of common use cases.
https://github.com/nspragg/filehound
Disclaimer: I'm the author.
Not sure if I should really post this as an answer, but for your convenience and other users, here is a rewritten version of OP's which might prove useful. It provides:
Better error management support
A global completion callback which is called when the exploration is complete
The code:
/**
 * Recursively explores a directory tree.
 *
 * dir: path to the directory to explore
 * action(file, stat): called for each non-directory entry, until an error occurs.
 *   file: path to the entry. stat: its stat (retrieved by fs.stat)
 * done(err): called exactly once when the process is complete. err is undefined
 *   if everything went well; otherwise it is the error that stopped the process
 */
var walk = function(dir, action, done) {
    // set once an error has been reported; nothing further is processed afterwards
    var aborted = false;
    // number of async operations (readdir / stat) that have not completed yet
    var outstanding = 0;

    // reports the first error to done() and stops the walk
    function abort(err) {
        if (aborted) {
            return;
        }
        aborted = true;
        done(err);
    }

    // marks one async operation as complete; signals success when none is left
    function finishOne() {
        outstanding--;
        if (!aborted && outstanding == 0) {
            done();
        }
    }

    // runs the user action, turning an exception into a failure of the walk
    function visitFile(file, stat) {
        if (aborted) {
            return;
        }
        try {
            action(file, stat);
        }
        catch (error) {
            abort(error);
        }
    }

    // stats one entry and either explores it (directory) or runs the action on it
    function inspect(entryPath) {
        outstanding++; // async operation starting after this line
        fs.stat(entryPath, function(err, stat) {
            if (aborted) {
                return;
            }
            if (err) {
                abort(err);
                return;
            }
            if (stat && stat.isDirectory()) {
                explore(entryPath);
            }
            else {
                visitFile(entryPath, stat);
            }
            finishOne();
        });
    }

    // lists one directory and inspects each of its entries
    function explore(folder) {
        outstanding++; // async operation starting after this line
        fs.readdir(folder, function(err, list) {
            if (aborted) {
                return;
            }
            if (err) {
                abort(err);
                return;
            }
            list.forEach(function(file) {
                if (!aborted) {
                    inspect(folder + "/" + file);
                }
            });
            finishOne();
        });
    }

    // start exploration
    explore(dir);
};
Don't reinvent the wheel - use and contribute to open source instead. Try one of the following:
https://github.com/pvorb/node-dive
https://github.com/coolaj86/node-walk
There is an NPM module for this:
npm dree
Example:
const dree = require('dree');

// Scan settings shared by both calls below.
const options = {
  depth: 5, // To stop after 5 directory levels
  exclude: /dir_to_exclude/, // To exclude some paths with a regexp
  extensions: [ 'txt', 'jpg' ] // To include only some extensions
};

// Passed to dree as the per-file callback: forwards each file's path to action().
const fileCallback = (file) => {
  action(file.path);
};

// Doing it synchronously
let tree = dree.scan('./dir', options, fileCallback);

// Doing it asynchronously (returns promise)
tree = await dree.scanAsync('./dir', options, fileCallback);

// Here tree contains an object representing the whole directory tree (filtered with options)
/**
 * Asynchronous "for each": takes the next element off `list`, runs
 * functionWithCallback, and is called again when that operation completes.
 * Once `list` is empty, whatever() is called instead.
 *
 * Relies on `list`, `functionWithCallback` and `whatever` from the enclosing scope.
 */
function loop( ) {
  // Test the length rather than the shifted value, so falsy elements
  // (0, '', null, ...) do not end the loop early.
  if ( list.length === 0 ) {
    // after the loop has ended
    whatever( );
    return;
  }
  var item = list.shift( );
  // content of the loop
  functionWithCallback( loop );
}