javascript subfolder read files [duplicate] - javascript

In node.js, can I list files with wild card matching like
fs.readdirSync('C:/tmp/*.csv')?
I did not find the information on wild card matching from the fs documention.

If you don't want to add a new dependency to your project (like glob), you can use plain js/node functions, like:
var files = fs.readdirSync('C:/tmp').filter(fn => fn.endsWith('.csv'));
Regex may help in more complex comparisons

This is not covered by Node core. You can check out this module for what you are after.
Setup
npm i glob
Usage
var glob = require("glob")
// options is optional
glob("**/*.js", options, function (er, files) {
// files is an array of filenames.
// If the `nonull` option is set, and nothing
// was found, then files is ["**/*.js"]
// er is an error object or null.
})

If glob is not quite what you want, or a little confusing, there is also glob-fs. The documentation covers many usage scenarios with examples.
// sync
var files = glob.readdirSync('*.js', {});
// async
glob.readdir('*.js', function(err, files) {
console.log(files);
});
// stream
glob.readdirStream('*.js', {})
.on('data', function(file) {
console.log(file);
});
// promise
glob.readdirPromise('*.js')
.then(function(files) {
console.log(file);
});

Just in case you want to search files by regex (for complex matches), then consider using file-regex, which supports recursive search and concurrency control (for faster results).
Sample usage
import FindFiles from 'file-regex'
// This will find all the files with extension .js
// in the given directory
const result = await FindFiles(__dirname, /\.js$/);
console.log(result)

Pretty straightforward with match out of the box
import fs from 'fs'
fs.readdirSync('C:/tmp/').filter((allFilesPaths:string) =>
allFilesPaths.match(/\.csv$/) !== null)

dont reinvent the wheel, if you are on *nix the ls tool can easily do this (node api docs)
var options = {
cwd: process.cwd(),
}
require('child_process')
.exec('ls -1 *.csv', options, function(err, stdout, stderr){
if(err){ console.log(stderr); throw err };
// remove any trailing newline, otherwise last element will be "":
stdout = stdout.replace(/\n$/, '');
var files = stdout.split('\n');
});

Related

Gulp: call an async function which provides its own callback from within a transform function

I want to create a function for use in a pipe() call in Gulp that'll enable conversion of xlsx files to json.
I had this working with NPM package 'excel-as-json' for gulp 3, however Gulp 4 has forced me to actually understand what it is doing ;-)
Six hours in, and my incapacity of getting this to work due to a lack of js/async/streaming knowledge is punching my curiosity.
Code is as follows:
paths = {excel_sourcefiles: "./sourcefiles/*.xls*", excel_targetdir_local_csvjson: "./targetfiles_local/*.*"}
var replaceExt = require('replace-ext');
var PluginError = require('plugin-error')
var gulpFunction = require('gulp-function').gulpFunction // default ES6 export
var through = require("through2")
var convertExcel = require('excel-as-json').processFile;
var changed = require('gulp-changed');
var assign = Object.assign || require('object.assign');
var notify = require('gulp-notify');
var gulpIgnore = require('gulp-ignore');
var rename = require('gulp-rename');
gulp.task('excel-to-jsoncsv', function() {
return gulp.src(paths.excel_sourcefiles)
// .pipe(debug())
.pipe(gulpIgnore.exclude("*\~*")) // Ignore temporary files by Excel while xlsx is open
.pipe(gulpIgnore.exclude("*\$*")) // Ignore temporary files by Excel while xlsx is open
.pipe(plumber({errorHandler: notify.onError('gulp-excel-to-jsoncsv error: <%= error.message %>')}))
.pipe(changed(paths.excel_targetdir_local_glob, { extension: '.csv' }))
.pipe(GulpConvertExcelToJson()) // This is where the magic should happen
.pipe(rename({ extname: '.csv' })) // Saving as .csv for SharePoint (does not allow .json files to be saved)
.pipe(gulp.dest(paths.excel_targetdir_local))
});
function GulpConvertExcelToJson() {
return through.obj(function(chunk, enc, callback) {
var self = this
if (chunk.isNull() || chunk.isDirectory()) {
callback() // Do not process directories
// return
}
if (chunk.isStream()) {
callback() // Do not process streams
// return
}
if (chunk.isBuffer()) {
convertExcel(chunk.path, null, null, // Converts file found at `chunk.path` and returns (err, `data`) its callback.
function(err, data) {
if (err) {
callback(new PluginError("Excel-as-json", err))
}
chunk.contents = new Buffer(JSON.stringify(data))
self.push(chunk)
callback()
// return
})
} else {
callback()
}
})
}
I (now) know there are other excel > json gulp modules that could allow me to solve this problem without writing my own module, yet I'd like to understand what I should do different here.
The error returned is 'Did you forget to signal Async completion?', which I tried to not do. However, probably my attempt to fix the error 'this.push is not a function' with a var self = this is not what I was supposed to do.
Looking at examples like gulp-zip introduced me to unfamiliar notations, making me unable to autodidact my way out of this.
In summary:
how would I go about calling an async function during a through.obj function call, where the chunk's contents is updated with the (not under my control) async function that only provides me with a callback (err, data)?
This problem is coming deep from the excel library. The end event should be finish.
Unsuccessful Try
I did the following, to install the modified excel.js:
rm -rf node_modules/excel
git clone https://github.com/bdbosman/excel.js node_modules/excel
cd node_modules/excel && npm i && cd ../..
This is not enough since the excel-as-json uses an old version of excel.
Either you have to modify the excel-as-json module, to use excel#1.0.0 (after the Pull Request is merged) or manually edit the file in node_modules. I will describe the second way here.
Temporary Solution
Edit the excel file after installing the modules.
I used:
sed -i "s/'end'/'finish'/g" node_modules/excel/excelParser.js
And then I ran:
$ gulp excel-to-jsoncsv
[19:48:21] Using gulpfile ~/so-question-gulp-async-function-call-xlsx/gulpfile.js
[19:48:21] Starting 'excel-to-jsoncsv'...
[19:48:21] Finished 'excel-to-jsoncsv' after 126 ms
And it apparently worked.

Issue with output list for learnyounode #6 MAKE IT MODULAR

Just started coding last thursday, bear with me here:
my code for this question of the tutorial is returning a list of just the extension names from the directory and not a list of the files with the said extension, e.g. if i used a directory with 3 .js files and used js as my extension argument in the command line, then i would get
1. js
2. js
3. js
as the output, here is the question from the tutorial and my code. THANK YOU!
the question from learnyounode tutorial number 6:
LEARN YOU THE NODE.JS FOR MUCH WIN!
─────────────────────────────────────
MAKE IT MODULAR
Exercise 6 of 13
This problem is the same as the previous but introduces the concept of modules. You will need to create two files to solve this.
Create a program that prints a list of files in a given directory, filtered by the extension of the files. The first argument is the directory name and the second argument is the extension filter. Pr
int the list of files (one file per line) to the console. You must use asynchronous I/O.
You must write a module file to do most of the work. The module must export a single function that takes three arguments: the directory name, the filename extension string and a callback function, in
that order. The filename extension argument must be the same as was passed to your program. i.e. don't turn it into a RegExp or prefix with "." or do anything else but pass it to your module where y
ou can do what you need to make your filter work.
The callback function must be called using the idiomatic node(err, data) convention. This convention stipulates that unless there's an error, the first argument passed to the callback will be null, a
nd the second will be your data. In this case, the data will be your filtered list of files, as an Array. If you receive an error, e.g. from your call to fs.readdir(), the callback must be called wi
th the error, and only the error, as the first argument.
You must not print directly to the console from your module file, only from your original program.
In the case of an error bubbling up to your original program file, simply check for it and print an informative message to the console.
These four things are the contract that your module must follow.
Export a single function that takes exactly the arguments described.
Call the callback exactly once with an error or some data as described.
Don't change anything else, like global variables or stdout.
Handle all the errors that may occur and pass them to the callback.
The benefit of having a contract is that your module can be used by anyone who expects this contract. So your module could be used by anyone else who does learnyounode, or the verifier, and just work. *
and my code is:
module (p6m.js):
var fs=require('fs'), ph=require('path'), exports =module.exports={}
exports.f=function(path,ext,callbk){
fs.readdir(path,function(err,files){
if(err){
return callbk(err,null)
}
files=files.filter(
function(file){
return ph.extname(file)==="."+ext
}
)
return callbk(null,files)}
)}
and my program (p6.js):
var p6m=require('./p6m'), path=process.argv[2], ext=process.argv[3]
p6m.f(path, ext, function(err,files){
if(err){return console.log.error('Error occured:', err)};
files.forEach(function(file){
console.log(file)})})
I got the same problem with my code as of need to use a single function export . So instead of exporting a module function like this :
exports =module.exports={}
exports.f=function(path,ext,callbk){...};
try it doing this way :
module.exports = function (path, ext, callbk) {...};
because its a single function so you don't need to specify that function with a name " f " as if you are doing it in this statement :
exports.f = function(path,ext,callbk){...};
whenever you will import the module,it will automatically call this function only, since the module contains this single function.
You can try this piece of code, it works well for me.
module code: mymodule.js
var fs = require('fs');
var ph= require('path');
module.exports = function (path, ext, callbk) {
var pathio = "." + ext;
fs.readdir(path, function (err, files) {
if (err)
return callbk(err);
else {
var listf = []; //listf is the resultant list
for (var i = 0; i < files.length; i++) {
if (ph.extname(files[i]) === pathio) {
listf.push(files[i]);
}
}
callbk(null, listf);
}
});
}
program code : moduletest.js
var mod = require('./mymodule');
mod(process.argv[2], process.argv[3], function (err, listf) {
if (err) {
console.log('Error!')
} else {
for (var i = 0; i < listf.length; i++) {
console.log(listf[i]);
}
}
});
and do remember, learnyounode series is very specific about its way of coding and syntax, so even if you are doing the logic right way still you won't get pass,you need your code to be the best and optimized. I'll suggest you to refer to discussions on nodeschool itself for various issues you might get in learnyounode series.
That will work and output the right results, but what they are looking for is something like this:
module.exports = function() {};
Because they only want one function total in the exports.
You could also do something like this:
module.exports = FindFilesByExtension;
function FindFilesByExtension(path, ext, callback) {
//your code
}
Here is my solution,
Thsi is my module file filteredls.js
var fs = require('fs');
var path = require('path');
module.exports = function filterFiles(folder, extension, callback) {
fs.readdir(folder, function(err, files) {
if(err) return callback(err);
var filesArray = [];
files.forEach(function(file) {
if(path.extname(file) === "."+extension) {
filesArray.push(file);
}
});
return callback(null, filesArray);
});
}
And here is my test file for reading module modular.js
var ff = require('./filteredls.js');
ff(process.argv[2], process.argv[3], function(err, data) {
if(err)
return console.error(err);
data.forEach(function(file) {
console.log(file);
});
});
And this is my result screenshot,

Delete (unlink) files matching a regex

I want to delete several files from a directory, matching a regex. Something like this:
// WARNING: not real code
require('fs').unlink(/script\.\d+\.js$/);
Since unlink doesn't support regexes, I'm using this instead:
var fs = require('fs');
fs.readdir('.', (error, files) => {
if (error) throw error;
files.filter(name => /script\.\d+\.js$/.test(name)).forEach(fs.unlink);
});
which works, but IMO is a little more complex than it should be.
Is there a better built-in way to delete files that match a regex (or even just use wildcards)?
No there is no globbing in the Node libraries. If you don't want to pull in something from NPM then not to worry, it just takes a line of code. But in my testing the code provided in other answers mostly won't work. So here is my code fragment, tested, working, pure native Node and JS.
let fs = require('fs')
const path = './somedirectory/'
let regex = /[.]txt$/
fs.readdirSync(path)
.filter(f => regex.test(f))
.map(f => fs.unlinkSync(path + f))
You can look into glob https://npmjs.org/package/glob
require("glob").glob("*.txt", function (er, files) { ... });
//or
files = require("glob").globSync("*.txt");
glob internally uses minimatch. It works by converting glob expressions into JavaScript RegExp objects. https://github.com/isaacs/minimatch
You can do whatever you want with the matched files in the callback (or in case of globSync the returned object).
I have a very simple solution to do this. Read the directory in node.js using fs.readdir API. This will give an array of all the files in the directory. Once you have that array, iterate over it using for loop, apply regex.
The below code will delete all files starting with "en" and extension ".js"
fs.readdir('.', (err, files)=>{
for (var i = 0, len = files.length; i < len; i++) {
var match = files[i].match(/en.*.js/);
if(match !== null)
fs.unlink(match[0]);
}
});
The answer could depend on your environment. It looks like you are running on node.js. A quick perusal of the node.js documentation suggests there is no "built in" way to do this, i.e., there isn't a single function call that will do this for you. The next best thing might involve a small number of function calls. As I wrote in my comment, I don't think there's any easy way to make your suggested answer much briefer just relying on the standard node.js function calls. That is, if I were in your shoes, I would go with the solution you already suggested (though slightly cleaned up).
One alternative is to go to the shell, e.g.,
var exec = require('child_process').exec;
exec('ls | grep "script[[:digit:]]\\\+.js" | xargs rm');
Personally, I would strongly prefer your offered solution over this gobbledygook, but maybe you're shooting for something different.

nodejs fs.exists()

I'm trying to call fs.exists in a node script but I get the error:
TypeError: Object # has no method 'exists'
I've tried replacing fs.exists() with require('fs').exists and even require('path').exists (just in case), but neither of these even list the method exists() with my IDE. fs is declared at the top of my script as fs = require('fs'); and I've used it previously to read files.
How can I call exists()?
Your require statement may be incorrect, make sure you have the following
var fs = require("fs");
fs.exists("/path/to/file",function(exists){
// handle result
});
Read the documentation here
http://nodejs.org/api/fs.html#fs_fs_exists_path_callback
You should be using fs.stats or fs.access instead. From the node documentation, exists is deprecated (possibly removed.)
If you are trying to do more than check existence, the documentation says to use fs.open. To example
fs.access('myfile', (err) => {
if (!err) {
console.log('myfile exists');
return;
}
console.log('myfile does not exist');
});
Do NOT use fs.exists please read its API doc for alternative
this is the suggested alternative : go ahead and open file then handle error if any :
var fs = require('fs');
var cb_done_open_file = function(interesting_file, fd) {
console.log("Done opening file : " + interesting_file);
// we know the file exists and is readable
// now do something interesting with given file handle
};
// ------------ open file -------------------- //
// var interesting_file = "/tmp/aaa"; // does not exist
var interesting_file = "/some/cool_file";
var open_flags = "r";
fs.open(interesting_file, open_flags, function(error, fd) {
if (error) {
// either file does not exist or simply is not readable
throw new Error("ERROR - failed to open file : " + interesting_file);
}
cb_done_open_file(interesting_file, fd);
});
As others have pointed out, fs.exists is deprecated, in part because it uses a single (success: boolean) parameter instead of the much more common (error, result) parameters present nearly everywhere else.
However, fs.existsSync is not deprecated (because it doesn't use a callback, it just returns a value), and if the whole rest of your script depends on checking the existence of a single file, it can make things easier than having to deal with callbacks or surrounding the call with try/catch (in the case of accessSync):
const fs = require('fs');
if (fs.existsSync(path)) {
// It exists
} else {
// It doesn't exist
}
Of course, existsSync is synchronous and blocking. While this can sometimes be handy, if you need to do other operations in parallel (such as checking for the existence of multiple files at once), you should use one one of the other callback-based methods.
Modern versions of Node also support promise-based versions of fs methods, which one might prefer over callbacks:
fs.promises.access(path)
.then(() => {
// It exists
})
.catch(() => {
// It doesn't exist
});

node.js execute system command synchronously

I need in node.js function
result = execSync('node -v');
that will synchronously execute the given command line and return all stdout'ed by that command text.
ps. Sync is wrong. I know. Just for personal use.
UPDATE
Now we have mgutz's solution which gives us exit code, but not stdout! Still waiting for a more precise answer.
UPDATE
mgutz updated his answer and the solution is here :)
Also, as dgo.a mentioned, there is stand-alone module exec-sync
UPDATE 2014-07-30
ShellJS lib arrived. Consider this is the best choice for now.
UPDATE 2015-02-10
AT LAST! NodeJS 0.12 supports execSync natively.
See official docs
Node.js (since version 0.12 - so for a while) supports execSync:
child_process.execSync(command[, options])
You can now directly do this:
const execSync = require('child_process').execSync;
code = execSync('node -v');
and it'll do what you expect. (Defaults to pipe the i/o results to the parent process). Note that you can also spawnSync now.
See execSync library.
It's fairly easy to do with node-ffi. I wouldn't recommend for server processes, but for general development utilities it gets things done. Install the library.
npm install node-ffi
Example script:
var FFI = require("node-ffi");
var libc = new FFI.Library(null, {
"system": ["int32", ["string"]]
});
var run = libc.system;
run("echo $USER");
[EDIT Jun 2012: How to get STDOUT]
var lib = ffi.Library(null, {
// FILE* popen(char* cmd, char* mode);
popen: ['pointer', ['string', 'string']],
// void pclose(FILE* fp);
pclose: ['void', [ 'pointer']],
// char* fgets(char* buff, int buff, in)
fgets: ['string', ['string', 'int','pointer']]
});
function execSync(cmd) {
var
buffer = new Buffer(1024),
result = "",
fp = lib.popen(cmd, 'r');
if (!fp) throw new Error('execSync error: '+cmd);
while(lib.fgets(buffer, 1024, fp)) {
result += buffer.readCString();
};
lib.pclose(fp);
return result;
}
console.log(execSync('echo $HOME'));
Use ShellJS module.
exec function without providing callback.
Example:
var version = exec('node -v').output;
There's an excellent module for flow control in node.js called asyncblock. If wrapping the code in a function is OK for your case, the following sample may be considered:
var asyncblock = require('asyncblock');
var exec = require('child_process').exec;
asyncblock(function (flow) {
exec('node -v', flow.add());
result = flow.wait();
console.log(result); // There'll be trailing \n in the output
// Some other jobs
console.log('More results like if it were sync...');
});
Native Node.js solution is:
const {execSync} = require('child_process');
const result = execSync('node -v'); // 👈 this do the trick
Just be aware that some commands returns Buffer instead of string. And if you need string just add encoding to execSync options:
const result = execSync('git rev-parse HEAD', {encoding: 'utf8'});
... and it is also good to have timeout on sync exec:
const result = execSync('git rev-parse HEAD', {encoding: 'utf8', timeout: 10000});
This is not possible in Node.js, both child_process.spawn and child_process.exec were built from the ground up to be async.
For details see: https://github.com/ry/node/blob/master/lib/child_process.js
If you really want to have this blocking, then put everything that needs to happen afterwards in a callback, or build your own queue to handle this in a blocking fashion, I suppose you could use Async.js for this task.
Or, in case you have way too much time to spend, hack around in Node.js it self.
This is the easiest way I found:
exec-Sync:
https://github.com/jeremyfa/node-exec-sync
(Not to be confused with execSync.)
Execute shell command synchronously. Use this for migration scripts, cli programs, but not for regular server code.
Example:
var execSync = require('exec-sync');
var user = execSync('echo $USER');
console.log(user);
Just to add that even though there are few usecases where you should use them, spawnSync / execFileSync / execSync were added to node.js in these commits: https://github.com/joyent/node/compare/d58c206862dc...e8df2676748e
You can achieve this using fibers. For example, using my Common Node library, the code would look like this:
result = require('subprocess').command('node -v');
my way since 5 years is to have 2 lines ;
const { execSync } = require('child_process');
const shell = (cmd) => execSync(cmd, {encoding: 'utf8'});
Then enjoy:
shell('git remote -v')
or
out = shell('ls -l')
.. so on
I get used to implement "synchronous" stuff at the end of the callback function. Not very nice, but it works. If you need to implement a sequence of command line executions you need to wrap exec into some named function and recursively call it.
This pattern seem to be usable for me:
SeqOfExec(someParam);
function SeqOfExec(somepParam) {
// some stuff
// .....
// .....
var execStr = "yourExecString";
child_proc.exec(execStr, function (error, stdout, stderr) {
if (error != null) {
if (stdout) {
throw Error("Smth goes wrong" + error);
} else {
// consider that empty stdout causes
// creation of error object
}
}
// some stuff
// .....
// .....
// you also need some flag which will signal that you
// need to end loop
if (someFlag ) {
// your synch stuff after all execs
// here
// .....
} else {
SeqOfExec(someAnotherParam);
}
});
};
I had a similar problem and I ended up writing a node extension for this. You can check out the git repository. It's open source and free and all that good stuff !
https://github.com/aponxi/npm-execxi
ExecXI is a node extension written in C++ to execute shell commands
one by one, outputting the command's output to the console in
real-time. Optional chained, and unchained ways are present; meaning
that you can choose to stop the script after a command fails
(chained), or you can continue as if nothing has happened !
Usage instructions are in the ReadMe file. Feel free to make pull requests or submit issues!
EDIT: However it doesn't return the stdout yet... Just outputs them in real-time. It does now. Well, I just released it today. Maybe we can build on it.
Anyway, I thought it was worth to mention it.
you can do synchronous shell operations in nodejs like so:
var execSync = function(cmd) {
var exec = require('child_process').exec;
var fs = require('fs');
//for linux use ; instead of &&
//execute your command followed by a simple echo
//to file to indicate process is finished
exec(cmd + " > c:\\stdout.txt && echo done > c:\\sync.txt");
while (true) {
//consider a timeout option to prevent infinite loop
//NOTE: this will max out your cpu too!
try {
var status = fs.readFileSync('c:\\sync.txt', 'utf8');
if (status.trim() == "done") {
var res = fs.readFileSync("c:\\stdout.txt", 'utf8');
fs.unlinkSync("c:\\stdout.txt"); //cleanup temp files
fs.unlinkSync("c:\\sync.txt");
return res;
}
} catch(e) { } //readFileSync will fail until file exists
}
};
//won't return anything, but will take 10 seconds to run
console.log(execSync("sleep 10"));
//assuming there are a lot of files and subdirectories,
//this too may take a while, use your own applicable file path
console.log(execSync("dir /s c:\\usr\\docs\\"));
EDIT - this example is meant for windows environments, adjust for your own linux needs if necessary
I actually had a situation where I needed to run multiple commands one after another from a package.json preinstall script in a way that would work on both Windows and Linux/OSX, so I couldn't rely on a non-core module.
So this is what I came up with:
#cmds.coffee
childproc = require 'child_process'
exports.exec = (cmds) ->
next = ->
if cmds.length > 0
cmd = cmds.shift()
console.log "Running command: #{cmd}"
childproc.exec cmd, (err, stdout, stderr) ->
if err? then console.log err
if stdout? then console.log stdout
if stderr? then console.log stderr
next()
else
console.log "Done executing commands."
console.log "Running the follows commands:"
console.log cmds
next()
You can use it like this:
require('./cmds').exec ['grunt coffee', 'nodeunit test/tls-config.js']
EDIT: as pointed out, this doesn't actually return the output or allow you to use the result of the commands in a Node program. One other idea for that is to use LiveScript backcalls. http://livescript.net/

Categories

Resources