Looping through files in a folder in Node.js - JavaScript

I am trying to loop through and pick up files in a directory, but I'm having some trouble implementing it. How do I pull in multiple files and then move them to another folder?
var fs = require('fs');

var dirname = 'C:/FolderwithFiles';

console.log("Going to get file info!");
fs.stat(dirname, function (err, stats) {
    if (err) {
        return console.error(err);
    }
    console.log(stats);
    console.log("Got file info successfully!");

    // Check file type
    console.log("isFile ? " + stats.isFile());
    console.log("isDirectory ? " + stats.isDirectory());
});

Older answer with callbacks
You want to use the fs.readdir function to get the directory contents and the fs.rename function to actually do the renaming. Both of these functions have synchronous versions if you need to wait for them to finish before running the code that follows.
I wrote a quick script that does what you described.
var fs = require('fs');
var path = require('path');
// In newer Node.js versions where process is already global this isn't necessary.
var process = require("process");

var moveFrom = "/home/mike/dev/node/sonar/moveme";
var moveTo = "/home/mike/dev/node/sonar/tome";

// Loop through all the files in the temp directory
fs.readdir(moveFrom, function (err, files) {
    if (err) {
        console.error("Could not list the directory.", err);
        process.exit(1);
    }

    files.forEach(function (file, index) {
        // Make one pass and make the file complete
        var fromPath = path.join(moveFrom, file);
        var toPath = path.join(moveTo, file);

        fs.stat(fromPath, function (error, stat) {
            if (error) {
                console.error("Error stating file.", error);
                return;
            }

            if (stat.isFile())
                console.log("'%s' is a file.", fromPath);
            else if (stat.isDirectory())
                console.log("'%s' is a directory.", fromPath);

            fs.rename(fromPath, toPath, function (error) {
                if (error) {
                    console.error("File moving error.", error);
                } else {
                    console.log("Moved file '%s' to '%s'.", fromPath, toPath);
                }
            });
        });
    });
});
Tested on my local machine.
node testme.js
'/home/mike/dev/node/sonar/moveme/hello' is a file.
'/home/mike/dev/node/sonar/moveme/test' is a directory.
'/home/mike/dev/node/sonar/moveme/test2' is a directory.
'/home/mike/dev/node/sonar/moveme/test23' is a directory.
'/home/mike/dev/node/sonar/moveme/test234' is a directory.
Moved file '/home/mike/dev/node/sonar/moveme/hello' to '/home/mike/dev/node/sonar/tome/hello'.
Moved file '/home/mike/dev/node/sonar/moveme/test' to '/home/mike/dev/node/sonar/tome/test'.
Moved file '/home/mike/dev/node/sonar/moveme/test2' to '/home/mike/dev/node/sonar/tome/test2'.
Moved file '/home/mike/dev/node/sonar/moveme/test23' to '/home/mike/dev/node/sonar/tome/test23'.
Moved file '/home/mike/dev/node/sonar/moveme/test234' to '/home/mike/dev/node/sonar/tome/test234'.
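If you would rather wait for everything to finish before continuing, here is a minimal sketch of the same move using the synchronous versions mentioned above (readdirSync and renameSync); the paths are the same as in the script:

var fs = require('fs');
var path = require('path');

var moveFrom = "/home/mike/dev/node/sonar/moveme";
var moveTo = "/home/mike/dev/node/sonar/tome";

// Blocks until every entry has been moved
fs.readdirSync(moveFrom).forEach(function (file) {
    fs.renameSync(path.join(moveFrom, file), path.join(moveTo, file));
});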
Update: fs.promises functions with async/await
Inspired by ma11hew28's answer (shown here), here is the same thing as above but with the async functions in fs.promises. As noted by ma11hew28, this may have memory limitations versus fs.promises.opendir added in v12.12.0.
Quick code below.
//jshint esversion:8
//jshint node:true
const fs = require('fs');
const path = require('path');

const moveFrom = "/tmp/movefrom";
const moveTo = "/tmp/moveto";

// Make an async function that gets executed immediately
(async () => {
    // Our starting point
    try {
        // Get the files as an array
        const files = await fs.promises.readdir(moveFrom);

        // Loop them all with the new for...of
        for (const file of files) {
            // Get the full paths
            const fromPath = path.join(moveFrom, file);
            const toPath = path.join(moveTo, file);

            // Stat the file to see if we have a file or dir
            const stat = await fs.promises.stat(fromPath);

            if (stat.isFile())
                console.log("'%s' is a file.", fromPath);
            else if (stat.isDirectory())
                console.log("'%s' is a directory.", fromPath);

            // Now move async
            await fs.promises.rename(fromPath, toPath);

            // Log because we're crazy
            console.log("Moved '%s'->'%s'", fromPath, toPath);
        } // End for...of
    }
    catch (e) {
        // Catch anything bad that happens
        console.error("We've thrown! Whoops!", e);
    }
})(); // Wrap in parenthesis and call now

fs.readdir(path[, options], callback) (which Mikey A. Leonetti used in his answer) and its variants (fsPromises.readdir(path[, options]) and fs.readdirSync(path[, options])) each reads all of a directory's entries into memory at once. That's good for most cases, but if the directory has very many entries and/or you want to lower your application's memory footprint, you could instead iterate over the directory's entries one at a time.
Asynchronously
Directories are async iterable, so you could do something like this:
const fs = require('fs')

async function ls(path) {
  const dir = await fs.promises.opendir(path)
  for await (const dirent of dir) {
    console.log(dirent.name)
  }
}

ls('.').catch(console.error)
Or, you could use dir.read() and/or dir.read(callback) directly.
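For example, a rough sketch of draining a directory with dir.read(), assuming Node v12.12.0+ (note that, unlike for await...of, you have to close the handle yourself):

const fs = require('fs')

async function lsOneByOne(path) {
  const dir = await fs.promises.opendir(path)
  let dirent
  // dir.read() resolves with the next Dirent, or null once the directory is exhausted
  while ((dirent = await dir.read()) !== null) {
    console.log(dirent.name)
  }
  await dir.close()
}

lsOneByOne('.').catch(console.error)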
Synchronously
Directories aren't sync iterable, but you could use dir.readSync() directly. For example:
const fs = require('fs')

const dir = fs.opendirSync('.')
let dirent
while ((dirent = dir.readSync()) !== null) {
  console.log(dirent.name)
}
dir.closeSync()
Or, you could make directories sync iterable. For example:
const fs = require('fs')

function makeDirectoriesSyncIterable() {
  const p = fs.Dir.prototype
  if (p.hasOwnProperty(Symbol.iterator)) { return }
  const entriesSync = function* () {
    try {
      let dirent
      while ((dirent = this.readSync()) !== null) { yield dirent }
    } finally { this.closeSync() }
  }
  if (!p.hasOwnProperty('entriesSync')) { p.entriesSync = entriesSync }
  Object.defineProperty(p, Symbol.iterator, {
    configurable: true,
    enumerable: false,
    value: entriesSync,
    writable: true
  })
}
makeDirectoriesSyncIterable()
And then, you could do something like this:
const dir = fs.opendirSync('.')
for (const dirent of dir) {
  console.log(dirent.name)
}
Note: "In busy processes, use the asynchronous versions of these calls. The synchronous versions will block the entire process until they complete, halting all connections."
References:
Node.js Documentation: File System: Class fs.Dir
Node.js source code: fs.Dir
GitHub: nodejs/node: Issues: streaming / iterative fs.readdir #583

Read all folders in a directory
const fs = require('fs');

const readAllFolder = (dirMain) => {
  const readDirMain = fs.readdirSync(dirMain);

  console.log(dirMain);
  console.log(readDirMain);

  readDirMain.forEach((dirNext) => {
    console.log(dirNext, fs.lstatSync(dirMain + "/" + dirNext).isDirectory());
    if (fs.lstatSync(dirMain + "/" + dirNext).isDirectory()) {
      readAllFolder(dirMain + "/" + dirNext);
    }
  });
};
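For example (the folder name is just a placeholder):

readAllFolder("./my-projects"); // logs every entry and recurses into each nested directory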

The answers provided are for a single folder. Here is an asynchronous implementation for multiple folders, where all the folders are processed simultaneously but the smaller folders or files get completed first.
Please comment if you have any feedback.
Asynchronously processing multiple folders
const fs = require('fs')
const util = require('util')
const path = require('path')

// Multiple folders list
const in_dir_list = [
  'Folder 1 Large',
  'Folder 2 Small', // small folder and files will complete first
  'Folder 3 Extra Large'
]

// BEST PRACTICES:
// (1) The faster folder-list for loop has to stay outside the async capture-callback
//     functions for the asynchrony to make sense.
// (2) Slower read/write or I/O work is best contained in async capture-callback
//     functions, because those operations are slower than the loop itself and the
//     faster completed items get called back first.
for (let i = 0; i < in_dir_list.length; i++) {
  const in_dir = in_dir_list[i]
  // Function is created (see below) so each folder is processed asynchronously
  // for the readFile_async that follows
  readdir_async_capture(in_dir, function (files_path) {
    console.log("Processing folders asynchronously ...")
    for (let j = 0; j < files_path.length; j++) {
      const file_path = files_path[j]
      const file = file_path.substr(file_path.lastIndexOf("/") + 1, file_path.length)
      // Function is created (see below) so all files are read simultaneously, but the
      // smallest file will be completed first and get called back first
      readFile_async_capture(file_path, file, function (file_string) {
        try {
          console.log(file_path)
          console.log(file_string)
        } catch (error) {
          console.log(error)
          console.log("System exiting first to catch error; if not async will continue...")
          process.exit()
        }
      })
    }
  })
}

// fs.readdir async capture function to deal with the asynchronous code above
function readdir_async_capture(in_dir, callback) {
  fs.readdir(in_dir, function (error, files) {
    if (error) { return console.log(error) }
    const files_path = files.map(function (x) { return path.join(in_dir, x) })
    callback(files_path)
  })
}

// fs.readFile async capture function to deal with the asynchronous code above
function readFile_async_capture(file_path, file, callback) {
  fs.readFile(file_path, function (error, data) {
    if (error) { return console.log(error) }
    const file_string = data.toString()
    callback(file_string)
  })
}
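For comparison, here is a shorter sketch of the same idea using fs.promises and Promise.all instead of hand-rolled capture callbacks. This is only an illustration, not part of the answer above, and the folder names are the same placeholders:

const fs = require('fs')
const path = require('path')

const in_dir_list = [
  'Folder 1 Large',
  'Folder 2 Small',
  'Folder 3 Extra Large'
]

async function readFolder(in_dir) {
  const files = await fs.promises.readdir(in_dir)
  // All files in the folder are read concurrently; smaller files resolve first
  return Promise.all(files.map(async (file) => {
    const file_path = path.join(in_dir, file)
    const file_string = (await fs.promises.readFile(file_path)).toString()
    console.log(file_path)
    return file_string
  }))
}

// All folders are kicked off at once
Promise.all(in_dir_list.map(readFolder)).catch(console.error)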

Related

copy folder in node using cp feature

I am trying to copy a folder and all of its content using the Node.js cp feature as follows:
fs.cp('D:\\Developer\\insomniac-beta\\template', dir_path, {recursive: true});
however, it's throwing this error:
node:internal/validators:232
throw new ERR_INVALID_ARG_TYPE(name, 'Function', value);
^
TypeError [ERR_INVALID_ARG_TYPE]: The "cb" argument must be of type function. Received undefined
at makeCallback (node:fs:191:3)
at Object.cp (node:fs:2848:14)
at D:\Developer\igbot\generate_config.js:30:13
at FSReqCallback.oncomplete (node:fs:193:23) {
code: 'ERR_INVALID_ARG_TYPE'
}
How is this possible? I do not have any calls to cb.
If you don't want to use an asynchronous copy with a callback, you can use the synchronous version:
fs.cpSync(sourceDir, destDir, {recursive: true});
You are missing one argument. As mentioned in the documentation, fs.cp is an asynchronous function that takes a callback; the final argument needs to be a callback function:
fs.cp('D:\\Developer\\insomniac-beta\\template', dir_path, (err) => {
  // handle error
})
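If you still need the recursive option, it can presumably stay; the callback just has to come last:

fs.cp('D:\\Developer\\insomniac-beta\\template', dir_path, { recursive: true }, (err) => {
  if (err) console.error(err);
});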
It seems like you're using the promises API, but you didn't show how you import the module. Here's an example with the current Node LTS (v16.x):
Ref: fsPromises.cp(src, dest[, options])
import {promises as fs} from 'fs';
// ...
await fs.cp(sourceDir, destDir, {recursive: true});
Here's a full, self-contained example which creates a sample dir structure, copies it, verifies the copy, and cleans up the sample data:
example.mjs:
import * as path from 'path';
import {constants as fsConstants, promises as fs} from 'fs';
import {fileURLToPath} from 'url';
import {ok as assert} from 'assert/strict';

// Create sample folder structure, return relative file paths
async function createSampleFiles (rootDir) {
  const writeFileOpts = {encoding: 'utf8'};
  const filePaths = [];

  await fs.mkdir(rootDir, {recursive: true});

  let fPath = 'hello.txt';
  filePaths.push(fPath);
  fPath = path.join(rootDir, fPath);
  let text = 'hello world\n';
  await fs.writeFile(fPath, text, writeFileOpts);

  let dir = 'more';
  await fs.mkdir(path.join(rootDir, dir), {recursive: true});
  fPath = path.join(dir, 'wow.txt');
  filePaths.push(fPath);
  fPath = path.join(rootDir, fPath);
  text = 'wow\n';
  await fs.writeFile(fPath, text, writeFileOpts);

  return filePaths;
}

async function fsEntryExists (filePath) {
  try {
    await fs.access(filePath, fsConstants.F_OK);
    return true;
  }
  catch (ex) {
    if (ex instanceof Error && ex.code === 'ENOENT') return false;
    throw ex;
  }
}

async function assertFSEntryExists (filePath) {
  assert(await fsEntryExists(filePath), `FS entry not found for "${filePath}"`);
}

async function main () {
  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  const sourceDir = path.join(moduleDir, 'data');
  const destDir = path.join(moduleDir, 'data-copy');

  const relativePaths = await createSampleFiles(sourceDir);
  await fs.cp(sourceDir, destDir, {recursive: true});

  let exitCode = 0;
  try {
    const filePaths = relativePaths.map(fPath => path.join(destDir, fPath));
    for (const fPath of filePaths) await assertFSEntryExists(fPath);
    console.log('Copy successful');
  }
  catch {
    console.error('Copy failed');
    exitCode = 1;
  }
  finally {
    // Cleanup
    for (const dir of [sourceDir, destDir]) {
      if (await fsEntryExists(dir)) await fs.rm(dir, {recursive: true});
    }
    process.exit(exitCode);
  }
}

main();
$ node --version
v16.15.0
$ node example.mjs
Copy successful

Async function immediately returns undefined yet output variable returns a value immediately before the return statement

I am writing a function that downloads a PDF and converts it into individual JPG files, one per page. I am using the imagemagick library to do the conversion. I am having trouble with my processPDF() function, as it immediately returns undefined. I put a console.log statement immediately before the function returns, and it logs the exact value I expect, yet that value doesn't seem to make it outside of the function.
import im from 'imagemagick'
import { promises as fs } from 'fs'
import path from 'path'
import _ from 'lodash'
import axios from 'axios'
import { v4 as uuid } from 'uuid'

async function processPDF(pdfPath) {
  let basename = path.basename(pdfPath, '.pdf')
  let outputPath = "./img/" + basename + ".jpg";

  console.log(`Converting ${pdfPath}`)

  // Take PDF file and generate individual JPG files
  await im.convert(["-density", 300, pdfPath, outputPath], async (err) => {
    if (err) {
      console.log(err)
      throw `Couldn't Process ${pdfPath}`
    }
    else {
      // Get every file in Temporary Image Directory
      let files = await fs.readdir(`./img/`)

      // Append directory into filenames
      files = files.map(file => {
        return "./img/" + file
      })

      // We only want the files that match the source pdf's name
      files = files.filter((file) => {
        return file.includes(basename)
      })

      console.log(`Getting ${basename} Buffer Data`)

      // For each file, read and return the buffer data along with the path
      let images = await Promise.all(files.map(async file => {
        const contents = await fs.readFile(file)
        return { path: file, buffer: contents }
      }))

      // Since we read the files asynchronously, reorder the files
      images = _.orderBy(images, (image) => {
        let regex = /\d*.jpg/
        let res = image.path.match(regex)[0]
        res = path.basename(res, '.jpg')
        return res
      })

      let output = { pdf: pdfPath, images }

      // Returns a value
      console.log(output)

      // Returns undefined???
      return output
    }
  })
}

export async function downloadAndProcessPDF(url) {
  // Fetch PDF from server
  let { data } = await axios.get(url, {
    responseType: 'arraybuffer',
    headers: {
      'Content-Type': 'application/json',
      'Accept': 'application/pdf'
    }
  }).catch(e => {
    console.log(e);
    throw `Can't retrieve ${url}`
  })

  // Generate a unique ID for the pdf; since this is called asynchronously,
  // it will be called many times simultaneously
  let id = "./pdf/" + uuid() + ".pdf"
  await fs.writeFile(id, data);

  // Tell processPDF to process the pdf in the ./pdf directory with the given filename
  let pdfData = await processPDF(id);

  // Returns undefined???
  console.log(pdfData)
  return pdfData
}
export async function downloadAndProcessPDF(url) {
// Fetch PDF from server
let { data } = await axios.get(url, {
responseType: 'arraybuffer',
headers: {
'Content-Type': 'application/json',
'Accept': 'application/pdf'
}
}).catch(e=>{
console.log(e);
throw `Can't retrieve ${url}`
})
// Generate a Unique ID for the pdf since this is called asynchronously, this will be called many times simultaneously
let id = "./pdf/" + uuid() + ".pdf"
await fs.writeFile(id, data);
// tell processPDF to process the pdf in the ./pdf directory with the given filename
let pdfData = await processPDF(id);
// Returns undefined???
console.log(pdfData)
return pdfData
}
If I had to take a wild guess, I'd think that im.convert is the function giving me trouble. Throughout my source code I'm using promises to handle asynchronous tasks, yet im.convert() uses a callback function. I'm not super familiar with how concurrency works between promises and callback functions, so I think that's probably the issue.
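For what it's worth, one common way around this mismatch is to wrap the callback-style call in a promise so that await really waits for it. A minimal sketch (not a full fix for the code above, and convertPdf is just an illustrative name):

import { promisify } from 'util'
import im from 'imagemagick'

// im.convert takes (args, callback); promisify turns it into a function that returns a promise
const convert = promisify(im.convert)

async function convertPdf(pdfPath, outputPath) {
  // Now this actually waits for ImageMagick to finish (or throws on error)
  await convert(["-density", 300, pdfPath, outputPath])
}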

How do I handle asynchronous calls when running newman (Postman) collections with node?

I could do this with bash, but I'm trying to learn Node and would like to do it from there. How do I make the newman run call synchronous? I don't really understand the use of async/await (if that is what is required here). I have the following script that loops over a bunch of collection files (each containing multiple requests) and calls newman run on each of them:
// node imports
const fs = require('fs');
const newman = require('newman');

// test variables
const testFolder = './api-tests/';

// read all files in the test folder
fs.readdirSync(testFolder).forEach(file => {
  console.log('Running file: ' + file);

  // run newman using the file
  newman.run({
    collection: require(testFolder + file),
    delayRequest: 500,
    iterationData: [
      {
        'host': 'localhost',
        'port': '8080'
      }
    ],
    reporters: ['cli', 'html']
  }, (err, summary) => {
    if (err) {
      throw err;
    }
    console.log(file + ' run complete');
  });
});
Newman executes each file immediately rather than waiting for the loop to go back around to the next file.
Thanks.
You can use deasync: https://github.com/abbr/deasync
var done = false;
fs.readdirSync(testFolder).forEach(file => {
  newman.run({
    ...
  }).on('start', function (err, args) { // on start of run, log to console
    console.log('running a collection...');
  }).on('done', function (err, summary) {
    ...
    done = true;
  });
  require('deasync').loopWhile(function () { return !done; });
  done = false;
});
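Alternatively, since newman.run accepts a Node-style callback, a rough sketch without deasync would be to promisify it and await each collection in a plain for...of loop (same folder and options shape as in the question):

const fs = require('fs');
const util = require('util');
const newman = require('newman');

const testFolder = './api-tests/';
const run = util.promisify(newman.run);

(async () => {
  for (const file of fs.readdirSync(testFolder)) {
    console.log('Running file: ' + file);
    // Each run completes before the next one starts
    await run({
      collection: require(testFolder + file),
      delayRequest: 500,
      reporters: ['cli', 'html']
    });
    console.log(file + ' run complete');
  }
})();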

Node.js how to wait on asynchronous call (readdir and stat)

I am working on a POST method on the server side to retrieve all files inside the requested directory (not recursive), and below is my code.
I am having difficulty sending the response back (res.json(pathContent);) with the updated pathContent without using the setTimeout.
I understand that this is due to the asynchronous behavior of the file system methods used (readdir and stat) and need to use some sort of callback, async, or promise technique.
I tried to use the async.waterfall with the entire body of readdir as one function and the res.json(pathContent) as the other, but it didn't send the updated array to the client side.
I know that there have been thousands of questions regarding this asynchronous operation, but I could not figure out how to solve my case after reading a number of posts.
Any comments would be appreciated. Thanks.
const express = require('express');
const bodyParser = require('body-parser');
const fs = require('fs');
const path = require('path');

var pathName = '';
const pathContent = [];

app.post('/api/files', (req, res) => {
  const newPath = req.body.path;

  fs.readdir(newPath, (err, files) => {
    if (err) {
      res.status(422).json({ message: `${err}` });
      return;
    }

    // set the pathName and empty pathContent
    pathName = newPath;
    pathContent.length = 0;

    // iterate each file
    const absPath = path.resolve(pathName);
    files.forEach(file => {
      // get file info and store in pathContent
      fs.stat(absPath + '/' + file, (err, stats) => {
        if (err) {
          console.log(`${err}`);
          return;
        }
        if (stats.isFile()) {
          pathContent.push({
            path: pathName,
            name: file.substring(0, file.lastIndexOf('.')),
            type: file.substring(file.lastIndexOf('.') + 1).concat(' File'),
          })
        } else if (stats.isDirectory()) {
          pathContent.push({
            path: pathName,
            name: file,
            type: 'Directory',
          });
        }
      });
    });
  });

  setTimeout(() => { res.json(pathContent); }, 100);
});
The easiest and cleanest way would be to use async/await; that way you can make use of promises and the code will almost look like synchronous code.
You therefore need promisified versions of readdir and stat, which can be created with promisify from the util core lib.
const { promisify } = require('util')
const readdir = promisify(require('fs').readdir)
const stat = promisify(require('fs').stat)

async function getPathContent(newPath) {
  // move pathContent here, otherwise you can have conflicts with concurrent requests
  const pathContent = [];

  let files = await readdir(newPath)

  let pathName = newPath;
  // pathContent.length = 0; // not needed anymore because pathContent is new for each request

  const absPath = path.resolve(pathName);

  // iterate each file
  // replace forEach with (for ... of) because this makes it easier
  // to work with "async"
  // otherwise you would need to use files.map and Promise.all
  for (let file of files) {
    // get file info and store in pathContent
    try {
      let stats = await stat(absPath + '/' + file)
      if (stats.isFile()) {
        pathContent.push({
          path: pathName,
          name: file.substring(0, file.lastIndexOf('.')),
          type: file.substring(file.lastIndexOf('.') + 1).concat(' File'),
        })
      } else if (stats.isDirectory()) {
        pathContent.push({
          path: pathName,
          name: file,
          type: 'Directory',
        });
      }
    } catch (err) {
      console.log(`${err}`);
    }
  }

  return pathContent;
}

app.post('/api/files', (req, res, next) => {
  const newPath = req.body.path;
  getPathContent(newPath).then((pathContent) => {
    res.json(pathContent);
  }, (err) => {
    res.status(422).json({
      message: `${err}`
    });
  })
})
And you should not concatenate paths using + (absPath + '/' + file); use path.join(absPath, file) or path.resolve(absPath, file) instead.
And you should never write your code so that the code executed for a request relies on global variables like var pathName = ''; and const pathContent = [];. This might work in your testing environment, but it will certainly lead to problems in production, where two requests work on those variables at the same time.
Based on the initial comment I received and the reference, I used readdirSync and statSync instead and was able to make it work. I will review the other answers as well and learn about other ways to implement this.
Thank you all for your kind inputs.
Here is my solution.
const express = require('express');
const bodyParser = require('body-parser');
const fs = require('fs');
const path = require('path');

var pathName = '';
const pathContent = [];

app.post('/api/files', (req, res) => {
  const newPath = req.body.path;

  // validate path
  let files;
  try {
    files = fs.readdirSync(newPath);
  } catch (err) {
    res.status(422).json({ message: `${err}` });
    return;
  }

  // set the pathName and empty pathContent
  pathName = newPath;
  pathContent.length = 0;

  // iterate each file
  let absPath = path.resolve(pathName);
  files.forEach(file => {
    // get file info and store in pathContent
    let fileStat = fs.statSync(absPath + '/' + file);
    if (fileStat.isFile()) {
      pathContent.push({
        path: pathName,
        name: file.substring(0, file.lastIndexOf('.')),
        type: file.substring(file.lastIndexOf('.') + 1).concat(' File'),
      })
    } else if (fileStat.isDirectory()) {
      pathContent.push({
        path: pathName,
        name: file,
        type: 'Directory',
      });
    }
  });

  res.json(pathContent);
});
There are different ways to do it:
You can first promisify the functions using new Promise(), then use async/await or .then().
You can use the promisifyAll() function of the Bluebird package (https://www.npmjs.com/package/bluebird).
You can use the synchronous versions of the fs functions:
https://nodejs.org/api/fs.html#fs_fs_readdirsync_path_options
https://nodejs.org/api/fs.html#fs_fs_statsync_path_options
Here are some options:
Use the synchronous file methods (check the docs, but they usually end with Sync). Slower, but a fairly simple code change, and very easy to understand.
Use promises (or util.promisify) to create a promise for each stat, and Promise.all to wait for all the stats to complete. After that, you can use async functions and await as well for easier-to-read code and simpler error handling. (Probably the largest code change, but it will make the async code easier to follow.)
Keep a counter of the number of stats you have done, and if that number is the size you expect, call res.json from inside the stat callback (smallest code change, but very error prone). A rough sketch of this is shown below.
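A rough sketch of that counter idea, based on the question's own readdir/stat code (error handling kept minimal on purpose):

fs.readdir(newPath, (err, files) => {
  if (err) return res.status(422).json({ message: `${err}` });
  const pathContent = [];
  let remaining = files.length;
  if (remaining === 0) return res.json(pathContent);
  files.forEach(file => {
    fs.stat(path.join(newPath, file), (err, stats) => {
      if (!err) {
        pathContent.push({
          path: newPath,
          name: file,
          type: stats.isDirectory() ? 'Directory' : 'File',
        });
      }
      // Respond only after the last stat callback has fired
      if (--remaining === 0) res.json(pathContent);
    });
  });
});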

Javascript - .map running out of memory

My libraries:
const Promise = require('bluebird');
const fs = Promise.promisifyAll(require('graceful-fs'));
const path = require('path');
const xml2js = Promise.promisifyAll(require('xml2js'));
I have a large number of XML files I want to parse. I am able to create an array of paths to all the files using this function:
function getFileNames(rootPath) {
  // Read content of path
  return fs.readdirAsync(rootPath)
    // Return all directories
    .then(function(content) {
      return content.filter(function(file) {
        return fs.statSync(path.join(rootPath, file)).isDirectory();
      });
    })
    // For every directory
    .map(function(directory) {
      // Save current path
      let currentPath = path.join(rootPath, directory);
      // Read files in the directory
      return fs.readdirAsync(currentPath)
        // Filter out the XMLs
        .filter(function(file) {
          return path.extname(file) === '.XML';
        })
        // Return path to file
        .map(function(file) {
          return path.join(rootPath, directory, file);
        });
    })
    // Flatten array of results
    .reduce(function(a, b) {
      return a.concat(b);
    });
}
and now I want to go through every single file and parse it.
I have 2 functions to do so:
function openFile(filePath) {
  return fs.readFileAsync('./' + filePath)
    .then(function(fileData) {
      return fileData;
    });
}

function parseFile(data) {
  return xml2js.parseStringAsync(data)
    .then(function(xmlObject) {
      return xmlObject;
    });
}
Now when I call this with the .map function (getFileNames outputs an array of over 20k file path strings):
getFileNames('./XML')
  .map(function(file) {
    openFile(file)
      .then(function(data) {
        parseFile(data)
          .then(function(object) {
            console.log(object);
          });
      });
  });
I get a javascript heap out of memory error:
FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - JavaScript heap
out of memory
But when I run the function a single time by passing in the path to the actual file:
openFile('./XML/2016-10-1/EUROTIPOLD2016-10-1T00-00-22.5756240530.XML')
  .then(function(data) {
    parseFile(data)
      .then(function(object) {
        console.log(object);
      });
  });
I get the desired output.
What am I doing wrong?
Iterating over ~20k files happens asynchronously:
1) You get the list of files.
2) With .map you're calling openFile and parseFile, which are async functions, and reading and parsing each file takes time.
Because of this asynchrony, the code proceeds to the next file without waiting for the previous one to finish, so the garbage collector never gets a chance to sweep memory, and that's where the insufficient-memory problem comes from.
Think about reading 20K files with different sizes at once.
So here is the solution:
Use async to synchronize (eachSeries) or control (eachLimit) iteration.
const async = require('async'); // install: npm i --save async

// getFileNames returns a promise, so wait for it before iterating
getFileNames('./XML').then(files => {
  // eachLimit(files, 3,
  async.eachSeries(files,
    (file, next) => {
      openFile(file)
        .then(
          parseFile,
          (err) => {
            console.error('Cannot open file:', file, err);
            next();
          })
        .then(
          object => { // successfully parsed file, so log it out and proceed to next file
            console.log(object);
            next();
          },
          (err) => {
            console.error('Cannot parse data from file:', file, err);
            next();
          });
    });
});
p.s. feel free to comment and fix code issue in my answer.
This is a simple case of your workload requiring more resources. I would look at increasing the heap size to meet your demand, rather than changing the source code.
I recommend setting --max_old_space_size accordingly to meet the requirement; it may be an iterative process, though.
Hope this helps.
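For example, to run with a roughly 4 GB old-space heap (the value and the script name are just illustrative):

node --max_old_space_size=4096 parse-xml.js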
