How to prevent my command line command from timing out? [duplicate] - javascript

Error "spawnSync /bin/sh ENOBUFS" spawns in my NodeJs application non-systematically while executing the following line :
child_process.execSync(`cd /tmp/myFolder ; tar -xjf myArchive.tar.bz2`);
Archive dimension is 81.5 MB, NodeJs version with NVM : 12.17.0.

The problem is that execSync runs the command in a shell with a limited buffer (200 KB by default) used to capture the command's output. Moreover, the default stdio option is "pipe", which means the output must be forwarded to the parent process.
To make the shell discard the output, i.e. redirect it to /dev/null and thus prevent the buffer from filling up, use the "ignore" stdio option as follows:
child_process.execSync(`cd /tmp/myFolder ; tar -xjf myArchive.tar.bz2`, { stdio: 'ignore' });
Read more about the exec and spawn execution modes in the Node.js child_process documentation.
P.S. Also note that this error occurs systematically when you run out of disk space during an archive extraction.

Use child_process.spawn if the output is important to you.
And even if you don't need it, execution output can be helpful when debugging. You can always add a simple switch which lets you silence the output should you choose.
I try not to use child_process.exec because child_process.spawn works just as well but without the limitations of exec. Of course, YMMV if your core application logic deals with streaming data. The reason child_process.spawn will work here is that it streams output, whereas child_process.exec buffers output, and if you fill up the max buffer then child_process.exec will crash. You can increase the buffer size of child_process.exec via the options parameter, but remember: the bigger the buffer, the more memory you use, whereas streaming usually keeps memory usage to a minimum.
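For completeness, raising the buffer looks something like this (a minimal sketch, not the approach recommended here; the 10 MB value is an arbitrary example):
const child_process = require("child_process");

// Raise maxBuffer so exec can hold larger output in memory.
// This trades memory for simplicity; streaming via spawn is still preferable.
const output = child_process.execSync("tar -xvjf myArchive.tar.bz2", {
  cwd: "/tmp/myFolder",
  maxBuffer: 10 * 1024 * 1024, // 10 MB
});
console.log(output.toString());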
Here's a reference implementation of the spawn-based approach. FYI, this code works on Node v14.
const child_process = require("child_process");

function spawn(instruction, spawnOpts = {}, silenceOutput = false) {
  return new Promise((resolve, reject) => {
    let errorData = "";
    const [command, ...args] = instruction.split(/\s+/);
    if (process.env.DEBUG_COMMANDS === "true") {
      console.log(`Executing \`${instruction}\``);
      console.log("Command", command, "Args", args);
    }
    const spawnedProcess = child_process.spawn(command, args, spawnOpts);
    let data = "";
    spawnedProcess.on("message", console.log);
    spawnedProcess.stdout.on("data", chunk => {
      if (!silenceOutput) {
        console.log(chunk.toString());
      }
      data += chunk.toString();
    });
    spawnedProcess.stderr.on("data", chunk => {
      errorData += chunk.toString();
    });
    spawnedProcess.on("close", function(code) {
      if (code > 0) {
        return reject(new Error(`${errorData} (Failed Instruction: ${instruction})`));
      }
      resolve(data);
    });
    spawnedProcess.on("error", function(err) {
      reject(err);
    });
  });
}

// example usage
async function run() {
  await spawn("echo hello");
  await spawn("echo hello", {}, true);
  await spawn("echo hello", { cwd: "/" });
}

run();

ENOBUFS means the process output exceeded the pipe buffer size. To fix this, redirect the process stdout to a file. Here is an example using Node.js:
// capture a large `adb logcat` output into a file
let ops = {};
ops.args = ['logcat', '-t', "'m-d h:min:s.000'"];
// set output file
ops.log = 'tmp/logfile.txt';
let largeout = run('adb.exe', ops);
console.log(largeout);

// +++++++++++++++ utils +++++++++++++++++++
function run(cmd, ops = { args: [], log: '' }) {
  const { readFileSync, openSync } = require('fs');
  if (ops.args && ops.args.length) {
    cmd = cmd + " " + (Array.isArray(ops.args) ? ops.args.join(" ") : ops.args);
  }
  try {
    // override stdio [stdin, stdout, stderr]: send stdout/stderr to the log file
    if (ops.log) {
      let log = openSync(ops.log, 'w');
      ops.stdio = [null, log, log];
    }
    let rs = require('child_process').execSync(cmd, ops);
    // nb: ops.log is a file path used to avoid the ~200 KB pipe limit (ENOBUFS)
    if (ops.log) rs = readFileSync(ops.log, 'utf8');
    return !rs ? false : rs.toString();
  } catch (e) {
    console.log('run err:', e.message);
    return false;
  }
}

Related

Downloading large files using nodejs piped stream causes huge memory usage and OOM Error

I am using Node.js to download large files (300 MB) from a server and pipe the response to a file write stream. As far as I understand pipes in Node.js, the data flow is managed by Node and I don't have to consider draining and other events. The issue I face is that the memory usage of the Docker container where my application is running increases by the same amount as the file being downloaded (i.e. it seems the file is being kept in memory). This memory usage persists even when I delete the file in the container. I am attaching the code used for creating the request and piping below for reference. The code runs fine but causes performance issues like huge memory/CPU usage and crashes with an OOM error. I am not able to understand what I am doing wrong.
let req = request({
  url: firmwareURL,
  maxAttempts: 5,
  retryDelay: 5000,
  retryStrategy: request.RetryStrategies.HTTPOrNetworkError
});
// 1. Perform server request
req.on('response', (res) => {
  console.log(methodName, 'Download response statusCode:', res.statusCode);
  if (res.statusCode === 200) {
    abortOperation = false;
    isStarted = "yes";
    // 1.1 Create local file stream if the file is found on url, with a highWaterMark parameter for bigger chunks
    // basepath + basefirmware folder + firmware name + file extension
    fileStoragePath = `${firmwareDirectory}/${ip}`;
    console.log("filestoragepath is", fileStoragePath);
    fileName = `${firmwareVersion}.${firmwareURL.split(".").pop()}`;
    // temporarily store the file
    tempFile = `${fileStoragePath}/${fileName}`;
    console.log("tempfile is", tempFile);
    writestream = fs.createWriteStream(tempFile, {
      highWaterMark: Math.pow(2, 20)
    }); // for 1mb buffer, can be increased
    writestream.on('error', function (err) {
      // on error
      console.log('Error while creating a file write stream' + err);
      abortOperation = true;
      isStarted = "no";
      _deleteProgressPointer(ip);
    });
    // 1.2 Get content length of the current response
    size = parseInt(res.headers['content-length'], 10);
    console.log(methodName, 'File size is:', size);
    req.pipe(writestream);
  } else {
    // 1.3 Ignore next request events on failure
    console.log(methodName, 'File not found on server. res.statusCode:', res.statusCode);
    abortOperation = true;
    isStarted = "no";
    _deleteProgressPointer(ip);
  }
});
// 3. In case of error ignore next request events
req.on('error', (error) => {
  console.log(methodName, 'File not found on server:', error);
  abortOperation = true;
  isStarted = "no";
  _deleteProgressPointer(ip);
});
// 4. After stream is received close the connection
req.on('end', () => {
  if (!abortOperation) {
    if (null !== writestream) {
      writestream.end();
      writestream.on('finish', function () {
        console.log(methodName, `File successfully downloaded for device ${ip} of firmware version ${firmwareVersion}`);
        try {
          // file extraction/storage operation
          // further check whether the file extension is valid or not
          if (ALLOWED_EXTENSION.includes(firmwareURL.split(".").pop())) {
            try {
              //req.unpipe(writestream);
              fileio.removeFile(tempFile); // deleting downloaded file to avoid storage issues
              console.log("upgrade ended");
              return upgradeOp;
            } catch (error) {
              console.log(`Error while renaming file: ${tempFile}`);
            }
          } else {
            console.log(methodName, ` Not a valid file extension: ${tempFile}`);
            fileio.removeFile(tempFile);
            console.log(methodName, ` Invalid: ${tempFile} removed`);
          }
          // delete the progress pointer
          _deleteProgressPointer(ip);
        } catch (error) {
          // delete the progress pointer
          _deleteProgressPointer(ip);
          console.log(methodName, `Error during read/write operation :${error}`);
        }
      });
    }
  }
});
The problem is that you are using the requestretry package, which does not really support streaming. It always calls request with a callback and provides a promise that resolves with the full response. The request library reads the entire response body when such a callback is provided, which indeed buffers the complete response in memory. This is not what you want.
I don't see a way to do streaming-only with requestretry, so you should use the request package directly (or, given its deprecation, one of its successor libraries) and handle the retry logic yourself.
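A rough sketch of that idea (my own illustration, not code from the original answer; the retry delay and error handling are assumptions you would adapt):
const fs = require('fs');
const request = require('request'); // or a maintained successor library

// Stream the response body straight to disk and retry the whole download
// on a network error, instead of buffering the response in memory.
function downloadWithRetry(url, dest, attemptsLeft = 5) {
  return new Promise((resolve, reject) => {
    request(url)
      .on('error', err => {
        if (attemptsLeft > 1) {
          // retry after a delay; a real implementation would also check the status code
          setTimeout(() => downloadWithRetry(url, dest, attemptsLeft - 1).then(resolve, reject), 5000);
        } else {
          reject(err);
        }
      })
      .pipe(fs.createWriteStream(dest))
      .on('finish', () => resolve(dest));
  });
}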


How to read large files with fs.read and a buffer in javascript?

I'm just learning JavaScript, and a common task I perform when picking up a new language is to write a hex-dump program. The requirements are: 1. read a file supplied on the command line, 2. be able to read huge files (reading a buffer at a time), 3. output the hex digits and printable ASCII characters.
Try as I might, I can't get the fs.read(...) function to actually execute. Here's the code I've started with:
const fs = require('fs');

console.log(process.argv);
if (process.argv.length < 3) {
  console.log("usage: node hd <filename>");
  process.exit(1);
}
fs.open(process.argv[2], 'r', (err, fd) => {
  if (err) {
    console.log("Error: ", err);
    process.exit(2);
  } else {
    fs.fstat(fd, (err, stats) => {
      if (err) {
        process.exit(4);
      } else {
        var size = stats.size;
        console.log("size = " + size);
        going = true;
        var buffer = new Buffer(8192);
        var offset = 0;
        //while( going ){
        while (going) {
          console.log("Reading...");
          fs.read(fd, buffer, 0, Math.min(size - offset, 8192), offset, (error_reading_file, bytesRead, buffer) => {
            console.log("READ");
            if (error_reading_file) {
              console.log(error_reading_file.message);
              going = false;
            } else {
              offset += bytesRead;
              for (a = 0; a < bytesRead; a++) {
                var z = buffer[a];
                console.log(z);
              }
              if (offset >= size) {
                going = false;
              }
            }
          });
        }
        //}
        fs.close(fd, (err) => {
          if (err) {
            console.log("Error closing file!");
            process.exit(3);
          }
        });
      }
    });
  }
});
If I comment-out the while() loop, the read() function executes, but only once of course (which works for files under 8K). Right now, I'm just not seeing the purpose of a read() function that takes a buffer and an offset like this... what's the trick?
Node v8.11.1, OSX 10.13.6
First of all, if this is just a one-off script that you run now and then and this is not code in a server, then there's no need to use the harder asynchronous I/O. You can use synchronous, blocking I/O with calls such as fs.openSync(), fs.statSync(), fs.readSync(), etc., and then things will work inside your while loop because those calls are blocking (they don't return until the results are done). You can write normal looping and sequential code with them. One should never use synchronous, blocking I/O in a server environment because it ruins the scalability of a server process (its ability to handle requests from multiple clients), but if this is a one-off local script with only one job to do, then synchronous I/O is perfectly appropriate.
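A minimal sketch of that synchronous approach (my own illustration of the blocking calls mentioned above, not code from the question):
const fs = require('fs');

// synchronous read loop: fine for a one-off CLI script, not for a server
const fd = fs.openSync(process.argv[2], 'r');
const size = fs.fstatSync(fd).size;
const buffer = Buffer.alloc(8192);
let offset = 0;
while (offset < size) {
  const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, offset);
  for (let i = 0; i < bytesRead; i++) {
    console.log(buffer[i]); // raw byte values, as in the original loop
  }
  offset += bytesRead;
}
fs.closeSync(fd);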
Second, here's why your code doesn't work properly. Javascript in node.js is single-threaded and event-driven. That means that the interpreter pulls an event out of the event queue, runs the code associated with that event and does nothing else until that code returns control back to the interpreter. At that point, it then pulls the next event out of the event queue and runs it.
When you do this:
while (going) {
  fs.read(..., (err, data) => {
    // some logic here that may change the value of the going variable
  });
}
You've just created yourself an infinite loop. This is because the while(going) loop just runs forever. It never stops looping and never returns control back to the interpreter so that it can fetch the next event from the event queue. It just keeps looping. But, the completion of the asynchronous, non-blocking fs.read() comes through the event queue. So, you're waiting for the going flag to change, but you never allow the system to process the events that can actually change the going flag. In your actual case, you will probably eventually run out of some sort of resource from calling fs.read() too many times in a tight loop or the interpreter will just hang in an infinite loop.
Understanding how to program a repetitive, looping type of tasks with asynchronous operations involved requires learning some new techniques for programming. Since much I/O in node.js is asynchronous and non-blocking, this is an essential skill to develop for node.js programming.
There are a number of different ways to solve this:
Use fs.createReadStream() and read the file by listening for the data event. This is probably the cleanest scheme. If your objective here is to do a hex outputter, you might even want to learn a stream feature called a transform, where you transform the binary stream into a hex stream (a small sketch of that appears after the first example below).
Use promise versions of all the relevant fs functions here and use async/await to allow your loop to wait for an async operation to finish before going to the next iteration. This allows you to write synchronous-looking code, but use async I/O.
Write a different type of looping construct (not a while loop) that manually repeats the loop after fs.read() completes.
Here's a simple example using fs.createReadStream():
const fs = require('fs');

function convertToHex(val) {
  let str = val.toString(16);
  if (str.length < 2) {
    str = "0" + str;
  }
  return str.toUpperCase();
}

let stream = fs.createReadStream(process.argv[2]);
let outputBuffer = "";
stream.on('data', (data) => {
  // you get an unknown length chunk of data from the file here in a Buffer object
  for (const val of data) {
    outputBuffer += convertToHex(val) + " ";
    if (outputBuffer.length > 100) {
      console.log(outputBuffer);
      outputBuffer = "";
    }
  }
}).on('error', err => {
  // some sort of error reading the file
  console.log(err);
}).on('end', () => {
  // output any remaining buffer
  console.log(outputBuffer);
});
Hopefully you will notice that, because the stream handles opening, closing and reading from the file for you, this is a much simpler way to code it. All you have to do is supply event handlers for data that is read, a read error and the end of the operation.
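And here is a minimal sketch of the transform idea mentioned in the list above (my own illustration, assuming the same command-line usage; not part of the original answer):
const fs = require('fs');
const { Transform } = require('stream');

// A Transform stream that converts each binary chunk into space-separated hex text
const toHex = new Transform({
  transform(chunk, encoding, callback) {
    const hex = Array.from(chunk, b => b.toString(16).padStart(2, '0').toUpperCase()).join(' ');
    callback(null, hex + ' ');
  }
});

fs.createReadStream(process.argv[2]).pipe(toHex).pipe(process.stdout);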
Here's a version using async/await and the new file interface (where the file descriptor is an object that you call methods on) with promises in node v10.
const fs = require('fs').promises;

function convertToHex(val) {
  let str = val.toString(16);
  if (str.length < 2) {
    str = "0" + str;
  }
  return str.toUpperCase();
}

async function run() {
  const readSize = 8192;
  let cntr = 0;
  const buffer = Buffer.alloc(readSize);
  const fd = await fs.open(process.argv[2], 'r');
  try {
    let outputBuffer = "";
    while (true) {
      let data = await fd.read(buffer, 0, readSize, null);
      for (let i = 0; i < data.bytesRead; i++) {
        cntr++;
        outputBuffer += convertToHex(buffer.readUInt8(i)) + " ";
        if (outputBuffer.length > 100) {
          console.log(outputBuffer);
          outputBuffer = "";
        }
      }
      // see if all data has been read
      if (data.bytesRead !== readSize) {
        console.log(outputBuffer);
        break;
      }
    }
  } finally {
    await fd.close();
  }
  return cntr;
}

run().then(cntr => {
  console.log(`done - ${cntr} bytes read`);
}).catch(err => {
  console.log(err);
});

NodeJS readdirSync() not executed

I'm a beginner in a non-blocking environment such as Node.js. Below is my simple code, which lists all files in a directory:
var readline = require('readline');
var rl = readline.createInterface(process.stdin, process.stdout);
var fs = require('fs');
var datafolder = './datafolder';
var datafoldername = 'datafolder';

rl.setPrompt('Option> ');
rl.prompt();
rl.on('line', function(line) {
  if (line === "right") rl.close();
  if (line == '1') {
    listFile();
  }
  rl.prompt();
}).on('close', function() {
  process.exit(0);
});

function listFile() {
  console.log(`File(s) on ${datafolder}`);
  fs.readdirSync(datafolder, (err, files) => {
    if (err) {
      console.log(err);
    } else {
      files.forEach(filename => {
        console.log(filename);
      });
    }
  });
}
If the user presses 1, it's supposed to execute the listFile method and show all files inside.
My question is: why is fs.readdirSync not executed? The program works if I do it with readdir(), but that messes up the output to the user.
You are passing a callback to fs.readdirSync() but *Sync() functions don't take callbacks. The callback is never run (because the function does not take a callback), so you see no output. But fs.readdirSync() does in fact execute.
fs.readdirSync() simply returns its value (which may make the program easier to read, but also means the call will block, which may be OK depending on what your program does and how it is used).
var resultsArray = fs.readdirSync(datafolder);
(You may want to wrap it in a try/catch for error handling.)
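A small sketch of what that could look like inside listFile() (my own illustration, reusing the datafolder variable from the question):
function listFile() {
  console.log(`File(s) on ${datafolder}`);
  try {
    const files = fs.readdirSync(datafolder); // returns an array of names; no callback
    files.forEach(filename => console.log(filename));
  } catch (err) {
    console.log(err);
  }
}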

Run async code from command line, node js

I have a function that generates some test data and inserts it to a mongodb:
'use strict';
const CvFaker = require('./cv-faker');
const mongoose = require('mongoose');
require('../models/cv_model.js');

module.exports.init = function() {
  var cvfaker = new CvFaker();
  cvfaker.genCvs(100);
  mongoose.model('cv').create(cvfaker.cvs, (err, createdCvs) => {
    if (err) {
      console.log('something went wrong');
    }
  });
};
I want to execute this code from the command line:
node -e 'require("./create-mock-db").init()'
The function executes, but it does not wait for the function to complete since it is async. How do I make it wait for the function to complete?
This is not working either:
module.exports.init = function(cb){ ...
..
cb();
node -e 'require("./create-mock-db").init(function(){})'
As this answer might come up for more people…
// test.js
const request = require('request');
const rp = require('request-promise');

const demo = module.exports.demo = async function() {
  try {
    const res = await rp.post({
      uri: 'https://httpbin.org/anything',
      body: { hi: 'there' },
      json: true, // so the object body is serialized as JSON
    }, function (error, response, body) {
      return error ? error : body;
    });
    console.log(res);
    return res;
  }
  catch (e) {
    console.error(e);
  }
};
Call it like this:
$ node -e 'require("./test").demo()'
Sidenote:
it does not wait for the function to complete since it is async
It's not async. You might call asynchronous functions, but you are not treating them as such and not awaiting any result.
The node process will not exit until the event queue is empty. The event loop uses the event queue to make asynchronous execution possible.
It's pretty simple to verify that this is not an issue with executing asynchronous code.
node -e "setTimeout(() => console.log('done'), 5000)"
This example takes 5 seconds to run, as you would expect.
The problem with your code is the fact that you never establish a connection with the database. The model.create method doesn't do anything until there is a connection, therefore nothing ever gets queued and the process is free to exit.
This means your code needs to change to do two things:
Connect to the database so that the model.create method can execute.
Disconnect from the database when model.create is complete so the process is free to exit.
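A minimal sketch of those two changes (the connection URL and the callback wiring are my own assumptions, not part of the original code):
module.exports.init = function(cb) {
  mongoose.connect('mongodb://localhost/test'); // 1. establish the connection first
  var cvfaker = new CvFaker();
  cvfaker.genCvs(100);
  mongoose.model('cv').create(cvfaker.cvs, (err, createdCvs) => {
    if (err) {
      console.log('something went wrong');
    }
    mongoose.disconnect(); // 2. close the connection so the process can exit
    if (cb) cb(err);
  });
};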
To add to Kaiser's answer, if you are on Windows using cmd, the single/double quotes are important. Put the double quotes on the outside, i.e.
node -e "require('./test').demo()"
