I have a Node.js script using chokidar that watches a folder and listens for files being added.
When a file is added, it starts a function with many loops.
Sometimes the system adds 100 or more files at the same time, so the add event fires 100 times and the server can crash.
Is there a way to limit the number of files being processed at once, so the others wait until the current ones are finished?
For example, say I decide I can't process more than 10 files at the same time; the other files wait until the first 10 have finished their work.
Is this possible?
This is my script:
var chokidar = require('chokidar');
var sys = require('sys');
var exec = require('child_process').exec;

var folder = process.argv[2];

var watcher = chokidar.watch('tmp/xml/' + folder + '/', {ignored: /^\./, persistent: true});

watcher.on('add', function(path) {
    var filenameArr = path.split('/');
    var filename = folder + '/' + filenameArr[filenameArr.length - 1];
    function puts(error, stdout, stderr) { sys.puts(stdout); }
    exec('bin/cake manager readXmlAndSave ' + filename, puts);
});
const chokidar = require('chokidar');
const async = require("async");

[...]

let taskQueue;

const watcher = chokidar.watch('myPath');
watcher.on('add', function(path) {
    taskQueue.push({path});
});

// handle files 10 at a time to avoid exhausting critical resources
taskQueue = async.queue(async function(task) {
    await myAsyncHandler(task.path);
}, 10);

taskQueue.error(function(err, task) {
    handleError(err, task);
});
Out of curiosity, I'd love to see a pure vanilla JS answer using, for example, streams and backpressure.
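Not a full streams-and-backpressure answer, but here is a dependency-free sketch of the same throttling idea. The names (makeLimiter, run) are illustrative, and myAsyncHandler is the handler from the answer above:

const chokidar = require('chokidar');

// Tiny concurrency limiter: at most `concurrency` tasks run at once,
// the rest wait in a FIFO queue.
function makeLimiter(concurrency) {
    let active = 0;
    const pending = [];

    function next() {
        if (active >= concurrency || pending.length === 0) return;
        active++;
        const task = pending.shift();
        Promise.resolve()
            .then(task)
            .catch(console.error)
            .finally(() => { active--; next(); });
    }

    return function run(task) {
        pending.push(task);
        next();
    };
}

const run = makeLimiter(10);
chokidar.watch('myPath').on('add', (path) => {
    run(() => myAsyncHandler(path)); // queued; at most 10 in flight
});

Each added file just queues a task, and at most 10 tasks are in flight at any moment, which is the same behavior async.queue gives you.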
Related
I'm trying to write a node.js script that watches for changes in a directory of files, and then prints the files that are changed. How can I modify this script so that it watches a directory (instead of an individual file), and prints the names of the files in the directory as they are changed?
var fs = require('fs'),
    sys = require('sys');

var file = '/home/anderson/Desktop/fractal.png'; // this watches a file, but I want to watch a directory instead

fs.watchFile(file, function(curr, prev) {
    console.log("File was modified."); // is there some way to print the names of the files in the directory as they are modified?
});
Try Chokidar:
var chokidar = require('chokidar');

var watcher = chokidar.watch('file or dir', {ignored: /^\./, persistent: true});

watcher
    .on('add', function(path) { console.log('File', path, 'has been added'); })
    .on('change', function(path) { console.log('File', path, 'has been changed'); })
    .on('unlink', function(path) { console.log('File', path, 'has been removed'); })
    .on('error', function(error) { console.error('Error happened', error); });
Chokidar solves some of the cross-platform issues with watching files using just fs.
Why not just use the old fs.watch? It's pretty straightforward.
fs.watch('/path/to/folder', (eventType, filename) => {
    console.log(eventType);
    // could be either 'rename' or 'change'; new-file and delete
    // events also generally emit 'rename'
    console.log(filename);
});
For more info and details about the options param, see Node fs Docs
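One caveat worth knowing: fs.watch can fire several events for a single save, and filename may be null on some platforms. A small per-file debounce sketch (the 100 ms window here is an arbitrary choice):

const fs = require('fs');

const timers = new Map();

fs.watch('/path/to/folder', (eventType, filename) => {
    if (!filename) return; // filename is not guaranteed on every platform
    // reset this file's timer so only the last event in a burst is logged
    clearTimeout(timers.get(filename));
    timers.set(filename, setTimeout(() => {
        timers.delete(filename);
        console.log(eventType, filename);
    }, 100));
});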
Try hound:
hound = require('hound')

// Create a directory tree watcher.
watcher = hound.watch('/tmp')

// Create a file watcher.
watcher = hound.watch('/tmp/file.txt')

// Add callbacks for file and directory events. The change event only applies
// to files.
watcher.on('create', function(file, stats) {
    console.log(file + ' was created')
})
watcher.on('change', function(file, stats) {
    console.log(file + ' was changed')
})
watcher.on('delete', function(file) {
    console.log(file + ' was deleted')
})

// Unwatch specific files or directories.
watcher.unwatch('/tmp/another_file')

// Unwatch all watched files and directories.
watcher.clear()
It will execute once a file is changed.
I have a simple gulpfile.js, that defines only two tasks, buildLess and watchFiles:
var gulp = require('gulp');
var less = require('gulp-less');
var watch = require('gulp-watch');
var plumber = require('gulp-plumber');
var filter = require('gulp-filter');

function buildLess(done) {
    const fileFilter = filter(['**/*', '!**/mixins.less', '!**/variables.less']);
    gulp.src('./public/less/*.less')
        .pipe(fileFilter)
        .pipe(plumber())
        .pipe(less())
        .pipe(gulp.dest('./public/css/'));
    done();
}

function watchFiles() {
    gulp.watch(['public/less/*.less'], gulp.series('build-less'));
    // gulp.watch(['./public/less/*.less'], gulp.series(buildLess));
}

gulp.task('build-less', buildLess);
gulp.task('watch-files', watchFiles);
The first one ($ gulp build-less) works fine. The watchFiles task ($ gulp watch-files) can be started and doesn't cause any errors, but changes to public/less/style.less are ignored.
What is wrong with this gulpfile.js, and how can I get the watch-files task working?
The gulp.series API allows you to pass a string of a previously registered task. In your code, you haven't registered build-less yet.
var gulp = require('gulp');
var less = require('gulp-less');
var watch = require('gulp-watch');
var plumber = require('gulp-plumber');
var filter = require('gulp-filter');

function buildLess(done) {
    const fileFilter = filter(['**/*', '!**/mixins.less', '!**/variables.less']);
    gulp.src('./public/less/*.less')
        .pipe(fileFilter)
        .pipe(plumber())
        .pipe(less())
        .pipe(gulp.dest('./public/css/'));
    done();
}

gulp.task('build-less', buildLess);

function watchFiles() {
    gulp.watch(['public/less/*.less'], gulp.series('build-less'));
    // gulp.watch(['./public/less/*.less'], gulp.series(buildLess));
}

gulp.task('watch-files', watchFiles);
I would note that Gulp no longer recommends using the gulp.task API to register tasks; instead, use exports.
Secondly, you don't need gulp-watch, as gulp now comes with its own gulp.watch method (which you are already using).
Lastly, you should make sure you're correctly signaling async completion in your buildLess function. Below, I've changed that function to return a stream rather than calling a done() callback, since as you have it written there is a race condition where done() may be called before the Less compilation has finished.
var gulp = require('gulp');
var less = require('gulp-less');
var plumber = require('gulp-plumber');
var filter = require('gulp-filter');

function buildLess() {
    const fileFilter = filter(['**/*', '!**/mixins.less', '!**/variables.less']);
    return gulp
        .src('./public/less/*.less')
        .pipe(fileFilter)
        .pipe(plumber())
        .pipe(less())
        .pipe(gulp.dest('./public/css/'));
}

exports['build-less'] = buildLess;

function watchFiles() {
    gulp.watch(['public/less/*.less'], buildLess);
}

exports['watch-files'] = watchFiles;
Overall, I'd go through Gulp's documentation. They recently updated their website, and updated their documentation along with it. Going through that might clear up some other questions you may be having.
I am trying to make a script that automates my project setup every time I create a new React application. First, I made a bat file that runs my main.js script.
Here is the bat file:
@echo off
setlocal enableextensions
REM run my main
node "C:\Windows\System32\automate\main.js" %1
And here is the main.js file:
const { exec } = require("child_process");
const { replace, rename } = require("./file.js");
const axios = require("axios").default;

let state = {
    cwd: "E:/WORK/Projects",
    token: "[[my github token]]",
    project: {
        name: process.argv[2],
        path: "E:/WORK/Projects/" + process.argv[2],
        src: "E:/WORK/Projects/" + process.argv[2] + "/src",
    },
};

// init
let { cwd, project, token } = state;

// main
// create the app
exec("create-react-app " + project.name, { cwd });

// install deps
exec("npm i node-sass", { cwd: project.path });

// use sass
rename(project.src + "/index.css", "index.scss");
rename(project.src + "/App.css", "App.scss");
replace(project.src + "/index.js", "./index.css", "./index.scss");
replace(project.src + "/App.js", "./App.css", "./App.scss");

// structure my app
let code = [
    'mkdir "' + project.src + '/App"',
    'mkdir "' + project.src + '/App/Elements"',
    'mkdir "' + project.src + '/App/nav"',
    'touch "' + project.src + '/App/nav.jsx"',
    'touch "' + project.src + '/App/nav/nav.scss"',
];
code = code.join(" && ");
exec(code);

// push to github
axios({
    method: "post",
    url: "https://api.github.com/user/repos?access_token=" + token,
    data: {
        name: project.name,
    },
})
    .then((res) => {
        let repo = res.data.clone_url;
        let c = [
            "git remote add origin " + repo,
            "git push --set-upstream origin master",
            "git push",
        ].join(" && ");
        exec(c);
    })
    .catch((err) => {
        console.log(err.response.data.errors);
    });

// open in code
exec("code " + project.path);

// final message
console.log("Have Fun!");
Now everything is ready, but the only problem I have is that every line gets executed asynchronously.
For example, the create-react-app command takes a lot of time, and every following line depends on it finishing first.
You can use execSync instead of exec:
const {execSync} = require('child_process');
Here is the main documentation about this feature: NodeJs Docs
Specific Description:
The child_process.execSync() method is generally identical to child_process.exec() with the exception that the method will not return until the child process has fully closed. When a timeout has been encountered and killSignal is sent, the method won't return until the process has completely exited. If the child process intercepts and handles the SIGTERM signal and doesn't exit, the parent process will wait until the child process has exited.
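Applied to the script above, a minimal sketch (project and cwd come from the question's own state object; stdio: 'inherit' simply forwards the child's output to your terminal):

const { execSync } = require("child_process");

// Blocks until create-react-app has fully finished...
execSync("create-react-app " + project.name, { cwd, stdio: "inherit" });

// ...so this only starts once the app actually exists.
execSync("npm i node-sass", { cwd: project.path, stdio: "inherit" });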
This is a classic case where you can use locks or semaphores.
Just acquire the lock before the desired line and release it after that.
Refer to this for locks in JS: How to implement a lock in JavaScript
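For illustration, here is a minimal promise-chain lock. This is only a sketch: withLock and runTask are made-up names, not a library API.

// Each task waits for the previous one to finish before starting.
let lock = Promise.resolve();

function withLock(task) {
    const result = lock.then(task);
    lock = result.catch(() => {}); // keep the chain alive after errors
    return result;
}

// usage: the commands now run strictly one after another
withLock(() => runTask("create-react-app my-app"));
withLock(() => runTask("npm i node-sass"));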
You can use promisify from the util module to turn the async exec function into a promise so you can then use await exec in order for it to appear as executing synchronously.
const util = require('util');
const exec = util.promisify(require('child_process').exec);
Then you can wrap your code in a function like this:
async function init() {
    // your code
    await exec("create-react-app " + project.name, { cwd });
    // ...
}

init();
execSync may actually be easier for a simple script like this, but you may find the async versions better in other cases.
Goal: Get a List of Absolute Paths for all files in a directory recursively leveraging NodeJs.
Info: As a Python dev, I normally use the Python packages which handle this in a platform-independent fashion. My boss wanted some JavaScript code which would handle this goal... and as a former JS dev, I thought "oh, this is easy, let's look up the Node way, since I never got a chance to get my hands dirty with it," but I seem to be mistaken.
I don't see anything in Node relating to directory walking, or a way I could hack together to create such a thing.
I was looking in "Child Process", "Console", "File System", "OS", "Path", and "Process". I didn't see anything which would do anything akin to:
pushd .
cd $dir
for folder in $(ls);
do
    pushd .
    cd $folder
    # call again
    ls $(pwd)$flag >> $dir/files_matching.txt
    popd
done
# or any platform-independent means of recursively getting
# all files and their abs path which meet flag criterion,
# such as "*.txt" || "_*found*"
I could use child process to carry out Command Line items, but then I need to create a bunch of conditionals based on the OS consuming the app, and figured this would be something which already exists.
I don't want to reinvent the wheel, but figured this has already been done; I just don't see it in the base modules.
Is there a node module I would need which accomplishes this, which is outside of the base modules?
I am trying to avoid hand-rolling a conditional OS-based system to get an exhaustive list of absolute paths for all files under a directory (or a subset based on extensions, etc.).
I'd do it like this:
synchronous:
const fs = require("fs");
const { resolve } = require("path");

const getFiles = dir => {
    const stack = [resolve(dir)];
    const files = [];
    while (stack.length) {
        dir = stack.pop();
        fs.readdirSync(dir).forEach(item => {
            const path = resolve(dir, item);
            (fs.statSync(path).isDirectory() ? stack : files).push(path);
        });
    }
    return files;
};

console.log(getFiles("."));
asynchronous:
const fs = require("fs");
const { resolve } = require("path");
const pify = require("util").promisify;

const readdir = pify(fs.readdir);
const stat = pify(fs.stat);

const getFiles = async dir => {
    const files = await readdir(resolve(dir));
    const filesP = files.map(async file => {
        const path = resolve(dir, file);
        return (await stat(path)).isDirectory() ? getFiles(path) : path;
    });
    // return (await Promise.all(filesP)).flat(); // flat supported in node ~11
    return [].concat(...(await Promise.all(filesP)));
};

getFiles(".").then(console.log);
async demo https://repl.it/#marzelin/getFiles
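On newer Node versions (fs.promises and readdir's withFileTypes option landed around Node 10-11), roughly the same recursion can skip the extra stat call entirely; a sketch:

const { promises: fsp } = require("fs");
const { resolve } = require("path");

const getFiles = async dir => {
    // withFileTypes returns Dirent objects, so no separate stat is needed
    const entries = await fsp.readdir(dir, { withFileTypes: true });
    const paths = await Promise.all(entries.map(entry => {
        const path = resolve(dir, entry.name);
        return entry.isDirectory() ? getFiles(path) : path;
    }));
    return paths.flat(); // flat needs node ~11+
};

getFiles(".").then(console.log);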
So, I was looking at the File System module and noticed the function readdir
https://nodejs.org/dist/latest-v8.x/docs/api/fs.html#fs_fs_readdir_path_options_callback
which does the trick in part. I guess it wasn't named the way I would have looked for: I was searching for things involving LIST and DIR, but not READ.
Anyway, here is a way to read a directory:
var fs = require('fs');

if (process.argv.length <= 2) {
    console.log("Usage: " + __filename + " path/to/directory");
    process.exit(-1);
}

var path = process.argv[2];

fs.readdir(path, function(err, items) {
    console.log(items);
    for (var i = 0; i < items.length; i++) {
        console.log(items[i]);
    }
});
You'll notice that this one above is async, but there is a sync variant; just add "Sync" to the signature. Now you need to determine whether something is a directory:
let file = fs.statSync("path/to/directory")
let isDir = file.isDirectory()
So you can couple this all together.
var fs = require('fs');

function recurse_file_system(path, contains) {
    let files = fs.readdirSync(path);
    let dArr = []; // subdirectories to recurse into
    let fArr = []; // matching files found at this level
    for (let i in files) {
        let newPath = path + "/" + files[i];
        if (fs.statSync(newPath).isDirectory()) {
            dArr.push(newPath);
        } else {
            if (filter(files[i], contains)) {
                fArr.push(newPath);
            }
        }
    }
    if (dArr.length == 0) {
        return fArr;
    } else {
        for (let d in dArr) {
            let rslt = recurse_file_system(dArr[d], contains);
            for (let i in rslt) {
                fArr.push(rslt[i]);
            }
        }
        return fArr;
    }
}

console.log("Files:");
console.log(recurse_file_system("/", ".txt"));
console.log("Files:")
console.log(recurse_file_system("/"))
Now if you want to extend this, all you need to do is add a filter to, say, limit the returned set based on particular criteria, such as a file-name restriction:
function filter(filename, contains) {
    let reg = new RegExp(contains);
    return reg.test(filename);
}
You can add it in the base case, where you see filter... or you can just return the whole set and filter it afterwards with the array method, filter.
benchmark of my function:
mark@ichikawa:~/inbox/D3/read_logs$ time python countbytes.py
bytes: 277464

real    0m0.037s
user    0m0.036s
sys     0m0.000s

mark@ichikawa:~/inbox/D3/read_logs$ time node countbytes.js
bytes: 277464

real    0m0.144s
user    0m0.120s
sys     0m0.032s
The measurements were taken on an Ubuntu 13.04 x86_64 machine.
This is the simple version of my benchmark (I did 1000 iterations as well). It shows that the function I wrote to read tgz files takes more than 3x as long as the one I wrote in Python.
For 1000 iterations filesize 277kB (I used process.hrtime and timeit):
Node: 30.608409032000015
Python: 6.84210395813
For 1000 iterations size 9.7MB:
Node: 590.491709309999
Python: 200.796745062
Please let me know if you have any idea on how to speed up reading the tgz files.
Here is the code:
var fs = require('fs');
var tar = require('tar');
var zlib = require('zlib');
var Stream = require('stream');

var countBytes = new Stream;
countBytes.writable = true;
countBytes.count = 0;
countBytes.bytes = 0;

countBytes.write = function (buf) {
    countBytes.bytes += buf.length;
};

countBytes.end = function (buf) {
    if (arguments.length) countBytes.write(buf);
    countBytes.writable = false;
    console.log('bytes: ' + countBytes.bytes);
};

countBytes.destroy = function () {
    countBytes.writable = false;
};

fs.createReadStream('supercars-logs-13060317.tgz')
    .pipe(zlib.createUnzip())
    .pipe(tar.Extract({path: "responsetimes.log.13060317"}))
    .pipe(countBytes);
Any idea how to speed things up?
It looks good, but I'm curious why you are using a tar stream?
Also, I would implement countBytes using a Transform instead. I like to use through2 for this:
var fs = require('fs')
  , tar = require('tar')
  , zlib = require('zlib')
  , thr = require('through2')
  , cache = {bytes: 0}
  ;

fs.createReadStream('supercars-logs-13060317.tgz')
    .pipe(zlib.createUnzip())
    .pipe(thr(function(chunk, enc, next) {
        cache.bytes += chunk.length;
        next(null, chunk);
    }))
    .on('end', function() {
        console.log(cache.bytes);
    });
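If you'd rather avoid the extra dependency, the same counter works with Node's built-in stream.Transform; a sketch (not benchmarked):

var Transform = require('stream').Transform;

var bytes = 0;
var countBytes = new Transform({
    transform: function(chunk, enc, next) {
        bytes += chunk.length; // count, then pass the chunk through unchanged
        next(null, chunk);
    }
});

countBytes.on('finish', function() {
    console.log('bytes: ' + bytes);
});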