Determine when a recursive function is finished - JavaScript

I have a function that does a breadth-first search over a big graph.
Currently the app runs and finishes after some time.
I want to add a finished event to the EventEmitter.
My first idea was to implement a counter for each Recursive process.
But this could fail if some Recursive process never decrements the counter (counter--).
var App = function(start, cb) {
var Recursive = function(a, cb) {
// **asynchronous** and recursive breadth-first search
}
var eventEmitter = new EventEmitter();
cb(eventEmitter);
Recursive(start);
};
How can I emit the finished event once all the Recursive calls are finished?
Edit: App is not searching for something in the graph; it has to traverse the complete graph in order to finish. And it is not known how many elements are in the graph.
Edit 2: Something like computational reflection would be perfect, but it does not seem to exist in JavaScript.
The graph is very unstable and I am doing a few nested asynchronous calls, any of which could fail.
Is there a way to know when all asynchronous recursive calls are finished without using a counter?

JavaScript is single-threaded.
So unless Recursive(start) makes asynchronous calls (like setTimeout or Ajax), it's safe to just trigger your finished event right after calling the recursive function.
Asynchronous APIs generally pass around a done callback.
So you would have
Recursive(start, function() {
// trigger finished.
});
var Recursive = function(a, done) {
...
};
And it's up to users to call done when they are done.
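For a recursion where each step is itself asynchronous, one way to honour that contract is for every call to invoke its own done only after all of its child calls have reported back. A rough sketch, where visitNode is a hypothetical asynchronous graph accessor standing in for whatever the real search does, and start/eventEmitter are the ones from the question (visited-node tracking omitted):
var Recursive = function (a, done) {
    // visitNode(node, callback) is a hypothetical async call that yields the
    // neighbours of `a`; replace it with the real graph access.
    visitNode(a, function (err, children) {
        if (err || children.length === 0) { return done(); }
        var remaining = children.length;
        children.forEach(function (child) {
            Recursive(child, function () {
                // The last child to report back completes this call as well.
                if (--remaining === 0) { done(); }
            });
        });
    });
};

Recursive(start, function () {
    eventEmitter.emit('finished');
});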

Try something like this:
var App = function(start, cb) {
var pendingRecursive = 0;
var eventEmitter = new EventEmitter();
cb(eventEmitter);
var Recursive = function(a) {
// breadth-first search recursion
// before each recursive call:
pendingRecursive++;
Recursive(/*whatever*/);
// at the end of the function
if (--pendingRecursive == 0){
eventEmitter.emit('end');
}
}
pendingRecursive = 1;
Recursive(start);
};
Basically, you increment a counter before each recursive call and decrement it at the end of the call, so you're effectively counting the number of unfinished calls. When it reaches zero, you can emit your event.
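Note that if the search itself is asynchronous (as in the question), the decrement has to happen when the asynchronous work of a call completes, not at the synchronous end of the function. A sketch of that variant, with fetchNeighbors as a hypothetical async stand-in for the real graph access (visited-node tracking omitted):
var EventEmitter = require('events').EventEmitter;

var App = function (start, cb) {
    var pendingRecursive = 0;
    var eventEmitter = new EventEmitter();
    cb(eventEmitter);

    var Recursive = function (node) {
        pendingRecursive++;
        // fetchNeighbors(node, callback) is a hypothetical async call.
        fetchNeighbors(node, function (err, neighbors) {
            if (!err) {
                neighbors.forEach(Recursive);
            }
            // This call's async work is done; if nothing else is pending, emit.
            if (--pendingRecursive === 0) {
                eventEmitter.emit('end');
            }
        });
    };

    Recursive(start);
};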

Try something like this, based on Adrien's answer:
/**
* Function to search for a file recursively from a base directory
* returns an array of absolute paths for files that match the search
* criteria
*/
let recursiveFileSearch = ( baseDir, fileId ) => {
let pathsArray = [];
let pendingRecursive = 1;
//recursive function to get all config paths
let getFilePaths = ( baseDir ) => {
//require inbuilt path and filesystem modules
let path = require ( 'path' );
let fs = require ( 'fs' );
//read the files in the base directory
let files = fs.readdirSync ( baseDir );
//fetch all config files recursively
for ( let i = 0 ; i < files.length; i ++ ) {
let file = files [ i ];
let filePath = path.resolve ( baseDir, file );
//get file stats
let fileStats = fs.lstatSync ( filePath );
let isFile = fileStats.isFile ( );
let isDir = fileStats.isDirectory ( );
if ( isFile && file === fileId ) {
pathsArray.push ( filePath );
}
if ( isDir ) {
pendingRecursive++;
getFilePaths( filePath );
}
}
//decrement the pending-call counter
if (--pendingRecursive == 0){
return pathsArray;
}
};
return getFilePaths ( baseDir );
};
//Testing the recursive search
let baseDir = __dirname;
let filePaths = recursiveFileSearch ( baseDir, "your file name" );

Can you use a boolean outside of the function as a flag and change its value upon reaching your targeted node? Perhaps your recursive cases can be within a case on the boolean, and when the node is found you can update its value... Or are you asking what the base case is for your recursive function to complete?

Related

How to use web worker inside a for loop in javascript?

Following is the code to create a 2d matrix in javascript:
function Create2DArray(rows) {
var arr = [];
for (var i=0;i<rows;i++) {
arr[i] = [];
}
return arr;
}
now I have a couple of 2d matrices inside an array:
const matrices = []
for(let i=1; i<10000; i++){
matrices.push(new Create2DArray(i*100))
}
// I'm just mocking it here. In reality we have data available in matrix form.
I want to do operations on each matrix like this:
for(let i=0; i<matrices.length; i++){
...doAnythingWithEachMatrix()
}
And since it will be a computationally expensive process, I would like to do it via a web worker so that the main thread is not blocked.
I'm using paralleljs for this purpose since it provides a nice API for multithreading. (Or should I use a native Web Worker? Please suggest.)
update() {
for(let i=0; i<matrices.length; i++){
var p = new Parallel(matrices[i]);
p.spawn(function (matrix) {
return doanythingOnMatrix(matrix)
// can be anything like transpose, scaling, translate etc...
}).then(function (matrix) {
// return the result back so that I can use those values to update the DOM, or directly update the DOM here.
// suggest a best way so that I can prevent crashes and improve performance.
});
}
requestAnimationFrame(update)
}
So my question is what is the best way of doing this?
Is it ok to use a new Webworker or Parallel instance inside a for loop?
Would it cause memory issues?
Or is it ok to create a global instance of Parallel or Webworker and use it for manipulating each matrix?
Or suggest a better approach.
I'm using Parallel.js as an alternative to Web Workers.
Is it OK to use parallel.js for multithreading? (Or do I need to use a native Web Worker?)
In reality, the matrices would contain position data, and this data is processed by the Web Worker or parallel.js instance behind the scenes, which returns the processed result back to the main app, where it is then used to draw items / update the canvas.
UPDATE NOTE
Actually, this is an animation. So it will have to be updated for each matrix during each tick.
Currently, I'm creating a new instance of Parallel inside the for loop. I fear that this is an unconventional approach, or that it could cause memory leaks. I need the best way of doing this. Please suggest.
UPDATE
This is my example:
Following our discussion in the comments, here is an attempt at using chunks. The data is processed in groups of 10 (a chunk), so that you can receive their results regularly, and we only start the animation after receiving 200 of them (a buffer) to get a head start (think of it like a video stream). These values may need to be adjusted depending on how long each matrix takes to process.
That being said, you added details afterwards about the lag you get. I'm not sure if this will solve it, or if the problem lies in your canvas update function. That's just a path to explore:
/*
* A helper function to process data in chunks
*/
async function processInChunks({ items, processingFunc, chunkSize, bufferSize, onData, onComplete }) {
const results = [];
// For each group of {chunkSize} items
for (let i = 0; i < items.length; i += chunkSize) {
// Process this group in parallel
const p = new Parallel( items.slice(i, i + chunkSize) );
// p.map is not a real Promise, so we create one
// to be able to await it
const chunkResults = await new Promise(resolve => {
return p.map(processingFunc).then(resolve);
});
// Add to the results
results.push(...chunkResults);
// Pass the results to a callback if we're above the {bufferSize}
if (i >= bufferSize && typeof onData === 'function') {
// Flush the results
onData(results.splice(0, results.length));
}
}
// In case there was less data than the wanted {bufferSize},
// pass the results anyway
if (results.length) {
onData(results.splice(0, results.length));
}
if (typeof onComplete === 'function') {
onComplete();
}
}
/*
* Usage
*/
// For the demo, a fake matrix Array
const matrices = new Array(3000).fill(null).map((_, i) => i + 1);
const results = [];
let animationRunning = false;
// For the demo, a function which takes time to complete
function doAnythingWithMatrix(matrix) {
const start = new Date().getTime();
while (new Date().getTime() - start < 30) { /* sleep */ }
return matrix;
}
processInChunks({
items: matrices,
processingFunc: doAnythingWithMatrix,
chunkSize: 10, // Receive results after each group of 10
bufferSize: 200, // But wait for at least 200 before starting to receive them
onData: (chunkResults) => {
results.push(...chunkResults);
if (!animationRunning) { runAnimation(); }
},
onComplete: () => {
console.log('All the matrices were processed');
}
});
function runAnimation() {
animationRunning = results.length > 0;
if (animationRunning) {
updateCanvas(results.shift());
requestAnimationFrame(runAnimation);
}
}
function updateCanvas(currentMatrixResult) {
// Just for the demo, we're not really using a canvas
canvas.innerHTML = `Frame ${currentMatrixResult} out of ${matrices.length}`;
info.innerHTML = results.length;
}
<script src="https://unpkg.com/paralleljs@1.0/lib/parallel.js"></script>
<h1 id="canvas">Buffering...</h1>
<h3>(we've got a headstart of <span id="info">0</span> matrix results)</h3>

fs.createWriteStream doesn't use back-pressure when writing data to a file, causing high memory usage

Problem
I'm trying to scan a drive directory (recursively walk all the paths) and write all the paths to a file (as it's finding them) using fs.createWriteStream in order to keep the memory usage low, but it doesn't work: the memory usage reaches 2 GB during the scan.
Expected
I was expecting fs.createWriteStream to automatically handle memory/disk usage at all times, keeping memory usage at a minimum with back-pressure.
Code
const fs = require('fs')
const walkdir = require('walkdir')
let dir = 'C:/'
let options = {
"max_depth": 0,
"track_inodes": true,
"return_object": false,
"no_return": true,
}
const wstream = fs.createWriteStream("C:/Users/USERNAME/Desktop/paths.txt")
let walker = walkdir(dir, options)
walker.on('path', (path) => {
wstream.write(path + '\n')
})
walker.on('end', (path) => {
wstream.end()
})
Is it because I'm not using .pipe()? I tried creating a new Stream.Readable({ read() {} }) and then, inside the .on('path') handler, pushing paths into it with readable.push(path), but that didn't really work.
UPDATE:
Method 2:
I tried the drain method proposed in the answers, but it doesn't help much; it does reduce memory usage to 500 MB (which is still too much for a stream), but it slows down the code significantly (from seconds to minutes).
Method 3:
I also tried using readdirp; it uses even less memory (~400 MB) and is faster, but I don't know how to pause it and use the drain method there to reduce the memory usage further:
const readdirp = require('readdirp')
let dir = 'C:/'
const wstream = fs.createWriteStream("C:/Users/USERNAME/Desktop/paths.txt")
readdirp(dir, {alwaysStat: false, type: 'files_directories'})
.on('data', (entry) => {
wstream.write(`${entry.fullPath}\n`)
})
Method 4:
I also tried doing this operation with a custom recursive walker. Even though it uses only 30 MB of memory, which is what I wanted, it is about 10 times slower than the readdirp method, and it is synchronous, which is undesirable:
const fs = require('fs')
const path = require('path')
let dir = 'C:/'
function customRecursiveWalker(dir) {
fs.readdirSync(dir).forEach(file => {
let fullPath = path.join(dir, file)
// Folders
if (fs.lstatSync(fullPath).isDirectory()) {
fs.appendFileSync("C:/Users/USERNAME/Desktop/paths.txt", `${fullPath}\n`)
customRecursiveWalker(fullPath)
}
// Files
else {
fs.appendFileSync("C:/Users/USERNAME/Desktop/paths.txt", `${fullPath}\n`)
}
})
}
customRecursiveWalker(dir)
Preliminary observation: you've attempted to get the results you want using multiple approaches. One complication when comparing the approaches you used is that they do not all do the same work. If you run tests on a file tree that contains only regular files and no mount points, you can probably compare the approaches fairly, but when you start adding mount points, symbolic links, etc., you may get different memory and time statistics merely due to the fact that one approach excludes files that another approach includes.
I initially attempted a solution using readdirp, but unfortunately that library appears buggy to me. Running it on my system here, I got inconsistent results. One run would output 10 MB of data, another run with the same input parameters would output 22 MB, then I'd get another number, etc. I looked at the code and found that it does not respect the return value of push:
_push(entry) {
if (this.readable) {
this.push(entry);
}
}
As per the documentation the push method may return a false value, in which case the Readable stream should stop producing data and wait until _read is called again. readdirp entirely ignores that part of the specification. It is crucial to pay attention to the return value of push to get proper handling of back-pressure. There are also other things that seemed questionable in that code.
So I abandoned that and worked on a proof of concept showing how it could be done. The crucial parts are:
When the push method returns false it is imperative to stop adding data to the stream. Instead, we record where we were, and stop.
We start again only when _read is called.
If you uncomment the console.log statements that print START and STOP, you'll see them printed out in succession on the console: we start, produce data until Node tells us to stop, and then we stop until Node tells us to start again, and so on.
const stream = require("stream");
const fs = require("fs");
const { readdir, lstat } = fs.promises;
const path = require("path");
class Walk extends stream.Readable {
constructor(root, maxDepth = Infinity) {
super();
this._maxDepth = maxDepth;
// These fields allow us to remember where we were when we have to pause our
// work.
// The path of the directory to process when we resume processing, and the
// depth of this directory.
this._curdir = [root, 1];
// The directories still to process.
this._dirs = [this._curdir];
// The list of files to process when we resume processing.
this._files = [];
// The location in `this._files` where to continue processing when we resume.
this._ix = 0;
// A flag recording whether or not the fetching of files is currently going
// on.
this._started = false;
}
async _fetch() {
// Recall where we were by loading the state in local variables.
let files = this._files;
let dirs = this._dirs;
let [dir, depth] = this._curdir;
let ix = this._ix;
while (true) {
// If we've gone past the end of the files we were processing, then
// just forget about them. This simplifies the code that follows a bit.
if (ix >= files.length) {
ix = 0;
files = [];
}
// Read directories until we have files to process.
while (!files.length) {
// We've read everything, end the stream.
if (dirs.length === 0) {
// This is how the stream API requires us to indicate the stream has
// ended.
this.push(null);
// We're no longer running.
this._started = false;
return;
}
// Here, we get the next directory to process and get the list of
// files in it.
[dir, depth] = dirs.pop();
try {
files = await readdir(dir, { withFileTypes: true });
}
catch (ex) {
// This is a proof-of-concept. In a real application, you should
// determine what exceptions you want to ignore (e.g. EPERM).
}
}
// Process each file.
for (; ix < files.length; ++ix) {
const dirent = files[ix];
// Don't include in the results those files that are not directories,
// files or symbolic links.
if (!(dirent.isFile() || dirent.isDirectory() || dirent.isSymbolicLink())) {
continue;
}
const fullPath = path.join(dir, dirent.name);
if (dirent.isDirectory() && depth < this._maxDepth) {
// Keep track that we need to walk this directory.
dirs.push([fullPath, depth + 1]);
}
// Finally, we can put the data into the stream!
if (!this.push(`${fullPath}\n`)) {
// If the push returned false, we have to stop pushing results to the
// stream until _read is called again, so we have to stop.
// Uncomment this if you want to see when the stream stops.
// console.log("STOP");
// Record where we were in our processing.
this._files = files;
// The element at ix *has* been processed, so ix + 1.
this._ix = ix + 1;
this._curdir = [dir, depth];
// We're stopping, so indicate that!
this._started = false;
return;
}
}
}
}
async _read() {
// Do not start the process that puts data on the stream over and over
// again.
if (this._started) {
return;
}
this._started = true; // Yep, we've started.
// Uncomment this if you want to see when the stream starts.
// console.log("START");
await this._fetch();
}
}
// Change the paths to something that makes sense for you.
stream.pipeline(new Walk("/home/", 5),
fs.createWriteStream("/tmp/paths3.txt"),
(err) => console.log("ended with", err));
When I run the first attempt you made with walkdir here, I get the following statistics:
Elapsed time (wall clock): 59 sec
Maximum resident set size: 2.90 GB
When I use the code I've shown above:
Elapsed time (wall clock): 35 sec
Maximum resident set size: 0.1 GB
The file tree I use for the tests produces a file listing of 792 MB
You could exploit the return value of WritableStream.write(): it essentially tells you whether you should continue writing or not. A WritableStream has an internal property (the highWaterMark) that stores the threshold after which the buffered data should be flushed to the OS. The drain event is emitted when the buffer has been flushed, i.e. when you can safely call WritableStream.write() again without risking filling the buffer (and therefore the RAM) excessively. Luckily for you, walkdir lets you control the process: you can call pause (pause the walk; no more events will be emitted until resume) and resume (resume the walk) on the walker object returned by walkdir, pausing and resuming the writing process on your stream accordingly. Try this:
// `walker` is the emitter from the question: let walker = walkdir(dir, options)
let is_emitter_paused = false;
wstream.on('drain', (evt) => {
if (is_emitter_paused) {
walker.resume();
}
});
walker.on('path', function(path, stat) {
is_emitter_paused = !wstream.write(path + '\n');
if (is_emitter_paused) {
walker.pause();
}
});
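The same pause-on-backpressure idea should also apply to the readdirp attempt from the question, since readdirp returns a standard Readable stream, so pause() and resume() are the stock stream methods. An untested sketch, assuming that API:
const fs = require('fs')
const readdirp = require('readdirp')

const wstream = fs.createWriteStream("C:/Users/USERNAME/Desktop/paths.txt")
const rstream = readdirp('C:/', { alwaysStat: false, type: 'files_directories' })

rstream.on('data', (entry) => {
    if (!wstream.write(`${entry.fullPath}\n`)) {
        // The writer's buffer is full: stop reading until it drains
        rstream.pause()
        wstream.once('drain', () => rstream.resume())
    }
})
rstream.on('end', () => wstream.end())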
Here's an implementation inspired by @Louis's answer. I think it's a bit easier to follow and in my minimal testing it performs about the same.
const fs = require('fs');
const path = require('path');
const stream = require('stream');
class Walker extends stream.Readable {
constructor(root = process.cwd(), maxDepth = Infinity) {
super();
// Dirs to process
this._dirs = [{ path: root, depth: 0 }];
// Max traversal depth
this._maxDepth = maxDepth;
// Files to flush
this._files = [];
}
_drain() {
while (this._files.length > 0) {
const file = this._files.pop();
if (file.isFile() || file.isDirectory() || file.isSymbolicLink()) {
const filePath = path.join(this._dir.path, file.name);
if (file.isDirectory() && this._maxDepth > this._dir.depth) {
// Add directory to be walked at a later time
this._dirs.push({ path: filePath, depth: this._dir.depth + 1 });
}
if (!this.push(`${filePath}\n`)) {
// Halt walking
return false;
}
}
}
if (this._dirs.length === 0) {
// Walking complete
this.push(null);
return false;
}
// Continue walking
return true;
}
async _step() {
try {
this._dir = this._dirs.pop();
this._files = await fs.promises.readdir(this._dir.path, { withFileTypes: true });
} catch (e) {
this.emit('error', e); // Uh oh...
}
}
async _walk() {
this.walking = true;
while (this._drain()) {
await this._step();
}
this.walking = false;
}
_read() {
if (!this.walking) {
this._walk();
}
}
}
stream.pipeline(new Walker('some/dir/path', 5),
fs.createWriteStream('output.txt'),
(err) => console.log('ended with', err));

Javascript - process array in batches AND show progress

Dear JavaScript Gurus:
I have the following requirements:
Process a large array in batches of 1000 (or any arbitrary size).
When each batch is processed, update the UI to show our progress.
When all batches have been processed, continue with the next step.
For example:
function process_array(batch_size) {
var da_len = data_array.length;
var idx = 0;
function process_batch() {
var idx_end = Math.min(da_len, idx + batch_size);
while (idx < idx_end) {
// do the voodoo we need to do
idx++;
}
}
// This loop kills the browser ...
while (idx < da_len) {
setTimeout(process_batch, 10);
// Show some progress (no luck) ...
show_progress(idx);
}
}
// Process array ...
process_array(1000);
// Continue with next task ...
// BUT NOT UNTIL WE HAVE FINISHED PROCESSING THE ARRAY!!!
Being new to JavaScript, I discovered that everything is done on a single thread and, as such, one needs to get a little creative with regard to processing and updating the UI. I have found some examples using recursive setTimeout calls (one key difference is that I have to wait until the array has been fully processed before continuing), but I cannot seem to get things working as described above.
Also -- I am in need of a "pure" javascript solution -- no third party libraries or the use of web workers (that are not fully supported).
Any (and all) guidance would be appreciated.
Thanks in advance.
You can make a stream from the array and use batch-stream to make batches, so that you can stream batches to the UI.
stream-array
and
batch-stream
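A rough, untested sketch of how those two packages are usually wired together, assuming stream-array's streamify(array) and batch-stream's BatchStream({ size }) APIs and that both operate in object mode (data_array and show_progress are the names from the question):
var streamify = require('stream-array');
var BatchStream = require('batch-stream');

var processed = 0;
streamify(data_array)                       // readable stream of array items
    .pipe(new BatchStream({ size: 1000 }))  // group items into arrays of up to 1000
    .on('data', function (batch) {
        // do the voodoo on this batch, then report progress
        processed += batch.length;
        show_progress(processed);
    })
    .on('end', function () {
        // all batches processed: continue with the next task
    });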
In JavaScript, when executing scripts in an HTML page, the page becomes unresponsive until the script is finished. This is because JavaScript is single-threaded.
You could consider using a web worker, which runs in the background, independently of other scripts, without affecting the performance of the page.
In this case the user can continue to do whatever they want in the UI.
You can send and receive messages from the web worker.
More info on Web Worker here.
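A minimal sketch of that message round trip with a plain Web Worker (the worker file name and the message shape are just illustrative; data_array and show_progress are the names from the question):
// main.js
var worker = new Worker('batch-worker.js');

worker.onmessage = function (e) {
    show_progress(e.data.processedCount);   // the worker reports progress back
};

worker.postMessage({ items: data_array });  // hand the whole array to the worker

// batch-worker.js
self.onmessage = function (e) {
    var items = e.data.items;
    for (var i = 0; i < items.length; i++) {
        // do the voodoo we need to do ...
        if ((i + 1) % 1000 === 0 || i === items.length - 1) {
            self.postMessage({ processedCount: i + 1 });
        }
    }
};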
So part of the magic of recursion is really thinking about the things that you need to pass in, to make it work.
And in JS (and other functional languages) that frequently involves functions.
function processBatch (remaining, processed, batchSize,
transform, onComplete, onProgress) {
if (!remaining.length) {
return onComplete(processed);
}
const batch = remaining.slice(0, batchSize);
const tail = remaining.slice(batchSize);
const totalProcessed = processed.concat(batch.map(transform));
return scheduleBatch(tail, totalProcessed, batchSize,
transform, onComplete, onProgress);
}
function scheduleBatch (remaining, processed, batchSize,
transform, onComplete, onProgress) {
onProgress(processed, remaining, batchSize);
setTimeout(() => processBatch(remaining, processed, batchSize,
transform, onComplete, onProgress));
}
const noop = () => {};
const identity = x => x;
function processArray (array, batchSize, transform, onComplete, onProgress) {
scheduleBatch(
array,
[],
batchSize,
transform || identity,
onComplete || noop,
onProgress || noop
);
}
This can be simplified considerably; the reality is that I'm just having a little fun here. But if you follow the trail, you should see recursion in a closed system that works with an arbitrary transform, on arbitrary objects, for arbitrary array lengths, with arbitrary code executed on completion and after each batch, while scheduling the next run.
To be honest, you could even swap this implementation out for a custom scheduler by changing three lines of code or so, and then you could log whatever you wanted (there's a sketch of such a swap after the demo below)...
const numbers = [1, 2, 3, 4, 5, 6];
const batchSize = 2;
const showWhenDone = numbers => console.log(`Done with: ${numbers}`);
const showProgress = (processed, remaining) =>
console.log(`${processed.length} done; ${remaining.length} to go`);
const quintuple = x => x * 5;
processArray(
numbers,
batchSize,
quintuple,
showWhenDone,
showProgress
);
// 0 done; 6 to go
// 2 done; 4 to go
// 4 done; 2 to go
// 6 done; 0 to go
// Done with: 5, 10, 15, 20, 25, 30
Overkill? Oh yes. But worth familiarizing yourself with the concepts, if you're going to spend some time in the language.
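For instance, the scheduler swap mentioned above might look like this; only scheduleBatch changes, here using requestAnimationFrame instead of setTimeout (a sketch assuming a browser environment):
function scheduleBatch (remaining, processed, batchSize,
                        transform, onComplete, onProgress) {
    onProgress(processed, remaining, batchSize);
    // Let the browser paint between batches instead of relying on setTimeout.
    requestAnimationFrame(function () {
        processBatch(remaining, processed, batchSize,
                     transform, onComplete, onProgress);
    });
}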
Thank you all for your comments and suggestions.
Below is the code that I settled on. It works for any task (in my case, processing an array) and gives the browser time to update the UI if need be.
The "do_task" function starts an anonymous function via setInterval that alternates between two steps -- processing the array in batches and showing the progress -- until all elements in the array have been processed.
function do_task() {
    const k_task_process_array = 1;
    const k_task_show_progress = 2;

    var working = false;
    var task_step = k_task_process_array;
    var batch_size = 1000;
    var idx = 0;
    var idx_end = 0;
    var da_len = data_array.length;

    // Start the task ...
    var task_id = setInterval(function () {
        if (!working) {
            working = true;
            switch (task_step) {
                case k_task_process_array:
                    idx_end = Math.min(idx + batch_size, da_len);
                    while (idx < idx_end) {
                        // do the voodoo we need to do ...
                        idx++;
                    }
                    task_step = k_task_show_progress;
                    break;
                default:
                    // Show progress here ...
                    // Continue processing array ...
                    task_step = k_task_process_array;
            }
            // Check if done ...
            if (idx >= da_len) {
                clearInterval(task_id);
                task_id = null;
            }
            working = false;
        }
    }, 1);
}

How is Node so much faster in this benchmark?

In an attempt to learn some systems programming, I was going to write a tokeniser in Rust. Immediately, though, I found it to be extremely slow at iterating over a string's chars. I put together a simple benchmark to show what I mean.
src/bench.html is an HTML doc with approx. 3000 chars
node:
var input = require('fs').readFileSync('src/bench.html', 'utf8');
var len = input.length;
for(var i = 0; i < 100; i+=1) run();
function run () {
var index = 0;
while (index < len) {
var c = input.charAt(index);
// noop
index++;
}
}
rust:
use std::error::Error;
use std::fs::File;
use std::io::prelude::*;
use std::path::Path;
fn main() {
// Create a path to the desired file
let path = Path::new("src/bench.html");
let display = path.display();
// Open the path in read-only mode, returns `io::Result<File>`
let mut file = match File::open(&path) {
// The `description` method of `io::Error` returns a string that
// describes the error
Err(why) => panic!("couldn't open {}: {}", display,
Error::description(&why)),
Ok(file) => file,
};
// Read the file contents into a string, returns `io::Result<usize>`
let mut s = String::new();
match file.read_to_string(&mut s) {
Err(why) => panic!("couldn't read {}: {}", display,
Error::description(&why)),
Ok(_) => {
for x in 1..100 {
for token in s.chars() {
match token {
_ => {
// noop
}
}
}
}
println!("done!");
}
}
}
Can someone explain what I'm doing incorrectly in the Rust example to make it 10x slower than the same thing in Node?
All code can be found here https://github.com/shakyShane/rust-vs-node
Simple answer: when benchmarking, don't use target/debug/program; run cargo build --release first. This will give you target/release/program for your benchmarks :)

Get subfolders from folder with multiple levels

I have code which checks all the files in the subfolders of a folder. But how can I change it to check not only one subfolder level but also the subfolders of the subfolders, and so on?
This is the code I have for the folder and its subfolders:
var fso = new ActiveXObject("Scripting.FileSystemObject");
fso = fso.getFolder(path);
var subfolders = new Object();
subfolders = fso.SubFolders;
var oEnumerator = new Enumerator(subfolders);
for (;!oEnumerator.atEnd(); oEnumerator.moveNext())
{
var itemsFolder = oEnumerator.item().Files;
var oEnumerator2 = new Enumerator(itemsFolder);
var clientFileName = null;
for(;!oEnumerator2.atEnd(); oEnumerator2.moveNext())
{
var item = oEnumerator2.item();
var itemName = item.Name;
var checkFile = itemName.substring(itemName.length - 3);
if(checkFile == ".ac")
{
var clientFileName = itemName;
break;
}
}
}
On each level of subfolders I need to check all the files to see whether a .ac file can be found.
The solution I mentioned in my comment would look something like this (I don't know much about ActiveX, so there are a lot of comments; hopefully you can easily correct any mistakes):
//this is the function that looks for the file inside a folder.
//if it's not there, it looks in every sub-folder by calling itself
function getClientFileName(folder) {
//get all the files in this folder
var files = folder.Files;
//create an enumerator to check all the files
var enumerator = new Enumerator(files);
for(;!enumerator.atEnd(); enumerator.moveNext()) {
//get the file name we're about to check
var file = enumerator.item().Name;
//if the file name is too short skip this one
if (file.length<3) continue;
//check if this file's name matches, if it does, return it
if (file.substring(file.length - 3)==".ac") return file;
}
//if we finished this loop, then the file was not inside the folder
//so we check all the sub folders
var subFolders = folder.SubFolders;
//create an enumerator to check all sub folders
enumerator = new Enumerator(subFolders);
for(;!enumerator.atEnd(); enumerator.moveNext()) {
//get the sub folder we're about to check
var subFolder = enumerator.item();
//try to find the file in this sub folder
var fileName = getClientFileName(subFolder);
//if it was inside the sub folder, return it right away
if (fileName!=null) return fileName;
}
//if we get this far, we did not find the file in this folder
return null;
}
You would then call this function like so:
var theFileYouAreLookingFor = getClientFileName(theFolderYouWantToStartLookingIn);
Again, beware of the above code: I did not test it, nor do I know much about ActiveX, I merely took your code and changed it so it should look in all the sub folders.
What you want is a recursive function. Here's a simple recursive function that iterates thru each file in the provided path, and then makes a recursive call to iterate thru each of the subfolders' files. For each file encountered, this function invokes a provided callback (which is where you'd do any of your processing logic).
Function:
function iterateFiles(path, recursive, actionPerFileCallback){
var fso = new ActiveXObject("Scripting.FileSystemObject");
//Get current folder
var folderObj = fso.GetFolder(path);
//Iterate thru files in thisFolder
for(var fileEnum = new Enumerator(folderObj.Files); !fileEnum.atEnd(); fileEnum.moveNext()){
//Get current file
var fileObj = fso.GetFile(fileEnum.item());
//Invoke provided perFile callback and pass the current file object
actionPerFileCallback(fileObj);
}
//Recurse thru subfolders
if(recursive){
//Step into each sub folder
for(var subFolderEnum = new Enumerator(folderObj.SubFolders); !subFolderEnum.atEnd(); subFolderEnum.moveNext()){
var subFolderObj = fso.GetFolder(subFolderEnum.item());
//Make recursive call
iterateFiles(subFolderObj.Path, true, actionPerFileCallback);
}
}
};
Usage (here I'm passing in an anonymous function that gets called for each file):
iterateFiles(pathToFolder, true, function(fileObj){
Wscript.Echo("File Name: " + fileObj.Name);
});
Now, that is a pretty basic example. Below is a more complex implementation of a similar function. In this function, we can recursively iterate thru each file as before. However, now the caller may provide a 'calling context' to the function, which is in turn passed back to the callback. This can be powerful, as the caller now has the ability to use previous information from its own closure. Additionally, I provide the caller an opportunity to update the calling context at each recursive level. For my specific needs when designing this function, it was necessary to provide the option of checking whether each callback was successful or not, so you'll see checks for that in this function. I also include the option to perform a callback for each folder that is encountered.
More Complex Function:
function iterateFiles(path, recursive, actionPerFileCallback, actionPerFolderCallback, useFnReturnValue, callingContext, updateContextFn){
var fso = new ActiveXObject("Scripting.FileSystemObject");
//If 'useFnReturnValue' is true, then iterateFiles() should return false IFF a callback fails.
//This function simply tests that case.
var failOnCallbackResult = function(cbResult){
return !cbResult && useFnReturnValue;
}
//Context that is passed to each callback
var context = {};
//Handle inputs
if(callingContext != null){
context.callingContext = callingContext;
}
//Get current folder
context.folderObj = fso.GetFolder(path);
//Do actionPerFolder callback if provided
if(actionPerFolderCallback != null){
var cbResult = Boolean(actionPerFolderCallback(context));
if (failOnCallbackResult(cbResult)){
return false;
}
}
//Iterate thru files in thisFolder
for(var fileEnum = new Enumerator(context.folderObj.Files); !fileEnum.atEnd(); fileEnum.moveNext()){
//Get current file
context.fileObj = fso.GetFile(fileEnum.item());
//Invoke provided perFile callback function with current context
var cbResult = Boolean(actionPerFileCallback(context));
if (failOnCallbackResult(cbResult)){
return false;
}
}
//Recurse thru subfolders
if(recursive){
//Step into sub folder
for(var subFolderEnum = new Enumerator(context.folderObj.SubFolders); !subFolderEnum.atEnd(); subFolderEnum.moveNext()){
var subFolderObj = fso.GetFolder(subFolderEnum.item());
//New calling context that will be passed into recursive call
var newCallingContext;
//Provide caller a chance to update the calling context with the new subfolder before making the recursive call
if(updateContextFn != null){
newCallingContext = updateContextFn(subFolderObj, callingContext);
}
//Make recursive call
var cbResult = iterateFiles(subFolderObj.Path, true, actionPerFileCallback, actionPerFolderCallback, useFnReturnValue, newCallingContext, updateContextFn);
if (failOnCallbackResult(cbResult)){
return false;
}
}
}
return true; //if we made it here, then all callbacks were successful
};
Usage:
//Note: The 'lib' object and copyFile() used below are just utilities from my own code, and fso is a Scripting.FileSystemObject instance defined elsewhere.
function copyFolder(fromPath, toPath, overwrite, recursive){
var actionPerFileCallback = function(context){
var destinationFolder = context.callingContext.toPath;
var destinationPath = lib.buildPath([context.callingContext.toPath, context.fileObj.Name]);
lib.createFolderIfDoesNotExist(destinationFolder);
return copyFile(context.fileObj.Path, destinationPath, context.callingContext.overwrite);
};
var actionPerFolderCallback = function(context){
var destinationFolder = context.callingContext.toPath;
return lib.createFolderIfDoesNotExist(destinationFolder);
};
var callingContext = {
fromPath : fromPath,
toPath : lib.buildPath([toPath, fso.GetFolder(fromPath).Name]), //include folder in copy
overwrite : overwrite,
recursive : recursive
};
var updateContextFn = function(currentFolderObj, previousCallingContext){
return {
fromPath : currentFolderObj.Path,
toPath : lib.buildPath([previousCallingContext.toPath, currentFolderObj.Name]),
overwrite : previousCallingContext.overwrite,
recursive : previousCallingContext.recursive
}
}
return iterateFiles(fromPath, recursive, actionPerFileCallback, null, true, callingContext, updateContextFn);
};
I know this question is old but I stumbled upon it and hopefully my answer will help someone!
