Find amounts of files and get total line count with Node FS - javascript

I'm building a Node script that asynchronously outputs a directory's file count and line count; however, I am having trouble with its asynchronous control flow.
// Import Dependencies
const fs = require('fs');
// Question's original (non-working) attempt, kept as posted: tallies the files and lines found
// directly inside `dir` into `output` and recurses into sub-directories.
// The NOTE(review) comments mark where the asynchronous control flow breaks.
const get_dir_line_count = (dir) => {
let output = { file_count: 0, file_line: 0, path: '' };
// NOTE(review): this promise chain is never returned, so callers receive undefined.
new Promise( (resolve, reject) => {
fs.readdir(dir, (err, dir_contents) => {
// `err` is ignored; the promise resolves with whatever readdir passed as its second argument.
resolve(dir_contents);
});
}).then( (promise_contents) => {
// NOTE(review): the Promise.all result is not returned from this handler either.
Promise.all(promise_contents.map( (file) => {
const file_path = dir + '/' + file;
return new Promise( (resolve, reject) => {
fs.stat(file_path, (err, stat) => {
// Returning here leaves this per-entry promise pending: neither resolve nor reject is called.
if(err || file[0] === '.') return err;
if(stat.isDirectory() && file !== 'node_modules'){
// The recursive call's result is discarded and this entry's promise is never settled.
get_dir_line_count(file_path);
}
else if(stat.isFile()){
promise_line_count(file_path)
.then( (line_count) => {
// Every file of this directory updates the same shared `output` object.
output.path = dir;
output.file_line += line_count;
output.file_count++;
resolve(output);
});
};
});
}).then( (resolved_output) => {
// Logs the running totals once per file, not once per directory.
console.log(resolved_output)
return resolved_output;
});
}));
});
};
/**
 * Streams the file at `pathToFile` and counts its newline bytes.
 *
 * @param {string} pathToFile - path of the file to read
 * @returns {Promise<number>} resolves with the number of line-feed (byte value 10) bytes
 *   once the stream ends; stream errors are not handled here.
 */
const promise_line_count = (pathToFile) =>
  new Promise((resolve) => {
    const LINE_FEED = 10;
    let newlines = 0;
    const reader = fs.createReadStream(pathToFile);
    reader.on('data', (bytes) => {
      for (const byte of bytes) {
        if (byte === LINE_FEED) newlines += 1;
      }
    });
    reader.on('end', () => resolve(newlines));
  });
// The target directory name is taken from the first command-line argument.
const directory = process.argv[2];
// The name is prefixed with './../', i.e. it is looked up in the parent of the working directory.
get_dir_line_count('./../' + directory);
My intention is to recursively walk through the directories and produce, via Promise.all, one array per directory, where each array is a collection of that directory's computed data. However, I am having an asynchronous control-flow issue with Promise.all. If anyone can provide feedback, that would be helpful.
Output:
Project = 5 Files, 50 lines
Project/src = 10 Files, 60 lines
Project/apple = 20 Files, 200 lines
...etc

One issue is that you're not returning anything from get_dir_line_count function itself:
const get_dir_line_count = (dir) => {
let output = { file_count: 0, file_line: 0, path: '' };
new Promise( (resolve, reject) => {
// ^---- missing a return statement
Another problem is that you forgot to return the result from Promise.all so the chain can be properly built:
// ...
}).then( (promise_contents) => {
Promise.all(promise_contents.map( (file) => {
// ^---- missing a return
You've also forgotten to return (or resolve) the recursive call to get_dir_line_count:
if(err || file[0] === '.') return err;
if(stat.isDirectory() && file !== 'node_modules'){
get_dir_line_count(file_path);
// ^--- missing a return statement or resolve statement
}
Finally, since you're returning the output object from get_dir_line_count, you can check things work by adding a then and passing the result into console.log:
const directory = process.argv[2];
get_dir_line_count('./../' + directory).then(console.log) // <-- get the output object and the log it
As far as dealing with the complexity of asynchronous code in general, main thing you can do to clean up the control flow is to extract individual logic into separate functions.
Below you can find a code example of one approach along with embedded comments (I also preserved the underscored naming preference):
const fs = require('fs');
const path = require('path');
// resolves with the file names within the given directory
/**
 * Lists the entries of a directory.
 *
 * @param {string} dir - directory to read
 * @returns {Promise<string[]>} resolves with the entry names reported by fs.readdir,
 *   rejects with the fs.readdir error
 */
function get_file_names(dir) {
  const executor = (resolve, reject) => {
    const on_listed = (error, entries) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(entries);
    };
    fs.readdir(dir, on_listed);
  };
  return new Promise(executor);
}
// resolves with an object containing the type ('file' or 'dir') for the given file path and the file path itself: { file_path, type }
/**
 * Determines whether `file_path` is a regular file or a directory.
 *
 * @param {string} file_path - path to inspect with fs.stat
 * @returns {Promise<{file_path: string, type: 'dir'|'file'}>} resolves with the path and its type;
 *   rejects with the fs.stat error, or with an Error('Invalid Type: ...') when the entry is
 *   neither a regular file nor a directory
 */
function get_path_and_type(file_path) {
  return new Promise((resolve, reject) => {
    fs.stat(file_path, (err, stat) => {
      if (err) return reject(err);
      const is_dir = stat.isDirectory();
      if (!is_dir && !stat.isFile()) {
        // Stop here: without the return the code would fall through to resolve().
        return reject(new Error(`Invalid Type: ${file_path}`));
      }
      resolve({
        file_path,
        type: is_dir ? 'dir' : 'file'
      });
    });
  });
}
// same as before, counts lines for the given file path
/**
 * Counts the newline bytes of a file by streaming it.
 *
 * @param {string} file_path - file to read
 * @returns {Promise<number>} resolves with the number of line-feed (byte value 10) bytes,
 *   rejects with the stream's error
 */
function count_lines(file_path) {
  const LINE_FEED = 10;
  return new Promise((resolve, reject) => {
    let total = 0;
    const reader = fs.createReadStream(file_path);
    reader.on('data', (bytes) => {
      for (const byte of bytes) {
        if (byte === LINE_FEED) total += 1;
      }
    });
    reader.on('end', () => resolve(total));
    reader.on('error', reject);
  });
}
/**
 * Recursively counts the files and newline characters below `dir`.
 *
 * Entries whose name starts with '.' or contains 'node_modules' are skipped.
 * Entries that get_path_and_type rejects (stat failure or unsupported type) are
 * reported with console.warn and skipped, so one bad entry does not fail the whole walk.
 *
 * @param {string} dir - directory to scan
 * @returns {Promise<{file_count: number, file_lines: number, path: string}>} totals for `dir`
 *   including all of its sub-directories
 */
function get_dir_line_count(dir) {
  const output = {
    file_count: 0,
    file_lines: 0,
    path: dir
  };

  // Adds one entry (file or whole sub-directory) to the running totals.
  const tally_entry = ({ file_path, type }) => {
    if (type === 'dir') {
      return get_dir_line_count(file_path).then((sub_totals) => {
        output.file_count += sub_totals.file_count;
        // Lines must come from the sub-directory's line total, not its file count.
        output.file_lines += sub_totals.file_lines;
      });
    }
    return count_lines(file_path).then((file_lines) => {
      output.file_lines += file_lines;
      output.file_count += 1;
    });
  };

  return get_file_names(dir)
    .then((names) =>
      names.filter((name) => !name.startsWith('.') && !name.includes('node_modules'))
    )
    .then((names) =>
      Promise.all(names.map((name) =>
        // The rejection handler only covers get_path_and_type; it replaces the former
        // `.catch(console.warn)`, which resolved with undefined and broke the destructuring.
        get_path_and_type(path.join(dir, name)).then(tally_entry, (err) => {
          console.warn(err);
        })
      ))
    )
    // Resolve with the totals only after every entry has been tallied.
    .then(() => output);
}
// Scan the directory given as the first CLI argument and print the totals object.
// NOTE(review): no rejection handler is attached here, so a failure surfaces as an unhandled rejection.
get_dir_line_count(process.argv[2])
.then(console.log);

const fs = require('fs');
const path = require('path');
// Shared result map filled by walk(): one entry per visited directory, keyed by directory path.
let output = {};
// NOTE(review): not referenced by the code shown in this snippet.
let lastDir = '';
/**
 * Recursively walks `dir`, recording per-directory file and newline counts in the
 * module-level `output` map (keyed by directory path).
 *
 * @param {string} dir - directory to walk
 * @returns {Promise<object>} resolves with the shared `output` map once every entry below
 *   `dir` has been processed; rejects on the first readdir, stat or read error instead of
 *   hanging or crashing on an unhandled stream error
 */
const walk = (dir) => {
  return new Promise((resolve, reject) => {
    output[dir] = {
      files: 0,
      lines: 0,
      path: ''
    };
    fs.readdir(dir, (err, list) => {
      if (err) {
        return reject(err);
      }
      let pending = list.length;
      if (!pending) {
        return resolve(output);
      }
      // Called once per finished entry; resolves when the last one reports in.
      const entryDone = () => {
        if (!--pending) {
          resolve(output);
        }
      };
      list.forEach((name) => {
        const file = path.resolve(dir, name);
        fs.stat(file, (statErr, stat) => {
          if (statErr) {
            // e.g. a dangling symlink: previously ignored and then read as a file.
            return reject(statErr);
          }
          if (stat.isDirectory()) {
            // Propagate failures of the nested walk instead of leaving this promise pending.
            walk(file).then(entryDone, reject);
            return;
          }
          let lc = 0;
          fs.createReadStream(file)
            .on('data', (buffer) => {
              for (const byte of buffer) {
                if (byte === 10) {
                  lc++;
                }
              }
            })
            .on('error', reject)
            .on('end', () => {
              output[dir].files++;
              output[dir].lines += lc;
              output[dir].path = dir;
              entryDone();
            });
        });
      });
    });
  });
};
// Walk the current working directory; both the result map and any error are printed with console.log.
walk('.')
.then(console.log)
.catch(console.log);

Related

Chained Promises not fully resolving on await

I have a function that reads files in a directory asynchronously (readdir) and filters for csv files. I also have an async function that calls readdir filtered for csv files and then iterates through them with fast-csv. Logging to the console the list and its length within the .on('end') function, I can see that they produce the desired results. however, my async call only resolves the first iteration.
const fs = require(`fs`);
const path = require(`path`);
const csv = require(`fast-csv`);
// Module-level accumulator that ofsDataObjectArray resolves with.
var ofsActivities = [];
// The 'Downloads' directory next to this module's directory (path.join normalizes the '/../').
const currDir = path.join(__dirname + `/../Downloads/`);
/**
 * Promise wrapper around fs.readdir.
 *
 * @param {string} dirname - directory to list
 * @returns {Promise<string[]>} resolves with the entry names, rejects with the fs.readdir error
 */
const readdir = async (dirname) => {
  const listing = new Promise((resolve, reject) => {
    fs.readdir(dirname, (error, filenames) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(filenames);
    });
  });
  return listing;
};
/**
 * Predicate for Array.prototype.filter: keeps file names that end in '.csv'.
 *
 * Checks the end of the name rather than the text after the first dot, so
 * 'report.2021.csv' is accepted and 'data.csv.bak' is rejected.
 *
 * @param {string} filename - bare file name
 * @returns {boolean} true when the name ends with '.csv' (case-sensitive)
 */
const filtercsvFiles = (filename) => {
  return filename.endsWith(`.csv`);
};
// Question's original version, kept as posted: lists currDir, keeps the CSV files and starts
// one fast-csv parser per file inside a single promise.
const ofsDataObjectArray = async () => {
return readdir(currDir).then(async filenames => {
return await new Promise((resolve, reject) => {
filenames = filenames.filter(filtercsvFiles);
// The loop starts every parser without waiting for the previous one.
for (let i = 0; i < filenames.length; i++) {
let currFilePath = currDir + filenames[i];
console.log(`Reading File: ${filenames[i]}`);
csv
.parseFile(currFilePath)
.on(`data`, (data) => {
//Doing stuff
})
.on(`error`, error => reject(error))
// NOTE(review): all parsers share this one `resolve`; the promise settles when the first
// parser emits `end`, and the later resolve calls have no effect.
.on(`end`, () => resolve(ofsActivities)); //Inserting a console.log(ofsActivities.length) logs the correct and expected length on the last iteration
}
});
});
};
// Entry point: waits for ofsDataObjectArray and prints the length of what it resolved with.
(async () => {
let list = await ofsDataObjectArray(); // This seems to only resolve the first iteration within the promise
console.log(list.length);
})();
You need to call resolve() only when the LAST csv.parseFile() is done. You're calling it when the FIRST one is done, thus the promise doesn't wait for all the others to complete. I'd suggest you promisify csv.parseFile() by itself and then await that inside the loop or accumulate all the promises from csv.parseFile() and use Promise.all() with all of them.
Here's using await on each csv.parseFile():
/**
 * Parses every CSV file in currDir one after another.
 *
 * Each file is fully parsed (its parser has emitted `end`) before the next one is started.
 *
 * @returns {Promise<Array>} resolves with the shared `ofsActivities` array after the last file;
 *   rejects with the first parser or readdir error
 */
const ofsDataObjectArray = async () => {
  const csvFiles = (await readdir(currDir)).filter(filtercsvFiles);
  for (const name of csvFiles) {
    const currFilePath = currDir + name;
    console.log(`Reading File: ${name}`);
    await new Promise((resolve, reject) => {
      const parser = csv.parseFile(currFilePath);
      parser.on(`data`, (data) => {
        //Doing stuff
      });
      parser.on(`error`, reject);
      parser.on(`end`, () => resolve(ofsActivities));
    });
  }
  return ofsActivities;
};
Or, here's running them in parallel with Promise.all():
/**
 * Parses every CSV file in currDir in parallel.
 *
 * All parsers are started at once and Promise.all waits for each of them to emit `end`.
 *
 * @returns {Promise<Array>} resolves with the shared `ofsActivities` array (the same value the
 *   sequential variant returns) once every file has been parsed; rejects with the first
 *   parser or readdir error
 */
const ofsDataObjectArray = async () => {
  const filenames = (await readdir(currDir)).filter(filtercsvFiles);
  await Promise.all(filenames.map((file) => {
    const currFilePath = currDir + file;
    console.log(`Reading File: ${file}`);
    return new Promise((resolve, reject) => {
      csv.parseFile(currFilePath)
        .on(`data`, (data) => {
          //Doing stuff
        })
        .on(`error`, reject)
        .on(`end`, () => resolve());
    });
  }));
  // Return the accumulator itself rather than Promise.all's array of per-file results,
  // so callers can use `list.length` as the number of collected activities.
  return ofsActivities;
};
P.S. It's unclear from your question what final result you're trying to accumulate (you have left that out) so you will have to add that to this code in the "doing stuff" code or by modifying the resolve(something) code.

How to wait for function to finish before running another one or rest of the code?

I can't figure this one out. I have one function that connects to an SFTP server and downloads files. Then, I have a second function that reads the contents, puts the data in an array, and returns the array.
The problem is that the second function always runs first. I tried different methods but I can't get it to work. That connection to SFTP is quite slow, it can take like 10+ seconds to finish. But I need to somehow wait for it to finish before doing anything else.
const SFTPConfig = require('../config/keys').sftpconfig;
// Question's original version, kept as posted: connects to the SFTP server, lists
// '/In/Archives/' and downloads every file whose modify date matches `targetDate`
// into `../csv/<targetDate>/`.
const getCSATFiles = async function(targetDate) {
try {
let Client = require('ssh2-sftp-client');
let sftp = new Client();
const date = moment(targetDate);
var dir = `../csv/${targetDate}/`;
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir);
}
// NOTE(review): this promise chain is neither returned nor awaited, so the async function's
// promise resolves right away and `await getCSATFiles(...)` does not wait for the downloads.
// The surrounding try/catch also cannot catch rejections of this chain.
sftp
.connect(SFTPConfig, 'once')
.then(() => {
return sftp.list('/In/Archives/');
})
.then(data => {
data.forEach(item => {
const fileName = item.name;
const remotePath = '/In/Archives/' + fileName;
const localePath = path.join(dir + fileName);
// Both sides are formatted and cut to their first 10 characters, i.e. compared as 'YYYY-MM-DD'.
if (
moment(item.modifyTime)
.format('YYYY-MM-DD hh:mm')
.toString()
.slice(0, 10) ===
date
.format('YYYY-MM-DD hh:mm')
.toString()
.slice(0, 10)
) {
// The fastGet promises are not collected, so nothing waits for them.
sftp
.fastGet(remotePath, localePath, {})
.then(() => {
console.log('finished getting the files!');
// Called after each individual download, not once after all of them.
sftp.end();
})
.catch(err => {
sftp.end();
console.log(err, 'fastGet method error');
});
}
});
});
} catch (error) {
console.log(error);
}
};
// Question's original version, kept as posted: parses every file in the local
// `../csv/<targetDate>/` directory and collects the '[CALLERNO_EMAIL_SOCIAL]' value of
// each row whose '[REGION2]' is not 'FR'.
const readCSVFiles = async function(targetDate) {
try {
const casesBO = [];
var dir = `../csv/${targetDate}/`;
// The existence check uses `dir` as given, while the files are read from `dir` joined onto __dirname below.
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir);
}
const allLocalFiles = path.join(__dirname, dir);
const readDir = util.promisify(fs.readdir);
const files = await readDir(allLocalFiles);
for (let file of files) {
// Each stream is only started here; nothing waits for its 'end' event.
fs.createReadStream(allLocalFiles + file)
.pipe(csv.parse({ headers: true, delimiter: ';' }))
.on('error', error => console.error(error))
.on('data', row => {
if (row['[REGION2]'] !== 'FR') {
casesBO.push(row['[CALLERNO_EMAIL_SOCIAL]']);
console.log(
`${row['[AGENT]']} is ${row['[REGION2]']} and case = ${
row['[CALLERNO_EMAIL_SOCIAL]']
}`
);
}
})
.on('end', rowCount => {
console.log(`Parsed ${rowCount} rows`);
});
}
// NOTE(review): returned immediately after the loop, before the 'data' handlers above have run.
return casesBO;
} catch (error) {
console.log(error);
}
};
// Driver: awaits getCSATFiles, then readCSVFiles for the same date, and logs the second result.
const testFunc = async () => {
// The value is stored but not used afterwards.
const csatfiles = await getCSATFiles('2021-02-03');
const boData = await readCSVFiles('2021-02-03');
console.log(boData);
};
testFunc();
#1 As @messerbill suggested, you need to return the promise from your function.
#2 your promise has a loop inside of it that have more promises. In this case, you need to collect those promises and use Promise.all to resolve them before you second function runs. I put comments on the lines you need to change below. Try this:
const SFTPConfig = require('../config/keys').sftpconfig;
const getCSATFiles = function(targetDate) {
try {
let Client = require('ssh2-sftp-client');
let sftp = new Client();
const date = moment(targetDate);
var dir = `../csv/${targetDate}/`;
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir);
}
// I return the promise here
return sftp
.connect(SFTPConfig, 'once')
.then(() => {
return sftp.list('/In/Archives/');
})
.then(data => {
// I set up my promises as a blank array
const promises = [];
data.forEach(item => {
const fileName = item.name;
const remotePath = '/In/Archives/' + fileName;
const localePath = path.join(dir + fileName);
if (
moment(item.modifyTime)
.format('YYYY-MM-DD hh:mm')
.toString()
.slice(0, 10) ===
date
.format('YYYY-MM-DD hh:mm')
.toString()
.slice(0, 10)
) {
// I collect the promises here
promises.push(sftp
.fastGet(remotePath, localePath, {})
.then(() => {
console.log('finished getting the files!');
sftp.end();
})
.catch(err => {
sftp.end();
console.log(err, 'fastGet method error');
}));
}
});
// I resolve them here
return Promise.all(promises);
});
} catch (error) {
console.log(error);
}
};
/**
 * Parses every CSV file in the local `../csv/<targetDate>/` directory and collects the
 * '[CALLERNO_EMAIL_SOCIAL]' value of each row whose '[REGION2]' is not 'FR'.
 *
 * The function now waits for every parser to finish before returning, so the returned
 * array is actually populated. Files are still parsed concurrently.
 *
 * @param {string} targetDate - date used in the directory name, e.g. '2021-02-03'
 * @returns {Promise<string[]|undefined>} the collected values; undefined when setup or the
 *   directory listing throws (the error is logged). A read or parse error in one file is
 *   logged and that file is abandoned, without failing the others.
 */
const readCSVFiles = async function(targetDate) {
  try {
    const casesBO = [];
    const dir = `../csv/${targetDate}/`;
    // NOTE(review): this check uses `dir` as given, while the files are read from `dir`
    // joined onto __dirname; confirm both are meant to point at the same place.
    if (!fs.existsSync(dir)) {
      fs.mkdirSync(dir);
    }
    const allLocalFiles = path.join(__dirname, dir);
    const readDir = util.promisify(fs.readdir);
    const files = await readDir(allLocalFiles);

    // Resolves when one file has been parsed completely (or has failed and been reported).
    const parseFile = file =>
      new Promise(resolve => {
        const reportAndContinue = error => {
          console.error(error);
          resolve();
        };
        fs.createReadStream(allLocalFiles + file)
          // pipe() does not forward source errors, so the read stream needs its own handler.
          .on('error', reportAndContinue)
          .pipe(csv.parse({ headers: true, delimiter: ';' }))
          .on('error', reportAndContinue)
          .on('data', row => {
            if (row['[REGION2]'] !== 'FR') {
              casesBO.push(row['[CALLERNO_EMAIL_SOCIAL]']);
              console.log(
                `${row['[AGENT]']} is ${row['[REGION2]']} and case = ${
                  row['[CALLERNO_EMAIL_SOCIAL]']
                }`
              );
            }
          })
          .on('end', rowCount => {
            console.log(`Parsed ${rowCount} rows`);
            resolve();
          });
      });

    await Promise.all(files.map(parseFile));
    return casesBO;
  } catch (error) {
    console.log(error);
  }
};
/**
 * Driver: downloads the files for a fixed date, then parses them and logs the result.
 * The download step is awaited first so the files exist before they are read.
 */
const testFunc = async () => {
  const targetDate = '2021-02-03';
  await getCSATFiles(targetDate);
  console.log(await readCSVFiles(targetDate));
};
testFunc();
You need to take care, that you return the promise you want to resolve inside the function body in order to get the promise resolved at the right time.
// Demonstration: this async function creates a timer promise but does not return it,
// so the function's own promise resolves without waiting for the timer.
async function promiseNotReturned() {
new Promise(resolve => setTimeout(resolve.bind(null), 5000))
}
// Counterpart: the timer promise is returned, so awaiting the function waits for the 5000 ms timeout.
async function promiseReturned() {
return new Promise(resolve => setTimeout(resolve.bind(null), 5000))
}
// Awaits both variants in turn and logs after each to show the difference.
async function run() {
await promiseNotReturned()
console.log("does not wait for 5 seconds")
await promiseReturned()
console.log("waits for 5 seconds")
}
run()

Promise chaining causing increased execution time?

I am creating a simple Node.js function that converts a PDF to an image, crops the image, and merges the pieces back together with ImageMagick.
and this is the complete code i am using :
var os = require('os');
var fs = require('fs');
var path = require('path');
var gs = require('node-gs');
var sharp = require('sharp');
var areaMap = require('./areaMap');
const { performance } = require('perf_hooks');
var spawn = require('child_process').spawnSync;
// Marker string that GetCardType looks for inside the extracted PDF text.
var pExcep = 'someException';
// Passed to node-gs via executablePath() in PdfToText and getBaseImage.
var gsPath = 'Ghostscript/gs26';
// The OS temp directory, joined with '/'.
var src = path.join(os.tmpdir(), '/');
// Fixed temp-file locations: OutImg is the rendered page, file1/file2 are the two crops.
// NOTE(review): the names are constant, so every invocation reads and writes the same files.
var Files = {
file1: path.join(src, 'out1.jpeg'),
file2: path.join(src, 'out2.jpeg'),
OutImg: path.join(src, 'out.jpeg')
}
/**
 * Extracts one region of the rendered page (Files.OutImg) into its own file.
 *
 * @param {string} s - key into areaMap; the mapped value is passed to sharp's extract()
 * @param {string} sFile - destination path handed to sharp's toFile()
 * @returns {Promise<undefined>} resolves with no value once the file is written,
 *   rejects with sharp's error
 */
var crop = function (s, sFile) {
  return new Promise((resolve, reject) => {
    const region = areaMap[s];
    sharp(Files.OutImg)
      .extract(region)
      .toFile(sFile)
      .then(() => resolve(), (err) => reject(err));
  });
};
/**
 * Classifies the given string by shape.
 *
 * @param {string} s - value to classify
 * @returns {string} 'SOMETHINGHERE' when `s` consists of exactly eight digits, otherwise 'inception'
 */
var getBaseCard = function (s) {
  const isEightDigits = /^([0-9]{8})$/.test(s);
  return isEightDigits ? 'SOMETHINGHERE' : 'inception';
  //This can be done on client side.
}
/**
 * Derives the card-type prefix from the base card and the text extracted from the PDF.
 *
 * The base is compared against 'SOMETHINGHERE', the value getBaseCard returns; the previous
 * misspelling ('SOEMTHINGHERE') could never match, so every input fell through to 'SA_'.
 *
 * @param {string} base - result of getBaseCard
 * @param {string} sInfo - text extracted from the PDF
 * @returns {Promise<string>} 'SA_' for any other base; for base 'SOMETHINGHERE': 'PA_ST' when the
 *   text lacks 'SOMETHINGHERE2', otherwise 'PA_S_' if it also contains `pExcep`, else 'PA_S2'
 */
var GetCardType = function (base, sInfo) {
  return new Promise((res) => {
    if (base !== 'SOMETHINGHERE') {
      return res('SA_');
    }
    if (!sInfo.includes('SOMETHINGHERE2')) {
      return res('PA_ST');
    }
    res(sInfo.includes(pExcep) ? 'PA_S_' : 'PA_S2');
  })
}
/**
 * Extracts the text of a (password-protected) PDF with Ghostscript's txtwrite device.
 *
 * @param {string} file - path of the PDF
 * @param {string} pass - PDF password, passed as -sPDFPassword
 * @returns {Promise<string>} resolves with Ghostscript's stdout; rejects with the exec error
 *   (after logging stdout, the error and stderr) instead of leaving the promise pending
 */
var PdfToText = function (file, pass) {
  return new Promise((res, rej) => {
    gs()
      .batch().safer().nopause().res(2).option('-dDEVICEWIDTHPOINTS=20').option('-dDEVICEHEIGHTPOINTS=20').option('-dFIXEDMEDIA').option('-sPDFPassword=' + pass).device('txtwrite').output('-').input(file).executablePath(gsPath)
      .exec((err, stdout, stderr) => {
        if (!err) {
          res(stdout);
          return;
        }
        console.log(stdout);
        console.log(err);
        console.log(stderr);
        // Settle the promise so callers' .catch handlers run.
        rej(err);
      })
  });
}
/**
 * Renders the PDF to a JPEG at Files.OutImg using Ghostscript.
 *
 * @param {string} file - path of the PDF
 * @param {string} pass - PDF password, passed as -sPDFPassword
 * @param {number} quality - multiplier applied to the base resolution of 300
 * @returns {Promise<undefined>} resolves with no value on success; rejects with Ghostscript's
 *   stdout when exec reports an error
 */
var getBaseImage = function (file, pass, quality) {
  return new Promise((resolve, reject) => {
    const onExit = (err, stdout, stderr) => {
      if (err) {
        reject(stdout);
        return;
      }
      resolve();
    };
    gs()
      .batch()
      .nopause()
      .safer()
      .res(300 * quality)
      .option('-dTextAlphaBits=4')
      .option('-dGraphicsAlphaBits=4')
      .option('-sPDFPassword=' + pass)
      .executablePath(gsPath)
      .device('jpeg')
      .output(Files.OutImg)
      .input(file)
      .exec(onExit);
  })
}
// Question's original version, kept as posted: renders the PDF to an image, extracts its text,
// derives the card type, crops two regions and merges them with ImageMagick `convert`.
// Each step is nested inside the previous step's `then`, and the outer promise is
// resolved/rejected by hand.
exports.processCard = function (file, password, quality) {
return new Promise((resolve, reject) => {
getBaseImage(file, password, quality) // Convert PDF to Image
.then(() => {
PdfToText(file, password) // Extract Text from pdf
.then((res) => {
GetCardType(getBaseCard(password), res) // finally get PDF Type
.then((ct) => {
// crop image here using Sharp
Promise.all([
crop(ct + 'A_' + quality, Files.file1),
crop(ct + 'B_' + quality, Files.file2)])
.then(() => {
// Merge Above two image into one using ImageMagick convert
// `spawn` is bound to child_process.spawnSync at the top of the file, so this call blocks
// until `convert` exits; its result (status/error) is not inspected.
spawn('convert', [Files.file1, Files.file2, '+append', 'files/out1.jpg']);
fs.unlinkSync(Files.OutImg); // Unlink tmp folders
fs.unlinkSync(Files.file1);
fs.unlinkSync(Files.file2);
resolve(); // finally resolve
}).catch((err) => reject(err));
}).catch((err) => reject(err))
}).catch((err) => reject(err))
}).catch((err) => reject(err))
})
}
and now these are the problem i am facing:
1. ImageMagick isn't creating the output file.
2. fs.unlinkSync throws ENOENT: no such file or directory, unlink '/tmp/out1.jpeg'
on average every second execution.
3. Using above code increases execution time.
For Example: getBaseImage should complete in 600ms but it takes 1400 using above code.
About speed in General it (The Complete Function not just getBaseImage) should finish in 1100-1500ms(*) on average but the time taken is ~2500ms.
*1100-1500ms time is achievable by using function chaining but that is hard to read and maintaine for me.
I am going to use this function in Firebase Functions.
How to properly chain these functions ?
EDIT
exports.processCard = function (file, password, quality) {
return new Promise((resolve, reject) => {
console.log(performance.now());
getBaseImage(file, password, quality) //Convert PDF TO IMAGE
.then(() => { return PdfToText(file, password) })
.then((res) => {return GetCardType(getBaseCard(password), res) })
.then((ct) => {
return Promise.all([
crop(ct + 'A_' + quality, Files.file1),
crop(ct + 'B_' + quality, Files.file2)])
})
.then(() => {
spawn('convert', [Files.file1, Files.file2, '+append', 'files/out1.jpg']);
fs.unlinkSync(Files.OutImg); // Unlink tmp folders
fs.unlinkSync(Files.file1);
fs.unlinkSync(Files.file2);
resolve();
})
.catch((err) => { console.log(err) });
Using the above pattern didn't solve my issues here.
There's a good chance this weirdness is caused by using the file system. If I understand it correctly, the fs in cloud functions is in memory, so when you write to it, read from it, and remove from it, you're using more and less os memory. That can get weird if a function is called repeatedly and re uses the loaded module.
One thing to try to keep the state clean for each invocation is to put everything (including the requires) inside the scope of the handler. That way you instantiate everything freshly on each invocation.
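Finally, you don't seem to be waiting for the spawned convert command to run, you'll need to wait for it to complete (this applies when `spawn` is the asynchronous `child_process.spawn`; the question binds `spawn` to `spawnSync`, which already blocks and returns a result object rather than an event emitter):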
// Uses the asynchronous child_process.spawn explicitly: it returns a ChildProcess that emits
// events, whereas spawnSync blocks and returns a plain result object without `.on`.
const convertProc = require('child_process').spawn('convert', [Files.file1, Files.file2, '+append', 'files/out1.jpg']);
// 'error' fires when the process could not be started (e.g. `convert` is not installed).
convertProc.on('error', function(error) {
  reject(error);
});
// 'close' fires once the process has exited and its stdio streams are closed.
convertProc.on('close', function(code) {
  if (code !== 0) {
    // Keep the temp files for inspection and report the failure.
    return reject(new Error('convert exited with code ' + code));
  }
  fs.unlinkSync(Files.OutImg); // Unlink tmp folders
  fs.unlinkSync(Files.file1);
  fs.unlinkSync(Files.file2);
  resolve();
});
Then you wait for it to complete before you resolve.

Node.js file system: Promise once read all files

I am using Node.js file system to build an array of file paths. I would like to know when all files have been read, so I could work further with my array.
Sequence of events:
Go into a folder
Get a path of each file
Put each path into an array
Let me know once you're done
Code:
'use strict';
const fs = require('fs');
// Question's original version, kept as posted: lists `path` and tries to collect the path of
// every entry into `files`.
function readDirectory(path) {
return new Promise((resolve, reject) => {
const files = [];
fs.readdir(path, (err, contents) => {
if (err) {
// NOTE(review): no return after reject, so execution continues to contents.forEach below.
reject(err);
}
contents.forEach((file) => {
const pathname = `${ path }/${ file }`;
// The push happens asynchronously, after getFilesFromPath has resolved.
getFilesFromPath(pathname).then(() => {
console.log('pathname', pathname);
files.push(pathname);
});
// NOTE(review): called inside the loop, during the first iteration and before any of the
// asynchronous pushes above; nothing is resolved at all when `contents` is empty.
resolve(files);
});
});
});
}
// Question's original version, kept as posted: for a file, resolves with its utf8 contents;
// for a directory, delegates to readDirectory.
function getFilesFromPath(path) {
return new Promise((resolve, reject) => {
const stat = fs.statSync(path);
if (stat.isFile()) {
fs.readFile(path, 'utf8', (err, data) => {
if (err) {
reject(err);
} else {
resolve(data);
}
});
} else if (stat.isDirectory()) {
// NOTE(review): readDirectory's promise is dropped and resolve/reject are never called
// in this branch, so the promise returned for a directory stays pending.
readDirectory(path);
}
});
}
getFilesFromPath('./dist');
Would be great to glue with:
Promise.all(files).then(() => {
// do stuff
})
Your suggestion pretty much works - did you try it? Here's a typical way of doing it:
// Resolve the list of files, start reading each one, and wait for all reads to finish.
getFilesFromPath( path ).then( files => {
  const filePromises = files.map( readFile );
  // The built-in is `Promise` (singular); `Promises.all` would throw a ReferenceError.
  return Promise.all( filePromises );
}).then( fileContentsArray => {
  //do stuff - the array will contain the contents of each file
});
You'll have to write the "readFile()" function yourself, but looks like you got that covered.

Converting callbacks with for loop and recursion to promises

I wrote a function that runs recursively to find files whose names include a given word. I do not understand how promises work and cannot find a way to write this function with promises despite trying hard.
I tried returning a promise inside the findPath function, but I couldn't use it since extractFiles calls findPath. I tried to create a list of promises and return them all, but couldn't succeed either.
So how could I write these functions with promises?
const fs = require('fs');
const path = require('path');
// Question's original callback version, kept as posted: walks `targetPath` recursively and
// calls `done(null, name)` once for every non-directory path that contains `targetWord`.
function findPath(targetPath, targetWord, done) {
// Returns silently, without calling `done`, when the path does not exist.
if (!fs.existsSync(targetPath)) return;
fs.readdir(targetPath, (err, allPaths) => {
// NOTE(review): no return after reporting the error, so the loop below still runs.
if (err) done(err, null);
// NOTE(review): `aPath` is not declared with let/const here.
for (aPath of allPaths) {
aPath = path.join(targetPath, aPath);
extractFiles(aPath, targetWord, done);
}
});
function extractFiles(aPath, targetWord, done) {
fs.lstat(aPath, (err, stat) => {
// NOTE(review): same pattern - execution continues to stat.isDirectory() after an error.
if (err) done(err, null);
if (stat.isDirectory()) {
findPath(aPath, targetWord, done);
}
else if (aPath.indexOf(targetWord) >= 0) {
// Keeps the text before the first '.' of the whole path, not only of the file name.
let fileName = aPath.split('.')[0];
done(null, fileName);
}
});
}
}
// NOTE(review): findPath calls its callback as done(err, result), so this single-parameter
// callback receives the error slot (null on success) rather than the file name.
findPath('../modules', 'routes', file => {
console.log(file);
});
Firstly, to make the "core" code more readable, I'd promisify the fs functions
/**
 * Turns a one-argument, node-style callback function into a promise-returning function.
 *
 * @param {Function} fn - function called as fn(p1, (err, result) => ...)
 * @returns {Function} function taking `p1` and returning a promise that resolves with `result`
 *   or rejects with `err`
 */
const promisify1p = (fn) => (p1) => {
  const executor = (resolve, reject) => {
    const callback = (err, result) => {
      if (!err) {
        resolve(result);
        return;
      }
      reject(err);
    };
    fn(p1, callback);
  };
  return new Promise(executor);
};
const readdirAsync = promisify1p(fs.readdir);
const lstatAsync = promisify1p(fs.lstat);
Then, just chain the promises as you would with any other promises
const fs = require('fs');
const path = require('path');
/**
 * Recursively collects the paths below `targetPath` that contain `targetWord`.
 *
 * @param {string} targetPath - directory to search
 * @param {string} targetWord - substring that a path must contain to be reported
 * @returns {Promise<string[]>} flat list of matching paths with their file extension removed;
 *   rejects if any readdir or lstat call fails
 */
function findPath(targetPath, targetWord) {
  // Removes only the file's own extension. Cutting at the first '.' of the whole path returned
  // '' for paths such as '../modules/x.js', which were then filtered out as falsy.
  const stripExtension = aPath => {
    const { dir, name } = path.parse(aPath);
    return path.join(dir, name);
  };
  const readPath = target =>
    readdirAsync(target)
      .then(allPaths =>
        Promise.all(allPaths.map(aPath => extractFiles(path.join(target, aPath))))
      )
      .then(found => found.filter(Boolean)) // drop the `false` entries, i.e. non-matching files
      .then(found => [].concat(...found)); // flatten the per-directory arrays
  const extractFiles = aPath =>
    lstatAsync(aPath).then(stat => {
      if (stat.isDirectory()) {
        return readPath(aPath);
      }
      if (aPath.includes(targetWord)) {
        return stripExtension(aPath);
      }
      return false;
    });
  return readPath(targetPath);
}
findPath('../modules', 'routes')
.then(results => {
// do things with the results - which is an array of files that contain the targetWord
})
.catch(err => console.error(err));
Not much to it at all.

Categories

Resources