synchronous hashing function for files - javascript

I have a function which generates a checksum for a given path
function getHash(path) {
    var fs = require('fs');
    var crypto = require('crypto');
    var fd = fs.createReadStream(path);
    var hash = crypto.createHash('sha1');
    hash.setEncoding('hex');
    fd.on('end', function () {
        hash.end();
        // *** Here is my problem ***
        console.log(hash.read());
    });
    fd.pipe(hash);
}
I want to call the getHash function so that it returns the hash. The problem is that I haven't found a synchronous version of this; maybe someone can help.
Just adding a return statement doesn't work, because the return would sit inside the listener.
Later I want to add the checksum to an object, so I could pass a reference to it as a parameter, but that still doesn't change the fact that the work is asynchronous...

You basically have 2 solutions:
#1 Work with promises (e.g. q - npm install q --save)
function getHash(path) {
    var Q = require('q');
    var fs = require('fs');
    var crypto = require('crypto');
    var deferred = Q.defer();
    var fd = fs.createReadStream(path);
    var hash = crypto.createHash('sha1');
    hash.setEncoding('hex');
    fd.on('end', function () {
        hash.end();
        // read the digest exactly once; a second read() would return null
        deferred.resolve(hash.read());
    });
    fd.pipe(hash);
    return deferred.promise;
}
getHash('c:\\')
    .then(function (result) {
        // do something with the hash result
    });
#2 Or use a callback function
function getHash(path, cb) {
    var fs = require('fs');
    var crypto = require('crypto');
    var fd = fs.createReadStream(path);
    var hash = crypto.createHash('sha1');
    hash.setEncoding('hex');
    fd.on('end', function () {
        hash.end();
        if (cb) {
            // read the digest exactly once and hand it to the callback
            cb(null, hash.read());
        }
    });
    fd.pipe(hash);
}
getHash('C:\\', function (error, data) {
    if (!error) {
        // do something with data
    }
});
If you don't have deep nesting in callback functions I would go for option #2.
(Note: behind the scenes promises are also using callbacks, it's just a 'cleaner' solution if you have deep nesting)
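(If you'd rather avoid the extra dependency: the same wrapping works with a native Promise on any modern Node version. A minimal sketch, using the same sha1/hex setup as above:)

function getHash(path) {
    var fs = require('fs');
    var crypto = require('crypto');
    return new Promise(function (resolve, reject) {
        var fd = fs.createReadStream(path);
        var hash = crypto.createHash('sha1');
        hash.setEncoding('hex');
        fd.on('error', reject); // surface read errors instead of hanging
        fd.on('end', function () {
            hash.end();
            resolve(hash.read());
        });
        fd.pipe(hash);
    });
}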

I know this is old, but it is in the top results when searching for something along these lines, so for those who land here looking for a solution: here you go. (Note that this only works well if you know the file is small; for larger files, refer to the answer provided by Dieterg.)
const fs = require('fs');
const crypto = require('crypto');

function fileHashSync(filePath) {
    var fileData;
    try {
        // read raw bytes (no encoding) so binary files hash correctly
        fileData = fs.readFileSync(filePath);
    } catch (err) {
        if (err.code === 'ENOENT') return console.error('File does not exist. Error: ', err);
        return console.error('Error: ', err);
    }
    return crypto.createHash('sha1').update(fileData).digest('hex');
}
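Usage is then an ordinary synchronous call (the path here is just an example):

var sum = fileHashSync('./package.json');
if (sum) console.log('sha1:', sum); // sum is undefined if the read failed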

Related

Writing to file in NodeJS calling from another JS file

I am having a weird issue writing to a file in NodeJS.
I have this code in my FileHandler.js:
module.exports.writeFile = function (arr) {
    var fs = require('fs');
    console.log(arr);
    var file = fs.createWriteStream(__dirname + '\\test.txt', { encoding: 'utf8' });
    file.on('error', function (err) {
        console.log(err);
    });
    file.on("finish", function () {
        console.log("finished");
    });
    arr.forEach(function (item) {
        file.write(item + "\n");
    });
    file.end();
}
If I append
exports.writeFile(["1","2","3"])
to the end of this file and then run node FileHandler.js, the file is created correctly.
However, if I call the writeFile function from another .js file as:
var R = require("r-script");
const dataHandler = require("./DataHandler");
const fileHandler = require("./FileHandler");
var out = R(__dirname + "\\apriori.R");
exports.getRules = function () {
    dataHandler.getListOfPageVisitsBySession(1000781912582, 1530781912582, function (result) {
        // ignored result variable
        fileHandler.writeFile(["1","2","3"]);
    });
}
and passing the exact same array to the function, it doesn't write anything (but the file is created), and neither the error nor the finish event fires.
If it matters, the DataHandler method contains a request module and a GET to another API.
Any clue of the problem?
Thanks in advance
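One thing that would make this easier to pin down (a debugging sketch, not a diagnosis; the done parameter name is just illustrative): give writeFile a completion callback wired to the stream's 'finish' and 'error' events, so the caller can log whether the write ever completes:

module.exports.writeFile = function (arr, done) {
    var fs = require('fs');
    var file = fs.createWriteStream(__dirname + '\\test.txt', { encoding: 'utf8' });
    file.on('error', function (err) { if (done) done(err); });
    file.on('finish', function () { if (done) done(null); });
    arr.forEach(function (item) { file.write(item + '\n'); });
    file.end();
};

// in the caller:
// fileHandler.writeFile(["1","2","3"], function (err) { console.log('write done', err); });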

Nodejs: Loop Through File and Parse PDFs using Callback in Sync

I am new to Node, so I am struggling quite a bit with its async nature.
I am trying to create a script that will parse the PDFs inside a directory and output them in txt format in another directory.
To do this, I am using the fs and pdf2json npm packages. I am passing the parseData function as a callback to the loopingFiles function. The only problem I am having is the async nature of Node.
It will loop through all the files at the same time, and the output is then a jumbled mess in the last file index.
I would like to process this synchronously, such that it waits until the data is finished parsing, writes the txt, and then loops again.
I have tried promises, but to no avail. Any help would be much appreciated!
var fs = require('fs'),
    PDFParser = require("pdf2json");
let pdfParser = new PDFParser(this, 1);
var parseData = function (pdf, index) {
    txtFile = "/Users/janet/node/pdf/Destination/".concat(index.toString().concat(".txt"));
    pdfFile = "/Users/janet/node/pdf/Source/".concat(pdf);
    pdfParser.loadPDF(pdfFile);
    // Parsing the pdf file in question
    pdfParser.on("pdfParser_dataError", errData => console.error(errData.parserError));
    pdfParser.on("pdfParser_dataReady", pdfData => {
        fs.writeFile(txtFile, pdfParser.getRawTextContent());
    });
};
var loopingFiles = function(callback) {
fs.readdir("/Users/janet/node/pdf/Source", function (err, files) {
if (err) {
console.log(err);
} else {
files.forEach( function(file, index) {
callback(file, index);
});
};
});
};
loopingFiles(parseData);
Something like this?
var fs = require("fs"),
PDFParser = require("pdf2json");
let pdfParser = new PDFParser(this, 1);
var parseData = function(pdfs, index = 0) {
// finished
if (index >= pdfs.length) return;
let pdf = pdfs[index];
txtFile = "/Users/janet/node/pdf/Destination/".concat(
index.toString().concat(".txt")
);
pdfFile = "/Users/janet/node/pdf/Source/".concat(pdf);
// Parsing the pdf file in question
pdfParser.on("pdfParser_dataError", errData => {
console.error(errData.parserError)
// not sure if you want to call this here to keep going or not
parseData(pdfs, index + 1);
});
pdfParser.on("pdfParser_dataReady", pdfData => {
fs.writeFile(txtFile, pdfParser.getRawTextContent(), function() {
// when we're all done, call this function again, with the index of the next pdf
parseData(pdfs, index + 1);
});
});
pdfParser.loadPDF(pdfFile);
};
var loopingFiles = function(callback) {
fs.readdir("/Users/janet/node/pdf/Source", function(err, files) {
if (err) {
console.log(err);
} else {
callback(files, 0);
}
});
};
loopingFiles(parseData);
The main difference is passing the whole array of PDFs to the function along with an index, and only calling that function again, with an incremented index, once the current one has completed.
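(On newer Node versions the same sequential flow can be written with async/await. A sketch, assuming pdf2json's event API as used above; it also creates a fresh parser per file so listeners don't accumulate:)

const fs = require("fs");
const PDFParser = require("pdf2json");

async function parseAll(sourceDir, destDir) {
    const files = await fs.promises.readdir(sourceDir);
    for (const [index, file] of files.entries()) {
        const parser = new PDFParser(null, 1); // 1 = keep raw text content
        const text = await new Promise((resolve, reject) => {
            parser.on("pdfParser_dataError", errData => reject(errData.parserError));
            parser.on("pdfParser_dataReady", () => resolve(parser.getRawTextContent()));
            parser.loadPDF(sourceDir + "/" + file);
        });
        await fs.promises.writeFile(destDir + "/" + index + ".txt", text);
    }
}

parseAll("/Users/janet/node/pdf/Source", "/Users/janet/node/pdf/Destination")
    .catch(console.error);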

Modules with Arguments NodeJS

I have two files, home.js and module.js in the same directory.
What I'm trying to do is pass the variable named directory as I call the function I exported from module.js.
It gives me this error:
binding.readdir(pathModule._makeLong(path), req);
TypeError: path must be a string.
What I can't figure out is why: I've passed the directory variable, which is process.argv[2] (it contains the path), from home.js as I call the function in module.js that requires the same argument (path).
home.js
var fs = require('fs');
var path = require('path');
var module = require('./module.js');
var directory = process.argv[2];
var extensionRequired = process.argv[3];

function printList(err, data) {
    if (err) return err;
    list.forEach(function (file) {
        if (path.extname(file) === '.' + extensionRequired) {
            console.log(file);
        }
    });
}

module(directory, extensionRequired, printList);
module.js
var fs = require('fs');
var path = require('path');
module.exports = function (directory, extensionRequired, callBack) {
    fs.readdir(directory, function (err, list) {
        if (err) return err;
        callBack(err, list);
    });
}
I think you made a mistake, and forgot to rename the list variable:
function printList(err, data) {
    if (err) return err;
    // Here list => data
    data.forEach(function (file) {
        if (path.extname(file) === '.' + extensionRequired) {
            console.log(file);
        }
    });
}
In your callback method, named printList, you declared the second parameter as data. If you want to access the second argument's value, you have to use data in your code or reassign it to another variable.
Your method may then look like this:
function printList(err, data) {
    if (err) return err;
    data.forEach(function (file) {
        if (path.extname(file) === '.' + extensionRequired) {
            console.log(file);
        }
    });
}
Additionally, I see two more problems with your code:
In module.js, you require the parameter extensionRequired. If you look closely, you'll find that it isn't even used in that method. This isn't really an error, but it is inelegant. Rather, pass it through to printList as an additional argument (the more node-typical way, IMHO) or use it as a global-scope variable, as you are currently doing anyway.
In your module.exports anonymous function in module.js, you are using if (err) return err;. I'd highly recommend not doing that. Because this is an asynchronous method, the return value goes nowhere; nobody can receive it. Instead, pass the error as the first argument of the callback. If there is no error, pass null instead, so you can easily figure out whether something unexpected happened. Always check that!
Your module.js could then look something like this:
var fs = require('fs');
var path = require('path');
module.exports = function (directory, callback) {
    fs.readdir(directory, function (err, list) {
        if (err)
            // Error happened, pass it to the callback
            callback(err);
        else
            // Everything ran smooth, send null as the error (no error)
            // and the list as the second argument.
            callback(null, list);
    });
}
Your home.js should then be changed accordingly:
var fs = require('fs');
var path = require('path');
var module = require('./module.js');
var directory = process.argv[2];
var extensionRequired = process.argv[3];

function printList(err, data) {
    if (err) {
        console.error("An error occurred:", err);
        // Exit with an error-code of one to
        // tell failure to the calling process and
        // prevent printing the probably 'undefined' data-variable
        process.exit(1);
    }
    data.forEach(function (file) {
        if (path.extname(file) === '.' + extensionRequired) {
            console.log(file);
        }
    });
}

// extensionRequired removed, as it is not needed
module(directory, printList);

How to use ES8 async/await with streams?

In https://stackoverflow.com/a/18658613/779159 is an example of how to calculate the hash of a file (sha1 in the snippet below) using the built-in crypto library and streams.
var fs = require('fs');
var crypto = require('crypto');

// the file you want to get the hash of
var fd = fs.createReadStream('/some/file/name.txt');
var hash = crypto.createHash('sha1');
hash.setEncoding('hex');
fd.on('end', function () {
    hash.end();
    console.log(hash.read()); // the desired sha1sum
});
// read all the file and pipe it (write it) to the hash object
fd.pipe(hash);
But is it possible to convert this to using ES8 async/await instead of using the callback as seen above, but while still keeping the efficiency of using streams?
The await keyword only works on promises, not on streams. There have been ideas to create an extra stream-like data type with its own syntax, but those are highly experimental, and I won't go into details here.
Anyway, your callback is only waiting for the end of the stream, which is a perfect fit for a promise. You'd just have to wrap the stream:
var fd = fs.createReadStream('/some/file/name.txt');
var hash = crypto.createHash('sha1');
hash.setEncoding('hex');
// read all the file and pipe it (write it) to the hash object
fd.pipe(hash);

var end = new Promise(function (resolve, reject) {
    hash.on('end', () => resolve(hash.read()));
    fd.on('error', reject); // or something like that. might need to close `hash`
});
More recent versions of Node.js also ship a helper function that does just that - pipeline from the stream/promises module:

import { pipeline } from 'node:stream/promises';

const fd = fs.createReadStream('/some/file/name.txt');
const hash = crypto.createHash('sha1');
hash.setEncoding('hex');
// read all the file and pipe it (write it) to the hash object
const end = pipeline(fd, hash);

(Note that the pipeline promise resolves with no value, so with this variant you would call hash.read() yourself after awaiting it.)
Now you can await that promise:
(async function () {
    let sha1sum = await end;
    console.log(sha1sum);
}());
If you are using Node version >= v10.0.0, you can use stream.pipeline and util.promisify.

const fs = require('fs');
const crypto = require('crypto');
const util = require('util');
const stream = require('stream');

const pipeline = util.promisify(stream.pipeline);

const hash = crypto.createHash('sha1');
hash.setEncoding('hex');

async function run() {
    await pipeline(
        fs.createReadStream('/some/file/name.txt'),
        hash
    );
    console.log('Pipeline succeeded');
    console.log(hash.read()); // the hex digest is ready once the pipeline has finished
}

run().catch(console.error);
Node v15 now has a promisified pipeline in stream/promises.
This is the cleanest and most official way.
const fs = require('fs');
const zlib = require('zlib');
const { pipeline } = require('stream/promises');

async function run() {
    await pipeline(
        fs.createReadStream('archive.tar'),
        zlib.createGzip(),
        fs.createWriteStream('archive.tar.gz')
    );
    console.log('Pipeline succeeded.');
}

run().catch(console.error);
We should all appreciate how much work is done here:
Capture errors in all the streams.
Destroy unfinished streams when an error is raised.
Only return when the last writable stream is finished.
This pipe mechanism is one of the most powerful features Node.js has. Making it fully async is not easy. Now we have it.
Something like this works (assuming it runs inside an async function, with filePath and totalBytes defined in the surrounding scope):

for (var res of fetchResponses) { // node-fetch package responses
    const dest = fs.createWriteStream(filePath, { flags: 'a' });
    totalBytes += Number(res.headers.get('content-length'));
    await new Promise((resolve, reject) => {
        res.body.pipe(dest);
        res.body.on("error", (err) => {
            reject(err);
        });
        dest.on("finish", function () {
            resolve();
        });
    });
}
2021 Update:
New example from Node documentation:
async function print(readable) {
    readable.setEncoding('utf8');
    let data = '';
    for await (const chunk of readable) {
        data += chunk;
    }
    console.log(data);
}
see https://nodejs.org/api/stream.html#stream_readable_symbol_asynciterator
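The same async-iteration pattern also fits the hashing example from the question (a sketch, assuming a Node version where readable streams are async iterable, i.e. >= 10):

const fs = require('fs');
const crypto = require('crypto');

async function sha1sum(path) {
    const hash = crypto.createHash('sha1');
    // feed each chunk to the hash as it arrives
    for await (const chunk of fs.createReadStream(path)) {
        hash.update(chunk);
    }
    return hash.digest('hex');
}

sha1sum('/some/file/name.txt').then(console.log).catch(console.error);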
I would comment, but don't have enough reputation.
A WORD OF CAUTION:
If you have an application that is passing streams around AND doing async/await, be VERY CAREFUL to connect ALL pipes before you await. You can end up with streams not containing what you thought they did. Here's a minimal example:
const { PassThrough } = require('stream');

async function main() {
    const initialStream = new PassThrough();

    const otherStream = new PassThrough();
    const data = [];
    otherStream.on('data', dat => data.push(dat));
    const resultOtherStreamPromise = new Promise(resolve =>
        otherStream.on('end', () => { resolve(Buffer.concat(data)) }));

    const yetAnotherStream = new PassThrough();
    const data2 = [];
    yetAnotherStream.on('data', dat => data2.push(dat));
    const resultYetAnotherStreamPromise = new Promise(resolve =>
        yetAnotherStream.on('end', () => { resolve(Buffer.concat(data2)) }));

    initialStream.pipe(otherStream);
    initialStream.write('some ');

    await Promise.resolve(); // Completely unrelated await

    initialStream.pipe(yetAnotherStream);
    initialStream.end('data');

    const [resultOtherStream, resultYetAnotherStream] = await Promise.all([
        resultOtherStreamPromise,
        resultYetAnotherStreamPromise,
    ]);

    console.log('other stream:', resultOtherStream.toString()); // other stream: some data
    console.log('yet another stream:', resultYetAnotherStream.toString()); // yet another stream: data
}

main();
I believe this will be helpful for someone:

function readFile(filename) {
    return new Promise((resolve, reject) => {
        const records = [];
        fs.createReadStream(filename)
            .on("data", (data) => {
                records.push(data);
            })
            .on("error", reject) // reject instead of hanging forever on a bad path
            .on("end", () => {
                resolve(records);
            });
    });
}
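The promise resolves with an array of Buffer chunks, so a caller would typically concatenate them (hypothetical usage):

readFile('/etc/hosts').then(chunks => {
    console.log(Buffer.concat(chunks).toString('utf8'));
});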

How to make fibrous wait in Node.js?

I'm new to Node.js, and realized that one of the big differences between it and client-side JavaScript is how asynchronous everything is.
To try and address this, I'm attempting to use fibrous to turn my code back into a more functional style of programming, but I'm having some problems:
How can I make the following fibrous code work?
For example, I'd like the following code to print 1,2,3, but it prints 1,3,2:
function test() {
    var fibrous = require('fibrous');
    var fs = require('fs');
    fibrous.run(function () {
        var data = fs.sync.readFile('/etc/passwd');
        console.log('2');
    });
}

function runTest() {
    console.log('1');
    test();
    console.log('3');
}

runTest();
// This prints 1,3,2 to the console, not 1,2,3 as I'd like.
In a real use case, the above routine would wrap a DB method that runs async, and make it so I could write things like:
var dbTable = new dbTableWrapper();
var data = dbTable.getData();
/*
... do things with the data.
The "getData" routine is the same as my "test" function above.
*/
Is the answer to run the (newly added) "runTest" routine using a fibrous.run call itself?
That's part of it, yeah. Fibrous will need to call runTest itself to be able to manage its execution.
Then, test just needs to be wrapped rather than .run():
var test = fibrous(function () {
    var data = fs.sync.readFile('/etc/passwd');
    console.log('2');
});
And should be called with .sync():
test.sync();
var fibrous = require('fibrous');
var fs = require('fs');

var test = fibrous(function () {
    var data = fs.sync.readFile('/etc/passwd');
    console.log('2');
});

function runTest() {
    console.log('1');
    test.sync();
    console.log('3');
}

fibrous.run(runTest);
Another example, using Express:
var express = require('express');
var router = express.Router();
var fibrous = require('fibrous');

router.use(fibrous.middleware);

router.get('/sync', function (req, res, next) {
    var order_categories = Order_Category.sync.list(options);
    console.log("Order_Category count : ", order_categories.length);
    var content_tags = Content_Tag.sync.list(options);
    console.log("content_tags count : ", content_tags.length);
    var creatives = Creative.sync.list(options);
    console.log("creatives count : ", creatives.length);
    return res.send({
        order_categories: order_categories,
        content_tags: content_tags,
        creatives: creatives
    });
});
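(A side note for readers landing here today: on modern Node the same "write it as if it were synchronous" effect comes from async/await, with no fiber library at all. A minimal sketch of the 1,2,3 example from the question, using fs.promises:)

const fs = require('fs');

async function runTest() {
    console.log('1');
    var data = await fs.promises.readFile('/etc/passwd');
    console.log('2');
    console.log('3');
}

runTest(); // prints 1,2,3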
