How to use ES8 async/await with streams? - javascript

In https://stackoverflow.com/a/18658613/779159 is an example of how to calculate the md5 of a file using the built-in crypto library and streams.
var fs = require('fs');
var crypto = require('crypto');
// the file you want to get the hash
var fd = fs.createReadStream('/some/file/name.txt');
var hash = crypto.createHash('sha1');
hash.setEncoding('hex');
fd.on('end', function() {
hash.end();
console.log(hash.read()); // the desired sha1sum
});
// read all file and pipe it (write it) to the hash object
fd.pipe(hash);
But is it possible to convert this to using ES8 async/await instead of using the callback as seen above, but while still keeping the efficiency of using streams?

The await keyword only works on promises, not on streams. There are ideas to make an extra stream-like data type that would get its own syntax, but those are highly experimental if at all and I won't go into details.
Anyway, your callback is only waiting for the end of the stream, which is a perfect fit for a promise. You'd just have to wrap the stream:
var fd = fs.createReadStream('/some/file/name.txt');
var hash = crypto.createHash('sha1');
hash.setEncoding('hex');
// read all file and pipe it (write it) to the hash object
fd.pipe(hash);
var end = new Promise(function(resolve, reject) {
hash.on('end', () => resolve(hash.read()));
fd.on('error', reject); // or something like that. might need to close `hash`
});
There also exists a helper function to do just that in more recent versions of nodejs - pipeline from the stream/promises module:
import { pipeline } from 'node:stream/promises';
const fd = fs.createReadStream('/some/file/name.txt');
const hash = crypto.createHash('sha1');
hash.setEncoding('hex');
// read all file and pipe it (write it) to the hash object
const end = pipeline(fd, hash);
Now you can await that promise:
(async function() {
let sha1sum = await end;
console.log(sha1sum);
}());

If you are using node version >= v10.0.0 then you can use stream.pipeline and util.promisify.
const fs = require('fs');
const crypto = require('crypto');
const util = require('util');
const stream = require('stream');
const pipeline = util.promisify(stream.pipeline);
const hash = crypto.createHash('sha1');
hash.setEncoding('hex');
async function run() {
await pipeline(
fs.createReadStream('/some/file/name.txt'),
hash
);
console.log('Pipeline succeeded');
}
run().catch(console.error);

Node V15 now has a promisfiy pipeline in stream/promises.
This is the cleanest and most official way.
const { pipeline } = require('stream/promises');
async function run() {
await pipeline(
fs.createReadStream('archive.tar'),
zlib.createGzip(),
fs.createWriteStream('archive.tar.gz')
);
console.log('Pipeline succeeded.');
}
run().catch(console.error);
We all should appreciate how much works it's done here:
Capture errors in all the streams.
Destroy unfinished streams when error is raised.
Only return when the last writable stream is finished.
This pipe thing is one of the most powerful feature Node.JS has. Making it fully async is not easy. Now we have it.

Something like this works:
for (var res of fetchResponses){ //node-fetch package responses
const dest = fs.createWriteStream(filePath,{flags:'a'});
totalBytes += Number(res.headers.get('content-length'));
await new Promise((resolve, reject) => {
res.body.pipe(dest);
res.body.on("error", (err) => {
reject(err);
});
dest.on("finish", function() {
resolve();
});
});
}

2021 Update:
New example from Node documentation:
async function print(readable) {
readable.setEncoding('utf8');
let data = '';
for await (const chunk of readable) {
data += chunk;
}
console.log(data);
}
see https://nodejs.org/api/stream.html#stream_readable_symbol_asynciterator

I would comment, but don't have enough reputation.
A WORD OF CAUTION:
If you have an application that is passing streams around AND doing async/await, be VERY CAREFUL to connect ALL pipes before you await. You can end up with streams not containing what you thought they did. Here's the minimal example
const { PassThrough } = require('stream');
async function main() {
const initialStream = new PassThrough();
const otherStream = new PassThrough();
const data = [];
otherStream.on('data', dat => data.push(dat));
const resultOtherStreamPromise = new Promise(resolve => otherStream.on('end', () => { resolve(Buffer.concat(data)) }));
const yetAnotherStream = new PassThrough();
const data2 = [];
yetAnotherStream.on('data', dat => data2.push(dat));
const resultYetAnotherStreamPromise = new Promise(resolve => yetAnotherStream.on('end', () => { resolve(Buffer.concat(data2)) }));
initialStream.pipe(otherStream);
initialStream.write('some ');
await Promise.resolve(); // Completely unrelated await
initialStream.pipe(yetAnotherStream);
initialStream.end('data');
const [resultOtherStream, resultYetAnotherStream] = await Promise.all([
resultOtherStreamPromise,
resultYetAnotherStreamPromise,
]);
console.log('other stream:', resultOtherStream.toString()); // other stream: some data
console.log('yet another stream:', resultYetAnotherStream.toString()); // yet another stream: data
}
main();

I believe it will be helpful for someone:
async function readFile(filename) {
let records = []
return new Promise(resolve => {
fs.createReadStream(filename)
.on("data", (data) => {
records.push(data);
})
.on("end", () => {
resolve(records)
});
})
}

Related

How can I make a readline await async promise?

On NodeJS I need to make a grep like function for log research purposes and I'm trying to make it using readline (since I don't want readline-sync). I've read many messages, tutorials, documentation, stackoverflow posts, and so on, but I couldn't understood how to make it works.
const grep = async function(pattern, filepath){
return new Promise((resolve, reject)=>{
let regex = new RegExp(pattern);
let fresult = ``;
let lineReader = require(`readline`).createInterface({
input: require(`fs`).createReadStream(filepath)
});
lineReader.on(`line`, function (line) {
if(line.match(regex)){
fresult += line;
}
});
resolve(fresult);
});
}
let getLogs = await grep(/myregex/gi, filepath);
console.log(getLogs);
Which gives me:
SyntaxError: await is only valid in async functions and the top level bodies of modules
Where am I wrong? I feel like I'm good but did a beginner mistake which is dancing just under my eyes.
What the other answers have missed is that you have two problems in your code.
The error in your question:
Top level await (await not wrapped in an async function) is only possible when running Node.js "as an ES Module", which is when you are using import {xyz} from 'module' rather than const {xyz} = require('module').
As mentioned, one way to fix this is to wrap it in an async function:
// Note: You omitted the ; on line 18 (the closing } bracket)
// which will cause an error, so add it.
(async () => {
let getLogs = await grep(/myregex/gi, filepath);
console.log(getLogs);
})();
A different option is to save your file as .mjs, not .js. This means you must use import rather than require.
The third option is to create a package.json and specify "type": "module". Same rules apply.
A more fundamental issue:
When you call resolve() the event handler in lineReader.on('line') will not have been executed yet. This means that you will be resolving to an empty string and not the user's input. Declaring a function async does nothing to wait for events/callbacks, after all.
You can solve this by waiting for the 'close' event of Readline & only then resolve the promise.
const grep = async function(pattern, filepath){
return new Promise((resolve, reject)=>{
let regex = new RegExp(pattern);
let fresult = ``;
let lineReader = require(`readline`).createInterface({
input: require(`fs`).createReadStream(filepath)
});
lineReader.on(`line`, function (line) {
if(line.match(regex)){
fresult += line;
}
});
// Wait for close/error event and resolve/reject
lineReader.on('close', () => resolve(fresult));
lineReader.on('error', reject);
});
}; // ";" was added
(async () => {
let getLogs = await grep(/myregex/gi, 'foo');
console.log("output", getLogs);
})();
A tip
The events module has a handy utility function named once that allows you to write your code in a shorter and clearer way:
const { once } = require('events');
// If you're using ES Modules use:
// import { once } from 'events'
const grep = async function(pattern, filepath){
// Since the function is 'async', no need for
// 'new Promise()'
let regex = new RegExp(pattern);
let fresult = ``;
let lineReader = require(`readline`).createInterface({
input: require(`fs`).createReadStream(filepath)
});
lineReader.on(`line`, function (line) {
if(line.match(regex)){
fresult += line;
}
});
// Wait for the first 'close' event
// If 'lineReader' emits an 'error' event, this
// will throw an exception with the error in it.
await once(lineReader, 'close');
return fresult;
};
(async () => {
let getLogs = await grep(/myregex/gi, 'foo');
console.log("output", getLogs);
})();
// If you're using ES Modules:
// leave out the (async () => { ... })();
Wrap the function call into an async IIFE:
(async()=>{
let getLogs = await grep(/myregex/gi, filepath);
console.log(getLogs);
})()
try this:
async function run(){
let getLogs = await grep(/myregex/gi, `/`);
console.log(getLogs);
}
run();

Node does not wait for loop to complete

I have tried async/await & using promises however I cannot get this code to execute in order.
The code iterates through a document and parses it before saving it to an array, then saving the array to .json file.
The code continues to run before the loop finishes however which means it writes an empty file as the parsing has not been completed.
Turning it into an async function to await does not solve the issue. Nor does returning a promise and then using .then() to execute final code. It still runs straight away.
const fs = require('fs');
const cheerio = require('cheerio');
const mammoth = require("mammoth");
const articleFolder = './Articles/';
var allArticles = [];
const extractDocuments = async () => {
let files = fs.readdirSync(articleFolder);
for(const file of files) {
await convertToHTML(file);
}
completedExtraction();
}
async function convertToHTML(filename) {
var filepath = articleFolder + filename;
mammoth.convertToHtml({path: filepath})
.then(function(result){
let html = result.value; // The generated HTML
let messages = result.messages; // Any messages, such as warnings during conversion
updateArticles(filename, html);
})
.done();
}
function updateArticles (filename, html) {
var article = {
file: filename,
content: parseHTML(html)
}
allArticles.push(article);
}
function parseHTML (html) {
let $ = cheerio.load(html);
let title = $('h3').first().text();
let date = $('h3:eq(1)').text();
$('h3').slice(0,2).remove()
let content = $('body').html();
let parsedArticle = {
title: title,
date: date,
content: content
}
return parsedArticle;
}
function completedExtraction() {
fs.writeFile('./articles.json', JSON.stringify(allArticles), (err)=>{
if (err) throw err;
console.log('File Written.');
});
console.log('Finished.');
}
extractDocuments();
To solve with map I would do something similar to:
const extractDocuments = async () => {
let files = fs.readdirSync(articleFolder);
const articlePromises = files.map(async file => {
const html = await convertToHTML(file)
return {
filename: file,
html: html
}
})
allArticles = await Promise.all(articlePromises)
completedExtraction();
}
async function convertToHTML(filename) {
var filepath = articleFolder + filename;
return mammoth.convertToHtml({path: filepath})
.then(function(result){
let html = result.value; // The generated HTML
let messages = result.messages; // Any messages, such as warnings during conversion
return html
})
.done();
}
So to wrap up extractDocuments uses a map to iterate and create articles. convertToHTML only returns the created HTML and nothing more. We no longer use the updateArticles since this is now handled in the extractDocuments
Hopes this helps a bit. Hope it points you in the right direction

Firebase - how to await getDownloadURL

I have a function that retrieves the data from a document correctly. However, one image has the URL as it's field already. The other image only has a firebase image reference. Before I proceed to another function, I need to wait until the download URL has been fetched. I've attempted it below without much luck, and I'm not entirely sure i've stuck the async in the right place either.
getPhoto(user_id: string) {
this._subscription = this._activatedRoute.params.pipe(
switchMap(params => {
return this.service.getPhoto(params);
})
).subscribe(async(result) => {
const imageOne = result.imageOne;
// Need to await the download URL here
const imageTwo = this.blah(result.imageTwoRef)
this.otherFunction(imageOne, imageTwo)
});
}
blah(reference){
var storage = firebase.storage();
var imageTwo = reference;
var imagePathRef = storage.ref().child(imageTwo);
imagePathRef.getDownloadURL().then((url) => {
return url;
});
}
Using the async keyword only works on function, and by doing so, it will return a promise. So your usage is correct in that instance.
You can use await only in an async function and next to a promise call. It will stop the execution until your promise get resolved.
I think you are almost done. Try it like this and let me know:
getPhoto(user_id: string) {
this._subscription = this._activatedRoute.params.pipe(
switchMap(params => {
return this.service.getPhoto(params);
})
).subscribe(async(result) => {
const imageOne = result.imageOne;
// Need to await the download URL here
const imageTwo = await this.blah(result.imageTwoRef)
this.otherFunction(imageOne, imageTwo);
});
}
async blah(reference){
var storage = firebase.storage();
var imageTwo = reference;
var imagePathRef = storage.ref().child(imageTwo);
const url = await imagePathRef.getDownloadURL();
return url;
}

Promise condition equal

I want to wait in my code until two values are the same. For this I use
await new Promise((resolve, reject) => {
if(curCount == maxTests) resolve;
});
But I think, this is only called one time. How I can make it that if both values are the same the promise is resolved? How to avoid that it will never send a resolve?
UPDATE:
Some requested the function that makes trouble. Here is the whole function, without it sub function. The function will fill the q-queue to fullfill the tests sync. The problem ist that req.body.selection.forEach immediately returns but I want to wait until the whole queue is ready. So my idea was to add a promise to the end and hit until current and max are the same.
router.post('/imgtest', async (req, res) => {
console.log('Start Image Test');
//Er kommt an.
req.setTimeout(5000000); // Nach Adam Riese 83 Minuten.
process.setMaxListeners(0);
io = req.app.get('socketio');
//Caluclate the max amounnt of tests
const maxTests = req.body.selection.length * req.body.servers.length;
var curCount = 0;
//RETURN IF THIS IS READY. CURRENTLY IT RETURNS IMMEDIATLY
req.body.selection.forEach(async function(entry) {
//Jetzt erstmal die Domain aus der DB holen
var dbUrl = await getUrl(entry);
console.log('tapsi');
var bildFormat = '';
var arrQuestionmark = dbUrl.split('?');
if(arrQuestionmark.length==2){
if(arrQuestionmark[1].includes('&')){
var arrAnd = arrQuestionmark[1].split('&');
arrAnd.forEach(function(entry) {
if(entry.includes('format=')){
var arrFormat = entry.split('=');
bildFormat = arrFormat[1];
}
});
}
}
var masterName = uuidv1();
const orgpath = path.resolve(__basedir, 'tests/downloads', masterName + '-.' + bildFormat);
//Download the MAsterimage
(async () => {
await queue.add(() =>downloadImage(dbUrl, 'c11', req.body.domain, bildFormat, orgpath) );
})();
req.body.servers.forEach(async function(xserver) {
var fileName = masterName + '-' + xserver + '.' + bildFormat;
const dpath = path.resolve(__basedir, 'tests/downloads', fileName);
(async () => {
await queue.add(() => downloadImage(dbUrl, xserver, req.body.domain, bildFormat, dpath));
//console.log('Done ' + entry);
})();
(async () => {
await queue.add(async() => startCompare(orgpath, dpath, 'null:').then(function(result) {
console.log(result);
curCount++;
messageIO(curCount,maxTests);
}));
//console.log('done compare ' + entry);
//fs.unlinkSync(dpath);
})();
});
});
console.log('Need to wait');
res.sendStatus(200);
});
You're correct in assuming that will only be called once. A way around that is to, within the function, create a loop via setInterval - doing a regular check and resolving if true and clearing the loop.
Not too sure what trying to achieve but one thing is for certain, Array.prototype.forEach() will not await even if its callback is async and performing a test inside a new Promise(...) constructor won't help.
Good news though, for loops will await.
Here's the code with for loops instead of .forEach() (twice), unnecessary stuff removed, and otherwise tidied up.
First a suggestion ...
// https://nodejs.org/api/querystring.html
// The querystring module provides utilities for parsing and formatting URL query strings.
const querystring = require('querystring');
... then:
router.post('/imgtest', async (req, res) => {
req.setTimeout(5000000); // Nach Adam Riese 83 Minuten.
process.setMaxListeners(0);
// io = req.app.get('socketio'); // not used
const masterName = uuidv1(); // moved from inner loop
for (i=0; i<req.body.selection.length; i++) {
let entry = req.body.selection[i];
let dbUrl = await getUrl(entry);
let bildFormat = querystring.parse(dbUrl).format;
let orgpath = path.resolve(__basedir, 'tests/downloads', `${masterName}-.${bildFormat}`);
await queue.add(() => downloadImage(dbUrl, 'c11', req.body.domain, bildFormat, orgpath));
for (j=0; j<req.body.servers.length; j++) {
let xserver = req.body.servers[j];
let dpath = path.resolve(__basedir, 'tests/downloads', `${masterName}-${xserver}.${bildFormat}`);
await queue.add(() => downloadImage(dbUrl, xserver, req.body.domain, bildFormat, dpath));
await queue.add(async() => startCompare(orgpath, dpath, 'null:'); // probably safe to remove `async`
// fs.unlinkSync(dpath); // ???
}
}
res.sendStatus(200);
});
This should get you started. I expect that you still have some way to go. At the very least you need to add a try/catch structure and be prepared to send error/status back to the client.

Async not awaiting function before running

I'm trying to parse a specification website from saved HTML on my computer. I can post the file upon request.
I'm burnt out trying to figure out why it won't run synchronously. The comments should log the CCCC's first, then BBBB's, then finally one AAAA.
The code I'm running will not wait at the first hurdle (it prints AAAA... first). Am I using request-promise incorrectly? What is going on?
Is this due to the .each() method of cheerio (I'm assuming it's synchronous)?
const rp = require('request-promise');
const fs = require('fs');
const cheerio = require('cheerio');
async function parseAutodeskSpec(contentsHtmlFile) {
const topics = [];
const contentsPage = cheerio.load(fs.readFileSync(contentsHtmlFile).toString());
const contentsSelector = '.content_htmlbody table td div div#divtreed0e338374 nobr .toc_entry a.treeitem';
contentsPage(contentsSelector).each(async (idx, topicsAnchor) => {
const topicsHtml = await rp(topicsAnchor.attribs['href']);
console.log("topicsHtml.length: ", topicsHtml.length);
});
console.log("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
return topics;
}
Try it this way:
let hrefs = contentsPage(contentsSelector).map((idx, topicsAnchor) => {
return topicsAnchor.attribs['href']
}).get()
let topicsHtml
for(href of hrefs){
topicsHtml = await rp(href);
console.log("topicsHtml.length: ", topicsHtml.length);
}
Now the await is outside of map or each which doesn't quite work the way you think.
As #lumio stated in his comment, I also think that this is because of the each function being synchrone.
You should rather use the map method, and use the Promise.all() on the result to wait enough time:
const obj = contentsPage(contentsSelector).map(async (idx, topicsAnchor) => {
const topicsHtml = await rp(topicsAnchor.attribs['href']);
console.log("topicsHtml.length: ", topicsHtml.length);
const topicsFromPage = await parseAutodeskTopics(topicsHtml)
console.log("topicsFromPage.length: ", topicsFromPage.length);
topics.concat(topicsFromPage);
})
const filtered = Object.keys(obj).filter(key => !isNaN(key)).map(key => obj[key])
await Promise.all(filtered)
console.log("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
Based on the other answers here I came to a rather elegant conclusion. Note the avoidance of async/await in the .map() callback, as cheerio's callbacks (and from what I've learned about async/await, generally all callbacks) seem not to honour the synchronous nature of await well:
async function parseAutodeskSpec(contentsHtmlFile) {
const contentsPage = cheerio.load(fs.readFileSync(contentsHtmlFile).toString());
const contentsSelector = '.content_htmlbody table td div div#divtreed0e338374 nobr .toc_entry a.treeitem';
const contentsReqs = contentsPage(contentsSelector)
.map((idx, elem) => rp(contentsPage(elem).attr('href')))
.toArray();
const topicsReqs = await Promise.all(contentsReqs)
.map(req => parseAutodeskTopics(req));
return await Promise.all(topicsReqs);
}

Categories

Resources