I'm trying to parse a specification website from saved HTML on my computer. I can post the file upon request.
I'm burnt out trying to figure out why it won't run synchronously. The comments should log the CCCC's first, then BBBB's, then finally one AAAA.
The code I'm running will not wait at the first hurdle (it prints AAAA... first). Am I using request-promise incorrectly? What is going on?
Is this due to the .each() method of cheerio (I'm assuming it's synchronous)?
const rp = require('request-promise');
const fs = require('fs');
const cheerio = require('cheerio');
/**
 * Loads a saved contents page and requests every topic link found in it.
 *
 * NOTE(review): as posted, the callback handed to `.each()` is async but
 * nothing waits for the promises it returns, so the "AAAA…" marker is logged
 * before any request has finished (the behaviour described in the question).
 *
 * @param {string} contentsHtmlFile - path of the saved contents HTML file.
 * @returns {Promise<Array>} resolves with `topics`; nothing in this block ever
 *   adds an element to it.
 */
async function parseAutodeskSpec(contentsHtmlFile) {
const topics = [];
// The file is read synchronously and parsed into a cheerio document.
const contentsPage = cheerio.load(fs.readFileSync(contentsHtmlFile).toString());
const contentsSelector = '.content_htmlbody table td div div#divtreed0e338374 nobr .toc_entry a.treeitem';
// One request per matched anchor; each callback invocation returns its own promise.
contentsPage(contentsSelector).each(async (idx, topicsAnchor) => {
const topicsHtml = await rp(topicsAnchor.attribs['href']);
console.log("topicsHtml.length: ", topicsHtml.length);
});
console.log("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
return topics;
}
Try it this way:
// Collect the hrefs synchronously first, then do all the awaiting in a plain
// loop so that each request finishes before the next one starts.
const hrefs = contentsPage(contentsSelector)
  .map((idx, topicsAnchor) => topicsAnchor.attribs['href'])
  .get();
let topicsHtml;
// `const href` keeps the loop variable local; the bare `href` would create an
// implicit global (and throws a ReferenceError in strict mode / ES modules).
for (const href of hrefs) {
  topicsHtml = await rp(href);
  console.log("topicsHtml.length: ", topicsHtml.length);
}
Now the await is outside of map or each which doesn't quite work the way you think.
As @lumio stated in his comment, I also think that this is because the each function is synchronous.
You should rather use the map method and call Promise.all() on the result to wait until every request has finished:
// Start one request per anchor. Every async callback returns a promise, so the
// cheerio set produced by .map() holds one promise per matched element.
const obj = contentsPage(contentsSelector).map(async (idx, topicsAnchor) => {
  const topicsHtml = await rp(topicsAnchor.attribs['href']);
  console.log("topicsHtml.length: ", topicsHtml.length);
  const topicsFromPage = await parseAutodeskTopics(topicsHtml);
  console.log("topicsFromPage.length: ", topicsFromPage.length);
  // concat() returns a new array and leaves `topics` untouched, so the results
  // were being thrown away; push them into the existing array instead.
  // NOTE(review): assumes parseAutodeskTopics resolves to an array — confirm.
  topics.push(...topicsFromPage);
});
// .get() turns the cheerio wrapper into a plain array of the pending promises.
const filtered = obj.get();
await Promise.all(filtered);
console.log("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
Based on the other answers here I came to a rather elegant conclusion. Note the avoidance of async/await in the .map() callback, as cheerio's callbacks (and from what I've learned about async/await, generally all callbacks) seem not to honour the synchronous nature of await well:
/**
 * Requests every topic page linked from a saved contents page and parses each
 * response with `parseAutodeskTopics`.
 *
 * @param {string} contentsHtmlFile - path of the saved contents HTML file.
 * @returns {Promise<Array>} one `parseAutodeskTopics` result per matched link,
 *   in document order.
 */
async function parseAutodeskSpec(contentsHtmlFile) {
  const contentsPage = cheerio.load(fs.readFileSync(contentsHtmlFile).toString());
  const contentsSelector = '.content_htmlbody table td div div#divtreed0e338374 nobr .toc_entry a.treeitem';
  // The .map() callback is not async: it only starts each request and returns
  // the promise, so all requests are in flight before anything is awaited.
  const contentsReqs = contentsPage(contentsSelector)
    .map((idx, elem) => rp(contentsPage(elem).attr('href')))
    .toArray();
  // Await first, then map. `await Promise.all(x).map(...)` would look up `.map`
  // on the promise itself, because member access binds tighter than `await`.
  const topicsHtmls = await Promise.all(contentsReqs);
  const topicsReqs = topicsHtmls.map(req => parseAutodeskTopics(req));
  return Promise.all(topicsReqs);
}
Related
I record favorite offers that a user 'hearts' in my app . These records include the owner and offer IDs. I want to collect the top 25 favorited Offers for a particular user. All firestore commands are asynchronous and I need to collect all the offer objects before I render the page.
It's my first time using async / await and what started as one has quickly grown into nested async / awaits. There must be a simpler way to collect the IDs from the fav objects and then look up the Offers with those IDs?
// Collects the offer ids of the current user's favourites into `favsList`, then
// looks up each offer document and appends its data to `offerList`.
// NOTE(review): `favsRef`, `favsList` and `offerList` are not declared in this
// snippet — presumably they live in an enclosing scope; confirm.
async getItems() {
// Step 1: query up to 25 favourites of this owner, newest first.
const collectfavs = async () => {
favsRef = firestore.collection('favs').where('owner','==',getUserID()).orderBy('created', 'desc').limit(25);
let allFavsSnapshot = await favsRef.get();
allFavsSnapshot.forEach(doc => {
let data = doc.data();
favsList.push(data.offer);
});
console.log('favs:',favsList);
}
// Step 2: fetch the offer document behind every collected id.
// NOTE(review): forEach does not wait for the promises returned by an async
// callback, so the 'offers:' log below can run before any offer was pushed.
const collectoffers = async () => {
favsList.forEach(async (fav) => {
let doc = await firestore.collection('offers').doc(fav).get()
console.log('doc:', doc);
let data = doc.data();
// Copy the document id onto the plain data object before storing it.
data.id = doc.id;
offerList.push(data);
});
console.log('offers:', offerList);
}
await collectfavs();
await collectoffers();
}
I'm not sure why you're defining two local functions just to call them each once. That seems like more code than necessary to get the job done. Other than that, what you're doing doesn't seem very complex to me. But if you want to reduce the lines of code:
async getItems() {
favsRef = firestore.collection('favs').where('owner','==',getUserID()).orderBy('created', 'desc').limit(25);
let allFavsSnapshot = await favsRef.get();
allFavsSnapshot.forEach(doc => {
let data = doc.data();
favsList.push(data.offer);
});
console.log('favs:',favsList);
favsList.forEach(async (fav) => {
let doc = await firestore.collection('offers').doc(fav).get()
console.log('doc:', doc);
let data = doc.data();
data.id = doc.id;
offerList.push(data);
});
console.log('offers:', offerList);
}
Bear in mind that I have no idea where you defined favsList and offerList, so I'm just blindly using it the same way you showed.
As I understand from your comment to Doug Stevenson above, you want to ensure that your lists are filled before using them. In order to do that you could use Promises. What Promises do is give us a way to deal with asynchrony in the code in a sequential manner.
All you need to do is create a promise that will ensure that you first fill the Lists you want and then you use them.
Let me know if this helps.
I ended up needing to create my own asyncForEach routine to accommodate the async calls to firebase inside the loop. Thanks to an article by Sebastien Chopin asyncForEach
/**
 * Sequential, awaiting counterpart of Array.prototype.forEach.
 *
 * @param {Array} array - items to visit, in index order.
 * @param {Function} callback - called as (item, index, array); its result is
 *   awaited before the next item is visited.
 * @returns {Promise<void>} settles after the last callback has settled.
 */
async function asyncForEach(array, callback) {
  let position = 0;
  // The length is re-read on every pass, exactly like a classic index loop.
  while (position < array.length) {
    await callback(array[position], position, array);
    position += 1;
  }
}
and then in my getItems:
// Look up the offer behind every favourite, one request after the other
// (asyncForEach awaits each callback), and collect the results.
await asyncForEach(favsList, async (offerId) => {
  const snapshot = await firestore.collection('offers').doc(offerId).get();
  const offer = snapshot.data();
  // Carry the document id along with the plain data.
  offer.id = snapshot.id;
  offerList.push(offer);
});
There are similar questions like this one here in stackoverflow, but I can't find one that resolves this in particular. I have two forEach loops in Node and one (this one: s.forEach((su) => { cw1.push(su.courseWorkId); }); ) uses the results of the previous one to do its iteration, but it fires asynchronously before the first one finishes so it always tells me that "const s" (the result of the first forEach loop) is empty. Everything else in the code works fine. Any help would be appreciated.
const fs = require('fs');
const path = require('path');
const {google} = require('googleapis');
const keys = require("./file.json");
const sc = require('./jwt.js');
const scopes = sc.scopes;
// JWT client built from the service-account key file.
// NOTE(review): the last argument is presumably the account to impersonate and
// therefore has to be an e-mail address; the pasted value 'me#email.com' had
// '#' where '@' belongs — confirm the real address.
const j2 = new google.auth.JWT(keys.client_email,null,keys.private_key,scopes,'me@email.com');
const classroom = google.classroom({version: 'v1', auth:j2});
const cl = classroom.courses;
// Course ids to query, and the list that collects the ids found.
let cl1 = ['34809075556', '34800434710'];
let cw1 = [];
// Requests the course work of every course id in `cl1` and pushes each item's
// `courseWorkId` into the module-level `cw1` array.
// NOTE(review): the API reports through callbacks, so getwi() itself returns
// before `cw1` has been filled; the error argument `e` is never inspected.
function getwi(){
cl1.forEach((ids) => {
cl.courseWork.list({courseId:ids}, (e,r) => {
// `r.data.courseWork` holds the course-work items of this one course.
const s = r.data.courseWork;
s.forEach((su) => { cw1.push(su.courseWorkId); });
});
});
}
getwi();
Modifying a global variable asynchronously is generally a bad idea. Instead wrap the asynchronous task into a promise, then resolve that promise to the value you need.
// Promise wrapper around the callback-style courseWork.list call for one
// course: rejects with the reported error, otherwise resolves with the result.
const getCourseWork = (courseId) =>
  new Promise((resolve, reject) => {
    const settle = (err, result) => {
      if (err) {
        reject(err);
      } else {
        resolve(result);
      }
    };
    cl.courseWork.list({ courseId }, settle);
  });
Then you can get the result as:
// Every resolved value is a whole API response, so the course-work items have
// to be taken from `data.courseWork` (as the question's callback does) before
// their ids can be read; flattening the responses themselves yields nothing.
Promise.all(cl1.map(getCourseWork))
  .then(responses => {
    // A course without course work may have no `courseWork` field at all.
    const courseWork = responses.flatMap(response => response.data.courseWork || []);
    console.log("The result is:", courseWork.map(it => it.id));
  })
  .catch(error => console.error("An error occured:", error));
That way, all the calls are done in parallel, and are thus faster than doing one after another.
Read on
I have a function which includes the following:
// Intended to keep only the items whose thread page contains a bold "Today"
// marker.
// NOTE(review): request() reports through a callback, so reduce() has already
// returned `acc` by the time any callback pushes into it — the log at the end
// prints an empty array, as described in the question.
const newThreads = newItems.reduce( (acc, item) => {
request(item.href, function(error2, response2, html2){
// Failed requests are skipped without any report.
if(!error2) {
const thread = cheerio.load(html2)
// Walk two levels up from the button container and look for the marker there.
const today = thread('div#ignmsgbttns1').parent().parent().find("b:contains('Today')")
if(today.text()) {
acc.push(item)
}
}
})
return acc
}, [])
console.log(newThreads)
Of course the log returns an empty array, because of the async stuff
(request) executed in the reduce loop.
So what I would like to do is:
const newThreads = await newItems.reduce( etc...
And wait for the requests in the reduce loop to be done.
But I don't get my head around how to do it properly.
So I know I have to use async, await or promises, but don't know how to do it.
I think the reduce callback also has to be async but absolutely not sure on this point.
The request method comes from the npm request package , they also provide some packages to use promises, but to be honest, I don't know how to apply it with reduce.
I'm pretty sure there is already a similar question somewhere but couldn't find it.
Any help would be greatly appreciated.
ps: for those wondering what cheerio is, here the link.
Final code after applying answer
I had to use the async-request package
// Sequentially checks every item's thread page and keeps the items whose page
// shows a bold "Today" marker. The accumulator is a promise of the kept list,
// so each step first waits for the previous one.
const newThreads = newItems.reduce(async (pendingKept, candidate) => {
  const kept = await pendingKept;
  const response = await requestAsync(candidate.href);
  const $thread = cheerio.load(response.body);
  const buttonRow = $thread('div#ignmsgbttns1');
  const todayMarker = buttonRow.parent().parent().find("b:contains('Today')");
  if (todayMarker.text()) {
    kept.push(candidate);
  }
  return kept;
}, []);
newThreads.then((res) => {
  //..doing stuff with res
});
In order to make this work you'll need the Promise returning version.
// Sequentially checks every item's thread page and keeps those that show a bold
// "Today" marker. `request` must be the promise-returning variant.
const newThreads = newItems.reduce(async (acc, item) => { // note async
  const current = await acc; // unwrap the previous Promise
  try {
    const html2 = await request(item.href); // unwrap request Promise
    const thread = cheerio.load(html2);
    const today = thread('div#ignmsgbttns1')
      .parent()
      .parent()
      .find("b:contains('Today')");
    if (today.text()) current.push(item);
  } catch (error2) {
    // A failed request only skips this item, but it is reported instead of
    // being swallowed silently; replace with whatever handling fits.
    console.error('Request failed for', item.href, error2);
  }
  return current;
}, []);
The newThreads variable will be a Promise of an array of items that passed the conditional check.
By default the Promise.all([]) function returns a number-indexed array that contains the results of each promise.
var promises = [];
promises.push(myFuncAsync1()); // resolves to 1
promises.push(myFuncAsync2()); // resolves to 2
Promise.all(promises).then((results) => {
  // results = [1, 2] — positions follow the order of `promises`,
  // not the order in which the promises settle
});
What is the best vanilla way to return a named index of results with Promise.all()?
I tried with a Map, but it returns results in an array this way:
[key1, value1, key2, value2]
UPDATE:
My question seems unclear; here is why I don't like order-based indexes:
it's crappy to maintain: if you add a promise in your code you may have to rewrite the whole results function because the index may have change.
it's awful to read: results[42] (can be fixed with jib's answer below)
Not really usable in a dynamic context:
var promises = [];
if(...)
promises.push(...);
else{
[...].forEach(... => {
if(...)
promises.push(...);
else
[...].forEach(... => {
promises.push(...);
});
});
}
Promise.all(promises).then((resultsArr)=>{
/*Here i am basically fucked without clear named results
that dont rely on promises' ordering in the array */
});
ES6 supports destructuring, so if you just want to name the results you can write:
// Two stand-in async functions that resolve immediately with fixed values.
var myFuncAsync1 = () => {
  return Promise.resolve(1);
};
var myFuncAsync2 = () => {
  return Promise.resolve(2);
};

// Array destructuring in the handler gives each positional result a name.
const logBoth = ([result1, result2]) => console.log(result1 + " and " + result2); //1 and 2
const reportFailure = (e) => console.error(e);

Promise.all([myFuncAsync1(), myFuncAsync2()])
  .then(logBoth)
  .catch(reportFailure);
Works in Firefox and Chrome now.
Is this the kind of thing?
var promises = [];
// Tag every result with a name so it can be looked up later without relying on
// its position in the array.
promises.push(myFuncAsync1().then(r => ({name : "func1", result : r})));
promises.push(myFuncAsync2().then(r => ({name : "func2", result : r})));
Promise.all(promises).then(results => {
  // Fold the tagged results into a name -> result dictionary.
  var lookup = results.reduce((prev, curr) => {
    prev[curr.name] = curr.result;
    return prev;
  }, {});
  var firstResult = lookup["func1"];
  var secondResult = lookup["func2"];
});
If you don't want to modify the format of result objects, here is a helper function that allows assigning a name to each entry to access it later.
/**
 * Promise.all for an object: waits for every value of `nameToPromise` and
 * resolves with an object holding the results under the same names.
 *
 * @param {Object} nameToPromise - promises (or plain values) keyed by name.
 * @returns {Promise<Object>} name -> resolved value; rejects as soon as any
 *   of the promises rejects.
 */
const allNamed = (nameToPromise) => {
  const pairs = Object.entries(nameToPromise);
  const names = pairs.map(([name]) => name);
  const pending = pairs.map(([, promise]) => promise);
  return Promise.all(pending).then((settled) =>
    // Results come back in input order, so the position identifies the name.
    settled.reduce((byName, value, position) => {
      byName[names[position]] = value;
      return byName;
    }, {})
  );
};
Usage:
// Both requests start immediately; allNamed waits for the pair and hands the
// status codes back under the names given here.
const statusOf = (url) => fetch(url).then((rs) => rs.status);
var lookup = await allNamed({
  rootStatus: statusOf('https://stackoverflow.com/'),
  badRouteStatus: statusOf('https://stackoverflow.com/badRoute'),
});
// firstResult = 200, secondResult = 404
var { rootStatus: firstResult, badRouteStatus: secondResult } = lookup;
If you are using typescript you can even specify relationship between input keys and results using keyof construct:
// Unwraps a promise-like type: ThenArg<Promise<X>> is X; any other T stays T.
type ThenArg<T> = T extends PromiseLike<infer U> ? U : T;
/**
 * Typed variant of allNamed: waits for every promise in `nameToPromise` and
 * resolves with an object that has the same keys, each mapped to the resolved
 * value of the corresponding promise.
 *
 * @param nameToPromise - promises keyed by name.
 * @returns promise of the name -> resolved value object (`TResolved`).
 */
export const allNamed = <
T extends Record<string, Promise<any>>,
TResolved extends {[P in keyof T]: ThenArg<T[P]>}
>(nameToPromise: T): Promise<TResolved> => {
const entries = Object.entries(nameToPromise);
return Promise.all(entries.map(e => e[1]))
.then(results => {
// Starts empty and is filled key by key below, hence the cast.
const nameToResult: TResolved = <any>{};
for (let i = 0; i < results.length; ++i) {
// results[i] belongs to entries[i]: Promise.all preserves input order.
const name: keyof T = entries[i][0];
nameToResult[name] = results[i];
}
return nameToResult;
});
};
A great solution for this is to use async await. Not exactly ES6 like you asked, but ES8! But since Babel supports it fully, here we go:
You can avoid using only the array index by using async/await as follows.
This async function allows you to literally halt your code inside of it by allowing you to use the await keyword inside of the function, placing it before a promise. As soon as an async function encounters await on a promise that hasn't yet been resolved, the function immediately returns a pending promise. This returned promise resolves as soon as the function actually finishes later on. The function will only resume when the previously awaited promise is resolved, during which it will resolve the entire await Promise statement to the return value of that Promise, allowing you to put it inside of a variable. This effectively allows you to halt your code without blocking the thread. It's a great way to handle asynchronous stuff in JavaScript in general, because it makes your code more chronological and therefore easier to reason about:
/**
 * Resolves every value of an object of promises.
 *
 * @param {Object} promiseObject - promises (or plain values) keyed by name.
 * @returns {Promise<Object>} object with the same keys, each mapped to the
 *   resolved value; rejects as soon as any of the promises rejects.
 */
async function resolvePromiseObject(promiseObject) {
  // Destructuring into declared constants: the original `for ([key, value] of …)`
  // assigned to undeclared variables, which leaks globals in sloppy mode and
  // throws a ReferenceError in strict mode / ES modules.
  const entries = Object.entries(promiseObject);
  // Promise.all keeps input order, so values[i] belongs to entries[i].
  const values = await Promise.all(entries.map(([, value]) => value));
  return Object.fromEntries(entries.map(([key], index) => [key, values[index]]));
}
As with anything above ES5: Please make sure that Babel is configured correctly so that users on older browsers can run your code without issue. You can make async await work flawlessly on even IE11, as long as your babel configuration is right.
In regards to @kragovip's answer, the reason you want to avoid that is shown here:
https://medium.com/front-end-weekly/async-await-is-not-about-making-asynchronous-code-synchronous-ba5937a0c11e
"...it’s really easy to get used to await all of your network and I/O calls.
However, you should be careful when using it multiple times in a row as the await keyword stops execution of all the code after it. (Exactly as it would be in synchronous code)"
Bad Example (DONT FOLLOW)
async function processData() {
const data1 = await downloadFromService1();
const data2 = await downloadFromService2();
const data3 = await downloadFromService3();
...
}
"There is also absolutely no need to wait for the completion of first request as none of other requests depend on its result.
We would like to have requests sent in parallel and wait for all of them to finish simultaneously. This is where the power of asynchronous event-driven programming lies.
To fix this we can use Promise.all() method. We save Promises from async function calls to variables, combine them to an array and await them all at once."
Instead
/**
 * Starts all three downloads before waiting for any of them, then waits for
 * the whole group at once.
 *
 * @returns {Promise<Array>} the three results, in call order.
 */
async function processData() {
  // Calling without `await` only starts each request and keeps its promise.
  const promise1 = downloadFromService1();
  const promise2 = downloadFromService2();
  const promise3 = downloadFromService3();
  const allResults = await Promise.all([promise1, promise2, promise3]);
  return allResults;
}
In https://stackoverflow.com/a/18658613/779159 is an example of how to calculate the md5 of a file using the built-in crypto library and streams (the snippet below uses SHA-1 instead).
// Computes the SHA-1 digest of a file by streaming it into a crypto hash.
var fs = require('fs');
var crypto = require('crypto');
// the file whose hash is wanted
var fd = fs.createReadStream('/some/file/name.txt');
var hash = crypto.createHash('sha1');
// Makes hash.read() return the digest as a hex string.
hash.setEncoding('hex');
// Once the whole file has been read, close the hash and read the digest back.
fd.on('end', function() {
hash.end();
console.log(hash.read()); // the desired sha1sum
});
// read the whole file and pipe it (write it) to the hash object
fd.pipe(hash);
But is it possible to convert this to using ES8 async/await instead of using the callback as seen above, but while still keeping the efficiency of using streams?
The await keyword only works on promises, not on streams. There are ideas to make an extra stream-like data type that would get its own syntax, but those are highly experimental if at all and I won't go into details.
Anyway, your callback is only waiting for the end of the stream, which is a perfect fit for a promise. You'd just have to wrap the stream:
var fd = fs.createReadStream('/some/file/name.txt');
var hash = crypto.createHash('sha1');
hash.setEncoding('hex');
// read the whole file and pipe it (write it) to the hash object
fd.pipe(hash);
// Promise of the hex digest; rejects if reading or hashing fails.
var end = new Promise(function(resolve, reject) {
  // 'finish' fires once the hash has received all of its input. 'end' is only
  // emitted after the readable side has been consumed, and nothing reads from
  // `hash` here, so waiting for 'end' would never resolve.
  hash.on('finish', () => resolve(hash.read()));
  hash.on('error', reject);
  fd.on('error', reject); // or something like that. might need to close `hash`
});
There also exists a helper function to do just that in more recent versions of nodejs - pipeline from the stream/promises module:
import { pipeline } from 'node:stream/promises';
const fd = fs.createReadStream('/some/file/name.txt');
const hash = crypto.createHash('sha1');
hash.setEncoding('hex');
// read the whole file and pipe it (write it) to the hash object
// The pipeline promise itself fulfils without a value, so the digest is read
// from the hash once the pipeline is complete; `end` then resolves to the
// hex-encoded sha1sum.
const end = pipeline(fd, hash).then(() => hash.read());
Now you can await that promise:
// Wait for the digest promise inside an immediately invoked async function and print the result.
(async () => {
  const sha1sum = await end;
  console.log(sha1sum);
})();
If you are using node version >= v10.0.0 then you can use stream.pipeline and util.promisify.
const fs = require('fs');
const crypto = require('crypto');
const util = require('util');
const stream = require('stream');
// Promise-returning wrapper around the callback-style stream.pipeline.
const pipeline = util.promisify(stream.pipeline);
const hash = crypto.createHash('sha1');
hash.setEncoding('hex');

/** Streams the file into `hash` and logs once the pipeline has completed. */
async function run() {
  const source = fs.createReadStream('/some/file/name.txt');
  await pipeline(source, hash);
  console.log('Pipeline succeeded');
}

// Any stream failure ends up here.
run().catch((err) => console.error(err));
Node v15 now has a promisified pipeline in stream/promises.
This is the cleanest and most official way.
const { pipeline } = require('stream/promises');
/** Gzips archive.tar into archive.tar.gz and logs once the pipeline has completed. */
async function run() {
  const stages = [
    fs.createReadStream('archive.tar'),
    zlib.createGzip(),
    fs.createWriteStream('archive.tar.gz'),
  ];
  await pipeline(...stages);
  console.log('Pipeline succeeded.');
}

// Any stream failure ends up here.
run().catch((err) => console.error(err));
We should all appreciate how much work is done here:
Capture errors in all the streams.
Destroy unfinished streams when error is raised.
Only return when the last writable stream is finished.
This pipe thing is one of the most powerful features Node.js has. Making it fully async is not easy. Now we have it.
Something like this works:
// Appends every response body to the same file, one response after the other,
// and adds up the announced content lengths.
for (const res of fetchResponses) { // node-fetch package responses
  const dest = fs.createWriteStream(filePath, { flags: 'a' });
  totalBytes += Number(res.headers.get('content-length'));
  await new Promise((resolve, reject) => {
    res.body.on("error", (err) => {
      // pipe() does not close the destination when the source fails.
      dest.destroy();
      reject(err);
    });
    // A failed write would otherwise be an unhandled 'error' event and leave
    // this promise pending forever.
    dest.on("error", reject);
    dest.on("finish", resolve);
    res.body.pipe(dest);
  });
}
2021 Update:
New example from Node documentation:
/**
 * Reads a stream to the end as UTF-8 text and logs everything it contained.
 *
 * @param {import('stream').Readable} readable - stream to drain.
 * @returns {Promise<void>} settles after the text has been logged.
 */
async function print(readable) {
  readable.setEncoding('utf8');
  // Drive the stream's async iterator by hand; each step yields one chunk.
  const chunks = readable[Symbol.asyncIterator]();
  let text = '';
  let step = await chunks.next();
  while (!step.done) {
    text = text + step.value;
    step = await chunks.next();
  }
  console.log(text);
}
see https://nodejs.org/api/stream.html#stream_readable_symbol_asynciterator
I would comment, but don't have enough reputation.
A WORD OF CAUTION:
If you have an application that is passing streams around AND doing async/await, be VERY CAREFUL to connect ALL pipes before you await. You can end up with streams not containing what you thought they did. Here's the minimal example
const { PassThrough } = require('stream');
// Demonstrates the caveat described above: a pipe that is connected only after
// an `await` misses what was written to the source stream before it.
async function main() {
const initialStream = new PassThrough();
// First consumer: collects every chunk and resolves with the concatenation on 'end'.
const otherStream = new PassThrough();
const data = [];
otherStream.on('data', dat => data.push(dat));
const resultOtherStreamPromise = new Promise(resolve => otherStream.on('end', () => { resolve(Buffer.concat(data)) }));
// Second consumer: set up exactly like the first one.
const yetAnotherStream = new PassThrough();
const data2 = [];
yetAnotherStream.on('data', dat => data2.push(dat));
const resultYetAnotherStreamPromise = new Promise(resolve => yetAnotherStream.on('end', () => { resolve(Buffer.concat(data2)) }));
// The first consumer is piped BEFORE the first write ...
initialStream.pipe(otherStream);
initialStream.write('some ');
await Promise.resolve(); // Completely unrelated await
// ... the second one only AFTER the await, when 'some ' has already been written.
initialStream.pipe(yetAnotherStream);
initialStream.end('data');
const [resultOtherStream, resultYetAnotherStream] = await Promise.all([
resultOtherStreamPromise,
resultYetAnotherStreamPromise,
]);
console.log('other stream:', resultOtherStream.toString()); // other stream: some data
console.log('yet another stream:', resultYetAnotherStream.toString()); // yet another stream: data
}
main();
I believe it will be helpful for someone:
/**
 * Reads a file through a stream and collects every chunk it emits.
 *
 * @param {string} filename - path of the file to read.
 * @returns {Promise<Buffer[]>} the chunks in arrival order; rejects with the
 *   stream's error if the file cannot be opened or read.
 */
function readFile(filename) {
  return new Promise((resolve, reject) => {
    const records = [];
    fs.createReadStream(filename)
      .on("data", (data) => {
        records.push(data);
      })
      .on("end", () => {
        resolve(records);
      })
      // Without this listener a failed read is an unhandled 'error' event that
      // crashes the process, and the promise never settles.
      .on("error", reject);
  });
}