learnyounode #9 juggling async - javascript

I am trying to go through nodeschool's learnyounode.
This problem is the same as the previous problem (HTTP COLLECT) in
that you need to use http.get(). However, this time you will be
provided with three URLs as the first three command-line arguments.
You must collect the complete content provided to you by each of the
URLs and print it to the console (stdout). You don't need to print out
the length, just the data as a String; one line per URL. The catch is
that you must print them out in the same order as the URLs are
provided to you as command-line arguments.
I'm confused as to why my solution doesn't work, since it looks equivalent to the official one (just written in a more functional style), and I'm unsure how their tests work internally:
1. ACTUAL: ""
1. EXPECTED: "As busy as a dead horse also lets get some dero. Built like a sleepout no dramas lets get some chook. She'll be right thingo my she'll be right ute. "
2. ACTUAL: "She'll be right bizzo no worries she'll be right fair dinkum. We're going aerial pingpong no worries as busy as a gyno. "
2. EXPECTED: "She'll be right bizzo no worries she'll be right fair dinkum. We're going aerial pingpong no worries as busy as a gyno. "
3. ACTUAL: "He's got a massive pretty spiffy heaps she'll be right brizzie. He hasn't got a fly wire where shazza got us some strewth. She'll be right spit the dummy with it'll be fair go. We're going gobsmacked with as stands out like arvo. He's got a massive bush bash mate she'll be right slacker. "
3. EXPECTED: "He's got a massive pretty spiffy heaps she'll be right brizzie. He hasn't got a fly wire where shazza got us some strewth. She'll be right spit the dummy with it'll be fair go. We're going gobsmacked with as stands out like arvo. He's got a massive bush bash mate she'll be right slacker. "
4. ACTUAL: ""
4. EXPECTED: ""
my code:
// Fetch every URL given on the command line and print the bodies joined by newlines.
var http = require('http');
var bl = require('bl');
var result = []; // response bodies, stored at the index of their URL
var urls = process.argv.slice(2);
urls.forEach(function(url, i) {
http.get(url, function(response) {
// bl buffers the whole response stream and then calls back once with (err, data).
response.pipe(bl(function(err, data) {
if (err) return console.error(err);
result[i] = data.toString();
// NOTE(review): this prints as soon as the callback for the LAST url runs; nothing
// here checks that the callbacks for the earlier urls have already stored their data.
if (i === urls.length - 1) {
console.log(result.join('\n'));
}
}));
});
});
official solution:
// Reference solution: fetch the three URLs given as argv[2..4] and print the
// bodies in argument order once all three responses have been collected.
var http = require('http')
var bl = require('bl')
var results = [] // results[index] holds the body for URL number `index`
var count = 0 // number of responses fully received so far
// Print the three collected bodies, one per line, in argument order.
function printResults () {
for (var i = 0; i < 3; i++)
console.log(results[i])
}
// Request URL number `index` and buffer its whole body through bl.
function httpGet (index) {
http.get(process.argv[2 + index], function (response) {
response.pipe(bl(function (err, data) {
if (err)
return console.error(err)
results[index] = data.toString()
count++
// Print only after every request has reported back, whatever order they finished in.
if (count == 3)
printResults()
}))
})
}
// Start all three requests without waiting for one another.
for (var i = 0; i < 3; i++)
httpGet(i)
Basically the first test never passes (although if there is only 1 url in the iterated array (instead of 3), the first test passes but not the others). Any insight would be great. I'm not sure where to ask about this and perhaps I'm just missing some JS thing, so sorry if this is not appropriate.

You haven't made sure that all of the urls have been downloaded.
The requests don't necessarily come back in order. Consider if 3 comes back first. You'll skip the other two urls and only print out 3.
The demo code counts the number of responses so it's guaranteed to get everything before it prints out the answer.

I think you just need to wait until all of the requests have finished (or any one of them fails). Here is my answer, which passed:
// Fetch every command-line URL in parallel; when the last outstanding response
// has been buffered, print all bodies in argument order.
var http = require('http');
var bl = require('bl');

var urls = process.argv.slice(2);
var results = [];
var count = urls.length; // responses still outstanding

// Print every stored body, one per line, in URL order.
function printInOrder() {
  results.forEach(function (body) {
    console.log(body);
  });
}

// Request one URL and store its complete body at position `slot`.
function fetchInto(slot, address) {
  http.get(address, function (res) {
    var collector = bl(function (err, data) {
      if (err) throw err;
      results[slot] = data.toString();
      count -= 1;
      if (count == 0) {
        printInOrder();
      }
    });
    res.pipe(collector);
  });
}

for (var slot = 0; slot < urls.length; slot++) {
  fetchInto(slot, urls[slot]);
}

// Download the URLs found at argv[2], argv[3] and argv[4] strictly one after
// another, then print the collected bodies in that order.
var http = require('http');
var links = [2, 3, 4]; // positions of the URLs inside process.argv
var buffer = []; // buffer[n] accumulates the body of the n-th URL

// Print every collected body, one per line.
function renderOutput() {
  buffer.forEach(function (elem) {
    console.log(elem);
  });
}

// Fetch the URL at position `index`; when it ends, move on to the next one
// or print everything if it was the last.
function render(index) {
  var pending = http.get(process.argv[links[index]], function (response) {
    response.setEncoding('utf8');
    response.on('data', function (chunk) {
      // The slot is created lazily, on the first chunk received.
      buffer[index] = (buffer[index] === undefined ? '' : buffer[index]) + chunk;
    });
    response.on('end', function () {
      var newIndex = index + 1;
      if (links[newIndex] === undefined) {
        return renderOutput();
      }
      render(newIndex);
    });
    response.on('error', console.error);
  });
  pending.on('error', console.error);
}

render(0);

I got it working without using the BufferList (bl) module, which may be a more generic approach.
// Fetch argv[2..4] in parallel without the bl module and print the bodies in
// argument order once every response has ended.
var http = require('http');
var urlList = [process.argv[2], process.argv[3], process.argv[4]];
var results = []; // results[index] is an object whose key `index` holds the body
var count = 0; // number of responses that have ended

// Print the stored bodies in URL order.
function printCollected() {
  urlList.forEach(function (unused, position) {
    console.log(results[position][position]);
  });
}

// Request one URL and accumulate its chunks under data[index].
function getURLdata(index) {
  http.get(urlList[index], function (response) {
    var data = {};
    data[index] = '';

    function onChunk(chunk) {
      data[index] += chunk;
    }

    function onEnd() {
      results[index] = data;
      count += 1;
      if (count == urlList.length) {
        printCollected();
      }
    }

    response.setEncoding('utf-8');
    response.on('error', function (err) {
      console.log(err);
    });
    response.on('data', onChunk);
    response.on('end', onEnd);
  });
}

urlList.forEach(function (unused, position) {
  getURLdata(position);
});

I am a beginner so maybe this solution has problems, this is using async/await, by making an array of promises and waiting for them to resolve, this will control the order of responses
const axios = require("axios")

// Download one URL and resolve with the response body.
const getURL = async url => {
  const res = await axios.get(url)
  return res.data
}

// Start a request for every command-line URL at once. Promise.all resolves
// with the bodies in the same order as the URLs, regardless of which request
// finishes first.
const getUrlArray = () => {
  const requests = process.argv.slice(2).map(url => getURL(url))
  return Promise.all(requests)
}

getUrlArray()
  .then(data => data.forEach(body => console.log(body)))
  .catch(err => {
    // Without this handler a failed request is an unhandled rejection.
    console.error(err)
    process.exitCode = 1
  })

Pretty simple solution, but gets the job done:
const http = require('http');
const bl = require('bl');

// URLs are the first three command-line arguments (argv[2..4]).
const urls = process.argv.slice(2, 5);
// bodies[index] holds the complete body of urls[index].
const bodies = new Array(urls.length);
let remaining = urls.length;

urls.forEach(function (url, index) {
  http.get(url, function (res) {
    res.pipe(bl(function (err, data) {
      if (err) { return console.error(err) }
      // Store by position instead of printing immediately: responses can
      // finish in any order, but the output must follow argument order.
      bodies[index] = data.toString();
      remaining -= 1;
      if (remaining === 0) {
        bodies.forEach(function (body) {
          console.log(body);
        });
      }
    }));
  }).on('error', console.error);
});

Related

iterating Javascript array and delete based on condition

I want to iterate through an array of words, look up the definition and delete the word if no definition is found.
my code looks as follows;
// Question snippet: tries to drop words that have no dictionary definition.
var words = ["word1", "word2", "word3",]
// NOTE(review): the braces below are unbalanced - Meaning is never closed and
// getMeaning is never called in the lines shown.
function Meaning(words){
const getMeaning = async () => {
// `${words}` interpolates the whole array (comma-joined) into one URL.
const response = await fetch(`https://api.dictionaryapi.dev/api/v2/entries/en/${words}`)
const myJson = await response.json()
for(i = 0; i < words.length; ++i) {
// Indexes the fetch result object itself, not the parsed JSON.
if(!response[i]){
// Removes from the parsed JSON, not from `words`.
myJson.splice(i,1)
console.log(myJson)
}
}}
This is not really doing anything atm. Where am I going wrong?
edit to add context
tried like this as well;
// Question snippet: one request per word, attempting to remove words on a 404.
for(i = 0; i < words.length; ++i)
fetch(`https://api.dictionaryapi.dev/api/v2/entries/en/${words[i]}`).then((response) => {
// Compares the resolved value itself with the number 404.
if (response === 404) {
let response = words
// splice is called on the element words[i], not on the words array.
words[i].splice(i,1)
console.log(response)
}
// Thrown unconditionally, so control always passes to the .catch below.
throw new Error('Something went wrong');
})
.then((responseJson) => {
let response = words
response[i].splice(i,1)
})
.catch((error) => {
console.log(error)
});
I can print out the 404 error when it finds no definition, but I can't remove it from the words array
After a quick look at the API, it appears to handle only single words, so the caller needs to make the requests one at a time. Here's how to do it...
const baseUrl = 'https://api.dictionaryapi.dev/api/v2/entries/en/';

/**
 * Look up a single word.
 *
 * @param {string} word - The word to look up.
 * @returns {Promise<*>} The parsed JSON body of the response.
 */
async function lookupWord(word) {
  // Encode the word so spaces, slashes, '?' or '#' cannot change the request path.
  const res = await fetch(baseUrl + encodeURIComponent(word));
  return res.json();
}
// Resolve to true when the lookup for `word` yields a non-empty array,
// false for anything else (empty array or a non-array body).
async function spellCheck(word) {
  const definitions = await lookupWord(word);
  if (!Array.isArray(definitions)) {
    return false;
  }
  return definitions.length > 0;
}
// Run spellCheck on every word at once, then remove the failing words from
// `array` in place. Resolves to undefined; the caller reads the mutated array.
async function spellCheckWords(array) {
  const verdicts = await Promise.all(array.map(spellCheck));
  // Walk backwards so removing an entry never shifts an index still to be visited.
  let position = array.length;
  while (position-- > 0) {
    if (verdicts[position]) continue;
    array.splice(position, 1);
  }
}
// Quick manual check: after the promise settles, `array` holds whatever
// spellCheckWords left in it.
let array = ['hello', 'whereforeartthou', 'coffee'];
const showRemaining = () => {
  console.log(array)
};
spellCheckWords(array).then(showRemaining)
try this code, you need to check every single element of array from response
var words = ["word1", "word2", "word3"];

// Request the dictionary entry for `words` (the array is interpolated into the
// URL as-is) and resolve with the elements of the JSON reply that also appear
// in `words`.
async function Meaning(words) {
  const reply = await fetch(`https://api.dictionaryapi.dev/api/v2/entries/en/${words}`)
  const entries = await reply.json()
  const kept = [];
  entries.forEach(entry => {
    if (!words.includes(entry)) return;
    kept.push(entry)
  });
  return kept;
}

Issues with Array Variable

// Question snippet: total v_amount per staff member of a store for month `mm`.
app.get("/indsalesx/:store/:mm", (req, res) => {
connect();
// NOTE(review): `ddd` is a single array; the same object is pushed on every callback.
let ddd = [];
// Starts with one empty object already in it.
let staffarray = [{}];
let store = req.params.store;
let mm = req.params.mm;
const SP = mongoose.model(`sales${store}`, Sales);
let num = stafflist[store].length - 1;
for (i = 0; i <= num; i++) {
let staffname = stafflist[store][i];
let calc = 0;
// The callback passed to find() runs later, once the query has completed.
SP.find(
{ v_salesperson: stafflist[store][i], v_month: mm },
"v_amount",
(err, doc) => {
let t = doc.length - 1;
doc.map((res) => {
calc = calc + res.v_amount;
});
ddd.name = staffname;
ddd.amount = calc;
staffarray.push(ddd);
}
);
}
// Runs right after the loop has started the queries, before any callback has pushed.
console.log(staffarray);
});
The issue I have is: Why is staffarray returning an empty array? staffarray was declared as an empty array of objects, and in a loop, objects were pushed to the array. But when I console.log(staffarray), it returns the empty array of objects declared initially.
Any help on what to do?
When using find(), you can use 2 approaches.
Pass a callback function
await the function to execute and return the results.
It appears that you used the first approach which means that you are passing a callback into the find() method which handles the result once received.
The console.log() code line will execute before the result will return since it's the next line to execute after the for loop.
So, let's go through what it happening here:
Javascript is executing the find() code line.
That line of code is being placed in the web API which are the pieces of the browser in which concurrency kicks in and makes the call to the server for us.
The console.log() line is executed with an empty array (since the results haven't been received yet).
After some time, results came back and the callback is being set in the callback queue.
The JS event loop takes the callback from the callback queue and executes it.
This is part of the javascript event loop. you could read more about this here
Mongoose documentation: Model.find()
you can use for of with async/await instead of for
// Total v_amount per staff member of a store for month `mm`, querying one
// staff member at a time so the list is complete before it is logged.
app.get("/indsalesx/:store/:mm", async (req, res) => {
  connect();
  const { store, mm } = req.params;
  const SP = mongoose.model(`sales${store}`, Sales);
  // Starts empty: no placeholder object ahead of the real entries.
  const staffarray = [];
  // Iterate the staff list directly so the last member is not skipped.
  for (const staffname of stafflist[store]) {
    const docs = await SP.find(
      { v_salesperson: staffname, v_month: mm },
      "v_amount"
    );
    const amount = docs.reduce((sum, doc) => sum + doc.v_amount, 0);
    // A fresh object per staff member; pushing one shared object would make
    // every entry show the values of the last iteration.
    staffarray.push({ name: staffname, amount });
  }
  console.log(staffarray);
});
I have been able to solve it, all I needed was proper structuring with the async and await statements.
// Total v_amount per staff member of a store for month `mm`, returned as
// { data: [{ name, amount }, ...] } in staff-list order.
app.get("/indsalesx/:store/:mm", async (req, res) => {
  connect();
  const { store, mm } = req.params;
  const SP = mongoose.model(`sales${store}`, Sales);
  const staffarray = [];
  for (const staffname of stafflist[store]) {
    // Await the query on its own: passing a callback AND awaiting the same
    // Mongoose query makes it execute twice (or throw, depending on version).
    const docs = await SP.find(
      { v_salesperson: staffname, v_month: mm },
      "v_amount"
    );
    const amount = docs.reduce((sum, doc) => sum + doc.v_amount, 0);
    staffarray.push({ name: staffname, amount });
  }
  console.log(staffarray);
  res.send({ data: staffarray });
});

How to wait for iteration to complete before returning

I am trying to loop through an Array of JSON objects (var requestArray = req.body;, specifically requestArray['filter']), persisting each object into a database. After each persistence, I pull the last persisted data table, add it to an array let responseDataArray = []; in responseDataArray.push(result);. This array is then returned as a request response.
// Question snippet: persist up to four items from req.body.filter and reply
// with the persisted results.
app.post('/sound', function (req, res) {
var requestArray = req.body;
let responseDataArray = [];
for (var i = 0; i < requestArray['filter'].length; i++) {
// Only indexes 0..3 are processed.
if (i > 3)
break;
var revEl = requestArray['filter'][i];
// console.log('GUID >>> ' + i + ' : ' + revEl['_revEntityGUID'] + ' >>> ' + JSON.stringify(revEl));
// The .then callback runs later, after the promise has resolved.
persistSingleItemPromise(revEl).then(function (result) {
responseDataArray.push(result);
console.log(JSON.stringify(responseDataArray));
});
}
// Executed immediately after the loop, before any .then callback has pushed a result.
console.log((responseDataArray));
res.send(responseDataArray);
});
The problem is in the for loop. It delays, and I only return an empty array responseDataArray = [] since it returns before the iteration completes.
I have tried using a Promise, persistSingleItemPromise:
// Promise wrapper around the callback-style save: resolves with whatever the
// save callback receives as its first argument. It never rejects.
let persistSingleItemPromise = (revData) =>
  new Promise((resolve) => {
    const onSaved = (result) => {
      resolve(result);
    };
    revPersSaveRevEntity.revPersSaveRevEntity(revData, onSaved);
  });
This doesn't help. How can I resolve this?
Thank you all in advance.
I was thinking of something like this.
Didn't test it please let me know if it works ;-)
Keep in mind, that your callback needs the async prefix too.
// Persist the items one after another, collecting every result in order.
// The accumulator is a promise for the results array, so each step waits for
// the previous save to finish before starting the next one.
const resultPromise = requestArray['filter'].reduce( async ( accPromise, revEl ) => {
  const acc = await accPromise
  const result = await persistSingleItemPromise(revEl)
  acc.push( result )
  // Hand the array (not the single result) on to the next step; returning
  // `result` would make the next iteration call .push on a non-array.
  return acc
}, Promise.resolve( [] ) )
const responseDataArray = await resultPromise
You could use Promise.all and store the promises. Then, wait for all of them to resolve
Like
// Persist the first four items of req.body.filter in parallel and reply with
// the results in request order.
app.post("/sound", function(req, res) {
  var requestArray = req.body;
  // Same limit as the original loop (`if (i > 3) break`): indexes 0..3 only.
  var responsePromises = requestArray["filter"].slice(0, 4).map(function(revEl) {
    // console.log('GUID >>> ' + revEl['_revEntityGUID'] + ' >>> ' + JSON.stringify(revEl));
    return persistSingleItemPromise(revEl);
  });
  Promise.all(responsePromises)
    .then(result => res.send(result))
    .catch(err => {
      // Without a rejection handler a failed save would leave the request hanging.
      console.error(err);
      res.status(500).send({ error: "Failed to persist items" });
    });
});
An example simulation here
// Simulate an asynchronous request that resolves with "Resolved <step>"
// after 100 * step milliseconds.
const simulateRequest = (step) =>
  new Promise((resolve) => {
    setTimeout(() => resolve("Resolved " + step), 100 * step);
  });

const promises = [1, 2, 3].map((step) => simulateRequest(step));

// Nothing is printed until the slowest promise has resolved;
// the results arrive in the order the promises were created.
Promise.all(promises).then((results) => {
  console.log(results);
});
I think you should add all the promises returned by "persistSingleItemPromise" to an array, call Promise.all(list).then() on them, and wait for the result before returning.

how to display responses from a looped scrape (cheerio)

I am scraping this site to collect all rows with the year 2013, but there are 7 pages and I have my request in a loop. How can I display the results after all 7 responses have been received? If I simply try to console.log the rowTrack array, it displays empty because of the async nature of the code. Ideally I want to run the requests in order of the loop so that the results of the first page are the first elements of the array etc..
// Question snippet: request 7 result pages and keep the table rows whose last
// field is "2013".
var request = require("request"),
cheerio = require("cheerio"),
rowTrack = [];
for (var i = 1; i <= 7; i++) {
var url = "http://www.boxofficemojo.com/alltime/world/?pagenum=" + i + "&p=.htm";
// The callback runs when this page's response arrives; nothing in this
// snippet tracks when all 7 callbacks have finished.
request(url, function(error, response, body) {
if (!error) {
var $ = cheerio.load(body),
rows = $('table table tr');
rows.each(function(j, element) {
// Split the row text into fields and drop the first and last pieces.
var select = $(element.children).text().split('\r\n')
select.shift();
select.pop();
if (select[select.length - 1] == "2013") {
rowTrack.push(select);
}
});
}
});}
How can I display the results?
The site you're scraping has changed a bit since the question was asked. The table is still there, but the URL and pagination are a bit different.
JS has moved on to promises and the request package is deprecated. Nowadays, with promises, you'd do:
const cheerio = require("cheerio"); // ^1.0.0-rc.12
const baseUrl =
  "https://www.boxofficemojo.com/chart/top_lifetime_gross/?area=XWW";

// Fetch one page of the chart and return its table rows (as arrays of cell
// text) whose last cell is "2013".
const scrapePage = async (page) => {
  const response = await fetch(`${baseUrl}&offset=${page * 100}`);
  const $ = cheerio.load(await response.text());
  const matches = [];
  for (const row of $("tr")) {
    const cells = [...$(row).find("td")].map((cell) => $(cell).text());
    if (cells.at(-1) === "2013") {
      matches.push(cells);
    }
  }
  return matches;
};

// Pages are requested one after another, so rows stay in page order.
(async () => {
  const results = [];
  for (let page = 0; page < 6; page++) {
    const rows = await scrapePage(page);
    results.push(...rows);
  }
  console.log(results);
})();
The above code runs in series, but you can parallelize it with Promise.all:
const cheerio = require("cheerio");
const baseUrl =
  "https://www.boxofficemojo.com/chart/top_lifetime_gross/?area=XWW";

// Fetch one page of the chart and return the rows (arrays of cell text) whose
// last cell is "2013".
async function rowsFor2013(pageIndex) {
  const response = await fetch(`${baseUrl}&offset=${pageIndex * 100}`);
  const $ = cheerio.load(await response.text());
  const allRows = [...$("tr")].map((row) =>
    [...$(row).find("td")].map((cell) => $(cell).text())
  );
  return allRows.filter((cells) => cells.at(-1) === "2013");
}

// All six requests start at once; Promise.all keeps the pages in request order.
(async () => {
  const pending = Array.from({ length: 6 }, (_, pageIndex) => rowsFor2013(pageIndex));
  const results = await Promise.all(pending);
  console.log(results.flat());
})();
Node 18 has native fetch, but if you're stuck with a legacy situation without promises, you can store each result in an array and use a counter to determine how many requests have completed. When the last request resolves, trigger the next stage of processing.
const cheerio = require("cheerio");
const request = require("request"); // ^2.88.2

// Request all pages in parallel and call `done` with the matching rows
// (flattened, in page order) once every request has completed.
const getRankings = done => {
  const results = [];
  const total = 6;
  let completed = 0;
  const baseUrl =
    "https://www.boxofficemojo.com/chart/top_lifetime_gross/?area=XWW";
  for (let i = 0; i < total; i++) {
    request(`${baseUrl}&offset=${i * 100}`, function (error, response, body) {
      if (error) {
        // Log the callback's own `error` argument (the original referenced an
        // undefined `err`) and record no rows for this page instead of
        // parsing a missing body.
        console.error(error);
        results[i] = [];
      } else {
        const $ = cheerio.load(body);
        results[i] = [...$("tr")]
          .map(e => [...$(e).find("td")].map(e => $(e).text()))
          .filter(e => e.at(-1) === "2013");
      }
      // Count failed requests too, so `done` still fires exactly once.
      if (++completed === total) {
        done(results.flat());
      }
    });
  }
};
getRankings(results => {
  console.log(results);
});
The above code runs all of the requests in parallel. To do the requests sequentially, you can chain the callbacks:
const cheerio = require("cheerio");
const request = require("request");

// Request the pages one at a time by chaining callbacks: each response
// handler either starts the next request or, after the last page, calls
// `done` with the matching rows (flattened, in page order).
const getRankings = (done, results=[], total=6, i=0) => {
  const baseUrl =
    "https://www.boxofficemojo.com/chart/top_lifetime_gross/?area=XWW";
  request(`${baseUrl}&offset=${i * 100}`, (error, response, body) => {
    if (error) {
      // Log the callback's own `error` argument (the original referenced an
      // undefined `err`) and record no rows for this page instead of
      // parsing a missing body.
      console.error(error);
      results[i] = [];
    } else {
      const $ = cheerio.load(body);
      results[i] = [...$("tr")]
        .map(e => [...$(e).find("td")].map(e => $(e).text()))
        .filter(e => e.at(-1) === "2013");
    }
    if (i + 1 === total) {
      done(results.flat());
    }
    else {
      getRankings(done, results, total, i + 1);
    }
  });
}
getRankings(results => {
  console.log(results);
});
Error handling on failed requests is left as an exercise. I haven't bothered adapting modern JS idioms like .at(-1), .flat() and so forth to work on older Node versions. Cheerio's .toArray() can be used instead of spreads, .at(-1) can be recreated with roughly const last = a => a[a.length-1]; and .flat() can be [].concat(...results).

Best way to iterate and make an async call during each iteration

If you have to loop and make a bunch of calls to a repository or gateway in my case, how do I do that asynchronously meaning not wrapping my async calls inside a synchronous for loop?
For example, what would be a better approach (restructuring this code) to loop through a set of ids, and make the call to find() below like I'm trying to do?
The goal: I want to take an array of ids, iterate them, and during each iteration, use the id to call find() on my gateway to go get the object for that id, then stuff it into a final array in which I'll return when all said and done.
What I'm using:
q (for promises)
co-pg (to hit the database)
someModule.js
// Question snippet: look up a car for every id in results.docs.
var _gateway = require('./database/someGateway');
var cars = [];
var car;
for (var i = 0; i < results.docs.length; i++){
var carId = results.docs[i].carId;
// Each lookup returns a promise; the .then callback pushes its first row later.
_gateway.find(carId)
.then(function(data){
console.log('data[0]: ' + data[0].id);
cars.push(data[0]);
})
.done();
}
console.log("cars: " + cars.length); // length here is 0 because the async calls have not finished yet
result(cars);
someGateway.js
// Gateway module: exposes the car model and a find() that delegates to it.
'use strict';
var Q = require('q');
var _carModel = require('../../models/car');
module.exports = {
models: {
car: _carModel
},
find: _find
};
// Returns whatever _carModel.find returns for the given id.
function _find(carId)
{
return _carModel.find(carId);
};
carModel.js
// Car model: find(id) runs a SELECT for one car and returns the result rows.
'use strict';
var Q = require('q');
var pg = require('co-pg')(require('pg'));
var config = require('../../models/database-config');
var car = module.exports = {};
// Generator wrapped by Q.async, so callers receive a promise.
car.find = Q.async(function *(id)
{
// NOTE(review): `id` is concatenated straight into the SQL text; if it can come
// from user input this is injectable - consider a parameterized query.
var query = 'SELECT id, title, description FROM car WHERE id = ' + id;
var connectionResults = yield pg.connectPromise(config.connection);
var client = connectionResults[0];
var done = connectionResults[1];
var result = yield client.queryPromise(query);
// presumably releases the connection - TODO confirm against co-pg
done();
// Reads rows[0] unconditionally, so this throws when the query returns no rows.
console.log("result.rows[0].id: " + result.rows[0].id);
return result.rows;
});
so I need help understanding how to refactor my code in someModule.js to get that working properly, so that I make a call to find() for each id, stuff each found car into the array, then return the array. The carModel code is async. It goes out to a physical database to perform the actual query lookup.
UPDATE #1
Ok after a couple more hours of trying all sorts of sh** (q.all(), and a ton of other combinations of callback code, etc.) here's what I have at this point:
someModule.js
// Question snippet (update 1): query Solr, then look up every returned car id
// through the PostgreSQL gateway.
var _data;
var Q = require('q');
var _solrClient = require('../models/solr/query');
var _solrEndpoint = "q=_text&indent=true&rows=10";
var _postgreSQLGateway = require('./database/postgreSQLGateway');
module.exports = {
data: function(data){
_data = data;
},
find: function (text, result){
// NOTE(review): `searchText` is not declared in the lines shown (the parameter is `text`).
// There is also no return after result(null), so execution continues below.
if(!searchText){
result(null);
};
// NOTE(review): `endpoint` is not declared in the lines shown (`_solrEndpoint` is).
_solrClient.query(endpoint, function(results){
var carIds = [];
var cars = [];
var car;
for (var i = 0; i < results.docs.length; i++){
carIds.push(results.docs[i].carId);
}
for (var i = 0; i < carIds.length; i++) {
// `i` is declared with var, so every callback sees the same variable;
// callbacks that run after the loop log its final value.
var car = _postgreSQLGateway.find(carIds[i], function(o){
console.log("i: " + i);
});
};
});
}
};
someGateway.js
// Gateway module (update 1): find() now takes a callback instead of returning
// the model's promise.
'use strict';
var Q = require('q');
var _carModel = require('../../models/postgreSQL/car');
module.exports = {
models: {
car: _carModel
},
find: _find
};
// Looks up the car and passes the result to `foundCar`; returns nothing itself.
function _find(carId, foundCar)
{
console.log("CALL MADE");
_carModel.find(carId)
.then(function(car){
console.log("car: " + car[0].id);
foundCar(car);
});
};
carModel.js
[same code, has not changed]
Of course I noticed that the for loop fires off all my function calls asynchronously, so when I console.log the i, it's 10 because the for loop is already done; as we know, the rest of the console.logs happen later, after the callbacks have run.
So I still can't get this working right...
Also when I was playing around I started down this path but it ended at a brick wall:
// Question snippet: attempt to wait for all gateway lookups with Q.all.
var find = Q.async(function(carIds, cars)
{
var tasks = [];
var foundCars = [];
for (var i = 0; i < carIds.length; i++) {
// NOTE(review): the index is the string literal ' + i + ', not the loop counter.
tasks.push(_postgreSQLGateway.find(carIds[' + i + ']));
};
// NOTE(review): tasks.join() turns the array into one string, so Q.all receives
// a one-element array holding that string rather than the individual tasks.
Q.all([tasks.join()]).done(function (values) {
for (var i = 0; i < values.length; i++) {
// Always reads values[0], never values[i].
console.log("VALUES: " + values[0]);
foundCars.push(values[0]);
}
cars(foundCars);
});
});
I ended up with [object promise] every time for values[i] instead of a car for value[i]
I don't know the Q promises library, but here's a solution using generic Promises built into node.js. This runs all the requests in parallel and then when all results have been collected, it runs the final .then() handler with all the results:
var _gateway = require('./database/someGateway');

// Log and unwrap the first row of one lookup result.
function firstRow(data) {
  console.log('data[0]: ' + data[0].id);
  return data[0];
}

// Start every lookup right away; each promise resolves to the first row found.
var promises = [];
var position = 0;
while (position < results.docs.length) {
  var lookup = _gateway.find(results.docs[position].carId);
  promises.push(lookup.then(firstRow));
  position++;
}

Promise.all(promises).then(function (cars) {
  // `cars` holds one entry per lookup, in the same order as results.docs
  console.log("cars: " + cars.length);
  result(cars);
});
Individual promise libraries (like the one I know Bluebird) have features built in that lets you do this kind of activity in even less code, but I've intentionally kept this answer to just using standard promise features.
This is potentially really easy with the vanilla Promise API from es6 (and replicated by Bluebird and other libs). First map the IDs to an array of promises:
// One lookup promise per document, in document order.
var promises = results.docs.map((doc) => _gateway.find(doc.carId));
Then create a promise for the aggregate result:
// A single promise that resolves once every promise in `promises` has resolved.
var allDone = Promise.all(promises);
Then inside the then() callback of the aggregate promise, you'll have a final array of results, in the same length and order as the carId array:
// Runs after all lookups have resolved; `results` is the array of their values.
allDone.then(function(results) {
// do something with "results"
});

Categories

Resources