NodeJS: Parsing data sequentially and asynchronously

I am writing a NodeJS app that will load data from a database, parse it, and then save the parsed result to a different table in the database. Here is what I currently have:
parse(index, from, to) {
    var collection = this.getCollectionName();
    var interval = global.Settings.Parser.ParseInterval;
    var promises = [];
    console.log('%d - %d', from, from + interval);
    for (from; from < to; from += interval) {
        promises.push(new Promise((resolve, reject) => {
            var scoped = from;
            this.data.query(collection, { [index]: { $gte: scoped, $lte: scoped + interval } }, (result) => {
                for (var i = 0; i < result.length; i++)
                    this.sendToBuilder(result[i]);
                resolve();
            });
        }));
    }
    promises.reduce((promise) => {
        Promise.resolve()
    });
}
The code seems to do what it should, but since the database query is asynchronous, out-of-order results are a common occurrence. I do not want this to happen: I want each query and promise to execute sequentially to maintain the order of the data.
I tried the array.reduce() method to chain the promises so they execute sequentially, but due to the nature of Promises each one starts as soon as it is created, so they all fire simultaneously.
How can I ensure that they execute sequentially? I don't mind delays between each promise as long as it doesn't block the actual thread.

Here's a working example in the spirit of your code using async/await:
function getData(reqId, collection, index, gte, lte) {
    return new Promise((resolve, reject) => {
        const delay = Math.floor(Math.random() * 2000) + 1;
        console.log(`[getData ${reqId}] delay: ${delay}`);
        const params = { collection, [index]: { $gte: gte, $lte: lte } };
        const results = [0, 1, 2].map(r => `result ${reqId}.${r}`);
        setTimeout(() => {
            console.log(`[getData ${reqId}] this.data.query(${JSON.stringify(params)})`);
            resolve(results);
        }, delay);
    });
}
async function parse(index, from, to) {
    const collection = 'My Collection';
    const interval = 10;
    console.log(`Processing ${from} to ${to} by ${interval}:`);
    for (from; from <= to; from += interval) {
        const reqId = from;
        console.log(`[parse] BEGIN ${reqId}`);
        const results = await getData(reqId, collection, index, from, from + interval);
        results.forEach(result => {
            console.log(`[parse - awaited ${reqId}] this.sendToBuilder(${result})`);
        });
        console.log(`[parse] END ${reqId}`);
    }
}
parse('idx', 200, 250);
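If you prefer to keep the reduce() approach from the question, the trick is to chain functions that create the promises rather than the promises themselves. A minimal sketch, where ranges and queryAndSend are hypothetical stand-ins for your intervals and your query-plus-sendToBuilder step:
// Each task is a function returning a promise, so nothing starts
// until the chain reaches it.
const tasks = ranges.map(([gte, lte]) => () => queryAndSend(gte, lte));
// reduce() builds one long .then() chain, executing the tasks in order.
tasks.reduce((chain, task) => chain.then(task), Promise.resolve());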

Related

Array return value is always empty

I'm trying to program a prototype right now, but I have a problem: my return values, and my output overall, always come back as an empty array.
If I put everything into one function it works, but if I split it into separate code blocks it doesn't work anymore.
As I said, I always get an empty array back.
What am I doing wrong?
async function dataLength(){
    return new Promise((resolve, reject) => {
        pdo.query(`SELECT * FROM mainsites`, function(err, result) {
            let results = result;
            let resultsLength = results.length;
            let dataIndexMainId = [];
            for(let index = 0; index < resultsLength; index++){
                dataIndexMainId[index] = results[index]["id"];
            }
            resolve();
        })
    })
}
async function getSubSitesIndex(length){
    let dataSitesIndex = [];
    for(let i = 0; i <= length; i++){
        await new Promise((resolve) => {
            pdo.query("SELECT * FROM subsites WHERE main = ?", [i], function(err, result) {
                dataSitesIndex[i] = result;
                resolve();
            })
        })
    }
    let filterDataSitesIndex = dataSitesIndex.filter(String);
    console.log(filterDataSitesIndex);
    return filterDataSitesIndex;
}
async function getIndex(paramIndex){
    let indexResult = await paramIndex;
    let indexArray = [];
    for (let indexRes of indexResult){
        for(let res of indexRes){
            indexArray.push(res);
        }
    }
    return indexArray;
}
If I execute the code like this:
getIndex(
    await getSubSitesIndex(
        await dataLength()
    )
);
In dataLength, your pdo.query call is not properly promisified.
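For illustration, a properly promisified dataLength might look like this: resolve with the collected ids and reject on error, rather than resolving with nothing (a sketch, not the only way to write it):
function dataLength() {
    return new Promise((resolve, reject) => {
        pdo.query(`SELECT * FROM mainsites`, function (err, result) {
            if (err) return reject(err);           // surface query errors
            resolve(result.map(row => row["id"])); // resolve WITH the ids
        });
    });
}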
However, you shouldn't have to write 3 functions for this at all. Do not make multiple queries to your database. Use a single query that does a JOIN - much more efficient!
function getIndex() {
    return new Promise((resolve, reject) => {
        pdo.query(`
            SELECT mainsites.id AS main_id, subsites.id AS sub_id
            FROM mainsites
            JOIN subsites ON subsites.main = mainsites.id;
        `, [], (err, result) => {
            if (err) reject(err);
            else resolve(result);
        });
    });
}
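Usage then collapses to a single call (error handling left to the caller):
getIndex()
    .then(rows => console.log(rows))
    .catch(err => console.error(err));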

How can I call a function after another one has finished executing

I'm a complete beginner at JavaScript. I just want to call the function seconOne() right after the function firstOne() completes its execution. By that I mean the second function should be called when the value of p1 is 4 (in this case). I can achieve it by calling setTimeout(), but what if I don't know how long firstOne() takes to execute?
// getting DOM element
const p1 = document.getElementById(`one`);
const p2 = document.getElementById(`two`);
const p3 = document.getElementById(`three`);
// first function
function firstOne() {
    for (let i = 0; i < 5; i++) {
        setTimeout(() => {
            p1.innerHTML = i;
        }, i * 1000);
    }
}
// second function
function seconOne() {
    for (let i = 0; i < 5; i++) {
        setTimeout(() => {
            p2.innerHTML = i;
        }, i * 1000);
    }
}
A possible solution is to work with promises.
Working example
var p1 = 1;
var p2 = 2;
var p3 = 3;
// Wrap each promise in a function so its timers don't start until we
// call it, and resolve only once the last timeout has fired.
const firstPromise = () => new Promise((resolve, reject) => {
    for (let i = 0; i < 5; i++) {
        setTimeout(() => {
            p1 = i;
            if (i === 4) resolve(); // last iteration: signal completion
        }, i * 1000);
    }
});
const secondPromise = () => new Promise((resolve, reject) => {
    for (let i = 0; i < 5; i++) {
        setTimeout(() => {
            p2 = i;
            if (i === 4) resolve();
        }, i * 1000);
    }
});
// run first promise
console.log("First promise called");
firstPromise()
    .then(() => {
        console.log("First promise done");
        // run second promise after first promise succeeds
        console.log("Second promise called");
        return secondPromise();
    })
    .then(() => console.log("Second promise done"));
Your question is not childish at all. What you need to understand are callbacks and promise handlers. They tell JavaScript to wait until one task has completed before executing the next. Provided firstOne() returns a promise, you can write:
firstOne().then(() => seconOne())
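As written in the question, firstOne() returns undefined, so .then() would throw. A minimal sketch of a promise-returning version, resolving when its last timeout fires:
function firstOne() {
    return new Promise(resolve => {
        for (let i = 0; i < 5; i++) {
            setTimeout(() => {
                p1.innerHTML = i;
                if (i === 4) resolve(); // done after the final update
            }, i * 1000);
        }
    });
}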
Put an if condition in your firstOne() function.
const p1 = document.getElementById(`one`);
const p2 = document.getElementById(`two`);
const p3 = document.getElementById(`three`);
// first function
function firstOne() {
    for (let i = 0; i < 5; i++) {
        setTimeout(() => {
            if (i == 4) {
                seconOne();
            } else {
                p1.innerHTML = i;
            }
        }, i * 1000);
    }
}
// second function
function seconOne() {
    for (let i = 0; i < 5; i++) {
        setTimeout(() => {
            p2.innerHTML = i;
        }, i * 1000);
    }
}
Just to build on the other answers that have suggested using a Promise, here's a more generalised solution that also uses async/await.
(In summary: call a function with a count, and an element. That function will return a promise that "at some point" work will be completed. An inner function loops updating the element content until that count has been reached, at which point the promise resolves, and the next thing can start).
// Cache the elements
const p1 = document.querySelector('#one');
const p2 = document.querySelector('#two');
const p3 = document.querySelector('#three');
// `timer` accepts a count, and the element
// to apply the count to
function timer(count, el) {
    // Return a promise that basically says:
    // once I'm done doing this work, resolve,
    // and then the event queue can
    // get on with the next thing
    return new Promise(resolve => {
        // So we create a loop that logs the numbers
        // in our element up to the count we specified,
        // and when that number is reached, resolve the promise
        function loop(n = 0) {
            // If our current `n` value is <= count
            if (n <= count) {
                // Set the content of the element
                el.textContent = n;
                // Call `loop` again after a second
                // with an incremented `n` value
                setTimeout(loop, 1000, ++n);
            // Otherwise resolve the promise
            } else {
                resolve();
            }
        }
        loop();
    });
}
// And now we just await each resolved promise
async function main() {
    await timer(4, p1);
    await timer(7, p2);
    await timer(20, p3);
    console.log('Done!');
}
main();
<div id="one"></div>
<div id="two"></div>
<div id="three"></div>
Additional documentation
querySelector

There are 100 promises in an array and we need to process 5 at a time in JS. how to achieve this?

(Asked in a Microsoft interview.)
Use a pool. There are a number of implementations in JS, such as async-promise-pool, which has a nice-looking API:
const PromisePool = require("async-promise-pool");
// concurrency is the only option for PromisePool and enables you to
// choose how many promises will run at once
const pool = new PromisePool({ concurrency: 3 });
// elsewhere add functions to the pool that produce promises. We use
// functions here to prevent the promises from immediately executing.
pool.add(() => thingThatReturnsAPromise());
// you can await pool.all to ensure that all promises in the pool are
// resolved before continuing.
await pool.all();
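Applied to the question's numbers (100 tasks, 5 at a time), that might look like the following, where taskFns is a hypothetical array of 100 promise-returning functions:
const pool = new PromisePool({ concurrency: 5 }); // at most 5 in flight
taskFns.forEach(fn => pool.add(fn));              // enqueue all 100 tasks
await pool.all();                                 // wait for every one to finish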
I would use a function to execute promises in sequence instead of in parallel. Then, create an array of groups of 5 to resolve in parallel using Promise.all:
const PROMISES_AMOUNT = 100
const GROUP_AMOUNT = 5
// Function to divide the array into chunks of similar size
function chunkArray(myArray, chunk_size){
    let tempArray = [];
    for (let index = 0; index < myArray.length; index += chunk_size) {
        const myChunk = myArray.slice(index, index + chunk_size);
        // Do something if you want with the group
        tempArray.push(myChunk);
    }
    return tempArray;
}
// the promise we will use
function interval(index) {
    return new Promise(function(resolve, reject) {
        const time = index * 100
        setTimeout(function() {
            console.log(`Waited ${time}!`)
            resolve(index);
        }, time)
    })
};
// Our 100 tasks, wrapped in functions so they don't start running
// until their group is reached
const promises = new Array(PROMISES_AMOUNT).fill(null).map((_, index) => () => interval(index))
// The array of 100 tasks divided into groups of 5, each group run in
// parallel when its wrapper function is invoked
const groupedPromises = chunkArray(promises, GROUP_AMOUNT).map((promisesGroup) => () => Promise.all(promisesGroup.map(p => p())))
// A function to execute promises in sequence
const promisesInSequence = (arrayOfTasks) => {
    let results = []
    return new Promise((resolve, reject) => {
        const resolveNext = (arrayOfTasks) => {
            // If all tasks are already resolved, return the final array of results
            if (arrayOfTasks.length === 0) return resolve(results)
            // Extract the first task and run it
            const first = arrayOfTasks.shift()
            first().then((res) => {
                console.log('Solved a group in parallel: ', res)
                results.push(res)
                resolveNext(arrayOfTasks)
            }).catch((err) => {
                reject(err)
            })
        }
        resolveNext(arrayOfTasks)
    })
}
promisesInSequence(groupedPromises)
    .then((result) => console.log(result))

Creating a Timestamped Object Array for Sampling Data in Javascript?

The goal is to push sampled data, as objects, onto an array at a periodic interval, and to wait to log the new array to the console once it is finalized.
I'm new to JS, so take it easy ;). I am likely making this more complicated than it needs to be. I thought it would be as simple as a setTimeout() in a for loop.
I have been able to generate the array two different ways: using an IIFE with setTimeout(), and with the setInterval() below. I'm not sure how to get an async/await function working with the array push() and a length check. Maybe this is not a good approach?
class Sample {
    constructor(tag, timeStamp) {
        this.tag = tag;
        this.timeStamp = Date.now();
    }
}
function arrayGenerator(tag){
    return sampleArr.push(new Sample(tag));
};
function setIntSample(callback, delay, iterations) {
    var i = 0;
    var intervalID = setInterval(function () {
        callback(i);
        if (++i === iterations) {
            clearInterval(intervalID);
        }
    }, delay);
};
The above seems to work, console.log()-ing the array as it is generated in arrayGenerator(). Below, no dice:
function resolveAfterArrGeneration(){
    return new Promise(resolve => {
        arrLength = setIntSample(i => {arrayGenerator(i)}, 3000, 5)
        if (arrLength === 5) {resolve();}
    });
}
async function ans() {
    var answer = await resolveAfterArrGeneration();
    console.log(sampleArr);
}
ans();
The basic idea is to return a promise and resolve the promise when the setInterval has run enough iterations. You can do that in a single function with something like this (with extra console.logs to show the process):
class Sample {
    constructor(tag, timeStamp) {
        this.tag = tag;
        this.timeStamp = Date.now();
    }
}
function makeSamples(iterations, delay){
    let samples = [], i = 0;
    return new Promise(resolve => {
        let intervalID = setInterval(function () {
            console.log("pushing new sample")
            samples.push(new Sample('tag: ' + i));
            if (++i === iterations) {
                console.log("finished resolving")
                clearInterval(intervalID);
                resolve(samples)
            }
        }, delay);
    })
}
makeSamples(5, 1000).then(console.log)
I would isolate the delay (the asynchronous part) and create a separate, generic delay() function for it. The rest then becomes simple, using an async function and a for loop:
const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));
class Sample {
    constructor(tag, timeStamp) {
        this.tag = tag;
        this.timeStamp = Date.now();
    }
}
async function setIntSample(callback, ms, iterations) {
    const arr = [];
    for (let i = 0; i < iterations; i++) {
        if (i) await delay(ms); // don't delay the first time
        arr.push(callback(i));
    }
    return arr;
}
const newSample = (tag) => new Sample(tag)
console.log("wait for it....");
setIntSample(newSample, 1000, 5).then(console.log);
Another way I just got working uses a generator function:
function* simpleGenerator(){
    var index = 0;
    while (true)
        yield {tag: index++, time: Date.now()}
}
var gen = simpleGenerator();
...with the corresponding push:
arr.push(gen.next().value);
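To tie that into the sampling loop, here is a sketch that reuses the delay() helper from the answer above to collect five timestamped samples one second apart:
async function collectSamples(n, ms) {
    const gen = simpleGenerator();
    const arr = [];
    for (let i = 0; i < n; i++) {
        if (i) await delay(ms);      // pause between samples
        arr.push(gen.next().value);  // pull the next sample off the generator
    }
    return arr;
}
collectSamples(5, 1000).then(console.log);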

Run concurrent HTTP requests in an async function

I am working on a project that needs an async function that's roughly equivalent to the following
async function task(url) {
    var r1 = await fetch(url).then(resp => resp.text());
    var r2 = await fetch(url + "/" + r1).then(resp => resp.json());
    // r2 is an array of urls
    var total = 0;
    for (var u of r2) {
        var tmp = await fetch(u).then(resp => resp.text());
        total += parseInt(tmp);
    }
    return total;
}
The issue is that there are hundreds of elements in r2, and each element is a URL. If I do it sequentially, this function will take a loooong time to complete. I would like to run 10 URLs concurrently (the number could be adjusted), and I wonder how I would rewrite the async function.
Chunk the initial array into pieces of 10, then wait for each chunk to complete with Promise.all before starting the next one:
async function getTotal(subArr) {
    const resps = await Promise.all(subArr.map(url =>
        fetch(url).then(resp => resp.json())
    ))
    return resps.reduce((a, b) => a + b);
}
async function task(url) {
    const r1 = await fetch(url).then(resp => resp.text());
    const r2 = await fetch(url + "/" + r1).then(resp => resp.json());
    const chunks = [];
    const { length } = r2
    for (let i = 0; i < length; i += 10) {
        chunks.push(r2.slice(i, i + 10));
    }
    let total = 0;
    for (const subArr of chunks) {
        total += await getTotal(subArr);
    }
    return total;
}
Here's some code I created years ago that allows you to create a "parallel" queue
const makeQueue = length => {
    length = (isNaN(length) || length < 1) ? 1 : length;
    const q = Array.from({length}, () => Promise.resolve());
    let index = 0;
    const add = cb => {
        index = (index + 1) % length;
        return (q[index] = q[index].then(() => cb()));
    };
    return add;
};
This will allow up to 10 simultaneous requests (or whatever you pass in as the argument)
In your code, I guess you could use it like
async function task(url) {
    const q = makeQueue(10); // 10 requests at a time
    var r1 = await fetch(url).then(resp => resp.text());
    var r2 = await fetch(url + "/" + r1).then(resp => resp.json());
    // note: .map(s => parseInt(s, 10)) rather than .map(parseInt),
    // which would misuse the array index as the radix
    return Promise.all(r2.map(u => q(() => fetch(u).then(resp => resp.text()))))
        .then(v => v.map(s => parseInt(s, 10)).reduce((a, b) => a + b));
}
the return can also be
return Promise.all(r2.map(u => q(() => fetch(u).then(resp => resp.text()).then(parseInt))))
    .then(v => v.reduce((a, b) => a + b));
broken down, that is equivalent to
const fetch1 = u => fetch(u).then(resp => resp.text()).then(parseInt);
const promises = r2.map(u => q(() => fetch1(u)));
return Promise.all(promises).then(v => v.reduce((a, b) => a + b));
The benefit of this method is that it keeps 10 requests "on the go" for as much of the time as possible.
Note, browsers tend to limit the number of simultaneous requests per host, so you may not see any improvement with a queue size greater than 6 (I think that's the most common limit).
I appreciate all the good answers here! I studied them and came up with the following solution, which I think is slightly simpler (for many of us beginners) :-)
This solution doesn't divide up all the url-fetching jobs at the beginning, because it's uncertain how much time each fetch will take.
Instead it makes each worker go through all the urls; if a url is already assigned to another worker, it just moves on to the next one.
var tasks
var total = 0
var gId = 0
var workerId = 0
manager(4)
async function manager(numOfWorkers) {
    var workers = []
    tasks = r2.map(function(u) { return {id: gId++, assigned: -1, url: u } })
    for (var i = 0; i < numOfWorkers; i++) { workers.push(worker()) }
    await Promise.all(workers)
    console.log(total)
}
async function worker() {
    var wid = workerId; workerId++;
    var tmp;
    for (var u of tasks) {
        if (u.assigned == -1) {
            u.assigned = wid;
            console.log("unit " + u.id + " assigned to " + wid)
            tmp = await fetch(u.url).then(r => r.text())
            total += parseInt(tmp);
        }
    }
}
In short, ditch the await. By using await, you are literally telling it to wait here until it is done with this one thing.
If you want to parallelize them, make use of Promise.all(). Any async function returns a Promise which can still be used like a normal Promise. Promise.all() accepts an array of Promise objects, and will call then() once all of those requests are done, giving you an array of the results from each.
You could do something like this:
const urls = [/* bunch of URLs */];
Promise.all(
    urls.map(url =>
        fetch(url).then(res => res.text())
    )
).then(results => /* do something with results */)
In this case, results will be an array of the results from your various requests, in the same order as they were passed in.
Now, if you want to be able to have a specific number of them running at a time, you'd want to change it up a bit and have some limits on what's going on.
I usually use a technique which just uses a simple counter to keep track of how many are active, and then fires off more when they are done.
You can do something like this:
// dummy fetch for example purposes, resolves between .2 and 3 seconds
const fakeFetch = url => new Promise(resolve => setTimeout(() => resolve(url), Math.random() * 2800 + 200));
const inputUrls = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
const limit = 2; // this sets the limit of how many can run at once; set to 10 to run 10 concurrently
const delay = 100; // delay in ms between each batch starting
function fetchAll(urls) {
    let active = 0;
    let queue = urls.slice(0); // clone urls
    // inner function so urls and results can be shared with all calls
    function fetchAllInner() {
        if (active < limit && queue.length) {
            const count = Math.min(limit - active, queue.length);
            const urlsThisBatch = queue.slice(0, count);
            queue = queue.slice(count); // remaining
            return Promise.all(
                urlsThisBatch.map(url => {
                    active++; // increment active
                    console.log('start', url);
                    return fakeFetch(url)
                        .then(r => {
                            console.log('done', url);
                            active--; // decrement active
                            return new Promise(resolve => // new Promise to promisify setTimeout
                                setTimeout(() =>
                                    resolve(fetchAllInner() // kicks off a run again when one finishes
                                        .then(fetchR => [].concat(r, fetchR)) // combine them
                                    ), delay
                                )
                            );
                        })
                })
            ).then(r => r.reduce((a, u) => [].concat(u, a), [])); // flatten from Promise.all()
        }
        return Promise.resolve([]); // final resolve
    }
    return fetchAllInner();
}
fetchAll(inputUrls)
    .then(results => console.log('all done', results));
In a nutshell, what this does is create a Promise.all() for a batch (however many we can start before we hit our limit). Then, when one finishes, it sets a timeout to start up another batch by recursively calling the same function. It's wrapped in another function simply to avoid making some variables global.
This also has an optional delay, so you can throttle how many requests you make and not hammer the system too badly. If you don't want to use a delay, you can just set it to 0 or remove the new Promise(resolve => setTimeout bit.
The above version is a bit verbose to make it easier to understand. Here is a more "production-ready" version (be sure to switch fakeFetch to fetch and handle calling res.text())
const fakeFetch = url => new Promise(resolve => setTimeout(() => resolve(url), Math.random() * 2800 + 200));
function fetchAll(urls, limit = 10, delay = 200) {
    let active = 0;
    const queue = urls.slice(0); // clone, so the caller's array isn't emptied
    function fetchAllInner() {
        if (active >= limit || !queue.length) {
            return Promise.resolve([]);
        }
        const count = Math.min(limit - active, queue.length);
        active += count; // account for exactly the requests we start
        return Promise.all(
            queue.splice(0, count)
                .map(url => fakeFetch(url)
                    .then(r => {
                        active--;
                        return new Promise(resolve =>
                            setTimeout(() => resolve(
                                fetchAllInner().then(fetchR => [].concat(r, fetchR))
                            ), delay)
                        );
                    })
                )
        ).then(r =>
            r.reduce((a, u) => [].concat(u, a), []));
    }
    return fetchAllInner();
}
console.log('give it a few seconds');
fetchAll(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
    .then(r => console.log('all done', r))
console.log('give it a few seconds');
fetchAll(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
.then(r => console.log('all done', r))
