I have a loop like this:
var i,j,temparray,chunk = 200;
for (i=0,j=document.mainarray.length; i<j; i+=chunk) {
temparray = document.mainarray.slice(i,i+chunk);
var docs = collection.find({ id: { "$in": temparray}}).toArray();
docs.then(function(singleDoc)
{
if(singleDoc)
{
console.log("single doc length : " + singleDoc.length);
var t;
for(t = 0, len = singleDoc.length; t < len;t++)
{
fs.appendFile("C:/Users/x/Desktop/names.txt", singleDoc[t].name + "\n", function(err) {
if(err) {
return console.log(err);
}
});
}
}
});
}
The loop iterates two times. In the first iteration it gets 200 elements; in the second, it gets 130. But when I open the .txt file, I see only 130 names. I guess that because of the async nature of Node.js, only the second part of the array gets processed. What should I do to get all parts of the array processed? Thanks in advance.
EDIT: I finally turned the code into this:
var generalArr = [];
var i,j,temparray,chunk = 200;
for (i=0,j=document.mainarray.length; i<j; i+=chunk) {
temparray = document.mainarray.slice(i,i+chunk);
generalArr.push(temparray);
}
async.each(generalArr, function(item, callback)
{
var docs = collection.find({ id: { "$in": item}}).toArray();
docs.then(function(singleDoc)
{
if(singleDoc)
{
console.log("single doc length : " + singleDoc.length);
var t;
for(t = 0, len = singleDoc.length; t < len;t++)
{
fs.appendFile("C:/Users/x/Desktop/names.txt", singleDoc[t].name + "\n", function(err) {
if(err) {
return console.log(err);
}
});
}
}
});
callback(null);
})
When I change this line:
var docs = collection.find({ id: { "$in": item}}).toArray();
To this line:
var docs = collection.find({ id: { "$in": item}}).project({ name: 1 }).toArray();
It works, I'm able to print all names. I guess there is a problem with memory when I try without .project(). How can I make this work without using project? Should I change some memory limits? Thanks in advance.
I think your code is unnecessarily complicated, and appending to a file in a loop is very expensive compared to in-memory computation. A better way would be to write to the file just once.
var generalArr = [], i, j, temparray, chunk = 200;
for (i = 0, j = document.mainarray.length; i < j; i += chunk) {
temparray = document.mainarray.slice(i, i + chunk);
generalArr.push(temparray);
}
const queryPromises = [];
generalArr.forEach((item, index) => {
queryPromises.push(collection.find({ id: { "$in": item } }).toArray());
});
let stringToWrite = '';
Promise.all(queryPromises).then((result) => {
result.forEach((item) => {
item.forEach((element) => {
//create a single string which you want to write
stringToWrite = stringToWrite + "\n" + element.name;
});
});
fs.appendFile("C:/Users/x/Desktop/names.txt", stringToWrite, function (err) {
if (err) {
return console.log(err);
} else {
// call your callback or return
}
});
});
In the code above, I do the following:
1. Wait for all the DB queries to finish.
2. Iterate over the results and build the single string that we need to write to the file.
3. Write to the file.
Once you go asynchronous you cannot go back - all your code needs to be asynchronous. In Node 8 you handle this with the async and await keywords. In older versions you can use Promise - async/await is just syntactic sugar for it anyway.
However, most of the APIs in Node are older than Promise, and so they use callbacks instead. There is a promisify function (util.promisify in Node 8+) to convert callback-style functions into promise-returning ones.
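For example, a minimal sketch of promisifying fs.appendFile that way (util.promisify ships with Node 8+; the path is just the one from your question):

const fs = require('fs');
const util = require('util');

// util.promisify turns a standard (err, result)-callback API into a promise-returning one
const appendFileAsync = util.promisify(fs.appendFile);

appendFileAsync("C:/Users/x/Desktop/names.txt", "some name\n")
    .then(() => console.log('appended'))
    .catch(err => console.log(err));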
There are two ways to handle this, you can let all the asynchronous actions happen at the same time, or you can chain them one after another (which preserves order but takes longer).
So, collection.find is asynchronous, it either takes a callback function or returns a Promise. I'm going to assume that the API you're using does the latter, but your problem could be the former (in which case look up promisify).
var findPromise = collection.find({ id: { "$in": item}});
Now, at this point findPromise holds the running find action. We say this is a promise that resolves (completes successfully) or rejects (throws an error). We want to queue up an action to do once it completes, and we do that with then:
// The result of collection.find is the collection of matches
findPromise.then(function(docs) {
// Any code we run here happens asynchronously
});
// Code here will run first
Inside the promise we can return further promises (allowing them to be chained - complete one async, then complete the next, then fire the final resolve once all done) or use Promise.all to let them all happen in parallel and resolve once done:
var p = new Promise(function(resolve, reject) {
var findPromise = collection.find({ id: { "$in": item}});
findPromise.then(function(docs) {
var singleDocNames = [];
for(var i = 0; i < docs.length; i++) {
var singleDoc = docs[i];
if(!singleDoc)
continue;
// each element of docs is one matched document, so just collect its name
singleDocNames.push(singleDoc.name);
}
// Resolve the outer promise with the final result
resolve(singleDocNames);
});
});
// When the promise finishes log it to the console
p.then(console.log);
// Code inline here will fire before the promise
This is much easier in node 8 with async/await:
async function p() {
// Await puts the rest of this function in the .then() of the promise
const docs = await collection.find({ id: { "$in": item}});
const singleDocNames = [];
for(var i = 0; i < docs.length; i++) {
// ... synchronous code unchanged ...
}
// Resolve the outer promise with the final result
return singleDocNames;
}
// async functions can be treated like promises
p().then(console.log);
If you need to write the results to a text file asynchronously there are a couple of ways to do it - you can wait until the end and write all of them, or chain a promise to write them after each find, though I find parallel IO operations tend to be at more risk of deadlocks.
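For instance, a rough sketch of the first option (collect everything, then write once at the end), chaining a single write onto the p promise constructed above (the non-async version); util.promisify is used here on the assumption that you stay on the callback-style fs API:

const fs = require('fs');
const util = require('util');
const appendFileAsync = util.promisify(fs.appendFile);

p.then(function(singleDocNames) {
    // one write for the whole batch of names
    return appendFileAsync("C:/Users/x/Desktop/names.txt", singleDocNames.join("\n") + "\n");
}).then(() => console.log('file written'))
  .catch(err => console.log(err));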
The code above has multiple issues with asynchronous control flow. Similar code could work, but only if you use ES7 async/await operators on all async operations.
Of course, you can also achieve a solution with a sequence of promises. Solution:
let flowPromise = Promise.resolve();
const chunk = 200;
for (let i=0,j=document.mainarray.length; i<j; i+=chunk) {
flowPromise = flowPromise.then(() => {
const temparray = document.mainarray.slice(i,i+chunk);
const docs = collection.find({ id: { "$in": temparray}}).toArray();
return docs.then((singleDoc) => {
let innerFlowPromise = Promise.resolve();
if(singleDoc) {
console.log("single doc length : " + singleDoc.length);
for(let t = 0, len = singleDoc.length; t < len;t++) {
innerFlowPromise = innerFlowPromise.then(() => new Promise((resolve, reject) =>
fs.appendFile(
"C:/Users/x/Desktop/names.txt", singleDoc[t].name + "\n",
err => (err ? reject(err) : resolve())
)
));
}
}
return innerFlowPromise;
});
});
}
flowPromise.then(() => {
console.log('Done');
}).catch((err) => {
console.log('Error: ', err);
})
When you use async-like control flow based on promises, always remember that loops and function call sequences will not pause execution until the async operation is done, so you have to build all the then sequences manually. Or use async/await syntax.
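For comparison, a rough async/await sketch of the same sequential flow (Node 8+; document and collection are assumed to be in scope exactly as in the question):

const fs = require('fs');
const util = require('util');
const appendFile = util.promisify(fs.appendFile);

async function writeAllNames() {
    const chunk = 200;
    for (let i = 0; i < document.mainarray.length; i += chunk) {
        const temparray = document.mainarray.slice(i, i + chunk);
        const singleDoc = await collection.find({ id: { "$in": temparray } }).toArray();
        console.log("single doc length : " + singleDoc.length);
        for (let t = 0; t < singleDoc.length; t++) {
            // each append is awaited, so the names stay in order
            await appendFile("C:/Users/x/Desktop/names.txt", singleDoc[t].name + "\n");
        }
    }
}

writeAllNames().then(() => console.log('Done')).catch(err => console.log('Error: ', err));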
Which version of Node.js are you using? You should use the native async/await support which is built into newer versions of Node.js (no libraries required). Also note, fs.appendFile is asynchronous, so you need to either promisify it (for example with util.promisify) to transform the callback into a promise, or just use appendFileSync and suffer the blocking IO (which might be okay for you, depending on the use case).
async function writeNames() {
    ...
    for (var item of generalArr) {
        var singleDoc = await collection.find({ id: { "$in": item } }).toArray();
        // an if(singleDoc) check won't do anything here, since collection.find will always return something, even if it's just an empty array
        console.log("single doc length : " + singleDoc.length);
        for (var t = 0, len = singleDoc.length; t < len; t++) {
            fs.appendFileSync("C:/Users/x/Desktop/names.txt", singleDoc[t].name + "\n");
        }
    }
}
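If blocking on every appendFileSync call is a concern, here is a rough sketch of the same loop with promise-based appends instead (fs.promises is available from Node 10; on Node 8 util.promisify(fs.appendFile) gives the same shape). The function name and parameters are just made up for illustration:

const fsp = require('fs').promises;

async function writeNamesNonBlocking(generalArr, collection) {
    for (var item of generalArr) {
        var singleDoc = await collection.find({ id: { "$in": item } }).toArray();
        for (var t = 0; t < singleDoc.length; t++) {
            // awaiting keeps the writes in order without blocking the event loop
            await fsp.appendFile("C:/Users/x/Desktop/names.txt", singleDoc[t].name + "\n");
        }
    }
}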
var docs = collection.find({ id: { "$in": document.mainarray}}), // returns a cursor
doc,
names = [],
toInsert;
function saveToFile(cb) {
toInsert = names.splice(0,100);
if(!toInsert.length) return cb();
fs.appendFile("C:/Users/x/Desktop/names.txt", toInsert.join("\n"), cb);
}
(function process() {
    // with the official MongoDB driver, cursor.hasNext() and cursor.next() return promises
    docs.hasNext().then(function(hasNext) {
        if (hasNext) {
            docs.next().then(function(doc) {
                names.push(doc.name);
                if (names.length === 100) {
                    // save when we have 100 names in memory and clear the memory
                    saveToFile(function(err) {
                        process();
                    });
                } else {
                    process();
                }
            });
        } else {
            // flush whatever is left and finish
            saveToFile(function() {
                console.log('All done');
            });
        }
    });
}()); // invoke the function
If you can't solve your issue using core modules and basic Node.js, there is most likely a lack of understanding of how things work or insufficient knowledge about a library (in this case the fs module).
Here is how you can solve your issue without third-party libraries and such.
'use strict';
const
fs = require('fs');
let chunk = 200;
// How many rounds of array chunking we expect
let rounds = Math.ceil(mainArray.length/chunk);
// copy to temp (for the counter)
let tempRounds = rounds;
// set file name
let filePath = './names.txt'
// Open writable Stream
let myFileStream = fs.createWriteStream(filePath);
// from round: 0-${rounds}
for (let i = 0; i < rounds; i++) {
// assume array has ${chunk} elements left in this round
let tempChunk = chunk;
// if the last round would overshoot, e.g. i=2 -> i*chunk + chunk = 600, but mainArray.length = 512,
// adjust tempChunk for "the leftovers"
if (mainArray.length < (i + 1) * chunk) tempChunk = mainArray.length - i * chunk;
// slice it for this round
let tempArray = mainArray.slice(i*chunk, i*chunk + tempChunk);
// get stuff from DB
let docs = collection.find({ id: { "$in": tempArray}}).toArray();
docs.then(function(singleDoc){
// for each name in the doc
for (let j = 0; j < singleDoc.length; j++) {
// write to stream
myFileStream.write(singleDoc[j].name + "\n");
}
// declare round done (reduce tempRounds) and check if it hits 0
if (!--tempRounds) {
// if all rounds are done, end the stream
myFileStream.end();
// BAM! you done
console.log("Done")
}
});
}
The key is to use a writable stream via fs.createWriteStream :)
See the Node.js fs documentation for details.
Related
I'm trying to write the following code and make it work synchronously, but the only problem is that it works correctly with console.log, which prints every item in the array with a delay of 1 second, but it doesn't work with the following structure:
for (let i = 0; i < array.length; i++) {
    setTimeout(function () {
        // 1. http request via rp or request.get (I receive a huge data array)
        // 2. .map the results
        // 3. insert into Mongo via mongoose
    });
}
As for now, I have the following code inside:
request.get({ url: array[i].url }, function (error, body) {
    body.map(element => {
        // do stuff, it works fine
    });
    collection.insertMany(body, function (err, docs) {
        //#justloggerthings
    });
});
Or I have almost the same version with rp instead of request.get
By default I have mongoose.Promise = global.Promise;
Why does this cause a problem? Because body is a huge dataset which eats a lot of RAM. (Now imagine 20+ arrays with insertMany.)
So Mongo tries to insertMany all the responses from request at once (as soon as they are ready, without the 1000 ms delay). Actually, that's why I chose request instead of rp (request-promise), but it seems to be async too. So should I choose another HTTP GET module from npm, switch to it, and not worry about it?
Or should I wrap these operations in a promise / make an async function and call it again inside the loop (every 1000 ms, for example) once the previous one has correctly finished? In this case, the only thing I found relevant on StackOverflow is:
How to insert data to mongo synchronously (Nodejs, Express)
But it's a bit outdated. So, any ideas?
Well, I don't have your actual code, so I will write in pseudo code what you can do.
const chunkArray = (array, chunkSize) => {
let i,j,chunks = [];
for (i = 0, j = array.length; i < j; i += chunkSize) {
chunks.push(array.slice(i, i + chunkSize));
}
return chunks;
}
// (this loop needs to run inside an async function so that await is allowed)
for (let i = 0; i < array.length; i++) {
let bigArray = await request('your data url');
// do some mapping or whatever
// then break your large array into smaller chunks
let chunks = chunkArray(bigArray, 10);
let j;
for (j = 0; j < chunks.length; j++) {
await collection.insertMany(chunks[j]);
}
}
The actual code that solved my problem is:
async function test (name, url, lastModified) {
try {
const response = await rp({uri: url, json: true});
response.map(async (element) => {
if (element.buyout > 0) {
element.price = (element.buyout / element.quantity);
}
element.lastModified = lastModified
});
return collection.insertMany(response);
} catch (err) {
console.error(err)
}
}
async function addAsync() {
const z = await test();
console.log(z);
}
addAsync();
I am trying to build a downloader that automatically retries downloading. Basically, a task queue which retries tasks for a certain number of times. I first tried using Promise.all() but the "trick" to circumvent the fail-on-first-reject described here did not help (and is an anti-pattern as described further down in that thread)
So I got a version working which seems to somewhat do what I want. At least the results it prints are correct. But it still throws several uncaught exception test X errors/warnings and I don't know what to do about that.
The Code:
const asd = async () => {
// Function simulating tasks which might fail.
function wait(ms, data) {
return new Promise( (resolve, reject) => setTimeout(() => {
if (Math.random() > 0.5){
resolve(data);
} else {
reject(data);
}
}, ms) );
}
let tasks = [];
const results = [];
// start the tasks
for ( let i = 0; i < 20; i++) {
const prom = wait(100 * i, 'test ' + i);
tasks.push([i, prom]);
}
// collect results and handle retries.
for ( let tries = 0; tries < 10; tries++){
let failedTasks = [];
for ( let i = 0; i < tasks.length; i++) {
const task_idx = tasks[i][0];
// Wait for the task and check whether they failed or not.
// Any pointers on how to improve the readability of the next 6 lines appreciated.
await tasks[i][1].then(result => {
results.push([task_idx, result])
}).catch(err => {
const prom = wait(100 * task_idx, 'test ' + task_idx);
failedTasks.push([task_idx, prom])
});
}
// Retry the tasks which failed.
if (failedTasks.length === 0){
break;
} else {
tasks = failedTasks;
}
console.log('try ', tries);
}
console.log(results);
}
In the end, the results array contains (unless a task failed 10 times) all the results. But still uncaught exceptions fly around.
As not all rejected promises result in uncaught exceptions, my suspicion is that starting the tasks first and applying then()/catch() later is causing some timing issues here.
Any improvements or better solutions to my problems are appreciated. E.g. my solution only allows retries "in waves". If anyone comes up with a better continuous solution, that would be much appreciated as well.
Using await and async allows you to solve this in a much clearer way.
You pass an array of tasks (functions that, when executed, start the given task) to execute_tasks. That function calls execute_task for each of them, passing the task function to it; execute_task returns a Promise containing the information whether the task was successful or not.
execute_task has a loop that runs until the async task succeeds or the maximum number of retries is reached.
Because each of the tasks has its own retry loop, you can avoid those waves. Each task queues itself for a new execution when it fails. Using await this way creates a kind of cooperative multitasking, and all errors are handled because the task is executed in a try/catch block.
function wait(ms, data) {
return new Promise((resolve, reject) => setTimeout(() => {
if (Math.random() > 0.5) {
resolve(data);
} else {
reject(new Error());
}
}, ms));
}
async function execute_task(task) {
let result, lastError;
let i = 0
// loop until a result was found or the retry count reaches 10
while (!result && i < 10) {
try {
result = await task()
} catch (err) {
lastError = err
// maybe sleep/wait before retry
}
i++
}
if (result) {
return { success: true, data: result }
} else {
return { success: false, err: lastError }
}
}
async function execute_tasks(taskList) {
var taskPromises = taskList.map(task => execute_task(task))
// the result could be sorted into failed and not failed task before returning
return await Promise.all(taskPromises)
}
var taskList = []
for (let i = 0; i < 10; i++) {
taskList.push(() => {
return wait(500, {
foo: i
})
})
}
execute_tasks(taskList)
.then(result => {
console.dir(result)
})
I seem to be having an issue when trying to write to a file in a loop: the loop keeps iterating even though the first file has not been created (I think I am either not understanding promises or the asynchronous nature of the script).
So on the command line I will run node write_file_script.js premier_league
// teams.js
module.exports = {
premier_league: [
{ team_name: "Team 1", friendly_name: "name 1"},
{ team_name: "Team 2", friendly_name: "name 2"}
]
}
My Script
const args = process.argv.slice(2);
const TEAM = require('./teams');
const Excel = require('exceljs');
const workbook = new Excel.Workbook();
for (var team = 0; team < TEAM[args].length; team++) {
console.log("Starting Excel File generation for " + TEAM[args][team]['team_name']);
var fhcw = require('../data_files/home/fhcw/' + TEAM[args][team]['friendly_name'] + '_home' + '.json');
fhcw = fhcw.map(Number);
workbook.xlsx.readFile('./excel_files/blank.xlsx')
.then(function() {
var worksheet = workbook.getWorksheet(1);
// Write FHCW
for (i=0; i < fhcw.length; i++) {
col = i+6;
worksheet.getCell('E'+ col).value = fhcw[i];
}
console.log(TEAM[args][team])
workbook.xlsx.writeFile('./excel_files/' + TEAM[args] + '/' + TEAM[args][team]['friendly_name'] + '.xlsx');
});
}
The output I get when running this is:
Starting Excel File generation for Team 1
Starting Excel File generation for Team 2
undefined
(node:75393) UnhandledPromiseRejectionWarning: Unhandled promise rejection
(rejection id: 1): TypeError: Cannot read property 'friendly_name' of undefined
So it seems like the file is not being written but the loop continues. How can I ensure the file is written before moving on to the next iteration of the loop?
Thanks
If the function is returning a Promise, and you want to do them serially (one at a time) instead of in parallel (start all at the same time), you'll need to wait for each before you start the next using then().
Also, note that your TEAM is just an export of an array (at least as presented), so you can't give it args, which is where the other error comes from.
When you have a list of things to do, the best way to do them is to have a queue which you run until you run out of items. In this case, it looks like your TEAM array is your queue, but since this is an export, I'd recommend not changing it directly and instead copying it to another array which you can alter:
const args = process.argv.slice(2);
const TEAM = require('./teams');
const Excel = require('exceljs');
const workbook = new Excel.Workbook();
const writeNextFile = (queue) => {
// If we have nothing left in the queue, we're done.
if (!queue.length) {
return Promise.resolve();
}
const team = queue.shift(); // get the first element, take it out of the array
console.log("Starting Excel File generation for " + team.team_name);
var fhcw = require('../data_files/home/fhcw/' + team.friendly_name + '_home' + '.json');
fhcw = fhcw.map(Number);
// return this promise chain
return workbook.xlsx.readFile('./excel_files/blank.xlsx')
.then(function() {
var worksheet = workbook.getWorksheet(1);
// Write FHCW
for (i=0; i < fhcw.length; i++) {
col = i+6;
worksheet.getCell('E'+ col).value = fhcw[i];
}
console.log(team);
// not sure what you thought TEAM[args] would have translated to here, but it wouldn't have been a string, so I put ??? for now
// also, making the assumption that writeFile() returns a Promise.
return workbook.xlsx.writeFile('./excel_files/' + team.??? + '/' + team.friendly_name + '.xlsx');
}).then(() => writeNextFile(queue));
}
writeNextFile(TEAM.slice(0)) // make a copy of TEAM so we can alter it
.then(() => console.log('Done'))
.catch(err => console.error('There was an error', err));
Basically, our function takes an array and writes the first team, then calls itself recursively to handle the next one. Eventually it all resolves, and what you end up with is a Promise that resolves at the end.
When it comes to Promises, you basically always need to chain them together; you won't be able to use a plain for loop (or any other standard loop) to wait for them.
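As an aside, a rough sketch of the same serial chaining built with reduce instead of an explicitly recursive helper (assuming, as above, that TEAM is an array of team objects, and using a per-team write function like the writeTeamFile shown in the next snippet):

// Build one serial promise chain out of the array
TEAM.reduce(
    (chain, team) => chain.then(() => writeTeamFile(team)),
    Promise.resolve()
).then(() => console.log('Done'));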
If you wanted to write them all at once, it is a little cleaner, because you can just do a non-recursive map for each and use Promise.all to know when they are done.
const writeTeamFile = (team) => {
console.log("Starting Excel File generation for " + team.team_name);
var fhcw = require('../data_files/home/fhcw/' + team.friendly_name + '_home' + '.json');
fhcw = fhcw.map(Number);
// return this promise chain
return workbook.xlsx.readFile('./excel_files/blank.xlsx')
.then(function() {
var worksheet = workbook.getWorksheet(1);
// Write FHCW
for (i=0; i < fhcw.length; i++) {
col = i+6;
worksheet.getCell('E'+ col).value = fhcw[i];
}
console.log(team);
// not sure what you thought TEAM[args] would have translated to here, but it wouldn't have been a string, so I put ??? for now
// also, making the assumption that writeFile() returns a Promise.
return workbook.xlsx.writeFile('./excel_files/' + team.??? + '/' + team.friendly_name + '.xlsx');
});
}
Promise.all(TEAM.map(writeTeamFile))
.then(() => console.log('Done'))
.catch(err => console.error('Error'));
Promises are for async, and generally when you have a group of things, you want to do them in parallel because it's faster. That's why the parallel version is a lot cleaner; we don't have to call things recursively.
Since you are not writing to the same file, you can loop through the files and perform read/write operations.
You can bind the index value in the loop (with an immediately invoked function) and move ahead.
Example code:
var filesWrittenCount = 0;
var totalFiles = TEAM[args].length;
for (var team = 0; team < TEAM[args].length; team++) {
(function(t){
// call your async function here.
workbook.xlsx.readFile('./excel_files/blank.xlsx').then(function() {
var worksheet = workbook.getWorksheet(1);
// Write FHCW
for (i=0; i < fhcw.length; i++) {
col = i+6;
worksheet.getCell('E'+ col).value = fhcw[i];
}
console.log(TEAM[args][t])
workbook.xlsx.writeFile('./excel_files/' + TEAM[args] + '/' + TEAM[args][t]['friendly_name'] + '.xlsx');
// update number of files written.
filesWrittenCount++;
if (totalFiles == filesWrittenCount) {
// final callback function - This states that all files are updated
done();
}
});
}(team));
}
function done() {
console.log('All data has been loaded');
}
Hope this helps.
Maybe this is a general issue and I need a solution for my case: due to the non-blocking nature of JavaScript, I can't find a way to execute my function for every iteration of the for loop. Here is my example:
var text_list=[]
for (var i = 0; i < 10; i++) {
var element = array[index];
tesseract.process("img"+i+".jpg", options, function (err, text) {
if (err) {
return console.log("An error occured: ", err);
}
text_list.push(text)
});
}
console.log(text_list) //
And the result is as if I did:
tesseract.process("img"+9+".jpg"...
tesseract.process("img"+9+".jpg"...
tesseract.process("img"+9+".jpg"...
.
.
.
and what I need is:
tesseract.process("img"+0+".jpg"...
tesseract.process("img"+1+".jpg"...
tesseract.process("img"+2+".jpg"...
.
.
.
Your question does not really explain what result you are getting, and your code looks like it's missing parts. So, all I can really do here to help is explain generically (using your code where possible) how to solve this class of problem.
If you are ending up with a lot of results that all reference the last value of i in your loop, then you are probably trying to reference i in an async callback, but because the callback is called sometime later, the for loop has already finished long before the callback executes. Thus, your value of i is sitting on the last value it would have in the for loop. But, your question doesn't actually show code that does that, so this is just a guess based on the limited result you describe. To solve that type of issue, you have to make sure you're separately keeping track of i for each iteration of the loop. There are many ways to do that. In ES6, using let in the for loop definition will solve that entire issue for you. One can also construct a closure, use .forEach(), etc...
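A minimal sketch of that capture problem, with setTimeout standing in for any async callback such as tesseract.process:

// With var there is a single shared i, so every callback sees its final value
for (var i = 0; i < 3; i++) {
    setTimeout(function () { console.log('var:', i); }, 10); // logs 3, 3, 3
}

// With let, each iteration gets its own binding
for (let j = 0; j < 3; j++) {
    setTimeout(function () { console.log('let:', j); }, 10); // logs 0, 1, 2
}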
Async operations with a loop require extra work and coding to deal with. The modern solution is to convert your async operations to use promises and then use features such as Promise.all() to both tell you when all the async operations are done and to keep the results in order for you.
You can also code it manually without promises. Here's a manual version:
const len = 10;
let text_list = new Array(len);
let doneCnt = 0;
let errFlag = false;
// using let here so each invocation of the loop gets its own value of i
for (let i = 0; i < len; i++) {
tesseract.process("img"+i+".jpg", options, function (err, text) {
if (err) {
console.log("An error occured: ", err);
// make sure err is wrapped in error object
// so you can tell errors in text_list array from values
if (!(err instanceof Error)) {
err = new Error(err);
}
text_list[i] = err;
errFlag = true;
} else {
text_list[i] = text;
}
// see if we're done with all the requests
if (++doneCnt === len) {
if (errFlag) {
// deal with situation where there were some errors
} else {
// put code here to process finished text_list array
}
}
});
}
// you can't process results here because async operations are not
// done yet when code here runs
Or, using promises, you can make a "promisified" version of tesseract.process() and then use promise functionality to track multiple async operations:
// make promisified version of tesseract.process()
tesseract.processP = function(img, options) {
return new Promise(function(resolve, reject) {
tesseract.process(img, options, function(err, text) {
if (err) {
reject(err)
} else {
resolve(text);
}
});
});
}
const len = 10;
let promises = [];
for (let i = 0; i < len; i++) {
promises.push(tesseract.processP("img"+i+".jpg", options));
}
Promise.all(promises).then(function(results) {
// process results array here (in order)
}).catch(function(err) {
// handle error here
});
I have an array that contains arrays of promises, and each inner array could have either 4k, 2k or 500 promises.
In total there are around 60k promises and I may test it with other values as well.
Now I need to execute the Promise.all(BigArray[0]).
Once the first inner array is done, I need to execute the next Promise.all(BigArray[1]) and so on and so on.
If I try to execute a Promise.all(BigArray) it's throwing:
fatal error call_and_retry_2 allocation failed - process out of memory
I need to execute each batch of promises in series, not in parallel (which I think is what Node is doing). I shouldn't use new libs; however, I am willing to consider such an answer!
Edit:
Here is an example piece of code:
function getInfoForEveryInnerArgument(InnerArray) {
const CPTPromises = _.map(InnerArray, (argument) => getDBInfo(argument));
return Promise.all(CPTPromises)
.then((results) => {
return doSomethingWithResults(results);
});
}
function mainFunction() {
BigArray = [[argument1, argument2, argument3, argument4], [argument5, argument6, argument7, argument8], ....];
//the summ of all arguments is over 60k...
const promiseArrayCombination = _.map(BigArray, (InnerArray, key) => getInfoForEveryInnerArgument(InnerArray));
Promise.all(promiseArrayCombination).then((fullResults) => {
console.log(fullResults);
return fullResults;
})
}
An answer from October 2020. Async/await makes it short: only 10 code lines+JSDoc.
/**
* Same as Promise.all(items.map(item => task(item))), but it waits for
* the first {batchSize} promises to finish before starting the next batch.
*
* @template A
* @template B
* @param {function(A): B} task The task to run for each item.
* @param {A[]} items Arguments to pass to the task for each call.
* @param {number} batchSize
* @returns {Promise<B[]>}
*/
async function promiseAllInBatches(task, items, batchSize) {
let position = 0;
let results = [];
while (position < items.length) {
const itemsForBatch = items.slice(position, position + batchSize);
results = [...results, ...await Promise.all(itemsForBatch.map(item => task(item)))];
position += batchSize;
}
return results;
}
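A hypothetical usage sketch, reusing the getDBInfo call from the question and assuming allArguments is the flattened list of ~60k arguments, processed 500 at a time:

promiseAllInBatches(arg => getDBInfo(arg), allArguments, 500)
    .then(results => console.log('done, got', results.length, 'results'))
    .catch(err => console.error(err));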
Your question is a bit misnamed which may have confused some folks in this question and in the previous version of this question. You are trying to execute a batch of async operations in series, one batch of operations, then when that is done execute another batch of operations. The results of those async operations are tracked with promises. Promises themselves represent async operations that have already been started. "Promises" aren't executed themselves. So technically, you don't "execute a batch of promises in series". You execute a set of operations, track their results with promises, then execute the next batch when the first batch is all done.
Anyway, here's a solution to serializing each batch of operations.
You can create an inner function which I usually call next() that lets you process each iteration. When the promise resolves from processing one innerArray, you call next() again:
function mainFunction() {
return new Promise(function(resolve, reject) {
var bigArray = [[argument1, argument2, argument3, argument4], [argument5, argument6, argument7, argument8], ....];
//the summ of all arguments is over 60k...
var results = [];
var index = 0;
function next() {
if (index < bigArray.length) {
getInfoForEveryInnerArgument(bigArray[index++]).then(function(data) {
results.push(data);
next();
}, reject);
} else {
resolve(results);
}
}
// start first iteration
next();
});
}
This also collects all the sub-results into a results array and returns a master promise whose resolved value is this results array. So, you could use this like:
mainFunction().then(function(results) {
// final results array here and everything done
}, function(err) {
// some error here
});
You could also use the .reduce() design pattern for iterating an array serially:
function mainFunction() {
var bigArray = [[argument1, argument2, argument3, argument4], [argument5, argument6, argument7, argument8], ....];
return bigArray.reduce(function(p, item) {
return p.then(function(results) {
return getInfoForEveryInnerArgument(item).then(function(data) {
results.push(data);
return results;
})
});
}, Promise.resolve([]));
}
This creates more simultaneous promises than the first option and I don't know if that is an issue for such a large set of promises (which is why I offered the original option), but this code is cleaner and the concept is convenient to use for other situations too.
FYI, there are some promise add-on features built for doing this for you. In the Bluebird promise library (which is a great library for development using promises), they have Promise.map() which is made for this:
function mainFunction() {
var bigArray = [[argument1, argument2, argument3, argument4], [argument5, argument6, argument7, argument8], ....];
// Bluebird's Promise.map accepts a concurrency option, which keeps the batches sequential
return Promise.map(bigArray, getInfoForEveryInnerArgument, { concurrency: 1 });
}
@jfriend00 Just adding to your answer using async/await with reduce:
function runPromisesInSeries(bigArray, getInfoForEveryInnerArgument) {
try {
return bigArray.reduce(async (acc, cItem) => {
const results = await acc
const data = await getInfoForEveryInnerArgument(cItem)
results.push(data)
return results
}, Promise.resolve([]))
} catch (err) {
throw err
}
}
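Hypothetical usage, mirroring the mainFunction from the question:

runPromisesInSeries(bigArray, getInfoForEveryInnerArgument)
    .then(fullResults => console.log(fullResults))
    .catch(err => console.error(err));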
In addition, if the original array is not made of promises but of objects that should be processed, batch processing can be done without an external dependency using a combination of Array.prototype.map(), Array.prototype.slice() and Promise.all():
// Main batch parallelization function.
function batch(tasks, pstart, atonce, runner, pos) {
if (!pos) pos = 0;
if (pos >= tasks.length) return pstart;
var p = pstart.then(function() {
output('Batch:', pos / atonce + 1);
return Promise.all(tasks.slice(pos, pos + atonce).map(function(task) {
return runner(task);
}));
});
return batch(tasks, p, atonce, runner, pos + atonce);
}
// Output function for the example
function output() {
document.getElementById("result").innerHTML += Array.prototype.slice.call(arguments).join(' ') + "<br />";
window.scrollTo(0, document.body.scrollHeight);
}
/*
* Example code.
* Note: Task runner should return Promise.
*/
function taskrunner(task) {
return new Promise(function(resolve, reject) {
setTimeout(function() {
output('Processed:', task.text, 'Delay:', task.delay);
resolve();
}, task.delay);
});
}
var taskarray = [];
function populatetasks(size) {
taskarray = [];
for (var i = 0; i < size; i++) {
taskarray.push({
delay: 500 + Math.ceil(Math.random() * 50) * 10,
text: 'Item ' + (i + 1)
});
}
}
function clean() {
document.getElementById("result").innerHTML = '';
}
var init = Promise.resolve();
function start() {
var bsize = parseInt(document.getElementById("batchsize").value, 10),
tsize = parseInt(document.getElementById("taskssize").value, 10);
populatetasks(tsize);
init = batch(taskarray.slice() /*tasks array*/ , init /*starting promise*/ , bsize /*batch size*/ , taskrunner /*task runner*/ );
}
<input type="button" onclick="start()" value="Start" />
<input type="button" onclick="clean()" value="Clear" /> Batch size:
<input id="batchsize" value="4" size="2"/> Tasks:
<input id="taskssize" value="10" size="2"/>
<pre id="result" />
You can do it recursively. For example, here I needed to put about 60k documents into Mongo, but it was too big to do in one step, so I take 1k documents, send them to Mongo, and once that is finished I take another 1k documents, etc.
exports.rawRecursive = (arr, start) => {
//ending condition
if (start >= arr.length) {
return;
}
Rawmedicament.insertManyAsync(_.slice(arr, start, start + 1000)).then(() => {
//recursive
exports.rawRecursive(arr, start + 1000);
});
};
If you want to be notified when everything is done, you can put a callback in the ending condition, or if you like Promises you can call resolve() there.
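For example, a rough promise-returning variant of the same idea (rawRecursivePromise is just a made-up name; Rawmedicament and _ are the same as above):

exports.rawRecursivePromise = (arr, start) => {
    return new Promise((resolve, reject) => {
        // ending condition: everything inserted
        if (start >= arr.length) {
            return resolve();
        }
        Rawmedicament.insertManyAsync(_.slice(arr, start, start + 1000))
            // resolving with the next recursive call chains the remaining batches
            .then(() => resolve(exports.rawRecursivePromise(arr, start + 1000)))
            .catch(reject);
    });
};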
Dynamically batching more promises
A simple implementation where you can have a queue of tasks batched to run in parallel and add more dynamically:
class TaskQueue {
constructor ({
makeTask,
initialData = [],
getId = data => data.id,
batchSize = 15,
onComplete = () => {},
}) {
if (!makeTask) throw new Error('The "makeTask" parameter is required');
this.makeTask = makeTask;
this.getId = getId;
this.batchSize = batchSize;
this.onComplete = onComplete;
this.queue = new Map();
this.add(initialData);
}
add(...data) {
data.forEach(item => {
const id = this.getId(item);
if (this.queue.has(id)) return;
this.queue.set(id, item);
});
// running automatically on create or additional items added
this.runNextBatch();
}
runNextBatch () {
if (this.queueStarted) return;
if (this.queue.size === 0) return;
this.queueStarted = true;
const currentBatchData = Array.from(this.queue.values()).slice(0, this.batchSize);
const tasks = currentBatchData.map(data => {
    const id = this.getId(data);
    // Have some error handling implemented in `makeTask`;
    // return the promise so Promise.all below actually waits for it
    return this.makeTask(data)
        .finally(() => this.queue.delete(id));
});
return Promise.all(tasks)
.then(() => {
this.queueStarted = false;
this.runNextBatch();
})
.finally(() => {
this.queueStarted = false;
if (this.queue.size === 0) this.onComplete();
});
}
}
// Usage
const lotOfFilesForUpload = [{ uri: 'file://some-path' }, { uri: 'file://some-other-path' }];
const upload = (file) => console.log('fake uploading file: ', file);
const taskQueue = new TaskQueue({
initialData: lotOfFilesForUpload,
getId: file => file.uri,
makeTask: file => upload(file),
onComplete: () => console.log('Queue completed'),
});
// You can add more tasks dynamically
taskQueue.add({ uri: 'file://yet-another-file' });