Process an array (which updates while processing it) synchronously - javascript

I have an array arr and I need to run a function on each of its values. However, new items are added to the array while the loop is still processing the existing ones.
For example, arr has 1000 usernames, with 10 new usernames per second.
How can you run a sync task on this constantly updating array?
It is also possible that no more usernames get added to the array, so it should have a completion phase. Usernames can then start coming into the array again even after it has completed, so I will need to handle re-starting the task as well.
The function that I run on the array elements (usernames) is async, i.e. there's a setTimeout in it.

You could use a queue to have a list of waiting items and complete items.
The guts of the posted code is
while (this.queue.length) {
  this.complete.push(this.mapper(this.queue.pop()))
}
We pull the next value off the end of the queue, transform it with the mapper function, and push the result onto the complete list.
class Queue {
  constructor(queue, mapper) {
    this.queue = queue || []
    this.complete = []
    this.mapper = mapper
    // start processing whatever is already in the queue
    this.map()
  }
  // process the queue until it is empty
  map() {
    while (this.queue.length) {
      this.complete.push(this.mapper(this.queue.pop()))
    }
    console.log('completed processing', this.complete.length, 'items')
  }
  add(val) {
    console.log('add', val)
    // add the value to the queue
    this.queue.unshift(val)
    // run the queue processing again (this handles the re-start case)
    this.map()
  }
  // get the completed items
  completed() {
    return this.complete
  }
}
// just a random function to transform the queue contents
const toHex = item => {
  const hex = item.toString(16)
  // pad single-digit hex values with a leading zero
  return '0x' + (hex.length < 2 ? '0' + hex : hex)
}
// instantiate your new queue
const queue = new Queue([1, 2, 3, 4, 5, 6, 7], toHex)
// nothing to see here, it's just to mock up the asynchronous adding
// of items to the queue
const startTime = Date.now()
const timer = () => {
  const now = Date.now()
  queue.add(now - startTime)
  if (now - startTime < 1000) {
    setTimeout(timer, Math.floor(Math.random() * 30))
  }
}
timer()
timer()
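Since your mapper is async (it contains a setTimeout), note that the Queue above maps items synchronously. Below is a minimal async-aware sketch, under the assumption that the mapper returns a promise; the AsyncQueue name and its drain method are my own, hypothetical:

class AsyncQueue {
  constructor(mapper) {
    this.queue = []
    this.complete = []
    this.mapper = mapper // assumed to be an async function
    this.processing = false
  }
  async drain() {
    if (this.processing) return // only one drain loop runs at a time
    this.processing = true
    while (this.queue.length) {
      // await each async mapper call before taking the next item
      this.complete.push(await this.mapper(this.queue.shift()))
    }
    this.processing = false
    console.log('completed', this.complete.length, 'items')
  }
  add(val) {
    this.queue.push(val)
    this.drain() // re-starts processing if the queue had drained
  }
}

Usage is the same idea as above: call add(username) at any time. A completed phase simply means the internal loop has exited, and the next add() starts it again.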

Related

Why does setInterval never run in my NodeJs code that streams a generator to a file?

I have this situation in my NodeJs code, which calculates permutations (code from here), but no matter what I don't get any output from setInterval.
const { Readable } = require('stream');
const { intervalToDuration, formatDuration, format } = require('date-fns');
const { subsetPerm } = require('./permutation');

function formatLogs(counter, permStart) {
  const newLocal = new Date();
  const streamTime = formatDuration(intervalToDuration({
    end: newLocal.getTime(),
    start: permStart.getTime()
  }));
  const formattedLogs = `wrote ${counter.toLocaleString()} patterns, after ${streamTime}`;
  return formattedLogs;
}

const ONE_MINUTES_IN_MS = 1 * 60 * 1000;
let progress = 0;
let timerCallCount = 1;
let start = new Date();

const interval = setInterval(() => {
  console.log(formatLogs(progress, start));
}, ONE_MINUTES_IN_MS);

const iterStream = Readable.from(subsetPerm(Object.keys(Array.from({ length: 200 })), 5));
console.log(`Stream started on: ${format(start, 'PPPPpppp')}`)

iterStream.on('data', () => {
  progress++;
  if (new Date().getTime() - start.getTime() >= (ONE_MINUTES_IN_MS * timerCallCount)) {
    console.log(`manual timer: ${formatLogs(progress, start)}`)
    timerCallCount++;
    if (timerCallCount >= 3) iterStream.destroy();
  }
});

iterStream.on('error', err => {
  console.log(err);
  clearInterval(interval);
});

iterStream.on('close', () => {
  console.log(`closed: ${formatLogs(progress, start)}`);
  clearInterval(interval);
})

console.log('done!');
But what I find is that it prints 'done!' (expected) and then the script seems to have ended, even though a console.log in my on('data') callback shows data is still being processed. Even hours later the console.log in the setInterval never runs, and nothing ends up on file besides the output from the on('close', ...) handler.
The output log looks like:
> node demo.js
Stream started on: Sunday, January 30th, 2022 at 5:40:50 PM GMT+00:00
done!
manual timer: wrote 24,722,912 patterns, after 1 minute
manual timer: wrote 49,503,623 patterns, after 2 minutes
closed: wrote 49,503,624 patterns, after 2 minutes
The timers in Node guide has a section called 'leaving timeouts behind' which looked relevant. I thought interval.ref() told the script not to garbage collect the object until .unref() is called on the same timeout object, but on second reading that's not quite right, and it doesn't make a difference.
I'm running this using npm, like so: npm run noodle, which just points to the file.
The generator is synchronous and blocks the event loop
Readable.from processes the whole generator in one go, so if the generator is synchronous and long running it blocks the event loop.
Here is the annotated code that it runs:
async function next() {
  for (;;) {
    try {
      const { value, done } = isAsync ?
        await iterator.next() : // our generator is not asynchronous
        iterator.next();
      if (done) {
        readable.push(null); // the generator is done: pushing null signals end-of-stream
      } else {
        const res = (value &&
          typeof value.then === 'function') ?
          await value :
          value; // not a thenable
        if (res === null) {
          reading = false;
          throw new ERR_STREAM_NULL_VALUES();
        } else if (readable.push(res)) { // readable.push returns false if the stream is paused, or in some other irrelevant cases
          continue; // we continue to the next item in the iterator
        } else {
          reading = false;
        }
      }
    } catch (err) {
      readable.destroy(err);
    }
    break;
  }
}
Here is the API documentation for readable.push, which explains how this keeps the generator running:
Returns: true if additional chunks of data may continue to be pushed; false otherwise.
Nothing has told NodeJs not to continue pushing data, so it carries on.
Between each run of the event loop, Node.js checks if it is waiting for any asynchronous I/O or timers and shuts down cleanly if there are not any.
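You can reproduce the effect without streams at all. In this minimal sketch (my own, for illustration), the timer is scheduled but the synchronous loop never hands control back to the event loop, so the callback cannot fire until the loop ends:

// the timer is scheduled, but it can only fire when the event loop is free
setInterval(() => console.log('tick'), 100);

// a long-running synchronous loop keeps control and starves the event loop
let total = 0;
for (let i = 0; i < 1e9; i++) {
  total += i;
}
// no 'tick' is printed until this loop has finished
console.log('loop done', total);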
I raised this as a NodeJs Github Issue and ended up workshopping this solution:
const yieldEvery = 1e5;

function setImmediatePromise() {
  return new Promise(resolve => setImmediate(resolve));
}

// wrap the original synchronous generator (baseGenerator) in an async
// generator that yields back to the event loop every `yieldEvery` items
const iterStream = Readable.from(async function* () {
  let i = 0
  for await (const item of baseGenerator) {
    yield item;
    i++;
    if (i % yieldEvery === 0) await setImmediatePromise();
  }
}());
This is partly inspired by this snyk.io blog, which goes into more detail on this issue.

Forking tasks workflow in Javascript

I'm doing some tests to learn to fork different tasks in JavaScript, as I'm new to the language. I'm trying to sum every three-number group from a plain text file formatted as follows:
199
200
208
210
200
207
(199, 200, 208) is the first group, (200, 208, 210) is the second one, etc...
I read from the file, split the string, and got my array of strings. Now I want to do the adding in a loop that forks a subprocess on every iteration (the sum is done in the subprocess) and print the resulting array of summed numbers.
parent.js
const fs = require('fs');
const { fork } = require('child_process');

const readString = fs.readFileSync('depth_readings_p2.txt', 'utf8');
const readArray = readString.split('\n');
var numArrayDef = [];

for (let i = 0; i < readArray.length - 2; i++) {
  let msg = {
    i,
    readArray
  };
  let childProcess = fork('function.js');
  childProcess.send(msg);
  childProcess.on('message', (m) => {
    console.log(m);
    numArrayDef.push(m);
  });
  console.log(numArrayDef[i]);
}
As you see, I'm sending the subprocess an object that includes the index and the array of strings. The parent process receives the summed number and stores it in numArrayDef.
function.js
process.on('message', (msg) => {
  let num = 0;
  if ((msg.i + 2) < msg.readArray.length) {
    num += parseInt(msg.readArray[msg.i]);
    num += parseInt(msg.readArray[msg.i + 1]);
    num += parseInt(msg.readArray[msg.i + 2]);
    process.send(num);
  }
  process.exit();
});
In the output I can see that the parent is receiving everything correctly, but the program isn't pushing the received values into the result array. Also, the order of execution is weird:
- First, everything in the loop but the message receiving block.
- Second, everything after the loop ends.
- Finally, the message receiving block.
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
[]
607
618
618
617
647
716
769
792
I know I'm missing something about forking processes, but I don't know what it is and I don't see it in the fork documentation.
What you have to understand about Node.js is its asynchronous nature: the code is not really executed in the order you have written it (at least, a lot of the time).
childProcess is a process handle which is returned immediately, but the forked process itself may take some time to start. What you do is add a callback which will be executed every time a message event is received. Check this code:
parent.js
let childProcess = fork('function.js');
// this line is executed immediately after the handle is created.
// You pass a newly created function to the ".on()" function which will be
// called every time the child process sends a "message" event.
// You want to understand that you just declare an anonymous `function`
// and pass it as an argument, so the receiving code decides
// when to call it.
childProcess.on('message', (m) => {
  console.log('message received in parent:', m)
  console.log('closing the process')
  childProcess.kill('SIGINT')
});
childProcess.on('exit', () => {
  console.log('child is done!')
})
childProcess.send('I will come back!')
console.log('last line reached. Program still running.')
function.js
process.on('message', (msg) => {
  // wait two seconds, then send the message back
  setTimeout(() => {
    process.send(msg)
  }, 2000)
})
output
last line reached. Program still running.
message received in parent: I will come back!
closing the process
child is done!
execution order
Fork a process and get its handle. Code execution goes on!
Register callback listeners which will be called on given events like message or exit. These are asynchronous; you don't know when they kick in.
Log that all lines have been executed.
Some time later, the message listener and, after it, the exit listener kick in.
your code
Your code basically executes to the end (only adding handlers to a process handle) and logs data from numArrayDef that has not been added to it yet. So if no element is present at numArrayDef[5], it logs undefined by default.
callbacks
Since nodejs is single threaded by default, it's common to execute an asynchronous function and pass it a callback (just another function) which will be executed when the called function is done, as shown in the sketch below.
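Here is a minimal, self-contained sketch of that pattern (the names readAndSum and done are my own, hypothetical):

// a function that finishes some time later and reports via callback
function readAndSum(values, done) {
  setTimeout(() => {
    const sum = values.reduce((a, b) => a + b, 0)
    done(sum) // invoked only when the work has finished
  }, 500)
}

readAndSum([199, 200, 208], (sum) => {
  console.log('sum is', sum) // runs after roughly 500ms
})
console.log('this line runs first')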
The fixed code
parent.js
const fs = require('fs');
const { fork } = require('child_process');
const { EOL } = require('os')

const readString = fs.readFileSync('file.txt', 'utf8');
const readArray = readString.split(EOL);
var numArrayDef = [];

for (let i = 0; i < readArray.length - 2; i++) {
  // msg building. Done instantly.
  let msg = {
    i,
    readArray
  };
  // forking a child process. The handle is returned immediately,
  // but starting the process may take some time, and
  // the code won't wait for it!
  let childProcess = fork('function.js');
  // this line is executed immediately after the handle is created.
  // You add a listener which runs every time the child sends a message.
  childProcess.on('message', (m) => {
    console.log('message received', m)
    numArrayDef.push(m);
    // log if all numbers are done.
    if (numArrayDef.length === readArray.length - 2) {
      console.log('Done. Here\'s the array:', numArrayDef)
    }
  });
  childProcess.send(msg);
}
function.js
process.on('message', (msg) => {
  let num = 0;
  if ((msg.i + 2) < msg.readArray.length) {
    num += parseInt(msg.readArray[msg.i]);
    num += parseInt(msg.readArray[msg.i + 1]);
    num += parseInt(msg.readArray[msg.i + 2]);
    process.send(num);
  }
  process.exit();
});
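One caveat the fixed code inherits: the children can reply in any order, so numArrayDef is filled in completion order, not index order. If order matters, a small variation (a sketch reusing readArray and fork from the fixed parent.js above; the received counter is my own) stores each result at its index instead of pushing:

let received = 0;
for (let i = 0; i < readArray.length - 2; i++) {
  const childProcess = fork('function.js');
  childProcess.on('message', (m) => {
    numArrayDef[i] = m; // slot i always holds the sum for group i
    received++;
    if (received === readArray.length - 2) {
      console.log('Done. Here\'s the ordered array:', numArrayDef);
    }
  });
  childProcess.send({ i, readArray });
}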
This should give you an idea. I recommend going through some tutorials in the beginning to understand the nature of the language.
What you should learn about nodejs
Learn what a callback is
Basic understanding of async/await and Promises is a must
You should learn which operations are sync and which ones are async
The EventEmitter class is also used very often
Learning how to handle child_process or fork and other similar things is not really required to get a base understanding of nodejs
Function declaration
Just an add-on about the syntax. These are almost exactly the same, except that with the arrow function style the this context of the enclosing scope is kept for the newly created function:
// variant 1
function abc(fn) {
  // execute the argument, which is a function, but only after a timeout
  setTimeout(fn, 2000)
}

// variant 2
const abc = function(fn) {
  // execute the argument, which is a function, but only after a timeout
  setTimeout(fn, 2000)
}

// variant 3
const abc = (fn) => {
  // execute the argument, which is a function, but only after a timeout
  setTimeout(fn, 2000)
}

// call it like so:
abc(function() {
  console.log('I was passed!!.')
})
console.log('The abc function was called. Let\'s wait for it to call the passed function!')
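To see the this difference mentioned above, here is a minimal sketch (the counter object and method names are my own, hypothetical). The regular function callback gets its own this when setTimeout invokes it, while the arrow function keeps the this of the scope it was created in:

const counter = {
  count: 0,
  startRegular() {
    // regular function: `this` inside is NOT counter when called by setTimeout
    setTimeout(function () {
      console.log(this.count) // undefined: the callback's `this` is not counter
    }, 100)
  },
  startArrow() {
    // arrow function: `this` is taken from startArrow's scope, i.e. counter
    setTimeout(() => {
      console.log(this.count) // 0
    }, 200)
  }
}

counter.startRegular()
counter.startArrow()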

Go to next for of loop iteration only when

I have a for of loop:
for (const element of $array) {
  let my_value = await GM.getValue("my_value");
}
Works fine.
Now, I only want to go to the next loop iteration when my_value is not empty. If it is empty, I want it to wait for my_value to have content before moving on to the next iteration.
So far, I've tried a do..while approach:
for (const element of $array) {
  do {
    let my_value = await GM.getValue("my_value");
    if (my_value) continue;
  } while (0);
}
But it's not working.
What's the correct approach to do this?
The short answer here is that there should be nothing more to do, as GM.getValue should only be resolving its promise when it has a value to return. You shouldn't need to poll that method to "wait" for it to have a response.
Let me illustrate this with a mockup. In this example, my getValue method resolves after a short delay. This simulates it doing some work (perhaps loading some data from a database, or receiving something from a 3rd party system; it doesn't matter). My loop only continues after that action has taken place:
const GM = {
  getValue: function(input) {
    return new Promise(resolve => setTimeout(resolve, 2000, input))
  }
}

const $array = [1, 2, 3, 4, 5];

(async () => {
  for (const element of $array) {
    let my_value = await GM.getValue("my_value");
    console.log(element, my_value);
  }
})()
Based on the discussion in the comments above, we can say that with your code there is no need for a do..while or a nested loop; you can handle it with async/await alone. I set a condition to break the loop and a sleep to make it wait.
When the loop runs inside an async function, each await makes the loop wait for the promise, and then you can do whatever you need to do:
const sleep = ms => {
  return new Promise(resolve => setTimeout(resolve, ms))
}

const forLoop = async () => {
  for (const variable of [1, 2, 3, 4, 5]) {
    await sleep(1000); // wait
    console.log(variable * 2, "DoWhile"); // then do something
    // break when you need to, based on any condition...
    if (variable * 2 === 6) {
      break;
    }
    console.log(variable, "above")
  }
}

forLoop()
https://jsfiddle.net/75o3xh2a/

How to limit async tasks until there's no job left in the loop?

In my code I'm trying to learn async. I have 700 async tasks to do, and each job finishes after a random amount of time.
My question is how to limit the async tasks in the loop.
Say I want to run 30 jobs at the start, and each time a task finishes, start 1 more task to keep 30 running at a time, until all 700 tasks are done or there are no tasks left.
For now the loop executes all the async tasks at the same time; that's not what I want.
function JobdeskAsync() {
  console.log(Math.floor(Math.random() * 1000));
}

function finishedTime(max, min) {
  return Math.floor(Math.random() * (max - min)) + min;
}

for (let i = 0; i < 700; i++) {
  setTimeout(JobdeskAsync, finishedTime(5000, 1000));
}
@Wendelin has a great example there. For how it directly applies to your application, you currently are:
Looping through 700 asynchronous jobs
Executing them all immediately (to finish whenever they happen to finish)
What you need to do is be able to recognise when you have reached your pool maximum (30) and not add/execute any more until that pool depletes. I'm not writing the code for you because there are a thousand ways to do it, but at its base what you need is (a minimal sketch follows this list):
Functionality to store executing jobs (call it a "store")
Functionality to add jobs to the store
Functionality to see if that store is full
Combine 2/3 so that when #3 is not true you can do #2
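One possible minimal sketch of those four pieces (all names here are hypothetical; the running counter stands in for the "store"):

const POOL_MAX = 30;
const TOTAL = 700;
let running = 0; // the "store": how many jobs are currently executing
let next = 0;    // the next job number to start

// stand-in for one async job that takes 1000-5000ms
function startJob(i) {
  return new Promise(resolve => setTimeout(resolve, 1000 + Math.random() * 4000));
}

function fill() {
  // add jobs only while the store is not full and work remains
  while (running < POOL_MAX && next < TOTAL) {
    running++;
    startJob(next++).then(() => {
      running--;
      fill(); // a slot has opened, so top the pool back up
    });
  }
}

fill();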
You need to start 30 jobs that continue processing data until there is no more.
Here there is a queue of data that each job pulls from. If there is no data left, it returns a simple promise that resolves to 'done'. If there is data, it processes it and returns the promise from a new call to createJob().
const queue = [];
// fill queue with data to process
for (let i = 0; i < 70; i++) {
  queue.push(i);
}

function createJob() {
  // pull data to work on from the queue
  const data = queue.shift();
  // if no data is remaining, return a promise that resolves to 'done'
  if (data === undefined) {
    return Promise.resolve('done');
  }
  // otherwise we do some work on the data
  return new Promise((resolve, reject) => {
    // simulate work by waiting 500-1500ms
    setTimeout(() => {
      console.log(`handled ${data}`);
      // this resolves our promise with another promise, createJob()
      resolve(createJob());
    }, 500 + Math.random() * 1000);
  });
}

// create 30 jobs and wait for all to complete; each job will
// create a new job when it has finished and chain the results
const jobs = [];
for (let i = 0; i < 30; i++) {
  jobs.push(createJob());
}
console.log('30 jobs running...');
Promise.all(jobs).then(() => console.log('ALL JOBS COMPLETED'));

Timed promise queue / throttle

I have a request-promise function that makes a request to an API. I'm rate-limited by this API and I keep getting the error message:
Exceeded 2 calls per second for api client. Reduce request rates to resume uninterrupted service.
I'm running a couple of Promise.each loops in parallel, which is causing the issue; if I run just one instance of Promise.each everything runs fine. These Promise.each calls all lead to the same function with a request-promise call. I want to wrap this function in a queue function with an interval of 500 milliseconds, so that requests aren't made back to back or in parallel but are spaced out on a queue. The thing is, I still need these promises to get their contents, even if it takes a rather long time to get a response.
Is there anything that will do this for me? Something I can wrap a function in so that it fires at a set interval, not in parallel and not one right after another?
Update: Perhaps it does need to be promise-specific. I tried to use underscore's throttle function:
var debug = require("debug")("throttle")
var _ = require("underscore")
var request = require("request-promise")

function requestSite() {
  debug("request started")
  function throttleRequest() {
    return request({
      "url": "https://www.google.com"
    }).then(function(response) {
      debug("request finished")
    })
  }
  return _.throttle(throttleRequest, 100)
}

requestSite()
requestSite()
requestSite()
And all I got back was this:
$ DEBUG=* node throttle.js
throttle request started +0ms
throttle request started +2ms
throttle request started +0ms
Update
The last answer was wrong; this works, but I still think I can do better:
// call fn at most `count` times per `delay`.
// Note: Promise.delay and .tap here are Bluebird methods, not native promises.
const debounce = function (fn, delay, count) {
  let working = 0, queue = [];
  function work() {
    if ((queue.length === 0) || (working === count)) return;
    working++;
    Promise.delay(delay).tap(() => working--).then(work);
    let { context, args, resolve } = queue.shift();
    resolve(fn.apply(context, args));
  }
  return function debounced() {
    return new Promise(resolve => {
      queue.push({ context: this, args: arguments, resolve });
      if (working < count) work();
    });
  };
};

function mockRequest() {
  console.log("making request");
  return Promise.delay(Math.random() * 100);
}

var bounced = debounce(mockRequest, 800, 5);
for (var i = 0; i < 5; i++) bounced();
setTimeout(function () {
  for (var i = 0; i < 20; i++) bounced();
}, 2000);
So you need to make the requests throttle function-wide - that's fine. Promises have queueing pretty much built in.
var p = Promise.resolve(); // our queue

function makeRequest() {
  p = p.then(function () { // queue the promise, wait for the queue
    return request("http://www.google.com");
  });
  var p2 = p; // get a local reference to the promise
  // add a 1000ms delay to the queue so the next caller has to wait
  // (.delay is Bluebird's, not a native Promise method)
  p = p.delay(1000);
  return p2;
}
Now makeRequest calls will be at least 1000ms apart.
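For example (a usage sketch, assuming the request and makeRequest definitions above), three calls issued back to back still reach the API at least 1000ms apart:

// the queue spaces the actual requests; callers just fire away
makeRequest().then(() => console.log('first done at', Date.now()));
makeRequest().then(() => console.log('second done at', Date.now()));
makeRequest().then(() => console.log('third done at', Date.now()));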
jfriend has pointed out that you need two requests per second and not a single one - this is just as easily solvable with a second queue:
var p = Promise.resolve(1); // our first queue
var p2 = Promise.resolve(2); // our second queue

function makeRequest() {
  // Promise.any here is Bluebird's: it resolves with whichever queue frees up first
  var turn = Promise.any([p, p2]).then(function (val) {
    // put the chosen queue back into rotation, resolving to its own id again
    // 1000ms after this turn settles, so the next caller has to wait.
    // Here we wait for the request too, although that's not really needed;
    // check both options out and decide which works better in your case.
    if (val === 1) {
      p = turn.return(1).delay(1000);
    } else {
      p2 = turn.return(2).delay(1000);
    }
    return request("http://www.google.com");
  });
  return turn; // return the actual promise
}
This can be generalized to n promises using an array in a similar way.
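Here is a sketch of that generalization (my own; it swaps Promise.any for synchronous round-robin slot picking so that two concurrent callers can never grab the same queue, and it still assumes Bluebird):

const Promise = require('bluebird');
const request = require('request-promise');

// n rotating queues => at most n requests per delay window
function makeThrottled(n, delayMs, fn) {
  const queues = Array.from({ length: n }, () => Promise.resolve());
  let next = 0;
  return function throttled() {
    const i = next++ % n; // pick a slot synchronously, so callers cannot collide
    const turn = queues[i].then(fn);
    // slot i frees up delayMs after this turn settles; reflect() swallows
    // rejections so one failed request does not poison the queue
    queues[i] = turn.reflect().delay(delayMs);
    return turn;
  };
}

// e.g. two slots spaced 1000ms apart => roughly two requests per second
const throttledRequest = makeThrottled(2, 1000, () => request("http://www.google.com"));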
