JavaScript: Infinite loop in web worker [duplicate]

I want to sort an array using Web Workers. But this array might receive new values over time, while the worker is still performing the sort.
So my question is: how can I "stop" the sorting computation on the worker when a new item arrives, so it can re-sort the array with that item while keeping the sorting progress already made?
Example:
let worker = new Worker('worker.js');
let list = [10, 1, 5, 2, 14, 3];

worker.postMessage({ list });
setInterval(() => worker.postMessage({ num: SOME_RANDOM_NUM, list }), 100);

worker.onmessage = event => {
  list = event.data.list;
};
So let's say that I've passed 50; the worker had made some progress in the sorting before that, and now I have something like this:
[1, 2, 3, 10, 5, 14, 50]. Which means the sorting stopped at index 3. So I pass this new array back to the worker, so it can continue the sorting from position 3.
How can I accomplish that, since there is no way to pause/resume a web worker?

Even though the Worker runs on a different thread than your main page, and can thus run continuously without blocking the UI, it still runs on a single thread.
This means that until your sort algorithm has finished, the Worker will delay the execution of the message event handler; it is just as blocked as the main thread would be.
Even if you used another Worker from inside this Worker, the problem would be the same.
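A minimal sketch of the problem, assuming a hypothetical worker.js (not part of the demo below): the message handler cannot fire while synchronous work runs.
// worker.js: while the synchronous loop below runs, incoming messages
// simply queue up; onmessage fires only after the loop returns.
let list = [];
self.onmessage = (e) => { list.push(e.data); }; // delayed until the work ends
busyWorkFor(5000); // stands in for a long synchronous sort

function busyWorkFor(ms) {
  const end = Date.now() + ms;
  while (Date.now() < end) { /* burning CPU, no events processed */ }
}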
The only solution is to use a kind of generator function as the sorter, and to yield every now and then so that events can be processed.
But doing this naively will drastically slow down your sorting algorithm.
To make it better, you can hook into each event loop iteration thanks to a MessageChannel object: you post on one port and receive the message in the next event loop iteration. If you then post again from the handler, you have your own hook into every event loop iteration.
Now, the best would be to run a good-sized batch in each of these event loop iterations, but for the demo I'll advance only one step of our generator function per iteration (the generator is borrowed from this Q/A).
const worker = new Worker(getWorkerURL());
worker.onmessage = draw;
onclick = e => worker.postMessage(0x0000FF / 0xFFFFFF); // add a red pixel

// every frame we request the current state from the Worker
function requestFrame() {
  worker.postMessage('gimme a frame');
  requestAnimationFrame(requestFrame);
}
requestFrame();

// drawing part
const ctx = canvas.getContext('2d');
const img = ctx.createImageData(50, 50);
const data = new Uint32Array(img.data.buffer);
ctx.imageSmoothingEnabled = false;

function draw(evt) {
  // converts 0s and 1s to black and white pixels
  const list = evt.data;
  list.forEach((bool, i) =>
    data[i] = (bool * 0xFFFFFF) + 0xFF000000
  );
  ctx.setTransform(1, 0, 0, 1, 0, 0);
  ctx.clearRect(0, 0, canvas.width, canvas.height);
  ctx.putImageData(img, 0, 0);
  // draw bigger
  ctx.scale(5, 5);
  ctx.drawImage(canvas, 0, 0);
}

function getWorkerURL() {
  const script = document.querySelector('[type="worker-script"]');
  const blob = new Blob([script.textContent]);
  return URL.createObjectURL(blob);
}
body {
  background: ivory;
}
<script type="worker-script">
  // our list
  const list = Array.from({ length: 2500 }).map(_ => +(Math.random() > .5));
  // our sorter generator
  let sorter = bubbleSort(list);
  let done = false;

  /* inner messaging channel */
  const msg_channel = new MessageChannel();
  // hook into every event loop iteration
  msg_channel.port2.onmessage = e => {
    // proceed to the next step of the sorting algo
    // (could run a few thousand steps per loop)
    const state = sorter.next();
    // while running
    if (!state.done) {
      msg_channel.port1.postMessage('');
      done = false;
    }
    else {
      done = true;
    }
  };
  msg_channel.port1.postMessage('');

  /* outer messaging channel (from main) */
  self.onmessage = e => {
    if (e.data === "gimme a frame") {
      self.postMessage(list);
    }
    else {
      list.push(e.data);
      if (done) { // restart the sorter
        sorter = bubbleSort(list);
        msg_channel.port1.postMessage('');
      }
    }
  };

  function* bubbleSort(a) { // * is the magic
    var swapped;
    do {
      swapped = false;
      for (var i = 0; i < a.length - 1; i++) {
        if (a[i] > a[i + 1]) {
          var temp = a[i];
          a[i] = a[i + 1];
          a[i + 1] = temp;
          swapped = true;
          yield swapped; // pause here
        }
      }
    } while (swapped);
  }
</script>
<pre> click to add red pixels</pre>
<canvas id="canvas" width="250" height="250"></canvas>
Note that the same can be achieved with an async function, which may be more practical in some cases:
const worker = new Worker(getWorkerURL());
worker.onmessage = draw;
onclick = e => worker.postMessage(0x0000FF / 0xFFFFFF); // add a red pixel

// every frame we request the current state from the Worker
function requestFrame() {
  worker.postMessage('gimme a frame');
  requestAnimationFrame(requestFrame);
}
requestFrame();

// drawing part
const ctx = canvas.getContext('2d');
const img = ctx.createImageData(50, 50);
const data = new Uint32Array(img.data.buffer);
ctx.imageSmoothingEnabled = false;

function draw(evt) {
  // converts 0s and 1s to black and white pixels
  const list = evt.data;
  list.forEach((bool, i) =>
    data[i] = (bool * 0xFFFFFF) + 0xFF000000
  );
  ctx.setTransform(1, 0, 0, 1, 0, 0);
  ctx.clearRect(0, 0, canvas.width, canvas.height);
  ctx.putImageData(img, 0, 0);
  // draw bigger
  ctx.scale(5, 5);
  ctx.drawImage(canvas, 0, 0);
}

function getWorkerURL() {
  const script = document.querySelector('[type="worker-script"]');
  const blob = new Blob([script.textContent]);
  return URL.createObjectURL(blob);
}
body {
  background: ivory;
}
<script type="worker-script">
  // our list
  const list = Array.from({ length: 2500 }).map(_ => +(Math.random() > .5));
  // sorting state
  let done = false;

  /* outer messaging channel (from main) */
  self.onmessage = e => {
    if (e.data === "gimme a frame") {
      self.postMessage(list);
    }
    else {
      list.push(e.data);
      if (done) { // restart the sorter
        bubbleSort(list);
      }
    }
  };

  async function bubbleSort(a) { // async is the magic
    var swapped;
    do {
      swapped = false;
      for (var i = 0; i < a.length - 1; i++) {
        if (a[i] > a[i + 1]) {
          const temp = a[i];
          a[i] = a[i + 1];
          a[i + 1] = temp;
          swapped = true;
        }
        if (i % 50 === 0) { // in batches of 50
          await waitNextTask(); // pause here
        }
      }
    } while (swapped);
    done = true;
  }

  function waitNextTask() {
    return new Promise((resolve) => {
      const channel = waitNextTask.channel ||= new MessageChannel();
      channel.port1.addEventListener("message", (evt) => resolve(), { once: true });
      channel.port2.postMessage("");
      channel.port1.start();
    });
  }
  bubbleSort(list);
</script>
<pre> click to add red pixels</pre>
<canvas id="canvas" width="250" height="250"></canvas>

There are two decent options.
Option 1: Worker.terminate()
The first is just to kill your existing web worker and start a new one. For that you can use Worker.terminate().
The terminate() method of the Worker interface immediately terminates the Worker. This does not offer the worker an opportunity to finish its operations; it is simply stopped at once.
The only downsides of this approach are:
You lose all worker state. If you had to copy a load of data into it for the request, you have to do it all again.
It involves thread creation and destruction, which isn't as slow as most people think, but if you terminate web workers a lot it might cause issues.
If neither of those is an issue, it is probably the easiest option.
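A minimal sketch of the terminate-and-restart approach, assuming a hypothetical worker.js that sorts whatever list it receives:
let worker;

function restartSort(list) {
  if (worker) worker.terminate(); // stops the in-flight sort at once
  worker = new Worker('worker.js'); // fresh worker: all previous state is gone
  worker.onmessage = (e) => { /* consume e.data.list, the sorted result */ };
  worker.postMessage({ list }); // everything must be re-sent
}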
In my case I have lots of state: my worker is rendering part of an image, and when the user pans to a different area I want it to stop what it is doing and start rendering the new area. But the data needed to render the image is pretty huge.
In your case, it is the state of your (presumably huge) list that you don't want to lose.
Option 2: Yielding
The second option is basically to do cooperative multitasking. You run your computation as normal, but every now and then you pause (yield) and ask "should I stop?", like this (this example is some nonsense calculation, not sorting):
let requestId = 0;

onmessage = event => {
  ++requestId;
  sortAndSendData(requestId, event.data);
};

function sortAndSendData(thisRequestId, data) {
  let total = 0;
  while (data !== 0) {
    // Do a little bit of computation.
    total += data;
    --data;
    // Check if we are still the current request ID.
    if (thisRequestId !== requestId) {
      // Data was changed. Cancel this sort.
      return;
    }
  }
  postMessage(total);
}
This won't work, though, because sortAndSendData() runs to completion and blocks the web worker's event loop. We need some way to yield just before the thisRequestId !== requestId check. Unfortunately JavaScript doesn't quite have a yield-to-event-loop primitive. It does have async/await, so we might try this:
let requestId = 0;

onmessage = event => {
  console.log("Got event", event);
  ++requestId;
  sortAndSendData(requestId, event.data);
};

async function sortAndSendData(thisRequestId, data) {
  let total = 0;
  while (data !== 0) {
    // Do a little bit of computation.
    total += data;
    --data;
    await Promise.resolve();
    // Check if we are still the current request ID.
    if (thisRequestId !== requestId) {
      console.log("Cancelled!");
      // Data was changed. Cancel this sort.
      return;
    }
  }
  postMessage(total);
}
Unfortunately it doesn't work. That's because async/await resolves continuations eagerly as "microtasks", which are executed before any pending "macrotasks" (such as our web worker message).
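A minimal illustration of that ordering (not part of the original answer):
// Microtasks drain before the next macrotask, so awaiting an already
// resolved promise never gives a pending 'message' event a chance to run.
setTimeout(() => console.log('macrotask'), 0); // macrotask queue
Promise.resolve().then(() => console.log('microtask')); // microtask queue
console.log('sync');
// logs: sync, microtask, macrotask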
We need to force our await to become a macrotask, which you can do using setTimeout(0):
let requestId = 0;

onmessage = event => {
  console.log("Got event", event);
  ++requestId;
  sortAndSendData(requestId, event.data);
};

function yieldToMacrotasks() {
  return new Promise((resolve) => setTimeout(resolve));
}

async function sortAndSendData(thisRequestId, data) {
  let total = 0;
  while (data !== 0) {
    // Do a little bit of computation.
    total += data;
    --data;
    await yieldToMacrotasks();
    // Check if we are still the current request ID.
    if (thisRequestId !== requestId) {
      console.log("Cancelled!");
      // Data was changed. Cancel this sort.
      return;
    }
  }
  postMessage(total);
}
This works! However it is extremely slow: await yieldToMacrotasks() takes approximately 4 ms on my machine in Chrome. This is because browsers clamp setTimeout(0) to a minimum delay of something like 1 or 4 ms (the actual minimum is complicated).
Fortunately another user pointed me to a quicker way. Posting a message on a separate MessageChannel also yields to the event loop, but isn't subject to the minimum delay that setTimeout(0) is. This code works, and each loop iteration takes only ~0.04 ms, which should be fine.
let currentTask = {
  cancelled: false,
};

onmessage = event => {
  currentTask.cancelled = true;
  currentTask = {
    cancelled: false,
  };
  performComputation(currentTask, event.data);
};

async function performComputation(task, data) {
  let total = 0;
  let promiseResolver;
  const channel = new MessageChannel();
  channel.port2.onmessage = event => {
    promiseResolver();
  };
  while (data !== 0) {
    // Do a little bit of computation.
    total += data;
    --data;
    // Yield to the event loop.
    const promise = new Promise(resolve => {
      promiseResolver = resolve;
    });
    channel.port1.postMessage(null);
    await promise;
    // Check if this task has been superseded by another one.
    if (task.cancelled) {
      return;
    }
  }
  // Return the result.
  postMessage(total);
}
I'm not totally happy about it; it relies on postMessage() events being processed in FIFO order, which I doubt is guaranteed. I suspect you could rewrite the code to make it work even if that isn't true.
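One way to sidestep that concern, sketched here by borrowing the waitNextTask pattern from the first answer: resolve a dedicated one-shot promise per yield, so only a single message is ever in flight at a time and ordering between messages cannot matter.
function yieldToEventLoop() {
  return new Promise((resolve) => {
    // a single channel is lazily created and reused across calls
    const channel = yieldToEventLoop.channel ||= new MessageChannel();
    channel.port1.addEventListener('message', () => resolve(), { once: true });
    channel.port1.start();
    channel.port2.postMessage(null);
  });
}
The loop body then becomes simply await yieldToEventLoop(); followed by the task.cancelled check.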

You can do it with a trick: interrupting the work with setTimeout. For example, without an additional thread it is not possible to execute two functions in parallel, but with the setTimeout interruption trick we can do it as follows:
Example of parallel execution of functions
var count_0 = 0,
    count_1 = 0;

function func_0() {
  if (count_0 < 3)
    setTimeout(func_0, 0); // the same as: setTimeout(func_0);
  console.log('count_0 = ' + count_0);
  count_0++;
}

function func_1() {
  if (count_1 < 3)
    setTimeout(func_1, 0);
  console.log('count_1 = ' + count_1);
  count_1++;
}

func_0();
func_1();
You will get this output:
count_0 = 0
count_1 = 0
count_0 = 1
count_1 = 1
count_0 = 2
count_1 = 2
count_0 = 3
count_1 = 3
Why is this possible? Because setTimeout does not run its callback immediately: it schedules it for a later turn of the event loop, and that gap is enough for some of the following code to execute.
Solution for you
For this case you have to write your own array sort function (or you can use the one below) because we cannot interrupt the native sort function. In that function, use the same setTimeout interruption trick, and your message event notification can then be received between passes.
In the following example the interruption happens at half the number of passes over the array, and you can change that if you want.
Example with custom sort function interrupting
var numbers = [4, 2, 1, 3, 5];

// this is my bubble sort function with interruption
/**
 * Sorts an array in place. You get the same, but sorted, array.
 * @param {array} arr – array to sort
 * @param {number} dir – if dir = -1 you get an array like [5,4,3,2,1],
 *     and if dir = 1 the opposite direction, like [1,2,3,4,5]
 * @param {number} passCount – used only for the setTimeout interruption trick
 */
function sortNumbersWithInterruption(arr, dir, passCount) {
  var passes = passCount || arr.length,
      halfOfArrayLength = (arr.length / 2) | 0; // for ex. 2.5 | 0 = 2

  // Why we need the while loop: some values are at
  // the end of the array and we have to keep moving
  // them until they reach the first places of the array.
  while (passes--) {
    if (!passCount && passes == halfOfArrayLength) {
      // if you want a full break of the sorting, omit the following line
      setTimeout(function () { sortNumbersWithInterruption(arr, dir, passes) }, 0);
      /*
        You can do whatever you want here. Place 1
      */
      break;
    }
    for (var i = 0; i < arr.length - 1; i++) {
      var a = arr[i],
          b = arr[i + 1];
      if ((a - b) * dir > 0) {
        arr[i] = b;
        arr[i + 1] = a;
      }
    }
    console.log('array is: ' + arr.join());
  }

  if (passCount)
    console.log('END string is: ' + arr.join());
}

sortNumbersWithInterruption(numbers, -1); // without the passCount parameter
/*
  You can do whatever you want here. Place 2
*/
console.log('The execution is here now!');
You will get this output:
array is: 4,2,3,5,1
array is: 4,3,5,2,1
The execution is here now!
array is: 4,5,3,2,1
array is: 5,4,3,2,1
END string is: 5,4,3,2,1

You can do it with insertion sort (kind of).
Here is the idea:
Start your worker with an internal empty array (an empty array is trivially sorted).
Your worker receives only elements, not the entire array.
Your worker inserts any received element at the correct position in the array.
Every n seconds, the worker posts a message with the current array if it has changed since the last message. (If you prefer, you can send the array on every insertion, but buffering is more efficient.)
Eventually you get the entire array; if any item is added, you will receive the updated array too.
NOTE: Because your array is always sorted, you can find the correct insert position using binary search, which is very efficient; a sketch follows below.
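A minimal sketch of such a worker, assuming elements are plain numbers and the reporting interval is one second:
// worker.js: keep a sorted array, insert each incoming element
// at the position found by binary search, report once per second.
const sorted = [];
let changed = false;

function insertSorted(arr, value) {
  let lo = 0, hi = arr.length;
  while (lo < hi) {
    const mid = (lo + hi) >> 1; // binary search: O(log n) comparisons
    if (arr[mid] < value) lo = mid + 1;
    else hi = mid;
  }
  arr.splice(lo, 0, value); // the shift itself is O(n)
}

self.onmessage = (e) => {
  insertSorted(sorted, e.data);
  changed = true;
};

setInterval(() => {
  if (changed) {
    self.postMessage(sorted);
    changed = false;
  }
}, 1000);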

I think the case comes down to careful management of postMessage calls and the amount of data passed for processing at a time. I was dealing with a problem of this kind: think about not sending all new data into the function at once, but rather building your own queue; when a small enough portion of the task has been accomplished by the worker thread, it sends a message back to the main thread, which decides whether to send the next portion, wait, or quit.
In your case, e.g. one time you get 9,000 new items, the next time 100k; a queue/buffer could feed the worker the next 10k elements each time it finishes processing the last data change.
const someWorker = new Worker('abc.js');
var processingLock = false;
var queue = [];

function newDataAction(arr = null) {
  if (arr != null) {
    queue = queue.concat(arr);
  }
  if (!processingLock) {
    processingLock = true;
    var data = [];
    for (let i = 0; i < 10000 && queue.length > 0; i++) {
      data.push(queue.pop());
    }
    someWorker.postMessage(data);
  }
}

someWorker.addEventListener('message', function (e) {
  if (e.data == 'finished-last-task') {
    processingLock = false;
    if (queue.length > 0) {
      newDataAction();
    }
  }
});
Having worked through many sorting algorithms, I don't see how feeding new data into a sort over a partially sorted array saves much computation time compared with sorting the two parts sequentially and performing a merge.
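For reference, a hedged sketch of that sort-then-merge alternative:
// Merge two already-sorted numeric arrays in O(n + m).
function merge(a, b) {
  const out = [];
  let i = 0, j = 0;
  while (i < a.length && j < b.length) {
    out.push(a[i] <= b[j] ? a[i++] : b[j++]);
  }
  return out.concat(a.slice(i), b.slice(j));
}

// sortedList = merge(sortedList, newItems.sort((x, y) => x - y));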

Related

Associative array not filling up in server-sent-event environment

I think I'm on the wrong track here:
I have an event source that gives me updates on the underlying system operations. The page is intended to show said events in a jQuery-powered treetable. I receive the events perfectly, but I realized there was a case I did not handle: the case where an event arrives but is missing its parent. In that case I need to fetch the missing root plus all potentially missing children of that root node from the database. This works fine too.
//init fct
//...
eventSource.addEventListener("new_node", onEventSourceNewNodeEvent);
//...

function onEventSourceNewNodeEvent(event) {
  let data = event.data;
  if (!data)
    return;
  let rows = $(data).filter("tr");
  rows.each(function (index, row) {
    let parentEventId = row.getAttribute("data-tt-parent-id");
    let parentNode = _table.treetable("node", parentEventId);
    // if headless state is not fully
    // resolved yet keep adding new rows to array
    if (headlessRows[parentEventId]) {
      headlessRows[parentEventId].push(row);
      return;
    } else if (parentEventId && !parentNode) { // headless state found
      if (!headlessRows[parentEventId])
        headlessRows[parentEventId] = [];
      headlessRows[parentEventId].push(row);
      fetchMissingNodes(parentEventId);
      return;
    }
    insertNode(row, parentNode);
  });
}
function fetchMissingNodes(parentEventId) {
  let url = _table.data("url") + parentEventId;
  $.get(url, function (data, textStatus, request) {
    if (!data)
      return;
    let rows = $(data).filter("tr");
    //insert root and children into table
    _table.treetable("loadBranch", null, rows);
    let parentNode = _table.treetable("node", parentEventId);
    let lastLoadedRow = $(rows.last());
    let headlessRowsArray = headlessRows[parentEventId];
    while (headlessRowsArray && headlessRowsArray.length > 0) {
      let row = headlessRowsArray.shift();
      let rowId = row.getAttribute("data-tt-id");
      if (rowId <= lastLoadedRow) // already loaded event from previous fetch
        continue;
      insertNode(row, parentNode);
      let pendingUpdatesArray = pendingUpdates[rowId];
      // shouldn't be more than one, but who knows, future versions
      while (pendingUpdatesArray && pendingUpdatesArray.length > 0) {
        let updateEvent = headlessRowsArray.shift();
        updateNode(updateEvent);
      }
      delete pendingUpdates[rowId]; // <- something better here?
    }
    delete headlessRows[parentEventId]; // <- something better here too?
  });
}
The problem is around the line if (headlessRows[parentEventId]).
When I run it step by step (putting a debugger instruction just before), everything works fine: the headless array is created and filled correctly.
But as soon as I let it run at full speed, everything breaks.
The logs I printed seem to indicate that the array is not behaving the way I expected it to. If I print the array with console.log, it shows as follows:
(2957754) [empty × 2957754]
length : 2957754
__proto__ : Array(0)
It seems to be missing any actual data, whereas it shows as follows when I execute it step by step:
(2957748) [empty × 2957747, Array(1)]
2957747:[tr.node.UNDETERMINED]
length:2957748
__proto__:Array(0)
I'm missing something, but it is still eluding me.
Your code is async: you make an HTTP request but treat it as synchronous code.
Try this fix:
//init fct
//...
eventSource.addEventListener("new_node", onEventSourceNewNodeEvent);
//...

function onEventSourceNewNodeEvent(event) {
  let data = event.data;
  if (!data)
    return;
  let rows = $(data).filter("tr");
  // the row handler is async so that it can await the fetch;
  // note that jQuery's .each() itself does not wait for it
  rows.each(async function (index, row) {
    let parentEventId = row.getAttribute("data-tt-parent-id");
    let parentNode = _table.treetable("node", parentEventId);
    // if headless state is not fully
    // resolved yet keep adding new rows to array
    if (headlessRows[parentEventId]) {
      headlessRows[parentEventId].push(row);
      return;
    } else if (parentEventId && !parentNode) { // headless state found
      if (!headlessRows[parentEventId])
        headlessRows[parentEventId] = [];
      headlessRows[parentEventId].push(row);
      await fetchMissingNodes(parentEventId);
      return;
    }
    insertNode(row, parentNode);
  });
}
function fetchMissingNodes(parentEventId) {
  return new Promise((resolve, reject) => {
    let url = _table.data("url") + parentEventId;
    $.get(url, function (data, textStatus, request) {
      if (!data) {
        resolve();
        return;
      }
      let rows = $(data).filter("tr");
      //insert root and children into table
      _table.treetable("loadBranch", null, rows);
      let parentNode = _table.treetable("node", parentEventId);
      let lastLoadedRow = $(rows.last());
      let headlessRowsArray = headlessRows[parentEventId];
      while (headlessRowsArray && headlessRowsArray.length > 0) {
        let row = headlessRowsArray.shift();
        let rowId = row.getAttribute("data-tt-id");
        if (rowId <= lastLoadedRow) // already loaded event from previous fetch
          continue;
        insertNode(row, parentNode);
        let pendingUpdatesArray = pendingUpdates[rowId];
        // shouldn't be more than one, but who knows, future versions
        while (pendingUpdatesArray && pendingUpdatesArray.length > 0) {
          let updateEvent = headlessRowsArray.shift();
          updateNode(updateEvent);
        }
        delete pendingUpdates[rowId]; // <- something better here?
      }
      delete headlessRows[parentEventId]; // <- something better here too?
      resolve();
    });
  });
}

Prevent browser freezing and crashing for long time calculation

I need to check my database for duplicated names and change them to avoid duplicates. I am using the script suggested by @Jefré N.
function eliminateDuplicates() {
  var repeats = {};
  var error = false;
  //cache inputs
  var $inputs = $("input[type='text']");
  //loop through inputs and update repeats
  for (i = 0; i < $inputs.length; ++i) {
    //cache current element
    var cur = $inputs[i];
    //remove class
    $(cur).removeClass("double-error");
    //get text of this element
    var text = $(cur).val();
    //no text -- continue
    if (text === "") {
      continue;
    }
    //first time we've come across this value -- initialize its counter to 1
    if ((text in repeats) === false) {
      repeats[text] = 1;
    }
    //repeat offender. Increment its counter.
    else {
      repeats[text] = repeats[text] + 1;
    }
    //update the value for this one
    $(cur).val(text + "-" + repeats[text]);
  }
  return error; // always returns false since I'm not sure
                // when it's supposed to return true.
}
The script works fine with up to a hundred entries or so. But with several thousand records the browser freezes, and Firefox crashes outright. How can I prevent the browser from freezing and crashing, for example by showing some loading bar or busy pointer? Maybe I need to use setTimeout() or something else. Please help me prevent this freezing and crashing problem.
I tried this:
function processLargeArrayAsync(array, fn, maxTimePerChunk, context) {
  context = context || window;
  maxTimePerChunk = maxTimePerChunk || 200;
  var index = 0;

  function now() {
    return new Date().getTime();
  }

  function doChunk() {
    var startTime = now();
    while (index < array.length && (now() - startTime) <= maxTimePerChunk) {
      // callback called with args (value, index, array)
      fn.call(context, array[index], index, array);
      ++index;
    }
    if (index < array.length) {
      // set timeout for async iteration
      setTimeout(doChunk, 1);
    }
  }
  doChunk();
}
processLargeArrayAsync(veryLargeArray, myCallback);
No success: Chrome freezes, IE11 does too, and Firefox crashes. What's wrong?
My records appear in an HTML table.
Some people advise using a web worker. Maybe someone here has practical experience and a working example?
I think the most cumbersome part of your code is the DOM access: getting input values and updating them.
According to the web workers documentation, web workers have their limitations, and one of them is DOM manipulation. So I'd discard that option.
In order to fix things, I'd do as follows:
Improve your eliminateDuplicates algorithm (make it faster).
Make eliminateDuplicates asynchronous: divide the set of elements into smaller ones and perform each calculation in a different event loop tick (setTimeout).
Here is a solution I've come up with. I hope it gives you some ideas and helps you solve your problem.
First, I tweaked eliminateDuplicates a bit (I called it modifyDOM):
function modifyDOM(elements, repeats) {
  var input, text, i = 0;
  for (; i < elements.length; i++) {
    input = elements[i];
    text = input.value;
    // Remove class.
    input.className = input.className.replace(/\bdouble-error\b/, '');
    if (text) {
      repeats[text] = ~~repeats[text] + 1;
      input.value = text + "-" + repeats[text];
    }
  }
}
I avoided using jQuery inside the main loop because its wrapper makes things slower, and in your case it wasn't worth using. These small changes improved performance by about 100ms per 10,000 elements (give or take).
I created two functions that use modifyDOM: one asynchronous and one synchronous.
function parseElementsNonBlocking(elements, maxChunkSize) {
  var repeats = {},
      nChunks = Math.floor(elements.length / maxChunkSize),
      i = 0,
      j = 1;
  //loop through the inputs chunk by chunk, each in its own event loop tick
  for (; i < nChunks; i++, j++) {
    setTimeout(modifyDOM.bind(null, elements.slice(i * maxChunkSize, j * maxChunkSize), repeats), 0);
  }
  // Rest
  setTimeout(modifyDOM.bind(null, elements.slice(i * maxChunkSize), repeats), 0);
}

function parseElementsBlocking(elements) {
  var repeats = {};
  //loop through inputs and update repeats
  modifyDOM(elements, repeats);
}
Lastly, in order to test everything, here is a function that runs when the DOM is ready, creates 10,000 inputs, and then logs how long either of the above methods takes to run.
$(function () {
  var inputsDiv = $('#inputs'), i, time;
  for (i = 0; i < 10000; i++) {
    var val = i % 3 === 0 ? 'Mickey' : (i % 3 === 1 ? 'Mouse' : '');
    inputsDiv.append('<input type="text" class="double-error" name="FirstName" value="' + val + '">');
  }
  time = Date.now();
  //parseElementsBlocking($("input[type='text']"));
  parseElementsNonBlocking($("input[type='text']"), 100);
  console.log(Date.now() - time);
});
Here you have the fiddle to test it all.
Here is a solution using OODK-JS to calculate the sum of an array of 1,000,000 entries through web workers.
This solution implements the producer/consumer design pattern using the SynchronizedQueue foundation class: the producer (main thread) generates a task for each chunk of the array and adds it to the queue. The consumer (web worker) takes a task from the queue and executes it until none are left. Once all tasks are executed, the producer displays the final result.
// main.js (producer)
OODK.config({
  'path': {
    'oodk': '../src',
    'workspace': 'workspace'
  }
});

OODK(function($, _){

  $.import('{oodk}/foundation/utility/Thread', '[util.concurrent]', '{workspace}/project/Task');

  // array helper class to handle arrays
  var ArrayHelper = $.class(function($, µ, _){

    $.static(function($, µ, _){

      // slice an array into chunks, using the chunkLength argument
      // as delimiter
      $.public(function slice(arr, chunkLength){
        return arr.reduce(function(arr, val, index){
          var chunkIndex = Math.floor(index / chunkLength);
          if(!arr[chunkIndex]) {
            arr[chunkIndex] = [];
          }
          arr[chunkIndex].push(val);
          return arr;
        }, []);
      });

      // generate an array of length len
      // containing random values
      $.public(function random(len){
        var arr = [];
        for(var i = 0; i < len; i++){
          arr.push(Math.random() * 10);
        }
        return arr;
      });
    });
  });

  // class to handle a pool of threads
  var ThreadPool = $.class(function($, µ, _){

    // number of threads to instantiate
    $.private('num');
    // queue to work with
    $.private('queue');

    $.public(function __initialize(num, queue){
      _.num = num;
      _.queue = queue;
    });

    // start the pool
    $.public(function start(){
      // bind listeners
      var threadListener = $.new(Producer);
      for(var i = 0; i < _.num; i++){
        // instantiate consumers
        var consumer = $.new(OODK.foundation.util.Thread, "consumer.js");
        $.on(consumer, 'thread.ready', threadListener);
        consumer.start();
      }
      $.on(_.queue, 'synchronizedQueue.taskDone', threadListener);
    });
  });

  // event listener for the threads
  var Producer = $.implements(OODK.foundation.EventListener).class(function($, µ, _){

    // number of tasks done
    $.private('taskDone', 0);
    // final result
    $.private('finalResult', 0);

    $.private(function __processEvent(evt){
      if(evt.getType() === 'thread.ready'){
        // the thread is ready, synchronize the queue with the current thread
        queue.synchronize(evt.getTarget());
      }else if(evt.getType() == 'synchronizedQueue.taskDone'){
        // message received from the consumer that it has performed a task
        _.taskDone++;
        var cqueue = evt.getTarget();
        var chunkResult = evt.getData();
        _.finalResult += chunkResult;
        jQuery('#chunksDone').text(_.taskDone);
        if(cqueue.getCapacity() == _.taskDone){
          // once all tasks are performed, display the final result
          $.log('final sum is ' + _.finalResult);
        }else{
          // each time a chunk is calculated, display the intermediate result
          $.log('intermediate result ' + _.finalResult);
        }
      }
    });
  });

  // generate a large array of 1,000,000 random values
  var myHugeArray = ArrayHelper.self.random(1000000);

  // split this array into chunks of 25,000 length
  var chunks = ArrayHelper.self.slice(myHugeArray, 25000);

  // instantiate a synchronized queue sized to the number of chunks
  var queue = $.new(OODK.foundation.util.concurrent.SynchronizedQueue, chunks.length);

  // for each chunk create a task and add it to the queue
  for(var i = 0; i < chunks.length; i++){
    var chunk = chunks[i];
    // create a task for each chunk of the array
    var task = OODK.project.Task.self.factory(chunk);
    // and add it to the queue
    queue.put(task);
  }

  // instantiate a pool of 2 threads working on the given queue
  var threadPool = $.new(ThreadPool, 2, queue);

  // start the pool
  threadPool.start();

  $.log('calculate the sum of an array of 1,000,000 entries using 2 threads ...');
});
The consumer (web worker):
//consumer.js
OODK.config({
  'path': {
    'oodk': '../src',
    'workspace': 'workspace'
  }
});

OODK(function($, _){

  // import the concurrent API package as well as the task class
  $.import('[util.concurrent]', '{workspace}/project/Task');

  // start the synchronizer
  OODK.foundation.util.concurrent.SynchronizedObject.self.start();

  // EventListener class to handle synchronized queue events
  $.implements(OODK.foundation.EventListener).class(function Consumer($, µ, _){

    $.protected(function __processEvent(evt){
      if(evt.getType() == 'synchronizedQueue.ready'){
        // queue is synchronized
        var queue = evt.getTarget();
        // bind listener
        $.on(queue, 'synchronizedQueue.elementRetrieved', this);
        // take a task: get the head of the queue and delete it
        queue.take();
      }else if(evt.getType() == 'synchronizedQueue.elementRetrieved'){
        // a task was retrieved from the queue
        var task = evt.getData();
        var queue = evt.getTarget();
        // execute the task
        var result = task.execute();
        // notify the producer that the task is done
        queue.notify('synchronizedQueue.taskDone', result);
        if(queue.remainingElements() > 0){
          // at least one task is still in the queue, take it
          queue.take();
        }
      }
    });
  });

  var threadListener = $.new(_.Consumer);

  // global listener for the synchronizedQueue.ready event,
  // triggered when the synchronized queue is synchronized with this thread
  $.on('synchronizedQueue.ready', threadListener);
});
The Task class implements the custom logic:
OODK('project', function($, _){

  $.public().implements(OODK.foundation.Serializable).class(function Task($, µ, _){

    // the array chunk to process
    $.private('chunk');

    $.public(function __initialize(chunk){
      _.chunk = chunk;
    });

    // calculate the sum of all entries of a chunk;
    // implement the custom logic here
    $.public(function execute(){
      var result = 0;
      for(var i = 0; i < _.chunk.length; i++){
        result += _.chunk[i];
      }
      return result;
    });

    $.static(function($, µ, _){
      $.public(function factory(chunk){
        var task = $.new($.ns.Task, chunk);
        return task;
      });
    });
  });
});

Throttle / Debounce number of calls per second

I'm working with an API that only allows you to make 200 calls per second (1000ms). Using a promise request library like request-promise or axios, how can you debounce/throttle the requests made to a URL/server with rx.js? I noticed a throttle method in the Rx documentation, but it doesn't reason in calls per second.
This is a function that wraps a promise and queues calls to respect an API rate limit. I'm looking for similar functionality with Rx.
var Promise = require("bluebird");
// http://stackoverflow.com/questions/28459812/way-to-provide-this-to-the-global-scope#28459875
// http://stackoverflow.com/questions/27561158/timed-promise-queue-throttle
module.exports = promiseDebounce;

function promiseDebounce(fn, delay, count) {
  var working = 0, queue = [];
  function work() {
    if ((queue.length === 0) || (working === count)) return;
    working++;
    Promise.delay(delay).tap(function () { working--; }).then(work);
    var next = queue.shift();
    next[2](fn.apply(next[0], next[1]));
  }
  return function debounced() {
    var args = arguments;
    return new Promise(function (resolve) {
      queue.push([this, args, resolve]);
      if (working < count) work();
    }.bind(this));
  };
}
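Usage might look like this (a hedged sketch, assuming request-promise; limitedGet and urls are hypothetical names):
var rp = require('request-promise');
// at most 200 calls started per 1000ms window
var limitedGet = promiseDebounce(rp, 1000, 200);

urls.forEach(function (url) {
  limitedGet(url).then(function (body) {
    console.log(url, body.length);
  });
});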
So I had a similar issue the other day with rate-limiting access to a resource. I came across the repo bahmutov/node-rx-cycle, and here is a Plunker demo I put together to demonstrate it. It takes input from a text field and prepends it to a <ul>. Each <li> is prepended at most once every 1000ms, and any others are queued.
// Impure
const textInput = document.querySelector('.example-input');
const prependToOutput = function (item) {
  const ul = document.querySelector('.example-output');
  const li = document.createElement('li');
  li.appendChild(document.createTextNode(item));
  ul.insertBefore(li, ul.firstChild);
};

// Pure
const eventTargetValue = function (ele) { return ele.target.value; };
const textInputKeyUpStream = Rx.Observable
  .fromEvent(textInput, 'keyup')
  .map(eventTargetValue);

// Stream (rateLimit is defined in the linked demo)
rateLimit(textInputKeyUpStream, 1000)
  .timeInterval()
  .map(function (ti) { return ti.value + ' (' + ti.interval + 'ms)'; })
  .subscribe(prependToOutput);
Hope this helps.
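If you'd rather stay in plain RxJS, here is one hedged sketch (assuming RxJS 5 and axios; pendingRequests and handleResponse are hypothetical names): serialize the requests with concatMap and pad each with a 5ms delay, which keeps throughput at or below roughly 200 calls per second.
// each inner observable completes no sooner than 5ms after the
// response arrives, so at most ~200 requests start per second
Rx.Observable.from(pendingRequests)
  .concatMap(function (req) {
    return Rx.Observable.fromPromise(axios(req)).delay(5);
  })
  .subscribe(handleResponse);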

Should I bother cleaning array in node.js?

In one of my scripts I make extensive use of an array to temporarily store data. The problem I'm facing is that I have a lot of code handling the array just to make economic use of the space.
Should I even bother, since Node.js arrays are associative arrays?
My current solution is:
//Get the minimum empty id in the array
function get_id(callback) {
  var i = 0;
  while (array[i] != null) {
    i = i + 1;
  }
  array[i] = 0;
  callback(i);
}

get_id(function (i) {
  array[i] = {large object};
  //...
  array[i] = null;
});
But I feel it is wrong and bug-prone.
Can I just do:
array[i] = {large object};
i = i + 1;
//...
array[i] = null;
Or would it lead to large consumption of memory?
array is a global variable of the module using it.
Cut-down code (I've removed all computation not linked to the array player.active_mission):
var player = {},
    missions = [{time: 1000}];

function end_mission(mission, squad, mission_log, callback) {
  //Make all the computing of the mission to know if the player won...
  callback(mission_log);
}

function get_ami(callback) {
  var i = 0;
  while (player.active_mission[i] != null) {
    i = i + 1;
  }
  player.active_mission[i] = 0;
  callback(i);
}

function wait_mission(mission, squad, mission_log, i, time, callback) {
  setTimeout(function () {
    console.log('End of mission');
    player.active_mission[i] = null;
    end_mission(mission, squad, mission_log, callback);
  }, time);
}

function start_mission(mission, squad, callback) {
  var mission_log = {mission: mission, time_start: new Date(), completed: false, read: false};
  //Verify if the player can start the mission...
  console.log('start_mission');
  get_ami(function (i) {
    player.active_mission[i] = {mission: mission, squad: squad, mission_log: mission_log};
    wait_mission(mission, squad, mission_log, i, missions[mission].time, callback);
  });
}

player.active_mission = [];

//This part is inside a get request, after sanitizing all input
start_mission(0, [0, 1], function (r) {
  //r.id = req.session.player_id;
  if (r.error) {
    console.log('start: error: ' + r.error);
  } else {
    console.log('start: Success: ' + r.result);
  }
});
player.active_mission holds all uncompleted requests of the player, and needs to be saved if the player quits before completion. My question is whether I should try to keep the ids small, or just go on with .push() and get the id from .length?
In short: if an array has nothing but null for the first 1000 ids and only starts having data at array[1000], am I wasting memory?
Can I just do:
i = i + 1;
array[i] = null;
Or would it lead to large consumption of memory?
Yes. Considering that array is a global variable and won't get garbage-collected itself, filling it constantly with values (even if only null ones) will eventually make you run out of memory.
Your get_id approach that recycles unused ids does work, but it is horribly slow: it takes linear time to find a new id. It'll work for a few users with few concurrent missions, but it won't scale.
You'll rather want to use an object and delete keys from it; then you don't run into problems when just counting up:
var count = 0;
var missions = {};

function somethingThatNeedsTheStore() {
  var id = count++;
  missions[id] = …;
  // later
  delete missions[id];
}
// repeatedly call somethingThatNeedsTheStore()
Or actually, on recent node versions, you should consider using a Map instead:
var count = 0;
var missions = new Map;

function somethingThatNeedsTheStore() {
  var id = count++;
  missions.set(id, …);
  // later
  missions.delete(id);
}
// repeatedly call somethingThatNeedsTheStore()
Node.js has a garbage collector that destroys unreachable objects/arrays/variables.
So when you do array[i] = {large object};, the large object will stay in memory. When you do array[i] = null;, the garbage collector can reclaim the large object (only if there is no other reference to it, of course).
So yes, it is always good to remove references to useless objects so the garbage collector can clean them up.
The memory impact of an array of 1000 null (or undefined) entries will not be very big.
If you want to save memory, you should use an object instead of an array, with this syntax:
var obj = {};
obj[id] = {large object};
// Free the id
delete obj[id];

Load dictionary file with ajax and don't crash iPhone Mobile Safari

I have a web application where I load (via ajax) a dictionary file (1MB) into a JavaScript array. I found the reason why Mobile Safari crashes after 10 seconds. But now what I'm wondering is how to get around this issue.
On the link above the answer suggests using setInterval, but this would mean I would have to chunk the dictionary file into pieces and load them one by one. This surely could be done, but I would have to make a lot of chunks, taking internet speed into account, and too many requests would take forever for the page to load (and if I make the chunks too big, some mobile users might not be able to download a chunk within the given 10-second window).
So, my question is: has anyone encountered this kind of problem, and how did you go about it? A general push in the right direction is appreciated.
edit:
This is the js code which I use to load the dictionary:
var dict = new Trie();

$.ajax({
  url: 'data/dictionary_342k_uppercase.txt',
  async: true,
  success: function (data) {
    var words = data.split('\n');
    for (var i = words.length - 1; i >= 0; i--) {
      dict.insert(words[i]);
    }
  },
  error: function () {
    $('#loading-message').text("Problem s rječnikom");
  }
});
Trie.js:
function Trie() {
  var ALPHABET_SIZE = 30;
  var ASCII_OFFSET = 'A'.charCodeAt();
  this.children = null;
  this.isEndOfWord = false;

  this.contains = function (str) {
    var curNode = this;
    for (var i = 0; i < str.length; i++) {
      var idx = str.charCodeAt(i) - ASCII_OFFSET;
      if (curNode.children && curNode.children[idx]) {
        curNode = curNode.children[idx];
      } else {
        return false;
      }
    }
    return curNode.isEndOfWord;
  };

  this.has = function (ch) {
    if (this.children) {
      return this.children[ch.charCodeAt() - ASCII_OFFSET] != undefined;
    }
    return false;
  };

  this.next = function (ch) {
    if (this.children) {
      return this.children[ch.charCodeAt() - ASCII_OFFSET];
    }
    return undefined;
  };

  this.insert = function (str) {
    var curNode = this;
    for (var i = 0; i < str.length; i++) {
      var idx = str.charCodeAt(i) - ASCII_OFFSET;
      if (curNode.children == null) {
        curNode.children = new Array(ALPHABET_SIZE);
        curNode = curNode.children[idx] = new Trie();
      } else if (curNode.children[idx]) {
        curNode = curNode.children[idx];
      } else {
        curNode = curNode.children[idx] = new Trie();
      }
    }
    curNode.isEndOfWord = true;
    return curNode;
  };
}
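For context, a quick usage sketch of this Trie (hypothetical words, uppercase as the code assumes):
var dict = new Trie();
dict.insert('HELLO');
dict.contains('HELLO'); // true
dict.contains('HELL');  // false: that node is not marked as end of word
dict.has('H');          // true: the root has a child for 'H'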
This is a very common issue once you start doing heavy processing in JS. If the Mobile Safari watchdog is the cause, then what you want to do is figure out where the CPU time is going.
I'm assuming it's the dict.insert() loop and not the data.split() call (that one would be a bit more difficult to manage).
The idea here is to split the dict.insert() loop into functional blocks that can be called asynchronously in a sequenced loop (which is what the setupBuildActions function does). After the first block, each subsequent block is called via setTimeout, which effectively resets the function-time counter in the JS runtime (which seems to be what's killing your process).
Using the Sequencer function means you also keep control of the order in which the functions run (they always run in the sequence they were generated in, and no two or more functions are ever scheduled for execution at the same time). This is much more effective than firing off thousands of setTimeout calls without callbacks. Your code retains control over the order of execution (which also means you can make changes during execution), and the JS runtime isn't overloaded with scheduled execution requests.
You might also want to check the node project at https://github.com/michiel/sequencer-js for more sequencing examples and http://ejohn.org/blog/how-javascript-timers-work/ for an explanation on setTimeout on different platforms.
var dict = new Trie();

// These vars are accessible from all the other functions we're setting up
// and running here
var BLOCKSIZE = 500;
var words = [];
var buildActions = [];

function Sequencer(funcs) {
  (function () {
    if (funcs.length !== 0) {
      funcs.shift()(arguments.callee);
    }
  })();
}

// Build an array of functions that can be called async (using setTimeout)
function setupBuildActions() {
  for (var offset = 0; offset < words.length; offset += BLOCKSIZE) {
    buildActions.push((function (offset) {
      return function (callback) {
        for (var i = offset; i < offset + BLOCKSIZE; i++) {
          if (words[i] !== null) { // ugly check for code brevity
            dict.insert(words[i]);
          }
        }
        // This releases control before running the next dict.insert loop
        setTimeout(callback, 0);
      };
    })(offset));
  }
}

$.ajax({
  url: 'data/dictionary_342k_uppercase.txt',
  async: true,
  success: function (data) {
    // You might want to split and set up these calls
    // in a setTimeout if the problem persists and you need to narrow it down
    words = data.split('\n');
    setupBuildActions();
    new Sequencer(buildActions);
  },
  error: function () {
    $('#loading-message').text("Problem s rječnikom");
  }
});
Here's an example using setTimeout to defer the actual insertion of words into your trie. It breaks the original string up into batches and uses setTimeout to defer processing of each batch of words. The batch size in my example is 5 words.
The actual batch insertion happens in subsequent event handlers in the browser.
It's possible that just breaking the words up into batches might itself take too long. If you hit this problem, remember you can chain setTimeout() calls: iterate for a while, then use setTimeout to schedule another event to iterate over some more, then setTimeout again, and so on; a sketch of that follows the example below.
function addBatch(batch) {
  console.log("Processing batch:");
  for (var i = 0; i < batch.length; i++)
    console.log(batch[i]);
  console.log("Return from processing batch");
}

var str = "alpha\nbravo\ncharlie\ndelta\necho\nfoxtrot\n" +
          "golf\nhotel\nindia\njuliet\nkilo\nlima\n" +
          "mike\nnovember\noscar\npapa\nquebec\n" +
          "romeo\nsierra\ntango\nuniform\n" +
          "victor\nwhiskey\nxray\nyankee\nzulu";

var batch = [];
var wordend;
for (var wordstart = 0; wordstart < str.length; wordstart = wordend + 1) {
  wordend = str.indexOf("\n", wordstart);
  if (wordend < 0)
    wordend = str.length;
  var word = str.substring(wordstart, wordend);
  batch.push(word);
  if (batch.length > 5) {
    setTimeout(addBatch, 0, batch);
    batch = [];
  }
}

setTimeout(addBatch, 0, batch);
batch = [];
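And if the splitting loop itself grows too long, here is a hedged sketch of the chained-setTimeout idea mentioned above (splitInSlices is a hypothetical name): split in time-boxed slices and reschedule, so the thread is never blocked for long.
function splitInSlices(str, onBatch, onDone, start) {
  start = start || 0;
  var deadline = Date.now() + 50; // work roughly 50ms per slice
  var batch = [];
  var end;
  while (start < str.length && Date.now() < deadline) {
    end = str.indexOf('\n', start);
    if (end < 0) end = str.length;
    batch.push(str.substring(start, end));
    start = end + 1;
  }
  onBatch(batch); // hand off what we split so far
  if (start < str.length) {
    setTimeout(function () { splitInSlices(str, onBatch, onDone, start); }, 0);
  } else {
    onDone();
  }
}

// usage: splitInSlices(data, addBatch, function () { console.log('done'); });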
