Searching for elements faster using document.querySelector in a large DOM - javascript

In a huge DOM with hundreds of elements, finding elements using document.querySelector("input[name='foo'][value='bar']") takes about 3-5 seconds for each element. Is there a way I can reduce this time, maybe by giving the full path of the element, say document.querySelector("parent child grandchild and so on and then input[name='foo'][value='Modem']"), or any other way?
I'm using CasperJS to test a large webpage, and it takes really long to fetch each element, which makes my test run for an hour. I've also tried __utils__.findOne(), but the result is the same 3-4 seconds for each element. Since my test is focused on a very small part of the entire page, I wish there were some way I could tell document.querySelector to focus the element search on a particular portion of the page.
So could someone tell me what's the fastest way, if any, to fetch elements from a large DOM?
Update: This is how I measured the time
var init = (new Date()).getTime();
var element = this.evaluate(function() {
    return document.querySelector('input[value="somethin"][name="somethin"]');
});
this.echo('Time Taken: ' + ((new Date()).getTime() - init));
Somehow the time is very high when I fetch radio buttons from the form; select elements and text boxes, however, return within a few milliseconds (I noticed this only today).
When I run document.querySelector('input[value="somethin"][name="somethin"]') in a modern browser console like Chrome's, the time is less than a second.
I don't know if it has to do with PhantomJS's headless browser or something. Only on one particular page of that website is fetching elements slowing down.
And yes, the page is very large, with hundreds of thousands of elements. It's a legacy web app that's a decade old. While on that page in IE 8, pressing F12 to view the source hangs IE for 5 minutes, but not Chrome or Firefox. Maybe it's PhantomJS's memory overload or something; occasionally PhantomJS even crashes when I run the test on that particular page. I don't know if this info helps, but I'm not sure what's relevant.

General considerations
The fastest selector would be the id selector, but even if you had ids higher up the tree, they would not gain you much. As Ian pointed out in the comments, selectors are parsed/evaluated right to left. That means the engine first looks up all inputs with the matching attributes, even if there is only one, and only then searches up the tree to see whether the ancestor elements match.
I found that if you know which enclosing element the inputs are in, you can use JavaScript DOM properties to walk the DOM and run querySelector over a smaller part of the tree, as in the sketch below. At least in my tests, this reduces the time by more than half.
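A minimal sketch of that scoping idea (the testedSection id is a hypothetical hook, not something from your page):
// Any element reference you can reach via DOM properties
// (children, firstElementChild, ...) works the same way.
var section = document.getElementById('testedSection');
// querySelector on an element searches only that element's subtree,
// so the right-to-left attribute matching runs over far fewer inputs.
var el = section.querySelector('input[name="foo"][value="bar"]');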
Memory problem
Judging by your updated question, it seems that it is really a memory problem. When a page has hundreds of thousands of elements, the relatively old PhantomJS WebKit engine will try to allocate enough memory for all of them. When it takes more memory than is free, or even more than your machine has, the OS compensates by using swap space on the hard disk.
When your script queries an element that currently exists only in swap, the query takes very long, because the data has to be fetched from the high-latency hard disk, which is very slow compared to memory.
My tests ran for 100k forms with one element each in under 30 ms per query. When I increased the number of elements, the execution time grew linearly, until at some point I got (by registering an onError handler):
runtime error R6016
- not enough space for thread data
So I cannot reproduce your problem of 3-5 seconds per query on Windows.
Possible solutions
1. Better hardware:
Try to run it on a machine with more memory and see if it runs better.
2. Reduce used memory by closing unnecessary applications
3. Manipulate the page to reduce the memory footprint:
If there are parts of the page that you don't need to test, you can simply remove them from the DOM before running the tests (see the sketch after this list). If you need to test all of it, you could run multiple tests on the same page, but each time remove everything that is currently not being tested.
Don't load images if this is an image-heavy site, by setting casper.options.pageSettings.loadImages = false;.
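A minimal sketch of that trimming idea, assuming a hypothetical #sidebar subtree your tests never touch:
casper.options.pageSettings.loadImages = false; // skip image downloads entirely
casper.thenEvaluate(function () {
    // Hypothetical selector: drop whole subtrees the tests never touch,
    // so the engine has fewer nodes to keep in memory and to match against.
    var sidebar = document.querySelector('#sidebar');
    if (sidebar) {
        sidebar.parentNode.removeChild(sidebar);
    }
});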
Test script
var page = require('webpage').create();
var content = "",
    max = 100000,
    i;
for (i = 0; i < max; i++) {
    content += '<form id="f' + i + '"><input type="hidden" name="in' + i + '" value="iv' + i + '"></form>';
}
page.evaluate(function(content){
    document.body.innerHTML = content;
}, content);
console.log("FORMS ADDED");
setTimeout(function(){
    var times = page.evaluate(function(max){
        var obj = {
                cssplain: 0,
                cssbyForm: 0,
                cssbyFormChild: 0,
                cssbyFormJsDomChild: 0,
                cssbyFormChildHybridChild: 0,
                cssbyFormHybridChild: 0,
                xpathplain: 0,
                xpathbyForm: 0
            },
            idx, start, el, i,
            repeat = 100;
        function runTest(name, obj, test) {
            var idx = Math.floor(Math.random() * max);
            var start = (new Date()).getTime();
            var el = test(idx);
            obj[name] += (new Date()).getTime() - start;
            return el;
        }
        for (i = 0; i < repeat; i++) {
            runTest('cssplain', obj, function(idx){
                return document.querySelector('input[name="in'+idx+'"][value="iv'+idx+'"]');
            });
            runTest('cssbyForm', obj, function(idx){
                return document.querySelector('#f'+idx+' input[name="in'+idx+'"][value="iv'+idx+'"]');
            });
            runTest('cssbyFormChild', obj, function(idx){
                return document.querySelector('form:nth-child('+(idx+1)+') input[name="in'+idx+'"][value="iv'+idx+'"]');
            });
            runTest('cssbyFormJsDomChild', obj, function(idx){
                // scope the query to the enclosing form via a DOM property
                return document.body.children[idx].querySelector('input[name="in'+idx+'"][value="iv'+idx+'"]');
            });
            runTest('cssbyFormChildHybridChild', obj, function(idx){
                return document.querySelector('form:nth-child('+(idx+1)+')').querySelector('input[name="in'+idx+'"][value="iv'+idx+'"]');
            });
            runTest('cssbyFormHybridChild', obj, function(idx){
                return document.querySelector('#f'+idx).querySelector('input[name="in'+idx+'"][value="iv'+idx+'"]');
            });
            runTest('xpathplain', obj, function(idx){
                return document.evaluate('//input[@name="in'+idx+'" and @value="iv'+idx+'"]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
            });
            runTest('xpathbyForm', obj, function(idx){
                return document.evaluate('//form[@id="f'+idx+'"]//input[@name="in'+idx+'" and @value="iv'+idx+'"]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
            });
        }
        for (var type in obj) {
            obj[type] /= repeat;
        }
        return obj;
    }, max);
    console.log("TIMES");
    for (var type in times) {
        console.log(type + ":\t" + times[type]);
    }
    phantom.exit();
}, 0); // just in case the content is not yet evaluated
Output on my machine:
cssbyForm: 29.55
cssbyFormChild: 29.97
cssbyFormChildHybridChild: 11.51
cssbyFormHybridChild: 10.17
cssbyFormJsDomChild: 11.73
cssplain: 29.39
xpathbyForm: 206.66
xpathplain: 207.05
Note: I used PhantomJS directly. The results should not differ when the same technique is used from CasperJS.

Related

Best practice: Is it more efficient to replace each element or to check and replace only what has changed?

This question stems from a conundrum I am facing in Javascript, though a more general scientific response would be extremely helpful.
If an object or array is being iterated over for another purpose, and it is known that only one element of interest has changed and can be acted upon for manipulation, is it best to:
Simply replace every element with new data to reflect the change
Rigorously check each element and replace only that which has changed
(In this example, heights of all bars of a graph are being adjusted—as they are relative—though only one textual piece of information is targeted for change.)
Array.from(result['data']).forEach(row => {
    const bar = document.getElementById('bar-' + row['date']);
    bar.style.height = 'calc(1.6rem + ' + row['percentage'] + '%)';
    bar.firstChild.textContent = row['distance'];
});
Or:
Array.from(result['data']).forEach(row => {
    const bar = document.getElementById('bar-' + row['date']);
    bar.style.height = 'calc(1.6rem + ' + row['percentage'] + '%)';
    if (bar.firstChild.textContent !== row['distance']) bar.firstChild.textContent = row['distance'];
});
I suppose this is a question that exposes my ignorance, which has made it difficult for me to research a conclusion: is it more computationally expensive to replace all elements when a difference is known to exist somewhere in the set, or is it cheaper to seek out the offending individual and change only that value?
(Setting timers, i.e. console.timeEnd(), has proved inconclusive.)
Any education would be thoroughly appreciated. I can't get my head around it.
It depends on the browser.
On Chrome and Opera, at least, plain assignment without checking looks to be around 3x more performant than looking up the existing text first, even when the lookup never results in an assignment:
(warning: running the following code will block your browser for some time, only press "Run" if you're sure)
const fn1 = () => {
    const bar = document.querySelector('#bar');
    for (let i = 0; i < 9999999; i++) bar.textContent = 'bar1';
};
const fn2 = () => {
    const bar = document.querySelector('#bar');
    for (let i = 0; i < 9999999; i++) {
        // The following condition will never be fulfilled:
        if (bar.textContent !== 'bar2') bar.textContent = 'bar2';
    }
};
const now0 = performance.now();
fn1();
const now1 = performance.now();
fn2();
const now2 = performance.now();
console.log(now1 - now0);
console.log(now2 - now1);
<div id="bar"></div>
On the other hand, on Firefox 56, the lookup seems to take next to no time at all, whereas the assignment is computationally expensive.
But this is only really something to worry about if you have tons and tons of elements. Unless you're dealing with thousands or tens of thousands of elements, it's not something worth optimizing for.
It's not necessary to check if the property already has the value you're assigning to it. The browser will determine if the value actually changed and handle it accordingly.
Since in your example you have already called the DOM method getElementById, which is the slowest part, checking the property is much faster than performing a change to it. So it's always better to keep DOM manipulations to a minimum.
UPD: @CertainPerformance's test shows that performance varies between browsers =)

Reordering/move pages using Indesign script

I have a nearly 400-page document whose pages I need to reorder [randomly]. (If you need to know, this is a book of single-page stories that need to be randomly distributed. I created a random list of pages to input into the script.)
I've been working with a modified script I found elsewhere on the internet that creates an array and moves the pages around:
var order = "...list of new page numbers...";
// Create an array out of the list:
ranges = toSeparate(order);
if (ranges.length != app.activeDocument.pages.length)
{
    alert("Page number mismatch -- " + ranges.length + " given, " + app.activeDocument.pages.length + " in document");
    exit(0);
}
// Consistency check:
sorted = ranges.slice().sort(numericSort);
for (a = 0; a < sorted.length - 1; a++)
{
    if (sorted[a] < sorted[a+1] - 1 ||
        sorted[a] == sorted[a+1])
        alert("Mismatch from " + sorted[a] + " to " + sorted[a+1]);
}
// alert ("New order for "+order+"\nis "+ranges.join(", "));
// Convert from 1..x to 0..x-1:
for (moveThis = 0; moveThis < ranges.length; moveThis++)
    ranges[moveThis]--;
for (moveThis = 0; moveThis < ranges.length; moveThis++)
{
    if (moveThis != ranges[moveThis])
    {
        try {
            app.activeDocument.pages[ranges[moveThis]].move(LocationOptions.BEFORE, app.activeDocument.pages[moveThis]);
        } catch(_) { alert("problem with page " + moveThis + "/index " + ranges[moveThis]); }
    }
    for (updateRest = moveThis + 1; updateRest < ranges.length; updateRest++)
        if (ranges[updateRest] < ranges[moveThis])
            ranges[updateRest]++;
}
function toSeparate(list)
{
    s = list.split(",");
    for (l = 0; l < s.length; l++)
    {
        try {
            if (s[l].indexOf("-") > -1)
            {
                indexes = s[l].split("-");
                from = Number(indexes[0]);
                to = Number(indexes[indexes.length - 1]);
                if (from >= to)
                {
                    alert("Cannot create a range from " + from + " to " + to + "!");
                    exit(0);
                }
                s[l] = from;
                while (from < to)
                    s.splice(++l, 0, ++from);
            }
        } catch(_) {}
    }
    // s.sort (numericSort);
    return s;
}
function numericSort(a, b)
{
    return Number(a) - Number(b);
}
This code worked, except that it was consistently rearranging them into the wrong random order, which, at the end of the day, is workable, but it'll just be a bigger pain in the ass to index the stories.
I suspected the problem might be caused by starting at the beginning of the document rather than the end, so I modified the script to start at the end, but then app.activeDocument.pages[ranges[moveThis]] kept coming up as undefined.
So I gave up and tried this:
app.activeDocument.pages[298].move (LocationOptions.BEFORE, app.activeDocument.pages[366]);
app.activeDocument.pages[33].move (LocationOptions.BEFORE, app.activeDocument.pages[365]);
app.activeDocument.pages[292].move (LocationOptions.BEFORE, app.activeDocument.pages[364]);
And so on for every page. (This reminds me of my time in junior high using sendKeys to create programs in Visual Basic. Had I bothered to seriously learn JavaScript instead of creating shitty AOL chatroom scrollers, I probably wouldn't be on here today.)
Nevertheless, I received the following error:
Error Number: 30477
Error String: Invalid value for parameter 'reference' of method 'move'. Expected Page or Spread, but received nothing.
I'm trying to avoid having to manually move the pages, especially considering the amount of time I've already been working on this. Any suggestions on what I need to change? Thank you!
The issue might be that you are using more than one page per spread and then trying to shuffle pages across spreads. The better way is to use a single page per spread.
Here is a small snippet that works on my machine:
var doc = app.activeDocument;
doc.documentPreferences.facingPages = false;
for (var i = 0; i < 100; i++) {
    var index = parseInt((Math.random() * doc.spreads.length) % doc.spreads.length + '', 10);
    doc.spreads[index].move();
}
What this does is:
Disables the facing pages option, which makes one page per spread. A desirable condition, since you mentioned that your stories are one page each (I am assuming that your stories will not violate this assumption).
Takes a random spread from the doc and sends it to the end of the spreads in the doc. It does so 100 times.
The result is what you wanted: a script to shuffle the current SPREADS randomly.
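If you need your predetermined [random] order rather than a fresh shuffle each run, here is an untested sketch of the same spread-based idea; the order array stands in for your generated list (0-based), and it assumes spread ids stay stable while spreads are moved:
var doc = app.activeDocument;
doc.documentPreferences.facingPages = false;
var order = [2, 0, 3, 1]; // hypothetical target order, 0-based
// Collect stable ids first, because moving spreads around shifts the
// collection indices we would otherwise still have to look up.
var ids = [];
for (var i = 0; i < order.length; i++) {
    ids.push(doc.spreads[order[i]].id);
}
// Moving each spread to the end, in target order, leaves the
// document in exactly that order.
for (var j = 0; j < ids.length; j++) {
    doc.spreads.itemByID(ids[j]).move(LocationOptions.AT_END);
}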

Javascript runs out of memory in major browsers due to array size

I am trying to create an array that will be MAASSSIVVEE... I read somewhere that JavaScript can create an array of up to 4.xx billion elements or so. The array I am trying to create will likely be in the quadrillions or higher. I don't even know where to go from here. I am going to assume that JS is not the proper solution for this, but I'd like to give it a try... it is for the client side, and I would prefer not to bog down my server with this if there are multiple people using it at once. Also, I'm not looking to learn a new language, as I am just getting into JS, and code in general.
Could I possibly use setTimeout(..., 0) breaks in the totalcombos function? Time is not really an issue; I wouldn't mind if it took a few minutes to calculate, but right now it just crashes.
I have tried this using a dedicated worker, but it still crashes the host. The worker code is what I am posting, as the host code is irrelevant to this question (it only compiles the original objects and posts them, then receives the messages back).
The code (sorry for the mess... I'm a coding noob and just an enthusiast):
onmessage = function(event) {
    // this has been tested on the very small sample size below, and still runs out of memory
    // all the objects in these first arrays are formatted as follows:
    // {"pid":"21939","name":"John Smith","position":"QB","salary":"9700","fppg":"23"}
    // "PID" is unique to each object, everything else could appear in another object.
    // There are no repeated objects.
    var qbs = **group of 10 objects like above**
    var rbs = **group of 10 objects like above**
    var wrs = **group of 10 objects like above**
    var tes = **group of 10 objects like above**
    var ks = **group of 10 objects like above**
    var ds = **group of 10 objects like above**
    // This code works great and fast with small sets, i.e. (qbs, rbs, wrs)
    function totalcombos() {
        var r = [], arg = arguments, max = arg.length - 1;
        function helper(arr, i) {
            for (var j = 0; j < arg[i].length; j++) {
                var a = arr.slice(0); // clone arr
                if (a.indexOf(arg[i][j]) != -1) {
                    j++;
                } else
                    a.push(arg[i][j]);
                if (i == max) {
                    r.push(a);
                } else
                    helper(a, i + 1);
            }
        }
        helper([], 0);
        return r;
    };
    // WAY TOO BIG... commented out so as not to crash when run
    // var tCom = totalcombos(qbs, rbs, wrs, tes, ks, ds);
    // postMessage(tCom.length);
}
When the sets get to be larger, like 50 objects in each, it just crashes as it runs out of memory (50 choices in each of six groups is 50^6 ≈ 15.6 billion combinations, each stored as its own array). I reduce the set with other code, but it will still be very large. How would I fix it?
I am trying to create all the possible combinations and then go through and reduce from there based on the total salary of each group.
When working with data, regardless of language or platform, it's usually best practice to only load the data you need; otherwise you encounter errors or bottlenecks etc., as you are finding.
If your data is stored somewhere like a database, a JSON file, a web service, or an API etc. (anything, basically), you'd be better off searching that set of data to retrieve only what you need, or at least reducing the size of the array data you're trying to traverse.
As an analogy, if you're trying to load the whole internet into memory on a PC with only 2GB of RAM, you're going to have a really bad time. :)
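A minimal sketch of that reduce-before-you-materialize idea, using the six group arrays from the question; the salary cap is an invented figure for illustration. Instead of building every combination and filtering afterwards, filter while generating, so whole subtrees of the search are pruned before they cost any memory:
var SALARY_CAP = 60000; // hypothetical cap, not from the question
var groups = [qbs, rbs, wrs, tes, ks, ds];
var valid = [];
function walk(pick, groupIndex, salarySoFar) {
    if (salarySoFar > SALARY_CAP) return; // prune this whole subtree early
    if (groupIndex === groups.length) {
        valid.push(pick.slice()); // keep only lineups that survived the cap
        return;
    }
    var group = groups[groupIndex];
    for (var i = 0; i < group.length; i++) {
        pick.push(group[i]);
        walk(pick, groupIndex + 1, salarySoFar + Number(group[i].salary));
        pick.pop(); // backtrack and reuse the same pick array
    }
}
walk([], 0, 0);
postMessage(valid.length);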

Improving performance of javascript intervals on IE8

I'm using a JavaScript loop (using setInterval) that runs through a list of search results, highlighting the search term by wrapping a CSS-styled <span> around each search hit as it goes. I'm using setInterval like this to release control of the browser while it works.
In Chrome and Firefox this works well, even with a setInterval parameter of 10-20ms, and the user has full control of the browser (i.e. scrolling, clicking links etc.) while the results are rapidly highlighted:
mylooper = setInterval(function() {
    // my functionality is here
}, 15); // 15ms
Unfortunately, when using the dreaded IE8, the browser locks up and takes a really long time to add the <span>s and style the search results. It also takes a long time just to load the page in the first place - shortened a great deal when this script is removed.
So far I've tried:
changing the interval values (I've read that IE8 doesn't detect intervals of sub 15ms);
using setTimeout instead of setInterval;
removing the interval to check that this is in fact what is causing the slow-down (it is!); and
swearing about Internet Explorer a lot;
var highlightLoop;
var index = 0;
highlightLoop = setInterval(function () {
    var regex = RegExp(regexPhrase, "gi"); // regexPhrase created elsewhere
    var searchResults = resultElements.eq(index).get(0); // run through resultElements, which contain all the nodes with search results in them
    findAndReplaceDOMText( // a function that does the searching and inserting of styling
        regex,
        searchResults,
        function (fill, matchIndex) {
            called = true;
            var span = document.createElement("span");
            span.className = "result-highlight";
            span.innerHTML = fill;
            return span;
        }
    );
    if (index == resultElements.length || searchTermUpdated == true) { // stop interval loop when search term changes or we reach the end of results - variable set elsewhere
        searchTermUpdated = false;
        clearInterval(highlightLoop); // stop the loop
    }
    index++;
}, 50); // 50ms does not improve performance.
Any advice on workarounds for this kind of javascripting in IE would be massively appreciated. Thanks all.
I believe you may be able to improve performance by tweaking findAndReplaceDOMText, and maybe its callback too. I suppose findAndReplaceDOMText appends the element returned by the callback to the DOM from within a loop over all matches. If it's doing that inside a loop, try to move the DOM insertion outside the loop and apply all the changes at once. That should result in better performance, as repainting the page after each DOM update is expensive.
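A rough sketch of that batch-the-writes idea; highlightMatches is a hypothetical helper, it swaps findAndReplaceDOMText's node splitting for a naive string replace, and it is only safe when matches never span tag boundaries:
function highlightMatches(resultEl, regex) {
    // Build the whole replacement string first, then write to the
    // DOM once per result element instead of once per match.
    resultEl.innerHTML = resultEl.innerHTML.replace(regex, function (match) {
        return '<span class="result-highlight">' + match + '</span>';
    });
}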
Try this recursive approach instead:
get a list of all elements to be acted upon into array X (one time cost)
while the array X has length, keep repeating the next actions
shift the first element off the array
process the single element
start this process again with the new array X (now of length n - 1) on a setTimeout
The code looks like this in general:
function processArray(array) {
    var element = array.shift();
    processElement(element);
    if (array.length)
        setTimeout(function () { processArray(array); }, 15);
}
There might be something else to be done with this recursion, but it works fairly well in all browsers and never blocks, because you're only initiating the repeat when the last one has had time to finish.

Optimizing Javascript Loop for Wheel Game

I have a game I'm creating where lights run around the outside of a circle, and you must try and stop the light on the same spot three times in a row. Currently, I'm using the following code to loop through the lights and turn them "on" and "off":
var num_lights = 20;
var loop_speed = 55;
var light_index = 0;
var prevent_stop = false; // If true, prevents user from stopping light
var loop = setTimeout(startLoop, loop_speed);

function startLoop() {
    prevent_stop = false;
    $(".light:eq(" + light_index + ")").css("background-color", "#fff");
    light_index++;
    if (light_index >= num_lights) {
        light_index = 0;
    }
    $(".light:eq(" + light_index + ")").css("background-color", "red");
    loop = setTimeout(startLoop, loop_speed);
}

function stopLoop() {
    clearTimeout(loop);
}
For the most part, the code seems to run pretty well, but if I have a video running simultaneously in another tab, the turning on and off of the lights seems to chug a bit. Any input on how I could possibly speed this up would be great.
For an example of the code from above, check out this page: http://ericditmer.com/wheel
When optimizing, the first thing to look at is not doing twice anything you only need to do once. Looking up an element from the DOM can be expensive, and you definitely know which elements you want, so why not pre-fetch all of them and avoid doing that lookup multiple times?
What I mean is that you should
var lights = $('.light');
So that you can later just say
lights.eq(light_index).css("background-color", "red");
Just be sure to do the first thing in a place which keeps lights in scope for the second.
EDIT: Updated per comment.
I would make a global array of your selector references, so the selector doesn't have to be executed every time the function is called. I would also consider swapping class names rather than manipulating style attributes.
Here's some information on jQuery performance:
http://www.componenthouse.com/article-19
EDIT: that article is quite old though, and jQuery has evolved a lot since. This is more recent: http://blog.dynatrace.com/2009/11/09/101-on-jquery-selector-performance/
You could try storing the light elements in an array instead of using a selector each time. Class selectors can be a little slow.
var elements = $('.light');
function startLoop() {
    prevent_stop = false;
    $(elements[light_index]).css('background-color', '#fff');
    ...
}
This assumes that the elements are already in their intended order in the DOM.
One thing I will note is that you have used setTimeout() and really just engineered it to behave like setInterval().
Try using setInterval() instead. I'm no JS engine guru, but I would like to think the constant re-arming of setTimeout has some effect on performance that would not be present using setInterval() (which you only need to set once).
Edit:
Curtousy of Diodeus, a related post to back my statement:
Related Stack Question - setTimeout() vs setInterval()
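For reference, a minimal sketch of the setInterval() variant, reusing the names from the question and assumed to behave like the original re-armed setTimeout loop:
var loop = setInterval(function () {
    prevent_stop = false;
    $(".light:eq(" + light_index + ")").css("background-color", "#fff");
    light_index = (light_index + 1) % num_lights; // wrap around the circle
    $(".light:eq(" + light_index + ")").css("background-color", "red");
}, loop_speed);
function stopLoop() {
    clearInterval(loop); // a single clearInterval stops the repeating timer
}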
OK, this includes some "best practice" improvements; whether it really optimizes the execution speed should be tested. At least you can proclaim you're now coding ninja style lol
// create a helper function that lends the array reverse function to reverse
// the order of a jQuery set. It's an object by default, not an array, so
// using it directly would fail
$.fn.reverse = Array.prototype.reverse;

var loop,
    loop_speed = 55,
    prevent_stop = false,
    // prefetch a jQuery set of all lights and reverse it to keep the right
    // order when iterating backwards (small performance optimization)
    lights = $('.light').reverse();

// this named function executes as soon as it's initialized
// I wrapped everything into a second function, so the variable prevent_stop
// is only set once at the beginning of the loop
(function startLoop() {
    // keep variables always in the scope they are needed
    // changed the iteration to count down, because checking for 0 is faster
    var num_lights = light_index = lights.length - 1;
    prevent_stop = false;
    // This is an auto-executing, self-referencing function
    // which avoids the 55ms delay when starting the loop
    loop = setInterval((function() {
        // work with css-class changing rather than css manipulation
        lights.eq(light_index).removeClass('active');
        // if not 0, decrement; else wrap around to num_lights
        light_index = (light_index) ? --light_index : num_lights;
        lights.eq(light_index).addClass('active');
        // returns a reference to this function so it can be executed by setInterval()
        return arguments.callee;
    })(), loop_speed);
})();

function stopLoop() {
    clearInterval(loop);
}
Cheers neutronenstern
