I'm using the document.evaluate() JavaScript method to get an element pointed to by an XPath expression:
// Evaluate `path` against the whole document and keep only the first
// matching node (in document order); `element` is whatever the result's
// singleNodeValue property holds.
var { singleNodeValue: element } = document.evaluate(
    path, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
But how do I get a list of elements in case the XPath expression points to more than one element on the page?
I tried the following code, but it is not working:
// Attempt from the question: the same query, but requesting an ordered node
// iterator. Unlike the single-node form, nothing is read off the result
// here, so `element` receives the object returned by document.evaluate()
// itself rather than a node.
var element = document.evaluate(
path,
document,
null,
XPathResult.ORDERED_NODE_ITERATOR_TYPE,
null
);
I found the following solution in the book I am currently reading. It says that the code is from the Prototype library.
/**
 * Collects every node matched by an XPath expression into a plain array.
 *
 * @param {string} xpath - XPath expression to evaluate.
 * @param {Node} [parent] - Context node; the document is used when omitted.
 * @returns {Node[]} Matched nodes, in the order the snapshot reports them.
 */
function getElementsByXPath(xpath, parent)
{
    const contextNode = parent || document;
    const snapshot = document.evaluate(
        xpath, contextNode, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
    // A snapshot exposes its size up front, so the array can be built in one pass.
    return Array.from(
        { length: snapshot.snapshotLength },
        (_, index) => snapshot.snapshotItem(index));
}
Use it like this:
let items = getElementsByXPath("//*"); // return all elements on the page
From the documentation
// Walk every <phoneNumber> match with an unordered node iterator and show
// each node's text. The catch branch reports any exception raised while
// iterating.
var iterator = document.evaluate(
    '//phoneNumber', documentNode, null,
    XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null);
try {
    for (var thisNode = iterator.iterateNext();
         thisNode;
         thisNode = iterator.iterateNext()) {
        alert( thisNode.textContent );
    }
}
catch (e) {
    dump( 'Error: Document tree modified during iteration ' + e );
}
Try this:
/**
 * Evaluates an XPath expression against the whole document.
 *
 * @param {string} xpath - XPath expression to evaluate.
 * @returns {XPathResult} The raw result object from document.evaluate();
 *     the result type is left for the engine to choose (ANY_TYPE).
 */
function getListOfElementsByXPath(xpath) {
    return document.evaluate(xpath, document, null, XPathResult.ANY_TYPE, null);
}
Then call it:
// Log every node matched by the expression. `node` is declared explicitly:
// assigning to an undeclared name creates an implicit global in sloppy mode
// and throws a ReferenceError in strict mode / ES modules.
var results = getListOfElementsByXPath("//YOUR_XPATH");
var node;
while ((node = results.iterateNext())) {
    console.log(node);
}
In Chrome, there is a simpler solution, described in this document, at least in the console:
$x(path)
It does the same as the getElementsByXPath function above, but much easier for debugging.
I was working hard on the same problem some weeks ago. I found out that the result already represents a list of elements (if any) and that one can iterate through it. I needed to build a jQuery plugin to search for partial or full text strings, meaning the inner text of any DOM element such as LI or H2. I got my initial understanding from this page: Document.evaluate() | MDN
After some hours I got the plugin running: Search for the word "architecture" only in "p" elements, find partial matching strings ("true" for <p>todays architecture in Europe</p>) instead of matches of entire text (<h2>architecture</h2>).
var found = $('div#pagecontent').findtext('architecture','p',true);
Found results are regular jQuery objects, which can be used as usual.
found.css({ backgroundColor: 'tomato'});
The usage example above may be altered like this to search through the entire document and all node types (partial matches):
var found = $('body').findtext('architecture','',true);
or only exact matches
var found = $('div#pagecontent').findtext('architecture');
The plugin itself shows a variable "es" which is the plural of a single "e" for "element". And you can see, how the results are iterated, and collected into a bunch of objects with f = f.add($(e)) (where "f" stands for "found"). The beginning of the function deals with different conditions, like full or partial search ("c" for condition) and the document range for the search ("d").
It may be optimized wherever needed and may not cover every possibility, but it represents my best knowledge at the moment, runs without errors, and hopefully answers your question. Here it is:
(function($) {
    /**
     * Turns arbitrary text into an XPath 1.0 string literal. XPath has no
     * escape character, so text containing both quote kinds is assembled
     * with concat() from pieces that each use the other quote.
     */
    function toXPathLiteral(text) {
        var value = String(text);
        if (value.indexOf('"') === -1) return '"' + value + '"';
        if (value.indexOf("'") === -1) return "'" + value + "'";
        return 'concat("' + value.split('"').join('", \'"\', "') + '")';
    }

    /**
     * Finds descendants of the first matched element by their text.
     *
     * @param {string} s - Text to look for.
     * @param {string} [t] - Element name to restrict the search to; any
     *     element ("*") when missing, empty or not a string.
     * @param {boolean} [p] - Pass exactly `true` for a partial (contains)
     *     match on the element's text; otherwise the element's string value
     *     must equal `s`.
     * @returns {jQuery} The matching elements (an empty set when none match).
     */
    $.fn.findtext = function(s, t, p) {
        // Fall back to <body> when called on an empty jQuery set.
        var d = this[0] || document.body;
        var tag = (t && typeof t === 'string') ? t : '*';
        // Quote the search text safely so a `"` in it cannot break the expression.
        var literal = toXPathLiteral(s);
        var c = (p === true)
            ? './/' + tag + '[contains(text(), ' + literal + ')]'
            : './/' + tag + '[. = ' + literal + ']';
        var es = document.evaluate(c, d, null, XPathResult.ANY_TYPE, null);
        var f = $();
        for (var e = es.iterateNext(); e; e = es.iterateNext()) {
            f = f.add(e);
        }
        return f;
    };
})(jQuery);
Related
I'm writing a Chrome extension that will search the DOM and highlight all email addresses on the page. I found this to look for at symbols on the page but it only returns correctly when there is one email address, it breaks when there are multiple addresses found.
// Looks for elements whose text contains an at-sign; note that
// snapshotItem(0) picks only the first entry of the snapshot.
var found = document.evaluate('//*[contains(text(),"@")]', document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null).snapshotItem(0);
What is the correct way to have this return multiples if more than one is found?
If you want to handle multiple results, don’t call .snapshotItem(0) on the result of document.evaluate(); instead, loop through the results with a for loop, using the snapshotLength property (it is a property, not a method) as the upper bound:
Example: Loop through results using snapshotLength with snapshotItem()
// Take a snapshot of every element whose text contains an at-sign and dump
// each entry. snapshotLength is a property giving the number of entries.
var nodesSnapshot = document.evaluate('//*[contains(text(),"@")]',
    document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null );
for ( var i=0 ; i < nodesSnapshot.snapshotLength; i++ )
{
    console.dir( nodesSnapshot.snapshotItem(i) );
}
Either that, or specify the XPathResult.UNORDERED_NODE_ITERATOR_TYPE argument (instead of XPathResult.ORDERED_NODE_SNAPSHOT_TYPE), and use a while loop with iterateNext():
Example: Iterate over results using iterateNext()
// Iterate over every element whose text contains an at-sign. The catch
// branch logs any exception raised while iterating.
var iterator = document.evaluate('//*[contains(text(),"@")]',
    document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null );
try {
    var thisNode = iterator.iterateNext();
    while (thisNode) {
        console.dir( thisNode );
        thisNode = iterator.iterateNext();
    }
}
catch (e) {
    console.log( 'Error: Document tree modified during iteration ' + e );
}
In cases that are more or less the reverse of the one in this question—cases when you really do want to get just the first matching node—you can specify the XPathResult.FIRST_ORDERED_NODE_TYPE value, to return just a single node, and then use the property (not method) singleNodeValue:
Example: Use XPathResult.FIRST_ORDERED_NODE_TYPE and singleNodeValue
// Fetch only the first element (in document order) whose text contains an
// at-sign. The step after "//" needs a node test ("*" = any element);
// "// [..." on its own is not a valid XPath expression.
var firstMatchingNode = document.evaluate('//*[contains(text(),"@")]',
    document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null );
console.dir( firstMatchingNode.singleNodeValue );
Getting text or counts back instead, or testing true/false conditions
Note that among the other values (constants) you can specify as the second-to-last argument to document.evaluate() to get other result types, you can make it directly return:
a single string (XPathResult.STRING_TYPE) slurped from some part of the document
a number representing a count of some kind
(XPathResult.NUMBER_TYPE); for example, a count of the number of
e-mail addresses found in the document
a boolean value (XPathResult.BOOLEAN_TYPE) representing some true/false aspect of the document; e.g., an indicator whether or not the document contains any e-mail addresses
Of course to get those other result types back, the XPath expression you give as the first argument to document.evaluate() needs to be an expression that will actually return a string, or a number, or a boolean value (instead of returning a set of attribute nodes or element nodes).
More at MDN
The examples above are all based on the MDN Introduction to using XPath in JavaScript tutorial, which is highly recommended to anybody trying to work with XPath and document.evaluate().
Through the code below, you can have your XPath selector results as an array.
const xpath = `//*[contains(text(),"@")]`;//your special XPath
// Drain the XPath iterator through a generator so Array.from can collect
// the matches into an ordinary array.
const elements = Array.from((function* () {
    const iterator = document.evaluate(
        xpath, document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null);
    let current = iterator.iterateNext();
    while (current) {
        yield current;
        current = iterator.iterateNext();
    }
})());
//Use the simple array
Also, you can have it as a function, for more calls...
/**
 * Returns all nodes matched by an XPath expression as an array.
 *
 * @param {string} xpath - XPath expression, evaluated against the document.
 * @returns {Node[]} Matches in the order the iterator yields them.
 */
function getElementsByXPath(xpath) {
    const iterator = document.evaluate(
        xpath, document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null);
    const matches = [];
    // iterateNext() hands back a falsy value once the matches are exhausted.
    for (let current = iterator.iterateNext(); current; current = iterator.iterateNext()) {
        matches.push(current);
    }
    return matches;
}
Enjoy...
I'm trying to write a jQuery or pure Javascript function (preferring the more readable solution) that can count the length of a starting tag or ending tag in an HTML document.
For example,
<p>Hello.</p>
would return 3 and 4 for the starting and ending tag lengths. Adding attributes,
<span class="red">Warning!</span>
would return 18 and 7 for the starting and ending tag lengths. Finally,
<img src="foobar.png"/>
would return 23 and 0 (or -1) for the starting and ending tag lengths.
I'm looking for a canonical, guaranteed-to-work-according-to-spec solution, so I'm trying to use DOM methods rather than manual text manipulations. For example, I would like the solution to work even for weird cases like
<p>spaces infiltrating the ending tag</ p >
and
<img alt="unended singleton tags" src="foobar.png">
and such. That is, my hope is that as long as we use proper DOM methods, we should be able to find the number of characters between < and > no matter how weird things get, even
<div data-tag="<div>">HTML-like strings within attributes</div>
I have looked at the jQuery API (especially the Manipulation section, including DOM Insertion and General Attributes subsections), but I don't see anything that would help.
Currently the best idea I have, given an element node is
// Assumes the end tag is exactly "</" + tagName + ">" (3 extra characters);
// the start tag is then whatever remains of outerHTML once the inner
// markup and that end tag are subtracted.
lengthOfEndTag = 3 + node.tagName.length;
lengthOfStartTag = node.outerHTML.length - (node.innerHTML.length + lengthOfEndTag);
but of course I don't want to make such an assumption for the end tag.
(Finally, I'm familiar with regular expressions—but trying to avoid them if at all possible.)
EDIT
@Pointy and @squint helped me understand that it's not possible to see </ p >, for example, because the original HTML source is discarded once the DOM is created. That's fine. The objective, adjusted, is to find the length of the start and end tags as they would be rendered in outerHTML.
An alternate way to do this could be to use XMLSerializer's serializeToString on a clone copy of the node (with id set) to avoid having to parse innerHTML, then split over "><"
/**
 * tags(elm) -> {open, close}
 *
 * Serialises a childless clone of an element and splits the markup into its
 * opening and closing tag. `close` stays null when the serialised text
 * contains no "><" boundary. Throws TypeError for anything whose nodeType
 * is not 1.
 */
var tags = (function () {
    // One serializer instance is created up front and shared by every call.
    var serializer = new XMLSerializer();
    return function tags(elm) {
        if (elm.nodeType !== 1) throw new TypeError('Expected HTMLElement');
        var shell = elm.cloneNode(); // clone without the element's contents
        var originalId = elm.getAttribute('id');
        if (originalId !== null) shell.setAttribute('id', originalId); // re-apply id on the clone
        var markup = serializer.serializeToString(shell);
        // The last "><" is the seam between the opening and the closing tag.
        var seam = markup.lastIndexOf('><');
        if (seam === -1) {
            return {open: markup, close: null}; // no close tag
        }
        return {open: markup.slice(0, seam + 1), close: markup.slice(seam + 1)};
    };
}()); // self invoke to init
After running this, you can access .length of open and close properties
tags(document.body); // {open: "<body class="question-page">", close: "</body>"}
What if an attribute's value has >< in it? XMLSerializer escapes this to &gt;&lt; so it won't change the .split.
What about no close tag? close will be null.
This answer helped me understand what @Pointy and @squint were trying to say.
The following solution works for me:
/**
 * Length, in characters, of the first matched element's start tag as it
 * appears in its outerHTML.
 *
 * outerHTML is start tag + innerHTML + optional end tag, so the start tag
 * is what remains after removing the other two. Searching outerHTML for
 * innerHTML (the previous approach) is unreliable because the same text can
 * occur inside the start tag, e.g. <div data-x="a">a</div>.
 *
 * @returns {number} Start-tag length. For an element serialised without an
 *     end tag (e.g. <img>) this is the whole outerHTML length.
 */
$.fn.lengthOfStartTag = function () {
    var node = this[0];
    if (!node || node.nodeType !== 1) {
        $.error("Called $.fn.lengthOfStartTag on non-element node.");
    }
    var outer = node.outerHTML;
    // A trailing "</name>" can only be an end tag: a start tag ends in
    // `">` or `<name>`, neither of which matches this pattern.
    var endTag = /<\/[^\s<>"'=\/]+>$/.exec(outer);
    var endLength = endTag ? endTag[0].length : 0;
    return outer.length - node.innerHTML.length - endLength;
};
/**
 * Length, in characters, of the first matched element's end tag as it
 * appears in its outerHTML.
 *
 * The end tag is read directly off the tail of outerHTML instead of being
 * located via outerHTML.indexOf(innerHTML), which can match inside the
 * start tag. Childless elements that still have an end tag (e.g. <p></p>)
 * are therefore reported correctly.
 *
 * @returns {number} End-tag length, or -1 when outerHTML has no end tag
 *     (e.g. <img>).
 */
$.fn.lengthOfEndTag = function () {
    var node = this[0];
    if (!node || node.nodeType !== 1) {
        $.error("Called $.fn.lengthOfEndTag on non-element node.");
    }
    // A trailing "</name>" can only be an end tag: a start tag ends in
    // `">` or `<name>`, neither of which matches this pattern.
    var endTag = /<\/[^\s<>"'=\/]+>$/.exec(node.outerHTML);
    return endTag ? endTag[0].length : -1;
};
Sample jsFiddle here.
I made this Greasemonkey script:
// Greasemonkey script from the question. The ".." lines are the author's
// elisions standing in for the repeated statements for rows 2..24, so the
// listing is illustrative rather than runnable as written.
// Threshold that the 11th cell of a row is compared against.
var maxpi = 250;
// Absolute XPaths of the 11th cell of each table row.
var p1 = "/html/body/div/div[2]/div/div[2]/table[2]/tbody/tr[1]/td[11]";
var p2 = "/html/body/div/div[2]/div/div[2]/table[2]/tbody/tr[2]/td[11]";
..
var p25 = "/html/body/div/div[2]/div/div[2]/table[2]/tbody/tr[25]/td[11]";
// Absolute XPaths of the rows themselves.
var r1 = "/html/body/div/div[2]/div/div[2]/table[2]/tbody/tr[1]";
var r2 = "/html/body/div/div[2]/div/div[2]/table[2]/tbody/tr[2]";
..
var r25 = "/html/body/div/div[2]/div/div[2]/table[2]/tbody/tr[25]";
// One first-node lookup per cell ...
var xpathPI1 = document.evaluate(p1, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
..
var xpathPI25 = document.evaluate(p25, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
// ... and one per row.
var xpathrow1 = document.evaluate(r1, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
..
var xpathrow25 = document.evaluate(r25, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
// Hide a row when its cell's text compares greater than maxpi.
if (xpathPI1.singleNodeValue.textContent >maxpi ){
xpathrow1.singleNodeValue.style.display='none';}
..
if (xpathPI25.singleNodeValue.textContent >maxpi ){
xpathrow25.singleNodeValue.style.display='none';}
Basically, it checks a table row's 11th field and if its contents > than 250 it hides the row.
With my limited javascript knowledge took quite some time get this working.
The problem is that I have to rewrite every single line if I want to check-hide another row.
I want to make it more usable so I can use it on similar tables without rewriting the whole thing.
Maybe I need to use a different XPath type or use some kind of changing variable?
Of course, there are more ways to improve your script.
Firstly, you need to think through thoroughly WHAT exactly you want to look for. Is it every row and column? Is it rows/columns with some text, class, or any other attribute? You can even select only those nodes that have a text value greater than your maxpi!
Read something about XPath, the possibly best resource is the official one.
Some random thoughts on what could be useful regarding XPath:
//table//tr[5]/td[2] ... the double slash is the deal here
//table//tr/td[number(text()) > 250] ... the number() and text() functions
When talking about JavaScript, that would be a little tougher, because there are so many things you could use!
Just for starters - you can create dynamically changing xpath expressions by String concatenation and For loop, like this:
// Build one row-specific XPath per iteration by splicing the 1-based row
// number into the expression.
for (var i = 1; i <= maxNumberOfRows; i++) {
    var p1 = `//table/tbody/tr[${i}]`;
    // more work goes here...
}
Also, you could use arrays to store multiple nodes returned by your XPath expressions and work on them all with just a single command.
For more JavaScript, I would recommend the first chapters of some JavaScript tutorial, that will boost your productivity by a lot.
Use a loop and functions. Here's one way:
// Arguments, in order: XPath text before the row number, number of rows to
// check, XPath text after the row number, and the threshold value.
hideRowsWithLargeCellValue (
"/html/body/div/div[2]/div/div[2]/table[2]/tbody/tr[",
25,
"]/td[11]",
250
);
/**
 * Hides each table row whose selected cell holds a value above a threshold.
 *
 * The cell of row N is located with the XPath `xpathPre + N + xpathPost`;
 * rows are visited from `maxRows` down to 1. A row whose cell is not found
 * is skipped.
 *
 * @param {string} xpathPre - XPath text preceding the row number.
 * @param {number} maxRows - Highest row number to inspect.
 * @param {string} xpathPost - XPath text following the row number.
 * @param {number} maxpi - Threshold; the cell's textContent is compared
 *     against it with `>`.
 */
function hideRowsWithLargeCellValue (xpathPre, maxRows, xpathPost, maxpi) {
    var rowNumber = maxRows;
    while (rowNumber >= 1) {
        var cell = document.evaluate (
            xpathPre + rowNumber + xpathPost,
            document,
            null,
            XPathResult.FIRST_ORDERED_NODE_TYPE,
            null
        ).singleNodeValue;
        if (cell && cell.textContent > maxpi) {
            // The cell's parent is the row to hide.
            cell.parentNode.style.display = 'none';
        }
        --rowNumber;
    }
}
Then read "Dont Repeat Yourself" (sic).
I mean an array of them. That is a chain from top HTML to destination element including the element itself.
for example for element <A> it would be:
[HTML, BODY, DIV, DIV, P, SPAN, A]
A little shorter (and safer, since target may not be found):
// Collect the target and all of its ancestors, outermost first. Each node
// is put at the front of the array while climbing, and the loop simply does
// not run when no element has the id "target".
var els = [];
for (var a = document.getElementById("target"); a; a = a.parentNode) {
    els.unshift(a);
}
You can try something like:
// Build the chain from the outermost ancestor down to the element itself.
var nodes = [];
var element = document.getElementById('yourelement');
// getElementById returns null when no element has that id; without this
// guard the loop below would throw on `null.parentNode`.
if (element) {
    nodes.push(element);
    while (element.parentNode) {
        nodes.unshift(element.parentNode);
        element = element.parentNode;
    }
}
I like this method:
[...(function*(e){do { yield e; } while (e = e.parentNode);})($0)]
... where $0 is your element.
An upside of this method is that it can be used as a value in expressions.
To get an array without the target element:
[...(function*(e){while (e = e.parentNode) { yield e; }})($0)]
You can walk the chain of element.parentNode references until you reach a falsy value, appending to an array as you go:
/**
 * Lists a node followed by each of its ancestors, nearest first.
 *
 * @param {Node} el - Starting node; a falsy value yields an empty array.
 * @returns {Node[]} `el`, its parent, its grandparent, and so on until the
 *     parentNode chain runs out.
 */
const getParents = el => {
    const parents = [];
    let current = el;
    while (current) {
        parents.push(current);
        current = current.parentNode;
    }
    return parents;
};
const el = document.querySelector("b");
console.log(getParents(el).reverse().map(e => e.nodeName));
<div><p><span><b>Foo</b></span></div>
Note that reversing is done in the caller because it's not essential to the lineage algorithm. Mapping to e.nodeName is purely for presentation and also non-essential.
Note that this approach means you'll wind up with the document element as the last element in the chain. If you don't want that, you can add && el !== document to the loop stopping condition.
The overall time complexity of the code above is linear and reverse() is in-place, so it doesn't require an extra allocation. unshift in a loop, as some of the other answers recommend, is quadratic and may harm scalability on uncommonly-deep DOM trees in exchange for a negligible gain in elegance.
Another alternative (based on this):
// Gather the target and its ancestors (nearest first), stopping before the
// document node itself.
var e = document.getElementById("target"), p = [];
while (e && e !== document) {
    p.push(e);
    e = e.parentNode;
}
I believe this will likely be the most performant in the long run in most scenarios if you make frequent use of this function. The reason why it will be more performant is that it initially checks what depth of ancestry it might encounter. Also, instead of creating a new array every time you call it, this function efficiently reuses the same array and slices it, which is very optimized in some browsers. However, since I know of no really efficient way to check the maximum depth, I am left with a less efficient query-selector check.
// !IMPORTANT! When moving this coding snippet over to your production code,
// do not run the following depthtest more than once, it is not very performant
// Probe how deeply elements nest under <body> by growing a child-combinator
// selector until it stops matching.
var kCurSelector="*|*", curDepth=3;
while (document.body.querySelector(kCurSelector += '>*|*')) curDepth++;
// Round the measured depth up to a power of two (the original read an
// undefined `startDepth` here and ended the statement with a comma).
curDepth = Math.pow(2, Math.ceil(Math.log2(curDepth)));
// Scratch buffer shared by every call; it is filled from the end backwards.
// Note that it keeps referencing nodes from earlier calls until overwritten.
var parentsTMP = new Array(curDepth);
/**
 * Returns `Ele` and its ancestors up to, but excluding, the document,
 * outermost first. Anything that is not a Node yields an empty array.
 *
 * @param {Node} Ele - Node whose ancestry is wanted.
 * @returns {Node[]} A fresh array sliced out of the shared buffer.
 */
function getAllParentNodes(Ele){
    var curPos = curDepth;
    if (Ele instanceof Node)
        // `Ele &&` stops cleanly for detached subtrees, whose parent chain
        // ends in null rather than in the document.
        while (Ele && Ele !== document){
            if (curPos === 0){
                // Buffer exhausted: double it and move the collected nodes
                // into the upper half so filling can continue downwards.
                curPos += curDepth;
                parentsTMP.length <<= 1;
                parentsTMP.copyWithin(curDepth, 0, curDepth);
                curDepth <<= 1;
            }
            parentsTMP[--curPos] = Ele;
            Ele = Ele.parentNode;
        }
    // The original returned an undefined `retArray`.
    return parentsTMP.slice(curPos);
}
The browser compatibility for the above function is that it will work in Edge, but not in IE. If you want IE support, then you will need a Array.prototype.copyWithin polyfill.
get all parent nodes of child in javascript array
// Climb from the target to the top of the tree, placing each node at the
// front so the finished array runs from the outermost ancestor to the target.
let els = [];
let selectedTxtElement = document.getElementById("target");
for (; selectedTxtElement; selectedTxtElement = selectedTxtElement.parentNode) {
    els.unshift(selectedTxtElement);
}
know more
I have a scenario where I need to split a node up to a given ancestor, e.g.
<strong>hi there, how <em>are <span>you</span> doing</em> today?</strong>
needs to be split into:
<strong>hi there, how <em>are <span>y</span></em></strong>
and
<strong><em><span>ou</span> doing</em> today?</strong>
How would I go about doing this?
Here is a solution that will work for modern browsers using Range. Something similar could be done for IE < 9 using TextRange, but I use Linux so I don't have easy access to those browsers. I wasn't sure what you wanted the function to do, return the nodes or just do a replace inline. I just took a guess and did the replace inline.
/**
 * Splits `limit` in place at the given position: everything from the start
 * of `limit` up to (`node`, `offset`) is extracted and re-inserted as a
 * sibling immediately before `limit`.
 *
 * @param {Node} node - Node containing the split point.
 * @param {number} offset - Offset within `node`, as understood by Range.setEnd.
 * @param {Node} limit - Ancestor up to which the split is carried.
 */
function splitNode(node, offset, limit) {
    var container = limit.parentNode;
    var startIndex = getNodeIndex(container, limit);
    var range = node.ownerDocument.createRange();
    // The range runs from just before `limit` to the split point.
    range.setStart(container, startIndex);
    range.setEnd(node, offset);
    container.insertBefore(range.extractContents(), limit);
}
/**
 * Finds the position of `node` among `parent.childNodes`, scanning from the
 * last child backwards.
 *
 * @param {Node} parent - Node whose children are searched.
 * @param {Node} node - Child to locate.
 * @returns {number} Index of `node`, or -1 when it is not a child of `parent`.
 */
function getNodeIndex(parent, node) {
    var siblings = parent.childNodes;
    for (var position = siblings.length - 1; position >= 0; position--) {
        if (siblings[position] === node) {
            return position;
        }
    }
    return -1;
}
Demo: jsbin
It expects a TextNode for node, although it will work with an Element; the offset will just function differently based on the behavior of Range.setStart
See the method Text.splitText.
Not sure if this helps you, but this is what I came up with...
Pass the function an element and a node tag name string you wish to move up to.
<strong>hi there, how <em>are <span id="span">you</span> doing</em> today?</strong>
<script type="text/javascript">
/**
 * Climbs from `element` to the nearest ancestor with the given tag name,
 * blanking the leading text node of every ancestor visited on the way
 * (including the matching one).
 *
 * @param {Node} element - Node to start from; its own content is untouched.
 * @param {string} tagName - Tag name to stop at (case-insensitive).
 * @returns {Node|null} The matching ancestor, or null when the top of the
 *     tree is reached without a match (previously this case crashed).
 */
function findParentNode(element, tagName) {
    var wantedTag = String(tagName).toUpperCase();
    var ancestor = element ? element.parentNode : null;
    while (ancestor) {
        var firstChild = ancestor.firstChild;
        console.log('Removing node: ' + ancestor.tagName + ' DATA: ' + (firstChild ? firstChild.data : undefined));
        // Only erase when the first child really carries string data (a text
        // node); an ancestor may have no children or start with an element.
        if (firstChild && typeof firstChild.data === 'string') {
            firstChild.data = '';
        }
        if (ancestor.tagName === wantedTag) {
            return ancestor;
        }
        ancestor = ancestor.parentNode;
    }
    return null;
}
var ourNode = document.getElementById("span");
alert(findParentNode(ourNode,'strong').innerHTML);
</script>