getElementsByTagName() equivalent for textNodes - javascript

Is there any way to get the collection of all textNode objects within a document?
getElementsByTagName() works great for Elements, but textNodes are not Elements.
Update: I realize this can be accomplished by walking the DOM - as many below suggest. I know how to write a DOM-walker function that looks at every node in the document. I was hoping there was some browser-native way to do it. After all it's a little strange that I can get all the <input>s with a single built-in call, but not all textNodes.

Update:
I have outlined some basic performance tests for each of these 6 methods over 1000 runs. getElementsByTagName is the fastest but it does a half-assed job, as it does not select all elements, but only one particular type of tag ( i think p) and blindly assumes that its firstChild is a text element. It might be little flawed but its there for demonstration purpose and comparing its performance to TreeWalker. Run the tests yourselves on jsfiddle to see the results.
Using a TreeWalker
Custom Iterative Traversal
Custom Recursive Traversal
Xpath query
querySelectorAll
getElementsByTagName
Let's assume for a moment that there is a method that allows you to get all Text nodes natively. You would still have to traverse each resulting text node and call node.nodeValue to get the actual text as you would do with any DOM Node. So the issue of performance is not with iterating through text nodes, but iterating through all nodes that are not text and checking their type. I would argue (based on the results) that TreeWalker performs just as fast as getElementsByTagName, if not faster (even with getElementsByTagName playing handicapped).
Ran each test 1000 times.
Method Total ms Average ms
--------------------------------------------------
document.TreeWalker 301 0.301
Iterative Traverser 769 0.769
Recursive Traverser 7352 7.352
XPath query 1849 1.849
querySelectorAll 1725 1.725
getElementsByTagName 212 0.212
Source for each method:
TreeWalker
function nativeTreeWalker() {
var walker = document.createTreeWalker(
document.body,
NodeFilter.SHOW_TEXT,
null,
false
);
var node;
var textNodes = [];
while(node = walker.nextNode()) {
textNodes.push(node.nodeValue);
}
}
Recursive Tree Traversal
function customRecursiveTreeWalker() {
var result = [];
(function findTextNodes(current) {
for(var i = 0; i < current.childNodes.length; i++) {
var child = current.childNodes[i];
if(child.nodeType == 3) {
result.push(child.nodeValue);
}
else {
findTextNodes(child);
}
}
})(document.body);
}
Iterative Tree Traversal
function customIterativeTreeWalker() {
var result = [];
var root = document.body;
var node = root.childNodes[0];
while(node != null) {
if(node.nodeType == 3) { /* Fixed a bug here. Thanks #theazureshadow */
result.push(node.nodeValue);
}
if(node.hasChildNodes()) {
node = node.firstChild;
}
else {
while(node.nextSibling == null && node != root) {
node = node.parentNode;
}
node = node.nextSibling;
}
}
}
querySelectorAll
function nativeSelector() {
var elements = document.querySelectorAll("body, body *"); /* Fixed a bug here. Thanks #theazureshadow */
var results = [];
var child;
for(var i = 0; i < elements.length; i++) {
child = elements[i].childNodes[0];
if(elements[i].hasChildNodes() && child.nodeType == 3) {
results.push(child.nodeValue);
}
}
}
getElementsByTagName (handicap)
function getElementsByTagName() {
var elements = document.getElementsByTagName("p");
var results = [];
for(var i = 0; i < elements.length; i++) {
results.push(elements[i].childNodes[0].nodeValue);
}
}
XPath
function xpathSelector() {
var xpathResult = document.evaluate(
"//*/text()",
document,
null,
XPathResult.ORDERED_NODE_ITERATOR_TYPE,
null
);
var results = [], res;
while(res = xpathResult.iterateNext()) {
results.push(res.nodeValue); /* Fixed a bug here. Thanks #theazureshadow */
}
}
Also, you might find this discussion helpful - http://bytes.com/topic/javascript/answers/153239-how-do-i-get-elements-text-node

Here's a modern Iterator version of the fastest TreeWalker method:
function getTextNodesIterator(el) { // Returns an iterable TreeWalker
const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
walker[Symbol.iterator] = () => ({
next() {
const value = walker.nextNode();
return {value, done: !value};
}
});
return walker;
}
Usage:
for (const textNode of getTextNodesIterator(document.body)) {
console.log(textNode)
}
Safer version
Using the iterator directly might get stuck if you move the nodes around while looping. This is safer, it returns an array:
function getTextNodes(el) { // Returns an array of Text nodes
const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
const nodes = [];
while (walker.nextNode()) {
nodes.push(walker.currentNode);
}
return nodes;
}

I know you specifically asked for a collection, but if you just meant that informally and didn't care if they were all joined together into one big string, you can use:
var allTextAsString = document.documentElement.textContent || document.documentElement.innerText;
...with the first item being the DOM3 standard approach. Note however that innerText appears to exclude script or style tag contents in implementations that support it (at least IE and Chrome) while textContent includes them (in Firefox and Chrome).

Here's an alternative that's a bit more idiomatic and (hopefully) easier to understand.
function getText(node) {
// recurse into each child node
if (node.hasChildNodes()) {
node.childNodes.forEach(getText);
}
// get content of each non-empty text node
else if (node.nodeType === Node.TEXT_NODE) {
const text = node.textContent.trim();
if (text) {
console.log(text); // do something
}
}
}

after createTreeWalker is deprecated you can use
/**
* Get all text nodes under an element
* #param {!Element} el
* #return {Array<!Node>}
*/
function getTextNodes(el) {
const iterator = document.createNodeIterator(el, NodeFilter.SHOW_TEXT);
const textNodes = [];
let currentTextNode;
while ((currentTextNode = iterator.nextNode())) {
textNodes.push(currentTextNode);
}
return textNodes;
}

document.deepText= function(hoo, fun){
var A= [], tem;
if(hoo){
hoo= hoo.firstChild;
while(hoo!= null){
if(hoo.nodeType== 3){
if(typeof fun== 'function'){
tem= fun(hoo);
if(tem!= undefined) A[A.length]= tem;
}
else A[A.length]= hoo;
}
else A= A.concat(document.deepText(hoo, fun));
hoo= hoo.nextSibling;
}
}
return A;
}
/*
You can return an array of all the descendant text nodes of some parent element,
or you can pass it some function and do something (find or replace or whatever)
to the text in place.
This example returns the text of the non-whitespace textnodes in the body:
var A= document.deepText(document.body, function(t){
var tem= t.data;
return /\S/.test(tem)? tem: undefined;
});
alert(A.join('\n'))
*/
Handy for search and replace, highlighting and so on

var el1 = document.childNodes[0]
function get(node,ob)
{
ob = ob || {};
if(node.childElementCount)
{
ob[node.nodeName] = {}
ob[node.nodeName]["text"] = [];
for(var x = 0; x < node.childNodes.length;x++)
{
if(node.childNodes[x].nodeType == 3)
{
var txt = node.childNodes[x].nodeValue;
ob[node.nodeName]["text"].push(txt)
continue
}
get(node.childNodes[x],ob[node.nodeName])
};
}
else
{
ob[node.nodeName] = (node.childNodes[0] == undefined ? null :node.childNodes[0].nodeValue )
}
return ob
}
var o = get(el1)
console.log(o)

Related

Are there faster ways for collecting all text nodes than using NodeIterator?

The goal is to collect all text nodes that are direct or indirect ancestors of the body element that meet certain criteria.
The problem is that collecting nodes via the method nextNode of a NodeIterator object seems really slow.
I did some performance tests, the first group with a NodeIterator that accepts all text nodes, in the second group nodes are accepted only if 3 criteria are being met.
The first group is about 3 times faster than the second one - but it is still too slow for my purpose.
const iterator = window.document.createNodeIterator(
document.body,
NodeFilter.SHOW_TEXT,
{
acceptNode(node) {
return (satisfies1(node) && satisfies2(node) && satisfies3(node))
}
},
false);
const matchingTextNodes = [];
let n;
while (n = iterator.nextNode()) matchingTextNodes.push(n);
Are there faster alternatives to NodeIterators for this szenario?
I'm only interested in text nodes.
Update: Manually traversing the DOM seems slightly faster, still not a sufficient improvement thogh:
const matchingTextNodes = [];
function collect(el) {
const children = el.childNodes;
let c = children.length, child;
for (let i = 0; i < c; i++) {
child = children[i];
if (satisfies1(child) && satisfies2(child) && satisfies3(child)) {
matchingTextNodes.push(child);
} else {
collect(child);
}
}
}
collect(document.body);

How to write Javascript to search nodes - without getElementsByClassName

I'm very new at recursion, and have been tasked with writing getElementsByClassName in JavaScript without libraries or the DOM API.
There are two matching classes, one of which is in the body tag itself, the other is in a p tag.
The code I wrote isn't working, and there must be a better way to do this. Your insight would be greatly appreciated.
var elemByClass = function(className) {
var result = [];
var nodes = document.body; //<body> is a node w/className, it needs to check itself.
var childNodes = document.body.childNodes; //then there's a <p> w/className
var goFetchClass = function(nodes) {
for (var i = 0; i <= nodes; i++) { // check the parent
if (nodes.classList == className) {
result.push(i);
console.log(result);
}
for (var j = 0; j <= childNodes; j++) { // check the children
if (childNodes.classList == className) {
result.push(j);
console.log(result);
}
goFetchClass(nodes); // recursion for childNodes
}
goFetchClass(nodes); // recursion for nodes (body)
}
return result;
};
};
There are some errors, mostly logical, in your code, here's what it should have looked like
var elemByClass = function(className) {
var result = [];
var pattern = new RegExp("(^|\\s)" + className + "(\\s|$)");
(function goFetchClass(nodes) {
for (var i = 0; i < nodes.length; i++) {
if ( pattern.test(nodes[i].className) ) {
result.push(nodes[i]);
}
goFetchClass(nodes[i].children);
}
})([document.body]);
return result;
};
Note the use of a regex instead of classList, as it makes no sense to use classList which is IE10+ to polyfill getElementsByClassName
Firstly, you'd start with the body, and check it's className property.
Then you'd get the children, not the childNodes as the latter includes text-nodes and comments, which can't have classes.
To recursively call the function, you'd pass the children in, and do the same with them, check for a class, get the children of the children, and call the function again, until there are no more children.
Here are some reasons:
goFetchClass needs an initial call after you've defined it - for example, you need a return goFetchClass(nodes) statement at the end of elemByClass function
the line for (var i = 0; i <= nodes; i++) { will not enter the for loop - did you mean i <= nodes.length ?
nodes.classList will return an array of classNames, so a direct equality such as nodes.classList == className will not work. A contains method is better.
Lastly, you may want to reconsider having 2 for loops for the parent and children. Why not have 1 for loop and then call goFetchClass on the children? such as, goFetchClass(nodes[i])?
Hope this helps.

Replace text in the middle of a TextNode with an element

I want to insert html tags within a text node with TreeWalker, but TreeWalker forces my html brackets into & lt; & gt; no matter what I've tried. Here is the code:
var text;
var tree = document.createTreeWalker(document.body,NodeFilter.SHOW_TEXT);
while (tree.nextNode()) {
text = tree.currentNode.nodeValue;
text = text.replace(/(\W)(\w+)/g, '$1<element onmouseover="sendWord(\'$2\')">$2</element>');
text = text.replace(/^(\w+)/, '<element onmouseover="sendWord(\'$1\')">$1</element>');
tree.currentNode.nodeValue = text;
}
Using \< or " instead of ' won't help. My workaround is to copy all of the DOM tree to a string and to replace the html body with that. It works on very simple webpages and solves my first problem, but is a bad hack and won't work on anything more than a trivial page. I was wondering if I could just work straight with the text node rather than use a workaround. Here is the code for the (currently buggy) workaround:
var text;
var newHTML = "";
var tree = document.createTreeWalker(document.body);
while (tree.nextNode()) {
text = tree.currentNode.nodeValue;
if (tree.currentNode.nodeType == 3){
text = text.replace(/(\W)(\w+)/g, '$1<element onmouseover="sendWord(\'$2\')">$2</element>');
text = text.replace(/^(\w+)/, '<element onmouseover="sendWord(\'$1\')">$1</element>');
}
newHTML += text
}
document.body.innerHTML = newHTML;
Edit: I realize a better workaround would be to custom tag the text nodes ((Customtag_Start_Here) etc.), copy the whole DOM to a string, and use my customs tags to identify text nodes and modify them that way. But if I don't have to, I'd rather not.
To 'change' a text node into an element, you must replace it with an element. For example:
var text = tree.currentNode;
var el = document.createElement('foo');
el.setAttribute('bar','yes');
text.parentNode.replaceChild( el, text );
If you want to retain part of the text node, and inject an element "in the middle", you need to create another text node and insert it and the element into the tree at the appropriate places in the tree.
Edit: Here's a function that might be super useful to you. :)
Given a text node, it runs a regex on the text values. For each hit that it finds it calls a custom function that you supply. If that function returns a string, then the match is replaced. However, if that function returns an object like:
{ name:"element", attrs{onmouseover:"sendWord('foo')"}, content:"foo" }
then it will split the text node around the match and inject an element in that location. You can also return an array of strings or those objects (and can recursively use arrays, strings, or objects as the content property).
Demo: http://jsfiddle.net/DpqGH/8/
function textNodeReplace(node,regex,handler) {
var mom=node.parentNode, nxt=node.nextSibling,
doc=node.ownerDocument, hits;
if (regex.global) {
while(node && (hits=regex.exec(node.nodeValue))){
regex.lastIndex = 0;
node=handleResult( node, hits, handler.apply(this,hits) );
}
} else if (hits=regex.exec(node.nodeValue))
handleResult( node, hits, handler.apply(this,hits) );
function handleResult(node,hits,results){
var orig = node.nodeValue;
node.nodeValue = orig.slice(0,hits.index);
[].concat(create(mom,results)).forEach(function(n){
mom.insertBefore(n,nxt);
});
var rest = orig.slice(hits.index+hits[0].length);
return rest && mom.insertBefore(doc.createTextNode(rest),nxt);
}
function create(el,o){
if (o.map) return o.map(function(v){ return create(el,v) });
else if (typeof o==='object') {
var e = doc.createElementNS(o.namespaceURI || el.namespaceURI,o.name);
if (o.attrs) for (var a in o.attrs) e.setAttribute(a,o.attrs[a]);
if (o.content) [].concat(create(e,o.content)).forEach(e.appendChild,e);
return e;
} else return doc.createTextNode(o+"");
}
}
It's not quite perfectly generic, as it does not support namespaces on attributes. But hopefully it's enough to get you going. :)
You would use it like so:
findAllTextNodes(document.body).forEach(function(textNode){
replaceTextNode( textNode, /\b\w+/g, function(match){
return {
name:'element',
attrs:{onmouseover:"sendWord('"+match[0]+"')"},
content:match[0]
};
});
});
function findAllTextNodes(node){
var walker = node.ownerDocument.createTreeWalker(node,NodeFilter.SHOW_TEXT);
var textNodes = [];
while (walker.nextNode())
if (walker.currentNode.parentNode.tagName!='SCRIPT')
textNodes.push(walker.currentNode);
return textNodes;
}
or if you want something closer to your original regex:
replaceTextNode( textNode, /(^|\W)(\w+)/g, function(match){
return [
match[1], // might be an empty string
{
name:'element',
attrs:{onmouseover:"sendWord('"+match[2]+"')"},
content:match[2]
}
];
});
Function that returns the parent element of any text node including partial match of passed string:
function findElByText(text, mainNode) {
let textEl = null;
const traverseNodes = function (n) {
if (textEl) {
return;
}
for (var nodes = n.childNodes, i = nodes.length; i--;) {
if (textEl) {
break;
}
var n = nodes[i], nodeType = n.nodeType;
// Its a text node, check if it matches string
if (nodeType == 3) {
if (n.textContent.includes(text)) {
textEl = n.parentElement;
break;
}
}
else if (nodeType == 1 || nodeType == 9 || nodeType == 11) {
traverseNodes(n);
}
}
}
traverseNodes(mainNode);
return textEl;
}
Usage:
findElByText('Some string in document', document.body);

How to know if there is a link element within the selection

In Javascript, I'd like determine whether an element, say an A element, exists inside a given range/textRange. The aim is to determine if the user's current selection contains a link. I am building a rich text editor control.
The range object has a commonAncestorContainer (W3C) or parentElement() (Microsoft) method which returns the closest common anscestor of all elements in the range. However, looking inside this element for A elements won't work, since this common ancestor may also contain elements that aren't in the range, since the range can start or end part way through a parent.
How would you achieve this?
How about selection.containsNode?
https://developer.mozilla.org/en/DOM/Selection/containsNode
something like:
var selection = window.getSelection();
var range = selection.getRangeAt(0);
var result = $('a', range.commonAncestorContainer).filter(function() {
return selection.containsNode(this);
});
console.log(result);
I ended up going with a solution like this:
var findinselection = function(tagname, container) {
var
i, len, el,
rng = getrange(),
comprng,
selparent;
if (rng) {
selparent = rng.commonAncestorContainer || rng.parentElement();
// Look for an element *around* the selected range
for (el = selparent; el !== container; el = el.parentNode) {
if (el.tagName && el.tagName.toLowerCase() === tagname) {
return el;
}
}
// Look for an element *within* the selected range
if (!rng.collapsed && (rng.text === undefined || rng.text) &&
selparent.getElementsByTagName) {
el = selparent.getElementsByTagName(tagname);
comprng = document.createRange ?
document.createRange() : document.body.createTextRange();
for (i = 0, len = el.length; i < len; i++) {
// determine if element el[i] is within the range
if (document.createRange) { // w3c
comprng.selectNodeContents(el[i]);
if (rng.compareBoundaryPoints(Range.END_TO_START, comprng) < 0 &&
rng.compareBoundaryPoints(Range.START_TO_END, comprng) > 0) {
return el[i];
}
}
else { // microsoft
comprng.moveToElementText(el[i]);
if (rng.compareEndPoints("StartToEnd", comprng) < 0 &&
rng.compareEndPoints("EndToStart", comprng) > 0) {
return el[i];
}
}
}
}
}
};
Where getrange() is another function of mine to get the current selection as a range object.
To use, call it like
var link = findselection('a', editor);
Where editor is the contenteditable element, or body in a designmode iframe.
This is a bit of a pain in the bum to do cross-browser. You could use my Rangy library, which is probably overkill for just this task but does make it more straightforward and works in all major browsers. The following code assumes only one Range is selected:
var sel = rangy.getSelection();
if (sel.rangeCount) {
var range = sel.getRangeAt(0);
var links = range.getNodes([1], function(node) {
return node.tagName.toLowerCase() == "a" && range.containsNode(node);
});
}
I'm using this code that works with IE / Chrome / FF: (I'm using it to select rows <tr> in a table)
// yourLink is the DOM element you want to check
var selection = window.getSelection().getRangeAt(0)
var node = document.createRange()
node.selectNode(yourLink)
var s2s = selection.compareBoundaryPoints(Range.START_TO_END, node)
var s2e = selection.compareBoundaryPoints(Range.START_TO_START, node)
var e2s = selection.compareBoundaryPoints(Range.END_TO_START, node)
var e2e = selection.compareBoundaryPoints(Range.END_TO_END, node)
if ((s2s != s2e) || (e2s != e2e) || (s2s!=e2e))
console.log("your node is inside selection")
In the case of the range on the searched element, what I wrote is useful. (But only for that case!)
First I wrote a function that returns the Node found in the range: getNodeFromRange(rangeObject). Using this function it was already easy to write the function that returns the desired Node: findTagInRange(tagName).
function getNodeFromRange(range) {
if(range.endContainer.nodeType==Node.ELEMENT_NODE) {
return range.endContainer;
}
if(range.endContainer.nodeType==Node.TEXT_NODE) {
return range.endContainer.parentNode;
}
else {
// the 'startContainer' it isn't on an Element (<p>, <div>, etc...)
return;
}
}
function findTagInRange(tagName, range) {
var node = getNodeFromRange(range);
if(node && typeof(node.tagName)!='undefiend' && node.tagName.toLowerCase()==tagName.toLowerCase()) {
return $(node);
}
return;
}
And then I can use it as follows:
var link = findTagInRange('A', range);
And I see the determination of the range you've already solved. :)

How do I make this loop all children recursively?

I have the following:
for (var i = 0; i < children.length; i++){
if(hasClass(children[i], "lbExclude")){
children[i].parentNode.removeChild(children[i]);
}
};
I would like it to loop through all children's children, etc (not just the top level). I found this line, which seems to do that:
for(var m = n.firstChild; m != null; m = m.nextSibling) {
But I'm unclear on how I refer to the current child if I make that switch? I would no longer have i to clarify the index position of the child. Any suggestions?
Thanks!
Update:
I'm now using the following, according to answer suggestions. Is this the correct / most efficient way of doing so?
function removeTest(child) {
if (hasClass(child, "lbExclude")) {
child.parentNode.removeChild(child);
}
}
function allDescendants(node) {
for (var i = 0; i < node.childNodes.length; i++) {
var child = node.childNodes[i];
allDescendants(child);
removeTest(child);
}
}
var children = temp.childNodes;
for (var i = 0; i < children.length; i++) {
allDescendants(children[i]);
};
function allDescendants (node) {
for (var i = 0; i < node.childNodes.length; i++) {
var child = node.childNodes[i];
allDescendants(child);
doSomethingToNode(child);
}
}
You loop over all the children, and for each element, you call the same function and have it loop over the children of that element.
Normally you'd have a function that could be called recursively on all nodes. It really depends on what you want to do to the children. If you simply want to gather all descendants, then element.getElementsByTagName may be a better option.
var all = node.getElementsByTagName('*');
for (var i = -1, l = all.length; ++i < l;) {
removeTest(all[i]);
}
There's no need for calling the 'allDescendants' method on all children, because the method itself already does that. So remove the last codeblock and I think that is a proper solution (á, not thé =])
function removeTest(child){
if(hasClass(child, "lbExclude")){
child.parentNode.removeChild(child);
}
}
function allDescendants (node) {
for (var i = 0; i < node.childNodes.length; i++) {
var child = node.childNodes[i];
allDescendants(child);
removeTest(child);
}
}
var children = allDescendants(temp);
You can use BFS to find all the elements.
function(element) {
// [].slice.call() - HTMLCollection to Array
var children = [].slice.call(element.children), found = 0;
while (children.length > found) {
children = children.concat([].slice.call(children[found].children));
found++;
}
return children;
};
This function returns all the children's children of the element.
The most clear-cut way to do it in modern browsers or with babel is this. Say you have an HTML node $node whose children you want to recurse over.
Array.prototype.forEach.call($node.querySelectorAll("*"), function(node) {
doSomethingWith(node);
});
The querySelectorAll('*') on any DOM node would give you all the child nodes of the element in a NodeList. NodeList is an array-like object, so you can use the Array.prototype.forEach.call to iterate over this list, processing each child one-by-one within the callback.
If you have jquery and you want to get all descendant elements you can use:
var all_children= $(parent_element).find('*');
Just be aware that all_children is an HTML collection and not an array. They behave similarly when you're just looping, but collection doesn't have a lot of the useful Array.prototype methods you might otherwise enjoy.
if items are being created in a loop you should leave a index via id="" data-name or some thing. You can then index them directly which will be faster for most functions such as (!-F). Works pretty well for 1024 bits x 100 items depending on what your doing.
if ( document.getElementById( cid ) ) {
return;
} else {
what you actually want
}
this will be faster in most cases once the items have already been loaded. only scrub the page on reload or secure domain transfers / logins / cors any else and your doing some thing twice.
If you use a js library it's as simple as this:
$('.lbExclude').remove();
Otherwise if you want to acquire all elements under a node you can collect them all natively:
var nodes = node.getElementsByTagName('*');
for (var i = 0; i < nodes.length; i++) {
var n = nodes[i];
if (hasClass(n, 'lbExclude')) {
node.parentNode.removeChild(node);
}
}
To get all descendants as an array, use this:
function getAllDescendants(node) {
var all = [];
getDescendants(node);
function getDescendants(node) {
for (var i = 0; i < node.childNodes.length; i++) {
var child = node.childNodes[i];
getDescendants(child);
all.push(child);
}
}
return all;
}
TreeNode node = tv.SelectedNode;
while (node.Parent != null)
{
node = node.Parent;
}
CallRecursive(node);
private void CallRecursive(TreeNode treeNode)
{
foreach (TreeNode tn in treeNode.Nodes)
{
//Write whatever code here this function recursively loops through all nodes
CallRecursive(tn);
}
}

Categories

Resources