Getting the full HTML for an Element excluding innerHTML

Getting the full HTML for an Element excluding innerHTML - javascript

Is there a better way to get just the tag information (tagName, classes, styles, other attributes, whether it is empty or not, etc.) without the innerHTML content, with starting and ending tag separated than:
const outer = el.outerHTML
const inner = el.innerHTML
const tag_only = outer.replace(inner, '');
const MATCH_END = /^<([a-zA-Z][a-zA-Z0-9_-]*)\b[^>]*>(<\/\1>)$/;
const match = MATCH_END.exec(tag_only);
if (match === null) { // empty tag, like <input>
return [tag_only, inner, ''];
} else {
const end_tag = match[2];
const start_tag = tag_only.replace(end_tag, '');
return [start_tag, inner, end_tag];
}
This works, but it does not seem particularly efficient, requiring two calls to query the DOM, two replace calls, and a regular expression search (ugg) to get back some information that the browser/DOM already has separately.
(FWIW, I'm working on an Element/Node processor that needs to walk all childNodes, changing some, before reconstructing mostly the original HTML, so I'm going to need to recursively call this function a lot and it would be good for speed to have a faster way)

Methods like innerHTML and outerHTML are expensive, since every read serializes the whole subtree of the element it is called on. Calling them at each level of a recursive walk repeats that work for every descendant, so the total cost grows much faster than the size of the tree; they should be avoided in performance-sensitive code.
In fact, the seemingly harmless childNodes can be expensive too,
so for maximum performance you should build the tree node by node.
Below is a possible solution for your case:
// Elements whose text children are serialized verbatim (no entity escaping)
// by the HTML fragment serialization algorithm.
const COLLECT_RAW_TEXT_PARENTS = new Set([
  'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'plaintext', 'noscript',
]);

// Replacements applied to text node data when it is serialized as HTML.
const COLLECT_TEXT_ESCAPES = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '\u00A0': '&nbsp;',
};

/**
 * Recursively collects the serialized pieces of `el` and its descendants.
 *
 * Must be called with an array as `this`, e.g. `collect.call([], node)`.
 * For every element or text node visited, one `[start, inner, end]` triple is
 * pushed onto `this`:
 *   - element with an end tag: `['<div ...>', inner, '</div>']`
 *   - void element (no end tag): `['<input ...>', inner, '']`
 *   - text node: `[escapedText, inner, '']`
 * `inner` is the array that receives the triples of the node's children, so
 * `result.flat(Infinity).join('')` yields the markup in document order.
 * Each cloned element is tagged with a `data-clone` attribute holding its
 * tag name before it is serialized.
 *
 * Nodes of any other type (comments, documents, ...) push nothing.
 *
 * @param {Node|null|undefined} el - Node to serialize; a missing node is a no-op.
 * @returns {Array} The `this` array, for chaining.
 */
const collect = function (el) {
  // Nothing to collect for a missing node (previously threw on `el.firstChild`).
  if (!el) {
    return this;
  }
  const inner = [];
  if (el.nodeType === Node.ELEMENT_NODE) {
    // A shallow clone has no children, so its outerHTML is only the tag(s).
    const clone = el.cloneNode();
    clone.setAttribute?.('data-clone', clone.tagName);
    const tagOnly = clone.outerHTML;
    // Derive the end tag from the serialized name instead of pattern-matching
    // the whole start tag: attribute values may legally contain '>' and tag
    // names may contain characters such as ':' or non-ASCII letters.
    const tagName = /^<([^\s/>]+)/.exec(tagOnly)?.[1];
    const endTag = tagName === undefined ? '' : `</${tagName}>`;
    const hasEndTag =
      endTag !== '' && tagOnly.length > endTag.length && tagOnly.endsWith(endTag);
    if (hasEndTag) {
      this.push([tagOnly.slice(0, -endTag.length), inner, endTag]);
    } else {
      // Void element such as <input>: there is no end tag to split off.
      this.push([tagOnly, inner, '']);
    }
  } else if (el.nodeType === Node.TEXT_NODE) {
    // Text nodes have no outerHTML; serialize their data directly so the
    // text is not lost from the reconstructed markup.
    const data = el.nodeValue ?? '';
    const isRawText = COLLECT_RAW_TEXT_PARENTS.has(el.parentNode?.localName);
    const text = isRawText
      ? data
      : data.replace(/[&<>\u00A0]/g, (ch) => COLLECT_TEXT_ESCAPES[ch]);
    this.push([text, inner, '']);
  }
  // Walk the children through firstChild/nextSibling rather than childNodes.
  for (let child = el.firstChild; child; child = child.nextSibling) {
    collect.call(inner, child);
  }
  return this;
};
console.log(collect.call([], document.body).flat(Infinity).join(''));
<div data-id="a" class="b">
<input type="text">
<div data-id="c" class="d">
<input type="text"/>
<div data-id="e" class="f">
<input type="text"/>
</div>
</div>
</div>

Related

JS: replacing all occurrences of a word in html with <span> element ONLY for p, span & divs. Not working if parent node contains the word

I have this html:
<div>
hello world
<p>
the world is round
<img src="domain.com/world.jpg">
</p>
</div>
And want to replace the word "world" (or mixed case variants thereof) with <span style='color:red;'>BARFOO</span> but only in <p>, <div> and a few other specific elements.
In the following code, it changes the text in the <div>, but not in the <p>. A replace operation is done (on something), but does not show up in the browser's html.
If I just supply p to querySelectorAll, then repeat again for <div>, it works fine.
I am thinking that once the code processes the <div> and finds that it has a child element(s), when that element(s) is put back into the html string, then the element reference for the <p> is lost.
jsfiddle is set up here https://jsfiddle.net/limeygent/t5q8ch23/12/ with more debug statements.
Any thoughts on what is happening & how to fix? (js only solution please)
var newspan = "<span style='color:red;'>BOOFAR</span>";
var regExNameSearch = new RegExp('World','gi');
var lc= 'World'.toLowerCase();
const elements = Array.from(document.querySelectorAll('p, span, div, strong, h1, h2, h3, h4')).filter(
(element) => {
for (let child of element.childNodes) {
if (child.nodeType === Node.TEXT_NODE && child.textContent.toLowerCase().includes(lc)) {
console.log('found ' + child.textContent);
let parent = child.parentNode;
let html = parent.innerHTML;
// Find all the child elements in the element
var excludeElements = parent.querySelectorAll('*');
if (excludeElements.length == 0){
console.log('no child elements');
parent.innerHTML = parent.innerHTML.replace(regExNameSearch, newspan);
// (also tried this) parent.innerHTML = html;
}else{
// Replace the text of each child element with placeholder
excludeElements.forEach(excludeElement => {
console.log('phase 1 - replacing - BEFORE');
html = html.replace(excludeElement.outerHTML, 'FOOBAR');
console.log('phase 1 - replacing - AFTER');
});
html = html.replace(regExNameSearch, newspan);
// Replace the text of each child element back to its original HTML
excludeElements.forEach(excludeElement => {
console.log('phase 2 - replacing - BEFORE:');
html = html.replace('FOOBAR', excludeElement.outerHTML);
console.log('phase 2 - replacing - AFTER:');
});
// Update the element's innerHTML with the updated HTML
parent.innerHTML = html;
}
return true;
}
}
return false;
}
);
edit: if you supply an answer recc. editing the innerHTML, make sure it doesn't affect any child nodes. The code I present here got super complex because I had to avoid editing anything further inside the node.
Oh, and if you present reccs from chatGPT (while it can be useful), please test what you post first ;-)

You can use the TreeWalker API to achieve the desired results.
The essential logic is this:
Iterate text nodes that meet the specified criteria: the text content matches the case-insensitive regular expression pattern and the node is the direct child (or, if desired, a descendant) of an element that matches your selector.
For each matched text node: remove it from its parent, but first split the node's text content on the regular expression pattern, and for each resulting string:
If it is non-empty, re-insert it into the parent node (just before the matched node) as a new text node. Before each string (except the first): create a copy of your substitute <span> node and insert it as well.
TS Playground
function assert (expr: unknown, msg?: string): asserts expr {
if (!expr) throw new Error(msg);
}
function createTextNodeFilterFn (regexp: RegExp, ancestorSelector: string): (textNode: Text) => number {
return ((textNode: Text): number => {
if (!(
textNode.textContent
&& regexp.test(textNode.textContent)
)) return NodeFilter.FILTER_REJECT;
// To find any matching ancestor (not just the direct parent):
// const valid = Boolean(textNode.parentElement?.closest(ancestorSelector));
const valid = textNode.parentElement?.matches(ancestorSelector);
if (valid) return NodeFilter.FILTER_ACCEPT;
return NodeFilter.FILTER_REJECT;
});
}
function createSubstituteNode (): HTMLSpanElement {
const span = document.createElement("span");
span.textContent = "BARFOO";
span.style.setProperty("color", "red");
return span;
}
function transformTextNode (node: Node, regexp: RegExp): void {
const {parentNode, textContent} = node;
assert(parentNode, "Parent node not found");
assert(textContent, "Text content not found");
const iter = textContent.split(regexp)[Symbol.iterator]();
const firstResult = iter.next();
if (firstResult.done) return;
if (firstResult.value.length > 0) {
parentNode.insertBefore(new Text(firstResult.value), node);
}
for (const str of iter) {
parentNode.insertBefore(createSubstituteNode(), node);
if (str.length === 0) continue;
parentNode.insertBefore(new Text(str), node);
}
parentNode.removeChild(node);
}
function main () {
const TARGET_REGEXP = /world/i;
const TARGET_SELECTOR = "div, h1, h2, h3, h4, p, span, strong";
const tw = document.createTreeWalker(
document.body,
NodeFilter.SHOW_TEXT,
{acceptNode: createTextNodeFilterFn(TARGET_REGEXP, TARGET_SELECTOR)},
);
let node = tw.nextNode();
while (node) {
// Advance the TreeWalker's iterator state before mutating the current node:
const memo = node;
node = tw.nextNode();
transformTextNode(memo, TARGET_REGEXP);
}
}
main();
The TS code above, compiled to plain JavaScript in a runnable snippet:
"use strict";
function assert(expr, msg) {
if (!expr)
throw new Error(msg);
}
/**
 * Builds an `acceptNode` callback for a text-node TreeWalker/NodeIterator.
 *
 * The returned function accepts a text node only when its text content is
 * non-empty, matches `regexp`, and its direct parent element matches
 * `ancestorSelector`; every other node is rejected.
 *
 * @param {RegExp} regexp - Pattern the text content must match.
 * @param {string} ancestorSelector - CSS selector the parent element must match.
 * @returns {(textNode: Text) => number} A NodeFilter result constant.
 */
function createTextNodeFilterFn(regexp, ancestorSelector) {
    return (textNode) => {
        const content = textNode.textContent;
        const textMatches = Boolean(content) && regexp.test(content);
        if (!textMatches) {
            return NodeFilter.FILTER_REJECT;
        }
        // Only the direct parent is checked. To accept any matching ancestor,
        // test textNode.parentElement?.closest(ancestorSelector) instead.
        const parent = textNode.parentElement;
        const parentMatches = parent == null ? false : parent.matches(ancestorSelector);
        return parentMatches ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_REJECT;
    };
}
function createSubstituteNode() {
const span = document.createElement("span");
span.textContent = "BARFOO";
span.style.setProperty("color", "red");
return span;
}
function transformTextNode(node, regexp) {
const { parentNode, textContent } = node;
assert(parentNode, "Parent node not found");
assert(textContent, "Text content not found");
const iter = textContent.split(regexp)[Symbol.iterator]();
const firstResult = iter.next();
if (firstResult.done)
return;
if (firstResult.value.length > 0) {
parentNode.insertBefore(new Text(firstResult.value), node);
}
for (const str of iter) {
parentNode.insertBefore(createSubstituteNode(), node);
if (str.length === 0)
continue;
parentNode.insertBefore(new Text(str), node);
}
parentNode.removeChild(node);
}
function main() {
const TARGET_REGEXP = /world/i;
const TARGET_SELECTOR = "div, h1, h2, h3, h4, p, span, strong";
const tw = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, { acceptNode: createTextNodeFilterFn(TARGET_REGEXP, TARGET_SELECTOR) });
let node = tw.nextNode();
while (node) {
// Advance the TreeWalker's iterator state before mutating the current node:
const memo = node;
node = tw.nextNode();
transformTextNode(memo, TARGET_REGEXP);
}
}
main();
<div>
hello world
<p>
the world is round
<img src="domain.com/world.jpg">
</p>
</div>

With the help of a friend, explaining that the nodelist "array" returned by querySelectorAll is static, that explains why nodes were being missed or overwritten. The suggestion was to start at the lowest level of the DOM tree, perform the innerHTML replacement, then work up the tree.
Hat tip to Rob for his explanation: document.querySelectorAll returns a static nodelist which is accurate when the function is called but isn't accurate if the document is changed. Using .innerHTML to make the replacement of "world" deletes and recreates all existing content in the tag including the <p> tag and its contents the <p> tag that is now on the page is a completely new one that isn't referenced by the node returned by document.querySelectorAll
querySelectorAll returns an "array" (not quite, but the term is used loosely for purposes of this answer) using the depth-first traversal in pre-order operation. Read more here https://en.wikipedia.org/wiki/Tree_traversal for tree traversal methods.
I needed to start at the lowest levels of the node arrays so as to not mangle any references to child nodes.
Here is the change:
(old)
const elements = Array.from(document.querySelectorAll('p, span, div, strong, h1, h2, h3, h4')).filter(
(new)
const elements = Array.from(document.querySelectorAll('p, span, div, strong, h1, h2, h3, h4')).reverse().filter(
On the sample html code in this question, and some other variations, it works fine. I'll continue to test further.
Comments / pitfalls welcomed.
New fiddle https://jsfiddle.net/9vwo6a3q/

How to ignore any element in Javascript? [duplicate]

<div class="title">
I am text node
<a class="edit">Edit</a>
</div>
I wish to get the "I am text node", do not wish to remove the "edit" tag, and need a cross browser solution.

var text = $(".title").contents().filter(function() {
return this.nodeType == Node.TEXT_NODE;
}).text();
This gets the contents of the selected element, and applies a filter function to it. The filter function returns only text nodes (i.e. those nodes with nodeType == Node.TEXT_NODE).

You can get the nodeValue of the first childNode using
$('.title')[0].childNodes[0].nodeValue
http://jsfiddle.net/TU4FB/

Another native JS solution that can be useful for "complex" or deeply nested elements is to use NodeIterator. Put NodeFilter.SHOW_TEXT as the second argument ("whatToShow"), and iterate over just the text node children of the element.
var root = document.querySelector('p'),
iter = document.createNodeIterator(root, NodeFilter.SHOW_TEXT),
textnode;
// print all text nodes
while (textnode = iter.nextNode()) {
console.log(textnode.textContent)
}
<p>
<br>some text<br>123
</p>
You can also use TreeWalker. The difference between the two is that NodeIterator is a simple linear iterator, while TreeWalker allows you to navigate via siblings and ancestors as well.

ES6 version that return the first #text node content
/**
 * Returns the trimmed text of the first direct text-node child of `node`,
 * or `undefined` when `node` has no text-node child.
 */
const extract = (node) => {
  for (const child of node.childNodes) {
    if (child.nodeType === Node.TEXT_NODE) {
      return child.textContent.trim();
    }
  }
  return undefined;
};

If you mean get the value of the first text node in the element, this code will work:
var oDiv = document.getElementById("MyDiv");
var firstText = "";
for (var i = 0; i < oDiv.childNodes.length; i++) {
var curNode = oDiv.childNodes[i];
if (curNode.nodeName === "#text") {
firstText = curNode.nodeValue;
break;
}
}
You can see this in action here: http://jsfiddle.net/ZkjZJ/

Pure JavaScript: Minimalist
First off, always keep this in mind when looking for text in the DOM.
MDN - Whitespace in the DOM
This issue will make you pay attention to the structure of your XML / HTML.
In this pure JavaScript example, I account for the possibility of multiple text nodes that could be interleaved with other kinds of nodes. However, initially, I do not pass judgment on whitespace, leaving that filtering task to other code.
In this version, I pass a NodeList in from the calling / client code.
/**
 * Reads the value of a child text node by position. Minimalist, pre-test
 * loop version: no string filtering or conditioning is applied.
 *
 * The child at 1-based position `target` is returned only when it is a text
 * node; if that child is another kind of node, or the position is out of
 * range, the result is null.
 *
 * @author Anthony Rutledge
 * @param {NodeList} nodeList The child nodes of a Node, as in node.childNodes.
 * @param {number} target A positive whole number >= 1.
 * @returns {?string} The nodeValue of the targeted text node, or null.
 */
function getText(nodeList, target)
{
    const wantedIndex = target - 1;
    const total = nodeList.length; // There may be many child nodes.
    let index = 0;

    while (index < total) {
        const candidate = nodeList[index];

        if (candidate.nodeType === Node.TEXT_NODE && index === wantedIndex) {
            return candidate.nodeValue; // Found it; stop scanning.
        }

        index += 1;
    }

    return null;
}
Of course, by testing node.hasChildNodes() first, there would be no need to use a pre-test for loop.
/**
 * Reads the value of a child text node by position. Minimalist, post-test
 * loop version: no string filtering or conditioning is applied.
 *
 * The child at 1-based position `target` is returned only when it is a text
 * node; if that child is another kind of node, or the position is out of
 * range, the result is null.
 *
 * @author Anthony Rutledge
 * @param {NodeList} nodeList The child nodes of a Node, as in node.childNodes.
 * @param {number} target A positive whole number >= 1.
 * @returns {?string} The nodeValue of the targeted text node, or null.
 */
function getText(nodeList, target)
{
    const trueTarget = target - 1;
    const length = nodeList.length;
    let i = 0;

    // A post-test loop always runs its body once, so an empty list must be
    // handled up front; otherwise nodeList[0] is undefined and reading
    // .nodeType from it throws.
    if (length === 0) {
        return null;
    }

    do {
        if ((nodeList[i].nodeType === Node.TEXT_NODE) && (i === trueTarget)) {
            return nodeList[i].nodeValue; // Done! No need to keep going.
        }

        i++;
    } while (i < length);

    return null;
}
Pure JavaScript: Robust
Here the function getTextById() uses two helper functions: getStringsFromChildren() and filterWhitespaceLines().
getStringsFromChildren()
/**
 * Collects the nodeValue strings of the direct text-node children of a node.
 * Generic solution: no string filtering or conditioning is applied, so
 * whitespace-only strings are returned as they are.
 *
 * @author Anthony Rutledge
 * @version 7.0
 * @param {Node} parentNode An instance of the Node interface, such as an Element.
 * @returns {?string[]} The strings in document order, or null when the node
 *     has no children or none of its children is a text node.
 * @throws {TypeError} If parentNode is not a Node object.
 */
function getStringsFromChildren(parentNode)
{
    // The parentheses matter: `!parentNode instanceof Node` negates parentNode
    // first and then tests a boolean, which is never an instance of Node.
    if (!(parentNode instanceof Node)) {
        throw new TypeError("The parentNode parameter expects an instance of a Node.");
    }

    if (!parentNode.hasChildNodes()) {
        return null; // We are done. Node may resemble <element></element>
    }

    const strings = [];
    const nodeList = parentNode.childNodes;

    for (let i = 0; i < nodeList.length; i++) {
        if (nodeList[i].nodeType === Node.TEXT_NODE) {
            strings.push(nodeList[i].nodeValue);
        }
    }

    return strings.length > 0 ? strings : null;
}
filterWhitespaceLines()
/**
 * Filters an array of strings, dropping every entry that is empty or
 * consists only of whitespace, and trims the entries that are kept.
 * Generic, cross platform solution.
 *
 * @author Anthony Rutledge
 * @version 6.0
 * @param {string[]} textArray Strings to filter, e.g. text node values.
 * @returns {?string[]} The trimmed, non-blank strings, or null if none remain.
 * @throws {TypeError} If the textArray param is not an Array.
 */
function filterWhitespaceLines(textArray)
{
    // `!textArray instanceof Array` negates textArray before the instanceof
    // test and therefore never fires; Array.isArray is the reliable check.
    if (!Array.isArray(textArray)) {
        throw new TypeError("The textArray parameter expects an instance of a Array.");
    }

    const filteredArray = [];

    for (let i = 0; i < textArray.length; i++) {
        const trimmed = textArray[i].trim();

        // Comparing the trimmed string also rejects "", which the former
        // /^\s+$/ test let through as a bogus empty result.
        if (trimmed !== "") {
            filteredArray.push(trimmed);
        }
    }

    // Leave selecting and joining strings for a specific implementation.
    return filteredArray.length > 0 ? filteredArray : null;
}
getTextById()
/**
 * Gets the non-blank strings held by the direct text-node children of the
 * element with the given id. Robust, generic, cross platform solution.
 *
 * Invalid input does not propagate as an exception: a non-string id, an id
 * that matches no element, and any error raised by the helper functions are
 * caught here, their message is logged with console.log, and null is returned.
 *
 * @author Anthony Rutledge
 * @version 6.0
 * @param {string} id The id of the Element to read text from.
 * @returns {?string[]} Array of trimmed strings, or null when there is no text.
 */
function getTextById(id)
{
    try {
        const idDatatype = typeof id; // Only used in a TypeError message.

        if (idDatatype !== "string") {
            throw new TypeError("The id argument must be of type String! Got " + idDatatype);
        }

        const node = document.getElementById(id); // The parent node being examined.

        if (node === null) {
            throw new TypeError("No element found with the id: " + id);
        }

        const rawStrings = getStringsFromChildren(node);

        if (rawStrings === null) {
            return null; // No text nodes found. Example: <element></element>
        }

        // filterWhitespaceLines() returns null when only whitespace was found,
        // so test for null before touching .length; otherwise a TypeError is
        // raised and logged for what is a perfectly normal outcome.
        const textArray = filterWhitespaceLines(rawStrings);

        if (textArray !== null && textArray.length > 0) {
            return textArray; // Leave selecting and joining strings for a specific implementation.
        }
    } catch (e) {
        console.log(e.message);
    }

    return null; // No text to return.
}
Next, the return value (Array, or null) is sent to the client code where it should be handled. Hopefully, the array should have string elements of real text, not lines of whitespace.
Empty strings ("") are not returned because you need a text node to properly indicate the presence of valid text. Returning ("") may give the false impression that a text node exists, leading someone to assume that they can alter the text by changing the value of .nodeValue. This is false, because a text node does not exist in the case of an empty string.
Example 1:
<p id="bio"></p> <!-- There is no text node here. Return null. -->
Example 2:
<p id="bio">
</p> <!-- There are at least two text nodes ("\n"), here. -->
The problem comes in when you want to make your HTML easy to read by spacing it out. Now, even though there is no human readable valid text, there are still text nodes with newline ("\n") characters in their .nodeValue properties.
Humans see examples one and two as functionally equivalent--empty elements waiting to be filled. The DOM is different than human reasoning. This is why the getStringsFromChildren() function must determine if text nodes exist and gather the .nodeValue values into an array.
for (var i = 0; i < length; i++) {
if (nodeList[i].nodeType === Node.TEXT_NODE) {
textNodes.push(nodeList[i].nodeValue);
}
}
In example two, two text nodes do exist and getStringsFromChildren() will return the .nodeValue of both of them ("\n"). However, filterWhitespaceLines() uses a regular expression to filter out lines of pure whitespace characters.
Is returning null instead of newline ("\n") characters a form of lying to the client / calling code? In human terms, no. In DOM terms, yes. However, the issue here is getting text, not editing it. There is no human text to return to the calling code.
One can never know how many newline characters might appear in someone's HTML. Creating a counter that looks for the "second" newline character is unreliable. It might not exist.
Of course, further down the line, the issue of editing text in an empty <p></p> element with extra whitespace (example 2) might mean destroying (maybe, skipping) all but one text node between a paragraph's tags to ensure the element contains precisely what it is supposed to display.
Regardless, except for cases where you are doing something extraordinary, you will need a way to determine which text node's .nodeValue property has the true, human readable text that you want to edit. filterWhitespaceLines gets us half way there.
var whitespaceLine = /(?:^\s+$)/; // Non-capturing Regular Expression.
// Walk the input array (textArray). Bounding the loop by the output array's
// length would stop immediately, because filteredTextArray starts out empty.
for (var i = 0; i < textArray.length; i++) {
if (!whitespaceLine.test(textArray[i])) { // If it is not a line of whitespace.
filteredTextArray.push(textArray[i].trim()); // Trimming here is fine.
}
}
At this point you may have output that looks like this:
["Dealing with text nodes is fun.", "Some people just use jQuery."]
There is no guarantee that these two strings are adjacent to each other in the DOM, so joining them with .join() might make an unnatural composite. Instead, in the code that calls getTextById(), you need to choose which string you want to work with.
Test the output.
try {
var strings = getTextById("bio");
if (strings === null) {
// Do something.
} else if (strings.length === 1) {
// Do something with strings[0]
} else { // Could be another else if
// Do something. It all depends on the context.
}
} catch (e) {
console.log(e.message);
}
One could add .trim() inside of getStringsFromChildren() to get rid of leading and trailing whitespace (or to turn a bunch of spaces into a zero length string (""), but how can you know a priori what every application may need to have happen to the text (string) once it is found? You don't, so leave that to a specific implementation, and let getStringsFromChildren() be generic.
There may be times when this level of specificity (the target and such) is not required. That is great. Use a simple solution in those cases. However, a generalized algorithm enables you to accommodate simple and complex situations.

.text() - for jquery
$('.title').clone() //clone the element
.children() //select all the children
.remove() //remove all the children
.end() //again go back to selected element
.text(); //get the text of element

This version also ignores whitespace, so you never get blank text nodes. The code uses core JavaScript:
// Finds the value of the first direct child text node of #MyDiv that is not
// empty or whitespace-only; firstText stays "" when there is none.
var oDiv = document.getElementById("MyDiv");
var firstText = "";
// Declared once, outside the loop: assigning it without var/let/const would
// create an implicit global (and throw in strict mode).
var whitespace = /^\s*$/;
for (var i = 0; i < oDiv.childNodes.length; i++) {
var curNode = oDiv.childNodes[i];
if (curNode.nodeName === "#text" && !(whitespace.test(curNode.nodeValue))) {
firstText = curNode.nodeValue;
break;
}
}
Check it on jsfiddle : - http://jsfiddle.net/webx/ZhLep/

Simply via Vanilla JavaScript:
const el = document.querySelector('.title');
const text = el.firstChild.textContent.trim();

You can also use XPath's text() node test to get the text nodes only. For example
var target = document.querySelector('div.title');
var iter = document.evaluate('text()', target, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE);
var node;
var want = '';
while (node = iter.iterateNext()) {
want += node.data;
}

There are some overcomplicated solutions here but the operation is as straightforward as using .childNodes to get children of all node types and .filter to extract e.nodeType === Node.TEXT_NODEs. Optionally, we may want to do it recursively and/or ignore "empty" text nodes (all whitespace).
These examples convert the nodes to their text content for display purposes, but this is technically a separate step from filtering.
const immediateTextNodes = el =>
[...el.childNodes].filter(e => e.nodeType === Node.TEXT_NODE);
const immediateNonEmptyTextNodes = el =>
[...el.childNodes].filter(e =>
e.nodeType === Node.TEXT_NODE && e.textContent.trim()
);
const firstImmediateTextNode = el =>
[...el.childNodes].find(e => e.nodeType === Node.TEXT_NODE);
const firstImmediateNonEmptyTextNode = el =>
[...el.childNodes].find(e =>
e.nodeType === Node.TEXT_NODE && e.textContent.trim()
);
// example usage:
const text = el => el.textContent;
const p = document.querySelector("p");
console.log(immediateTextNodes(p).map(text));
console.log(immediateNonEmptyTextNodes(p).map(text));
console.log(text(firstImmediateTextNode(p)));
console.log(text(firstImmediateNonEmptyTextNode(p)));
// if you want to trim whitespace:
console.log(immediateNonEmptyTextNodes(p).map(e => text(e).trim()));
<p>
<span>IGNORE</span>
<b>IGNORE</b>
foo
<br>
bar
</p>
Recursive alternative to a NodeIterator:
const deepTextNodes = el => [...el.childNodes].flatMap(e =>
e.nodeType === Node.TEXT_NODE ? e : deepTextNodes(e)
);
const deepNonEmptyTextNodes = el =>
[...el.childNodes].flatMap(e =>
e.nodeType === Node.TEXT_NODE && e.textContent.trim()
? e : deepNonEmptyTextNodes(e)
);
// example usage:
const text = el => el.textContent;
const p = document.querySelector("p");
console.log(deepTextNodes(p).map(text));
console.log(deepNonEmptyTextNodes(p).map(text));
<p>
foo
<span>bar</span>
baz
<span><b>quux</b></span>
</p>
Finally, feel free to join the text node array into a string if you wish using .join(""). But as with trimming and text content extraction, I'd probably not bake this into the core filtering function and leave it to the caller to handle as needed.

Replace text in the middle of a TextNode with an element

I want to insert html tags within a text node with TreeWalker, but TreeWalker forces my html brackets into & lt; & gt; no matter what I've tried. Here is the code:
var text;
var tree = document.createTreeWalker(document.body,NodeFilter.SHOW_TEXT);
while (tree.nextNode()) {
text = tree.currentNode.nodeValue;
text = text.replace(/(\W)(\w+)/g, '$1<element onmouseover="sendWord(\'$2\')">$2</element>');
text = text.replace(/^(\w+)/, '<element onmouseover="sendWord(\'$1\')">$1</element>');
tree.currentNode.nodeValue = text;
}
Using \< or " instead of ' won't help. My workaround is to copy all of the DOM tree to a string and to replace the html body with that. It works on very simple webpages and solves my first problem, but is a bad hack and won't work on anything more than a trivial page. I was wondering if I could just work straight with the text node rather than use a workaround. Here is the code for the (currently buggy) workaround:
var text;
var newHTML = "";
var tree = document.createTreeWalker(document.body);
while (tree.nextNode()) {
text = tree.currentNode.nodeValue;
if (tree.currentNode.nodeType == 3){
text = text.replace(/(\W)(\w+)/g, '$1<element onmouseover="sendWord(\'$2\')">$2</element>');
text = text.replace(/^(\w+)/, '<element onmouseover="sendWord(\'$1\')">$1</element>');
}
newHTML += text
}
document.body.innerHTML = newHTML;
Edit: I realize a better workaround would be to custom tag the text nodes ((Customtag_Start_Here) etc.), copy the whole DOM to a string, and use my customs tags to identify text nodes and modify them that way. But if I don't have to, I'd rather not.

To 'change' a text node into an element, you must replace it with an element. For example:
var text = tree.currentNode;
var el = document.createElement('foo');
el.setAttribute('bar','yes');
text.parentNode.replaceChild( el, text );
If you want to retain part of the text node, and inject an element "in the middle", you need to create another text node and insert it and the element into the tree at the appropriate places in the tree.
Edit: Here's a function that might be super useful to you. :)
Given a text node, it runs a regex on the text values. For each hit that it finds it calls a custom function that you supply. If that function returns a string, then the match is replaced. However, if that function returns an object like:
{ name:"element", attrs:{onmouseover:"sendWord('foo')"}, content:"foo" }
then it will split the text node around the match and inject an element in that location. You can also return an array of strings or those objects (and can recursively use arrays, strings, or objects as the content property).
Demo: http://jsfiddle.net/DpqGH/8/
/**
 * Runs `regex` against the value of a text node and replaces each hit with
 * whatever `handler` returns for it.
 *
 * `handler` is invoked through `apply`, so it receives the full match and the
 * capture groups as separate arguments (not as one array). Its return value
 * may be a string (inserted as text), an object of the form
 * `{ name, attrs, content, namespaceURI }` (inserted as a new element), or an
 * array of those.
 *
 * With a global regex every hit in the node is processed; otherwise only the
 * first one is.
 *
 * NOTE(review): with a global regex that can match the empty string the loop
 * below does not appear to make progress - confirm before using such patterns.
 *
 * @param {Text} node - The text node to process.
 * @param {RegExp} regex - Pattern to search for in the node's value.
 * @param {Function} handler - Called per hit; returns the replacement spec.
 */
function textNodeReplace(node,regex,handler) {
// Captured once up front: every inserted node goes into `mom` before `nxt`,
// i.e. directly after the text that precedes it.
var mom=node.parentNode, nxt=node.nextSibling,
doc=node.ownerDocument, hits;
if (regex.global) {
while(node && (hits=regex.exec(node.nodeValue))){
// The next exec() runs on a brand-new text node holding only the remainder,
// so the search has to start from its beginning again.
regex.lastIndex = 0;
// handleResult returns the text node holding the remainder, or "" when
// nothing is left, which ends the loop.
node=handleResult( node, hits, handler.apply(this,hits) );
}
} else if (hits=regex.exec(node.nodeValue))
handleResult( node, hits, handler.apply(this,hits) );
// Splits `node` around one hit: keeps the text before the match in `node`,
// inserts the replacement node(s), then a new text node for the text after it.
function handleResult(node,hits,results){
var orig = node.nodeValue;
node.nodeValue = orig.slice(0,hits.index);
// [].concat normalizes a single node or an array of nodes to an array.
[].concat(create(mom,results)).forEach(function(n){
mom.insertBefore(n,nxt);
});
var rest = orig.slice(hits.index+hits[0].length);
return rest && mom.insertBefore(doc.createTextNode(rest),nxt);
}
// Turns a replacement spec into DOM node(s): arrays are mapped recursively,
// objects become elements, and anything else becomes a text node.
function create(el,o){
if (o.map) return o.map(function(v){ return create(el,v) });
else if (typeof o==='object') {
// Falls back to the namespace of the element the new node is created for.
var e = doc.createElementNS(o.namespaceURI || el.namespaceURI,o.name);
if (o.attrs) for (var a in o.attrs) e.setAttribute(a,o.attrs[a]);
if (o.content) [].concat(create(e,o.content)).forEach(e.appendChild,e);
return e;
} else return doc.createTextNode(o+"");
}
}
It's not quite perfectly generic, as it does not support namespaces on attributes. But hopefully it's enough to get you going. :)
You would use it like so:
// Wrap every word of every text node in the body in an <element> whose
// onmouseover handler reports that word.
findAllTextNodes(document.body).forEach(function(textNode){
// The function is defined as textNodeReplace, and it calls the handler with
// the match as a plain string (via handler.apply), so `match` is the word
// itself; indexing it with [0] would yield only its first character.
textNodeReplace( textNode, /\b\w+/g, function(match){
return {
name:'element',
attrs:{onmouseover:"sendWord('"+match+"')"},
content:match
};
});
});
/**
 * Collects every text node below `node`, in TreeWalker order, skipping text
 * whose direct parent is a SCRIPT element.
 *
 * @param {Node} node - Root of the subtree to search.
 * @returns {Text[]} The matching text nodes.
 */
function findAllTextNodes(node){
const treeWalker = node.ownerDocument.createTreeWalker(node,NodeFilter.SHOW_TEXT);
const collected = [];
while (treeWalker.nextNode()) {
const current = treeWalker.currentNode;
// Script source is text too, but it must not be rewritten.
if (current.parentNode.tagName === 'SCRIPT') {
continue;
}
collected.push(current);
}
return collected;
}
or if you want something closer to your original regex:
// The function is defined as textNodeReplace, and its handler receives the
// full match followed by the capture groups as separate arguments:
// `lead` is group 1 (start of text or a non-word character), `word` is group 2.
textNodeReplace( textNode, /(^|\W)(\w+)/g, function(match, lead, word){
return [
lead, // might be an empty string
{
name:'element',
attrs:{onmouseover:"sendWord('"+word+"')"},
content:word
}
];
});

Function that returns the parent element of any text node including partial match of passed string:
/**
 * Finds the parent element of a text node whose content contains `text`.
 *
 * The subtree below `mainNode` is searched depth-first, visiting the children
 * of each node from last to first, and the search stops at the first hit.
 *
 * @param {string} text - Substring to look for in text nodes.
 * @param {Node} mainNode - Root of the subtree to search.
 * @returns {?Element} The parent element of the matching text node, or null.
 */
function findElByText(text, mainNode) {
let match = null;
const visit = (parent) => {
const kids = parent.childNodes;
// Walk backwards; stop as soon as a deeper call has recorded a match.
for (let index = kids.length - 1; index >= 0 && !match; index -= 1) {
const kid = kids[index];
const kind = kid.nodeType;
if (kind === 3) {
// Text node: a hit ends the scan of this level.
if (kid.textContent.includes(text)) {
match = kid.parentElement;
return;
}
continue;
}
// Elements, documents and document fragments can contain text nodes.
if (kind === 1 || kind === 9 || kind === 11) {
visit(kid);
}
}
};
visit(mainNode);
return match;
}
Usage:
findElByText('Some string in document', document.body);

how to get all parent nodes of given element in pure javascript?

I mean an array of them. That is a chain from top HTML to destination element including the element itself.
for example for element <A> it would be:
[HTML, BODY, DIV, DIV, P, SPAN, A]

A little shorter (and safer, since target may not be found):
var a = document.getElementById("target");
var els = [];
while (a) {
els.unshift(a);
a = a.parentNode;
}

You can try something like:
var nodes = [];
var element = document.getElementById('yourelement');
nodes.push(element);
while(element.parentNode) {
nodes.unshift(element.parentNode);
element = element.parentNode;
}

I like this method:
[...(function*(e){do { yield e; } while (e = e.parentNode);})($0)]
... where $0 is your element.
An upside of this method is that it can be used as a value in expressions.
To get an array without the target element:
[...(function*(e){while (e = e.parentNode) { yield e; }})($0)]

You can walk the chain of element.parentNodes until you reach a falsy value, appending to an array as you go:
/**
 * Returns the given node followed by each successive parentNode, nearest
 * first, stopping when the chain reaches a falsy value.
 *
 * @param {Node|null|undefined} el - Node to start from.
 * @returns {Node[]} The node and its ancestors; empty when `el` is falsy.
 */
const getParents = el => {
  const lineage = [];
  let current = el;
  while (current) {
    lineage.push(current);
    current = current.parentNode;
  }
  return lineage;
};
// Look up a <b> element in the document to use as the starting node.
const el = document.querySelector("b");
// getParents returns the chain starting at el; reverse() flips it in place and map() keeps only each node's name for logging.
console.log(getParents(el).reverse().map(e => e.nodeName));
<div><p><span><b>Foo</b></span></div>
Note that reversing is done in the caller because it's not essential to the lineage algorithm. Mapping to e.nodeName is purely for presentation and also non-essential.
Note that this approach means you'll wind up with the document element as the last element in the chain. If you don't want that, you can add && el !== document to the loop stopping condition.
The overall time complexity of the code above is linear and reverse() is in-place, so it doesn't require an extra allocation. unshift in a loop, as some of the other answers recommend, is quadratic and may harm scalability on uncommonly-deep DOM trees in exchange for a negligible gain in elegance.

Another alternative (based on this):
// Gather the target element and its ancestors (nearest first), stopping
// before the document node itself is added.
var e = document.getElementById("target");
var p = [];
while (e && e !== document) {
  p.push(e);
  e = e.parentNode;
}

I believe this will likely be the most performant option in the long run, in most scenarios, if you call this function frequently. The reason it will be more performant is that it initially checks what depth of ancestry it might encounter. Also, instead of creating a new array every time you call it, this function efficiently reuses the same array and slices it, which is very well optimized in some browsers. However, since I know of no really efficient way to check the maximum depth, I am left with a less efficient query-selector check.
// !IMPORTANT! When moving this coding snippet over to your production code,
// do not run the following depthtest more than once, it is not very performant
// Depth probe: keep appending one more child combinator level to the selector
// and count how many times something under <body> still matches.
var kCurSelector="*|*", curDepth=3;
while (document.body.querySelector(kCurSelector += '>*|*')) curDepth++;
// Round the estimate up to the next power of two and size the shared buffer
// with it. (The original read an undefined `startDepth` here and ended the
// statement with a comma, which made the whole snippet a syntax error.)
curDepth = Math.pow(2, Math.ceil(Math.log2(curDepth)));
var parentsTMP = new Array(curDepth);

/**
 * Returns the given node and its ancestors, outermost first, excluding the
 * document node itself.
 *
 * The shared `parentsTMP` buffer is filled from its end towards its start and
 * the filled tail is returned as a fresh array via slice(). When the buffer
 * runs out of room it is doubled and the entries already written are moved
 * into the upper half.
 *
 * @param {*} Ele - Node to start from; anything that is not a Node yields [].
 * @returns {Node[]} Ancestor chain ending with `Ele`.
 */
function getAllParentNodes(Ele){
  let curPos = curDepth;
  if (Ele instanceof Node) {
    let node = Ele;
    // The `node &&` guard stops at the top of a detached subtree, where
    // parentNode becomes null without ever reaching `document`.
    while (node && node !== document){
      if (curPos === 0){
        // Buffer is full: double it and shift the existing entries into the
        // upper half so writing can continue downwards from the middle.
        curPos += curDepth;
        parentsTMP.length <<= 1;
        parentsTMP.copyWithin(curDepth, 0, curDepth);
        curDepth <<= 1;
      }
      parentsTMP[--curPos] = node;
      node = node.parentNode;
    }
  }
  // The original returned an undefined `retArray`; the data lives in parentsTMP.
  return parentsTMP.slice(curPos);
}
The browser compatibility for the above function is that it will work in Edge, but not in IE. If you want IE support, then you will need a Array.prototype.copyWithin polyfill.

get all parent nodes of child in javascript array
// Walk upwards from the target, prepending each node so the array ends up
// ordered from the outermost ancestor down to the target itself.
let els = [];
let selectedTxtElement = document.getElementById("target");
for (; selectedTxtElement; selectedTxtElement = selectedTxtElement.parentNode) {
  els.unshift(selectedTxtElement);
}
know more

Splitting node content in JavaScript DOM

I have a scenario where I need to split a node up to a given ancestor, e.g.
<strong>hi there, how <em>are <span>you</span> doing</em> today?</strong>
needs to be split into:
<strong>hi there, how <em>are <span>y</span></em></strong>
and
<strong><em><span>ou</span> doing</em> today?</strong>
How would I go about doing this?

Here is a solution that will work for modern browsers using Range. Something similar could be done for IE < 9 using TextRange, but I use Linux so I don't have easy access to those browsers. I wasn't sure what you wanted the function to do, return the nodes or just do a replace inline. I just took a guess and did the replace inline.
/**
 * Splits the DOM in place at (node, offset), up to and including the
 * ancestor `limit`.
 *
 * A Range is opened just before `limit` (inside limit's parent) and closed at
 * (node, offset). Its contents are extracted and re-inserted before `limit`,
 * so the extracted left part becomes a preceding sibling and `limit` keeps
 * whatever followed the split point.
 *
 * @param {Node} node - Node containing the split point (a text node is expected).
 * @param {number} offset - Offset inside `node`, interpreted as Range.setEnd does.
 * @param {Node} limit - Ancestor up to which the split is carried.
 */
function splitNode(node, offset, limit) {
  const container = limit.parentNode;
  const limitIndex = getNodeIndex(container, limit);
  const range = node.ownerDocument.createRange();
  range.setStart(container, limitIndex);
  range.setEnd(node, offset);
  container.insertBefore(range.extractContents(), limit);
}
/**
 * Finds the position of `node` among `parent.childNodes`, scanning from the
 * last child backwards.
 *
 * @param {Node} parent - Node whose childNodes are searched.
 * @param {Node} node - Child to locate.
 * @returns {number} Index of `node`, or -1 when it is not a child of `parent`.
 */
function getNodeIndex(parent, node) {
  for (let position = parent.childNodes.length - 1; position >= 0; position -= 1) {
    if (parent.childNodes[position] === node) {
      return position;
    }
  }
  return -1;
}
Demo: jsbin
It expects a TextNode for node, although it will work with an Element; the offset will just function differently based on the behavior of Range.setStart

See the method Text.splitText.

Not sure if this helps you, but this is what I came up with...
Pass the function an element and a node tag name string you wish to move up to.
<strong>hi there, how <em>are <span id="span">you</span> doing</em> today?</strong>
<script type="text/javascript">
/**
 * Climbs from `element` through its ancestors until one with the given tag
 * name is found. On the way up (and on the match itself) the `data` of each
 * ancestor's first child is logged and then blanked.
 *
 * @param {Node} element - Node whose ancestors are searched.
 * @param {string} tagName - Tag name to stop at (case-insensitive).
 * @returns {Node|null} The matching ancestor, or null when the top of the
 *   tree is reached without a match (previously this ended in a TypeError).
 */
function findParentNode(element,tagName){
  const wantedTag = tagName.toUpperCase();
  let parentNode = element.parentNode;
  while (parentNode) {
    const firstChild = parentNode.firstChild;
    console.log(`Removing node: ${parentNode.tagName} DATA: ${firstChild?.data}`);
    // Erase the first child's data (the leading text node), leaving the other
    // nodes intact. An ancestor without children has nothing to erase.
    if (firstChild) {
      firstChild.data = '';
    }
    if (parentNode.tagName === wantedTag) {
      return parentNode;
    }
    // Move up the chain of parents to keep looking for the requested tag.
    parentNode = parentNode.parentNode;
  }
  return null;
}
// Start from the element with id "span" in the markup above.
var ourNode = document.getElementById("span");
// Climb to the enclosing <strong> and show what is left inside it after findParentNode has blanked the leading text of each ancestor it passed.
alert(findParentNode(ourNode,'strong').innerHTML);
</script>

Develop Reference

JavaScript is the programming language of the Web.

Getting the full HTML for an Element excluding innerHTML - javascript

Related

JS: replacing all occurrences of a word in html with <span> element ONLY for p, span & divs. Not working if parent node contains the word

How to ignore any element in Javascript? [duplicate]

Replace text in the middle of a TextNode with an element

how to get all parent nodes of given element in pure javascript?

Splitting node content in JavaScript DOM

Categories

Resources