Replace text in the middle of a TextNode with an element - javascript

I want to insert html tags within a text node with TreeWalker, but TreeWalker forces my html brackets into & lt; & gt; no matter what I've tried. Here is the code:
var text;
var tree = document.createTreeWalker(document.body,NodeFilter.SHOW_TEXT);
while (tree.nextNode()) {
text = tree.currentNode.nodeValue;
text = text.replace(/(\W)(\w+)/g, '$1<element onmouseover="sendWord(\'$2\')">$2</element>');
text = text.replace(/^(\w+)/, '<element onmouseover="sendWord(\'$1\')">$1</element>');
tree.currentNode.nodeValue = text;
}
Using \< or " instead of ' won't help. My workaround is to copy all of the DOM tree to a string and to replace the html body with that. It works on very simple webpages and solves my first problem, but is a bad hack and won't work on anything more than a trivial page. I was wondering if I could just work straight with the text node rather than use a workaround. Here is the code for the (currently buggy) workaround:
var text;
var newHTML = "";
var tree = document.createTreeWalker(document.body);
while (tree.nextNode()) {
text = tree.currentNode.nodeValue;
if (tree.currentNode.nodeType == 3){
text = text.replace(/(\W)(\w+)/g, '$1<element onmouseover="sendWord(\'$2\')">$2</element>');
text = text.replace(/^(\w+)/, '<element onmouseover="sendWord(\'$1\')">$1</element>');
}
newHTML += text
}
document.body.innerHTML = newHTML;
Edit: I realize a better workaround would be to custom tag the text nodes ((Customtag_Start_Here) etc.), copy the whole DOM to a string, and use my customs tags to identify text nodes and modify them that way. But if I don't have to, I'd rather not.

To 'change' a text node into an element, you must replace it with an element. For example:
var text = tree.currentNode;
var el = document.createElement('foo');
el.setAttribute('bar','yes');
text.parentNode.replaceChild( el, text );
If you want to retain part of the text node, and inject an element "in the middle", you need to create another text node and insert it and the element into the tree at the appropriate places in the tree.
Edit: Here's a function that might be super useful to you. :)
Given a text node, it runs a regex on the text values. For each hit that it finds it calls a custom function that you supply. If that function returns a string, then the match is replaced. However, if that function returns an object like:
{ name:"element", attrs{onmouseover:"sendWord('foo')"}, content:"foo" }
then it will split the text node around the match and inject an element in that location. You can also return an array of strings or those objects (and can recursively use arrays, strings, or objects as the content property).
Demo: http://jsfiddle.net/DpqGH/8/
function textNodeReplace(node,regex,handler) {
var mom=node.parentNode, nxt=node.nextSibling,
doc=node.ownerDocument, hits;
if (regex.global) {
while(node && (hits=regex.exec(node.nodeValue))){
regex.lastIndex = 0;
node=handleResult( node, hits, handler.apply(this,hits) );
}
} else if (hits=regex.exec(node.nodeValue))
handleResult( node, hits, handler.apply(this,hits) );
function handleResult(node,hits,results){
var orig = node.nodeValue;
node.nodeValue = orig.slice(0,hits.index);
[].concat(create(mom,results)).forEach(function(n){
mom.insertBefore(n,nxt);
});
var rest = orig.slice(hits.index+hits[0].length);
return rest && mom.insertBefore(doc.createTextNode(rest),nxt);
}
function create(el,o){
if (o.map) return o.map(function(v){ return create(el,v) });
else if (typeof o==='object') {
var e = doc.createElementNS(o.namespaceURI || el.namespaceURI,o.name);
if (o.attrs) for (var a in o.attrs) e.setAttribute(a,o.attrs[a]);
if (o.content) [].concat(create(e,o.content)).forEach(e.appendChild,e);
return e;
} else return doc.createTextNode(o+"");
}
}
It's not quite perfectly generic, as it does not support namespaces on attributes. But hopefully it's enough to get you going. :)
You would use it like so:
findAllTextNodes(document.body).forEach(function(textNode){
replaceTextNode( textNode, /\b\w+/g, function(match){
return {
name:'element',
attrs:{onmouseover:"sendWord('"+match[0]+"')"},
content:match[0]
};
});
});
function findAllTextNodes(node){
var walker = node.ownerDocument.createTreeWalker(node,NodeFilter.SHOW_TEXT);
var textNodes = [];
while (walker.nextNode())
if (walker.currentNode.parentNode.tagName!='SCRIPT')
textNodes.push(walker.currentNode);
return textNodes;
}
or if you want something closer to your original regex:
replaceTextNode( textNode, /(^|\W)(\w+)/g, function(match){
return [
match[1], // might be an empty string
{
name:'element',
attrs:{onmouseover:"sendWord('"+match[2]+"')"},
content:match[2]
}
];
});

Function that returns the parent element of any text node including partial match of passed string:
function findElByText(text, mainNode) {
let textEl = null;
const traverseNodes = function (n) {
if (textEl) {
return;
}
for (var nodes = n.childNodes, i = nodes.length; i--;) {
if (textEl) {
break;
}
var n = nodes[i], nodeType = n.nodeType;
// Its a text node, check if it matches string
if (nodeType == 3) {
if (n.textContent.includes(text)) {
textEl = n.parentElement;
break;
}
}
else if (nodeType == 1 || nodeType == 9 || nodeType == 11) {
traverseNodes(n);
}
}
}
traverseNodes(mainNode);
return textEl;
}
Usage:
findElByText('Some string in document', document.body);

Related

how to replace all matching plain text strings in string using javascript (but not tags or attributes)?

imagine this html on a page
<div id="hpl_content_wrap">
<p class="foobar">this is one word and then another word comes in foobar and then more words and then foobar again.</p>
<p>this is a link with foobar in an attribute but only the foobar inside of the link should be replaced.</p>
</div>
using javascript, how to change all 'foobar' words to 'herpderp' without changing any inside of html tags?
ie. only plain text should be changed.
so the successful html changed will be
<div id="hpl_content_wrap">
<p class="foobar">this is one word and then another word comes in herpderp and then more words and then herpderp again.</p>
<p>this is a link with herpderp in an attribute but only the herpderp inside of the link should be replaced. </p>
</div>
Here is what you need to do...
Get a reference to a bunch of elements.
Recursively walk the children, replacing text in text nodes only.
Sorry for the delay, I was sidetracked before I could add the code.
var replaceText = function me(parentNode, find, replace) {
var children = parentNode.childNodes;
for (var i = 0, length = children.length; i < length; i++) {
if (children[i].nodeType == 1) {
me(children[i], find, replace);
} else if (children[i].nodeType == 3) {
children[i].data = children[i].data.replace(find, replace);
}
}
return parentNode;
}
replaceText(document.body, /foobar/g, "herpderp");​​​
jsFiddle.
It's a simple matter of:
identifying all text nodes in the DOM tree,
then replacing all foobar strings in them.
Here's the full code:
// from: https://stackoverflow.com/questions/298750/how-do-i-select-text-nodes-with-jquery
var getTextNodesIn = function (el) {
return $(el).find(":not(iframe)").andSelf().contents().filter(function() {
return this.nodeType == 3;
});
};
var replaceAllText = function (pattern, replacement, root) {
var nodes = getTextNodesIn(root || $('body'))
var re = new RegExp(pattern, 'g')
nodes.each(function (i, e) {
if (e.textContent && e.textContent.indexOf(pattern) != -1) {
e.textContent = e.textContent.replace(re, replacement);
}
});
};
// replace all text nodes in document's body
replaceAllText('foobar', 'herpderp');
// replace all text nodes under element with ID 'someRootElement'
replaceAllText('foobar', 'herpderp', $('#someRootElement'));
Note that I do a precheck on foobar to avoid processing crazy long strings with a regexp. May or may not be a good idea.
If you do not want to use jQuery, but only pure JavaScript, follow the link in the code snippet ( How do I select text nodes with jQuery? ) where you'll also find a JS only version to fetch nodes. You'd then simply iterate over the returned elements in a similar fashion.

Creating a regex that ignores everything inside <code> blocks

I'm adding emoticons to user input with:
function emoticons(html){
for(var emoticon in emotes){
for(var i = 0; i < emotes[emoticon].length; i++){
// Escape bad characters like )
var r = RegExp.escape(emotes[emoticon][i]);
// Set the regex up to replace all matches
r_escaped = new RegExp(r, "g");
// Replace the emote with the image
html = html.replace(r_escaped,"<img src=\""+icon_folder+"/face-"+emoticon+".png\" class=\"emoticonimg\" />");
}
}
return html;
}
The problem is sometimes the user input is in <code>xxx</code> blocks. Is there a way to get the emoticons function to ignore everything inside the code blocks if they exists. As they won't always exist?
Thanks
In order to do this easily, I'd work only with text nodes (not serialised HTML) and skip code elements.
You tagged it jquery, so there is some jQuery convenience code to ease cross browser issues with utility functions. It is very easily modified to work without jQuery, however.
var searchText = function(parentNode, regex, callback, skipElements) {
skipElements = skipElements || ['script', 'style'];
var node = parentNode.firstChild;
do {
if (node.nodeType == 1) {
var tag = node.tagName.toLowerCase();
if (~$.inArray(tag, skipElements)) {
continue;
}
searchText.call(this, node, regex, callback);
} else if (node.nodeType == 3) {
while (true) {
// Does this node have a match? If not, break and return.
if (!regex.test(node.data)) {
break;
}
node.data.replace(regex, function(match) {
var args = $.makeArray(arguments),
offset = args[args.length - 2],
newTextNode = node.splitText(offset);
callback.apply(window, [node].concat(args));
newTextNode.data = newTextNode.data.substr(match.length);
node = newTextNode;
});
}
}
} while (node = node.nextSibling);
};
searchText($('body')[0], /:\)/, function(node, match) {
var img = $('<img />')[0];
img.src = 'http://www.gravatar.com/avatar/80200e1488ab252197b7f0f51ae230ef?s=32&d=identicon&r=PG';
img.alt = match;
node.parentNode.insertBefore(img, node.nextSibling);
}, ['code']);
jsFiddle.
I wrote this function recently, it should do what you hope to achieve.

Javascript to replace text in Gmail message (Chrome Extension)

I'm trying to write a Chrome Extension which will replace a string of text in Gmail messages.
If I open a Gmail message and click the print button to see it in print view and run the extension it works well and replaces all instances of the string.
However, if I run the extension in the normal Gmail discussion view it causes the page to refresh and it does not actually replace any of the text.
Here is the code I'm using. Any ideas what I'm doing wrong?
function doIt(){
findAndReplace("Father","Mother");
}
var haystackText = "";
function findAndReplace(needle, replacement) {
if (haystackText.length == 0) {
haystackText = document.body.innerHTML;
}
var match = new RegExp(needle, "ig");
var replaced = "";
if (replacement.length > 0) {
replaced = haystackText.replace(match, replacement);
document.body.innerHTML = replaced;
}
}
doIt();
Update:
Here is the updated code I'm using after Felix Kling's help. Similar issue to the one above, it works in print preview but not in the regular Gmail view. It does not however reload the page.
function findAndReplace(root, needle, replacement) {
var children = root.childNodes;
var pattern = new RegExp(needle, 'ig');
var node;
for(var i = 0, l = children.length; i < l; i++) {
node = children[i];
if(node.nodeType === 3) { // we have a text node
node.nodeValue = node.nodeValue.replace(pattern, replacement);
} else if(node.nodeType === 1) { // Element node
findAndReplace(node, needle, replacement);
}
}
}
function doIt(){
findAndReplace(document,"Father","Mother");
}
doIt();
It's because you are changing the whole document. You are basically destroying and creating every element of the page.
document.body.innerHTML = replaced;
inserts every element of the page anew, so all JavaScript code is executed again etc.
You should only change those text nodes that contain the text you are looking for. That means you have to recursively traverse the DOM (not the whole, start at the message list), test the node type and value and perform the necessary actions.
Update: Example
function findAndReplace(root, needle, replacement) {
var children = root.childNodes,
pattern = new RegExp(needle, 'ig');
node;
for(var i = 0, l = childNodes.length; i < l; i++) {
node = children[i];
if(node.nodeType === 3) { // we have a text node
node.nodeValue = node.nodeValue.replace(pattern, replacement);
} else if(node.nodeType === 1) { // Element node
findAndReplace(node, needle, replacement);
}
}
}
This is a simple example. I actually don't know how well it performs, but I think you have no other choice. Further things to consider are e.g. escaping special regular expressions characters in the search string.
To learn more about JavaScript and DOM, I recommend to read:
MDN - JavaScript Guide
MDN - DOM reference (espcially the introduction)

Splitting node content in JavaScript DOM

I have a scenario where I need to split a node up to a given ancestor, e.g.
<strong>hi there, how <em>are <span>you</span> doing</em> today?</strong>
needs to be split into:
<strong>hi there, how <em>are <span>y</span></em></strong>
and
<strong><em><span>ou</span> doing</em> today?</strong>
How would I go about doing this?
Here is a solution that will work for modern browsers using Range. Something similar could be done for IE < 9 using TextRange, but I use Linux so I don't have easy access to those browsers. I wasn't sure what you wanted the function to do, return the nodes or just do a replace inline. I just took a guess and did the replace inline.
function splitNode(node, offset, limit) {
var parent = limit.parentNode;
var parentOffset = getNodeIndex(parent, limit);
var doc = node.ownerDocument;
var leftRange = doc.createRange();
leftRange.setStart(parent, parentOffset);
leftRange.setEnd(node, offset);
var left = leftRange.extractContents();
parent.insertBefore(left, limit);
}
function getNodeIndex(parent, node) {
var index = parent.childNodes.length;
while (index--) {
if (node === parent.childNodes[index]) {
break;
}
}
return index;
}
Demo: jsbin
It expects a TextNode for node, although it will work with an Element; the offset will just function differently based on the behavior of Range.setStart
See the method Text.splitText.
Not sure if this helps you, but this is what I came up with...
Pass the function an element and a node tag name string you wish to move up to.
<strong>hi there, how <em>are <span id="span">you</span> doing</em> today?</strong>
<script type="text/javascript">
function findParentNode(element,tagName){
tagName = tagName.toUpperCase();
var parentNode = element.parentNode;
if (parentNode.tagName == tagName){
//Erase data up to and including the node name we passed
console.log('Removing node: '+parentNode.tagName+' DATA: '+parentNode.firstChild.data);
parentNode.firstChild.data = '';
return parentNode;
}
else{
console.log('Removing node: '+parentNode.tagName+' DATA: '+parentNode.firstChild.data);
//Erase the first child's data (the first text node and leave the other nodes intact)
parentNode.firstChild.data = '';
//Move up chain of parents to find the tag we want. Return the results so we can do things with it after
return findParentNode(parentNode, tagName)
}
}
var ourNode = document.getElementById("span");
alert(findParentNode(ourNode,'strong').innerHTML);
</script>

What's the best way to strip out only the anchor HTML tags in javascript, given a string of html?

Let's say there's a string of HTML, with script tags, plain text, whatever.
What's the best way to strip out only the <a> tags?
I've been using some methods here, but these are for all tags. Strip HTML from Text JavaScript
Using jQuery:
var content = $('<div>' + htmlString + '</div>');
content.find('a').replaceWith(function() { return this.childNodes; });
var newHtml = content.html();
Adding a wrapping <div> tag allows us to get the desired HTML back.
I wrote a more detailed explanation on my blog.
This approach will preserve existing DOM nodes, minimizing side-effects if you have elements within the anchors that have events attached to them.
function unwrapAnchors() {
if(!('tagName' in this) || this.tagName.toLowerCase() != 'a' || !('parentNode' in this)) {
return;
}
var childNodes = this.childNodes || [], children = [], child;
// Convert childNodes collection to array
for(var i = 0, childNodes = this.childNodes || []; i < childNodes.length; i++) {
children[i] = childNodes[i];
}
// Move children outside element
for(i = 0; i < children.length; i++) {
child = children[i];
if(('tagName' in child) && child.tagName.toLowerCase() == 'a') {
child.parentNode.removeChild(child);
} else {
this.parentNode.insertBefore(child, this);
}
}
// Remove now-empty anchor
this.parentNode.removeChild(this);
}
To use (with jQuery):
$('a').each(unwrapAnchors);
To use (without jQuery):
var a = document.getElementsByTagName('a');
while(a.length) {
unwrapAnchors.call(a[a.length - 1]);
}
A <a> tag is not supposed to hold any other <a> tag, so a simple ungreedy regexp would do the trick (i.e. string.match(/<a>(.*?)<\/a>/), but this example suppose the tags have no attribute).
Here's a native (non-library) solution if performance is a concern.
function stripTag(str, tag) {
var a, parent, div = document.createElement('div');
div.innerHTML = str;
a = div.getElementsByTagName( tag );
while( a[0] ) {
parent = a[0].parentNode;
while (a[0].firstChild) {
parent.insertBefore(a[0].firstChild, a[0]);
}
parent.removeChild(a[0]);
}
return div.innerHTML;
}
Use it like this:
alert( stripTag( my_string, 'a' ) );

Categories

Resources