Creating a regex that ignores everything inside <code> blocks - javascript

I'm adding emoticons to user input with:
function emoticons(html){
for(var emoticon in emotes){
for(var i = 0; i < emotes[emoticon].length; i++){
// Escape bad characters like )
var r = RegExp.escape(emotes[emoticon][i]);
// Set the regex up to replace all matches
r_escaped = new RegExp(r, "g");
// Replace the emote with the image
html = html.replace(r_escaped,"<img src=\""+icon_folder+"/face-"+emoticon+".png\" class=\"emoticonimg\" />");
}
}
return html;
}
The problem is sometimes the user input is in <code>xxx</code> blocks. Is there a way to get the emoticons function to ignore everything inside the code blocks if they exists. As they won't always exist?
Thanks

In order to do this easily, I'd work only with text nodes (not serialised HTML) and skip code elements.
You tagged it jquery, so there is some jQuery convenience code to ease cross browser issues with utility functions. It is very easily modified to work without jQuery, however.
var searchText = function(parentNode, regex, callback, skipElements) {
skipElements = skipElements || ['script', 'style'];
var node = parentNode.firstChild;
do {
if (node.nodeType == 1) {
var tag = node.tagName.toLowerCase();
if (~$.inArray(tag, skipElements)) {
continue;
}
searchText.call(this, node, regex, callback);
} else if (node.nodeType == 3) {
while (true) {
// Does this node have a match? If not, break and return.
if (!regex.test(node.data)) {
break;
}
node.data.replace(regex, function(match) {
var args = $.makeArray(arguments),
offset = args[args.length - 2],
newTextNode = node.splitText(offset);
callback.apply(window, [node].concat(args));
newTextNode.data = newTextNode.data.substr(match.length);
node = newTextNode;
});
}
}
} while (node = node.nextSibling);
};
searchText($('body')[0], /:\)/, function(node, match) {
var img = $('<img />')[0];
img.src = 'http://www.gravatar.com/avatar/80200e1488ab252197b7f0f51ae230ef?s=32&d=identicon&r=PG';
img.alt = match;
node.parentNode.insertBefore(img, node.nextSibling);
}, ['code']);
jsFiddle.
I wrote this function recently, it should do what you hope to achieve.

Related

Replace text in the middle of a TextNode with an element

I want to insert html tags within a text node with TreeWalker, but TreeWalker forces my html brackets into & lt; & gt; no matter what I've tried. Here is the code:
var text;
var tree = document.createTreeWalker(document.body,NodeFilter.SHOW_TEXT);
while (tree.nextNode()) {
text = tree.currentNode.nodeValue;
text = text.replace(/(\W)(\w+)/g, '$1<element onmouseover="sendWord(\'$2\')">$2</element>');
text = text.replace(/^(\w+)/, '<element onmouseover="sendWord(\'$1\')">$1</element>');
tree.currentNode.nodeValue = text;
}
Using \< or " instead of ' won't help. My workaround is to copy all of the DOM tree to a string and to replace the html body with that. It works on very simple webpages and solves my first problem, but is a bad hack and won't work on anything more than a trivial page. I was wondering if I could just work straight with the text node rather than use a workaround. Here is the code for the (currently buggy) workaround:
var text;
var newHTML = "";
var tree = document.createTreeWalker(document.body);
while (tree.nextNode()) {
text = tree.currentNode.nodeValue;
if (tree.currentNode.nodeType == 3){
text = text.replace(/(\W)(\w+)/g, '$1<element onmouseover="sendWord(\'$2\')">$2</element>');
text = text.replace(/^(\w+)/, '<element onmouseover="sendWord(\'$1\')">$1</element>');
}
newHTML += text
}
document.body.innerHTML = newHTML;
Edit: I realize a better workaround would be to custom tag the text nodes ((Customtag_Start_Here) etc.), copy the whole DOM to a string, and use my customs tags to identify text nodes and modify them that way. But if I don't have to, I'd rather not.
To 'change' a text node into an element, you must replace it with an element. For example:
var text = tree.currentNode;
var el = document.createElement('foo');
el.setAttribute('bar','yes');
text.parentNode.replaceChild( el, text );
If you want to retain part of the text node, and inject an element "in the middle", you need to create another text node and insert it and the element into the tree at the appropriate places in the tree.
Edit: Here's a function that might be super useful to you. :)
Given a text node, it runs a regex on the text values. For each hit that it finds it calls a custom function that you supply. If that function returns a string, then the match is replaced. However, if that function returns an object like:
{ name:"element", attrs{onmouseover:"sendWord('foo')"}, content:"foo" }
then it will split the text node around the match and inject an element in that location. You can also return an array of strings or those objects (and can recursively use arrays, strings, or objects as the content property).
Demo: http://jsfiddle.net/DpqGH/8/
function textNodeReplace(node,regex,handler) {
var mom=node.parentNode, nxt=node.nextSibling,
doc=node.ownerDocument, hits;
if (regex.global) {
while(node && (hits=regex.exec(node.nodeValue))){
regex.lastIndex = 0;
node=handleResult( node, hits, handler.apply(this,hits) );
}
} else if (hits=regex.exec(node.nodeValue))
handleResult( node, hits, handler.apply(this,hits) );
function handleResult(node,hits,results){
var orig = node.nodeValue;
node.nodeValue = orig.slice(0,hits.index);
[].concat(create(mom,results)).forEach(function(n){
mom.insertBefore(n,nxt);
});
var rest = orig.slice(hits.index+hits[0].length);
return rest && mom.insertBefore(doc.createTextNode(rest),nxt);
}
function create(el,o){
if (o.map) return o.map(function(v){ return create(el,v) });
else if (typeof o==='object') {
var e = doc.createElementNS(o.namespaceURI || el.namespaceURI,o.name);
if (o.attrs) for (var a in o.attrs) e.setAttribute(a,o.attrs[a]);
if (o.content) [].concat(create(e,o.content)).forEach(e.appendChild,e);
return e;
} else return doc.createTextNode(o+"");
}
}
It's not quite perfectly generic, as it does not support namespaces on attributes. But hopefully it's enough to get you going. :)
You would use it like so:
findAllTextNodes(document.body).forEach(function(textNode){
replaceTextNode( textNode, /\b\w+/g, function(match){
return {
name:'element',
attrs:{onmouseover:"sendWord('"+match[0]+"')"},
content:match[0]
};
});
});
function findAllTextNodes(node){
var walker = node.ownerDocument.createTreeWalker(node,NodeFilter.SHOW_TEXT);
var textNodes = [];
while (walker.nextNode())
if (walker.currentNode.parentNode.tagName!='SCRIPT')
textNodes.push(walker.currentNode);
return textNodes;
}
or if you want something closer to your original regex:
replaceTextNode( textNode, /(^|\W)(\w+)/g, function(match){
return [
match[1], // might be an empty string
{
name:'element',
attrs:{onmouseover:"sendWord('"+match[2]+"')"},
content:match[2]
}
];
});
Function that returns the parent element of any text node including partial match of passed string:
function findElByText(text, mainNode) {
let textEl = null;
const traverseNodes = function (n) {
if (textEl) {
return;
}
for (var nodes = n.childNodes, i = nodes.length; i--;) {
if (textEl) {
break;
}
var n = nodes[i], nodeType = n.nodeType;
// Its a text node, check if it matches string
if (nodeType == 3) {
if (n.textContent.includes(text)) {
textEl = n.parentElement;
break;
}
}
else if (nodeType == 1 || nodeType == 9 || nodeType == 11) {
traverseNodes(n);
}
}
}
traverseNodes(mainNode);
return textEl;
}
Usage:
findElByText('Some string in document', document.body);

How to remove all jQuery validation engine classes from element?

I have a plugin that is cloning an input that may or may not have the jQuery validation engine bound to it.
so, it's classes may contain e.g. validate[required,custom[number],min[0.00],max[99999.99]] or any combination of the jQuery validation engine validators.
The only for sure thing is that the class begins with validate[ and ends with ], but to make it more complicated as in the example above, there can be nested sets of [].
So, my question is, how can I remove these classes (without knowing the full class) using jQuery?
Here is my implementation, It's not using regex, but meh, who said it had too?
//'first validate[ required, custom[number], min[0.00], max[99999.99] ] other another';
var testString = $('#test')[0].className;
function removeValidateClasses(classNames) {
var startPosition = classNames.indexOf("validate["),
openCount = 0,
closeCount = 0,
endPosition = 0;
if (startPosition === -1) {
return;
}
var stringStart = classNames.substring(0, startPosition),
remainingString = classNames.substring(startPosition),
remainingSplit = remainingString.split('');
for (var i = 0; i < remainingString.length; i++) {
endPosition++;
if (remainingString[i] === '[') {
openCount++;
} else if (remainingString[i] === ']') {
closeCount++;
if (openCount === closeCount) {
break;
}
}
}
//concat the strings, without the validation part
//replace any multi-spaces with a single space
//trim any start and end spaces
return (stringStart + remainingString.substring(endPosition))
.replace(/\s\s+/g, ' ')
.replace(/^\s+|\s+$/g, '');
}
$('#test')[0].className = removeValidateClasses(testString);
It might actually be simpler to do that without JQuery. Using the className attribute, you can then get the list of classes using split(), and check whether the class contains "validate[".
var classes = $('#test')[0].className.split(' ');
var newClasses = "";
for(var i = 0; i < classes.length; i++){
if(classes[i].indexOf('validate[') === -1){
newClasses += classes[i];
}
}
$('#test')[0].className = newClasses
I think this solution is even more simple. You just have to replace field_id with the id of that element and if the element has classes like some_class different_class validate[...] it will only remove the class with validate, leaving the others behind.
var that ='#"+field_id+"';
var classes = $(that).attr('class').split(' ');
$.each(classes, function(index, thisClass){
if (thisClass.indexOf('validate') !== -1) {
$(that).removeClass(classes[index])
}
});

How to stop DOM searching loop while the first match is found?

I've modify a dom search/replace script, to replace multiple keywords matching on it, by a link, in a document.
It was working great without any <div> or <p>, but with a complex structure the keywords of each node are replaced...
This an example
As you could see, the same keyword is not linked several times in an element, but while there is some other elements the keywords are linked...
This is the script
(function(){
// don't replace text within these tags
var skipTags = { 'a': 1, 'style': 1, 'script': 1, 'iframe': 1, 'meta':1, 'title':1, 'img':1, 'h1':1 };
// find text nodes to apply replFn to
function findKW( el, term, replFn )
{
var child, tag;
if(!found)var found=false;
for (var i = 0;i<=el.childNodes.length - 1 && !found; i++)
{
child = el.childNodes[i];
if (child.nodeType == 1)
{ // ELEMENT_NODE
tag = child.nodeName.toLowerCase();
if (!(tag in skipTags))
{
findKW(child, term, replFn);
}
}
else if (child.nodeType == 3)
{ // TEXT_NODE
found=replaceKW(child, term, replFn);
}
}
};
// replace terms in text according to replFn
function replaceKW( text, term, replFn)
{
var match,
matches = [],found=false;
while (match = term.exec(text.data))
{
matches.push(match);
}
for (var i = 0;i<=matches.length - 1 && !found; i++)
{
match = matches[i];
// cut out the text node to replace
text.splitText(match.index);
text.nextSibling.splitText(match[1].length);
text.parentNode.replaceChild(replFn(match[1]), text.nextSibling);
if(matches[i])found=true;// To stop the loop
}
return found;
};
// Keywords to replace by a link
var terms=Array('keywords','words');
for(kw in terms)
{
findKW(
document.body,
new RegExp('\\b(' + terms[kw] + ')\\b', 'gi'),
function (match)
{
var link = document.createElement('a');
link.href = 'http://www.okisurf.com/#q=' + terms[kw];
link.id = '1';
link.target = '_blank';
link.innerHTML = match;
return link;
}
);
}
}());​
Please anyone could help me to stop the loop and replace only the first keyword matching ? (I'm going crazy with those nodes and the var found that I can't send like global while the threads are working in loop, for the findKW() function...) And without any library (no jQuery or other)
You can return true when you replaced the the word, and test for it to stop the recursion:
if (child.nodeType == 1) { // ELEMENT_NODE
tag = child.nodeName.toLowerCase();
if (!(tag in skipTags)) {
// If `findKW` returns `true`, a replacement as taken place further down
// the hierarchy and we can stop iterating over the other nodes.
if (findKW(child, term, replFn)) {
return true;
}
}
} else if (child.nodeType == 3) { // TEXT_NODE
if (replaceKW(child, term, replFn)) {
return true;
}
}
And remove any reference to found in this function, it is not needed.
DEMO (I also updated the replaceKW function, you don't need to collect all matches if you are only using the first one anyway).
Use break statement to exist from a loop block.
Example :
for(;;) {
if(condition)
break;
}
In your case you should add this on following position
else if (child.nodeType == 3)
{ // TEXT_NODE
found=replaceKW(child, term, replFn);
if(found)
break; // or alternately use return;
}

Javascript to replace text in Gmail message (Chrome Extension)

I'm trying to write a Chrome Extension which will replace a string of text in Gmail messages.
If I open a Gmail message and click the print button to see it in print view and run the extension it works well and replaces all instances of the string.
However, if I run the extension in the normal Gmail discussion view it causes the page to refresh and it does not actually replace any of the text.
Here is the code I'm using. Any ideas what I'm doing wrong?
function doIt(){
findAndReplace("Father","Mother");
}
var haystackText = "";
function findAndReplace(needle, replacement) {
if (haystackText.length == 0) {
haystackText = document.body.innerHTML;
}
var match = new RegExp(needle, "ig");
var replaced = "";
if (replacement.length > 0) {
replaced = haystackText.replace(match, replacement);
document.body.innerHTML = replaced;
}
}
doIt();
Update:
Here is the updated code I'm using after Felix Kling's help. Similar issue to the one above, it works in print preview but not in the regular Gmail view. It does not however reload the page.
function findAndReplace(root, needle, replacement) {
var children = root.childNodes;
var pattern = new RegExp(needle, 'ig');
var node;
for(var i = 0, l = children.length; i < l; i++) {
node = children[i];
if(node.nodeType === 3) { // we have a text node
node.nodeValue = node.nodeValue.replace(pattern, replacement);
} else if(node.nodeType === 1) { // Element node
findAndReplace(node, needle, replacement);
}
}
}
function doIt(){
findAndReplace(document,"Father","Mother");
}
doIt();
It's because you are changing the whole document. You are basically destroying and creating every element of the page.
document.body.innerHTML = replaced;
inserts every element of the page anew, so all JavaScript code is executed again etc.
You should only change those text nodes that contain the text you are looking for. That means you have to recursively traverse the DOM (not the whole, start at the message list), test the node type and value and perform the necessary actions.
Update: Example
function findAndReplace(root, needle, replacement) {
var children = root.childNodes,
pattern = new RegExp(needle, 'ig');
node;
for(var i = 0, l = childNodes.length; i < l; i++) {
node = children[i];
if(node.nodeType === 3) { // we have a text node
node.nodeValue = node.nodeValue.replace(pattern, replacement);
} else if(node.nodeType === 1) { // Element node
findAndReplace(node, needle, replacement);
}
}
}
This is a simple example. I actually don't know how well it performs, but I think you have no other choice. Further things to consider are e.g. escaping special regular expressions characters in the search string.
To learn more about JavaScript and DOM, I recommend to read:
MDN - JavaScript Guide
MDN - DOM reference (espcially the introduction)

How to replace word with javascript?

In some forum I join, they just replace some link with something like spam or deleted. Example: www.rapidshare.com/download/123 will automatically turn to www.spam.com/download/123 OR word MONEY will change to BOSS.
Its really annoyed me because I have to rename back manually if I want to download. Is there any Javascript that can solve this that will replace back www.spam.com to www.rapidshare.com? I mean in client side.
Thanks
If these URLs are in href attributes...
var replaceHrefAttributes = function (element, search, replace) {
var nodes = element.getElementsByTagName('a');
for (var i = 0, length = nodes.length; i < length; i++) {
var node = nodes[i];
if (node.href == undefined) {
continue;
}
node.href = node.href.replace(new RegExp(search, 'g'), replace);
}
}
Your usage may be something like...
replaceHrefAttributes(document.body, 'www.spam.com', 'www.rapidshare.com');
If these URLs are inline text...
You could iterate over all text nodes, using replace() to replace any string with another.
Here is a general purpose recursive function I've written to do this...
var replaceText = function replaceText(element, search, replace) {
var nodes = element.childNodes;
for (var i = 0, length = nodes.length; i < length; i++) {
var node = nodes[i];
if (node.childNodes.length) {
replaceText(node, search, replace);
continue;
}
if (node.nodeType != 3) {
continue;
}
node.data = node.data.replace(new RegExp(search, 'g'), replace);
}
}
Your usage may be something like...
replaceText(document.body, 'www.spam.com', 'www.rapidshare.com');
If you are curious as to how the code works, here is a brief explanation...
Get all child nodes of the element. This will get text nodes and elements.
Iterate over all of them.
If this node has child nodes of its own, call the function again with element as the element in the loop. continue because we can't modify this as it is not a text node.
If this node's nodeType property is not 3 (i.e. a text node), then continue as again we can't modify any text.
We are confident this is a text node now, so replace the text.
You could make this function more flexible by passing search straight to it, allowing it to search for text using a string or a regular expression.
var a = document.getElementsByTagName('a');
for (var i = 0, len = a.length ; i < len ; i += 1) {
a.firstChild.nodeValue = a.href;
}

Categories

Resources