Javascript to replace text in Gmail message (Chrome Extension) - javascript

I'm trying to write a Chrome Extension which will replace a string of text in Gmail messages.
If I open a Gmail message and click the print button to see it in print view and run the extension it works well and replaces all instances of the string.
However, if I run the extension in the normal Gmail discussion view it causes the page to refresh and it does not actually replace any of the text.
Here is the code I'm using. Any ideas what I'm doing wrong?
function doIt(){
findAndReplace("Father","Mother");
}
var haystackText = "";
function findAndReplace(needle, replacement) {
if (haystackText.length == 0) {
haystackText = document.body.innerHTML;
}
var match = new RegExp(needle, "ig");
var replaced = "";
if (replacement.length > 0) {
replaced = haystackText.replace(match, replacement);
document.body.innerHTML = replaced;
}
}
doIt();
Update:
Here is the updated code I'm using after Felix Kling's help. Similar issue to the one above, it works in print preview but not in the regular Gmail view. It does not however reload the page.
function findAndReplace(root, needle, replacement) {
var children = root.childNodes;
var pattern = new RegExp(needle, 'ig');
var node;
for(var i = 0, l = children.length; i < l; i++) {
node = children[i];
if(node.nodeType === 3) { // we have a text node
node.nodeValue = node.nodeValue.replace(pattern, replacement);
} else if(node.nodeType === 1) { // Element node
findAndReplace(node, needle, replacement);
}
}
}
function doIt(){
findAndReplace(document,"Father","Mother");
}
doIt();

It's because you are changing the whole document. You are basically destroying and creating every element of the page.
document.body.innerHTML = replaced;
inserts every element of the page anew, so all JavaScript code is executed again etc.
You should only change those text nodes that contain the text you are looking for. That means you have to recursively traverse the DOM (not the whole, start at the message list), test the node type and value and perform the necessary actions.
Update: Example
function findAndReplace(root, needle, replacement) {
var children = root.childNodes,
pattern = new RegExp(needle, 'ig');
node;
for(var i = 0, l = childNodes.length; i < l; i++) {
node = children[i];
if(node.nodeType === 3) { // we have a text node
node.nodeValue = node.nodeValue.replace(pattern, replacement);
} else if(node.nodeType === 1) { // Element node
findAndReplace(node, needle, replacement);
}
}
}
This is a simple example. I actually don't know how well it performs, but I think you have no other choice. Further things to consider are e.g. escaping special regular expressions characters in the search string.
To learn more about JavaScript and DOM, I recommend to read:
MDN - JavaScript Guide
MDN - DOM reference (espcially the introduction)

Related

Replace text in the middle of a TextNode with an element

I want to insert html tags within a text node with TreeWalker, but TreeWalker forces my html brackets into & lt; & gt; no matter what I've tried. Here is the code:
var text;
var tree = document.createTreeWalker(document.body,NodeFilter.SHOW_TEXT);
while (tree.nextNode()) {
text = tree.currentNode.nodeValue;
text = text.replace(/(\W)(\w+)/g, '$1<element onmouseover="sendWord(\'$2\')">$2</element>');
text = text.replace(/^(\w+)/, '<element onmouseover="sendWord(\'$1\')">$1</element>');
tree.currentNode.nodeValue = text;
}
Using \< or " instead of ' won't help. My workaround is to copy all of the DOM tree to a string and to replace the html body with that. It works on very simple webpages and solves my first problem, but is a bad hack and won't work on anything more than a trivial page. I was wondering if I could just work straight with the text node rather than use a workaround. Here is the code for the (currently buggy) workaround:
var text;
var newHTML = "";
var tree = document.createTreeWalker(document.body);
while (tree.nextNode()) {
text = tree.currentNode.nodeValue;
if (tree.currentNode.nodeType == 3){
text = text.replace(/(\W)(\w+)/g, '$1<element onmouseover="sendWord(\'$2\')">$2</element>');
text = text.replace(/^(\w+)/, '<element onmouseover="sendWord(\'$1\')">$1</element>');
}
newHTML += text
}
document.body.innerHTML = newHTML;
Edit: I realize a better workaround would be to custom tag the text nodes ((Customtag_Start_Here) etc.), copy the whole DOM to a string, and use my customs tags to identify text nodes and modify them that way. But if I don't have to, I'd rather not.
To 'change' a text node into an element, you must replace it with an element. For example:
var text = tree.currentNode;
var el = document.createElement('foo');
el.setAttribute('bar','yes');
text.parentNode.replaceChild( el, text );
If you want to retain part of the text node, and inject an element "in the middle", you need to create another text node and insert it and the element into the tree at the appropriate places in the tree.
Edit: Here's a function that might be super useful to you. :)
Given a text node, it runs a regex on the text values. For each hit that it finds it calls a custom function that you supply. If that function returns a string, then the match is replaced. However, if that function returns an object like:
{ name:"element", attrs{onmouseover:"sendWord('foo')"}, content:"foo" }
then it will split the text node around the match and inject an element in that location. You can also return an array of strings or those objects (and can recursively use arrays, strings, or objects as the content property).
Demo: http://jsfiddle.net/DpqGH/8/
function textNodeReplace(node,regex,handler) {
var mom=node.parentNode, nxt=node.nextSibling,
doc=node.ownerDocument, hits;
if (regex.global) {
while(node && (hits=regex.exec(node.nodeValue))){
regex.lastIndex = 0;
node=handleResult( node, hits, handler.apply(this,hits) );
}
} else if (hits=regex.exec(node.nodeValue))
handleResult( node, hits, handler.apply(this,hits) );
function handleResult(node,hits,results){
var orig = node.nodeValue;
node.nodeValue = orig.slice(0,hits.index);
[].concat(create(mom,results)).forEach(function(n){
mom.insertBefore(n,nxt);
});
var rest = orig.slice(hits.index+hits[0].length);
return rest && mom.insertBefore(doc.createTextNode(rest),nxt);
}
function create(el,o){
if (o.map) return o.map(function(v){ return create(el,v) });
else if (typeof o==='object') {
var e = doc.createElementNS(o.namespaceURI || el.namespaceURI,o.name);
if (o.attrs) for (var a in o.attrs) e.setAttribute(a,o.attrs[a]);
if (o.content) [].concat(create(e,o.content)).forEach(e.appendChild,e);
return e;
} else return doc.createTextNode(o+"");
}
}
It's not quite perfectly generic, as it does not support namespaces on attributes. But hopefully it's enough to get you going. :)
You would use it like so:
findAllTextNodes(document.body).forEach(function(textNode){
replaceTextNode( textNode, /\b\w+/g, function(match){
return {
name:'element',
attrs:{onmouseover:"sendWord('"+match[0]+"')"},
content:match[0]
};
});
});
function findAllTextNodes(node){
var walker = node.ownerDocument.createTreeWalker(node,NodeFilter.SHOW_TEXT);
var textNodes = [];
while (walker.nextNode())
if (walker.currentNode.parentNode.tagName!='SCRIPT')
textNodes.push(walker.currentNode);
return textNodes;
}
or if you want something closer to your original regex:
replaceTextNode( textNode, /(^|\W)(\w+)/g, function(match){
return [
match[1], // might be an empty string
{
name:'element',
attrs:{onmouseover:"sendWord('"+match[2]+"')"},
content:match[2]
}
];
});
Function that returns the parent element of any text node including partial match of passed string:
function findElByText(text, mainNode) {
let textEl = null;
const traverseNodes = function (n) {
if (textEl) {
return;
}
for (var nodes = n.childNodes, i = nodes.length; i--;) {
if (textEl) {
break;
}
var n = nodes[i], nodeType = n.nodeType;
// Its a text node, check if it matches string
if (nodeType == 3) {
if (n.textContent.includes(text)) {
textEl = n.parentElement;
break;
}
}
else if (nodeType == 1 || nodeType == 9 || nodeType == 11) {
traverseNodes(n);
}
}
}
traverseNodes(mainNode);
return textEl;
}
Usage:
findElByText('Some string in document', document.body);

how to replace all matching plain text strings in string using javascript (but not tags or attributes)?

imagine this html on a page
<div id="hpl_content_wrap">
<p class="foobar">this is one word and then another word comes in foobar and then more words and then foobar again.</p>
<p>this is a link with foobar in an attribute but only the foobar inside of the link should be replaced.</p>
</div>
using javascript, how to change all 'foobar' words to 'herpderp' without changing any inside of html tags?
ie. only plain text should be changed.
so the successful html changed will be
<div id="hpl_content_wrap">
<p class="foobar">this is one word and then another word comes in herpderp and then more words and then herpderp again.</p>
<p>this is a link with herpderp in an attribute but only the herpderp inside of the link should be replaced. </p>
</div>
Here is what you need to do...
Get a reference to a bunch of elements.
Recursively walk the children, replacing text in text nodes only.
Sorry for the delay, I was sidetracked before I could add the code.
var replaceText = function me(parentNode, find, replace) {
var children = parentNode.childNodes;
for (var i = 0, length = children.length; i < length; i++) {
if (children[i].nodeType == 1) {
me(children[i], find, replace);
} else if (children[i].nodeType == 3) {
children[i].data = children[i].data.replace(find, replace);
}
}
return parentNode;
}
replaceText(document.body, /foobar/g, "herpderp");​​​
jsFiddle.
It's a simple matter of:
identifying all text nodes in the DOM tree,
then replacing all foobar strings in them.
Here's the full code:
// from: https://stackoverflow.com/questions/298750/how-do-i-select-text-nodes-with-jquery
var getTextNodesIn = function (el) {
return $(el).find(":not(iframe)").andSelf().contents().filter(function() {
return this.nodeType == 3;
});
};
var replaceAllText = function (pattern, replacement, root) {
var nodes = getTextNodesIn(root || $('body'))
var re = new RegExp(pattern, 'g')
nodes.each(function (i, e) {
if (e.textContent && e.textContent.indexOf(pattern) != -1) {
e.textContent = e.textContent.replace(re, replacement);
}
});
};
// replace all text nodes in document's body
replaceAllText('foobar', 'herpderp');
// replace all text nodes under element with ID 'someRootElement'
replaceAllText('foobar', 'herpderp', $('#someRootElement'));
Note that I do a precheck on foobar to avoid processing crazy long strings with a regexp. May or may not be a good idea.
If you do not want to use jQuery, but only pure JavaScript, follow the link in the code snippet ( How do I select text nodes with jQuery? ) where you'll also find a JS only version to fetch nodes. You'd then simply iterate over the returned elements in a similar fashion.

Creating a regex that ignores everything inside <code> blocks

I'm adding emoticons to user input with:
function emoticons(html){
for(var emoticon in emotes){
for(var i = 0; i < emotes[emoticon].length; i++){
// Escape bad characters like )
var r = RegExp.escape(emotes[emoticon][i]);
// Set the regex up to replace all matches
r_escaped = new RegExp(r, "g");
// Replace the emote with the image
html = html.replace(r_escaped,"<img src=\""+icon_folder+"/face-"+emoticon+".png\" class=\"emoticonimg\" />");
}
}
return html;
}
The problem is sometimes the user input is in <code>xxx</code> blocks. Is there a way to get the emoticons function to ignore everything inside the code blocks if they exists. As they won't always exist?
Thanks
In order to do this easily, I'd work only with text nodes (not serialised HTML) and skip code elements.
You tagged it jquery, so there is some jQuery convenience code to ease cross browser issues with utility functions. It is very easily modified to work without jQuery, however.
var searchText = function(parentNode, regex, callback, skipElements) {
skipElements = skipElements || ['script', 'style'];
var node = parentNode.firstChild;
do {
if (node.nodeType == 1) {
var tag = node.tagName.toLowerCase();
if (~$.inArray(tag, skipElements)) {
continue;
}
searchText.call(this, node, regex, callback);
} else if (node.nodeType == 3) {
while (true) {
// Does this node have a match? If not, break and return.
if (!regex.test(node.data)) {
break;
}
node.data.replace(regex, function(match) {
var args = $.makeArray(arguments),
offset = args[args.length - 2],
newTextNode = node.splitText(offset);
callback.apply(window, [node].concat(args));
newTextNode.data = newTextNode.data.substr(match.length);
node = newTextNode;
});
}
}
} while (node = node.nextSibling);
};
searchText($('body')[0], /:\)/, function(node, match) {
var img = $('<img />')[0];
img.src = 'http://www.gravatar.com/avatar/80200e1488ab252197b7f0f51ae230ef?s=32&d=identicon&r=PG';
img.alt = match;
node.parentNode.insertBefore(img, node.nextSibling);
}, ['code']);
jsFiddle.
I wrote this function recently, it should do what you hope to achieve.

How to replace word with javascript?

In some forum I join, they just replace some link with something like spam or deleted. Example: www.rapidshare.com/download/123 will automatically turn to www.spam.com/download/123 OR word MONEY will change to BOSS.
Its really annoyed me because I have to rename back manually if I want to download. Is there any Javascript that can solve this that will replace back www.spam.com to www.rapidshare.com? I mean in client side.
Thanks
If these URLs are in href attributes...
var replaceHrefAttributes = function (element, search, replace) {
var nodes = element.getElementsByTagName('a');
for (var i = 0, length = nodes.length; i < length; i++) {
var node = nodes[i];
if (node.href == undefined) {
continue;
}
node.href = node.href.replace(new RegExp(search, 'g'), replace);
}
}
Your usage may be something like...
replaceHrefAttributes(document.body, 'www.spam.com', 'www.rapidshare.com');
If these URLs are inline text...
You could iterate over all text nodes, using replace() to replace any string with another.
Here is a general purpose recursive function I've written to do this...
var replaceText = function replaceText(element, search, replace) {
var nodes = element.childNodes;
for (var i = 0, length = nodes.length; i < length; i++) {
var node = nodes[i];
if (node.childNodes.length) {
replaceText(node, search, replace);
continue;
}
if (node.nodeType != 3) {
continue;
}
node.data = node.data.replace(new RegExp(search, 'g'), replace);
}
}
Your usage may be something like...
replaceText(document.body, 'www.spam.com', 'www.rapidshare.com');
If you are curious as to how the code works, here is a brief explanation...
Get all child nodes of the element. This will get text nodes and elements.
Iterate over all of them.
If this node has child nodes of its own, call the function again with element as the element in the loop. continue because we can't modify this as it is not a text node.
If this node's nodeType property is not 3 (i.e. a text node), then continue as again we can't modify any text.
We are confident this is a text node now, so replace the text.
You could make this function more flexible by passing search straight to it, allowing it to search for text using a string or a regular expression.
var a = document.getElementsByTagName('a');
for (var i = 0, len = a.length ; i < len ; i += 1) {
a.firstChild.nodeValue = a.href;
}

Chrome browser extension that detects hashtags in page

As personal project I would like to build a Chrome extension that will find all hashtags on a page. I don't much about JS or jquery so I wonder how I should approach this?
EDIT:
The Chrome Extension injects javascript after the page has loaded, but I need to scan the whole document for a hashtag. It is looking through the whole document that I am not sure how to do.
Thanks!
If you mean simply anchor tags with a href that has a # in it then:
var aTags = document.getElementsByTagName("a");
for(var index = 0; index < aTags.length; index++){
if(aTags[index].href.indexOf("#") != -1){
alert("found one");
}
}
Or if you want something more general, one way to return the entire webpage is simply:
document.body.innerHTML //A string containing all the code/text inside the body of the webpage.
And then you can do some indexOf or a regex search/replace depending on what you want to do specifically.
But if you know that the hashtag you are looking for is always in some container like a anchor or even just a div with particular class then I would go with that instead of working with the entire page. Here is a list of useful methods to parse up a webpage:
document.getElementsByTagName("a"); //already mentioned
document.getElementsById("id");
document.getElementsByName("name");
//and a custom function to get elements by class name(I did not write this)
function getElementsByClass(searchClass, domNode, tagName)
{
if (domNode == null) domNode = document;
if (tagName == null) tagName = '*';
var el = new Array();
var tags = domNode.getElementsByTagName(tagName);
var tcl = " "+searchClass+" ";
for(i=0,j=0; i<tags.length; i++)
{
var test = " " + tags[i].className + " ";
if (test.indexOf(tcl) != -1)
{
el[j++] = tags[i];
}
}
return el;
}

Categories

Resources