Highlighting text in document (JavaScript) Efficiently

Highlighting text in document (JavaScript) Efficiently - javascript

How can I (efficiently - not slowing the computer [cpu]) highlight a specific part of a page?
Lets say that my page is as so:
<html>
<head>
</head>
<body>
"My generic words would be selected here" !.
<script>
//highlight code here
var textToHighlight = 'selected here" !';
//what sould I write here?
</script>
</body>
</html>
My idea is to "clone" all the body into a variable and find via indexOf the specified text, change(insert a span with a background-color) the "cloned" string and replace the "real" body with the "cloned" one.
I just think that it isn't efficient.
Do you have any other ideas? (be creative :) )

I've adapted the following from my answers to several similar questions on SO (example). It's designed to be reusable and has proved to be so. It traverses the DOM within a container node you specify, searching each text node for the specified text and using DOM methods to split the text node and surround the relevant chunk of text in a styled <span> element.
Demo: http://jsfiddle.net/HqjZa/
Code:
// Reusable generic function
function surroundInElement(el, regex, surrounderCreateFunc) {
// script and style elements are left alone
if (!/^(script|style)$/.test(el.tagName)) {
var child = el.lastChild;
while (child) {
if (child.nodeType == 1) {
surroundInElement(child, regex, surrounderCreateFunc);
} else if (child.nodeType == 3) {
surroundMatchingText(child, regex, surrounderCreateFunc);
}
child = child.previousSibling;
}
}
}
// Reusable generic function
function surroundMatchingText(textNode, regex, surrounderCreateFunc) {
var parent = textNode.parentNode;
var result, surroundingNode, matchedTextNode, matchLength, matchedText;
while ( textNode && (result = regex.exec(textNode.data)) ) {
matchedTextNode = textNode.splitText(result.index);
matchedText = result[0];
matchLength = matchedText.length;
textNode = (matchedTextNode.length > matchLength) ?
matchedTextNode.splitText(matchLength) : null;
surroundingNode = surrounderCreateFunc(matchedTextNode.cloneNode(true));
parent.insertBefore(surroundingNode, matchedTextNode);
parent.removeChild(matchedTextNode);
}
}
// This function does the surrounding for every matched piece of text
// and can be customized to do what you like
function createSpan(matchedTextNode) {
var el = document.createElement("span");
el.style.backgroundColor = "yellow";
el.appendChild(matchedTextNode);
return el;
}
// The main function
function wrapText(container, text) {
surroundInElement(container, new RegExp(text, "g"), createSpan);
}
wrapText(document.body, "selected here");

<html>
<head>
</head>
<body>
<p id="myText">"My generic words would be selected here" !.</p>
<script>
//highlight code here
var textToHighlight = 'selected here" !';
var text = document.getElementById("myText").innerHTML
document.getElementById("myText").innerHTML = text.replace(textToHighlight, '<span style="color:red">'+textToHighlight+'</span>');
//what sould I write here?
</script>
</body>
</html>

Use this in combination with this and you should be pretty ok. (It is almost better than trying to implement selection / selection-highlighting logic yourself.)

Related

"False" text node causing trouble

I'm working on a DOM traversal type of script and I'm almost finished with it. However, there is one problem that I've encountered and for the life of me, I can't figure out what to do to fix it. Pardon my ineptitude, as I'm brand new to JS/JQuery and I'm still learning the ropes.
Basically, I'm using Javascript/JQuery to create an "outline", representing the structure of an HTML page, and appending the "outline" to the bottom of the webpage. For example, if the HTML is this...
<html>
<head>
</head>
<body>
<h1>Hello World</h1>
<script src=”http://code.jquery.com/jquery-2.1.0.min.js” type=”text/javascript”>
</script>
<script src=”outline.js” type=”text/javascript”></script>
</body>
</html>
Then the output should be an unordered list like this:
html
head
body
h1
text(Hello World)
script src(”http://code.jquery.com/jquery-2.1.0.min.js”) type(”text/javascript”)
script src(”outline.js”) type(”text/javascript”)
Here's what I've got so far:
var items=[];
$(document).ready(function(){
$("<ul id = 'list'></ul>").appendTo("body");
traverse(document, function (node) {
if(node.nodeName.indexOf("#") <= -1){
items.push("<ul>"+"<li>"+node.nodeName.toLowerCase());
}
else {
var x = "text("+node.nodeValue+")";
if(node.nodeValue == null) {
items.push("<li> document");
}
else if(/[a-z0-9]/i.test(node.nodeValue) && node.nodeValue != null) {
items.push("<ul><li>"+ x +"</ul>");
}
else {
items.push("</ul>");
}
}
});
$('#list').append(items.join(''));
});
function traverse(node, func) {
func(node);
node = node.firstChild;
while (node) {
traverse(node, func);
node = node.nextSibling;
}
}
It works almost perfectly, except it seems to read a carriage return as a text node. For example, if there's
<head><title>
it reads that properly, adding head as an unordered list element, and then creating a new "unordered list" for title, which is nested inside the header. HOWEVER, if it's
<head>
<title>
It makes the new unordered list and its element, "head", but then jumps to the else statement that does items.push(</ul>) . How do I get it to ignore the carriage return? I tried testing to see if the nodeValue was equal to the carriage return, \r, but that didn't seem to do the trick.

I'm having a bit of a hard time understanding exactly which text nodes you want to skip. If you just want to skip a text node that is only whitespace, you can do that like this:
var onlyWhitespaceRegex = /^\s*$/;
traverse(document, function (node) {
if (node.nodeType === 3 && onlyWhitespaceRegex.test(node.nodeValue) {
// skip text nodes that contain only whitespace
return;
}
else if (node.nodeName.indexOf("#") <= -1){
items.push("<ul>"+"<li>"+node.nodeName.toLowerCase());
} else ...
Or, maybe you just want to trim any multiple leading or trailing whitespaces off a text node before displaying it since it may not display in HTML.
var trimWhitespaceRegex = /^\s+|\s+$/g;
traverse(document, function (node) {
if(node.nodeName.indexOf("#") <= -1){
items.push("<ul>"+"<li>"+node.nodeName.toLowerCase());
} else {
var text = node.nodeValue;
if (node.nodeType === 3) {
text = text.replace(trimWhitespaceRegex, " ");
}
var x = "text("+text+")";
if(node.nodeValue == null) {
items.push("<li> document");
} ....
A further description of exactly what you're trying to achieve in the output for various forms of different text nodes would help us better understand your requirements.

Function does not work on second occurence?

I am trying to change the color of multiple texts to a certain color by using this code:
var search = "bar";
$("div:contains('"+search+"')").each(function () {
var regex = new RegExp(search,'gi');
$(this).html($(this).text().replace(regex, "<span class='red'>"+search+"</span>"));
});
However, the code does not work a second time, and I am not sure why--it only changes the newest occurrence of the code.
Here is a JSFiddle using it twice where it is only changing the 2nd occurrence: http://jsfiddle.net/PELkt/189/
Could someone explain why it does not work on the 2nd occurrence?

Could someone explain why it does not work on the 2nd occurrence?
By calling .text() you are removing all the HTML markup, including the <span>s you just inserted.
This is the markup after the first replacement:
<div id="foo">this is a new <span class='red'>bar</span></div>
$(this).text() will return the string "this is a new bar", in which replace "new" with a <span> ("this is a <span class='red'>new</span> bar") and set it as new content of the element.
In order to do this right, you'd have to iterate over all text node descendants of the element instead, and process them individually. See Highlight a word with jQuery for an implementation.

It was easy to fix your jsfiddle. Simply replace both .text() with .html() & you'll see that it highlights new & both bars in red.
jQuery's .text() method will strip all markup each time that it's used, but what you want to do is use .html() to simply change the markup which is already in the DOM.
$(document).ready(function () {
var search = "bar";
$("div:contains('"+search+"')").each(function () {
var regex = new RegExp(search,'gi');
$(this).html($(this).html().replace(regex, "<span class='red'>"+search+"</span>"));
});
search = "new";
$("div:contains('"+search+"')").each(function () {
var regex = new RegExp(search,'gi');
$(this).html($(this).html().replace(regex, "<span class='red'>"+search+"</span>"));
});
});

Here is another way of doing it that will allow you to continue using text if you wish
function formatWord(content, term, className){
return content.replace(new RegExp(term, 'g'), '<span class="'+className+'">'+term+'</span>');
}
$(document).ready(function () {
var content = $('#foo').text();
var change1 = formatWord(content, 'bar', 'red'),
change2 = formatWord(change1, 'foo', 'red');
alert(change2);
$('body').html(change2);
});
http://codepen.io/nicholasabrams/pen/wGgzbR?editors=1010

Use $(this).html() instead of $(this).text(), as $.fn.text() strips off all the html tags, so are the <span class="red">foo</span> stripped off to foo.
But let's say that you apply same highlight multiple times for foo, then I would suggest that you should create a class similar to this to do highlighting
var Highlighter = function ($el, initialArray) {
this._array = initialArray || [];
this.$el = $el;
this.highlight = function (word) {
if (this.array.indexOf(word) < 0) {
this.array.push(word);
}
highlightArray();
}
function highlightArray() {
var search;
// first remove all highlighting
this.$el.find("span[data-highlight]").each(function () {
var html = this.innerHTML;
this.outerHTML = html;
});
// highlight all here
for (var i = 0; i < this._array.length; i += 1) {
search = this._array[i];
this.$el.find("div:contains('"+search+"')").each(function () {
var regex = new RegExp(search,'gi');
$(this).html($(this).html().replace(regex, "<span data-highlight='"+search+"' class='red'>"+search+"</span>"));
});
}
}
}
var highlighter = new HighLighter();
highlighter.highlight("foo");

DOM manipulation while preserving previous handlers

The problem:
Let's say I have a div with a text like this
<div id=”bla”>One Two Three Four Five</div>
I want to be able programmatically take any substring and wrap it into a <div> with some handler attached to it (say onlick). I want to be able to do it multiple times to different part of the text so in the end my div can end up looking like this:
<div id=”bla”>One <div id=”bla2”>Two</div> Three <div id=”bla4”>Four</div> Five</div>
The problem is how to do it?
Some thoughts:
Potentially, if I want to wrap the string Two into div, then if I just take whole div content using html(), do substring before and after Two, then do .empty() on the div and .append(beforeSubstring, <Two wrapper with some handler>, afterString) – it looks good, but it will put the beforeSubstring and afterSubstring into “” and remove all previous handlers. But I want to keep previous handlers and I don’t need “” since it messes things up for me.
Any thoughts? :)

You first need to cache your element's content like var cont = $("#bla").text();
than always use that cont before inserting your wrappers (using RegExp I suppose...) and placing the result back into that element like $("#bla").html(regexModifiedCont);
Example:
var $contentEl = $("#bla");
var content = $contentEl.text(); // Cache the original content!
$("#query").on("input", function(){
var reg = new RegExp("("+ $.trim(this.value) +")", "ig");
$contentEl.html( content.replace(reg, "<span class='clickable'>$1</span>") );
});
$("body").on("click", ".clickable", function(){
alert( $(this).text() );
});
.clickable{
background:gold;
cursor:pointer;
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
Write a text to take <input id=query type=text>
<div id=bla>One Two Three Four Five</div>

Perhaps you're looking for something like this? =)
function wrapTag(string, tag) {
return '<'+tag+'>' + string + '</'+tag+'>';
}
// http://stackoverflow.com/questions/5379120/get-the-highlighted-selected-text
function getSelectionText() {
var text = "";
if (window.getSelection) {
text = window.getSelection().toString();
} else if (document.selection && document.selection.type != "Control") {
text = document.selection.createRange().text;
}
return text;
}
function highlightSelection(tag) {
var highlighted = getSelectionText();
var newContent = $('#paragraph').html()
.replace(/\s+/g,' ').trim()
.replace(highlighted, wrapTag(highlighted, tag));
$('#paragraph').html(newContent);
}
Working example:
http://plnkr.co/edit/VCpGScPR9TEjuY3mtP2j?p=preview

Parsing through DOM get all children and values

Container is a div i've added some basic HTML to.
The debug_log function is printing the following:
I'm in a span!
I'm in a div!
I'm in a
p
What happened to the rest of the text in the p tag ("aragraph tag!!"). I think I don't understand how exactly to walk through the document tree. I need a function that will parse the entire document tree and return all of the elements and their values. The code below is sort of a first crack at just getting all of the values displayed.
container.innerHTML = '<span>I\'m in a span! </span><div> I\'m in a div! </div><p>I\'m in a <span>p</span>aragraph tag!!</p>';
DEMO.parse_dom(container);
DEMO.parse_dom = function(ele)
{
var child_arr = ele.childNodes;
for(var i = 0; i < child_arr.length; i++)
{
debug_log(child_arr[i].firstChild.nodeValue);
DEMO.parse_dom(child_arr[i]);
}
}

Generally when traversing the DOM, you want to specify a start point. From there, check if the start point has childNodes. If it does, loop through them and recurse the function if they too have childNodes.
Here's some code that outputs to the console using the DOM form of these nodes (I used the document/HTML element as a start point). You'll need to run an if against window.console if you're allowing non-developers to load this page/code and using console:
recurseDomChildren(document.documentElement, true);
function recurseDomChildren(start, output)
{
var nodes;
if(start.childNodes)
{
nodes = start.childNodes;
loopNodeChildren(nodes, output);
}
}
function loopNodeChildren(nodes, output)
{
var node;
for(var i=0;i<nodes.length;i++)
{
node = nodes[i];
if(output)
{
outputNode(node);
}
if(node.childNodes)
{
recurseDomChildren(node, output);
}
}
}
function outputNode(node)
{
var whitespace = /^\s+$/g;
if(node.nodeType === 1)
{
console.log("element: " + node.tagName);
}else if(node.nodeType === 3)
{
//clear whitespace text nodes
node.data = node.data.replace(whitespace, "");
if(node.data)
{
console.log("text: " + node.data);
}
}
}
Example: http://jsfiddle.net/ee5X6/

In
<p>I\'m in a <span>p</span>aragraph tag!!</p>
you request the first child, which is the text node containing "I\'m in a".
The text "aragraph tag!!" is the third child, which is not logged.
Curiously, the last line containing "p" should never occur, because the span element is not a direct child of container.

I'm not sure it is what you need or if it is possible in your environment but jQuery can accomplish something similar quite easily. Here is a quick jQuery example that might work.
<html>
<head>
<script src="INCLUDE JQUERY HERE">
</script>
</head>
<body>
<span>
<span>I\'m in a span! </span><div> I\'m in a div! </div><p>I\'m in a <span>p</span>aragraph tag!!</p>
</span>
<script>
function traverse(elem){
$(elem).children().each(function(i,e){
console.log($(e).text());
traverse($(e));
});
}
traverse($("body").children().first());
</script>
</body>
<html>
Which gives the following console output:
I\'m in a span!
I\'m in a div!
I\'m in a paragraph tag!!
p

How to select a part of string?

How to select a part of string?
My code (or example):
<div>some text</div>
$(function(){
$('div').each(function(){
$(this).text($(this).html().replace(/text/, '<span style="color: none">$1<\/span>'));
});
});
I tried this method, but in this case is selected all context too:
$(function(){
$('div:contains("text")').css('color','red');
});
I try to get like this:
<div><span style="color: red">text</span></div>

$('div').each(function () {
$(this).html(function (i, v) {
return v.replace(/foo/g, '<span style="color: red">$&<\/span>');
});
});

What are you actually trying to do? What you're doing at the moment is taking the HTML of each matching DIV, wrapping a span around the word "text" if it appears (literally the word "text") and then setting that as the text of the element (and so you'll see the HTML markup on the page).
If you really want to do something with the actual word "text", you probably meant to use html rather than text in your first function call:
$('div').each(function(){
$(this).html($(this).html().replace(/text/, '<span style="color: none">$1<\/span>'));
// ^-- here
}
But if you're trying to wrap a span around the text of the div, you can use wrap to do that:
$('div').wrap('<span style="color: none"/>');
Like this: http://jsbin.com/ucopo3 (in that example, I've used "color: blue" rather than "color: none", but you get the idea).

$(function(){
$('div:contains("text")').each(function() {
$(this).html($(this).html().replace(/(text)/g, '<span style="color:red;">\$1</span>'));
});
});
I've updated your fiddle: http://jsfiddle.net/nMzTw/15/

The general practice of interacting with the DOM as strings of HTML using innerHTML has many serious drawbacks:
Event handlers are removed or replaced
Opens the possibility of script inject attacks
Doesn't work in XHTML
It also encourages lazy thinking. In this particular instance, you're matching against the string "text" within the HTML with the assumption that any occurrence of the string must be within a text node. This is patently not a valid assumption: the string could appear in a title or alt attribute, for example.
Use DOM methods instead. This will get round all the problems. The following will use only DOM methods to surround every match for regex in every text node that is a descendant of a <div> element:
$(function() {
var regex = /text/;
function getTextNodes(node) {
if (node.nodeType == 3) {
return [node];
} else {
var textNodes = [];
for (var n = node.firstChild; n; n = n.nextSibling) {
textNodes = textNodes.concat(getTextNodes(n));
}
return textNodes;
}
}
$('div').each(function() {
$.each(getTextNodes(this), function() {
var textNode = this, parent = this.parentNode;
var result, span, matchedTextNode, matchLength;
while ( textNode && (result = regex.exec(textNode.data)) ) {
matchedTextNode = textNode.splitText(result.index);
matchLength = result[0].length;
textNode = (matchedTextNode.length > matchLength) ?
matchedTextNode.splitText(matchLength) : null;
span = document.createElement("span");
span.style.color = "red";
span.appendChild(matchedTextNode);
parent.insertBefore(span, textNode);
}
});
});
});

Develop Reference

JavaScript is the programming language of the Web.

Highlighting text in document (JavaScript) Efficiently - javascript

Use this in combination with this and you should be pretty ok. (It is almost better than trying to implement selection / selection-highlighting logic yourself.)

Related

"False" text node causing trouble

Function does not work on second occurence?

DOM manipulation while preserving previous handlers

Parsing through DOM get all children and values

How to select a part of string?

Categories

Resources