I am trying to replace text on a webpage with links. When I try this it just replaces the text with the tag and not a link. For example this code will replace "river" with:
asdf
This is what I have so far:
function handleText(textNode)
{
var v = textNode.nodeValue;
v = v.replace(/\briver\b/g, 'asdf');
textNode.nodeValue = v;
}
If all you wanted to do was change the text to other plain text, then you could change the contents of the text nodes directly. However, you are wanting to add an <a> element. For each <a> element you want to add, you are effectively wanting to add a child element. Text nodes can not have children. Thus, to do this you have to actually replace the text node with a more complicated structure. In doing so, you will want to make as little impact on the DOM as possible, in order to not disturb other scripts which rely on the current structure of the DOM. The simplest way to make little impact is to replace the text node with a <span> which contains the new text nodes (the text will split around the new <a>) and any new <a> elements.
The code below should do what you desire. It replaces the textNode with a <span> containing the new text nodes and the created <a> elements. It only makes the replacement when one or more <a> elements need to be inserted.
function handleTextNode(textNode) {
if(textNode.nodeName !== '#text'
|| textNode.parentNode.nodeName === 'SCRIPT'
|| textNode.parentNode.nodeName === 'STYLE'
) {
//Don't do anything except on text nodes, which are not children
// of <script> or <style>.
return;
}
let origText = textNode.textContent;
let newHtml=origText.replace(/\briver\b/g,'asdf');
//Only change the DOM if we actually made a replacement in the text.
//Compare the strings, as it should be faster than a second RegExp operation and
// lets us use the RegExp in only one place for maintainability.
if( newHtml !== origText) {
let newSpan = document.createElement('span');
newSpan.innerHTML = newHtml;
textNode.parentNode.replaceChild(newSpan,textNode);
}
}
//Testing: Walk the DOM of the <body> handling all non-empty text nodes
function processDocument() {
//Create the TreeWalker
let treeWalker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT,{
acceptNode: function(node) {
if(node.textContent.length === 0) {
//Alternately, could filter out the <script> and <style> text nodes here.
return NodeFilter.FILTER_SKIP; //Skip empty text nodes
} //else
return NodeFilter.FILTER_ACCEPT;
}
}, false );
//Make a list of the text nodes prior to modifying the DOM. Once the DOM is
// modified the TreeWalker will become invalid (i.e. the TreeWalker will stop
// traversing the DOM after the first modification).
let nodeList=[];
while(treeWalker.nextNode()){
nodeList.push(treeWalker.currentNode);
}
//Iterate over all text nodes, calling handleTextNode on each node in the list.
nodeList.forEach(function(el){
handleTextNode(el);
});
}
document.getElementById('clickTo').addEventListener('click',processDocument,false);
<input type="button" id="clickTo" value="Click to process"/>
<div id="testDiv">This text should change to a link -->river<--.</div>
The TreeWalker code was taken from my answer here.
Related
How can I write a javascript/jquery function that replaces text in the html document without affecting the markup, only the text content?
For instance if I want to replace the word "style" with "no style" here:
<tr>
<td style="width:300px">This TD has style</td>
<td style="width:300px">This TD has <span class="style100">style</span> too</td>
</tr>
I don't want the replacement to affect the markup, just the text content that is visible to the user.
You will have to look for the text nodes on your document, I use a recursive function like this:
function replaceText(oldText, newText, node){
node = node || document.body; // base node
var childs = node.childNodes, i = 0;
while(node = childs[i]){
if (node.nodeType == 3){ // text node found, do the replacement
if (node.textContent) {
node.textContent = node.textContent.replace(oldText, newText);
} else { // support to IE
node.nodeValue = node.nodeValue.replace(oldText, newText);
}
} else { // not a text mode, look forward
replaceText(oldText, newText, node);
}
i++;
}
}
If you do it in that way, your markup and event handlers will remain intact.
Edit: Changed code to support IE, since the textnodes on IE don't have a textContent property, in IE you should use the nodeValue property and it also doesn't implements the Node interface.
Check an example here.
Use the :contains selector to find elements with matching text and then replace their text.
$(":contains(style)").each(function() {
for (node in this.childNodes) {
if (node.nodeType == 3) { // text node
node.textContent = node.textContent.replace("style", "no style");
}
}
});
Unfortunately you can't use text() for this as it strips out HTML from all descendant nodes, not just child nodes and the replacement won't work as expected.
I am creating a little webextension which modifies a webpage depending on the text. As an example of my problem here is some code which has a tree walker grabs all text nodes on a page:
var treeWalker = document.createTreeWalker(
document.body,
NodeFilter.SHOW_TEXT,
{ acceptNode: () => {return NodeFilter.FILTER_ACCEPT;} },
false
);
while(treeWalker.nextNode()) {
let x = treeWalker.currentNode.data;
//do something with x
}
Unfortunately, x will have all of the text in the node, even if it isn't shown on the webpage.
What I want is something like treeWalker.currentNode.innerText, but that is undefined for text nodes. Does anyone know how to get only the text shown to the user for a text node?
Example: If a webpage has the node with the following HTML:
<div>
<script type="text/x-config">
{
"setObject": -1
}
</script>
<span>Quiz</span>
with associated CSS:
script {
display: none;
}
Then the text content of the respective text node (minus extra spaces and line breaks) is returned as "{ "setObject": -1 } Quiz". However, the only thing that is rendered to the user is "Quiz". Given the respective text node, how do I get only the rendered text?
I guess we have a problem.
If you are using a new browser you should be able to the innerText, but if you are not you have to use textContent.
The problem of textContent is that it gets the content of all elements including and and textContent is not aware of style so it will return hidden events.
I guess the way to go is to replace the filter with NodeFilter.SHOW_ELEMENT and get the Element.innerHTML.
So try it:
var treeWalker = document.createTreeWalker(
document.body,
NodeFilter.SHOW_ELEMENT,
{ acceptNode: (node) => { return NodeFilter.FILTER_ACCEPT;} },
false
);
while(treeWalker.nextNode()) {
let x = treeWalker.currentNode.innerHTML;
//do something with x
}
I have a DOM element that has some number of children, interleaved by text strings.
I want to get each of these text strings, and replace them using regex.
For example:
<div>Text started.
<a>Link child inside</a>
Some more text.
<u>Another child</u>
Closing text.</div>
In this example, I want to extract the strings "Text started.", "Some more text.", and "Closing text.", so that I can replace each of them later with something else.
The solution should be generic since the number of children inside the parent can vary, and the node types as well.
Anyone got a clever answer to achieve this easily using javascript?
You can use childNodes to check if the nodeType is a text node.
Doing this inside a forEach you can easily replace the text with whatever you want.
Example:
var div = document.getElementsByTagName('div').item(0);
[].slice.call(div.childNodes).forEach(function(node , i) {
if(node.nodeType === 3) {
var currNode = div.childNodes[i];
var currText = div.childNodes[i].textContent;
currNode.textContent = currText.replace(/text/i, ' Foo');
}
})
<div>
Text started.
<a>Link child inside</a>
Some more text.
<u>Another child</u>
Closing text.
</div>
You can do as follows;
var textNodes = [...test.childNodes].filter(child => child.nodeType === Node.TEXT_NODE)
textNodes.forEach(tn => console.log(tn.textContent))
<div id="test">Text started.
<a>Link child inside</a>
Some more text.
<u>Another child</u>
Closing text.</div>
I wanted to remove all text from html and print only tags. I Ended up writing this:
var html = $('html');
var elements = html.find('*');
elements.text('');
alert(html.html());
It only out prints <head></head><body></body>. Was not that suppose to print all tags. I've nearly 2000 tags in the html.
var elements = html.find('*');
elements.text('');
That says "find all elements below html, then empty them". That includes body and head. When they are emptied, there are no other elements on the page, so they are the only ones that appear in html's content.
If you really wnat to remove all text from the page and leave the elements, you'll have to do it with DOM methods:
html.find('*').each(function() { // loop over all elements
$(this).contents().each(function() { // loop through each element's child nodes
if (this.nodeType === 3) { // if the node is a text node
this.parentNode.removeChild(this); // remove it from the document
}
});
})
You just deleted everything from your dom:
$('html').find('*').text('');
This will set the text of all nodes inside the <html> to the empty string, deleting descendant elements - the only two nodes that are left are the two children of the root node, <head></head> and <body></body> with their empty text node children - exactly the result you got.
If you want to remove all text nodes, you should use this:
var html = document.documentElement;
(function recurse(el) {
for (var i=0; i<el.childNodes.length; i++) {
var child = el.childNodes[i];
if (child.nodeType == 3)
el.removeChild(child);
else
recurse(child);
}
})(html);
alert(html.outerHTML);
Try this instead
$(function(){
var elements = $(document).find("*");
elements.each(function(index, data){
console.log(data);
});
});
This will return all the html elements of page.
lonesomeday seems to have the right path, but you could also do some string rebuilding like this:
var htmlString=$('html').html();
var emptyHtmlString="";
var isTag=false;
for (i=0;i<htmlString.length;i++)
{
if(htmlString[i]=='<')
isTag=true;
if(isTag)
{
emptyHtmlString+=htmlString[i];
}
if(htmlString[i]=='>')
isTag=false;
}
alert(emptyHtmlString);
I'm trying to build a text editor using DOM Range. Let's say I'm trying to bold selected text. I do it using the following code. However, I couldn't figure out how I would remove the bold if it's already bolded. I'm trying to accomplish this without using the execCommand function.
this.selection = window.getSelection();
this.range = this.selection.getRangeAt(0);
let textNode = document.createTextNode(this.range.toString());
let replaceElm = document.createElement('strong');
replaceElm.appendChild(textNode);
this.range.deleteContents();
this.range.insertNode(replaceElm);
this.selection.removeAllRanges();
Basically, if the selection range is enclosed in <strong> tags, I'd want to remove it.
Ok so I drafted this piece of code. It basically grabs the current selected node, gets the textual content and removes the style tags.
// Grab the currenlty selected node
// e.g. selectedNode will equal '<strong>My bolded text</strong>'
const selectedNode = getSelectedNode();
// "Clean" the selected node. By clean I mean extracting the text
// selectedNode.textContent will return "My bolded text"
/// cleandNode will be a newly created text type node [MDN link for text nodes][1]
const cleanedNode = document.createTextNode(selectedNode.textContent);
// Remove the strong tag
// Ok so now we have the node prepared.
// We simply replace the existing strong styled node with the "clean" one.
// a.k.a undoing the strong style.
selectedNode.parentNode.replaceChild(cleanedNode, selectedNode);
// This function simply gets the current selected node.
// If you were to select 'My bolded text' it will return
// the node '<strong> My bolded text</strong>'
function getSelectedNode() {
var node,selection;
if (window.getSelection) {
selection = getSelection();
node = selection.anchorNode;
}
if (!node && document.selection) {
selection = document.selection
var range = selection.getRangeAt ? selection.getRangeAt(0) : selection.createRange();
node = range.commonAncestorContainer ? range.commonAncestorContainer :
range.parentElement ? range.parentElement() : range.item(0);
}
if (node) {
return (node.nodeName == "#text" ? node.parentNode : node);
}
};
I don't know if this is a "production" ready soution but I hope it helps. This should work for simple cases. I don't know how it will react with more complex cases. With rich text editing things can get quite ugly.
Keep me posted :)