I'm working on a project where I need to replace all occurrences of a string with another string. However, I only want to replace the string if it is text. For example, I want to turn this...
<div id="container">
<h1>Hi</h1>
<h2 class="Hi">Test</h2>
Hi
</div>
into...
<div id="container">
<h1>Hello</h1>
<h2 class="Hi">Test</h2>
Hello
</div>
In that example all of the "Hi"s were turned into "Hello"s except for the "Hi" as the h2 class.
I have tried...
$("#container").html( $("#container").html().replace( /Hi/g, "Hello" ) )
... but that replaces all occurrences of "Hi" in the html as well
This:
$("#container").contents().each(function () {
if (this.nodeType === 3) this.nodeValue = $.trim($(this).text()).replace(/Hi/g, "Hello")
if (this.nodeType === 1) $(this).html( $(this).html().replace(/Hi/g, "Hello") )
})
Produces this:
<div id="container">
<h1>Hello</h1>
<h2 class="Hi">Test</h2>
Hello
</div>
jsFiddle example
Nice results with:
function str_replace_all(string, str_find, str_replace){
try{
return string.replace( new RegExp(str_find, "gi"), str_replace ) ;
} catch(ex){return string;}}
and easier to remember...
replacedstr = str.replace(/needtoreplace/gi, 'replacewith');
needtoreplace should not rounded by '
//Get all text nodes in a given container
//Source: http://stackoverflow.com/a/4399718/560114
function getTextNodesIn(node, includeWhitespaceNodes) {
var textNodes = [], nonWhitespaceMatcher = /\S/;
function getTextNodes(node) {
if (node.nodeType == 3) {
if (includeWhitespaceNodes || nonWhitespaceMatcher.test(node.nodeValue)) {
textNodes.push(node);
}
} else {
for (var i = 0, len = node.childNodes.length; i < len; ++i) {
getTextNodes(node.childNodes[i]);
}
}
}
getTextNodes(node);
return textNodes;
}
var textNodes = getTextNodesIn( $("#container")[0], false );
var i = textNodes.length;
var node;
while (i--) {
node = textNodes[i];
node.textContent = node.textContent.replace(/Hi/g, "Hello");
}
Note that this will also match words where "Hi" is only part of the word, e.g. "Hill". To match the whole word only, use /\bHi\b/g
here you go => http://jsfiddle.net/c3w6X/1/
var children='';
$('#container').children().each(function(){
$(this).html($(this).html().replace(/Hi/g,"Hello")); //change the text of the children
children=children+$(this)[0].outerHTML; //copy the changed child
});
var theText=$('#container').clone().children().remove().end().text(); //get the text outside of the child in the root of the element
$('#container').html(''); //empty the container
$('#container').append(children+theText.replace(/Hi/g,"Hello")); //add the changed text of the root and the changed children to the already emptied element
Related
I need to search my entire document for a phone number, and compile a list of elements which have this phone number in them.
However I have encountered afew snags.
I can't simply do document.body.innerHTML and replace the numbers, as this messes up third party scripts.
The following will match the elements, but ONLY if they have the number within them, and nothing else:
let elements = document.querySelectorAll("a, div, p, li");
let found = [];
for (let elm in elements) {
if (elements.hasOwnProperty(elm)) {
if (elements[elm].textContent !== undefined && elements[elm].textContent.search("00000 000000") != -1) {
found.push(elements[elm]);
}
}
}
So the following element will not match:
<li class="footer__telephone">
<i class="fa fa-phone" aria-hidden="true"></i>00000 000000
</li>
Due to having the i tag in there.
Using textContent instead of text also does not work as the parent of an element will then match, but I don't want the parent.
Edit:
<div class="row-block hmpg-text">
<div class="wrapper">
<div class="container">
<div class="row">
<div class="twelvecol">
00000 000000
</div>
</div>
</div>
</div>
</div>
Lets say the above is my HTML, if I loop through all the elements and test them with testContent then the first is going to be returned as true, to containing my number, but I need the element with the class of twelvecol on it, not the parent which is 4 levels up.
Managed to find an answer, similar to what Phylogenesis said however couldn't get any of them examples working.
function replaceText(el, regex_display, regex_link) {
// Replace any links
if (el.tagName === "A") {
if (regex_link.test(el.getAttribute("href"))) {
el.setAttribute("href", el.getAttribute("href").replace(regex_link, replacement.replace(/\s/g, '')));
}
}
if (el.nodeType === 3) {
if (regex_display.test(el.data)) el.data = el.data.replace(regex_display, replacement);
if (regex_link.test(el.data)) el.data = el.data.replace(regex_link, replacement);
} else {
let children = el.childNodes;
for (let i = 0; i < children.length; i++) {
replaceText(children[i], regex_display, regex_link);
}
}
}
let bodyChildren = document.body.childNodes;
let search_display = new RegExp(search, "g");
let search_link = new RegExp(search.replace(/\s/g, ''), "g");
for (let i = 0; i < bodyChildren.length; i++) {
replaceText(bodyChildren[i], search_display, search_link);
}
I have this text inside a blockquote:
<blockquote class="tr_bq">
4 Text<br />
20 TExt<br />
2 Another text a little longer<br />
<br />
20 text</blockquote>
I want to add for each line a tag or convert the br to include a class. if the br was including all the line i would know how to do it. This is how i want to end like:
<blockquote class="tr_bq">
<strike>4 Text</strike><br/>
<strike>20 TExt</strike><br/>
<strike>2 Another text a little longer</strike><br/>
<br />
<strike>20 text</strike></blockquote>
or
<blockquote class="tr_bq">
<br class="X">4 Text<br>
<br class="X">20 TExt<br>
<br class="X">2 Another text a little longer<br>
<br />
<br class="X"> 20 text</br></blockquote>
I've tried with wrap but with no sucess, any way to do this?
You can do this by manipulating the inner HTML of the blockquote.
$('.tr_bq').each(function () {
var html = $(this).html();
var newHtml = html.split('<br>').map(function (str) {
return '<strike>' + str + '</strike>';
}).join('<br>');
$(this).html(newHtml);
});
Just to offer a plain-JavaScript means of achieving this, avoiding the (unnecessary) use of a library:
function wrapNodesWith(nodes, tag) {
// if we have neither nodes to wrap, nor a tag to wrap
// them with, we quit here:
if (!nodes || !tag) {
return false;
}
// otherwise:
// we convert the nodes to an array (using Array.prototype.slice,
// in conjunction with Function.prototype.call):
nodes = Array.prototype.slice.call(nodes, 0);
// if the tag parameter passed to the function is a string ('strike'),
// we create that element using document.createElement(tag),
// otherwise we assume we've got an HTMLElement (this is a very
// naive check) and so we use that:
tag = 'string' === typeof tag ? document.createElement(tag) : tag;
// an unitialised variable for use within the following forEach:
var clone;
nodes.forEach(function(n) {
// n is the node over which we're iterating,
// cloning the tag (to avoid multiple calls
// to document.createElement):
clone = tag.cloneNode();
// setting the textContent of the clone to the nodeValue
// of the node (if it's a textNode), or to the textContent of
// element (again a simple check):
clone.textContent = n.nodeType === 3 ? n.nodeValue : n.textContent;
// replacing the childNode, using parentNode.replaceChild(),
// inserting clone and removing n:
n.parentNode.replaceChild(clone, n);
});
}
// finding the first <blockquote> element:
var blockquote = document.querySelector('blockquote'),
// creating an array of the childNodes of the <blockquote>:
children = Array.prototype.slice.call(blockquote.childNodes, 0),
// filtering the children array, retaining only those nodes for
// which the assessment returns true:
textNodes = children.filter(function(n) {
return n.nodeType === 3;
});
// can be called with:
wrapNodesWith(textNodes, 'strike');
// or:
wrapNodesWith(textNodes, document.createElement('strike'));
function wrapNodesWith(nodes, tag) {
if (!nodes || !tag) {
return false;
}
nodes = Array.prototype.slice.call(nodes, 0);
tag = 'string' === typeof tag ? document.createElement(tag) : tag;
var parent, clone;
nodes.forEach(function(n) {
clone = tag.cloneNode();
clone.textContent = n.nodeType === 3 ? n.nodeValue : n.textContent;
n.parentNode.replaceChild(clone, n);
});
}
var blockquote = document.querySelector('blockquote'),
children = Array.prototype.slice.call(blockquote.childNodes, 0),
textNodes = children.filter(function(n) {
return n.nodeType === 3;
});
wrapNodesWith(textNodes, 'strike');
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<blockquote class="tr_bq">
4 Text
<br />20 TExt
<br />2 Another text a little longer
<br />
<br />20 text
</blockquote>
References:
Array.prototype.filter().
Array.prototype.forEach().
Array.prototype.slice().
Conditional ('ternary') operator.
document.createElement().
document.querySelector().
Function.prototype.call().
Node.nodeValue.
Node.replaceChild().
Well,
i managed to get it working with this:
var pre = document.getElementsByTagName('blockquote'),pl = pre.length;
for (var i = 0; i < pl; i++) {
var pro = pre[i].innerHTML.split(/<br>/), pz = pro.length;
pre[i].innerHTML = '';
for (var a=0; a < pz ; a++) {
pre[i].innerHTML += '<strike>' + pro[a] + '</strike><br/>';
}
}
I am trying to get the inner text of HTML string, using a JS function(the string is passed as an argument). Here is the code:
function extractContent(value) {
var content_holder = "";
for (var i = 0; i < value.length; i++) {
if (value.charAt(i) === '>') {
continue;
while (value.charAt(i) != '<') {
content_holder += value.charAt(i);
}
}
}
console.log(content_holder);
}
extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>");
The problem is that nothing gets printed on the console(*content_holder* stays empty). I think the problem is caused by the === operator.
Create an element, store the HTML in it, and get its textContent:
function extractContent(s) {
var span = document.createElement('span');
span.innerHTML = s;
return span.textContent || span.innerText;
};
alert(extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>"));
Here's a version that allows you to have spaces between nodes, although you'd probably want that for block-level elements only:
function extractContent(s, space) {
var span= document.createElement('span');
span.innerHTML= s;
if(space) {
var children= span.querySelectorAll('*');
for(var i = 0 ; i < children.length ; i++) {
if(children[i].textContent)
children[i].textContent+= ' ';
else
children[i].innerText+= ' ';
}
}
return [span.textContent || span.innerText].toString().replace(/ +/g,' ');
};
console.log(extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>. Nice to <em>see</em><strong><em>you!</em></strong>"));
console.log(extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>. Nice to <em>see</em><strong><em>you!</em></strong>",true));
One line (more precisely, one statement) version:
function extractContent(html) {
return new DOMParser()
.parseFromString(html, "text/html")
.documentElement.textContent;
}
textContext is a very good technique for achieving desired results but sometimes we don't want to load DOM. So simple workaround will be following regular expression:
let htmlString = "<p>Hello</p><a href='http://w3c.org'>W3C</a>"
let plainText = htmlString.replace(/<[^>]+>/g, '');
use this regax for remove html tags and store only the inner text in html
it shows the HelloW3c only check it
var content_holder = value.replace(/<(?:.|\n)*?>/gm, '');
Try This:-
<!DOCTYPE html>
<html>
<body>
<script type="text/javascript">
function extractContent(value){
var div = document.createElement('div')
div.innerHTML=value;
var text= div.textContent;
return text;
}
window.onload=function()
{
alert(extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>"));
};
</script>
</body>
</html>
For Node.js
This will use the jsdom library, since node.js doesn't have dom features as in browser.
import * as jsdom from "jsdom";
const html = "<h1>Testing<h1>";
const text = new jsdom.JSDOM(html).window.document.textContent;
console.log(text);
Use match() function to bring out HTML tags
const text = `<div>Hello World</div>`;
console.log(text.match(/<[^>]*?>/g));
You could temporarily write it out to a block level element that is positioned off the page .. some thing like this:
HTML:
<div id="tmp" style="position:absolute;top:-400px;left:-400px;">
</div>
JavaScript:
<script type="text/javascript">
function extractContent(value){
var div=document.getElementById('tmp');
div.innerHTML=value;
console.log(div.children[0].innerHTML);//console out p
}
extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>");
</script>
Using jQuery, in jQuery we can add comma seperated tags.
var readableText = [];
$("p, h1, h2, h3, h4, h5, h6").each(function(){
readableText.push( $(this).text().trim() );
})
console.log( readableText.join(' ') );
you need array to hold values
function extractContent(value) {
var content_holder = new Array();
for(var i=0;i<value.length;i++) {
if(value.charAt(i) === '>') {
continue;
while(value.charAt(i) != '<') {
content_holder.push(value.charAt(i));
console.log(content_holder[i]);
}
}
}
}extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>");
I'm trying to scrape text from an HTML string by using container.innerText || container.textContent where container is the element from which I want to extract text.
Usually, the text I want to extract is located in <p> tags. So for the HTML below as an example:
<div id="container">
<p>This is the first sentence.</p>
<p>This is the second sentence.</p>
</div>
Using
var container = document.getElementById("container");
var text = container.innerText || container.textContent; // the text I want
will return This is the first sentence.This is the second sentence. without a space between the first period and the start of the second sentence.
My overall goal is to parse text using the Stanford CoreNLP, but its parser cannot detect that these are 2 sentences because they are not separated by a space. Is there a better way of extracting text from HTML such that the sentences are separated by a space character?
The HTML I'm parsing will have the text I want mostly in <p> tags, but the HTML may also contain <img>, <a>, and other tags embeeded between <p> tags.
As a dirty hack, try using this:
container.innerHTML.replace(/<.*?>/g," ").replace(/ +/g," ");
This will replace all tags with a space, then collapse multiple spaces into a single one.
Note that if there is a > inside an attribute value, this will mess you up. Avoiding this problem will require more elaborate parsing, such as looping through all text nodes and putting them together.
Longer but more robust method:
function recurse(result, node) {
var c = node.childNodes, l = c.length, i;
for( i=0; i<l; i++) {
if( c[i].nodeType == 3) result += c.nodeValue + " ";
if( c[i].nodeType == 1) result = recurse(result, c[i]);
}
return result;
}
recurse(container);
Assuming I haven't made a stupid mistake, this will perform a depth-first search for text nodes, appending their contents to the result as it goes.
jQuery has the method text() that does what you want. Will this work for you?
I'm not sure if it fits for everything that's in your container but it works in my example. It will also take the text of a <a>-tag and appends it to the text.
Update 20.12.2020
If you're not using jQuery. You could implement the text method with vanilla js like this:
const nodes = Array.from(document.querySelectorAll("#container"));
const text = nodes
.filter((node) => !!node.textContent)
.map((node) => node.textContent)
.join(" ");
Using querySelectorAll("#container") to get every node in the container. Using Array.from so we can work with Array methods like filter, map & join.
Finally, generate the text by filtering out elements with-out textContent. Then use map to get each text and use join to add a space separator between the text.
$(function() {
var textToParse = $('#container').text();
$('#output').html(textToParse);
});
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<div id="container">
<p>This is the first sentence.</p>
<p>This is the second sentence.</p>
<img src="http://placehold.it/200x200" alt="Nice picture"></img>
<p>Third sentence.</p>
</div>
<h2>output:</h2>
<div id="output"></div>
You can use the following function to extract and process the text as shown. It basically goes through all the children nodes of the target element and the child nodes of the child nodes and so on ... adding spaces at appropriate points:
function getInnerText( sel ) {
var txt = '';
$( sel ).contents().each(function() {
var children = $(this).children();
txt += ' ' + this.nodeType === 3 ? this.nodeValue : children.length ? getInnerText( this ) : $(this).text();
});
return txt;
}
function getInnerText( sel ) {
var txt = '';
$( sel ).contents().each(function() {
var children = $(this).children();
txt += ' ' + this.nodeType === 3 ?
this.nodeValue : children.length ?
getInnerText( this ) : $(this).text();
});
return txt;
}
alert( getInnerText( '#container' ) );
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>
<div id="container">
Some other sentence
<p>This is the first sentence.</p>
<p>This is the second sentence.</p>
</div>
You may use jQuery to traverse down the elements.
Here is the code :
$(document).ready(function()
{
var children = $("#container").find("*");
var text = "";
while (children.html() != undefined)
{
text += children.html()+"\n";
children = children.next();
}
alert(text);
});
Here is the fiddle : http://jsfiddle.net/69wezyc5/
I am parsing the following query string:
/?alt_name=test&test=brand&old=Superman&new=Batman
and get this Object {alt_name: "Giggitty", test: "brand", old: "Supco", new: "Batman"}.
How would I go about creating a script that if the test=brand replace inner html instances of the word Superman(old) with the word Batman(new)?
Any help is greatly appriciated.
Edit. HTML would help although a super simple example will suffice.
<html>
<body>
<div>
<p>Superman and some other text</p>
</div>
</body>
</html>
Basically I just want every instance of the word Superman, or whatever the parsed value for old, to be replaced with whatever the parsed value for new is, in this case Batman.
You could do something like this if you want to match exactly "Superman" and not something that contains it.
HTML
<div>
<p>Superman and some other text</p>
</div>
<div>
<p>More Superman and some other text</p>
</div>
<div>
<p>Something Supermanish and some other text</p>
</div>
Javascript
function walkTheDOM(node, func) {
func(node);
node = node.firstChild;
while (node) {
walkTheDOM(node, func);
node = node.nextSibling;
}
}
function escapeRegex(string) {
return string.replace(/[\[\](){}?*+\^$\\.|]/g, "\\$&");
}
function replaceText(node, searchText, replaceText) {
var textNodes = [],
length,
i;
function filterTextNodes(currentNode) {
if (currentNode.nodeType === 3) {
textNodes.push(currentNode);
}
}
walkTheDOM(node, filterTextNodes);
i = 0;
length = textNodes.length;
while (searchText && i < length) {
textNodes[i].nodeValue = textNodes[i].nodeValue.replace(new RegExp("\\b" + escapeRegex(searchText) + "\\b"), replaceText);
i += 1;
}
}
var query = {
alt_name: "Giggitty",
test: "brand",
old: "Superman",
new: "Batman"
};
if (query.test === "brand") {
replaceText(document, query.old, query.new);
}
On jsfiddle