extract text from html without loosing new lines - javascript

I use the following code to extract text from html.
var html = "first line. <div>second line. </div><div>third line.</div><div><br></div>"
var text = extractContent(html);
console.log("TEXT: " + text);
function extractContent(s) {
var span = document.createElement('span');
span.innerHTML = s;
return span.textContent || span.innerText;
}
the result of this code is the text without new lines. but I want the result to replace divs with "\n" like this:
first line."\n"second line. "\n" third line."\n"

Use s.replaceAll('&nbsp', '\\n'); to replace the linefeed.
Note: The backslash \ needs to be escaped with another \.
var html = "first line. <div>second line. </div><div>third line.</div><div><br></div>"
var text = extractContent(html);
console.log("TEXT: " + text);
function extractContent(s) {
var span = document.createElement('span');
s = s.replaceAll('&nbsp', '\\n');
span.innerHTML = s;
return span.textContent || span.innerText;
}

document.createElement is not necessary, you can use regexp alone to achieve this:
var html = "first line. <div>second line. </div><div>third line.</div><div><br></div>"
var text = extractContent(html);
console.log("TEXT: " + text);
function extractContent(s) {
/*
* /(<[^>]+>)+/gim <---- this regexp to match all html tags and replace them with \n,
* also merge empty content html tags into one \n
*
*/
return s.replace(/(<[^>]+>)+/gim, "\n");
}

Related

Javascript highlighter in editable code

My goal is to have a highlighter made in JavaScript which change some word's color multiple times.
<div id="code" contenteditable="true">
hello world hello world
</div>
I want to have an outuput as : " Hello world Hello world "
How can I change the color of both "hello" using JavaScript?
By this following code, you can write a function to find the words and replace them with a colorful element (e.g or & ...)
$(function(){
// inquire element for run the function
var elem = $('#code');
// call highlighter method with the word and elemnet
highlighter('hello', elem);
});
function highlighter(word, elem){
// get inner html from passed element
var html = elem.html();
// define a new regex for replacing the word on specified element
var reg = new RegExp(word,"g");
// replace all found words with a new wrapped word
html = html.replace(reg, "<span class='highlight'>" + word +"</span>");
// set the specified element with new html
elem.html(html);
}
.highlight {
color: red;
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<div id="code" contenteditable="true">
hello world hello world
</div>
You have to split your text to get each individual word, and then add each one in different 'span' tags which will have a 'fontWeight' property set to 'bold' if the word is 'hello'.
Here's an example:
var div = document.getElementById("code");
var text = div.innerText;
div.innerText = '';
text.split(' ').forEach(x => {
const span = document.createElement('span');
span.innerText = x + ' ';
if(x == 'hello')
span.style.fontWeight = 'bold';
div.appendChild(span);
})
Try out this fiddle

Match part of text inside of tag

I am in client side context.
I have this html:
<p>Text text \n other text </p>
I want to match only \n element inside paragraph, and replace only this with "br" tag.
I want to do this only inside tag "p" and for all match.
I supposed to use regex in javascript.
Thanks and sorry for my bad english.
Use html() method with callback and inside callback replace text using String#replace method.
$('p').html(function(i, htm) {
return htm.replace(/\\n/g, '<br>');
})
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<p>Text text \n other text</p>
UPDATE 1 : If it's a string then use String#replace method.
console.log(
'<p>Text text \n other text</p>'.replace(/\n/g, '<br>')
)
UPDATE 2 : If the string contains other tag element and you just want to update the p tag then do something like.
var str = '<p>Text text \n other text</p>';
console.log(
// create a temporary div eleemnt
$('<div>', {
// set html content from string
html: str
})
// get all p tags
.find('p')
// iterate and replace \n
.html(function(i, htm) {
// replace \n with br tag
return htm.replace(/\n/g, '<br>')
})
// back to the temp div
.end()
// get it's updated html content
.html()
)
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
UPDATE 3 : With pure JavaScript by generating a temporary DOM element.
var str = '<p>Text text \n other text</p>';
// create a temporary div element
var div = document.createElement('div');
// set html content form string
div.innerHTML = str;
// get all p tags and convert into Array using Array.from
// for older browser use [].sclice.call instead
Array.from(div.getElementsByTagName('p'))
// iterate over p tags
.forEach(function(el) {
// update the html content
el.innerHTML = el.innerHTML.replace(/\n/g, '<br>')
});
// get updated html content
console.log(div.innerHTML);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
You can use a for loop with getElementsByTagName:
for(i = 0; i < window.document.getElementsByTagName("p").length; i++){
window.document.getElementsByTagName("p")[i].innerHTML = window.document.getElementsByTagName("p")[i].innerHTML.replace(/\\n/g, "<br/>");
}
If this is inside a string and not inside the HTML, you can add it to a <div> element while handling it like this:
var myString = "<p>Text text \n other text </p>";
var div = document.createElement("div");
div.innerHTML = myString;
for(i = 0; i < div.getElementsByTagName("p").length; i++){
div.getElementsByTagName("p")[i].innerHTML = div.getElementsByTagName("p")[i].innerHTML.replace(/\\n/g, "<br/>");
}
myString = div.innerHTML;

HTML and Javascript: Remove a complete tag with dynamic content inside from string

I have a string that conains HTML. In this HTML I have a textbox with text inside:
<div class="aLotOfHTMLStuff"></div>
<textbox>This textbox must be terminated! Forever!</textbox>
<div class="andEvenMoreHTMLStuff"></div>
Now I want to remove the textbox from that string, including the text inside. The desired result:
<div class="aLotOfHTMLStuff"></div>
<div class="andEvenMoreHTMLStuff"></div>
How can I achieve it? The two main problems: It is a string and not part of the DOM and the content inside the textbox is dynamic.
Here is an example that will look for the opening and closing tags in the string and replace anything in between.
const template = document.querySelector('#html')
const str = template.innerHTML
function removeTagFromString(name, str) {
const reg = new RegExp('<' + name + '.*>.*<\/'+ name +'.*>\\n*', 'gm')
return str.replace(reg, '')
}
console.log('before', str)
console.log('after', removeTagFromString('textbox', str))
<template id="html">
<div class="aLotOfHTMLStuff"></div>
<textbox>This textbox must be terminated! Forever!</textbox>
<div class="andEvenMoreHTMLStuff"></div>
</template>
If it is text string not HTML, you can convert it to DOM:
var str = '<div class="aLotOfHTMLStuff"></div><textbox>This textbox must be terminated! Forever!</textbox><div class="andEvenMoreHTMLStuff"></div>';
var $dom = $('<div>' + str + '</div>');
Then remove element from DOM:
$dom.find('textbox').remove();
If you need, can get string back:
console.log($dom.html());
Try this:
var string = '<div class="aLotOfHTMLStuff"></div><textbox>This textbox must be terminated! Forever!</textbox><div class="andEvenMoreHTMLStuff"></div>';
if ( string.includes("<textbox>") ) {
var start = string.indexOf("<textbox>");
var stop = string.indexOf("</textbox>");
string = string.replace(string.slice(start, stop+10), "");
}
console.log(string);
You can use parseHTML to convert string to html,
like..
var str = '<div class="aLotOfHTMLStuff"></div><textbox>This textbox must be terminated! Forever!</textbox><div class="andEvenMoreHTMLStuff"></div>';
var a = $.parseHTML(str);
var newstr = "";
a.forEach(function(obj) {
if ($(obj).prop('tagName').toLowerCase() != "textbox") {
newstr += $(obj).prop("outerHTML")
}
});
It's very simple and you can remove any tag in future by just replacing "textbox"
It is a string and not part of the DOM and the content inside the
textbox is dynamic.
So that html is in a js variable of type string? like this?:
var string = '<div class="aLotOfHTMLStuff"></div><textbox>This textbox must be terminated! Forever!</textbox><div class="andEvenMoreHTMLStuff"></div>';
So in that case you could use .replace(); like this:
string = string.replace('<textbox>This textbox must be terminated! Forever!</textbox>', '');

Type new line character in Element.TextContent

I am trying to generate a textual content in a web application. I want a text to appear into a header element <h1> but the text must contain newline breaks.
First attempt
var el = document.getElementById("myel");
var newline = "\r\n";
var nbsp = "\u00a0";
el.textContent = "My awesome" + newline + "web" + nbsp + "application";
This is not working. I mean in the developer tools, by inspecting the DOM, I can see the text being broken between "awesome" and "web", but the browser does not render it that way: the text is all on the same line but at least the non breaing space (which is correctly rendered like &nbsp) is there.
Trying <br/>.
If I try to use <br/>, I must use innerHTML:
var el = document.getElementById("myel");
var newline = "<br/>";
var nbsp = "&nbsp";
el.innerHTML = "My awesome" + newline + "web" + nbsp + "application";
So there is no way I can get my problem solved by using textContent? I would really like avoiding innerHTML.
Use CSS white-space: pre; with "\r\n":
var el = document.getElementById("myel");
var newline = "\r\n";
var nbsp = "\u00a0";
el.textContent = "My awesome" + newline + "web" + nbsp + "application";
#myel {
white-space: pre;
}
<h1 id="myel"></h1>

case insensitive word boundaries with regExp

for some reason the gi modifier is behaving as case sensitive. Not sure what's going on, but maybe someone knows why this is. it works fine when the cases are the same. This JSFiddle will demonstrate my problem. Code below. Thanks.
javaScript:
var search_value = $('#search').val();
var search_regexp = new RegExp(search_value, "gi");
$('.searchable').each(function(){
var newText =(this).html().replace(search_value, "<span class = 'highlight'>" + search_value + "</span>");
$(this).html(newText);
});
HTML:
<input id = "search" value = "Red">
<div class = "searchable">this should be red</div>
<div class = "searchable">this should be Red</div>
Correct Code is
var search_value = $('#search').val();
var search_regexp = new RegExp(search_value, "gi");
$('.searchable').each(function(){
// var newText =$(this).html().replace(search_value, "<span class = 'highlight'>" + search_value + "</span>");
var newText =$(this).html().replace(search_regexp, function(matchRes) {
return "<span class = 'highlight'>" + matchRes + "</span>";
});
$(this).html(newText);
});
output
Fiddle
Issues with your code:-
First: search_regexp - You haven't used search_regexp anywhere in your code
Your Code
var newText =$(this).html().replace(search_value, "<span class = 'highlight'>" + search_value + "</span>");
Second
You are using search_value to replace. It will make both Red and red to either Red or red after replace.
eg: if search_value is Red then your output will be
this should be Red
this should be Red
you should use matched result instead of search_value
Third: How to use RegExp with replace function?
Correct Method is
var newText =$(this).html().replace(search_regexp, function(matchRes) {
return "<span class = 'highlight'>" + matchRes + "</span>";
});
Explanation
replace(<RegEx>, handler)
Your code isn't using your regex in the replace call, it's just using the search_value. This JSBin shows your code working: http://jsbin.com/toquz/1/
Do you actually want to replace the matches with the value (changing lowercase instances to uppercase in this example)? Using $.html() will also get you any markup within that element, so keep that in mind as well (in case there's a chance of having markup in the .searchable elements along with text.
Might be easier to do:
function highlight(term) {
var search_regexp = new RegExp(term, "gi");
$('.searchable').each(function(){
if (search_regexp.test($(this).html())) {
var highlighted = $(this).html().replace(search_regexp, function(m) {
return '<span class="highlight">'+m+'</span>';
});
$(this).html(highlighted);
}
});
}
Your original code in the JSBin is the highlightReplace() function.

Categories

Resources