Extract the text out of HTML string using JavaScript - javascript

I am trying to get the inner text of HTML string, using a JS function(the string is passed as an argument). Here is the code:
function extractContent(value) {
var content_holder = "";
for (var i = 0; i < value.length; i++) {
if (value.charAt(i) === '>') {
continue;
while (value.charAt(i) != '<') {
content_holder += value.charAt(i);
}
}
}
console.log(content_holder);
}
extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>");
The problem is that nothing gets printed on the console(*content_holder* stays empty). I think the problem is caused by the === operator.

Create an element, store the HTML in it, and get its textContent:
function extractContent(s) {
var span = document.createElement('span');
span.innerHTML = s;
return span.textContent || span.innerText;
};
alert(extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>"));
Here's a version that allows you to have spaces between nodes, although you'd probably want that for block-level elements only:
function extractContent(s, space) {
var span= document.createElement('span');
span.innerHTML= s;
if(space) {
var children= span.querySelectorAll('*');
for(var i = 0 ; i < children.length ; i++) {
if(children[i].textContent)
children[i].textContent+= ' ';
else
children[i].innerText+= ' ';
}
}
return [span.textContent || span.innerText].toString().replace(/ +/g,' ');
};
console.log(extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>. Nice to <em>see</em><strong><em>you!</em></strong>"));
console.log(extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>. Nice to <em>see</em><strong><em>you!</em></strong>",true));

One line (more precisely, one statement) version:
function extractContent(html) {
return new DOMParser()
.parseFromString(html, "text/html")
.documentElement.textContent;
}

textContext is a very good technique for achieving desired results but sometimes we don't want to load DOM. So simple workaround will be following regular expression:
let htmlString = "<p>Hello</p><a href='http://w3c.org'>W3C</a>"
let plainText = htmlString.replace(/<[^>]+>/g, '');

use this regax for remove html tags and store only the inner text in html
it shows the HelloW3c only check it
var content_holder = value.replace(/<(?:.|\n)*?>/gm, '');

Try This:-
<!DOCTYPE html>
<html>
<body>
<script type="text/javascript">
function extractContent(value){
var div = document.createElement('div')
div.innerHTML=value;
var text= div.textContent;
return text;
}
window.onload=function()
{
alert(extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>"));
};
</script>
</body>
</html>

For Node.js
This will use the jsdom library, since node.js doesn't have dom features as in browser.
import * as jsdom from "jsdom";
const html = "<h1>Testing<h1>";
const text = new jsdom.JSDOM(html).window.document.textContent;
console.log(text);

Use match() function to bring out HTML tags
const text = `<div>Hello World</div>`;
console.log(text.match(/<[^>]*?>/g));

You could temporarily write it out to a block level element that is positioned off the page .. some thing like this:
HTML:
<div id="tmp" style="position:absolute;top:-400px;left:-400px;">
</div>
JavaScript:
<script type="text/javascript">
function extractContent(value){
var div=document.getElementById('tmp');
div.innerHTML=value;
console.log(div.children[0].innerHTML);//console out p
}
extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>");
</script>

Using jQuery, in jQuery we can add comma seperated tags.
var readableText = [];
$("p, h1, h2, h3, h4, h5, h6").each(function(){
readableText.push( $(this).text().trim() );
})
console.log( readableText.join(' ') );

you need array to hold values
function extractContent(value) {
var content_holder = new Array();
for(var i=0;i<value.length;i++) {
if(value.charAt(i) === '>') {
continue;
while(value.charAt(i) != '<') {
content_holder.push(value.charAt(i));
console.log(content_holder[i]);
}
}
}
}extractContent("<p>Hello</p><a href='http://w3c.org'>W3C</a>");

Related

Adding ID based on innerHTML to all elements in class

I need to write a code to add IDs to all element in one class. The IDs have to be based on innerText.
Elements look like that:
<lable class="sf-label-radio">Name1<span>Some Other Text that I do not need</span><label>
<lable class="sf-label-radio">Name2<span>Some Other Text that I do not need</span><label>
etc.
Here is my code:
<script>
addIDtoGI();
function addIDtoGI() {
let searchButtons = document.getElementsByClassName('sf-label-radio');
for(i = 0; i < searchButtons.length; i++) {
x = searchButtons[i].innerHTML;
x = x.substr(0, x.search("<")).replace(/\s+/g, '-').toLowerCase();
x = onlyEngLetters(x);
searchButtons[i].setAttribute('id',x);
}
}
function onlyEngLetters(text) {
text=text.replace("ę","e");
text=text.replace("ó","o");
text=text.replace("ą","a");
text=text.replace("ś","s");
text=text.replace("ł","l");
text=text.replace("ż","z");
text=text.replace("ź","z");
text=text.replace("ć","c");
text=text.replace("ń","n");
return text;
}
</script>
Thank You for help!
Iterate the childnodes until you get to the first textNode that isn't empty to get the text you want. Note also thaat replace() only works on first instance found and you probably want to convery to lower case to match your replacements
addIDtoGI()
function addIDtoGI(){
document.querySelectorAll('.sf-label-radio').forEach(el=>{
let txtNode = el.childNodes[0];
while(!txtNode.textContent.trim()){
txtNode = txt.nextSibling
}
el.id = onlyEngLetters(txtNode.textContent);
console.log(el.id)
});
}
function onlyEngLetters(text) {
return text.toLowerCase()
.replaceAll("ę","e")
.replaceAll("ó","o")
.replaceAll("ą","a")
.replaceAll("ś","s")
.replaceAll("ł","l")
.replaceAll("ż","z")
.replaceAll("ź","z")
.replaceAll("ć","c")
.replaceAll("ń","n")
}
<label class="sf-label-radio">Name1<span>Some Other Text that I do not need</span><label>
<label class="sf-label-radio">Name2<span>Some Other Text that I do not need</span><label>
First, you define your function but you never call it.
In your script, add the "()" to "addIDToGI;": addIGToGI();
There's also a typo on searchButtons.lenght, it should be length.
It should resolve your errors.
EDIT: Also, as someone mentionned in the comments, <lable> should be <label>.
First you must call function with () and when using for loop must use variables=>
for(let i = 0; i < searchButtons.length; i++) . length is true not lenght. and of course
let x = searchButtons[i].innerHTML;
and ...
const addIdToClassByInnerHTML = cls => {
const elements = document.querySelectorAll('.' + cls);
elements.forEach(el=>{
el.setAttribute('id', el.innerHTML);
});
}
note that do not use this function if the element contains child.
if your element contains one or more child(ren), use this:
const addIdToClassByInnerHTML = cls => {
const elements = document.querySelectorAll('.' + cls);
elements.forEach(el=>{
let html = el.innerHTML;
el.querySelectorAll('*').forEach(sub=>{
html = html.replace(sub.outerHTML, '');
});
el.setAttribute('id', html);
});
}
codepen demo
snippets
const addIdToClassByInnerHTML = cls => {
const elements = document.querySelectorAll('.' + cls);
elements.forEach(el=>{
let html = el.innerHTML;
el.querySelectorAll('*').forEach(sub=>{
html = html.replace(sub.outerHTML, '');
});
el.setAttribute('id', html);
});
}
addIdToClassByInnerHTML('sf-label-radio');
console.log(document.querySelectorAll('.sf-label-radio')[0]);
label{
display: block;
}
<label class="sf-label-radio">Name1<span>Some Other Text that I do not need</span><label>
<label class="sf-label-radio">Name2<span>Some Other Text that I do not need</span><label>

Will this loop correctly and be able to list tag names?

I want to prompt user to enter a tag and it will list it in the console.log and will ask again until they type "quit". if that happens then I will use the documentwrite to list in the innertext what the previous tags been searched for.
var selector = prompt("Please enter a selector: ");
var selectorr = document.getElementsByTagName(selector);
var breaker = "quit";
breaker = false;
var textlogger = "elements have been found that match the selector ";
var lengthfinder = selectorr.length;
while(true) {
console.log(lengthfinder + textlogger + selector);
if (selector == breaker) {
for (var i=0; i<divs.length; i++) {
document.write.innerText(textlogger);
}
}
}
If you wanna try jQuery and something fun, take this:
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Loop with jquery deferred</title>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<script type="text/javascript">
var loop = function () {
return $.Deferred(function (deferred) {
var selector = prompt("Please enter a selector: ");
var quit = 'quit';
var selectors = [];
while (selector && selector != quit) {
selectors.push(selector);
var elements = $(selector);
console.log(elements.length + " elements have been found that match the selector " + selector);
selector = prompt("Please enter a selector: ");
}
if (selector)
{
deferred.resolve(selectors);
}
else
{
deferred.reject();
}
}).promise();
};
$(function () {
loop().done(function (selectors) {
$($.map(selectors, function (item, index) {
return '<div>' + item + '</div>';
}).join('')).appendTo($('body'));
});
});
</script>
</head>
<body>
<div>
<iframe src="http://stackoverflow.com/questions/40392515/will-this-loop-correctly-and-be-able-to-list-tag-names"/>
</div>
</body>
</html>
Here is the version with comments and suggestions on where to put your necessary code for it to work.
Code Preview
var breaker = "quit",
textlogger = "elements have been found that match the selector ",
textList = new Array();
while (true) {
var selector = prompt("Please enter a selector: ");
if (selector == breaker) {
/*
Write your necessary output here
*/
/*
After output you break out
*/
break;
} else {
/*
Write It inside list
*/
textList.push(selector);
}
/*
Write necessary output in console
*/
console.log(selector);
}
I want to prompt user to enter a tag and it will list it in the
console.log and will ask again until they type "quit"
while ("quit" !== prompt("Tag name selector, type `quit` to exit", "quit")) {
console.log("in loop");
}
console.log("exit loop");
I will use the documentwrite to list in the innertext what the
previous tags been searched for.
Either you use: document.write("some text") to append to existing dom or you can use selectorr[i].innerText="some text"
Here is my small example that might help you:
var selector;
while ("quit" !== (selector = prompt("Tag name selector. Use `quit` to cancel search", "quit"))) {
var elements = document.getElementsByTagName(selector);
var count = elements.length;
while (count--) {
elements[count].innerHTML += " [matched]";
}
}
<span>This is my <span> tag 1</span>
<p>This is my <p> tag 1</p>
<div>This is my <div> tag 2</div>
<p>This is my <p> tag 3</p>
<span>This is my <span> tag 2</span>

jQuery replace all occurrences of a string in an html page

I'm working on a project where I need to replace all occurrences of a string with another string. However, I only want to replace the string if it is text. For example, I want to turn this...
<div id="container">
<h1>Hi</h1>
<h2 class="Hi">Test</h2>
Hi
</div>
into...
<div id="container">
<h1>Hello</h1>
<h2 class="Hi">Test</h2>
Hello
</div>
In that example all of the "Hi"s were turned into "Hello"s except for the "Hi" as the h2 class.
I have tried...
$("#container").html( $("#container").html().replace( /Hi/g, "Hello" ) )
... but that replaces all occurrences of "Hi" in the html as well
This:
$("#container").contents().each(function () {
if (this.nodeType === 3) this.nodeValue = $.trim($(this).text()).replace(/Hi/g, "Hello")
if (this.nodeType === 1) $(this).html( $(this).html().replace(/Hi/g, "Hello") )
})
Produces this:
<div id="container">
<h1>Hello</h1>
<h2 class="Hi">Test</h2>
Hello
</div>
jsFiddle example
Nice results with:
function str_replace_all(string, str_find, str_replace){
try{
return string.replace( new RegExp(str_find, "gi"), str_replace ) ;
} catch(ex){return string;}}
and easier to remember...
replacedstr = str.replace(/needtoreplace/gi, 'replacewith');
needtoreplace should not rounded by '
//Get all text nodes in a given container
//Source: http://stackoverflow.com/a/4399718/560114
function getTextNodesIn(node, includeWhitespaceNodes) {
var textNodes = [], nonWhitespaceMatcher = /\S/;
function getTextNodes(node) {
if (node.nodeType == 3) {
if (includeWhitespaceNodes || nonWhitespaceMatcher.test(node.nodeValue)) {
textNodes.push(node);
}
} else {
for (var i = 0, len = node.childNodes.length; i < len; ++i) {
getTextNodes(node.childNodes[i]);
}
}
}
getTextNodes(node);
return textNodes;
}
var textNodes = getTextNodesIn( $("#container")[0], false );
var i = textNodes.length;
var node;
while (i--) {
node = textNodes[i];
node.textContent = node.textContent.replace(/Hi/g, "Hello");
}
Note that this will also match words where "Hi" is only part of the word, e.g. "Hill". To match the whole word only, use /\bHi\b/g
here you go => http://jsfiddle.net/c3w6X/1/
var children='';
$('#container').children().each(function(){
$(this).html($(this).html().replace(/Hi/g,"Hello")); //change the text of the children
children=children+$(this)[0].outerHTML; //copy the changed child
});
var theText=$('#container').clone().children().remove().end().text(); //get the text outside of the child in the root of the element
$('#container').html(''); //empty the container
$('#container').append(children+theText.replace(/Hi/g,"Hello")); //add the changed text of the root and the changed children to the already emptied element

Smart text replacing with jQuery

I need to replace some part of text, e.g. mustache var {{myvar}}, on already loaded page.
Example html:
<html>
<head>
<title>{{MYTITLE}}</title>
</head>
<body>
<p><strong><ul><li>text {{TEXT}}</li></ul></strong></p>
{{ANOTHER}}
</body>
</html>
What's the problem? Use $(html).html(myrenderscript($(html).html()))!
It's ugly, slow and brokes <script> tags.
What do you want?
I want to get closest tag with {{}} and than render and replace.
Your researches?
Firstly, i tried: $('html :contains("{{")). But it returns <title>, <p>, <strong> .... But i need <title> and <li>.
Than i tried to filter them:
$('html :contains("{{")').filter(function (i) {
return $(this).find(':contains("{{")').length === 0
});
...but it WONT return {{ANOTHER}}. And that is my dead end. Your suggestions?
Using http://benalman.com/projects/jquery-replacetext-plugin/ you could do the following:
$('html *').replaceText(/{{([^}]+)}}/, function(fullMatch, key) {
return key;
}, true);
See http://jsfiddle.net/4nvNy/
If all you want to do is replace that text - then surely the following works (or have I mis-understood)
usage is as follows: CONTAINER (body) - replaceTExt (search term (I have built the function to always include {{}} around the term), (replace - this will remove the {{}} as well)
$('body').replaceText("MYTITLE","WHATEVER YOU WANT IT REPLACING WITH");
$.fn.replaceText = function(search, replace, text_only) {
return this.each(function(){
var v1, v2, rem = [];
$(this).find("*").andSelf().contents().each(function(){
if(this.nodeType === 3) {
v1 = this.nodeValue;
v2 = v1.replace("{{" + search + "}}", replace );
if(v1!=v2) {
if(!text_only && /<.*>/.test(v2)) {
$(this).before( v2 );
rem.push(this);
}
else this.nodeValue = v2;
}
}
});
if(rem.length) $(rem).remove();
});
};
You could avoid jQuery altogether if you wanted to with something like this:
<body>
<p><strong>
<ul>
<li>text {{TEXT}}</li>
</ul>
</strong></p>
{{ANOTHER}}
<hr/>
<div id="showResult"></div>
<script>
var body = document.getElementsByTagName('body')[0].innerHTML;
var startIdx = 0, endIdx = 0, replaceArray = [];
var scriptPos = body.indexOf('<script');
while (startIdx != 1) {
startIdx = body.indexOf('{{', endIdx) + 2;
if(startIdx > scriptPos){
break;
}
endIdx = body.indexOf('}}', startIdx);
var keyText = body.substring(startIdx, endIdx);
replaceArray.push({"keyText": keyText, 'startIdx': startIdx, 'endIdx': endIdx});
}
document.getElementById("showResult").innerHTML = JSON.stringify(replaceArray);
</script>
</body>
You can then do what you want with the replaceArray.

<textarea> what key was pressed with javascript

I would like to ask somebody how i can determine what key was pressed in a textarea....
need to write a little javascript code.. a user type in a textarea and i need to write it in a while he writing so the keydown, keypress event handle this functionality, also need to change the text color if a user typed a "watched" word (or the word what he wrote contains the "watched" word/words ) in the textarea.. any idea how i can handle it ??
till now did the text is appear in the <div>, but with this i have a problem.. can't check if the text is in the "watched"... the document.getElementById('IDOFTHETEXTAREATAG'); on keypress is not really works because i got back the whole text inside of the textarea.....
So how i can do it ? any ideas ??? "(Pref. in Mozilla FireFox)
Well, if you were using jQuery, you could do this given that the id of your textarea was 'ta':
$('#ta').keypress(function (evt) {
var $myTextArea = $(this); // encapsulates the textarea in the jQuery object
var fullText = $myTextArea.val(); // here is the full text of the textarea
if (/* do your matching on the full text here */) {
$myTextArea.css('color', 'red'); // changes the textarea font color to red
}
};
I suggest you use the 'onkeyup' event.
$( element ).keyup( function( evt ) {
var keyPressed = evt.keyCode;
//...
});
I have this made like this (plain JS, no JQuery):
function keyDown(e) {
var evt=(e)?e:(window.event)?window.event:null;
if(evt){
if (window.event.srcElement.tagName != 'TEXTAREA') {
var key=(evt.charCode)?evt.charCode: ((evt.keyCode)?evt.keyCode:((evt.which)?evt.which:0));
}
}
}
document.onkeydown=keyDown;
This script is in head tag. I am catching this in all textarea tags. Modify it for your purpose.
2 textareas.
In the first textarea I need to write the words or chars what you want to "watch" in the typing text.
In the second textarea I need to type text, so when I type text, under the textarea need to write what is in the textarea (real time) and highlight the whole word if contains the watched words or chars.
For example:
watched: text locker p
text: lockerroom (need to highlite the whole word because it contains the locker word) or apple (contains the p)
who I can do if a word not start with watched word/char to highlite the whole word?
JavaScript:
var text;
var value;
var myArray;
var found = new Boolean(false);
function getWatchedWords()
{
myArray = new Array();
text = document.getElementById('watched');
value = text.value;
myArray = value.split(" ");
for (var i = 0;i < myArray.length; i++)
{
document.getElementById('writewatched').innerHTML += myArray[i] + "<newline>";
}
}
function checkTypeing()
{
var text2 = document.getElementById('typeing');
var value2 = text2.value;
var last = new Array();
last = value2.split(" ");
if (last[last.length-1] == "")
{
if(found)
{
document.getElementById('writetyped').innerHTML += "</span>";
document.getElementById('writetyped').innerHTML += " ";
}
else
document.getElementById('writetyped').innerHTML += " ";
}
else
check(last[last.length-1]);
}
function check(string)
{
for (var i = 0; i < myArray.length; i++)
{
var occur = string.match(myArray[i]);
if(occur != null && occur.length > 0)
{
if (!found)
{
found = true;
document.getElementById('writetyped').innerHTML += "<span style='color: blue;'>";
}
else
{
found = true;
}
}
else
{
}
}
if(found)
{
document.getElementById('writetyped').innerHTML += string;
}
else
{
document.getElementById('writetyped').innerHTML += string;
}
}
HTML:
<html>
<head>
<title>TextEditor</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<script src='script.js' type='text/javascript'></script>
</head>
<body>
<div>
<p>Watched words:</p>
<textarea id="watched" onblur=getWatchedWords();>
</textarea>
</div>
<div id="writewatched">
</div>
<div>
<p>Text:</p>
<textarea id="typeing" onkeyup=checkTypeing();>
</textarea>
</div>
<div id="writetyped">
</div>
</body>
</html>

Categories

Resources