How to replace overlapping strings in Javascript without destroying the HTML structure - javascript

I have a string and an array of N items:
<div>
sometimes the fox can fly really high
</div>
const arr = ['the fox can', 'fox can fly', 'really high']
I want to find a way to replace the text inside the div with HTML to highlight those specific phrases inside the array without breaking the HTML. This can be problematic because I can't do a simple loop and replace because then other words will not match after a replacement because the highlight span would break something like indexOf or includes on the innerHTML, sure I can use innerText to read the text but it doesn't provide anything that makes it so I can add the "next" span without breaking the original HTML highlights. Ideally, I also want to be able to customize the class name depending on the word I use rather than just a generic highlight class too.
The outcome should be
<div>
sometimes
<span class="highlight-1">the <span class="highlight-2">fox can</span></span><span class="highlight-2"> fly</span> <span class="highlight-3">really high</span>
</div>
What have I tried?
I've really thought about this and cannot find any resources online that help with this scenario. Currently, my approach (below) also needs extra values such as the charStart and charEnd of each word. I don't like this solution because it depends on the DOMParser() API and feels really hacky; it definitely isn't performant, and I get a "vibe" that I shouldn't be doing it this way and that there must be better solutions. I am reaching out to SO for ideas on how I can accomplish this challenge.
// Wrap the raw content in a known element so it can be located by id after every re-parse.
let text = `<p id="content">${content}</p>`
let parser = new DOMParser().parseFromString(text, "text/html")
// Each entry of `strings` is read for charStart, charEnd, type and id.
for (const str of strings) {
  const content = parser.querySelector("#content")
  // Running character offset across all child nodes, compared against str.charStart / str.charEnd.
  let descLength = 0
  for (const node of content.childNodes) {
    const text = node.textContent
    let newTextContent = ""
    // Index loop instead of for...in: same UTF-16 offsets, without enumerating string keys.
    for (let i = 0; i < text.length; i++) {
      let newText = text[i]
      if (descLength === str.charStart) {
        newText = `<em class="highlight ${str.type}" data-id="${str.id}">${text[i]}`
      } else if (descLength === str.charEnd) {
        newText = `${text[i]}</em>`
      }
      newTextContent += newText
      descLength++
    }
    // Assigning to textContent stores the markup as escaped text (&lt;em ...&gt;); it is un-escaped below.
    node.textContent = newTextContent
  }
  // Replace the &lt; with `<` and replace &gt; with `>` to construct the HTML as text inside lastHtml.
  // NOTE(review): this also un-escapes any &lt; / &gt; that were part of the original text.
  const lastHtml = parser
    .querySelector("#content")
    .outerHTML.split("&lt;")
    .join("<")
    .split("&gt;")
    .join(">")
  // Redefine the parser variable with the updated HTML and let it automatically correct the element structure
  parser = new DOMParser().parseFromString(lastHtml, "text/html")
  /**
   * Replace the placeholder `<em>` element with the span elements to prevent future issues. We need the HTML
   * to be invalid for it to be correctly fixed by DOMParser, otherwise the HTML would be valid and *not* render how we'd like it to
   * Invalid => `<span>test <em>title </span>here</em>
   * Invalid (converted) => `<span>test <em>title </em></span><em>here</em>
   * Valid => `<span>test <span>title </span>here</span>
   */
  parser.querySelector("#content").innerHTML = parser
    .querySelector("#content")
    .innerHTML.replaceAll("<em ", "<span ")
    .replaceAll("</em>", "</span>")
}

I'll go over your example just to give an idea. Below code is not a clean function, please adjust it according to your needs.
const str = "sometimes the fox can fly really high";
const arr = ['the fox can', 'fox can fly', 'really high'];

/**
 * Escape the characters that have special meaning inside HTML text content.
 * @param {string} text - Plain text.
 * @returns {string} Text that is safe to place between tags.
 */
function escapeHtml(text) {
  return text
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;");
}

/**
 * Wrap every (case-insensitive) occurrence of each phrase in a
 * `<span class="highlight-N">`, where N is the phrase's index in `phrases`.
 * Overlapping phrases yield properly nested markup: when a span has to end
 * while later-opened spans are still open, those are closed and reopened.
 *
 * @param {string} text - Plain text to annotate.
 * @param {string[]} phrases - Phrases to highlight; empty phrases are ignored.
 * @returns {string} HTML with highlight spans inserted.
 */
function highlightPhrases(text, phrases) {
  // Match case-insensitively. Offsets from the lowered copy are applied to the
  // original text, which assumes lower-casing does not change the string length.
  const haystack = text.toLowerCase();

  // First, collect "event points": where each occurrence starts and ends.
  const eventPoints = [];
  phrases.forEach((phrase, i) => {
    if (phrase.length === 0) {
      return; // an empty phrase would match at every position
    }
    const needle = phrase.toLowerCase();
    const className = `highlight-${i}`;
    let index = haystack.indexOf(needle);
    while (index !== -1) {
      eventPoints.push({ pos: index, className, eventType: "start" });
      eventPoints.push({ pos: index + needle.length, className, eventType: "end" });
      index = haystack.indexOf(needle, index + 1);
    }
  });

  // Sort by position; at the same position close spans before opening new
  // ones so that adjacent phrases do not produce empty nested spans.
  eventPoints.sort((a, b) => {
    if (a.pos !== b.pos) {
      return a.pos - b.pos;
    }
    if (a.eventType === b.eventType) {
      return 0;
    }
    return a.eventType === "end" ? -1 : 1;
  });

  let result = "";
  const stack = []; // class names of the currently open spans, innermost last
  let index = 0; // position in `text` emitted so far

  for (const e of eventPoints) {
    // Emit the text between the previous event and this one.
    result += escapeHtml(text.substring(index, e.pos));
    if (e.eventType === "start") {
      result += `<span class="${e.className}">`;
      stack.push(e.className);
    } else {
      // Close spans opened after the one that ends here, then reopen them.
      const reopen = [];
      while (stack.length > 0) {
        result += "</span>";
        const top = stack.pop();
        if (top === e.className) {
          break;
        }
        reopen.push(top);
      }
      while (reopen.length > 0) {
        const className = reopen.pop();
        result += `<span class="${className}">`;
        stack.push(className);
      }
    }
    index = e.pos;
  }
  result += escapeHtml(text.substring(index));
  return result;
}

const result = highlightPhrases(str, arr);
console.log(result);

Related

How to remove the first index of an array based on input value?

I apologize in advance if i'm not conveying things properly.
I'm trying to take an array, split the strings into characters, and remove the first index if it matches my input value, and continue linearly.
I'm a little lost on how I should be thinking about solving this problem.
So far I have gotten to here,
// Input handler (the question's attempt): compares the input's value with the first character of newArr[0].
// NOTE(review): `showCurrentValue` is assigned without a declaration, so it is an implicit global
// (and a ReferenceError in strict mode / ES modules).
showCurrentValue = (event) => {
// `value` is the input's entire current value, not only the most recently typed character.
const value = event.target.value;
document.getElementById("textV").innerText = value;
// The character array is rebuilt from newArr[0] on every call, so anything shifted off below
// is present again the next time the handler runs.
let arrIndex = newArr[0].split('');
for (let i = 0; i < arrIndex.length; i++) {
// Only index 0 is ever inspected; `i` just bounds how many times the same comparison repeats.
if (value === arrIndex[0]) {
console.log("Match");
arrIndex.shift()
} else {
console.log("Err")
}
}
}
With or without the loop it still behaves the same, it DOES remove the first index, but fails to continue, and logs "Err" for all the rest of the characters in the string. It won't match the next character. In my head i'm thinking if I just target the 0 index, and the array will update as each character is removed(?).
Hope someone can shine some light on this, Thanks!!
Basically, i'm trying to build one of those Typing speed test applications.
So I have my array of randomized words rendered to the DOM with an input field below it.
If, the input value matches the first character, I want to manipulate it based on if true or false. Change it's color, remove it from the DOM, etc.
So, my issue currently is getting the 2nd character of my input to compare to the next current character index.
Maybe my whole approach is wrong(?)
The first time you hit a situation where your value !== arrIndex[0], you keep viewing the same element (arrIndex[0]) and not iterating to the next element.
@wlh's comment is pretty close to the solution I think you're looking for:
const filtered = arrIndex.filter(char => char !== value);
filtered will then have all the elements of arrIndex but remove any and all elements that matched value.
This is just one possible solution. You continue moving thru arrIndex with out changing its length, checking each case, and adding the cases which don't match your value to a new array. Once you have finished moving thru the arrIndex then you assign arrIndex to the updatedIndex;
/**
 * Remove every character of the first word (newArr[0]) that equals the input's value.
 *
 * @param {{target: {value: string}}} event - Input event; `event.target.value` is compared
 *   against each character.
 * @returns {string} The first word with all matching characters removed.
 */
const showCurrentValue = (event) => {
  const value = event.target.value;
  const updatedIndex = [];
  const arrIndex = newArr[0].split('');
  // Walk the whole word without changing its length; keep the characters that do not match.
  for (const char of arrIndex) {
    if (value !== char) {
      console.log("Err");
      updatedIndex.push(char);
    } else {
      console.log("Match");
    }
  }
  // The original reassigned the `const arrIndex` here (a TypeError); return the result instead.
  return updatedIndex.join('');
};
Another solution could be
/**
 * Return the characters of the first word (newArr[0]) that differ from the input's value.
 *
 * @param {{target: {value: string}}} event - Input event carrying the value to remove.
 * @returns {string[]} A new array of the remaining characters.
 */
// Declared with `const`: a bare assignment creates an implicit global and throws in strict mode.
const showCurrentValue = (event) => {
  const { value } = event.target;
  return newArr[0].split('').filter((char) => char !== value);
};
.filter will return a new array - it will loop thru each char and return ones which don't match your value

Javascript .indexOf matching HTML symbols

I solved a problem in a very sloppy manner, looking for a better solution.
Task
Check if a span contains an HTML symbol(an arrow), if present, replace the symbol with it's opposite.
First Attempt (not showing the second code snipet that does the reverse)
first=headers[0].querySelector('span')
if (first.innerHTML.indexOf('&searr;') !== -1)
first.innerHTML=first.innerHTML.substring(0, first.innerHTML.length - 13)
first.innerHTML+=' &nwarr;'
Why doesn't that work?
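Because innerHTML serializes the two entities differently: `&nbsp;` comes back as `&nbsp;`,
but for some reason `&searr;` comes back as the literal character `↘`, so searching innerHTML for '&searr;' never matches.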
Seriously! Try this (tested in IE 11 and chrome)
<span>test</span>
<script>
first=document.querySelector('span')
first.innerHTML+='&nbsp&searr;'
console.log(first.innerHTML)
</script>
In the console you get: test&nbsp;↘
My ugly solution
I copied the arrow into my JS code
if (first.innerHTML.indexOf('↘') !== -1)
first.innerHTML=first.innerHTML.substring(0, first.innerHTML.length - 7)
first.innerHTML+=' &nwarr;'
So...what's the correct way of solving this issue?
If you're worried about having that literal arrow in your code, you shouldn't be. If your toolchain is correctly configured, it's just fine to have that literal arrow in your code.
But if you want to avoid it, you can use a unicode escape for it instead:
const nwarr = "\u2196";
if (first.innerHTML.indexOf(nwarr) !== -1) {
first.innerHTML = first.innerHTML.substring(0, first.innerHTML.length - 7);
}
first.innerHTML += ' &nwarr;'; // <== Did you mean &searr; here? The opposite of nwarr?
Live example (using &searr; in the replacement):
const first = document.getElementById("first");
setTimeout(() => {
const nwarr = "\u2196";
if (first.innerHTML.indexOf(nwarr) !== -1) {
first.innerHTML = first.innerHTML.substring(0, first.innerHTML.length - 7);
}
first.innerHTML += ' &searr;';
}, 800);
<span id="first">Foo &nwarr;</span>
Two side notes:
1. To replace things, I'd use replace rather than substring and +=, not least so you don't have magic numbers (13, 7) in your code:
first.innerHTML = first.innerHTML.replace(/\ \u2196/g, " &searr;");
You might even allow for some browsers using a character entity rather than the character itself:
first.innerHTML = first.innerHTML.replace(/\ (?:\&nwarr;|\u2196)/g, " &searr;");
Live example (using &searr; in the replacement):
const first = document.getElementById("first");
setTimeout(() => {
first.innerHTML = first.innerHTML.replace(/\ (?:\&nwarr;|\u2196)/g, " &searr;");
}, 800);
<span id="first">Foo &nwarr;</span>
2. I would avoid textual manipulation of innerHTML. If you assign to it, even when assigning back what it already had, it tears down all of the elements (and other nodes) within the element you're doing it on, and creates new, replacement elements and nodes.
For instance, you could spin through the text nodes:
const first = document.getElementById("first");
setTimeout(() => {
handleReplacement(first);
}, 800);
/**
 * Walk the children of `el` depth-first and, in every text node, swap a
 * non-breaking space followed by U+2196 for a non-breaking space followed by
 * U+2198. Only nodeValue of text nodes is written; elements are recursed into
 * and all other node types are left alone.
 *
 * @param {Node} el - Root whose descendants are processed in place.
 */
function handleReplacement(el) {
  const ELEMENT = 1;
  const TEXT = 3;
  let node = el.firstChild;
  while (node) {
    switch (node.nodeType) {
      case ELEMENT:
        handleReplacement(node);
        break;
      case TEXT:
        node.nodeValue = node.nodeValue.replace(/\u00A0\u2196/g, "\u00A0\u2198");
        break;
      default:
        break;
    }
    node = node.nextSibling;
  }
}
<span id="first">Foo &nwarr;</span>
Based on T.J. Crowder's very good insights. I patched my test code into this:
<span>test &nwarr;</span>
<script>
const nw="\u2196";
const se="\u2198";
first=document.querySelector('span')
if (first.innerHTML.indexOf(nw) !== -1)
{
console.log("found arrow");
first.innerHTML = first.innerHTML.replace(/\u2196/, se);
}
</script>
It still feels like we've stumbled into a messy/buggy corner of the browser world.

How to replace multiple keywords in a string with a component

To start off, I'm primarily an AngularJS developer and recently switched to React, and I decided to convert an angular webapp I had previously developed to a react app. Im having a bit of an issue with a component ExpressiveText that searches through a string for a match to a property on a list objects and inserts a component TriggerModal in its place that when clicked triggers a modal with more detailed information. So the properties passed into ExpressiveTest are: text, tags, and tagsProperty.
text is a string (i.e. "My search string")
tags is an array of objects (i.e. [{id: 1, name: 'my', data: {...}}, {id: 2, name: 'string', data: {...}}]
tagsProperty is the name of the property to search for as a "tag" (i.e. name)
I followed along with this issue to try and formulate an idea of how to approach this. The reason I mention that I am coming from angular is because the component I had previously created simply used something like text.replace(regex, match => <trigger-modal data={tags[i]} />) and then used angulars $compile function to render components in the text. This does not seem to be possible using react. This is what I have tried inside of my ExpressiveText component:
// Renders props.text inside a <div>, attempting to swap every occurrence of a tag's
// props.tagsProperty value for a <TriggerModal> element (the question's attempt).
class ExpressiveTextComponent extends React.Component {
constructor (props) {
super(props);
this.filterText = this.filterText.bind(this);
}
// Returns props.text with tag matches replaced by <TriggerModal> elements.
filterText () {
let text = this.props.text;
// map() is used purely for its side effect on `text`; its return value is discarded.
this.props.tags.map(tag => {
// Capturing group: String.prototype.split then keeps the matched text at the odd indices.
const regex = new RegExp(`(${tag[this.props.tagsProperty]})`, 'gi');
let temp = text.split(regex);
// Replace every matched piece (odd index) with a modal trigger for this tag.
for(let i = 1; i < temp.length; i+=2){
temp[i] = <TriggerModal data={tag} label={tag[this.props.tagsProperty]} />;
}
// NOTE(review): after the first tag `text` is an array, so `text.split` above is not
// available on the next iteration.
text = temp;
});
return text;
}
render () {
return (
<div className={this.props.className}>{this.filterText()}</div>
);
}
}
This works for the first tag. The issue with it is that once it goes to map on the second tag, text is then an array. I tried adding in a conditional to check if text is an array, but then the issue becomes that the text array becomes nested and doesnt work on the next iteration. Im having a really hard time wrapping my mind around how to handle this. I have also tried dangerouslySetInnerHTML using text.replace(...) but that doesn't work either and just renders [object Object] in place of the component. Any help or advice is much appreciated, I have to say this is probably the only major issue I have come across since my switch to React, otherwise its been very straightforward.
Edit: Since I had a question asking for expected output with a given input and more clarification, what I am looking for is a component that is given this input:
<ExpressiveText text="my text" tags={{id: 1, name: 'text'}} tagsProperty="name" />
would render
<div>my <TriggerModal label="text" data={...} /></div>
with a functional TriggerModal component.
If I am correct in my understanding of what you're trying to accomplish, this is one way to do it. My apologies if I misunderstood your question. Also, this is pseudocode and I'll try to fill it in with real code in a bit. Sorry if this is difficult to understand; let me know and I will try to clarify.
filterText () {
let text = [this.props.text];
for (let item in this.props.tags) {
//item will be something like {id: 1, name: 'text'}
let searchString = new RegExp(item.name, 'gi');
//loop through text array and see if any item matches search string regex.
while (text.some(val => val.test(searchString)) {
//if we are here, at least one item matches the regexp
//loop thru text array, and split any string by searchString, and insert <TriggerModal> in their place
for (let i = text.length-1; i >=0; i--) {
//if text[i] is string and it matches regexp, then replace with nothing
text[i].replace(searchString, "")
//insert <trigger modal>
text.splice(i, 0, <TriggerModal ... />)
}
//end of while loop - test again to see if search string still exists in test array
}
}
return text;
}
Looks like I found a solution.
// Builds the render output for props.text: the text is split on single spaces, words that match
// a tag are swapped for <TriggerModal> elements, and a trailing space is re-added to each
// remaining string. The author notes right after this snippet that it is buggy.
filterText () {
let text = this.props.text.split(' '),
replaceIndexes = [];
if(this.props.tags.length > 0) {
// map() is used only for its side effects; its return value is discarded.
this.props.tags.map(tag => {
const regex = new RegExp('(' + tag[this.props.tagsProperty] + ')', 'gi');
for(let i = 0; i < text.length; i++){
// NOTE(review): once an earlier tag has put an element into text[i] (see the assignment
// below), text[i] is no longer a string when this line runs for a later tag.
if(text[i].match(regex)){
/**
* Pretty simple if its a one-word tag, search for the word and replace.
* could potentially cause some mis-matched tags but the words
* in my usecase are pretty specific, unlikely to be used in
* normal dialogue.
*/
text[i] = <TriggerModal data={tag} label={tag[this.props.tagsLabelProperty || 'name']} />;
}else{
// for tags with spaces, split them up.
let tempTag = tag[this.props.tagsProperty].split(' ');
// check for length
if(tempTag.length > 1) {
// we will be replacing at least 1 item in the array
let replaceCount = 0,
startIndex = null;
// If the first word of tempTag matches the current index, loop through the rest of the tempTag and check to see if the next words in the text array match
if(tempTag[0].toLowerCase() === text[i].toLowerCase()){
startIndex = i;
replaceCount += 1;
// loop through temp array
// NOTE(review): j starts at 0, so the first word (already counted just above) is compared and
// counted again; text[i+j] is also read without checking that i+j is inside the array.
for (let j = 0; j < tempTag.length; j++) {
if(tempTag[j].toLowerCase() === text[i+j].toLowerCase()){
replaceCount += 1;
}
}
// Push data into replaceIndexes array to process later to prevent errors with adjusting the indexes of the text object while looping
// NOTE(review): the entry is pushed whenever the first word matched, whether or not the
// remaining words of the tag matched too.
replaceIndexes.push({
startIndex: startIndex,
replaceCount: replaceCount,
element: <TriggerModal data={tag} label={tag[this.props.tagsLabelProperty || 'name']} />
});
}
}
}
}
});
}
// Loop through each replace index object
// Each splice inserts ONE item, the array [rep.element, ', '], in place of replaceCount words.
// NOTE(review): `- index` presumably compensates for earlier splices having shortened the
// array; that only holds if each earlier splice shrank it by exactly one item. Confirm.
replaceIndexes.forEach((rep, index) => {
text.splice(rep.startIndex - index, rep.replaceCount, [rep.element, ', ']);
});
// Since we stripped out spaces, we need to put them back in the places that need them.
return text.map(item => {
if(typeof item === "string"){
return item + ' ';
}
return item;
});
}
Edit: This is actually pretty buggy. I ended up ditching my own solution in favor of this package

How to ignore any element in Javascript? [duplicate]

<div class="title">
I am text node
<a class="edit">Edit</a>
</div>
I wish to get the "I am text node", do not wish to remove the "edit" tag, and need a cross browser solution.
var text = $(".title").contents().filter(function() {
return this.nodeType == Node.TEXT_NODE;
}).text();
This gets the contents of the selected element, and applies a filter function to it. The filter function returns only text nodes (i.e. those nodes with nodeType == Node.TEXT_NODE).
You can get the nodeValue of the first childNode using
$('.title')[0].childNodes[0].nodeValue
http://jsfiddle.net/TU4FB/
Another native JS solution that can be useful for "complex" or deeply nested elements is to use NodeIterator. Put NodeFilter.SHOW_TEXT as the second argument ("whatToShow"), and iterate over just the text node children of the element.
var root = document.querySelector('p'),
iter = document.createNodeIterator(root, NodeFilter.SHOW_TEXT),
textnode;
// print all text nodes
while (textnode = iter.nextNode()) {
console.log(textnode.textContent)
}
<p>
<br>some text<br>123
</p>
You can also use TreeWalker. The difference between the two is that NodeIterator is a simple linear iterator, while TreeWalker allows you to navigate via siblings and ancestors as well.
ES6 version that returns the content of the first #text node
/**
 * Return the trimmed text content of the first direct child of `node` that is
 * a text node, or undefined when there is no such child.
 */
const extract = (node) => {
  for (const child of node.childNodes) {
    if (child.nodeType === Node.TEXT_NODE) {
      return child.textContent.trim();
    }
  }
  return undefined;
};
If you mean get the value of the first text node in the element, this code will work:
var oDiv = document.getElementById("MyDiv");
var firstText = "";
for (var i = 0; i < oDiv.childNodes.length; i++) {
var curNode = oDiv.childNodes[i];
if (curNode.nodeName === "#text") {
firstText = curNode.nodeValue;
break;
}
}
You can see this in action here: http://jsfiddle.net/ZkjZJ/
Pure JavaScript: Minimalist
First off, always keep this in mind when looking for text in the DOM.
MDN - Whitespace in the DOM
This issue will make you pay attention to the structure of your XML / HTML.
In this pure JavaScript example, I account for the possibility of multiple text nodes that could be interleaved with other kinds of nodes. However, initially, I do not pass judgment on whitespace, leaving that filtering task to other code.
In this version, I pass a NodeList in from the calling / client code.
/**
 * Gets the string of one child node, if that child is a text node.
 * Minimalist, non-robust, pre-test loop version. No string filtering or conditioning.
 *
 * The child at 1-based position `target` is inspected; its nodeValue is
 * returned only when that child is a text node.
 *
 * @author Anthony Rutledge
 * @param nodeList The child nodes of a Node, as in node.childNodes.
 * @param target A positive whole number >= 1 (1-based position in nodeList).
 * @return String The nodeValue of the targeted text node, or null when the
 *         position is out of range or does not hold a text node.
 */
function getText(nodeList, target)
{
    const wantedIndex = target - 1;
    const count = nodeList.length; // Because you may have many child nodes.
    let position = 0;

    while (position < count) {
        const candidate = nodeList[position];
        const isTextNode = candidate.nodeType === Node.TEXT_NODE;

        if (isTextNode && position === wantedIndex) {
            return candidate.nodeValue; // Done! No need to keep going.
        }

        position += 1;
    }

    return null;
}
Of course, by testing node.hasChildNodes() first, there would be no need to use a pre-test for loop.
/**
 * Gets the string of one child node, if that child is a text node.
 * Minimalist, post-test loop version. No string filtering or conditioning.
 *
 * The child at 1-based position `target` is inspected; its nodeValue is
 * returned only when that child is a text node.
 *
 * @author Anthony Rutledge
 * @param nodeList The child nodes of a Node, as in node.childNodes.
 * @param target A positive whole number >= 1 (1-based position in nodeList).
 * @return String The nodeValue of the targeted text node, or null when the
 *         list is empty, the position is out of range, or the child at that
 *         position is not a text node.
 */
function getText(nodeList, target)
{
    var trueTarget = target - 1,
        length = nodeList.length,
        i = 0;

    // A post-test loop always runs its body once, so an empty list must be
    // rejected up front; otherwise nodeList[0] is undefined and the property
    // access below throws.
    if (length === 0) {
        return null;
    }

    do {
        if ((nodeList[i].nodeType === Node.TEXT_NODE) && (i === trueTarget)) {
            return nodeList[i].nodeValue; // Done! No need to keep going.
        }

        i++;
    } while (i < length);

    return null;
}
Pure JavaScript: Robust
Here the function getTextById() uses two helper functions: getStringsFromChildren() and filterWhitespaceLines().
getStringsFromChildren()
/**
 * Collects the nodeValue strings of the direct child text nodes of a node.
 * Generic, cross platform solution. No string filtering or conditioning.
 *
 * @author Anthony Rutledge
 * @version 7.0
 * @param parentNode An instance of the Node interface, such as an Element object.
 * @return Array of strings, or null when the node has no child text nodes.
 * @throws TypeError if the parentNode is not a Node object.
 */
function getStringsFromChildren(parentNode)
{
    var strings = [],
        nodeList,
        length,
        i;

    // Parenthesized on purpose: `!parentNode instanceof Node` negates
    // parentNode first and is therefore always false.
    if (!(parentNode instanceof Node)) {
        throw new TypeError("The parentNode parameter expects an instance of a Node.");
    }

    if (!parentNode.hasChildNodes()) {
        return null; // We are done. Node may resemble <element></element>
    }

    nodeList = parentNode.childNodes;
    length = nodeList.length;

    for (i = 0; i < length; i++) {
        if (nodeList[i].nodeType === Node.TEXT_NODE) {
            strings.push(nodeList[i].nodeValue);
        }
    }

    if (strings.length > 0) {
        return strings;
    }

    return null;
}
filterWhitespaceLines()
/**
 * Filters an array of strings to remove whitespace-only lines; the strings
 * that are kept are trimmed.
 * Generic, cross platform solution.
 *
 * @author Anthony Rutledge
 * @version 6.0
 * @param textArray An Array of strings, e.g. the result of getStringsFromChildren().
 * @return Array of trimmed strings that are not lines of whitespace, or null
 *         when nothing remains.
 * @throws TypeError if the textArray param is not of type Array.
 */
function filterWhitespaceLines(textArray)
{
    var filteredArray = [],
        whitespaceLine = /(?:^\s+$)/; // Non-capturing Regular Expression.

    // `!textArray instanceof Array` negates textArray first and is always
    // false; Array.isArray performs the intended check.
    if (!Array.isArray(textArray)) {
        throw new TypeError("The textArray parameter expects an instance of a Array.");
    }

    for (var i = 0; i < textArray.length; i++) {
        if (!whitespaceLine.test(textArray[i])) {  // If it is not a line of whitespace.
            filteredArray.push(textArray[i].trim());  // Trimming here is fine.
        }
    }

    if (filteredArray.length > 0) {
        return filteredArray; // Leave selecting and joining strings for a specific implementation.
    }

    return null; // No text to return.
}
getTextById()
/**
 * Gets strings from the direct child text nodes of the element with the
 * given id. Robust. Generic, cross platform solution.
 *
 * Relies on getStringsFromChildren() and filterWhitespaceLines().
 * Errors (wrong id type, unknown id, errors raised by the helpers) are caught
 * here, their message is written with console.log, and null is returned.
 *
 * @author Anthony Rutledge
 * @version 6.0
 * @param id A String associated with the id property of an Element.
 * @return Array of strings, or null when there is no text or an error occurred.
 */
function getTextById(id)
{
    var textArray = null;       // The hopeful output.
    var idDatatype = typeof id; // Only used in a TypeError message.
    var node;                   // The parent node being examined.

    try {
        if (idDatatype !== "string") {
            throw new TypeError("The id argument must be of type String! Got " + idDatatype);
        }

        node = document.getElementById(id);

        if (node === null) {
            throw new TypeError("No element found with the id: " + id);
        }

        textArray = getStringsFromChildren(node);

        if (textArray === null) {
            return null; // No text nodes found. Example: <element></element>
        }

        textArray = filterWhitespaceLines(textArray);

        // filterWhitespaceLines() returns null when only whitespace was found;
        // check for that before reading .length so no spurious error is logged.
        if (textArray !== null && textArray.length > 0) {
            return textArray; // Leave selecting and joining strings for a specific implementation.
        }
    } catch (e) {
        console.log(e.message);
    }

    return null; // No text to return.
}
Next, the return value (Array, or null) is sent to the client code where it should be handled. Hopefully, the array should have string elements of real text, not lines of whitespace.
Empty strings ("") are not returned because you need a text node to properly indicate the presence of valid text. Returning ("") may give the false impression that a text node exists, leading someone to assume that they can alter the text by changing the value of .nodeValue. This is false, because a text node does not exist in the case of an empty string.
Example 1:
<p id="bio"></p> <!-- There is no text node here. Return null. -->
Example 2:
<p id="bio">
</p> <!-- There are at least two text nodes ("\n"), here. -->
The problem comes in when you want to make your HTML easy to read by spacing it out. Now, even though there is no human readable valid text, there are still text nodes with newline ("\n") characters in their .nodeValue properties.
Humans see examples one and two as functionally equivalent--empty elements waiting to be filled. The DOM is different than human reasoning. This is why the getStringsFromChildren() function must determine if text nodes exist and gather the .nodeValue values into an array.
for (var i = 0; i < length; i++) {
if (nodeList[i].nodeType === Node.TEXT_NODE) {
textNodes.push(nodeList[i].nodeValue);
}
}
In example two, two text nodes do exist and getStringFromChildren() will return the .nodeValue of both of them ("\n"). However, filterWhitespaceLines() uses a regular expression to filter out lines of pure whitespace characters.
Is returning null instead of newline ("\n") characters a form of lying to the client / calling code? In human terms, no. In DOM terms, yes. However, the issue here is getting text, not editing it. There is no human text to return to the calling code.
One can never know how many newline characters might appear in someone's HTML. Creating a counter that looks for the "second" newline character is unreliable. It might not exist.
Of course, further down the line, the issue of editing text in an empty <p></p> element with extra whitespace (example 2) might mean destroying (maybe, skipping) all but one text node between a paragraph's tags to ensure the element contains precisely what it is supposed to display.
Regardless, except for cases where you are doing something extraordinary, you will need a way to determine which text node's .nodeValue property has the true, human readable text that you want to edit. filterWhitespaceLines gets us half way there.
var whitespaceLine = /(?:^\s+$)/; // Non-capturing Regular Expression.
// Iterate over the unfiltered input (textArray). Bounding the loop by the
// output array's length would make it depend on the array being filled.
for (var i = 0; i < textArray.length; i++) {
    if (!whitespaceLine.test(textArray[i])) {  // If it is not a line of whitespace.
        filteredTextArray.push(textArray[i].trim());  // Trimming here is fine.
    }
}
At this point you may have output that looks like this:
["Dealing with text nodes is fun.", "Some people just use jQuery."]
There is no guarantee that these two strings are adjacent to each other in the DOM, so joining them with .join() might make an unnatural composite. Instead, in the code that calls getTextById(), you need to choose which string you want to work with.
Test the output.
try {
var strings = getTextById("bio");
if (strings === null) {
// Do something.
} else if (strings.length === 1) {
// Do something with strings[0]
} else { // Could be another else if
// Do something. It all depends on the context.
}
} catch (e) {
console.log(e.message);
}
One could add .trim() inside of getStringsFromChildren() to get rid of leading and trailing whitespace (or to turn a bunch of spaces into a zero length string (""), but how can you know a priori what every application may need to have happen to the text (string) once it is found? You don't, so leave that to a specific implementation, and let getStringsFromChildren() be generic.
There may be times when this level of specificity (the target and such) is not required. That is great. Use a simple solution in those cases. However, a generalized algorithm enables you to accommodate simple and complex situations.
.text() - for jquery
$('.title').clone() //clone the element
.children() //select all the children
.remove() //remove all the children
.end() //again go back to selected element
.text(); //get the text of element
The following version ignores whitespace as well, so you never get blank text nodes. Code using core JavaScript:
// Find the value of the first direct child text node of #MyDiv that is not
// empty or whitespace-only; firstText stays "" when there is none.
var oDiv = document.getElementById("MyDiv");
var firstText = "";
// Declared once, outside the loop: the original assigned an undeclared
// `whitespace` (an implicit global) on every iteration.
var whitespace = /^\s*$/;
for (var i = 0; i < oDiv.childNodes.length; i++) {
    var curNode = oDiv.childNodes[i];
    if (curNode.nodeName === "#text" && !(whitespace.test(curNode.nodeValue))) {
        firstText = curNode.nodeValue;
        break;
    }
}
Check it on jsfiddle : - http://jsfiddle.net/webx/ZhLep/
Simply via Vanilla JavaScript:
const el = document.querySelector('.title');
const text = el.firstChild.textContent.trim();
You can also use XPath's text() node test to get the text nodes only. For example
var target = document.querySelector('div.title');
var iter = document.evaluate('text()', target, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE);
var node;
var want = '';
while (node = iter.iterateNext()) {
want += node.data;
}
There are some overcomplicated solutions here but the operation is as straightforward as using .childNodes to get children of all node types and .filter to extract e.nodeType === Node.TEXT_NODEs. Optionally, we may want to do it recursively and/or ignore "empty" text nodes (all whitespace).
These examples convert the nodes to their text content for display purposes, but this is technically a separate step from filtering.
// All direct children of `el` that are text nodes, in document order.
const immediateTextNodes = el => {
  const matches = [];
  for (const child of el.childNodes) {
    if (child.nodeType === Node.TEXT_NODE) {
      matches.push(child);
    }
  }
  return matches;
};

// Direct child text nodes whose text is not empty after trimming.
const immediateNonEmptyTextNodes = el => {
  const matches = [];
  for (const child of el.childNodes) {
    if (child.nodeType !== Node.TEXT_NODE) {
      continue;
    }
    if (child.textContent.trim() !== "") {
      matches.push(child);
    }
  }
  return matches;
};

// First direct child that is a text node, or undefined when there is none.
const firstImmediateTextNode = el => {
  for (const child of el.childNodes) {
    if (child.nodeType === Node.TEXT_NODE) {
      return child;
    }
  }
  return undefined;
};

// First direct child text node with non-whitespace text, or undefined.
const firstImmediateNonEmptyTextNode = el => {
  for (const child of el.childNodes) {
    if (child.nodeType !== Node.TEXT_NODE) {
      continue;
    }
    if (child.textContent.trim() !== "") {
      return child;
    }
  }
  return undefined;
};
// example usage:
const text = el => el.textContent;
const p = document.querySelector("p");
console.log(immediateTextNodes(p).map(text));
console.log(immediateNonEmptyTextNodes(p).map(text));
console.log(text(firstImmediateTextNode(p)));
console.log(text(firstImmediateNonEmptyTextNode(p)));
// if you want to trim whitespace:
console.log(immediateNonEmptyTextNodes(p).map(e => text(e).trim()));
<p>
<span>IGNORE</span>
<b>IGNORE</b>
foo
<br>
bar
</p>
Recursive alternative to a NodeIterator:
// Every text node below `el`, at any depth, in document order.
const deepTextNodes = el => {
  const found = [];
  for (const child of el.childNodes) {
    if (child.nodeType === Node.TEXT_NODE) {
      found.push(child);
      continue;
    }
    for (const nested of deepTextNodes(child)) {
      found.push(nested);
    }
  }
  return found;
};

// Like deepTextNodes, but keeps only text nodes with non-whitespace text.
// Anything else (including whitespace-only text nodes) is descended into.
const deepNonEmptyTextNodes = el => {
  const found = [];
  for (const child of el.childNodes) {
    const isNonEmptyText =
      child.nodeType === Node.TEXT_NODE && child.textContent.trim() !== "";
    if (isNonEmptyText) {
      found.push(child);
      continue;
    }
    for (const nested of deepNonEmptyTextNodes(child)) {
      found.push(nested);
    }
  }
  return found;
};
// example usage:
const text = el => el.textContent;
const p = document.querySelector("p");
console.log(deepTextNodes(p).map(text));
console.log(deepNonEmptyTextNodes(p).map(text));
<p>
foo
<span>bar</span>
baz
<span><b>quux</b></span>
</p>
Finally, feel free to join the text node array into a string if you wish using .join(""). But as with trimming and text content extraction, I'd probably not bake this into the core filtering function and leave it to the caller to handle as needed.

Replace text in the middle of a TextNode with an element

I want to insert HTML tags within a text node with TreeWalker, but TreeWalker forces my HTML brackets into &lt; and &gt; no matter what I've tried. Here is the code:
// Question's attempt: wrap each word of every text node under <body> in an <element> tag.
var text;
// Walk text nodes only.
var tree = document.createTreeWalker(document.body,NodeFilter.SHOW_TEXT);
while (tree.nextNode()) {
text = tree.currentNode.nodeValue;
// Wrap every word that directly follows a non-word character.
text = text.replace(/(\W)(\w+)/g, '$1<element onmouseover="sendWord(\'$2\')">$2</element>');
// Wrap a word at the very start of the text; the pattern above needs a preceding
// non-word character and so cannot match it.
text = text.replace(/^(\w+)/, '<element onmouseover="sendWord(\'$1\')">$1</element>');
// The markup string is written back as the node's text value, not parsed as HTML;
// this is where the question reports the brackets coming out escaped.
tree.currentNode.nodeValue = text;
}
Using \< or " instead of ' won't help. My workaround is to copy all of the DOM tree to a string and to replace the html body with that. It works on very simple webpages and solves my first problem, but is a bad hack and won't work on anything more than a trivial page. I was wondering if I could just work straight with the text node rather than use a workaround. Here is the code for the (currently buggy) workaround:
// Question's workaround: concatenate node values into one HTML string and
// replace the whole <body> with it.
var text;
var newHTML = "";
// No whatToShow argument is passed here, so the loop checks nodeType itself.
var tree = document.createTreeWalker(document.body);
while (tree.nextNode()) {
text = tree.currentNode.nodeValue;
// nodeType 3 is a text node: only those get their words wrapped.
if (tree.currentNode.nodeType == 3){
text = text.replace(/(\W)(\w+)/g, '$1<element onmouseover="sendWord(\'$2\')">$2</element>');
text = text.replace(/^(\w+)/, '<element onmouseover="sendWord(\'$1\')">$1</element>');
}
// NOTE(review): for non-text nodes nodeValue is presumably null, so this appends
// the string "null" and drops the element's own markup; likely the bug the
// question mentions. Confirm against the DOM spec.
newHTML += text
}
// Replaces the entire body content with the rebuilt string.
document.body.innerHTML = newHTML;
Edit: I realize a better workaround would be to custom tag the text nodes ((Customtag_Start_Here) etc.), copy the whole DOM to a string, and use my custom tags to identify text nodes and modify them that way. But if I don't have to, I'd rather not.
To 'change' a text node into an element, you must replace it with an element. For example:
// Swap the walker's current text node for a newly created element.
var text = tree.currentNode;
// 'foo' and 'bar' are placeholder tag and attribute names.
var el = document.createElement('foo');
el.setAttribute('bar','yes');
// replaceChild(newChild, oldChild): the element takes the text node's place under its parent.
text.parentNode.replaceChild( el, text );
If you want to retain part of the text node, and inject an element "in the middle", you need to create another text node and insert it and the element into the tree at the appropriate places in the tree.
Edit: Here's a function that might be super useful to you. :)
Given a text node, it runs a regex on the text values. For each hit that it finds it calls a custom function that you supply. If that function returns a string, then the match is replaced. However, if that function returns an object like:
{ name:"element", attrs:{onmouseover:"sendWord('foo')"}, content:"foo" }
then it will split the text node around the match and inject an element in that location. You can also return an array of strings or those objects (and can recursively use arrays, strings, or objects as the content property).
Demo: http://jsfiddle.net/DpqGH/8/
/**
 * Replace regex matches inside a single text node with new text and/or elements.
 *
 * For every match, `handler` is called with the match and its capture groups as
 * separate arguments (the same values `regex.exec` returns, spread out). Its
 * return value describes the replacement:
 *   - a string (or any other primitive): inserted as a text node;
 *   - an object `{ name, attrs, content, namespaceURI }`: an element is created
 *     (in `namespaceURI`, or the parent's namespace by default), `attrs` are set
 *     as attributes and `content` is built recursively and appended;
 *   - an array of the above, nested to any depth;
 *   - `null`/`undefined` at the top level: the matched text is kept unchanged;
 *     `null`/`undefined` nested inside an array or `content` is skipped.
 *
 * With a global regex every match is processed; the remainder after each match
 * is re-scanned from its start as a string of its own. A zero-length match
 * stops the scan, because it could never advance. Without the global flag only
 * the first match is processed.
 *
 * @param {Text} node - text node to rewrite; it must be attached to a parent.
 * @param {RegExp} regex - pattern to search for in the node's text.
 * @param {Function} handler - called as handler(match, group1, group2, ...).
 */
function textNodeReplace(node, regex, handler) {
  const mom = node.parentNode;
  // Every new node is inserted before the original node's next sibling, so the
  // pieces stay in order however many matches are processed.
  const nxt = node.nextSibling;
  const doc = node.ownerDocument;
  let hits;

  if (regex.global) {
    while (node && (hits = regex.exec(node.nodeValue))) {
      // Each pass matches against a fresh remainder node, so restart from index 0.
      regex.lastIndex = 0;
      // An empty match leaves the remainder unchanged and would loop forever.
      if (hits[0].length === 0) break;
      node = handleResult(node, hits, handler.apply(this, hits));
    }
  } else if ((hits = regex.exec(node.nodeValue))) {
    handleResult(node, hits, handler.apply(this, hits));
  }

  // Split `node` around the match, insert the replacement nodes, and return the
  // text node holding the remainder (or "" when nothing is left to scan).
  function handleResult(node, hits, results) {
    const orig = node.nodeValue;
    // A handler that returns nothing keeps the matched text as it was.
    const replacement = results == null ? hits[0] : results;
    // Build all nodes before touching the DOM, so a bad result cannot leave the
    // original text truncated.
    const fresh = flatten(create(mom, replacement));
    node.nodeValue = orig.slice(0, hits.index);
    fresh.forEach(function (n) {
      mom.insertBefore(n, nxt);
    });
    const rest = orig.slice(hits.index + hits[0].length);
    return rest && mom.insertBefore(doc.createTextNode(rest), nxt);
  }

  // Turn a result description into a node, or a (possibly nested) array of nodes.
  function create(el, o) {
    if (o == null) return [];
    if (Array.isArray(o)) {
      return o.map(function (v) { return create(el, v); });
    }
    if (typeof o === 'object') {
      const e = doc.createElementNS(o.namespaceURI || el.namespaceURI, o.name);
      if (o.attrs) {
        Object.keys(o.attrs).forEach(function (a) {
          e.setAttribute(a, o.attrs[a]);
        });
      }
      if (o.content) {
        flatten(create(e, o.content)).forEach(function (child) {
          e.appendChild(child);
        });
      }
      return e;
    }
    return doc.createTextNode(o + "");
  }

  // Normalise a node or arbitrarily nested array of nodes into one flat array.
  function flatten(nodes) {
    return [].concat(nodes).flat(Infinity);
  }
}
It's not quite perfectly generic, as it does not support namespaces on attributes. But hopefully it's enough to get you going. :)
You would use it like so:
// Wrap every word in the page's text nodes in an <element> with a hover handler.
findAllTextNodes(document.body).forEach(function(textNode){
  // textNodeReplace calls the handler with the matched text as its first
  // argument (capture groups follow as further arguments), so `match` is the
  // whole word as a string and is used directly.
  textNodeReplace( textNode, /\b\w+/g, function(match){
    return {
      name:'element',
      attrs:{onmouseover:"sendWord('"+match+"')"},
      content:match
    };
  });
});
/**
 * Collect every text node beneath `node`, skipping text whose parent is a
 * SCRIPT element.
 *
 * @param {Node} node - root of the subtree to walk.
 * @returns {Text[]} the text nodes in the order the tree walker visits them.
 */
function findAllTextNodes(node){
  const walker = node.ownerDocument.createTreeWalker(node, NodeFilter.SHOW_TEXT);
  const collected = [];
  for (let current = walker.nextNode(); current; current = walker.nextNode()) {
    // Script source is text too, but rewriting it would corrupt the script.
    if (current.parentNode.tagName === 'SCRIPT') {
      continue;
    }
    collected.push(current);
  }
  return collected;
}
or if you want something closer to your original regex:
// Variant closer to the question's regex: keep the leading non-word character
// as plain text and wrap only the word that follows it.
// The handler receives the full match followed by one argument per capture
// group, so the groups are taken as named parameters.
textNodeReplace( textNode, /(^|\W)(\w+)/g, function(match, lead, word){
  return [
    lead, // might be an empty string
    {
      name:'element',
      attrs:{onmouseover:"sendWord('"+word+"')"},
      content:word
    }
  ];
});
A function that returns the parent element of a text node whose content contains the passed string (partial matches included):
/**
 * Find the parent element of a text node under `mainNode` whose text contains
 * `text`.
 *
 * Children are scanned from last to first at every level, so when several text
 * nodes match, the one visited first in that reverse walk wins.
 *
 * @param {string} text - substring to look for in text node content.
 * @param {Node} mainNode - root of the subtree to search.
 * @returns {Element|null} the matching text node's parentElement, or null when
 *   nothing matches.
 */
function findElByText(text, mainNode) {
  const ELEMENT = 1;
  const TEXT = 3;
  const DOCUMENT = 9;
  const FRAGMENT = 11;

  const search = (parent) => {
    const kids = parent.childNodes;
    for (let idx = kids.length - 1; idx >= 0; idx--) {
      const child = kids[idx];
      const kind = child.nodeType;
      if (kind === TEXT) {
        if (child.textContent.includes(text)) {
          // Stop scanning this level; a null parentElement lets the callers keep looking.
          return child.parentElement;
        }
      } else if (kind === ELEMENT || kind === DOCUMENT || kind === FRAGMENT) {
        const hit = search(child);
        if (hit) {
          return hit;
        }
      }
    }
    return null;
  };

  return search(mainNode);
}
Usage:
// Search the whole <body> for a text node containing the given string.
findElByText('Some string in document', document.body);

Categories

Resources