JavaScript add spaces between sentences while parsing XML string - javascript

I have this XML string which I am displaying as a text in a document:
<p>The new strain of <s alias="coronavirus">COVID</s>seems to be having a greater spread rate.</p>
The following function returns the text form of the XML:
function stripHtml(html) {
// Create a new div element
var temporalDivElement = document.createElement("div");
// Set the HTML content with the providen
temporalDivElement.innerHTML = html;
// Retrieve the text property of the element (cross-browser support)
return temporalDivElement.textContent || temporalDivElement.innerText || "";
}
The problem is, this function returns the following string:
The new strain of COVIDseems to be having a greater spread rate.
which is nearly what I want, but there is no space between the word COVID and seems. Is it possible that I can add a space between contents of two tags if it doesn't exist?

One option is to iterate over text nodes and insert spaces at the beginning if they don't exist, something like:
const getTextNodes = (parent) => {
var walker = document.createTreeWalker(
parent,
NodeFilter.SHOW_TEXT,
null,
false
);
var node;
var textNodes = [];
while(node = walker.nextNode()) {
textNodes.push(node);
}
return textNodes;
}
function stripHtml(html) {
// Create a new div element
var temporalDivElement = document.createElement("div");
// Set the HTML content with the providen
temporalDivElement.innerHTML = html;
// Retrieve the text property of the element (cross-browser support)
for (const node of getTextNodes(temporalDivElement)) {
node.nodeValue = node.nodeValue.replace(/^(?!\s)/, ' ');
}
return temporalDivElement.textContent.replace(/ +/g, ' ').trim();
}
console.log(stripHtml(`<p>The new strain of <s alias="coronavirus">COVID</s>seems to be having a greater spread rate.</p>`));

Related

Highlight matched text instead of the whole the text

I have a function, getTextNodes, that searches text nodes recursively. Then I use a addHighlight function to highlight the text with <mark> tags:
const buttonEl = `<button>
<span>
Icon
</span>
Text
</button>
`;
document.body.innerHTML = buttonEl;
const foundButtonEl = document.querySelector("button");
const elements = [];
elements.push(foundButtonEl);
addHighlight(elements, "T");
function addHighlight(elements, text) {
elements.forEach((element, index) => {
const textNodes = getTextNodes(document.body);
const matchingNode = textNodes.find(node => node.textContent.includes(text));
const markElement = document.createElement('mark');
markElement.innerHTML = matchingNode.textContent;
matchingNode.replaceWith(markElement);
});
}
function getTextNodes(node) {
let textNodes = [];
if (node.nodeType === Node.TEXT_NODE) {
textNodes.push(node);
}
node.childNodes.forEach(childNode => {
textNodes.push(...getTextNodes(childNode));
});
return textNodes;
}
The problem is that addHighlight is highlighing the whole text (in the example, Text), instead of the matched text (in the example, T).
How to change this code so that only the matched text is highlighted (text)?
matchingNode is the whole node so you're replacing everything. If you want to match just part of it, you need to iterate though the textnode and find the index position of the substring that you're searching for.
Start by splitting the node into an array
matchingNode.wholeText.split("")
Then find the index position, insert markElement at that position, and go from there.
The problem is that the node you match is the element of which the innerContent contains the string you want to highlight.
What you should do instead of :
markElement.innerHTML = matchingNode.textContent;
matchingNode.replaceWith(markElement);
is probably something like
markElement.innerHTML = text;
matchingNode.replaceTextWithHTML(text, markElement);
replaceTextWithHTML is a fictive function :)

Check the content in script tag

I have added span tag for all comma using jquery to adding class for css. unfortunately It adds span tag inside script and get collapsed.I want to check the content is not in script tag and add span tag.I want to replace all comma (,) except the content in script tag.
if ($("#overview").length > 0) {
$("#overview").html( $("#overview").html().replace(/,/g,"<span class='comma'>,</span>"));
}
You need to be careful because the html attributes can also contain text with ','.
One way to solve this problem is iterate by the TextNodes, and for each one split the value in multiples TextNodes separated by a SpanNode.
// get all text nodes excluding the script
function getTextNodes(el) {
if (el.nodeType == 3) { // nodeType 3 is a TextNode
return el.parentElement && el.parentElement.nodeName == "SCRIPT" ? [] : [el];
}
return Array.from(el.childNodes)
.reduce((acc, item) => acc.concat(getTextNodes(item)), []);
}
// this will replace the TextNode with the necessary Span and Texts nodes
function replaceComma(textNode) {
const parent = textNode.parentElement;
const subTexts = textNode.textContent.split(','); // get all the subtexts separated by ,
// for each item in subtext it will insert a new TextNode with a SpanNode
// (iterate from end to beginning to use the insertBefore function)
textNode.textContent = subTexts[subTexts.length - 1];
let currentNode = textNode;
for(var i = subTexts.length - 2; i>= 0 ; i --) {
const spanEl = createCommaEl();
parent.insertBefore(spanEl, currentNode);
currentNode = document.createTextNode(subTexts[i]);
parent.insertBefore(currentNode, spanEl)
}
}
// create the html node: <span class="comma">,</span>
// you can do this more easy with JQUERY
function createCommaEl() {
const spanEl = document.createElement('span');
spanEl.setAttribute('class', 'comma');
spanEl.textContent = ',';
return spanEl;
}
// then, if you want to replace all comma text from the element with id 'myId'
// you can do
getTextNodes(document.getElementById('myId'))
.forEach(replaceComma);

How to change all texts in DOM without breaking existing HTML?

I want to replace specific texts on a page with javascript. For simplicty lets say I want to replace all letters A with the letter X. Important is that it's not going to break inline HTML.
Is there a simple way to iterate over all DOM elements and only change actual texts?
<span>hello world abcd..</span>
should become
<span>hello world xbcd..</span>
and not
<spxn>hello world <x href="/">xbcd</x>..</spxn>
Iterate over all text nodes, and change their nodeValue if they contain an a:
function getAllTextNodes() {
var walker = document.createTreeWalker(
document.body,
NodeFilter.SHOW_TEXT,
null,
false
);
var node;
var textNodes = [];
while(node = walker.nextNode()) {
textNodes.push(node);
}
return textNodes;
}
getAllTextNodes().forEach((node) => {
const { nodeValue } = node;
const newValue = nodeValue.replace(/a/g, 'x');
if (newValue !== nodeValue) {
node.nodeValue = newValue;
}
});
abcd
You can also create a whitelist or blacklist of parents whose text nodes are changeable, if you want:
function getAllTextNodes() {
var walker = document.createTreeWalker(
document.body,
NodeFilter.SHOW_TEXT,
null,
false
);
var node;
var textNodes = [];
while(node = walker.nextNode()) {
textNodes.push(node);
}
return textNodes;
}
const tagNamesToKeepUnchanged = ['SCRIPT'];
getAllTextNodes().forEach((node) => {
if (tagNamesToKeepUnchanged.includes(node.parentNode.tagName)) {
return;
}
const { nodeValue } = node;
const newValue = nodeValue.replace(/a/g, 'x');
if (newValue !== nodeValue) {
node.nodeValue = newValue;
}
});
const obj = JSON.parse(
document.querySelector('script[type="application/json"]').textContent
);
console.log(obj.key);
abcd
<p>foo bar</p>
<script type="application/json">{"key":"value"}</script>
This will preserve tag names, event listeners, and pretty much everything except the content of certain text nodes.
I usually use this:
/**
* Executes operation over all text nodes in a document
* #param {HTMLElement} element
* #param {function(Text):void} callback
*/
function processTextNodes(element, callback) {
// For text node, execute callback
if (element.nodeType == Node.TEXT_NODE)
callback(element);
// Otherwise, loop over child nodes
else if (element.childNodes.length > 0) {
for (const childNode of element.childNodes) {
if (childNode.nodeType == Node.TEXT_NODE)
callback(childNode);
// Recursion to child nodes
else {
processTextNodes(childNode, callback);
}
}
}
}
For example try this:
processTextNodes(document.body, (el)=>{el.data = el.data.toUpperCase()})
I used this in several userscripts that replace words in news articles to make them more fun.
The crawler by #CertainPerformance made JSfiddle.net crash for me.
I also need to replace the text node with an html element, so I had to move away from text nodes and I settled for more modest solution with an extensive regex lookaround to ensure html tags, properties and values are (mostly) not edited.
var list = ["crabe", "eau", "voir", "nom", "de", "des", "le", "les"];
var colorThoseWords = function(arr) {
words = arr.join('|');
// Regex lookareound: https://regular-expressions.info/lookaround.html
// Regex `negative lookbehind` and `negative lookahead`
// Run it: https://regex101.com/r/NZ5LQZ/1
var s = `(?<![<=#"'\`:;,./({[-])\\b(${words})\\b(?![>=#"'\`:)\]}-])`,
r = new RegExp(s, 'gi');
console.log({r});
$("p,li,h2,h3,a").each(function() {
var text = $(this).html();
$(this).html(text.replace(r, "<i class='signit-colored'>$1</i>"));
});
};
var uncolorWords = function() {
$(".signit-colored").each(function() {
var $text = $(this).text();
$(this).replaceWith($text);
});
};
colorThoseWords(list);
// uncolorWords();
See https://jsfiddle.net/x7f24qnv/13/
I suspect #Tomáš_Zato_-_Reinstate_Monica's transverse solution may be best if we edit the TEXT_NODE's parent.

How can I get all text node in document fragment?

I get the user selected text:
var selection = window.getSelection();
var selectRange = selection.getRangeAt(0);
var content = selectRange.cloneContents(); // DocumentFragment
How can I get the textNode in the DocumentFragment content?
use textContent
var selection = window.getSelection();
var selectRange = selection.getRangeAt(0);
var content = selectRange.cloneContents(); // DocumentFragment
var text = content.textContent;
Filter fragment.childNodes to get the text nodes:
const selection = window.getSelection();
const selectRange = selection.getRangeAt(0);
const fragment = selectRange.cloneContents(); // DocumentFragment
// Get the child nodes and filter them to only include text nodes
const textNodes = Array.from(fragment.childNodes).filter(child => child.nodeName === "#text");
Combining some tricks it is easy to extract the text nodes out of any container node (in this case a fragment). The fragment part of the question is irrelevant to the extraction part.
Getting all the children of the container, converting them to a "real" Array using the spread operator ... so filter could be used to. Can also skip this part because HTMLCollection does support forEach so it's possible to fill an empty Array within that.
Note that Node.TEXT_NODE is a DOM constant for 3
// create a demo fragment with some HTML mix of text nodes & elements
var frag = document.createRange().createContextualFragment("<a>1</a> 2 <b>3</b> 4.");
// now the work begins: get only the text nodes from the fragment
var textNodes = [...frag.childNodes].filter(node => node.nodeType == Node.TEXT_NODE)
// print the text nodes as an array
console.log( textNodes.map(node => node.textContent) )

Applying google app script code in selection in google docs

I have the following code that puts bold style some keywords in a whole google document:
function boldKeywords() {
// Words that will be put in bold:
var keywords = ["end", "proc", "fun"];
var document = DocumentApp.getActiveDocument();
var body = document.getBody();
var Style = {};
Style[DocumentApp.Attribute.BOLD] = true;
for (j in keywords) {
var found = body.findText(keywords[j]);
while(found != null) {
var foundText = found.getElement().asText();
var start = found.getStartOffset();
var end = found.getEndOffsetInclusive();
foundText.setAttributes(start, end, Style)
found = body.findText(keywords[j], found);
}
}
}
But I would like the code to put the keywords in bold only in the selected area of the document, for doing that, I tried using the function getSelection(), but I have the problem that this function returns a Range, but for applying findText I need a Body, somebody knows what could I do?
Modified Script
function boldKeywordsInSelection() {
const keywords = ["end", "proc", "fun"];
const document = DocumentApp.getActiveDocument();
const selection = document.getSelection();
// get a list of all the different range elements
const rangeElements = selection.getRangeElements();
const Style = {};
Style[DocumentApp.Attribute.BOLD] = true;
// forEach used here because for in was giving me trouble...
rangeElements.forEach(rangeElement => {
// Each range element has a corresponding element (e.g. paragraph)
const parentElement = rangeElement.getElement();
// fixing the limits of the bold operations depending on the selection
const startLimit = rangeElement.getStartOffset();
const endLimit = rangeElement.getEndOffsetInclusive();
for (j in keywords) {
let found = parentElement.findText(keywords[j]);
// wrapping in try catch to escape the for loop from within the while loop
try {
while (found != null) {
const foundText = found.getElement().asText();
const start = found.getStartOffset();
// Checking if the start of the word is after the start of the selection
if (start < startLimit) {
// If so, then skip to next word
found = parentElement.findText(keywords[j], found);
continue;
}
// Checking if the start of the word is after the end of the selection
// if so, go to next element
if (start > endLimit) throw "out of selection";
const end = found.getEndOffsetInclusive();
foundText.setAttributes(start, end, Style)
found = parentElement.findText(keywords[j], found);
}
} catch (e) {
Logger.log(e)
continue;
}
}
})
}
See the comments in the code for more details.
A getSelection produces a Range object, which contains within it various instances of RangeElement. Each RangeElement makes reference to a parent element, and the index positions within that parent. The parent is the element that the range element is a part of. For example:
This selection spans 3 elements. Therefore the selection has 3 range elements. You can only use the findText method on the whole element, not the range element.
This means that the flow of the script is generally the same, except that you need to go through each element and find the text within each element. Since this will return elements that are outside the selection, you need to keep track of the index positions of the selection and the found element and make sure the found element is within the selection.
References
Range
RangeElement
getSelection()

Categories

Resources