Google apps script - retain links when copying footnote content - javascript

Background
I have a Google Apps Script that we use to parse the footnote content, wrapped in double parenthesis, in place of the footnote number superscript. The intended result should be:
Before Script
This is my footie index.1 1This is my
footie content with a link and emphasis.
After Script
This is my footie index. (( This is my footie content with a
link and emphasis.)
Problem
Everything works fine, except when I parse the footnotes in double parenthesis, they are losing all the links and formatting:
This is my footie index. (( This is my footie content with a
link and emphasis.)
If anyone can assist me with fixing the code below I would really appreciate the help :)
SOLUTION:
function convertFootNotes () {
var doc = DocumentApp.getActiveDocument()
var copy = generateCopy(doc) // make a copy to avoid damaging the original
var openCopy = doc; //DocumentApp.openById(copy.getId()) // you have to use the App API to copy, but the Doc API to manipulate
performConversion(openCopy); // perform formatting on the copy
}
function performConversion (docu) {
var footnotes = docu.getFootnotes(); // get the footnotes
footnotes.forEach(function (note) {
// Traverse the child elements to get to the `Text` object
// and make a deep copy
var paragraph = note.getParent(); // get the paragraph
var noteIndex = paragraph.getChildIndex(note); // get the footnote's "child index"
insertFootnote(note.getFootnoteContents(),true, paragraph, noteIndex);
note.removeFromParent();
})
}
function insertFootnote(note, recurse, paragraph, noteIndex){
var numC = note.getNumChildren(); //find the # of children
paragraph.insertText(noteIndex," ((");
noteIndex++;
for (var i=0; i<numC; i++){
var C = note.getChild(i).getChild(0).copy();
if (i==0){
var temp = C.getText();
var char1 = temp[0];
var char2 = temp[1];
if (C.getText()[0]==" "){
C = C.deleteText(0,0);
}
}
if (i>0){
paragraph.insertText(noteIndex,"\n");
noteIndex++;
}
paragraph.insertText(noteIndex,C);
noteIndex++;
} //end of looping through children
paragraph.insertText(noteIndex,"))");
}
function generateCopy (doc) {
var name = doc.getName() + ' #PARSED_COPY' // rename copy for easy visibility in Drive
var id = doc.getId()
return DriveApp.getFileById(id).makeCopy(name)
}

Were there any changes to the code other than the added )) to make it not work? Removing the (( & )) still did not have the formatting applied when testing it; getText() returns the element contents as a String, not a rich text object/element which contains the formatting info.
To get to the Text object:
getFootnoteContents().getChild(0) returns the FootnoteSection Paragraph
getChild(0).getChild(0) returns the Text object of that paragraph
copy() returns a detached deep copy of the text object to work with
Note: If there are other child elements in the FootnoteSection or in it's Paragraph child, you'll want to add some kind of type/index checking to get the correct one. However, with basic footnotes - as the above example - this is the correct path.
function performConversion (docu) {
var footnotes = docu.getFootnotes() // get the footnotes
var noteText = footnotes.map(function (note) {
// Traverse the child elements to get to the `Text` object
// and make a deep copy
var note_text_obj = note.getFootnoteContents().getChild(0).getChild(0).copy();
// Add the `((` & `))` to the start and end of the text object
note_text_obj.insertText(0, " ((");
note_text_obj.appendText(")) ");
return note_text_obj // reformat text with parens and save in array
})
...
}

Related

Insert a HTML link next to every word that matches in a sentence for multiple words

I am trying to do something that I thought would be simple but been stuck on this for a while I want to find all instances of a word in a paragraph and insert a link next to it.
I dont want to use innerHTML and destroy the events. I also dont want to use jQuery ideally pure js.
I am looking to take this paragraph.
<p>red this is a sentence I want to change red and I want to change it for all instances the word red</p>
Find all index positions of the word red and change it too.
<p>red Some link this is a sentence I want to change red Some link and I want to change it for all instances the word red Some link</p>
So find every instance of the word red grab the index and then insert html not sure it can even be done the way I am doing it, it always only inserts it one time.
I have this so far.
var ps = document.querySelectorAll("p");
[].forEach.call(ps, function(p) {
const indexes = [...p.innerText.matchAll(new RegExp("red", "gi"))].map(
(a) => a.index
);
var link = document.createElement("a");
link.href = "";
link.innerHTML = `Changed`;
indexes.forEach((pos) => {
insertAtStringPos(p, pos, link);
})
});
function insertAtStringPos(el, pos, insertable) {
if (!el.children.length) {
var text = el.outerText;
var beginning = document.createTextNode(text.substr(0, pos));
var end = document.createTextNode(text.substr(pos - text.length));
while (el.hasChildNodes()) {
el.removeChild(el.firstChild);
}
el.appendChild(beginning);
el.appendChild(insertable);
el.appendChild(end);
}
}
I grabbed the insertAtStringPos function from another stackoverflow post.
I have an example here: https://jsbin.com/watopeteki/edit?html,js,console,output
Why do it always only insert once?
It can be an easier, you need just a split text and by a keyword insert a link.
function links() {
const ps = document.querySelectorAll('p');
return Array.from(ps).reduce((acc, p) => {
const links = p.querySelectorAll('a');
const isUpdate = Boolean(links?.length);
const text = p.innerHTML;
let index = 0;
const splitted = text.split(/(red)/gi);
splitted.forEach((txt) => {
const el = document.createTextNode(txt);
acc.appendChild(el);
if (txt === 'red') {
let link;
if (isUpdate) {
link = links[index++];
link.href = '';
link.innerHTML = `Changed after update`;
} else {
link = document.createElement('a');
link.href = '';
link.innerHTML = `Changed`;
acc.appendChild(link);
}
}
});
return acc;
}, document.createElement('p'));
}
const element = links(); // creates links
document.body.appendChild(element);
links(); // updates current links
If I understood correct, you need a function which updates your existing links. I have update stackblitz and example, check this out.
Stackblitz
I see a few problems with your code.
Your insertAtStringPos() function mutates the paragraph element contents, invalidating the remaining indexes in the indexes array. Reversing the indexes array before looping, and inserting from the end toward the beginning of the text, overcomes this problem.
You're passing the link element to the insertAtStringPos(). This same element gets inserted then moved with each subsequent insertion. Passing a cloned link element with each indexes iteration solves this problem.
outerText, in var text = el.outerText;, returns undefined in my version of Firefox (78.15, October 5, 2021).
To search for a word in one or more paragraphs, and insert a link node after that string, loop over each paragraph, search for occurrences of the string, and build an index array. Then loop the index array but first reverse it to start insertion at the end of the string. Also copy the link node before passing to the insertion function, otherwise the link node will simply be moved from one position to the next.
const ps = document.querySelectorAll("p");
const word = "red";
const link = document.createElement("a");
link.href = "";
link.innerHTML = "Changed";
[].forEach.call(ps, function(p) {
const indexes = [...p.innerText.matchAll(new RegExp(word, "gi"))].map(
// add word length to position, making sure position is not beyond end of text
(a) => (p.innerText.length > a.index + word.length)
? a.index + word.length // add word length to position
: p.innerText.length // word is at end of text
);
// execute insertion function for each position
// first reverse index array to start at the end of the text and work towards the beginning
indexes.reverse().forEach((pos) => {
// clone node before passing to insertion, otherwise same node simply gets moved
insertAtStringPos(p, pos, link.cloneNode(true)); // <-- clone node
})
});
function insertAtStringPos(el, pos, insertable) {
const text = el.childNodes[0].textContent;
const beginning = document.createTextNode(text.substr(0, pos) + " "); // text before and including word, plus a space
const end = document.createTextNode(
// if position is at end of text, create empty text node
(text.length > pos)
? " " + text.substr(pos - text.length) // a space, and text after word
: "" // empty text node
);
el.removeChild(el.childNodes[0]);
el.insertBefore(end, el.childNodes[0]);
el.insertBefore(insertable, el.childNodes[0]);
el.insertBefore(beginning, el.childNodes[0]);
}
<p>red this is a sentence I want to change red and I want to change it for all instances the word red</p>

Google Appscript: Output of parsing function for a Table of Contents results in an array of objects not in the correct format. Help appreciated

I am currently trying to parse the url and text from a table of contents on a google doc and write them into a table on google sheets.
So far I have been successful in getting the text and the url using the following code modified from the top answer in this post (How to use .findElement(DocumentApp.ElementType.TABLE_OF_CONTENTS) to get and parse a Document's Table of Contents Element)
function parseTOC( docId ) {
var contents = [];
var doc = DocumentApp.openById(docId);
// Define the search parameters.
var searchElement = doc.getBody();
var searchType = DocumentApp.ElementType.TABLE_OF_CONTENTS;
// Search for TOC. Assume there's only one.
var searchResult = searchElement.findElement(searchType);
if (searchResult) {
// TOC was found
var toc = searchResult.getElement().asTableOfContents();
// Parse all entries in TOC. The TOC contains child Paragraph elements,
// and each of those has a child Text element. The attributes of both
// the Paragraph and Text combine to make the TOC item functional.
var numChildren = toc.getNumChildren();
for (var i=0; i < numChildren; i++) {
var itemInfo = {}
var tocItem = toc.getChild(i).asParagraph();
var tocItemAttrs = tocItem.getAttributes();
var tocItemText = tocItem.getChild(0).asText();
// Set itemInfo attributes for this TOC item, first from Paragraph
itemInfo.text = tocItem.getText(); // Displayed text
// ... then from child Text
itemInfo.linkUrl= tocItemText.getLinkUrl(); // URL Link in document
contents.push(itemInfo);
}
}
// Return array of objects containing TOC info
return contents;
}
function test_parseTOC() {
var fileId = '--Doc-ID--';
var array = parseTOC(DocumentApp.getActiveDocument().getId() );
Logger.log(array)
}
The function test_parseTOC returns the following array:
[{linkUrl=#heading=h.nyq88bov1u8x, text=Google}, {text=Help, linkUrl=#heading=h.9lthewlyeqjd}]
Although the information is correct, the "=" is giving me trouble. When working with an array of objects I would expect it to be:
[{linkUrl: "#heading=h.nyq88bov1u8x", text: "Google"}, {text: "Help", linkUrl: "#heading:h.9lthewlyeqjd"}]
What I am trying to emulate can be outlined by this post here (Google Apps Script: how to copy array of objects to range?)
If I manually change the "=" to ":" and add quotation marks then the array works fine. Is there something with the original function I can change to produce a ":" instead of a "="? Or is there a way to modify the array of objects after the fact to replace the "=" with ":"? I could easily use concat to automatically add the quotation marks, but the "=" is out of my wheelhouse.
Thank you for any input on this
Update: It actually turns out that this code was working appropriately and I was receiving an error from a different function that was using the output of this one (i.e. I wrote .getSheets(0) not .getSheets()[0]. I will leave up the original post in case anyone needs a working example of how to extract out links from a TOC and along with the resources to write it to a google sheet

Use loop and find html element's values JavaScript

I want to use vanilla js to loop through a string of html text and get its values. with jQuery I can do something like this
var str1="<div><h2>This is a heading1</h2><h2>This is a heading2</h2></div>";
$.each($(str1).find('h2'), function(index, value) {
/// console.log($(value).text());
});
using $(str) converts it to an html string as I understand it and we can then use .text() to get an element (h2)'s value.
but I want to do this within my node app on the backend rather than on the client side, because it'd be more efficient (?) and also it'd just be nice to not rely on jQuery.
Some context, I'm working on a blogging app. I want a table of contents created into an object server side.
This is another way using .innerHTML but uses the built-in iterable protocol
Here's the operations we'll need, the types they have, and a link to the documentation of that function
Create an HTML element from a text
String -> HTMLElement – provided by set Element#innerHTML
Get the text contents of an HTML element
HTMLElement -> String – provided by get Element#innerHTML
Find nodes matching a query selector
(HTMLElement, String) -> NodeList – provided by Element#querySelectorAll
Transform a list of nodes to a list of text
(NodeList, HTMLElement -> String) -> [String] – provided by Array.from
// html2elem :: String -> HTMLElement
const html2elem = html =>
{
const elem = document.createElement ('div')
elem.innerHTML = html
return elem.childNodes[0]
}
// findText :: (String, String) -> [String]
const findText = (html, selector) =>
Array.from (html2elem(html).querySelectorAll(selector), e => e.textContent)
// str :: String
const str =
"<div><h1>MAIN HEADING</h1><h2>This is a heading1</h2><h2>This is a heading2</h2></div>";
console.log (findText (str, 'h2'))
// [
// "This is a heading1",
// "This is a heading2"
// ]
// :: [String]
console.log (findText (str, 'h1'))
// [
// "MAIN HEADING"
// ]
// :: [String]
The best way to parse HTML is to use the DOM. But, if all you have is a string of HTML, according to this Stackoverflow member) you may create a "dummy" DOM element to which you'd add the string to be able to manipulate the DOM, as follows:
var el = document.createElement( 'html' );
el.innerHTML = "<html><head><title>aTitle</title></head>
<body><div><h2>This is a heading1</h2><h2>This is a heading2</h2></div>
</body</html>";
Now you have a couple of ways to access the data using the DOM, as follows:
var el = document.createElement( 'html' );
el.innerHTML = "<html><head><title>aTitle</title></head><body><div><h2>This is a heading1</h2><h2>This is a heading2</h2></div></body</html>";
// one way
el.g = el.getElementsByTagName;
var h2s = el.g("h2");
for(var i = 0, max = h2s.length; i < max; i++){
console.log(h2s[i].textContent);
if (i == max -1) console.log("\n");
}
// and another
var elementList = el.querySelectorAll("h2");
for (i = 0, max = elementList.length; i < max; i++) {
console.log(elementList[i].textContent);
}
You may also use a regular expression, as follows:
var str = '<div><h2>This is a heading1</h2><h2>This is a heading2</h2></div>';
var re = /<h2>([^<]*?)<\/h2>/g;
var match;
var m = [];
var i=0;
while ( match = re.exec(str) ) {
m.push(match.pop());
}
console.log(m);
The regex consists of an opening H2 tag followed by not a "<",followed by a closing H2 tag. The "*?" take into account zero or multiple instances of which there is at least zero or one instance.
Per Ryan of Stackoverflow:
exec with a global regular expression is meant to be used in a loop,
as it will still retrieve all matched subexpressions.
The critical part of the regex is the "g" flag as per MDN. It allows the exec() method to obtain multiple matches in a given string. In each loop iteration, match becomes an array containing one element. As each element is popped off and pushed onto m, the array m ultimately contains all the captured text values.

Assigning javascript array elements class or id for css styling

I'm trying to assign class and id to items in an array I created in js and input into my html. I'm doing this so I can style them in my stylesheet. Each item will not be styled the same way.
I'm a beginner so trying to keep it to code I can understand and make it as clean as possible, i.e. not making each of these items an element in the html.
This part works fine:
var pool =['A','B','3','J','R','1','Q','F','5','T','0','K','N','C','R','U']
var letters = pool.join('');
document.getElementById('key').innerHTML = letters;
This part not so much:
var char1 = letters[1];
char1.classList.add('hoverRed');
There is a similar question here that didn't work for me, it just showed [object][object][object] when I ran it.
Your code attempts to apply a style to an array element, but CSS only applies to HTML. If you wish to style one character in a string, that character must be wrapped in an HTML element (a <span> is the best choice for wrapping an inline value).
This code shows how to accomplish this:
var pool =['A','B','3','J','R','1','Q','F','5','T','0','K','N','C','R','U']
var letters = pool.join('');
// Replace a specific character with the same character, but wrapped in a <span>
// so it can be styled
letters = letters.replace(letters[1], "<span>" + letters[1] + "</span>");
// Insert the letters string into the div
var theDiv = document.getElementById('key');
// Inject the string into the div
theDiv.innerHTML = letters;
// Get a reference to the span:
var theSpan = theDiv.querySelector("span");
// Add the style to the <span> that wraps the character, not the character itself
theSpan.classList.add('hoverRed');
.hoverRed {
color:red;
}
<div id="key"></div>
And, this snippet shows how you could apply CSS to any letter:
var pool =['A','B','3','J','R','1','Q','F','5','T','0','K','N','C','R','U'];
// Leave the original array alone so that it can be manipulated any way needed
// in the future, but create a new array that wraps each array element within
// a <span>. This can be accomplished in several ways, but the map() array method
// is the most straight-forward.
var charSpanArray = pool.map(function(char){
return "<span>" + char + "</span>";
});
// Decide which character(s) need CSS applied to them. This data can come from anywhere
// Here, we'll just say that the 2nd and 5th ones should.
// Loop through the new array and on the 2nd and 5th elements, apply the CSS class
charSpanArray.forEach(function(element, index, array){
// Check for the particular array elements in question
if(index === 1 || index === 4){
// Update those strings to include the CSS
array[index] = element.replace("<span>","<span class='hoverRed'>");
}
});
// Now, turn the new array into a string
var letters = charSpanArray.join('');
// For diagnostics, print the string to the console just to see what we've got
console.log(letters);
// Get a reference to the div container
var theDiv = document.getElementById('key');
// Inject the string into the div
theDiv.innerHTML = letters;
.hoverRed {
color:red;
}
<div id="key"></div>
You're on the right track, but missed one key thing.
In your example, pool contains characters. When you combine them using join, you get a string. Setting that string as the innerHTML of an element doesn't give the string super powers, it's still just a string.
In order to get a classList, you need to change your letters into elements and work with them.
I've included an es6 example (and a working plunker) of how to get the functionality you want below.
let pool = ['A','B','3','J','R','1','Q','F','5','T','0','K','N','C','R','U']
const letterToElement = function(char) {
//Create the element
let e = document.createElement("SPAN");
//Create the text node
let t = document.createTextNode(char);
//Put the text node on the element
e.appendChild(t);
//Add the class name you want
e.className += "hoverRed";
return e;
};
//create your elements from your pool and append them to the "key" element
window.onload = function() {
let container = document.getElementById("key");
pool.map(l => letterToElement(l))
.forEach(e => container.appendChild(e));
}
https://plnkr.co/edit/mBhA60aUCEGSs0t0MDGu

Highlight phone number and wrap with tag javascript

The following code checks if the selected tag has childnodes. If a child node is present , it loops till a child node is found. When there are no further child nodes found, it loops out i.e it reaches a text node causing the loop to end. The function is made recursive to run until no child node is found. The code runs as per above info, but when I try to match TEXT_NODE (console.log() outputs all text node), replace() is used to identify phone numbers using regex and replaced with hyperlink. The number gets detected and is enclosed with a hyperlink but it gets displayed twice i.e. number enclosed with hyperlink and only the number.Following is the code
function DOMwalker(obj){
var regex = /\+\d{1,4}?[-.\s]?\(?\d{1,3}?\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}/g;
var y = "$&";
if(obj.hasChildNodes()){
var child = obj.firstChild;
while(child){
if(child.nodeType!==3)
{
DOMwalker(child);
}
if (child.nodeType=== 3) {
var text = child.nodeValue;
console.log(typeof text);
var regs = regex.exec(text);
match = text.replace(regex,y);
if(match){
var item = document.createElement('a');
item.setAttribute('href','javascript:void(0);');
var detect = document.createTextNode(match);
var x=item.appendChild(detect);
console.log(x);
child.parentNode.insertBefore(x,child);
}
}
child=child.nextSibling;
}
}
};
$(window).load(function(){
var tag = document.querySelector(".gcdMainDiv div.contentDiv");
DOMwalker(tag);
});
Following are the screenshot of the output:
Here the number gets printed twice instead of one with hyperlink which is been displayed(expected highlighted number with hyperlink) and second widout tags
Following is console.log of x
I have already gone through this.
The solution provided below works well with FF. The problem arises when used in IE11. It throws Unknown runtime error and references the .innerHTML. I used the appenChild(),but the error couldn't be resolved.
You've got a couple of problems with what you posted. First, if a child is not node type 3 and not a SCRIPT node, you re-call recursivetree() but you do not pass the child in. The function will just start over at the first div element and again, infinitely loop.
Second, you're calling replace() on the node itself, and not the node's innerHTML. You're trying to replace a node with a string, which just won't work, and I think you mean to replace any matching numbers within that node, rather than the entire node.
If you have <div>My number is +111-555-9999</div>, you only want to replace the number and not lose everything else.
Try this as a solution:
function recursivetree(obj){
var regex = /\+\d{1,4}?[-.\s]?\(?\d{1,3}?\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}/g;
var y = "$&";
var obj = obj || document.getElementsByTagName('div')[0];
if(obj.hasChildNodes()){
var child = obj.firstChild;
while(child){
if(child.nodeType !== 3 && child.nodeName !== 'SCRIPT'){
//Recall recursivetree with the child
recursivetree(child);
}
//A nodeType of 3, text nodes, sometimes do not have innerHTML to replace
//Check if the child has innerHTML and replace with the regex
if (child.innerHTML !== undefined) {
child.innerHTML = child.innerHTML.replace(regex,y);
}
child=child.nextSibling;
}
}
}
recursivetree();
Fiddle: http://jsfiddle.net/q07n5mz7/
Honestly? If you're trying to loop through the entire page and replace all instances of numbers, just do a replace on the body.
var regex = /\+\d{1,4}?[-.\s]?\(?\d{1,3}?\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}/g;
var y = "$&";
var body = document.body;
body.innerHTML = body.innerHTML.replace(regex, y);
Fiddle: http://jsfiddle.net/hmdv7adu/
Finally, I got the solution of my question. I referred to this answer which helped me to solve my query.
Here goes the code:
function DOMwalker(obj){
if(obj.hasChildNodes()){
var child = obj.firstChild;
var children = obj.childNodes;
var length = children.length;
for(var i = 0;i<length;i++){
var nodes = children[i];
if(nodes.nodeType !==3){
DOMwalker(nodes);
}
if(nodes.nodeType===3){
//Pass the parameters nodes:current node being traversed;obj:selector being passed as parameter for DOMwalker function
highlight(nodes,obj);
}
}
child = child.nextSibling;
}
}
function highlight(node,parent){
var regex =/(\d{1}-\d{1,4}-\d{1,5})|(\+\d{1,4}?[-.\s]?\(?\d{1,3}?\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9})/g;
//Stores the value of current node which is passed through the if loop
var matchs = node.data.match(regex);
if matchs is true,add it to DOM
if(matchs){
var anchor = document.createElement("a");
var y = /[(]\d[)]|[.-\s]/g;//removes spaces periods or dash,also number within brackets
var remove = number.replace(y,'');
//tel uri,if you have an app like skype for click-to dial
anchor.setAttribute("href","tel:"+remove);
//the anchor tag should be inserted before in the current node in the DOM
parent.insertBefore(anchor,node);
//append it toh the DOM to be displaye don the web page
anchor.appendChild(node);
}
else
{
return false;
}
}
I hope this code helps others.

Categories

Resources