Converting AJAX'd-in XML to HTML - javascript

I have a web page that allows the user to select a category from a dropdown list. The category relates to a table that is derived from an XML file that is AJAX'd in. I intend to use some JS code to read the contents of the XML file and create a table based on the values therein. (In other words, the XML files could create tables with completely different columns, which is why I don't want to hard-code anything, but would rather have it built programmatically.) I also don't want to use XSLT or any other methods that would create a second set of files; the XML files used are to be considered the "sources of truth".
I can get an XML file to load, but I'm having difficulty navigating the structure in order to read column names from the first child element.
<xml>
<item>
<number>Number</number>
<image>Image</image>
</item>
<item>
<number>123</number>
<image>123.jpg</image>
</item>
</xml>
I'd like to be able to read the first set of <item> elements and use their text values to generate the <th> elements in the table. And then fill in the table with the rest of the XML content.
I've tried items[0].childNodes[0], but it picks up the line breaks (which I don't want to remove from the XML source files).
I've also tried items[0].children[0], but I get "Unable to get property '0' of undefined or null reference".
Here is my code thus far:
const vPathToJSON = "../JSON/";
const vPathToXML = "../XML/";
var vCategoriesJSON = "categoriesJSON.txt";
// Once the DOM is ready: load the category list, fill the dropdown, and only
// then request the XML file.
$("document").ready(
  function() {
    getFromFile(vPathToJSON, vCategoriesJSON, "JSON")
      .then(fillCategoriesDropdown)
      // .then() expects a callback. Passing the result of getFromFile(...)
      // directly would start the XML request immediately, before the
      // dropdown has been filled.
      .then(function() {
        return getFromFile(vPathToXML, "acMitigation.xml", "XML");
      });
  }
);
// Use jQuery AJAX "GET" to import a file and parse it as requested.
//   pFilePath  - directory part of the URL (concatenated as-is with pFileName)
//   pFileName  - file name part of the URL
//   pParseType - "JSON" or "XML"
// Returns a thenable in both supported cases so callers can chain on it:
//   "JSON" -> resolves with the JSON.parse()d response text
//   "XML"  -> the $.ajax request object; fillMainTable(xml) is still invoked
//             from the success callback
// Any other pParseType falls through and returns undefined.
function getFromFile(pFilePath, pFileName, pParseType) {
  var vCompletePath = pFilePath + pFileName;
  switch (pParseType) {
    case "JSON":
      return $.get(vCompletePath)
        .then(function(returnedText) {
          return JSON.parse(returnedText);
        });
    case "XML":
      // Return the request instead of dropping it, so this branch can be
      // chained like the JSON branch.
      return $.ajax({
        type: "GET",
        url: vCompletePath,
        dataType: "xml",
        success: function(xml) {
          console.log("Success!");
          fillMainTable(xml);
        },
        error: function() {
          console.log("Unsuccessful!");
        }
      });
  }
}
// Fill the category dropdown from the parsed JSON array.
//   pReturnedJSON - array of objects; each object's `category` becomes the
//                   text of one <option> in #id_CategorySelect.
// Each entry is also copied into aListOfCategories at the same index.
// NOTE(review): aListOfCategories is not declared in this snippet; it is
// assumed to be an array defined elsewhere.
function fillCategoriesDropdown(pReturnedJSON) {
  // Look the <select> up once instead of on every iteration.
  const vSelect = document.getElementById("id_CategorySelect");
  // `let` keeps the counter local; the original assigned to an undeclared
  // `i`, which leaks a global (and throws in strict mode).
  for (let i = 0; i < pReturnedJSON.length; i++) {
    const vNewOption = document.createElement("OPTION");
    vNewOption.text = pReturnedJSON[i].category;
    vSelect.appendChild(vNewOption);
    aListOfCategories[i] = pReturnedJSON[i];
  }
}
// Receives the loaded XML document (called from getFromFile's XML success
// callback). Currently an unfinished placeholder: it only collects the
// <item> elements and does not build any table yet.
function fillMainTable(pXML) {
var items = pXML.getElementsByTagName("item");
// TODO: build the table header and rows from `items` (not implemented yet)
}
Any and all help is appreciated!

I'm not sure if this is the best strategy, but it's working at least.
FYI, I changed the name of the function to more accurately describe what it does.
The main problem I was facing was that it was picking up the empty text nodes, which made navigating the element tree confusing.
// Read the column definitions from the first entry element of an XML document.
//   pXML - XML Document whose root element contains the entry elements
//          (e.g. <xml><item>...</item>...</xml>).
// Returns an array of { elementName, elementValue } objects, one per child
// element of the first entry, in document order. Returns [] when the root has
// no element children. Each column is also logged to the console.
function getColumnNames(pXML) {
  var ELEMENT_NODE = 1;

  // Keep only element nodes. Filtering on nodeType works whether or not the
  // XML is pretty-printed, unlike stepping through childNodes at odd indexes,
  // and avoids `.children`, which some browsers do not expose on XML nodes.
  function elementChildren(node) {
    return Array.prototype.filter.call(node.childNodes, function(child) {
      return child.nodeType === ELEMENT_NODE;
    });
  }

  var firstEntry = elementChildren(pXML.documentElement)[0];
  if (!firstEntry) {
    return [];
  }

  var columns = elementChildren(firstEntry).map(function(child) {
    return {
      "elementName": child.nodeName,
      "elementValue": child.textContent
    };
  });

  columns.forEach(function(column) {
    console.log(column);
  });
  return columns;
}

Related

Google Appscript: Output of parsing function for a Table of Contents results in an array of objects not in the correct format. Help appreciated

I am currently trying to parse the url and text from a table of contents on a google doc and write them into a table on google sheets.
So far I have been successful in getting the text and the url using the following code modified from the top answer in this post (How to use .findElement(DocumentApp.ElementType.TABLE_OF_CONTENTS) to get and parse a Document's Table of Contents Element)
/**
 * Collect the displayed text and link URL of every entry in a document's
 * table of contents.
 *
 * @param {string} docId ID passed to DocumentApp.openById().
 * @return {Object[]} One { text, linkUrl } object per TOC child, in order;
 *     an empty array when the body contains no table of contents.
 */
function parseTOC( docId ) {
  var entries = [];
  var body = DocumentApp.openById(docId).getBody();

  // Only the first TOC found in the body is considered.
  var match = body.findElement(DocumentApp.ElementType.TABLE_OF_CONTENTS);
  if (!match) {
    return entries;
  }

  var toc = match.getElement().asTableOfContents();
  var total = toc.getNumChildren();
  for (var idx = 0; idx < total; idx += 1) {
    // Each TOC child is read as a Paragraph whose first child is a Text element.
    var paragraph = toc.getChild(idx).asParagraph();
    paragraph.getAttributes(); // fetched as before; the result is not used
    var firstText = paragraph.getChild(0).asText();

    var entry = {};
    entry.text = paragraph.getText();        // displayed text
    entry.linkUrl = firstText.getLinkUrl();  // link target inside the document
    entries.push(entry);
  }
  return entries;
}
/**
 * Manual check: runs parseTOC() on the active document and logs the result.
 */
function test_parseTOC() {
  var fileId = '--Doc-ID--'; // placeholder; the active document's ID is used instead
  var activeDocId = DocumentApp.getActiveDocument().getId();
  var entries = parseTOC(activeDocId);
  Logger.log(entries);
}
The function test_parseTOC returns the following array:
[{linkUrl=#heading=h.nyq88bov1u8x, text=Google}, {text=Help, linkUrl=#heading=h.9lthewlyeqjd}]
Although the information is correct, the "=" is giving me trouble. When working with an array of objects I would expect it to be:
[{linkUrl: "#heading=h.nyq88bov1u8x", text: "Google"}, {text: "Help", linkUrl: "#heading=h.9lthewlyeqjd"}]
What I am trying to emulate can be outlined by this post here (Google Apps Script: how to copy array of objects to range?)
If I manually change the "=" to ":" and add quotation marks then the array works fine. Is there something with the original function I can change to produce a ":" instead of a "="? Or is there a way to modify the array of objects after the fact to replace the "=" with ":"? I could easily use concat to automatically add the quotation marks, but the "=" is out of my wheelhouse.
Thank you for any input on this
Update: It actually turns out that this code was working appropriately and I was receiving an error from a different function that was using the output of this one (i.e. I wrote .getSheets(0), not .getSheets()[0]). I will leave up the original post in case anyone needs a working example of how to extract links from a TOC, along with the resources to write them to a Google Sheet.

Split multiple JSON strings into a structured table using Google Apps Script

I am trying to split a data set with an ID and JSON string into a structured table.
The difficult part is that I need it to be dynamic: the JSON string varies often, and I want the headings to be determined by the unique values in the input column at that time. I need the script to be able to create headings if the string changes, without needing to recode the script.
We have about 150 different JSON strings we are hoping to use this script on, without recoding it for each one. Each string has lots of data points.
I have a script working but it splits them one by one, need to build something that will do bulk in one go, by looping through all outputs in B and creating a column for each unique field in all the strings, then populating them.
The script works if I paste the additional info straight in, however I am having trouble reading from the sheet
// Fragment of the asker's script: the enclosing function header is not shown,
// which is why the final closing brace has no visible opener.
var inputsheet = SpreadsheetApp.getActive().getSheetByName("Input");
var outputsheet = SpreadsheetApp.getActive().getSheetByName("Current Output");
// getRange(1,1) addresses a single cell (row 1, column 1), so only that one
// value is read here, not the whole input column.
var additionalinfo = inputsheet.getRange(1,1).getValue()
Logger.log(additionalinfo)
var rows = [],
data;
// NOTE(review): i, j, dataq and dataRange are assigned without a declaration,
// and the declared `data` is never used below.
for (i = 0; i < additionalinfo.length; i++) {
for (j in additionalinfo[i]) {
dataq = additionalinfo[i][j];
Logger.log(dataq);
rows.push([j, dataq]);
}
// Runs on every outer iteration, rewriting the range with all [key, value]
// rows collected so far.
dataRange = outputsheet.getRange(1, 1, rows.length, 2);
dataRange.setValues(rows);
}
}
Here is a link to the sample data. Note that in Sample 1 & 2 there are different headings, we need the script to identify this and create headings for both
https://docs.google.com/spreadsheets/d/1BMiVuAgDbibLw6yUG3IZ9iw4MZTaVVegkw_k3ItQ4mU/edit#gid=0
Try this script that produces dynamic headers based on the json that has been read. It collects all json data, get its keys, and remove the duplicates.
Script:
/**
 * Expand the JSON strings on the "Input" sheet into a table on the
 * "Current Output" sheet.
 *
 * Input layout: row 1 is a header row; column A holds the ID and column B the
 * JSON string. If a parsed object has a truthy `additionalInfo` property, that
 * nested object is used; otherwise the parsed object itself is used.
 *
 * Output layout: the first header is whatever A1 contains, followed by every
 * distinct key in order of first appearance. Keys missing from a row are
 * written as empty strings. A blank JSON cell yields a row with only the ID.
 */
function JSON_SPLITTER() {
  const spreadsheet = SpreadsheetApp.getActive();
  const inputsheet = spreadsheet.getSheetByName("Input");
  const outputsheet = spreadsheet.getSheetByName("Current Output");
  const additionalinfo = inputsheet.getDataRange().getValues();

  // Use the real A1 header for the ID column rather than a hard-coded name,
  // so the ID column is filled whatever the sheet calls it.
  const idHeader = additionalinfo[0][0];
  const bodyRows = additionalinfo.slice(1);

  // Parse each JSON cell exactly once.
  const records = bodyRows.map((row) => {
    const parsed = row[1] === "" ? {} : JSON.parse(row[1]);
    return parsed.additionalInfo || parsed;
  });

  // Distinct keys across all records, in order of first appearance.
  const keys = [...new Set(records.flatMap((record) => Object.keys(record)))];

  const output = [[idHeader, ...keys]];
  bodyRows.forEach((row, index) => {
    const record = records[index];
    const cells = keys.map((key) => (record[key] === undefined ? "" : record[key]));
    output.push([row[0], ...cells]);
  });

  outputsheet.getRange(1, 1, output.length, output[0].length).setValues(output);
}
Output:
Update:
Modified script for no-additionalInfo key objects.

Grab data from website HTML table and transfer to Google Sheets using App-Script

Ok, I know there are similar questions out there to mine, but so far I have yet to find any answers that work for me. What I am trying to do is gather data from an entire HTML table on the web (https://www.sports-reference.com/cbb/schools/indiana/2022-gamelogs.html) and then parse it/transfer it to a range in my Google Sheet. The code below is probably the closest thing I've found so far because at least it doesn't error out, but it will only find one string or value, not the whole table. I've found other answers where they use xmlservice.parse, however that doesn't work for me, I believe because the HTML format has issues that it can't parse. Does anyone have an idea of how to edit what I have below, or a whole new idea that may work for this website?
/**
 * Fetch the page and return a fixed 10-character slice of its HTML, located
 * relative to the first occurrence of "td class".
 *
 * @return {string} Characters [index+92, index+102) of the response text,
 *     where index is the position of the first "td class". The slice is also
 *     logged.
 */
function SAMPLE() {
  const url="http://www.sports-reference.com/cbb/schools/indiana/2022-gamelogs.html#sgl-basic?"
  // Get all the static HTML text of the website
  const res = UrlFetchApp.fetch(url, {muteHttpExceptions: true}).getContentText();
  // Position of the marker being searched for. Declared locally: the original
  // assigned to undeclared `index` and `sub`, creating implicit globals.
  const index = res.indexOf("td class");
  // The offsets are hard-coded to skip the tag and attribute text after the marker.
  const sub = res.substring(index+92,index+102);
  Logger.log(sub);
  return sub;
}
I understand that I can use importHTML natively in a Google Sheet, and that's what I'm currently doing. However, I am doing this for over 350 webpage tables, iterating through each one to load it and then copy the value to another sheet. Apps Script bogs down quite a bit when it is repeatedly waiting on Sheets to load an importHTML, grab some data, and then do it all over again on another URL. I apologize for any formatting issues in this post or things I've done wrong; this is my first time posting here.
Edit: ok, I've found a method that works, but it's still much slower than I would like, because it is using Drive API to create a document with the HTML data and then parse and create an array from there. The Drive.Files.Insert line is the most time consuming part. Anyone have an idea of how to make this quicker? It may not seem that slow to you right now, but when I need to do this 350 times, it adds up.
/**
 * Fetch the page, convert it to a temporary Google Doc through the Drive
 * service, read the text of every table cell, and write the first table to
 * the "Test" sheet starting at row 3, column 6.
 *
 * The temporary document is removed even when reading the tables fails.
 *
 * @throws {Error} When the converted document contains no table rows.
 */
function parseTablesFromHTML() {
  var html = UrlFetchApp.fetch("https://www.sports-reference.com/cbb/schools/indiana/2022-gamelogs.html");
  var docId = Drive.Files.insert(
    { title: "temporalDocument", mimeType: MimeType.GOOGLE_DOCS },
    html.getBlob()
  ).id;

  var res;
  try {
    res = DocumentApp.openById(docId)
      .getBody()
      .getTables()
      .map(function(table) {
        var values = [];
        for (var row = 0; row < table.getNumRows(); row++) {
          var temp = [];
          var cols = table.getRow(row);
          for (var col = 0; col < cols.getNumCells(); col++) {
            temp.push(cols.getCell(col).getText());
          }
          values.push(temp);
        }
        return values;
      });
  } finally {
    // Clean up in `finally` so a parsing error does not leave the temporary
    // document behind in Drive.
    Drive.Files.remove(docId);
  }

  // Fail with a clear message instead of a TypeError on res[0][0].
  if (res.length === 0 || res[0].length === 0) {
    throw new Error("No table rows found in the fetched page");
  }

  var range=SpreadsheetApp.getActive().getSheetByName("Test").getRange(3,6,res[0].length,res[0][0].length);
  range.setValues(res[0]);
  SpreadsheetApp.flush();
}
Solution by formula
Try
=importhtml(url,"table",1)
Other solution by script
/**
 * Fetch the page and extract its table markup into a 2-D array of cell text
 * using regular expressions only (no temporary document).
 *
 * @return {string[][]} One inner array per <tr>, one string per <td>/<th>
 *     with nested tags stripped; [] when the page contains no <table>.
 *
 * NOTE: line breaks, tabs AND spaces are removed from the page before
 * matching, so multi-word cell text comes back without its spaces.
 */
function importTableHTML() {
  var url = 'https://www.sports-reference.com/cbb/schools/indiana/2022-gamelogs.html'
  // Collapse the page onto one line so `.` can span what were separate lines.
  var flat = UrlFetchApp.fetch(url, {muteHttpExceptions: true}).getContentText().replace(/(\r\n|\n|\r|\t| )/gm,"");
  // Everything between the first "<table" and the last "</table".
  var tableMatch = flat.match(/(?<=\<table).*(?=\<\/table)/g);
  if (!tableMatch) {
    return [];
  }
  var html = '<table' + tableMatch[0] + '</table>';
  var trs = [...html.matchAll(/<tr[\s\S\w]+?<\/tr>/g)];
  var data = [];
  for (var i=0;i<trs.length;i++){
    var tds = [...trs[i][0].matchAll(/<(td|th)[\s\S\w]+?<\/(td|th)>/g)];
    var prov = [];
    for (var j=0;j<tds.length;j++){
      // Declared locally; the original assigned to an undeclared `donnee`.
      var donnee = tds[j][0].match(/(?<=\>).*(?=\<\/)/g)[0];
      prov.push(stripTags(donnee));
    }
    data.push(prov);
  }
  return(data);
}
/**
 * Remove every "<...>" tag from a string, keeping the text between tags.
 *
 * @param {string} body Markup to clean.
 * @return {string} The input with all tags deleted.
 */
function stripTags(body) {
  return body.replace(/(<([^>]+)>)/ig, "");
}

javascript help - doing something new in a old school way

UPDATE :
Some have said that they were able to get more than 1 childNode...
Here is my fiddle - I am only getting 1 childNode to display.
Where is the error?
ORIGINAL Question
Below is a partial snippet of javascript code that I have inherited. Basically this function used to get XML data by calling an AJAX function. However, due to requirement changes I am generating an XML string and storing that string in hidden input variable on the screen (Classic ASP).
After looking closer at the original script I found that it would be nice if I could somehow pass my xml string into the cmdxml variable. However, when I set cmdxml equal to my xml string: cmdxml = $.parseXML(xmlVal); and then try to use the snippet below it only gets 1 child node. I've included a small snippet of the xml string that I'm passing below.
Old Javascript Function (partial) using cmdxml:
if (req.responseXML!=null) {
var PropName;
var PropValue;
var cmdxml = req.responseXML.documentElement;
// read each document element child node in the XML document
for (var c =0;c<cmdxml.childNodes.length;c++) {
var m;
var t = cmdxml.childNodes[c]; //req.responseXML.documentElement.childNodes[c]
if (t!=null) {
//console.log(t.nodeName);
switch(t.nodeName) { //req.responseXML.documentElement.childNodes[c].nodeName
case "RObject": { //response object
var RObject = t;
//req.responseXML.documentElement.childNodes[c].nodeName.attributes[2].value
var CtrlChangeType = RObject.attributes[2].value;
var CtrlObjName = RObject.attributes[1].value;
var CtrlObjType = RObject.attributes[0].value;
var CtrlObj;
var RObjProp = RObject.getElementsByTagName("Property");
PropName = RObjProp[0].attributes[0].value;
PropValue = getElementText(RObjProp[0].getElementsByTagName("Value")[0]);
switch (CtrlChangeType) { //req.responseXML.documentElement.childNodes[c].nodeName.attributes[0].value
case "comboboxInsRow": {
Here is a snippet of my xml string that I'm passing:
<?xml version="1.0" ?><xCMDS><JCallBack ProgramName="x"><Value><![CDATA[top.closeCtrlLoading();]]></Value></JCallBack><RObject Type="E" Name="gH2ptObj_co_code" ChangeType="objProp" rowNum="" colNum=""><Property Name="value"><Value><![CDATA[]]></Value></Property></RObject>
parseXML returns an XMLDocument. You'll need to set cmdxml to $.parseXML('snippet').documentElement to access the childNodes (childNodes is a property of nodes, usually not available in documents).
Your fiddle returns a childNode, but this is the root-element, you like to access the childNodes of the root-element.

XML to javascript array with jQuery

I am new to XML and AJAX and am only a newcomer to Javascript and jQuery. Among other job duties I design our website. A deadline is very near, and the only way I can think of to do this project well is with AJAX. I have a document full of XML objects such as this one repeating:
<item>
<subject></subject>
<date></date>
<thumb></thumb>
</item>
I want to create an array of all elements and their child elements. I've been reading jQuery tutorials on AJAX for hours and don't even know where to start because they all assume a certain level of javascript proficiency. If someone could show me the easiest way to loop through all elements and put their children into an array, I'd appreciate it tons.
Using jQuery, $.ajax() your XML file, and on success pass retrieved data with each, like:
// Holders for the fields of the <item> currently being visited.
var tmpSubject, tmpDate, tmpThumb;
$.ajax({
  url: '/your_file.xml',
  type: 'GET',
  dataType: 'xml',
  success: function(returnedXMLResponse){
    // Visit every <item> in the response and pull out its three child values.
    $(returnedXMLResponse).find('item').each(function(){
      var $item = $(this);
      tmpSubject = $item.find('subject').text();
      tmpDate = $item.find('date').text();
      tmpThumb = $item.find('thumb').text();
      // Use the three values here, e.g. insert them into the page or
      // collect them in an array or object.
    });
  }
});
I wrote this: a pretty simple way to take a well-formed XML response/string, parse it with jQuery, and then convert it to an array.
// Usage example: parse an XML string with jQuery, convert it with
// getXMLToArray(), and read a nested value by element name.
var response = '<?xml version="1.0" encoding="UTF-8"?><root><node1>something</node1></root>'
var xmlDoc = $.parseXML( response );
var myArray = getXMLToArray(xmlDoc);
alert(myArray['root']['node1']);
// Shows a pop-up window displaying the text "something"
// Convert an XML document (or a jQuery-wrapped node) into a structure keyed
// by element name. For each child, the elements found by name under xmlDoc
// are converted recursively when they have children of their own; otherwise
// their text is stored. The result is an Array object used as a name -> value map.
function getXMLToArray(xmlDoc){
  var result = [];
  // .each() on an empty set does nothing, so no separate length check is needed.
  $(xmlDoc).children().each(function(){
    var tag = this.nodeName;
    // Lookup is by element name under xmlDoc, not by this specific child.
    var $named = $(xmlDoc).find(tag);
    result[tag] = $named.children().length > 0
      ? getXMLToArray($named)
      : $named.text();
  });
  return result;
}
Hope this helps!!
If you are using jQuery then parseXML will suck an entire xml doc into a usable data structure.
I added to your script Troublesum
// Variant of getXMLToArray that keeps repeated leaf elements. Elements that
// have children are converted recursively (looked up by name under xmlDoc).
// Leaf elements are stored as an array holding the text of every direct
// child of xmlDoc with that name, in document order.
function getXMLToArray(xmlDoc){
  var result = [];
  // .each() on an empty set does nothing, so no separate length check is needed.
  $(xmlDoc).children().each(function(){
    var tag = this.nodeName;
    var $named = $(xmlDoc).find(tag);
    if ($named.children().length > 0) {
      result[tag] = getXMLToArray($named);
      return;
    }
    // Leaf: start from an empty list and add the text of each same-named sibling.
    result[tag] = [];
    $(xmlDoc).children(tag).each(function(){
      result[tag].push($(this).text());
    });
  });
  return result;
}
It now also supports multiple children with the same name in the XML, e.g.
XML being
<countries>
<NL>
<borders>
<country>Germany</country>
<country>Belgium</country>
With the array returned by getXMLToArray, countries.NL.borders.country[0] will give Germany.

Categories

Resources