Confused about XML DOM object - javascript

I am using the below JavaScript to try to get a node name from an xml document.
// Create the request object: the native XMLHttpRequest when the browser
// provides one, otherwise an ActiveXObject("Microsoft.XMLHTTP") instance.
if (window.XMLHttpRequest) {
xhttp = new XMLHttpRequest();
} else {
xhttp = new ActiveXObject("Microsoft.XMLHTTP");
}
// Third argument false: the request is synchronous, so responseXML is
// read immediately after send() returns.
xhttp.open("GET", "doc.xml", false);
xhttp.send();
xmlDoc = xhttp.responseXML;
// Show the name of the first child NODE of the first <DesignPatterns> element.
// childNodes contains every node type (including text nodes), not only elements.
alert(xmlDoc.getElementsByTagName("DesignPatterns")[0].childNodes[0].nodeName);
// NOTE(review): no matching opening brace is visible in this snippet —
// presumably this closes an enclosing function that was not pasted; confirm.
}
The XML document...
<DesignPatterns>
<Composite>Composite</Composite>
</DesignPatterns>
The alert is outputting '#text'. I expected it to output 'Composite'. Why is this happening? Is there some invisible and magical #text node that I am not seeing? I assume it's the \n that I see in the debugger, but this just makes no sense to me :(

That is because of the formatting characters (whitespace, newlines, etc.) in your formatted XML. Everything in the document is treated as a node, so each run of contiguous whitespace between elements becomes a text node. You could use children instead of childNodes; children contains only element nodes, so it will not include text nodes.
xmlDoc.getElementsByTagName("DesignPatterns")[0].children[0].nodeName

Related

Only parse necessary html data and skip unwanted html data

I am working on a script that gets a url and parses all of its html. But it only uses the "data-style-name" "href" "data-sold-out" and the "select".
this is how I parse the html:
/**
 * Synchronously fetches a URL and returns the response.
 *
 * @param {string} urlSource - URL to request with GET.
 * @returns {*} The request's `response` property once send() has returned.
 */
function loadHTMLSource(urlSource) {
    // Declared locally so the request object does not leak into the global
    // scope; assigning to an undeclared name also throws in strict mode.
    var xhttp = new XMLHttpRequest();
    // Third argument false: send() blocks until the response has arrived.
    xhttp.open("GET", urlSource, false);
    xhttp.send();
    return xhttp.response;
}
// Fetch the response for `url` (defined outside this snippet).
var page_html = loadHTMLSource(url);
// Parse the fetched markup as "text/html" so it can be queried with
// querySelector / querySelectorAll.
parser = new DOMParser();
my_document = parser.parseFromString(page_html, "text/html");
and I'm only pulling info from these HTML elements and attributes:
my_document.querySelectorAll("[data-style-name]");
attributes["data-sold-out"].value
my_document.querySelector("meta[name='csrf-token']");
my_document.querySelector('select');
Is it possible to pull only these specific parts of the HTML, so I don't end up pulling data that I don't need?
Any help is appreciated. Thank You.

Carriage returns and tabs cause JavaScript to not parse XML node

I'm parsing an XML file and having issues that seem to have to do with carriage returns (or maybe line feeds) and tabs (or white space) in the XML file. They trip up my JavaScript and return nothing instead of the data.
Here's a snippet of the JS code:
// Request the XML file and keep the parsed document once the request completes.
var xhttp = new XMLHttpRequest();
xhttp.onreadystatechange = function() {
// readyState 4 means the request is done; only a 200 response is accepted.
if (xhttp.readyState == 4 && xhttp.status == 200) {
fullXMLContent = xhttp.responseXML;
}
};
// Third argument true: the request is asynchronous.
xhttp.open("GET", xmlPathname, true);
xhttp.send();
// NOTE(review): the request above is asynchronous, so unless fullXMLContent is
// assigned elsewhere it may not be set yet when the next line runs — confirm.
var navLocationMain = fullXMLContent.getElementsByTagName("locationMainTitle");
// Reads the data of the FIRST child node only. childNodes includes text nodes,
// so when whitespace precedes the CDATA section inside the element, that first
// child is the whitespace text node rather than the CDATA section.
projectLocationMain = navLocationMain[0].childNodes[0].data;
document.getElementById("Nav_Top_Left-courseLocationMain").innerHTML = projectLocationMain;
This XML works and returns "INTRODUCTION":
<locationMainTitle><![CDATA[<div style="margin-top: 12px">INTRODUCTION </div>]]></locationMainTitle>
This XML doesn't work and returns nothing:
<locationMainTitle>
<![CDATA[<div style="margin-top: 12px">INTRODUCTION </div>]]>
</locationMainTitle>
The only difference is the carriage returns and tabs in the second example. This happens in all browsers I've tested so far.
I'm trying to preserve the readability and easier editing of the formatting that includes carriage returns and tabs in the XML file.
I saw one reference that mentioned that a CR, LF, tab or other white space can cause problems when parsing XML, but I don't know how to get around that in my JS and still preserve the formatting in the XML file.

Get DOM from a string

Given a String which contains HTML for an entire page, I want only the innerHTML of the body. Rather than parse the HTML myself, it seems easier if I could make an element from the String, and then just get the body directly.
I found some things related, but that I couldn't get to work (I can't find the question anymore).
// Fetch the page and alert its full response text.
xmlhttp = new XMLHttpRequest();
xmlhttp.onreadystatechange=function()
{
// Runs on every readyState change; the text is captured before the state check
// and the + "" forces it to a string.
var ret = xmlhttp.responseText + "";
// Only show the text once the request is done (4) with a 200 status.
if(xmlhttp.readyState==4 && xmlhttp.status==200)
{
alert(ret);
}
}
// Third argument false: the request is synchronous.
xmlhttp.open("GET", "http://url.php", false);
xmlhttp.send();
Right now I have this ajax request, but I need only the body from the return.
I tried document.createElement(ret).body and new Element(ret).body but they didn't seem to work.
// Let the browser parse the markup: assign the response text to a detached
// <html> element, then look up the <body> element inside it.
// NOTE(review): `ret` is presumably the response text from the request handler;
// it must be in scope here — confirm.
var helper = document.createElement("html");
helper.innerHTML = ret;
body = helper.querySelector("body"); //Or getElementsByTagName("body")[0]
You could use simple_html_dom to do this, and get the HTML of the entire page using PHP, and then get only the contents of the body, like this
// Load the page with simple_html_dom and print the text content of its <body>.
$html=file_get_html("url.php");
// Passing index 0 makes find() return the first matching element instead of
// an array of matches, so ->plaintext can be read from it directly.
$body=$html->find("body", 0);
echo $body->plaintext;

When is a JavaScript function without a name called? And is it possible to explicitly call it

I'm new to JavaScript and I'm not too familiar with the syntax.
I want to know how the function
{
/*
Data is split into distributions and relations - so append them
*/
//getURLParameters();
/*** Read in and parse the Distributome.xml DB ***/
// createAjaxRequest() is defined elsewhere; presumably it returns an
// XMLHttpRequest-like object — confirm.
var xmlhttp=createAjaxRequest();
var xmlDoc, xmlDoc1;
// Third argument false: each request below is synchronous.
xmlhttp.open("GET","distributome-relations.xml",false);
xmlhttp.send();
xmlDoc = xmlhttp.responseXML;
xmlhttp=createAjaxRequest();
xmlhttp.open("GET","distributome-references.xml",false);
xmlhttp.send();
xmlDoc1 = xmlhttp.responseXML;
// Deep-copy (second argument true) the first <references> element from the
// references document into the relations document, then append it to the
// first <distributome> element.
var node = xmlDoc.importNode(xmlDoc1.getElementsByTagName("references").item(0), true);
xmlDoc.getElementsByTagName("distributome").item(0).appendChild(node);
// Prefer the children of the document element; if reading them throws,
// fall back to the children of the document itself.
try{
DistributomeXML_Objects=xmlDoc.documentElement.childNodes;
}catch(error){
DistributomeXML_Objects=xmlDoc.childNodes;
}
// traverseXML is defined elsewhere; it receives the node list together with
// the distributome collections and the distributomeNodes / referenceNodes values.
traverseXML(false, null, DistributomeXML_Objects, distributome.nodes, distributome.edges, distributome.references, distributomeNodes, referenceNodes);
xmlhttp=createAjaxRequest();
xmlhttp.open("GET","Distributome.xml.pref",false);
xmlhttp.send();
// If the response was not parsed into a document element but a response
// stream is available, load the XML from that stream instead.
if (!xmlhttp.responseXML.documentElement && xmlhttp.responseStream)
xmlhttp.responseXML.load(xmlhttp.responseStream);
var ontologyOrder = xmlhttp.responseXML;
getOntologyOrderArray(ontologyOrder);
//console.log("firstread: xmlDoc: " xmlDoc);
}
I'm trying to print the contents of xmlDoc so I can examine the contents the next time I assign values to the variable so I know that I am creating a valid document.
For the purposes of clarification - I am using ProtoVis and want to redraw the nodes after I upload an XML file containing some data.
The first issue I need to fix is creating a valid xmlDoc so that I can display newer information and so wanted to view xmlDoc contents so I could compare it to the next time xmlDoc is assigned a value using the file uploaded.
Thanks for your time!
Edit: I realised that the console.log was missing a comma.
Edit: My question is when the function is executed - given as I can't explicitly call it
Your syntax is wrong. You are missing a + to concatenate the strings:
console.log("firstread: xmlDoc: " + xmlDoc);

XSL processing a fragment in IE

I am trying to use Javascript to transform part of a XML file to a single row of an HTML table. The thought is that I will create multiple rows in the table from multiple XML files. For Firefox and Opera, this nice little chunk of code works beautifully.
// Build a single table by appending the XSL-transformed output of every XML
// file named in xmlNames, then attach the table to the "theDoc" element.
var resTable = document.createElement('table');
for (i = 0; i < xmlNames.length; i++)
{
// code for IE
// (this branch is empty in this version: nothing is done when ActiveXObject exists)
if (window.ActiveXObject)
{
}
// code for Mozilla, Firefox, Opera, etc.
else if (document.implementation && document.implementation.createDocument)
{
// Load the current XML file and the stylesheet, transform the XML into a
// fragment owned by this page's document, and append it to the table.
xml=loadXMLDoc(xmlNames[i]);
xsl=loadXMLDoc(xslName);
xsltProcessor=new XSLTProcessor();
xsltProcessor.importStylesheet(xsl);
resultDocument = xsltProcessor.transformToFragment(xml,document);
resTable.appendChild(resultDocument);
}
}
document.getElementById("theDoc").appendChild(resTable);
The problem is that I have tried a thousand things in the "if IE" part, and nothing ever works. I've done a lot of googling, and browsing here before I asked, but to no avail. In fact, there is an unanswered question on SO that sounds very similar, but there were no responses and no resolution, so I was hoping someone would be able to help me out here..
I've been successful in getting an entire doc to transform on IE, but the fact that I want to do this as a fragment is what's causing my problem. Any help would be much appreciated! Thanks!
Edit: Sorry, I forgot to provide my loadXMLDoc function, in case that's important. Here it is:
/**
 * Synchronously fetches a document and returns it parsed as XML.
 *
 * @param {string} dname - URL of the document to request with GET.
 * @returns {*} The request's `responseXML` property once send() has returned.
 */
function loadXMLDoc(dname)
{
    // Declared locally so the request object does not leak into the global
    // scope; assigning to an undeclared name also throws in strict mode.
    var xhttp;
    if (window.XMLHttpRequest)
    {
        xhttp=new XMLHttpRequest();
    }
    else
    {
        // Fallback for browsers that only expose the request object via ActiveX.
        xhttp=new ActiveXObject("Microsoft.XMLHTTP");
    }
    // Third argument false: send() blocks until the response has arrived.
    xhttp.open("GET",dname,false);
    xhttp.send("");
    return xhttp.responseXML;
}
After much more trial and error, I came up with something that works. Here's what I did:
Create a xsltProcessor as usual, and call the transform method. This results in xsltProcessor.output being a HTML formatted string. Of course, I want a DOM element, so I had to convert the HTML string to a DOM. Luckily, because I'm the author of the XSL stylesheet too, I know exactly what I'm expecting to come back. In my case, the output HTML string would be some number of <tr>...</tr> elements. I initially tried setting my resTable (a table DOM element) innerHTML to the output string, but that did not work. I'm still not sure why, but it seems like it has something specific to do with the fact they were <tr>s and it wasn't able to be parsed when set to innerHTML outside the context of a table tag.
At any rate, I created a temporary div element and set its innerHTML to a string consisting of the xsltProcessor's output string wrapped in <table></table> tags. Now the temp div element contains a DOM table, which I then stepped through and grabbed just the child nodes (which are the tr nodes that the XSL processor returned in the first place). Seems kind of ridiculous to do all this, but it works, and that's the first time I can say that. Here's the final version that works in all the browsers I've tested:
// Build a single table by appending the XSL-transformed output of every XML
// file named in xmlNames, then attach the table to the "theDoc" element.
var resTable = document.createElement('table');
for (var i = 0; i < xmlNames.length; i++)
{
    // code for IE
    if (window.ActiveXObject)
    {
        var xml = new ActiveXObject("Microsoft.XMLDOM");
        xml.async = false;
        xml.load(xmlNames[i]);
        var xslt = new ActiveXObject("Msxml2.XSLTemplate");
        var xsl = new ActiveXObject("Msxml2.FreeThreadedDOMDocument.3.0");
        var xsltProcessor;
        xsl.async = false;
        xsl.resolveExternals = false;
        xsl.load(xslName);
        xslt.stylesheet = xsl;
        xsltProcessor = xslt.createProcessor();
        xsltProcessor.input = xml;
        //This transform results in one or more tr.../tr HTML tag(s)
        xsltProcessor.transform();
        //Create a temp div element which is used to convert the HTML
        //string to a DOM element so I can grab just the part I want..
        var tmp = document.createElement('div');
        //Can't set innerHTML to tr tags directly I guess, so have to put
        //in context of a table so it can be parsed...
        tmp.innerHTML = "<table>" + xsltProcessor.output + "</table>";
        //Now grab the children from inside the temporary table node (typically
        //a parser-generated tbody holding the rows), since the table was only
        //there to please the parser.
        var tmpTable = tmp.childNodes[0];
        //childNodes is a live list and appendChild MOVES the node out of it,
        //so walking it by increasing index would skip every other node.
        //Draining from the front moves every child exactly once, in order.
        while (tmpTable.firstChild)
        {
            resTable.appendChild(tmpTable.firstChild);
        }
    }
    // code for Mozilla, Firefox, Opera, etc.
    else if (document.implementation && document.implementation.createDocument)
    {
        xml=loadXMLDoc(xmlNames[i]);
        xsl=loadXMLDoc(xslName);
        xsltProcessor=new XSLTProcessor();
        xsltProcessor.importStylesheet(xsl);
        var resultDocument = xsltProcessor.transformToFragment(xml,document);
        resTable.appendChild(resultDocument);
    }
}
//put the full table at the div location "theDoc" now..
document.getElementById("theDoc").appendChild(resTable);
Not sure how often folks try to do this, but hopefully this helps someone else out there..

Categories

Resources