troubles trying to parse an html string with DOMParser

troubles trying to parse an html string with DOMParser - javascript

Here is the snippet:
// Assemble a minimal, complete HTML document as a single string.
html = "<!doctype html>";
html += "<html>";
html += "<head><title>test</title></head>";
html += "<body><p>test</p></body>";
html += "</html>";
// Parse the string as "text/html". Per the error message quoted below, this
// parseFromString call is the one that fails with NS_ERROR_NOT_IMPLEMENTED.
parser = new DOMParser();
dom = parser.parseFromString (html, "text/html");
Here is the error I get when trying to execute these lines:
Error: Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIDOMParser.parseFromString]
I tried to figure out what is going on, but the code seems to be right. I searched the web and found no clues, so I am asking here.
Have you encountered this failure before? If so, where is the bug hiding?

You should use the DOMParser extension described in the question "JavaScript DOMParser access innerHTML and other properties".
I created a fiddle for you: http://jsfiddle.net/CSAnZ/
/*
* DOMParser HTML extension
* 2012-02-02
*
* By Eli Grey, http://eligrey.com
* Public domain.
* NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
*/
/*! @source https://gist.github.com/1129031 */
/*global document, DOMParser*/
// Patches DOMParser.prototype.parseFromString so that "text/html" input is
// handled even when the native implementation rejects that type. The patch is
// skipped entirely when the native parser already returns a document for it.
(function(DOMParser) {
"use strict";
var DOMParser_proto = DOMParser.prototype
, real_parseFromString = DOMParser_proto.parseFromString;
// Feature test: try a native text/html parse of an empty string.
// Firefox/Opera/IE throw errors on unsupported types
try {
// WebKit returns null on unsupported types
if ((new DOMParser).parseFromString("", "text/html")) {
// text/html parsing is natively supported
return;
}
// The exception is deliberately ignored: a throw means "unsupported", so
// execution falls through and installs the replacement below.
} catch (ex) {}
DOMParser_proto.parseFromString = function(markup, type) {
// Matches "text/html", case-insensitively, optionally followed by ";..." parameters.
if (/^\s*text\/html\s*(?:;|$)/i.test(type)) {
// Build an empty HTML document and inject the markup into its root element.
var doc = document.implementation.createHTMLDocument("")
, doc_elt = doc.documentElement
, first_elt;
doc_elt.innerHTML = markup;
first_elt = doc_elt.firstElementChild;
// If the injected markup produced exactly one child and that child is itself
// an <html> element, promote it to be the document's root element.
if (doc_elt.childElementCount === 1
&& first_elt.localName.toLowerCase() === "html") {
doc.replaceChild(first_elt, doc_elt);
}
return doc;
} else {
// Every other type is delegated to the original native implementation.
return real_parseFromString.apply(this, arguments);
}
};
}(DOMParser));

Related

XPathEvaluator in Firefox addon

I am attempting to follow this article to evaluate an XPath expression. My code is copy/pasted from the article:
// Evaluate an XPath expression aExpression against a given DOM node
// or Document object (aNode), returning the results as an array
// thanks wanderingstan at morethanwarm dot mail dot com for the
// initial work.
/**
 * Evaluate an XPath expression against a node or document and collect every
 * match into an array.
 *
 * @param {Node} aNode - Context node (a Document or any node inside one).
 * @param {string} aExpr - XPath expression to evaluate.
 * @returns {Array} The nodes yielded by the result iterator, in iteration order.
 */
function evaluateXPath(aNode, aExpr) {
  const evaluator = new XPathEvaluator();
  // A Document has no ownerDocument, so it supplies its own root element;
  // any other node borrows the root element of the document that owns it.
  const owner = aNode.ownerDocument;
  const root = owner == null ? aNode.documentElement : owner.documentElement;
  const resolver = evaluator.createNSResolver(root);
  const iterator = evaluator.evaluate(aExpr, aNode, resolver, 0, null);
  const matches = [];
  for (let node = iterator.iterateNext(); node; node = iterator.iterateNext()) {
    matches.push(node);
  }
  return matches;
}
However, I'm getting this error:
Message: ReferenceError: XPathEvaluator is not defined
Is Mozilla's article out of date, perhaps? Is there a more up-to-date article available on parsing XML in an SDK add-on?
Edit. When I tried it this way:
// Cc / Ci are the SDK's shortcuts for the XPCOM class and interface tables.
var {Cc, Ci} = require("chrome");
// XPCOM contract IDs begin with "@"; look the XPath evaluator component up by
// its contract ID and request the nsIDOMXPathEvaluator interface on it.
var domXPathEvaluator = Cc["@mozilla.org/dom/xpath-evaluator;1"].createInstance(Ci.nsIDOMXPathEvaluator);
I got a long error message:
- message = Component returned failure code: 0x80570019 (NS_ERROR_XPC_CANT_CREATE_WN) [nsIJSCID.createInstance]
- fileName = undefined
- lineNumber = 14
- stack = @undefined:14:undefined|@resource://helloworld-addon/index.js:14:25|run@resource://gre/modules/commonjs/sdk/addon/runner.js:145:19|startup/</<@resource://gre/modules/commonjs/sdk/addon/runner.js:86:7|Handler.prototype.process@resource://gre/modules/Promise-backend.js:920:23|this.PromiseWalker.walkerLoop@resource://gre/modules/Promise-backend.js:799:7|this.PromiseWalker.scheduleWalkerLoop/<@resource://gre/modules/Promise-backend.js:738:39|Promise*this.PromiseWalker.scheduleWalkerLoop@resource://gre/modules/Promise-backend.js:738:7|this.PromiseWalker.schedulePromise@resource://gre/modules/Promise-backend.js:762:7|this.PromiseWalker.completePromise@resource://gre/modules/Promise-backend.js:705:7|handler@resource://gre/modules/commonjs/sdk/addon/window.js:56:3|
- toString = function () /* use strict */ toString
edit 2. Here, I'll just post my whole code, because it's clear something stranger than I thought is going on. I've created a hello-world addon using the Mozilla tutorials including this one to display a popup. I've modified that further so that it will append text to a file, and modified that further to, I hope, parse and modify XML. So the resulting add-on is supposed to take text entered in the popup and append it to an XML file.
// Panel showing text-entry.html, with get-text.js attached as its content script.
var data = require("sdk/self").data;
var text_entry = require("sdk/panel").Panel({
  contentURL: data.url("text-entry.html"),
  contentScriptFile: data.url("get-text.js")
});
// Absolute path of the XML file that is read at start-up and saved by appendFoo().
const fooFile = "/Users/sabrina/Documents/addon/foo.xml";
var {Cc, Ci} = require("chrome");
// XPCOM contract IDs begin with "@"; create a DOM parser component by contract ID.
var parser = Cc["@mozilla.org/xmlextras/domparser;1"].createInstance(Ci.nsIDOMParser);
//var domXPathEvaluator = Cc["@mozilla.org/dom/xpath-evaluator;1"].createInstance(Ci.nsIDOMXPathEvaluator);
// Parse the file's current contents once; `foo` is the in-memory XML document.
// readTextFromFile is a function declaration further down, so it is hoisted.
var foo = parser.parseFromString(readTextFromFile(fooFile), "application/xml");
// Create a button
// Toolbar action button; clicking it invokes handleClick (declared below).
require("sdk/ui/button/action").ActionButton({
id: "show-panel",
label: "Show Panel",
icon: {
"16": "./icon-16.png",
"32": "./icon-32.png",
"64": "./icon-64.png"
},
onClick: handleClick
});
// Show the panel when the user clicks the button.
function handleClick(state) {
text_entry.show();
}
// When the panel is shown, forward a "show" message to its content script.
text_entry.on("show", function() {
text_entry.port.emit("show");
});
// When the content script reports entered text: log it, append it to the
// in-memory XML document via appendFoo, then hide the panel.
text_entry.port.on("text-entered", function (text) {
console.log(text);
// appendTextToFile(text, "/Users/sabrina/Documents/addon/output.txt");
appendFoo(text);
text_entry.hide();
});
/**
 * Append a new <blah> element containing `text` to the <foo> root of the
 * in-memory XML document `foo`, then save the document to `fooFile`.
 *
 * @param {string} text - Text entered by the user; stored as plain text.
 * @throws {Error} If the document has no /foo element to append to.
 */
function appendFoo(text) {
  var newNode = foo.createElement("blah");
  // Store the input as text, not markup: assigning it to innerHTML would parse
  // user input as XML, so a stray "<" or "&" would fail or inject elements.
  newNode.textContent = text;
  var mainFoo = evaluateXPath(foo, '/foo')[0];
  if (!mainFoo) {
    throw new Error("appendFoo: no <foo> root element found in " + fooFile);
  }
  mainFoo.appendChild(newNode);
  // NOTE(review): save() is not part of the standard DOM Document interface;
  // confirm it exists on this document, otherwise serialize and write the file explicitly.
  foo.save(fooFile);
}
/**
 * Evaluate an XPath expression against a node or document and return all
 * matches as an array.
 *
 * @param {Node} aNode - Context node (a Document or a node inside one).
 * @param {string} aExpr - XPath expression to evaluate.
 * @returns {Array} Nodes yielded by the result iterator, in iteration order.
 */
function evaluateXPath(aNode, aExpr) {
  var xpe = new XPathEvaluator();
  // A Document has no ownerDocument, so use its own root element for the
  // namespace resolver; other nodes use their owning document's root.
  var nsResolver = xpe.createNSResolver(aNode.ownerDocument == null ?
    aNode.documentElement : aNode.ownerDocument.documentElement);
  // The evaluate() call had been commented out, leaving `result` undefined;
  // restore it. Result type 0 is XPathResult.ANY_TYPE.
  var result = xpe.evaluate(aExpr, aNode, nsResolver, 0, null);
  var found = [];
  var res;
  while ((res = result.iterateNext())) {
    found.push(res);
  }
  return found;
}
function readTextFromFile(filename) {
var fileIO = require("sdk/io/file");
var text = null;
if (fileIO.exists(filename)) {
var TextReader = fileIO.open(filename, "r");
if (!TextReader.closed) {
text = TextReader.read();
TextReader.close();
}
}
console.log(arguments.callee.name + ": have read " + text + " from " + filename);
return text;
}
function writeTextToFile(text, filename) {
var fileIO = require("sdk/io/file");
var TextWriter = fileIO.open(filename, "w");
if (!TextWriter.closed) {
TextWriter.write(text + "\n");
console.log(arguments.callee.name + ": have written " + text + " to " + filename);
TextWriter.close();
}
/**
 * Append text to a file by reading its current contents and rewriting the
 * whole file with the new text added at the end.
 *
 * @param {string} text - Text to append.
 * @param {string} filename - Path of the file to append to.
 */
function appendTextToFile(text, filename) {
  var existing = readTextFromFile(filename);
  // readTextFromFile returns null for a missing file; treat that as empty so
  // the literal string "null" is never written in front of the text.
  var textplus = (existing === null ? "" : existing) + text;
  writeTextToFile(textplus, filename);
}
I run at the command line using jpm run which opens Firefox Developer Edition. I click the addon button, the popup comes up, I enter text, I hit return, and I see this in the console:
JPM undefined Starting jpm run on Sabrina's Helloworld Addon
Creating XPI
JPM undefined XPI created at /var/folders/gg/r_hp4hzs0gdfy70f__l18fmr0000gn/T/@helloworld-addon-0.0.1.xpi (46ms)
Created XPI at /var/folders/gg/r_hp4hzs0gdfy70f__l18fmr0000gn/T/@helloworld-addon-0.0.1.xpi
JPM undefined Creating a new profile
console.log: helloworld-addon: readTextFromFile: have read <?xml version="1.0" encoding="UTF-8"?>
<foo><blah>eek</blah><foo>
from /Users/sabrina/Documents/addon/foo.xml
console.log: helloworld-addon: ook
console.error: helloworld-addon:
JPM undefined Message: ReferenceError: XPathEvaluator is not defined
Stack:
evaluateXPath@resource://gre/modules/commonjs/toolkit/loader.js -> resource://helloworld-addon/index.js:63:9
appendFoo@resource://gre/modules/commonjs/toolkit/loader.js -> resource://helloworld-addon/index.js:57:19
@resource://gre/modules/commonjs/toolkit/loader.js -> resource://helloworld-addon/index.js:50:2
emitOnObject@resource://gre/modules/commonjs/toolkit/loader.js -> resource://gre/modules/commonjs/sdk/event/core.js:112:9
emit@resource://gre/modules/commonjs/toolkit/loader.js -> resource://gre/modules/commonjs/sdk/event/core.js:89:38
portEmit@resource://gre/modules/commonjs/toolkit/loader.js -> resource://gre/modules/commonjs/sdk/content/sandbox.js:343:7
emitOnObject@resource://gre/modules/commonjs/toolkit/loader.js -> resource://gre/modules/commonjs/sdk/event/core.js:112:9
emit@resource://gre/modules/commonjs/toolkit/loader.js -> resource://gre/modules/commonjs/sdk/event/core.js:89:38
onContentEvent/<@resource://gre/modules/commonjs/toolkit/loader.js -> resource://gre/modules/commonjs/sdk/content/sandbox.js:384:5
delay/<@resource://gre/modules/commonjs/toolkit/loader.js -> resource://gre/modules/commonjs/sdk/lang/functional/concurrent.js:38:20
notify@resource://gre/modules/commonjs/toolkit/loader.js -> resource://gre/modules/commonjs/sdk/timers.js:40:9

Non-authoritative, speculative answer
In a different question, Wladimir Palant (author of Adblock Plus, presumably he has good knowledge of firefox) said:
Yes, a lot of global classes available in the window context aren't there in SDK modules which are sandboxes.
Source: https://stackoverflow.com/a/10522459/3512867
This could explain why XPathEvaluator is not defined in the SDK addon.
The logical conclusion would be to use Firefox's Components object to access the nsIDOMXPathEvaluator interface. Which brings up the following error:
NS_ERROR_XPC_CANT_CREATE_WN
Looking into it takes us to this, from mozillazine's forums user "lithopsian":
That means it can't create a wrapper for a non-javascript interface.
Source: http://forums.mozillazine.org/viewtopic.php?f=19&t=2854793
I am unable to judge the credibility of that statement and while the linked bug reports seem to be relevant, I can not attest they actually are:
https://bugzilla.mozilla.org/show_bug.cgi?id=994964
https://bugzilla.mozilla.org/show_bug.cgi?id=1027095
https://bugzilla.mozilla.org/show_bug.cgi?id=1029104
Unless that information is confirmed (or dispelled) by people with deeper knowledge of Firefox's internal workings, I can only hesitantly conclude that the nsIDOMXPathEvaluator interface simply cannot work in an SDK add-on.

How to using XPath in WebBrowser Control?

In C# WinForms sample application, I have used WebBrowser control. I want to use JavaScript XPath to select single node. To do this, I use XPathJS
But with the following code, the returned value of vResult is always NULL.
// Load a page in a WebBrowser control, inject the XPathJS library by URL and
// evaluate an XPath expression through the page's script engine.
bool completed = false;
WebBrowser wb = new WebBrowser();
wb.ScriptErrorsSuppressed = true;
wb.DocumentCompleted += delegate { completed = true; };
wb.Navigate("http://stackoverflow.com/");
// Pump the message loop until the DocumentCompleted handler sets the flag.
while (!completed)
{
    Application.DoEvents();
    Thread.Sleep(100);
}
if (wb.Document != null)
{
    HtmlElement head = wb.Document.GetElementsByTagName("head")[0];
    HtmlElement scriptEl = wb.Document.CreateElement("script");
    mshtml.IHTMLScriptElement element = (mshtml.IHTMLScriptElement)scriptEl.DomElement;
    element.src = "https://raw.github.com/andrejpavlovic/xpathjs/master/build/xpathjs.min.js";
    head.AppendChild(scriptEl);
    // Initialize XPathJS
    wb.Document.InvokeScript("XPathJS.bindDomLevel3XPath");
    // Verbatim string literals are written @"..." in C#.
    string xPathQuery = @"count(//script)";
    string code = string.Format("document.evaluate('{0}', document, null, XPathResult.ANY_TYPE, null);", xPathQuery);
    var vResult = wb.Document.InvokeScript("eval", new object[] { code });
}
Is there a way to do JavaScript XPath with WebBrowser control ?
Rem : I'd like to avoid using HTML Agility Pack, I wanted to directly manipulate WebBrowser control's DOM's content mshtml.IHTMLElement

I have found solution, here is the code:
// Load a page in a WebBrowser control, inject the wicked-good-xpath library
// inline from a local file, install it, then evaluate an XPath count query.
bool completed = false;
WebBrowser wb = new WebBrowser();
wb.ScriptErrorsSuppressed = true;
wb.DocumentCompleted += delegate { completed = true; };
wb.Navigate("http://stackoverflow.com/");
// Pump the message loop until the DocumentCompleted handler sets the flag.
while (!completed)
{
    Application.DoEvents();
    Thread.Sleep(100);
}
if (wb.Document != null)
{
    HtmlElement head = wb.Document.GetElementsByTagName("head")[0];
    HtmlElement scriptEl = wb.Document.CreateElement("script");
    mshtml.IHTMLScriptElement element = (mshtml.IHTMLScriptElement)scriptEl.DomElement;
    // The library source is set as inline script text rather than via a src URL.
    // Verbatim string literals are written @"..." in C#.
    element.text = System.IO.File.ReadAllText(@"wgxpath.install.js");
    head.AppendChild(scriptEl);
    // Call wgxpath.install() from JavaScript code, which will ensure document.evaluate
    wb.Document.InvokeScript("eval", new object[] { "wgxpath.install()" });
    string xPathQuery = @"count(//script)";
    // Request a NUMBER_TYPE result and read numberValue so a plain number comes back.
    string code = string.Format("document.evaluate('{0}', document, null, XPathResult.NUMBER_TYPE, null).numberValue;", xPathQuery);
    int iResult = (int) wb.Document.InvokeScript("eval", new object[] { code });
}
I use "A pure JavaScript XPath library": wicked-good-xpath and download the wgxpath.install.js

IE11 not recognizing .loadXML()

Working on some xml data and my function works in all browsers except for IE10 and up.
Is there something else I can use instead of .loadXML(data)
Here is part of the transform function. it breaks at x.loadXML(data)
$.transform = function(o) {
// Build an XML document object from the XML string `data`.
// NOTE(review): the question reports this breaking at loadXML() on IE10 and
// later. `$.browser` was removed from jQuery core in 1.9, so this branch also
// depends on an older jQuery or the migrate plugin. Confirm which applies.
var createXmlObj = function(data) {
if($.browser.msie) {
// IE path: create an <xml> element and load the string into it.
var x = $("<xml>")[0];
x.loadXML(data);
return x;
} else {
// Other browsers: parse the string with DOMParser as "text/xml".
var parser = new DOMParser();
return parser.parseFromString(data,"text/xml");
}
};

Add your page to Internet Explorer's Compatibility View Settings. You won't believe the types of issues that this fixes.

How to convert a string into a DOM element

This might have been asked before and answered or the question has been closed before someone could properly answer it. Just in case someone googles for it here is the answer:
https://developer.mozilla.org/en-US/docs/DOM/DOMParser
// Usage examples: the MIME type passed to parseFromString selects the kind of
// document returned. The stringContaining... variables are placeholders.
var parser = new DOMParser();
var doc = parser.parseFromString(stringContainingXMLSource, "application/xml");
// returns a Document, but neither an SVGDocument nor an HTMLDocument
parser = new DOMParser();
doc = parser.parseFromString(stringContainingXMLSource, "image/svg+xml");
// returns an SVGDocument, which is also a Document.
parser = new DOMParser();
doc = parser.parseFromString(stringContainingHTMLSource, "text/html");
// returns an HTMLDocument, which is also a Document.
/*
* DOMParser HTML extension
* 2012-09-04
*
* By Eli Grey, http://eligrey.com
* Public domain.
* NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
*/
/*! @source https://gist.github.com/1129031 */
/*global document, DOMParser*/
// Patches DOMParser.prototype.parseFromString so that "text/html" input is
// handled even when the native implementation rejects that type. The patch is
// skipped entirely when the native parser already returns a document for it.
(function(DOMParser) {
"use strict";
var
DOMParser_proto = DOMParser.prototype
, real_parseFromString = DOMParser_proto.parseFromString
;
// Feature test: try a native text/html parse of an empty string.
// Firefox/Opera/IE throw errors on unsupported types
try {
// WebKit returns null on unsupported types
if ((new DOMParser).parseFromString("", "text/html")) {
// text/html parsing is natively supported
return;
}
// The exception is deliberately ignored: a throw means "unsupported", so
// execution falls through and installs the replacement below.
} catch (ex) {}
DOMParser_proto.parseFromString = function(markup, type) {
// Matches "text/html", case-insensitively, optionally followed by ";..." parameters.
if (/^\s*text\/html\s*(?:;|$)/i.test(type)) {
var
doc = document.implementation.createHTMLDocument("")
;
// The markup is assigned to the new document's <body>.
doc.body.innerHTML = markup;
return doc;
} else {
// Every other type is delegated to the original native implementation.
return real_parseFromString.apply(this, arguments);
}
};
}(DOMParser));

https://developer.mozilla.org/en-US/docs/DOM/DOMParser
// Usage examples: the MIME type passed to parseFromString selects the kind of
// document returned. The stringContaining... variables are placeholders.
var parser = new DOMParser();
var doc = parser.parseFromString(stringContainingXMLSource, "application/xml");
// returns a Document, but neither an SVGDocument nor an HTMLDocument
parser = new DOMParser();
doc = parser.parseFromString(stringContainingXMLSource, "image/svg+xml");
// returns an SVGDocument, which is also a Document.
parser = new DOMParser();
doc = parser.parseFromString(stringContainingHTMLSource, "text/html");
// returns an HTMLDocument, which is also a Document.
Some browsers do not support text/html so here is the extra code from the MDN site to add support for text/html
/*
* DOMParser HTML extension
* 2012-09-04
*
* By Eli Grey, http://eligrey.com
* Public domain.
* NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
*/
/*! @source https://gist.github.com/1129031 */
/*global document, DOMParser*/
// Patches DOMParser.prototype.parseFromString so that "text/html" input is
// handled even when the native implementation rejects that type. The patch is
// skipped entirely when the native parser already returns a document for it.
(function(DOMParser) {
"use strict";
var
DOMParser_proto = DOMParser.prototype
, real_parseFromString = DOMParser_proto.parseFromString
;
// Feature test: try a native text/html parse of an empty string.
// Firefox/Opera/IE throw errors on unsupported types
try {
// WebKit returns null on unsupported types
if ((new DOMParser).parseFromString("", "text/html")) {
// text/html parsing is natively supported
return;
}
// The exception is deliberately ignored: a throw means "unsupported", so
// execution falls through and installs the replacement below.
} catch (ex) {}
DOMParser_proto.parseFromString = function(markup, type) {
// Matches "text/html", case-insensitively, optionally followed by ";..." parameters.
if (/^\s*text\/html\s*(?:;|$)/i.test(type)) {
var
doc = document.implementation.createHTMLDocument("")
;
// The markup is assigned to the new document's <body>.
doc.body.innerHTML = markup;
return doc;
} else {
// Every other type is delegated to the original native implementation.
return real_parseFromString.apply(this, arguments);
}
};
}(DOMParser));
IE8 and below
Some browsers like IE8 and below do not have DOMParser, you can parse a string to HTML in the following way:
// Parse an HTML string without DOMParser: assign it to a detached <div>'s
// innerHTML, then query the resulting element tree (here, every <a> element).
var div=document.createElement("div");
div.innerHTML=HTMLString;
var anchors=div.getElementsByTagName("a");

Check for XML errors using JavaScript

Question: How do I syntax-check my XML in modern browsers (anything but IE)?
I've seen a page on W3Schools which includes an XML syntax-checker. I don't know how it works, but I'd like to know how I may achieve the same behavior.
I've already performed many searches on the matter (with no success), and I've tried using the DOM Parser to check if my XML is "well-formed" (also with no success).
// NOTE(review): the markup inside this literal was stripped in this copy. It is
// reconstructed from the question text, which describes "an unclosed name tag";
// confirm against the original post.
var xml = '<name>Caleb';
var parser = new DOMParser();
// Parse as XML. The question reports that a document object is returned here
// rather than an error being raised.
var doc = parser.parseFromString(xml, 'text/xml');
I expect the parser to tell me I have an XML syntax error (i.e. an unclosed name tag). However, it always returns an XML DOM object, as if there were no errors at all.
To summarize, I would like to know how I can automatically check the syntax of an XML document using JavaScript.
P.S. Is there any way I can validate an XML document against a DTD (using JS, and not IE)?

Edit: Here is a more concise example, from MDN:
// Parse a small well-formed XML string, then report either the root element's
// name or a fixed message when the root element is named "parsererror".
var xmlString = '<a id="a"><b id="b">hey!</b></a>';
var domParser = new DOMParser();
var dom = domParser.parseFromString(xmlString, 'text/xml');
// print the name of the root element or error message
dump(dom.documentElement.nodeName == 'parsererror' ? 'error while parsing' : dom.documentElement.nodeName);

NoBugs answer above did not work with a current chrome for me. I suggest:
// Variant that looks for a <parsererror> element anywhere in the parsed
// document instead of only checking the root element's name.
var sMyString = "<a id=\"a\"><b id=\"b\">hey!<\/b><\/a>";
var oParser = new DOMParser();
var oDOM = oParser.parseFromString(sMyString, "text/xml");
// If any <parsererror> exists, print the serialized document (which contains
// the error); otherwise print "all good".
dump(oDOM.getElementsByTagName('parsererror').length ?
(new XMLSerializer()).serializeToString(oDOM) : "all good"
);

You can also use the fast-xml-parser package, which has a validation check for XML:
import { validate, parse } from 'fast-xml-parser';
// Parse only when validate() returns exactly `true`.
// `xmlData` and `options` are not defined in this snippet; the caller supplies them.
if( validate(xmlData) === true) {
var jsonObj = parse(xmlData,options);
}

Just press F12 to open the developer tools on that page and inspect the source; search for validateXML and you will find a very long, complete XML checker for reference.
I am using React, and I use the DOMParser to present the error message like this:
/**
 * Parse the file content held in component state as XML and show either the
 * parser's error message or a success message.
 */
handleXmlCheck = () => {
  const { fileContent } = this.state;
  const parser = new window.DOMParser();
  const theDom = parser.parseFromString(fileContent, 'application/xml');
  const parserErrors = theDom.getElementsByTagName('parsererror');
  if (parserErrors.length > 0) {
    const firstError = parserErrors[0];
    // Not every engine nests the message in a <div> inside <parsererror>.
    // Fall back to the element's own text so a missing <div> does not throw.
    const detail = firstError.getElementsByTagName('div')[0];
    showErrorMessage(detail ? detail.innerHTML : firstError.textContent);
  } else {
    showSuccessMessage('Valid Xml');
  }
}

A basic XML validator in JavaScript. This code may not handle advanced XML, only basic XML.
/**
 * Naive tag-matching check. Repeatedly takes the first "<...>" tag in the
 * string, requires a matching "</...>" tag somewhere in the string, removes
 * both, and logs progress. Logs 'xml is in invalid' and stops as soon as a
 * tag has no matching end tag. Results are reported through console.log only.
 *
 * @param {string} xml - Markup to check.
 * @returns {undefined}
 */
function xmlValidator(xml) {
  let remaining = xml;
  for (;;) {
    const open = remaining.indexOf('<');
    if (open === -1) {
      return;
    }
    const close = remaining.indexOf('>');
    const openTag = remaining.substring(open, close + 1);
    const tagName = remaining.substring(open + 1, close);
    const closeTag = `</${tagName}>`;
    if (!remaining.includes(closeTag)) {
      console.log('xml is in invalid');
      return;
    }
    // Drop the first occurrence of the opening tag, then of its end tag.
    remaining = remaining.replace(openTag, '').replace(closeTag, '');
    console.log(remaining);
    console.log(`${openTag} ${tagName} ${closeTag}`);
  }
}
// Demo: run the validator on a small sample document; it reports via console.log.
var xml = "<note><to>Tove</to><from>Jani</from><heading>Reminder</heading><body>Don't forget me this weekend!</body></note>";
xmlValidator(xml);

/**
 * Check whether the input is well-formed XML.
 * @param xmlStr The input to be parsed.
 * @returns If the input is invalid, the `<parsererror>` element explaining the problem.
 * If the input is valid, `undefined`.
 */
export function xmlIsInvalid(xmlStr : string) : HTMLElement | undefined {
  const dom = new DOMParser().parseFromString(xmlStr, "application/xml");
  // https://developer.mozilla.org/en-US/docs/Web/API/DOMParser/parseFromString
  // says that parseFromString() will throw an error if the input is invalid.
  //
  // https://developer.mozilla.org/en-US/docs/Web/Guide/Parsing_and_serializing_XML
  // says dom.documentElement.nodeName == "parsererror" will be true if the input
  // is invalid.
  //
  // Neither held when the original author tested in Chrome: nothing is thrown,
  // and for the input "" the observed result was:
  //   dom.documentElement.nodeName returns "html",
  //   dom.documentElement.firstElementChild.nodeName returns "body" and
  //   dom.documentElement.firstElementChild.firstElementChild.nodeName = "parsererror".
  //
  // The parsererror element can therefore appear at different places. The parser
  // seems to build as much of the XML tree as it can and insert parsererror
  // wherever it gets stuck. It sometimes generates additional XML after the
  // parsererror, so .lastElementChild might not find the problem.
  //
  // A genuine error element is an instance of HTMLElement. A valid XML document
  // can contain an element named "parsererror", but that one will NOT be an
  // instance of HTMLElement, which is why the instanceof check is needed.
  //
  // getElementsByTagName('parsererror') might be faster than querySelectorAll().
  //
  // Return the first genuine error element, or undefined when there is none.
  return Array.from(dom.querySelectorAll("parsererror")).find(
    (candidate): candidate is HTMLElement => candidate instanceof HTMLElement
  );
}
(Technically that's TypeScript. Remove : string and : HTMLElement | undefined to make it JavaScript.)

Develop Reference

JavaScript is the programming language of the Web.

troubles trying to parse an html string with DOMParser - javascript

Related

XPathEvaluator in Firefox addon

How to using XPath in WebBrowser Control?

IE11 not recognizing .loadXML()

How to convert a string into a DOM element

Check for XML errors using JavaScript

Categories

Resources