Only parse necessary html data and skip unwanted html data - javascript

I am working on a script that fetches a URL and parses all of its HTML. However, it only uses the "data-style-name", "href" and "data-sold-out" attributes and the "select" element.
this is how I parse the html:
/**
 * Fetch a URL with a blocking GET request and return the response body.
 *
 * The request is synchronous (third argument to `open` is `false`), so the
 * body is available as soon as `send()` returns.
 *
 * @param {string} urlSource - Address of the page to download.
 * @returns {*} The `response` property of the request after `send()` returns.
 */
function loadHTMLSource(urlSource) {
  // Keep the request object local: an undeclared assignment would create an
  // implicit global and throws a ReferenceError in strict mode / ES modules.
  const xhttp = new XMLHttpRequest();
  xhttp.open("GET", urlSource, false);
  xhttp.send();
  return xhttp.response;
}
// Download the page and parse its markup into a detached HTML Document.
const page_html = loadHTMLSource(url);
// Declared explicitly so that neither name leaks as an implicit global.
const parser = new DOMParser();
const my_document = parser.parseFromString(page_html, "text/html");
And I'm only pulling info from these parts of the HTML:
my_document.querySelectorAll("[data-style-name]");
attributes["data-sold-out"].value
my_document.querySelector("meta[name='csrf-token']");
my_document.querySelector('select');
Is it possible to pull only these specific parts of the HTML, so I don't end up pulling data that I don't need?
Any help is appreciated. Thank You.

Related

When I replace XML DOM with some HTML and inject some js, the js doesn’t run

I’m trying to make a bookmarklet that swaps out the DOM on the current page for a new one, and then injects and runs some javascript.
It works fine when the original page is HTML (so I don’t think this is a CORS problem). However, when the original page is XML, the injected javascript doesn’t run :( Why not? How can I get it working?
Here’s some example bookmarklet code:
(() => {
  const SCRIPT_URL = 'https://rawgit.com/andylolz/b2e894fa5ccdecacd901c05769fa97fe/raw/289719871f859a1b19e06f8b8ce3769f0002ce55/js.js';
  const MARKUP_URL = 'https://rawgit.com/andylolz/b2e894fa5ccdecacd901c05769fa97fe/raw/289719871f859a1b19e06f8b8ce3769f0002ce55/html.html';

  // Download the replacement markup with a blocking GET.
  const request = new XMLHttpRequest();
  request.open('GET', MARKUP_URL, false);
  request.send();

  // Parse the downloaded text as an HTML document.
  const fetchedDoc = new DOMParser().parseFromString(request.response, 'text/html');

  // Adopt the fetched root element and put it in place of the current root.
  const newRoot = document.adoptNode(fetchedDoc.documentElement);
  document.replaceChild(newRoot, document.documentElement);

  // Append a script element pointing at the remote file to the new root.
  const scriptEl = document.createElement('script');
  scriptEl.setAttribute('src', SCRIPT_URL);
  document.documentElement.appendChild(scriptEl);
})();
Here it is working on codepen (on an HTML page):
https://codepen.io/anon/pen/wEWemL?editors=1010
Again – If I run the above on an XML page, it mostly works, but the injected javascript doesn’t execute.
Thanks!

Get innerHTML from XMLHttpResponse

I'm trying to write a chrome extension that will get a value from a current page, then use that information to go to another page and pull a specific element from the html response. I can get the html response fine, but I'm unable to parse the html response to get the specific element.
content.js
/**
 * Read the ticket fields from the current page, request the remote page
 * asynchronously, and pass the finished request to `handleResponse`.
 */
function getTicketInfo() {
  // Values read from the current page; the request below does not use them yet.
  const ticketURI = document.getElementById("p3_lkid").value;
  const ticketNumber = document.getElementById("p3_lkold").value;

  const xhr = new XMLHttpRequest();
  xhr.open('GET', "remotePage.html", true);
  xhr.onreadystatechange = function () {
    // Only act once the request has completed (readyState 4) with status 200.
    if (xhr.readyState === 4 && xhr.status === 200) {
      // Invoke the handler; writing `function handleResponse(xhr)` here
      // would be an incomplete declaration (a syntax error), not a call.
      handleResponse(xhr);
    }
  };
  xhr.send();
}
// Alerts the text of the contact element looked up on the finished request.
// NOTE(review): as the replies further down point out, the request's response
// is a string rather than a DOM, so the markup has to be parsed before any
// getElementById lookup can work.
function handleResponse(xhr){
var contactElement = xhr.getElementById("CF00N80000005MAX6_ileinner");
// NOTE(review): `clildNodes` is a misspelling of `childNodes` (also noted in a reply below).
alert(contactElement.clildNodes[0].nodeValue);
}
remotePage.html
<html>
<div id="CF00N80000005MAX6_ileinner">
Text I need!
</div>
</html>
How can I get this value from the external page? Is there a better way to request this information?
Your XHR response is a string, and not a DOM.
With jQuery you'll be able to convert it to a DOM, and query it.
/**
 * Find the contact element inside the HTML text carried by a finished request.
 *
 * @param {XMLHttpRequest} xhr - Completed request whose response is an HTML string.
 * @returns {jQuery} The matched element set (empty when the id is not present).
 */
function handleResponse(xhr){
  // Parse into a detached wrapper first: jQuery drops <html>/<body> when it
  // parses a string, so their children become top-level nodes, and .find()
  // only searches descendants. Wrapping makes every parsed node a descendant.
  const wrapper = $('<div>').append($.parseHTML(xhr.response));
  return wrapper.find('#CF00N80000005MAX6_ileinner');
}
The problem is simply that you are not parsing the HTML response into a DOM object. According to MDN, this is how you parse XML (or HTML) with vanilla JavaScript:
// Parse the response *text* (not the request object itself) as HTML, so that
// element lookups such as getElementById work on the resulting document.
const parser = new DOMParser();
const doc = parser.parseFromString(xhr.responseText, "text/html");
And then using the new DOM Object doc for accessing elements.
var contactElement = doc.getElementById("CF00N80000005MAX6_ileinner");
alert(contactElement.childNodes[0].nodeValue);
I also noticed you spelled childNodes wrong, but that isn't the main problem.

Get DOM from a string

Given a String which contains HTML for an entire page, I want only the innerHTML of the body. Rather than parse the HTML myself, it seems easier if I could make an element from the String, and then just get the body directly.
I found some things related, but that I couldn't get to work (I can't find the question anymore).
// Blocking GET of the page; the handler alerts the full response text once
// the request has completed successfully.
// Declared explicitly so the request object is not an implicit global.
const xmlhttp = new XMLHttpRequest();
xmlhttp.onreadystatechange = function () {
  if (xmlhttp.readyState === 4 && xmlhttp.status === 200) {
    // Read the body only after the request has completed.
    const ret = xmlhttp.responseText + "";
    alert(ret);
  }
};
xmlhttp.open("GET", "http://url.php", false);
xmlhttp.send();
Right now I have this ajax request, but I need only the body from the return.
I tried document.createElement(ret).body and new Element(ret).body but they didn't seem to work.
// Parse the page text inside a detached <html> element, then pick out its <body>.
const helper = document.createElement("html");
helper.innerHTML = ret;
// Declared explicitly so `body` does not become an implicit global.
const body = helper.querySelector("body"); // Or helper.getElementsByTagName("body")[0]
You could use simple_html_dom to do this, and get the HTML of the entire page using PHP, and then get only the contents of the body, like this
// Load the page with simple_html_dom and print the text of its <body>.
$html = file_get_html("url.php");
// Passing index 0 makes find() return the first matching element rather than an array of matches.
$body = $html->find("body", 0);
echo $body->plaintext;

When is a JavaScript function without a name called? And is it possible to explicitly call it

I'm new to JavaScript and I'm not too familiar with the syntax.
I want to know how the function
{
// Body of a function whose header is not shown here. It loads two XML files
// with blocking requests, merges them into one document, passes the merged
// nodes to traverseXML, then loads a third file and passes it to
// getOntologyOrderArray.
/*
Data is split into distributions and relations - so append them
*/
//getURLParameters();
/*** Read in and parse the Distributome.xml DB ***/
var xmlhttp=createAjaxRequest();
var xmlDoc, xmlDoc1;
// Blocking GET (third argument false) of the relations file.
xmlhttp.open("GET","distributome-relations.xml",false);
xmlhttp.send();
xmlDoc = xmlhttp.responseXML;
// Blocking GET of the references file, using a fresh request object.
xmlhttp=createAjaxRequest();
xmlhttp.open("GET","distributome-references.xml",false);
xmlhttp.send();
xmlDoc1 = xmlhttp.responseXML;
// Deep-copy the first <references> element into xmlDoc and append it to the
// first <distributome> element there.
var node = xmlDoc.importNode(xmlDoc1.getElementsByTagName("references").item(0), true);
xmlDoc.getElementsByTagName("distributome").item(0).appendChild(node);
// DistributomeXML_Objects is not declared in this block - presumably declared
// elsewhere; confirm. Falls back to the document's own child nodes if reading
// documentElement.childNodes throws.
try{
DistributomeXML_Objects=xmlDoc.documentElement.childNodes;
}catch(error){
DistributomeXML_Objects=xmlDoc.childNodes;
}
traverseXML(false, null, DistributomeXML_Objects, distributome.nodes, distributome.edges, distributome.references, distributomeNodes, referenceNodes);
// Blocking GET of the preferences file.
xmlhttp=createAjaxRequest();
xmlhttp.open("GET","Distributome.xml.pref",false);
xmlhttp.send();
// If the parsed document has no root element but a responseStream exists,
// load the document from that stream instead.
// NOTE(review): presumably a fallback for older IE request objects - confirm.
if (!xmlhttp.responseXML.documentElement && xmlhttp.responseStream)
xmlhttp.responseXML.load(xmlhttp.responseStream);
var ontologyOrder = xmlhttp.responseXML;
getOntologyOrderArray(ontologyOrder);
//console.log("firstread: xmlDoc: " xmlDoc);
}
I'm trying to print the contents of xmlDoc so I can examine the contents the next time I assign values to the variable so I know that I am creating a valid document.
For the purposes of clarification - I am using ProtoVis and want to redraw the nodes after I upload an XML file containing some data.
The first issue I need to fix is creating a valid xmlDoc so that I can display newer information and so wanted to view xmlDoc contents so I could compare it to the next time xmlDoc is assigned a value using the file uploaded.
Thanks for your time!
Edit: I realised that the console.log was missing a comma.
Edit: My question is when the function is executed - given as I can't explicitly call it
Your syntax is wrong. You are missing a + to concatenate the strings:
console.log("firstread: xmlDoc: " + xmlDoc);

parse rss feed using javascript

I am parsing an RSS feed using PHP and JavaScript. First I created a proxy with PHP to obtain the RSS feed. Then I get individual data from this RSS feed using JavaScript. My issue is with the JavaScript. I am able to get the entire JavaScript document with no errors if I use console.log(rssData);. If I try to get individual elements within this document, for example <title>, <description>, or <pubDate>, using rssData.getElementsByName("title"); it gives the error "Uncaught TypeError: Object....has no method 'getElementsByName'". So my question is: how do I obtain the elements in the RSS feed?
Javascript (Updated)
/**
 * Perform a blocking GET request and hand back the response as parsed XML.
 *
 * @param {string} theUrl - Address to request.
 * @returns {*} The request's `responseXML` once `send` has returned.
 */
function httpGet(theUrl) {
  const request = new XMLHttpRequest();
  request.open("GET", theUrl, false); // false => synchronous request
  request.send(null);
  return request.responseXML;
}
// rss source
var rssData = httpGet('http://website.com/rss.php');
// rss values
var allTitles = rssData.getElementsByTagName("title"); // title
var allDate = rssData.getElementsByTagName("pubDate"); // date
Try changing the last line of the httpGet function to:
return xmlHttp.responseXML;
After all, you are expecting an XML response back. You may also need to add this line to your PHP proxy:
header("Content-type: text/xml");
To force the return content to be sent as XML.

Categories

Resources