PhantomJS: Get page content with correct selector - javascript

I'm trying to get some content off of a page that renders elements post load like such:
var URL = '/';
var IMG = 'http://img.site.com/';
for(var k=0;k<thumb.length;k++)
{
html = html +
'<div class="thumbnail"><a href="'+URLthumb+tumb_id[k]+'">
<img class="th" src='IMG+thumb[k]' /></a></div>';
}
document.write(html);
I'm currently loading this page with:
var system = require('system');
var page = require('webpage').create();
page.open('http://example.com/search?q=some+query+goes+here', function() {
var title = page.evaluate(function() {
return document.documentElement.outerHTML;
});
system.stdout.writeLine(title);
phantom.exit();
});
document.documentElement.outerHTML returns the page pre-render no matter how long I wait. What is the best object to get the page content post render?
What am I doing wrong?

Related

Dynamic iframe content from js response

I have a page where I'm dynamically creating an empty iframe. Then I need to populate the iframe content by retrieving some js-script response data. Everything works fine when I call js URL:
(function(d) {
var iframe = d.body.appendChild(d.createElement('iframe')),
doc = iframe.contentWindow.document,
options = {
objid: 152,
key: 316541321
},
src = "host/widget.js",
uri = encodeURIComponent(JSON.stringify(options));
iframe.id = "iframewidget";
iframe.width = 200;
iframe.height = 300;
var html = '<body onload="var d=document;' +
'd.getElementsByTagName(\'head\')[0].appendChild(d.createElement(\'script\')).src=\'' + src + '\'">';
doc.open().write(html);
doc.close();
})(document);
But I need to run the widget.js with some dynamic parameter. Is there a way to pass the uri variable value to the script? I use pure JavaScript not jQuery
Here's a quick modification of your code to show how its possible.
Your doc variable is the document element of the iFrame. Setting a variable on it will allow you to access it from code running within the iFrame.
https://jsfiddle.net/78gf5e8q/
(function(d) {
var iframe = d.body.appendChild(d.createElement('iframe')),
doc = iframe.contentWindow.document,
options = {
objid: 152,
key: 316541321
},
//src = "host/widget.js",
uri = encodeURIComponent(JSON.stringify(options));
doc.someVar = "Hello World!";
iframe.id = "iframewidget";
iframe.width = 200;
iframe.height = 300;
var html = '<body onload="var d=document;' +
'd.getElementsByTagName(\'head\')[0].appendChild(d.createElement(\'script\')).innerHTML=\'alert(document.someVar)\'">';
doc.open().write(html);
doc.close();
})(document);
Edit: you were also passing in a non existent var called js into doc.open().write(js); I changed it to .write(html);
Edit 2: For cross domain you'll probably get "Permission Denied" on that var.

Unable to receive parameter in query string when redirected to other page

Hi I am developing one application in java-script. I have two pages default.aspx and addnewitem.aspx. there is one html table in default.aspx and one button. When i click on button i want to redirect to addnewitem.aspx page. I have some parameters to send in query string. I am able to redirect to addnewitem.aspx but page not found error i am getting. I am not sure why i am getting page not found error. I am trying as below.
function getValues() {
var Title = "dfd";
var PrimarySkills = "fdfd";
var SecondarySkills = "dfdf";
var url = "http://sites/APPSTEST/JobRequisitionApp/Pages/AddNewItem.aspx?Title=" + encodeURIComponent($(Title)) + "&PrimarySkills=" + encodeURIComponent($(PrimarySkills)) + "&SecondarySkills=" + encodeURIComponent($(SecondarySkills));
window.location.href = url;
}
I am checking querystring in addnewitem.aspx as below.
<script type="text/javascript">
var queryString = new Array();
$(function () {
if (queryString.length == 0) {
if (window.location.search.split('?').length > 1) {
var params = window.location.search.split('?')[1].split('&');
for (var i = 0; i < params.length; i++) {
var key = params[i].split('=')[0];
var value = decodeURIComponent(params[i].split('=')[1]);
queryString[key] = value;
}
}
}
if (queryString["Title"] != null && queryString["PrimarySkills"] != null) {
var data = "<u>Values from QueryString</u><br /><br />";
data += "<b>Title:</b> " + queryString["Title"] + " <b>PrimarySkills:</b> " + queryString["PrimarySkills"] + " <b>SecondarySkills:</b> " + queryString["SecondarySkills"];
$("#lblData").html(data);
alert(data);
}
});
</script>
"http://sites/APPSTEST/JobRequisitionApp/Pages/AddNewItem.aspx?Title=%5Bobject%20Object%5D&PrimarySkills=%5Bobject%20Object%5D&SecondarySkills=%5Bobject%20Object%5D"
I tried lot to fix this. May i know where i am doing wrong? Thanks for your help.
You should use the relative path in your url instead of hard coding the entire folder structure, which is probably incorrect since you are getting a 404. And you need to change the url every time you publish the site to the hosting enviroment when you hard code it like that.
So change
var url = "http://sites/APPSTEST/JobRequisitionApp/Pages/AddNewItem.aspx?Title=...
into
var url = "/AddNewItem.aspx?Title=...
if both the pages are in the same folder. Should AddNewItem.aspx be located in the Pages folder, you have to add that folder of course: var url = "/Pages/AddNewItem.aspx?Title=...

PhantomJS get JS refresh

I need to check every content change on a web site.
I wrote a PhantomJS function to get the whole web site. After receiving the web site I parse every row to get the data.
Now I want to hold the connection and receive all JS-updates.
What need I do to get the JS-updates?
I hope my question is clear.
Here some code explanations:
Pseudo HTML Website
<html>
<head>...</head>
<body>
<div class="value-123" onload="(function() { setInterval(retrieveData(...), 5000); })">
<!-- If retrieveData finished, VALUE will change from e.g. 1.23 to 4.235 -->
VALUE
</div>
</body>
</html>
My PhantomJS Code:
const string cEndLine = "All output received";
var sb = new StringBuilder();
var p = new PhantomJS();
p.OutputReceived += (sender, e) =>
{
if (e.Data == cEndLine)
{
callBack(sb.ToString());
}
else
{
sb.AppendLine(e.Data);
}
};
p.RunScript(#"
var page = require('webpage').create();
page.settings.loadImages = false;
page.viewportSize = { width: 1920, height: 1080 };
page.onLoadFinished = function(status) {
if (status=='success') {
setTimeout(function() {
console.log(page.content);
console.log('" + cEndLine + #"');
phantom.exit();
}," + waitAfterPageLoad.TotalMilliseconds + #");
}
};
var url = '" + url + #"';
page.open(url);", new string[0]);

is it possible to use javascript var i= [["j.php?i="+input]]; to send to php?

I am new and just doing practices i just did:
var i= [["j.php?i=1"]]; and it sent value to php and prints but when i want some input from user it does nothing. For example:
file j.js
var input = "hello";
var send= [["j.php?i="+input]];
and in j.php
<?php
$e=$_GET['i'];
echo $e;
?>
Any idea or i am totally wrong? I am trying to have some variation. And i really did with window.location.href. But I just want to know how to do in this way var send= [[]]. Thanks
Edit:
your question is not very clear, maybe try this:
var input = "hello";
var url = "/j.php?i=" + input;
var send = [[url]];
But, it's not clear to me what you want to do with your send variable...
Original
You have to send the data to the server either using a a tag (GET method), a form (POST method) or an Ajax request (any Http verb):
<!-- Here you give a unique id to your link -->
<a id="link" href="">your link</a>
<script>
var i = "Hello";
// wait for page to be loaded
document.addEventListener("DOMContentLoaded", function(event) {
// change the href attribute of the link with the id equal to 'link'
// with whatever data contained in the i variable
// this is done after the page is loaded
document.getElementById("link").href = "/j.php?i=" + i;
});
</script>
Then, when you just click the link, it will send the variable named i containgin the value Hello to your php page.
You could also do it in this way:
<a id="link2">your link</a>
<script>
var i = "HELLO";
// wait for page to be loaded
document.addEventListener("DOMContentLoaded", function(event) {
// here we are telling javascript to change the url
// in the browser when the user clicks the link
document.getElementById('link2').onclick = function() {
window.location = '/j.php?i=' + i
}
});
</script>

Temporarily unzip a file to view contents within a browser

I want to unzip a file that contains an html page, css, and js directories. I want to unzip this temporarily and view the html in an iFrame, preferrably. I am using jszip which is working. I got the html to load, but how do I add the image, js, and css folders into the iFrame?
Here is what I have so far...
<div id="jszip_utils"></div>
<iframe id="iframe"></iframe>
<script type="text/javascript">
function showError(elt, err) {
elt.innerHTML = "<p class='alert alert-danger'>" + err + "</p>";
}
function showContent(elt, content) {
elt.innerHTML = "<p class='alert alert-success'>loaded !<br/>" +
"Content = " + content + "</p>";
}
var htmltext = JSZipUtils.getBinaryContent("/zip/myWebsite.zip", function (err, data) {
var elt = document.getElementById('jszip_utils');
if (err) {
showError(elt, err);
return;
}
try {
JSZip.loadAsync(data)
.then(function (zip) {
for(var name in zip.files) {
if (name.substring(name.lastIndexOf('.') + 1) === "html") {
return zip.file(name).async("string");
}
}
return zip.file("").async("string");
})
.then(function success(text) {
$('#iframe').contents().find('html').html(text);
showContent(elt, text);
}, function error(e) {
showError(elt, e);
});
} catch(e) {
showError(elt, e);
}
});
</script>
This gets the html, but the js css and image files are not showing up. I believe I need to do some sort of fake routing, but I'm not sure how I would be able to do that. Thanks for your help.
If the html/js in the zip is not too complicated, for instance an AngularJS app that has routes for partials, this is possible.
The trick is to replace css,js,img src/href urls that point to a file in the zip with either:
Object Url: URL.createObjectURL(Blob or File object);
Data Url: data:[<mediatype>][;base64],<data>
Or in the case of js and css inject the content directly into the appropriate element
After replacing the src/href references than just inject the new html into the iframe.
Step 1: Parse the html so you can manipulate it
//html from a call like zip.file("index.html").async("string")
let parser = new DOMParser;
let doc = parser.parseFromString(html,"text/html");
Step 2: Find all elements with a relative path (e.g. /imgs/img.jpg) as they are easier to deal with as you can then use that path for zip.file
//Simply finds all resource elements, then filters all that dont start with '/'
var elements = jQuery("link[href],script[src],img[src]",doc).filter(function(){
return /^\//.test(this.href || this.src);
});
Step 3: Replace src,href with object url, data url, or direct content
//assume element is the html element: <script src="/js/main.js"></script>
zip.file(element.src).async("string").then(jsText=>{
element.src = "data:text/javascript,"+encodeURIComponent(jsText);
});
Step 4: Get the new html text and inject it into the iframe
let newHTML = doc.documentElement.outerHTML;
var viewer = document.querySelector('#iframeID');
viewer = viewer.contentWindow || viewer.contentDocument.document || viewer.contentDocument;
viewer.document.open();
viewer.document.write(html);
viewer.document.close();
JSFiddle Demo - Demonstrates replacing the src/href urls
As a security note, if you are using zip files that you do not know the contents of, you should run the whole app in a protected iframe

Categories

Resources