Having trouble downloading dynamic web content with PhantomJS - javascript

My goal is to download the dynamic web content of a website, so javascript is necessary to be executed on the received content. The code that I am currently using with PhantomJS 2.1 is the following:
var page = require('webpage').create();
var fs = require('fs');
page.open('https://sports.bovada.lv/soccer/premier-league', function () {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function () {
page.evaluate(); // Edit: this line is removed
page.close();
});
});
page.onLoadFinished = function() {
console.log("Download finished");
fs.write('test.html', page.content, 'w');
phantom.exit(0);
};
The code is saving the received page as "test.html", but unfortunately it is not loading the full page content as it does with a web browser. I would appreciate if someone could help me out.
Website used for testing: https://sports.bovada.lv/soccer/premier-league

The issue could be that your're exiting too soon. Try delaying script termination:
page.onLoadFinished = function() {
console.log("Download finished");
fs.write('test.html', page.content, 'w');
setTimeout(function(){
phantom.exit(0);
}, 1000);
};

Related

How to get HTML content using PhantomJS after X seconds?

How to get the content of website after 10 seconds using PhantomJS? In my website for example, I have script that do setTimeout then change the DOM. I need to feach the website html with that change.
I can't find any working answers.
Try this code:
var page = require('webpage').create();
var fs = require('fs');
page.open('http://example.com', function(status) {
console.log('Page load status: ' + status);
if (status === 'success') {
setTimeout(function(){
fs.write('example.html', page.content, 'a');
console.log('Page saved');
phantom.exit();
}, 10000);
}
});

firefox addon sdk: get page load time

When using this Firefox Addon SDK example:
var tabs = require("sdk/tabs");
tabs.open({
url: "http://www.example.com",
onReady: runScript
});
function runScript(tab) {
tab.attach({
contentScript: "document.body.style.border = '5px solid red';"
});
console.log(tab.title);
}
How can I get the page load time and print it to the console?
(like the app.telemetry Addon does - http://www.apptelemetry.com/de/page-speed-monitor.html)
Just use Navigation Timing API:
window.onload = function(){
setTimeout(function(){
var t = performance.timing;
console.log(t.loadEventEnd - t.responseEnd);
}, 0);
}
Notice that it's a Javascript API so it must run inside a Content Script (ie, it won't work inside your lib/main.js or index.js file).

Extracting dynamic content with node.js and PhantomJS

I want to console.log the content of web page with nodejs and phantomjs. This is my code:
var phantom = require('phantom');
phantom.create(function(ph) {
return ph.createPage(function(page) {
return page.open("http://zehinz.com/test.html", function(status) {
if (status === 'success') {
//console.log the content of page with Javascript executed ???
} else {
console.log('some error');
ph.exit();
}
});
});
});
How can I output the dynamically rendered content of web page?
In plain PhantomJS one would use page.content, but since you're using a bridge, the content property has to be explicitly fetched from the PhantomJS process in the background. You can do this with page.get.
In your case, this is
page.get('content', function(content){
console.log("Content", content);
ph.exit();
});

Add/inject image to DOM via PhantomJS

I'm trying to dynamically add an image to the DOM of a loaded page, but it's not showing up when rendering the page.
In page.evaluate in modify the DOM like this (excerpt):
page.open(url, function(status) {
...
window.setTimeout(function () {
...
page.evaluate(function() {
...
var myimg = document.createElement("img");
myimg.setAttribute('src', 'http://www.foobar.com/fooimage.png');
myimg.setAttribute('height', '41px');
myimg.setAttribute('width', '80px');
outerdiv.appendChild(myimg); // outerdiv is visible in the rendered output
document.body.appendChild(outerdiv);
...
}
page.render
Debugging page.content shows that it's successfully added, but page.render does not show it (only the outerdiv it's appended to). Instead of using an external URL src I also tried a base64 encoded string with no luck. I also omitted the path and stored the file inside PhantomJS' include path. None of this 3 seems to work.
I also have a window.timeout of 2000 so I don't think it's an issue of rendering the page before the PNG is loaded.
What would be the proper way to add the src? External URL, local file? Why isn't even adding a base64 encoded image working? Are any security limitations blocking what I'm trying to do? I'm running PhantomJS 1.9.0 btw.
I cannot reproduce your problem with this complete script.
var page = require('webpage').create();
var url = "http://phantomjs.org/img/phantomjs-logo.png";
page.open("http://example.com", function (status) {
if (status !== 'success') {
console.log('Unable to access network');
phantom.exit();
} else {
page.render("without.png");
page.evaluate(function(url){
var myimg = document.createElement("img");
myimg.setAttribute('src', url);
document.body.appendChild(myimg);
}, url);
setTimeout(function(){
page.render("with.png");
phantom.exit();
}, 5000);
}
});
I tried it with PhantomJS 1.9.0 and 1.9.8 with exactly the same result.
I came across the same problem. When I am using page.evaluate() to add images, they are not getting loaded, but content is getting changed.
If I do the same with page.content, images are getting loaded and working as expected.
var page = require('webpage').create();
page.onLoadFinished = function() {
console.log('');
console.log('load finished');
var render = page.render('evaluateTesting.png');
}
page.open('about:blank', function(status) {
console.log('');
console.log('open callback');
//page reloading after this line
page.content = '<!DOCTYPE html><head></head><body><img src="image path"></img></body>';
/*
* this is not causing page reload
* but content is changing
page.evaluate(function() {
console.log('');
console.log('evaluating');
var body = document.getElementsByTagName('body');
body = body[0];
body.innerHTML = '<img src="file:///home/ravitejay/projects/testappDup/sample.png"></img><br>';
})*/
console.log('content : '+page.content);
})

HTML5 WebWorker and Rails 3 Error

I am trying to implement an HTML5 WebWorker and I am using rails 3. I have this in my js code:
//When body(DOM) is ready
$(function() {
....
..
//test html5 webworkers
$('#test_html5_workers').click(function() {
var worker=new Worker('play.js');
worker.addEventListener('message', function(e) {
console.log('Worker said: ', e.data);
}, false);
worker.postMessage('Hello World'); // Send data to our worker.
});
..
....
)};
and in another file I have this (in play.js)
self.addEventListener('message', function(e) {
self.postMessage(e.data);
}, false);
I am currently getting this error:
Script file not found: play.js
I have tried other file path names with no luck.
Any ideas why I am getting this since both files are in the same directory, assets/javascripts?
I am using Firefox 17.0.1
The javascript file (play.js) should go in the public folder, or at least that's what I did to get it working.
This works on mine.
(function() {
//test html5 webworkers
$('#test').on('click', function(){
if(window.Worker){
var worker = new Worker('test.js');
worker.onmessage = function(e){
alert('worker said: ' + e.data);
}
worker.postMessage('start');
}
});
})();
//test.js
self.onmessage = function(e){
if(e.data === 'start'){
self.postMessage('booyeah');
}
}

Categories

Resources