I'm trying to parse the status page of my router to get the number of wlan devices. The page uses some JavaScript to get the status, so I tried to use PhantomJS, but had no luck.
This is the html source of the status page (status.html and status.js): http://pastebin.com/dmvptBqv
The developer tools of my browser show me this output on the console (anonymized):
([
{"vartype":"value","varid":"device_name","varvalue":"Speedport W 921V"},
{"vartype":"value","varid":"factorydefault","varvalue":"1"},
{"vartype":"value","varid":"rebooting","varvalue":"0"},
{"vartype":"value","varid":"router_state","varvalue":"OK"},
{"vartype":"value","varid":"bngscrat","varvalue":"0"},
{"vartype":"value","varid":"acsreach","varvalue":"0"},
Full reference
How can I get this evaluated output out of PhantomJS? Maybe it is very simple and I just missed the part in the documentation.
I think that i have to use the evluate function, but have no idea what is the correct function for the document object to return the complete evaluation.
var webPage = require('webpage');
var page = webPage.create();
page.open('blubb', function (status) {
var js= page.evaluate(function() {
return document.???;
});
console.log(js);
phantom.exit();
});
The main problem that you have is to get the console messages from the page into a single structure that you can do further processing on. This is easily done with the following code which waits indefinitely until the first console message appears and stops waiting as soon as no further messages appeared during 1 second.
var logs = []
timeoutID;
page.onConsoleMessage = function(msg){
if (timeoutID) clearTimeout(timeoutID);
logs.push(msg); // possibly also further processing
timeoutID = setTimeout(function(){
page.onConsoleMessage = function(msg){
console.log("CONSOLE: " + msg);
};
// TODO: further processing
console.log(JSON.stringify(logs, undefined, 4));
phantom.exit();
}, 1000);
};
page.open(url); // wait indefinitely
If each msg is valid JSON, then you can parse it immediately to get JavaScript objects. Change
logs.push(msg);
to
logs.push(JSON.parse(msg));
Related
Here is what I have going on. I have a rPI that launches chrome into three tabs that I have set using xdotool to cycle between the three tabs. Everything is working great with that functionality, but I am looking to have it stop cycling and stay on one of the tabs when an event on that website happens. I have the code done to go back to that tab and stay there for x-amount of time. What I need help with is getting the code to recognize the event happening. I have watched the console when the event occurs and there is a log of the function call as well as the object that is passed from the JS code. If there is a way to monitor that console log real-time in the background and catch that function call being printed to the log then I could use that to fire the rest of the logic to lock the screen to that tab.
Or if anyone can come up with a different/easier plan that would be greatly appreciated. When the function call happens there is a list of names that displays on the website. Maybe we could check that list for any name and then lock the screen.
I tried to use selenium to grab the logs. I was able to get it to start chrome and then go to the website and pull up the logs. That worked as it was supposed to from the documentation that I have read. The problem is I need something to run on an already running instance of chrome. Maybe have it in the code that when it goes to the tab where the function would be called it would check the log and execute code, not launch and then close an instance of chrome.
If there is a way to monitor that console log real-time in the
background and catch that function call being printed to the log
There is (though not in the background). Here's how you can do it
function myConsoleLogFunc(info) {
// examine the info being logged
this.log(info);
}
myConsoleLogFunc.log = console.log;
console.log = myConsoleLogFunc;
So I was able to get the answer I was looking for with puppeteer and some navigation and bash scripts. Below is the code that I used to complete the task.
const puppeteer = require('puppeteer-core');
async function start(){
const browser = await puppeteer.launch({executablePath: '/usr/bin/chromium-browser'}); //launch browser window in bg
const page = await browser.newPage(); //get new page in browser
await page.setViewport({width: 1280, height: 800}); //set window size
await page.goto('https://auth.iamresponding.com/login/member'); //open i am responding page
await page.click('#accept-policy'); //click accept cookies
await page.type('#Input_Agency', '#########'); //input agency name
await page.type('#Input_Username', '#########'); //input user name
await page.type('#Input_Password', '#########'); //input password
await Promise.all([page.click('button[name="Input.button"'), page.waitForNavigation()]) //click login button and wait for new page to load
var messageTest = "" // var to hold console message for testing
var testDone = false
var loaded = 0 // var to only fire code on first pushrespond notice
page.on('console', message => { //get the console logs from the browser and pass them to the test method
messageTest = message.text()
console.log(messageTest)
testDone = testValue(messageTest)
})
function testValue(cLog){ //method to test the console message for responding and clear responding
if (cLog.includes("pushrespond")) { //check to see if value is pushrespond
loaded += 1 // if it is increment the loaded var
if (loaded == 1){ // check if loaded = 1 and if so open new chrome window and execute login
require('child_process').exec('sh /home/pi/open.sh',
(error, stdout, stderr) => {
console.log(stdout);
console.log(stderr);
if (error !== null) {
console.log(`exec error: ${error}`);
}
});
return //return out of the method
}else {
return // if loaded is more than one return out of method without doing anything
}
return
}else if (cLog.includes("pushautoclear")){ //check to see if console message is push autoclear
if(loaded >= 1){ //make sure that there is a valid window to close out of as to not close main browser if no one was responding
require('child_process').exec('sh /home/pi/exit.sh', //close the window that was launched on responding
(error, stdout, stderr) => {
console.log(stdout);
console.log(stderr);
if (error !== null) {
console.log(`exec error: ${error}`);
}
});
loaded = 0 //reset loaded to 0 so all functions work properly on next iteration
}else{
return
}
return
}else{ //exit out of the method if message does not contain pushrespond or pushautoclear
return
}
}
}
I am loading a page, intercepting its requests, and when a certain element shows up I stop loading and extract the data I need...
Here is the problem that I am faced with.
When simplified the code looks actually something like this:
async function loadPage()
{
var contentLoaded = false;
var content;
//now i say when element shows up, do something
// it is page.waitForSelector but for simplicity, i use a timeout
// because the problem is the same
//i set it to "show up" in 10 seconds here.
//when it shows up, it sets the content to 100 (extracts the content i want)
//and stores it..
setTimeout(()=>{
content = 100;
contentLoaded = true;
},10000)
//Here i have a function that loads the page
//Intercepts request and handles them
//Until content is loaded
page.on('request', req =>{
if(!contentLoaded)
{
// keep loading page
}
})
// this is the piece of code i would like to not run,
// UNTIL i either get the data, or a timeout error
// from page.waitForSelector...
//but javascript will run it if it's not busy with the
//loading function above...
// In 10 seconds the content shows
// and it's stored in DATA, but this piece of code has
// already finished by the time that is done...
// and it returns false...
if(contentLoaded)
{return content}
else
{return false}
}
var x = loadPage();
x.then(console.log); //should log the data or false if error occured
Thank you all for taking the time to read this and help out, I'm a novice so any feedback or even reading material is welcome if you think there is something I'm not fully understanding
Solved
Simple explanation:
Here is what I was trying to accomplish:
Intercept page requests so that I can decide what not to load, and speedup loading
Once an element shows up on the page, i want to extract some data and return it.
I was trying to return it like this: (note, all the browser and error handling will be left out in these since it would just clutter the explanation)
var data = loadPage(url);
async function loadPage(URL)
{
var data;
page.waitForSelector(
var x = //page.evaluate returns data to x...
data = x;
)
return data;
}
Which doesn't work since return runs immediately but waitForSelector runs later, so we always return undefined...
The correct way of doing it, or rather the way it works for me is to return the whole promise, and then extract the data...
var data = loadPage(url);
data.then(//do what needs to be done with the data);
async function loadPage(URL)
{
var data = page.waitForSelector(
var x = //page.evaluate returns data to x...
data = x;
)
return data; // we return data as a promise
}
I hope it's a solid enough explanation, if someone needs to see the whole deal, I could edit the question and place the whole code there...
I am writing an overlay UI for one application using Greasemonkey, injecting JS to a page.
Now I have issue to hook on some ajax calls that runs on the webpage.
However, the page is generating console logs, informing that content is fully loaded. My question is:
In plain javascript, is there a possibility of reading console output generated by completely different script?
function blackBox(){
//this is generating come console output.
//this runs on code layer I have no access to.
}
function readConsole(){
//read console record by record
if(console.msg == "something I want"){
doSomething();
}
}
Thanks in advance :)
As others have stated, you can override the console.log() function with your own, and implement your own implementation:
var oldLog = unsafeWindow.console.log;
var messages = [];
unsafeWindow.console.log = function(msg) {
messages.push(msg);
oldLog.apply(null, arguments);
}
// Access the entire console.log history in messages
Redefine console.log and storage messages on a array:
var consoleStorage = [];
console.log = function(msg){
consoleStorage.push(msg);
console.warn(msg); // if you need to print the output
}
Now you can read all logs:
function readConsole(){
//read console record by record
consoleStorage.forEach(msg => {
doSomething(msg);
});
}
Even if it were possible, that would be bad practice. The console is for human eyes to see, when debugging. It usually takes several milliseconds to execute console.log, so executing it in production code is not a good idea.
I would say programming in booleans (like ifDidX and ifDidY) would be better. But if you really had to do this message-history thing, a better alternative would be to store messages in some other array. Here is an example:
var messagesLog = [];
//Since console.log takes several milliseconds to execute, you should tick this to false in production code.
const logToConsole = true;
function logMessage( message ){
//adds message to JS log.
messagesLog.push(message);
//performs console.log if not production code
if(logToConsole){
console.log(message);
}
}
logMessage("pizza");
logMessage("second to last message");
logMessage("last message");
//checks last message and this will be true
if( messagesLog[messagesLog.length - 1] == "last message" ){
logMessage("The last message in the log was 'last message'");
}
//checks last message and this will be false
if( messagesLog[messagesLog.length - 1] == "pizza" ){
logMessage("This will not occur because the last message was 'last message'");
}
//searches through log to find certain message
for(let i = 0; i < messagesLog.length; i++){
if(messagesLog[i] == "pizza"){
logMessage("one of the messages in the log was pizza");
}
}
Directly there's no way to access console outputs, but what you can do is that you can override console.log in a way where it checks for you condition first and then outputs the content, below is a sample code for that
console.stdlog = console.log.bind(console);
console.log = function(msg){
if(msg == "something I want"){
...
}
console.stdlog.apply(console, arguments);
}
Although you'll need to be very careful with this since if you add any console.log in that condition, it will create an infinite loop.
I am trying to get the onPrompt callback working with PhantomJS.
Just for testing I have a basic angular application that prompts the user on initialization and displays that data on the page.
It works fine when I enter the information into the prompt manually, but it will not work when using the PhantomJS onPrompt callback.
Here's the angular app:
angular
.module('app')
.controller('test', TestController)
function TestController() {
var vm = this;
vm.$onInit = onInit;
vm.testData = '';
function onInit() {
vm.testData = prompt('Name?');
}
}
This is the code I running with PhantomJS
var page = require('webpage').create();
page.open('http://localhost:3000', function() {
console.log('test')
page.onPrompt = function(msg, defaultVal) {
console.log("MESSAGE", msg)
return "Dog";
};
page.render('test.pdf');
phantom.exit();
});
I would expect to get a console.log that says "MESSAGE" and the text from the prompt and also a screenshot of my page with "Dog" displayed.
I get a screenshot of a blank page and no console log.
I would ideally like to use this callback with node webshot as an option. webshot phantom callbacks
Thanks for the help.
There is a context in which interaction with the actual interaction with the page takes place - usually within the page.evaluate(..) function. The onPrompt callback will execute within that context and will not print to the console as you expect - I think you need to marshal the output back to the console by setting the onConsoleMessage callback first:
page.onConsoleMessage = function (msg) {
console.log(msg);
};
And then it should print to the console as you expect :)
I have 2 code samples as following:
var page = require('webpage').create();
page.open('https://www.youtube.com', function(s) {
console.log(s);
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
var t = page.evaluate(function() {
console.log('here');
phantom.exit(); // case 1: inside page.evaluate()
});
});
});
-> In this case, phantom.exit() is put inside page.evaluate(), and error appears: "ReferenceError: Can't find variable: phantom"
var page = require('webpage').create();
page.open('https://www.youtube.com', function(s) {
console.log(s);
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
var t = page.evaluate(function() {
console.log('here');
});
phantom.exit(); // case 2: outside page.evaluate()
});
});
-> In case 2: phantom.exit() is put outside page.evaluate(). There is no error, but 'here' is not printed out.
=> Neither of 2 above ways can print out 'here'. Is there any effective way to print 'here' without error?
You can't use any of the phantom method inside of the page, in the same way the page itself can't use them.
The phantom object is a special host object provided by the runtime to the script it initially launches. Once a page has been loaded, the page runs in its own context with the usual browser variables, which do not include phantom.
You need to wait for the page.evaluate to call back to the phantom script, then call exit from there. The Phantom docs show this exact case, with exit being called after evaluate has returned. They don't seem to cover whether evaluate is blocking or not, but the example suggests that it may be.
To capture the log messages from the page, you need to subscribe to its onConsoleMessage event. The Phantom console only picks up messages from the script it is running, not any pages that it may load. All you need to do is route page messages to the phantom console:
page.onConsoleMessage = function(msg) {
console.log('PAGE: ' + msg);
};
Try returning required data back and do console.log() after evaluation
Example
var page = require('webpage').create();
page.open('https://www.youtube.com', function(s) {
console.log(s);
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
var t = page.evaluate(function() {
return "here";
});
console.log(t);
phantom.exit(); // case 2: outside page.evaluate()
});
});