how to catch loaded script errors using puppeteer - javascript

I'm currently trying to catch an error thrown by an injected script.
What I've done:
const path = require('path');
const puppeteer = require('puppeteer');

describe('Test page', () => {
  // Opens a visible browser, injects test.js into a freshly created page, then shuts down.
  it('should fail on script error', async () => {
    const scriptPath = path.join(__dirname, 'test.js');
    const browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();

    await page.addScriptTag({ path: scriptPath });
    await browser.close();
  });
});
my test.js
// `qwerty` is not declared anywhere in this file, so evaluating it throws a ReferenceError.
console.log(qwerty)
I need my spec to handle errors thrown by the loaded script. I've tried to watch for window errors within an evaluate block — const error = await page.evaluate(() => window.onerror = () => console.log('error')) — but got no results. I also tried to catch these errors from the Puppeteer page, like:
// Print every error delivered through the page's 'pageerror' event.
page.on('pageerror', (err) => {
  console.log(err);
});
I feel like I'm digging in the wrong context.

Related

Puppeteer evaluate function on load to show alert

I am a newbie in puppeteer and I can't understand why the following code cannot work. any explanation would be appreciated.
const puppeteer = require("puppeteer");

(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto('https://bet254.com');

  // Executed in the page after goto() has resolved: attaches a "load" listener to window.
  await page.evaluate(async () => {
    const onLoad = (event) => {
      document.alert("Loaded!");
    };
    window.addEventListener("load", onLoad);
  });
})();
I was expecting an alert after loading. But nothing happened! How can I add a listener to show an alert on page load?
page.goto already waits for the page to load, so by the time your evaluate runs, the load event has already fired. You can't re-wait for it, and a listener attached at that point will never be called.
Another problem is that document.alert isn't a function. You may be thinking of document.write or window.alert. In any case, neither function is particularly useful for debugging, so I suggest sticking to console.log unless you have a very compelling reason not to.
When working with Puppeteer, it's important to isolate problems by running your evaluate code by hand in the browser without Puppeteer, otherwise you might have no idea whether it's Puppeteer or the browser code that's failing.
Anything logged in evaluate won't be shown in your Node stdout or stderr, so you'll probably want to monitor that with a log listener. You'll need to look at both Node and the browser console for errors.
Depending on what you're trying to accomplish, page.evaluateOnNewDocument(pageFunction[, ...args]) will let you attach code to evaluate whenever you navigate, which might be what you're trying for here.
Here's an example of alerting headfully:
const puppeteer = require("puppeteer"); // ^19.6.3

let browser;

// Installs the load listener before navigating, so it is in place when the page loads.
const run = async () => {
  browser = await puppeteer.launch({headless: false});
  const pages = await browser.pages();
  const page = pages[0];

  await page.evaluateOnNewDocument(() => {
    const showAlert = event => {
      alert("Loaded!");
    };
    window.addEventListener("load", showAlert);
  });

  await page.goto("https://www.example.com", {waitUntil: "load"});
};

run()
  .catch(err => console.error(err))
  .finally(() => browser?.close());
Using console.log headlessly, with the log redirected to Node:
const puppeteer = require("puppeteer");

// Resolves each argument of a page console message to its JSON value, then logs them in Node.
const onPageConsole = msg => {
  const pendingValues = msg.args().map(handle => handle.jsonValue());
  return Promise.all(pendingValues).then(values => console.log(...values));
};

let browser;

const run = async () => {
  browser = await puppeteer.launch();
  const pages = await browser.pages();
  const page = pages[0];
  page.on("console", onPageConsole);

  // Registered before navigation so the listener exists when the page fires "load".
  await page.evaluateOnNewDocument(() => {
    const announce = event => {
      console.log("Loaded!");
    };
    window.addEventListener("load", announce);
  });

  await page.goto("https://www.example.com", {waitUntil: "load"});
};

run()
  .catch(err => console.error(err))
  .finally(() => browser?.close());
If all you're trying to do is run some code in the browser after load, then you might not even need to attach a listener to the load event at all:
// Required here so the example runs on its own, like the examples before it.
const puppeteer = require("puppeteer");

// Forwards a page console message to Node's console, resolving each argument to its JSON value first.
const onPageConsole = msg =>
  Promise.all(msg.args().map(e => e.jsonValue()))
    .then(args => console.log(...args));

let browser;
(async () => {
  browser = await puppeteer.launch();
  const [page] = await browser.pages();
  page.on("console", onPageConsole);
  await page.goto("https://www.example.com", {waitUntil: "load"});
  // goto() has already waited for "load", so the code can simply run in the page now.
  await page.evaluate(() => console.log("Loaded!"));
})()
  .catch(err => console.error(err))
  // Optional chaining covers the case where launch() itself failed and `browser` is still unset.
  .finally(() => browser?.close());
Or if the code you want to run is purely Node just use normal control flow:
// Required here so the example runs on its own, like the examples before it.
const puppeteer = require("puppeteer");

let browser;
(async () => {
  browser = await puppeteer.launch();
  const [page] = await browser.pages();
  await page.goto("https://www.example.com", {waitUntil: "load"});
  // Plain Node code: runs once goto() has resolved, no page-side listener involved.
  console.log("Loaded!");
})()
  .catch(err => console.error(err))
  // Optional chaining covers the case where launch() itself failed and `browser` is still unset.
  .finally(() => browser?.close());
By the way, there's no need to make a function async unless you have await in it somewhere.
See also Puppeteer wait until page is completely loaded.

page.evaluate not returning response when scraping with Google Cloud Functions?

Puppeteer version: 9.0.0
Platform / OS version: google cloud functions
Node.js version: 14
const puppeteer = require('puppeteer')
// Cloud Function entry point, exported under the name `MyFunc`.
exports.MyFunc = function MyFunc(req, res) {
// Inside this body `MyFunc` refers to the hoisted async function declared just below,
// which shadows the function-expression name. The promise it returns is neither awaited
// nor given a rejection handler here.
MyFunc(req, res);
async function MyFunc(req, res) {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await getSearchResults();
// Navigates to the search URL, extracts result titles in the page, and sends them as JSON.
async function getSearchResults() {
// NOTE(review): `query` and `sort_by` are not defined anywhere in this snippet —
// presumably they are meant to come from `req`; confirm.
const url = `https://abc.redacted.com/search?q=${query}&f=&orderBy=${sort_by}&skip=0&take=10`;
console.log(url);
await page.goto(url, { waitUntil: "domcontentloaded" });
console.log("Page downloaded"); // It console logs till here
// The callback below runs inside the page, so its console.log calls go to the browser
// console rather than to Node's output.
const getResults = await page.evaluate(() => {
let items = [];
const results = document.querySelectorAll(
"#mainArea > router-view > ma-serp > div > div.results > div > compose > div > div.results > ma-card"
);
console.log(results);
for (let result of results) {
console.log(result.querySelector("span")?.innerText ?? "");
items.push({ title: result.querySelector("span")?.innerText ?? "", })
};
// Only this array of plain { title } objects is handed back to Node.
return items;
});
const data = getResults;
res.status(200).json(data); // just getting {}
// The browser is closed only after the response has been sent.
await browser.close();
}
}
}
IDK why but page.evaluate() doesn't console log anything and doesn't return anything to node environment. From three days I'm trying different solutions on stack overflow and GitHub issues but no success until now.
I've also tried promise.resolve() when returning from page.evaluate but that doesn't work either.
When you run page.evaluate() you are actually operating within the browser context, not Node. So console.log outputs to the browser context and you will not see anything in your Node console.
Here is a workaround to get the browser context to output to the node console:
const page = await browser.newPage();
// Mirror each console message emitted by the page into the Node console.
page.on('console', (message) => {
  console.log(message.text());
});
Reference: Puppeteer log inside page.evaluate

How to reload and wait for an element to appear?

I tried searching for this answer but there doesn't seem to be an answer on the Internet. What I want to do is use node js to reload a page until it finds the element with the query I want. I will be using puppeteer for other parts of the program if that will help.
Ok, I used functions from both answers and came up with this, probably unoptimized code:
const puppeteer = require("puppeteer");

// Opens the local page and reports whether #buy-button appeared; any failure is logged.
const checkForBuyButton = async () => {
  try {
    const browser = await puppeteer.launch();
    const page = await browser.newPage();

    await page.goto("http://127.0.0.1:5500/main.html");
    await page.waitForSelector("#buy-button");

    console.log("worked");
  } catch (err) {
    console.log(`ERROR: ${err}`);
  }
};

checkForBuyButton();
But what I don't know how to do is to reload the page, and keep reloading until the id I want is there. For example, keep reloading youtube until the video you want is there(unpractical example, but I think it gets the point across).
Here's how I solved waiting for an element in puppeteer and reloading the page if it wasn't found;
/**
 * Waits for `selector`, reloading the page and trying again whenever the wait fails.
 *
 * Makes one initial attempt plus up to MAX_TRIES reloads; each attempt waits at most
 * 5 seconds for the selector.
 *
 * @param selector - selector handed to `page.waitForSelector`.
 * @returns whatever `page.waitForSelector` resolves with.
 * @throws the error from the final `waitForSelector` attempt once all retries are used,
 *         or the error from a reload that fails.
 */
async waitForSelectorWithReload(selector: string) {
  const MAX_TRIES = 5;
  for (let tries = 0; ; tries += 1) {
    try {
      return await this.page.waitForSelector(selector, { timeout: 5000 });
    } catch (error) {
      if (tries === MAX_TRIES) throw error;
      // reload() resolves once the navigation reaches the requested state, so awaiting it
      // replaces the un-awaited `void reload()` + waitForNavigation() pair; a failed
      // reload now rejects here instead of becoming an unhandled rejection.
      await this.page.reload({ waitUntil: 'networkidle0' });
    }
  }
}
And can be used as;
// NOTE(review): the helper reads `this.page`, so it presumably has to be called on the
// object that owns `page` rather than as a bare function — confirm against the caller.
await waitForSelectorWithReload("input#name")
You can use waitUntil: "networkidle2" to make sure the page is done loading. Obviously, change the URL, unless you are actually using evil.com.
const puppeteer = require("puppeteer"); // include library

// Loads the page, waits for network activity to settle, and always closes the browser.
(async () => {
  const browser = await puppeteer.launch(); // run browser
  try {
    const page = await browser.newPage(); // create new tab
    await page.goto(
      `http://www.evil.com`,
      {
        waitUntil: "networkidle2",
      }
    );
    // do your stuff here
  } finally {
    // Runs even when navigation fails, so the browser process is not left behind.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
const puppeteer = require('puppeteer');

// Waits for '#myId' to appear, logs once it does, and closes the browser afterwards.
puppeteer
  .launch()
  .then(async (browser) => {
    try {
      const page = await browser.newPage();
      // NOTE(review): no navigation happens in this snippet; presumably a page.goto(...)
      // belongs here so that there is a document in which '#myId' can appear.
      // Await the wait so the browser is not closed while it is still pending.
      await page.waitForSelector('#myId');
      console.log('got it');
    } finally {
      await browser.close();
    }
  })
  // A timed-out wait or a failed launch is reported instead of going unhandled.
  .catch((err) => console.error(err));

UnhandledPromiseRejectionWarning: ReferenceError: browser is not defined for azure login with puppeteer

I am trying to automate my application, which runs on the Azure portal, using Puppeteer. After entering the password it does not click the submit button, and I get the following error:
(node:55768) UnhandledPromiseRejectionWarning: ReferenceError: browser is not defined
Here is my sample code:
// NOTE(review): `puppeteer` and `iPhonex` are used below but not defined in this snippet —
// presumably they are declared elsewhere in the original file.
(async () => {
try {
const launchOptions = { headless: false, args: ['--start-maximized'] };
// `browser` is declared with const inside this try block, so it is scoped to the block.
const browser = await puppeteer.launch(launchOptions);
const page = await browser.newPage();
await page.emulate(iPhonex);
await page.goto('https://apps.testpowerapps.com/play/72ff5b93-2327-404d-9423-92eedb44a287?tenantId=n082027');
// NOTE(review): XPath attribute tests are written `@id`; the `#id` in the expressions below
// (and the `#` in the e-mail address) looks like a garbled `@` — confirm against the original.
//Enter User Name
const [userName] = await page.$x('//*[#id="i0116"]');
await userName.type("jyoti.m#azure.com");
const [loginButton] = await page.$x('//*[#id="idSIButton9"]');
await loginButton.press('Enter');
//Enter Password
const [passWord] = await page.$x('//*[#id="i0118"]');
await passWord.type("Pass123");
const [submitButton] = await page.$x('//*[#id="idSIButton9"]');
await submitButton.press('Enter');
//await page.keyboard.press('Enter');
}
catch(error){
console.error(error);
}
finally {
// The `browser` binding from the try block is not visible in this block; this is the
// line that raises "ReferenceError: browser is not defined".
await browser.close();
}
})();
I tried both ways, but neither works. The only catch is that the XPath is the same on both pages.
// NOTE(review): `#id` is presumably a garbled `@id` (XPath attribute syntax) — confirm.
const [submitButton] = await page.$x('//*[#id="idSIButton9"]');
await submitButton.press('Enter');
//await page.keyboard.press('Enter');
any clue to resolve this.
You define the browser value in the try block, but you also use it in the finally block. consts are block-scoped, so they are tied to the block in which they are declared; a different block (the finally) cannot see it.
Here is the problem:
try {
const browser = ...;
}
finally {
// different block!
await browser.close();
}
To solve this, move the browser out of the try-catch:
const browser = ...
try {
}
finally {
await browser.close();
}
This way it's available in the finally block.

How to get all html data after all scripts and page loading is done? (puppeteer)

Finally I figured how to use Node.js. Installed all libraries/extensions. So puppeteer is working, but as it was previous with Xmlhttp... it gets only template/body of the page, without needed information. All scripts on the page engage after few second it had been opened in browser (Web app?). I need to get information inside certain tags after Whole page is loaded. Also, I would ask, if it possible to have pure JavaScript, because I do not use jQuery like code. So it doubles difficulty for me...
Here what I have so far.
const puppeteer = require('puppeteer');
const $ = require('cheerio');
// These two module-level bindings are never assigned in this snippet; the callbacks below
// take parameters with the same names, which shadow them.
let browser;
let page;
const url = "really long link with latitude and attitude";
// Promise chain: launch -> open a page -> navigate and read its HTML -> print the text of
// every <strong> element found in that HTML.
(async () => puppeteer
.launch()
// `await` applied to a function expression simply yields the function itself, so each
// .then() below still receives an ordinary callback.
.then(await function(browser) {
return browser.newPage();
})
.then(await function(page) {
// The HTML is read as soon as goto() resolves.
return page.goto(url).then(function() {
return page.content();
});
})
.then(await function(html) {
// cheerio queries the returned HTML string, not the live page.
$('strong', html).each(function() {
console.log($(this).text());
});
})
// NOTE(review): the handler body is empty, so any rejection from the chain is discarded.
.catch(function(err) {
//handle error
}))();
I get only template default body elements inside strong tag. But it should contain a lot more data than just 10 items.
If you want full html same as inspect? Here it is:
const puppeteer = require('puppeteer');

// Prints the rendered HTML of the page's first element after network activity has settled.
(async function main() {
  try {
    const browser = await puppeteer.launch();
    try {
      const [page] = await browser.pages();
      await page.goto('https://example.org/', { waitUntil: 'networkidle0' });
      const data = await page.evaluate(() => document.querySelector('*').outerHTML);
      console.log(data);
    } finally {
      // Close here so a failed navigation or evaluate does not leave the browser running.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
// Serializes the document starting from its root element.
let bodyHTML = await page.evaluate(() => document.documentElement.outerHTML);
This
Some notes:
You don't need cheerio with Puppeteer, and you don't need to re-parse page.content(): you already have the full DOM with all scripts run, and you can evaluate any code in the window context, just like in a browser, using page.evaluate(), transferring serializable data between the web API context and the Node.js API context.
Try to use async/await only, this will simplify your code and flow.
If you need to wait till all the scripts and other dependencies are loaded, use waitUntil: 'networkidle0' in page.goto().
If you suspect that document scripts need some time till the needed state, use various test functions like page.waitForSelector() or fall back to page.waitFor(milliseconds).
Here is a simple script that outputs all tag names in a page.
'use strict';
const puppeteer = require('puppeteer');

// Prints the tag name of every element in the page after network activity has settled.
(async function main() {
  try {
    const browser = await puppeteer.launch();
    try {
      const [page] = await browser.pages();
      await page.goto('https://example.org/', { waitUntil: 'networkidle0' });
      // Only the array of tag-name strings crosses back from the page to Node.
      const data = await page.evaluate(
        () => Array.from(document.querySelectorAll('*'))
          .map(elem => elem.tagName)
      );
      console.log(data);
    } finally {
      // Close here so a failed navigation or evaluate does not leave the browser running.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
You can specify your task in more details and we can try to write something more appropriate.
Script for www.bezrealitky.cz (task from a comment below):
'use strict';
const fs = require('fs');
const puppeteer = require('puppeteer');

// Loads the search results, keeps clicking the "more" button until it disappears, then
// saves the full page, the results container, and the article texts to disk.
(async function main() {
  // The search URL is written as two concatenated pieces so the string literal is not
  // broken by a raw newline (which is a syntax error inside a quoted string).
  const searchUrl =
    'https://www.bezrealitky.cz/vyhledat?offerType=pronajem&estateType=byt&disposition=&ownership=&construction=&equipped=&balcony=&order=timeOrder_desc&boundary=%5B%5B%7B%22lat%22%3A50.171436864513%2C%22lng%22%3A14.506905276796942%7D%2C%7B%22lat%22%3A50.154133576294%2C%22lng%22%3A14.599004629591036%7D%2C%7B%22lat%22%3A50.14524430128%2C%22lng%22%3A14.58773054712799%7D%2C%7B%22lat%22%3A50.129307131988%2C%22lng%22%3A14.60087568578706%7D%2C%7B%22lat%22%3A50.122604734575%2C%22lng%22%3A14.659116306376973%7D%2C%7B%22lat%22%3A50.106512499343%2C%22lng%22%3A14.657434650206028%7D%2C%7B%22lat%22%3A50.090685542974%2C%22lng%22%3A14.705099547441932%7D%2C%7B%22lat%22%3A50.072175921973%2C%22lng%22%3A14.700004206235008%7D%2C%7B%22lat%22%3A50.056898491904%2C%22lng%22%3A14.640206899053055%7D%2C%7B%22lat%22%3A50.038528576841%2C%22lng%22%3A14.666852728301023%7D%2C%7B%22lat%22%3A50.030955909657%2C%22lng%22%3A14.656128752460972%7D%2C%7B%22lat%22%3A50.013435368522%2C%22lng%22%3A14.66854956530301%7D%2C%7B%22lat%22%3A49.99444182116%2C%22lng%22%3A14.640153080292066%7D%2C%7B%22lat%22%3A50.010839032542%2C%22lng%22%3A14.527474219359988%7D%2C%7B%22lat%22%3A49.970771602447%2C%22lng%22%3A14.46224174052395%7D%2C%7B%22lat%22%3A49.970669964027%2C%22lng%22%3A14.400648545303966%7D%2C%7B%22lat%22%3A49.941901176098%2C%22lng%22%3A14.395563234671044%7D%2C%7B%22lat%22%3A49.948384148423%2C%22lng%22%3A14.337635637038034%7D%2C%7B%22lat%22%3A49.958376114735%2C%22lng%22%3A14.324977842107955%7D%2C%7B%22lat%22%3A49.9676286223%2C%22lng%22%3A14.34491711110104%7D%2C%7B%22lat%22%3A49.971859099005%2C%22lng%22%3A14.326815050839059%7D%2C%7B%22lat%22%3A49.990608728081%2C%22lng%22%3A14.342731259186962%7D%2C%7B%22lat%22%3A50.002211140429%2C%22lng%22%3A14.29483886971002%7D%2C%7B%22lat%22%3A50.023596577558%2C%22lng%22%3A14.315872285282012%7D%2C%7B%22lat%22%3A50.058309376419%2C%22lng%22%3A14.248086830069042%7D%2C%7B%22lat%22%3A50.073179111%2C%22lng%22%3A14.290193274400963%7D%2C%7B%22lat%22%3A50.102973823639%2C%22lng' +
    '%22%3A14.224439442359994%7D%2C%7B%22lat%22%3A50.130060800171%2C%22lng%22%3A14.302396419107936%7D%2C%7B%22lat%22%3A50.116019827009%2C%22lng%22%3A14.360785349547996%7D%2C%7B%22lat%22%3A50.148005694843%2C%22lng%22%3A14.365662825877052%7D%2C%7B%22lat%22%3A50.14142969454%2C%22lng%22%3A14.394903042943952%7D%2C%7B%22lat%22%3A50.171436864513%2C%22lng%22%3A14.506905276796942%7D%2C%7B%22lat%22%3A50.171436864513%2C%22lng%22%3A14.506905276796942%7D%5D%5D&hasDrawnBoundary=1&mapBounds=%5B%5B%7B%22lat%22%3A50.289447077141126%2C%22lng%22%3A14.68724263943227%7D%2C%7B%22lat%22%3A50.289447077141126%2C%22lng%22%3A14.087801111111958%7D%2C%7B%22lat%22%3A50.039169221047985%2C%22lng%22%3A14.087801111111958%7D%2C%7B%22lat%22%3A50.039169221047985%2C%22lng%22%3A14.68724263943227%7D%2C%7B%22lat%22%3A50.289447077141126%2C%22lng%22%3A14.68724263943227%7D%5D%5D&center=%7B%22lat%22%3A50.16447196305031%2C%22lng%22%3A14.387521875272125%7D&zoom=11&locationInput=praha&limit=15';

  try {
    const browser = await puppeteer.launch();
    try {
      const [page] = await browser.pages();
      page.setDefaultTimeout(0);
      await page.goto(searchUrl);
      await page.waitForSelector('#search-content button.btn-icon');

      // Each pass clicks the button and waits until the article count has grown past the
      // count seen before the click.
      while (await page.$('#search-content button.btn-icon') !== null) {
        const articlesForNow = (await page.$$('#search-content article')).length;
        console.log(`Articles for now: ${articlesForNow}. Getting more...`);
        await Promise.all([
          page.evaluate(
            () => { document.querySelector('#search-content button.btn-icon').click(); }
          ),
          page.waitForFunction(
            old => document.querySelectorAll('#search-content article').length > old,
            {},
            articlesForNow
          ),
        ]);
      }

      const articlesAll = (await page.$$('#search-content article')).length;
      console.log(`All articles: ${articlesAll}.`);

      fs.writeFileSync('full.html', await page.content());
      fs.writeFileSync('articles.html', await page.evaluate(
        () => document.querySelector('#search-content div.b-filter__inner').outerHTML
      ));
      fs.writeFileSync('articles.txt', await page.evaluate(
        () => [...document.querySelectorAll('#search-content article')]
          .map(({ innerText }) => innerText)
          .join(`\n${'-'.repeat(50)}\n`)
      ));
      console.log('Saved.');
    } finally {
      // Close here so a failure anywhere above does not leave the browser running.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
Just one line:
// Reads the page's HTML through Puppeteer's page.content().
const html = await page.content();
Details:
import puppeteer from 'puppeteer'

/**
 * Opens `url` in a visible browser, prints the page's HTML once the network is idle,
 * and closes the browser afterwards.
 *
 * @param {string} url - address to load.
 * @returns {Promise<void>}
 */
const test = async (url) => {
  const browser = await puppeteer.launch({ headless: false })
  try {
    const page = await browser.newPage()
    await page.goto(url, { waitUntil: 'networkidle0' })
    const html = await page.content()
    console.log(html)
  } finally {
    // Without this the launched browser stays open after the HTML has been printed.
    await browser.close()
  }
}

await test('https://stackoverflow.com/')

Categories

Resources