In the following code snippet, I try to click a button (after some Timeout) within the page.evaluate function. It does not work. Yet, when I open the console in the launched browser and manually type const btn = document.querySelectorAll("form button")[1]; btn.click() it does.
Can anyone explain to me the cause of this difference in behavior and how to fix it?
Here's a minimal reproducible example:
import { resolve } from 'path';
import puppeteer from 'puppeteer'
// Repro script from the question: open the Yahoo Finance (DE) home page,
// click through the cookie prompt, fill the search box and click a form
// button from inside the page context.
//go to page and handle cookie requests
const browser = await puppeteer.launch({defaultViewport: {width: 1920, height: 1080},
headless:false, args: ['--start-maximized']});
const page = await browser.newPage();
const url = "https://de.finance.yahoo.com/";
await page.goto(url);
await page.waitForSelector("div.actions");
// Click the fifth child of div.actions using a DOM click() in the page.
await page.evaluate( () => {
let z= document.querySelector("div.actions"); z.children[4].click()
})
await page.waitForSelector("input[id=yfin-usr-qry]");
// Set the search input's value property directly (no key events are sent),
// then pick the second element matching "form button".
await page.evaluate( () => {let z= document.querySelector("input[id=yfin-usr-qry]");
z.value = "AAPL"; const btn = document.querySelectorAll("form button")[1];
// The returned promise keeps evaluate() pending until the click scheduled
// 1000 ms later has run. The `resolve` parameter here shadows the `resolve`
// imported from 'path', which is otherwise unused.
return new Promise((resolve) => setTimeout(() => {btn.click();resolve()},1000))})
// NOTE(review): the closing "})" below has no matching opener in this
// snippet; it looks like a paste artifact.
})
The form button selector appears to be incorrect, selecting a non-visible element with class .modules_clearBtn__uUU5h.modules_noDisplay__Qnbur. I'd suggest selecting by .finsrch-btn or #UH-0-UH-0-Header .finsrch-btn if you have to select this, but it's not really necessary, so I won't use it in my suggested solution below.
Beyond that, I'd tighten up some of the selectors, skip the timeout and prefer using trusted Puppeteer events when possible.
I'm not sure what data you want on the final page but this should give you a screenshot of it, showing all of the content:
const puppeteer = require("puppeteer"); // ^18.0.4
let browser;

// Search Yahoo Finance (DE) for AAPL using trusted Puppeteer input events and
// save a full-page screenshot of the resulting quote page to aapl.png.
(async () => {
  try {
    browser = await puppeteer.launch();
    const [page] = await browser.pages();

    // Shorthand: wait for a selector on the page and resolve to its handle.
    const waitFor = (...selectorArgs) => page.waitForSelector(...selectorArgs);

    await page.goto("https://de.finance.yahoo.com/", {waitUntil: "domcontentloaded"});

    // Accept the consent prompt.
    const agreeButton = await waitFor('button[name="agree"]');
    await agreeButton.click();

    // Type the symbol and submit with Enter instead of clicking a button.
    const searchBox = await waitFor("#yfin-usr-qry");
    await searchBox.type("AAPL");
    await page.keyboard.press("Enter");

    // Wait for the quote page, scroll to the bottom, then wait for the
    // recommendations section before taking the screenshot.
    await waitFor("#AAPL-interactive-2col-qsp-m");
    await page.evaluate("scrollTo(0, document.body.scrollHeight)");
    await waitFor("#recommendations-by-symbol");

    await page.screenshot({path: "aapl.png", fullPage: true});
  } catch (err) {
    console.error(err);
  } finally {
    // `browser` is still undefined if launch() itself failed.
    await browser?.close();
  }
})();
That said, rather than navigating to the homepage, typing in a search, then pressing a button, you could consider building the URL directly, e.g. https://de.finance.yahoo.com/quote/${symbol} and navigating right to it. This is generally faster, more reliable, and easier to code.
Related
I'm trying to get my Puppeteer script to log in with my Gmail address on Zalando. I'm using the id of the input field so it can type my Gmail address into it, but it just doesn't want to. Can you help me?
This is where the id, class etc is:
<input type="email" class="cDRR43 WOeOAB _0Qm8W1 _7Cm1F9 FxZV-M bsVOrE
mo6ZnF dUMFv9 K82if3 LyRfpJ pVrzNP NN8L-8 QGmTh2 Vn-7c-"
id="login.email" data-testid="email_input" name="login.email" value=""
placeholder="E-postadress" autocomplete="email">
This is my code:
const puppeteer = require('puppeteer');
// Product page to add to the cart.
// NOTE(review): the URL contains a space after "sneakers-"; this looks like a
// line-wrap artifact from pasting. Confirm against the real URL.
const product_url = "https://www.zalando.se/nike-sportswear-air-flight-lite-mid-hoega-sneakers- whiteblack-ni112n02z-a11.html"
// Cart page visited after the product has been added.
const cart = "https://www.zalando.se/cart"
/**
 * Launches a headful browser and opens a new page in it.
 *
 * The browser object is neither returned nor stored, so callers receive only
 * the page and have no direct handle with which to close the browser.
 *
 * @returns {Promise<object>} the newly created Puppeteer page
 */
async function givePage(){
const browser = await puppeteer.launch({headless: false})
const page = await browser.newPage();
return page;
}
/**
 * Opens the product page, clicks through the purchase/cookie/size controls,
 * then navigates to the cart and clicks the checkout button.
 *
 * Every selector is an exact [class='...'] attribute match, so it only
 * matches when the element's class attribute is exactly this string.
 *
 * @param {object} page - Puppeteer page to drive
 */
async function addToCart(page){
// going to website
await page.goto(product_url)
// clicking "handla"
await page.waitForSelector("button[class='DJxzzA u9KIT8 uEg2FS U_OhzR ZkIJC- Vn-7c- FCIprz heWLCX JIgPn9 LyRfpJ pxpHHp Md_Vex NN8L-8 GTG2H9 MfX1a0 WCjo-q EKabf7 aX2-iv r9BRio mo6ZnF PLvOOB']");
// NOTE(review): a callback is passed as the second argument of page.click()
// here and in the calls below; that parameter is meant to be an options object.
await page.click("button[class='DJxzzA u9KIT8 uEg2FS U_OhzR ZkIJC- Vn-7c- FCIprz heWLCX JIgPn9 LyRfpJ pxpHHp Md_Vex NN8L-8 GTG2H9 MfX1a0 WCjo-q EKabf7 aX2-iv r9BRio mo6ZnF PLvOOB']", elem => elem.click());
// clicking "OK" to cookies
await page.waitForSelector("button[class='uc-btn uc-btn-primary']");
await page.click("button[class='uc-btn uc-btn-primary']", elem => elem.click());
// clicking "size EU 41"
// Runs in the page: DOM click() on the fifth element carrying all of these classes.
await page.evaluate(() => document.getElementsByClassName('_6G4BGa _0Qm8W1 _7Cm1F9 FxZV-M IvnZ13 Pb4Ja8 ibou8b JT3_zV ZkIJC- Md_Vex JCuRr_ na6fBM _0xLoFW FCIprz pVrzNP KRmOLG NuVH8Q')[4].click());
console.log("körs")
// Intended as a one-second pause, but the evaluated function returns without
// waiting for the timer: it only schedules a browser-console log and does not
// delay the Node side.
await page.evaluate(async() => { setTimeout(function(){ console.log('waiting'); }, 1000);});
// going to "cart"
await page.goto(cart)
// clicking "gå till checkout"
await page.waitForSelector("button[class='z-1-button z-coast-base-primary-accessible z-coast-base__sticky-sumary__cart__button-checkout z-1-button--primary z-1-button--button']");
await page.click("button[class='z-1-button z-coast-base-primary-accessible z-coast-base__sticky-sumary__cart__button-checkout z-1-button--primary z-1-button--button']", elem => elem.click());
}
/**
 * Types the e-mail address into the login form's e-mail input.
 *
 * @param {object} page - Puppeteer page expected to be showing the login form
 */
async function Login(page){
// Schedules a browser-console log after one second; the evaluated function
// returns immediately, so this does not pause the script.
await page.evaluate(async() => { setTimeout(function(){ console.log('waiting'); }, 1000);});
// NOTE(review): nothing waits for the input to exist before typing into it.
await page.type("input[id='login.email']", 'david.exartor#gmail.com');
}
/**
 * Entry point: obtains a page, adds the product to the cart, then fills in
 * the login e-mail.
 */
async function checkout(){
var page = await givePage();
await addToCart(page);
await Login(page);
}
// NOTE(review): the returned promise is not awaited and has no rejection
// handler, so any failure surfaces as an unhandled rejection.
checkout();
I've tried using the other things such as the name, class and testid but still no success. I was expecting that something would work but nothing did.
You're missing waiting for that input selector:
const uname = await page.waitForSelector("[id='login.email']");
await uname.type('david.exartor#gmail.com');
Suggestions/notes:
This code:
await page.click("button[class='uc-btn uc-btn-primary']", elem => elem.click());
can just be:
await page.click("button[class='uc-btn uc-btn-primary']");
The second argument is supposed to be an options object, not a callback. If you want to trigger a native click, use:
await page.$eval("button[class='uc-btn uc-btn-primary']", el => el.click());
When I run into trouble automating a login, I often add a userDataDir and pop open a browser session so I can log in to the site manually.
Try to avoid sleeping. It slows down your script and can lead to random failures. Pick tighter predicates like waitForSelector or waitForFunction and encode the exact condition you're waiting on.
Luckily, your attempts at sleeping don't actually do much of anything:
await page.evaluate(async() => { setTimeout(function(){ console.log('waiting'); }, 1000);});
This just logs to the browser console after a second but doesn't block in Puppeteer. The async keyword isn't necessary. To actually sleep in the browser, you could do:
await page.evaluate(() => new Promise(r => setTimeout(r, 1000)));
or just sleep in Node:
await new Promise(r => setTimeout(r, 1000));
If you run console.log(await page.content()) headlessly, you'll see the site is detecting you as a bot and not returning the login page. If you plan to run headlessly in the future, the canonical thread is "Why does headless need to be false for Puppeteer to work?".
The givePage function leaks a browser handle, hanging the process. Better to write your script without abstractions until you have everything working, then factor out abstractions. My usual boilerplate is something like:
const puppeteer = require("puppeteer");
/**
 * Page-level scraping logic, kept separate from browser setup and teardown.
 * Navigates to the example site and logs the page title.
 *
 * @param {object} page - object exposing async goto() and title()
 * @returns {Promise<void>}
 */
const scrape = async page => {
  // write your code here
  await page.goto("https://www.example.com", {waitUntil: "domcontentloaded"});
  const title = await page.title();
  console.log(title);
};
let browser;

// Launch a browser, run scrape() on its first page, log any error, and always
// close the browser afterwards so the process does not hang.
(async () => {
  try {
    browser = await puppeteer.launch();
    const pages = await browser.pages();
    await scrape(pages[0]);
  } catch (err) {
    console.error(err);
  } finally {
    // `browser` is still undefined if launch() itself failed.
    await browser?.close();
  }
})();
Be extremely careful with your [class="foo bar baz"] selectors. These are rigid and overly-precise relative to the preferred .foo.bar.baz version. The former is an exact match, so if another class shows up or the order of the classes change, your script will break. Here's an example of the problem:
const puppeteer = require("puppeteer"); // ^19.0.0
const html = `<p class="foo bar">OK</p>`;

let browser;

// Demonstrates which class-based selectors match <p class="foo bar">.
// Each selector is queried in order and either the element text or "FAIL"
// is logged.
(async () => {
  try {
    browser = await puppeteer.launch();
    const [page] = await browser.pages();
    await page.setContent(html);

    // Resolve to the matched element's text, or "FAIL" when $eval rejects
    // (for example because nothing matches the selector).
    const text = async sel => {
      try {
        return await page.$eval(sel, el => el.textContent);
      } catch {
        return "FAIL";
      }
    };

    const selectors = [
      // Good:
      ".foo.bar", // => OK
      ".bar.foo", // => OK
      ".foo", // => OK
      ".bar", // => OK

      // Works but verbose:
      '[class~="foo"][class~="bar"]', // => OK

      // Works but brittle:
      '[class="foo bar"]', // => OK

      // Special cases that are sometimes necessary:
      '[class^="foo "]', // => OK
      '[class$=" bar"]', // => OK
      '[class*="fo"]', // => OK

      // Fails:
      '[class="foo"]', // => FAIL
      '[class="bar"]', // => FAIL
      '[class="bar foo"]', // => FAIL
    ];

    // Query one selector at a time so the output order matches the list.
    for (const sel of selectors) {
      console.log(await text(sel));
    }
  } catch (err) {
    console.error(err);
  } finally {
    await browser?.close();
  }
})();
The [attr=""] selector is suitable in uncommon situations when you need to test semantics like "begins with", "ends with", "substring" or in a very rare case where you actually need to distinguish between class="foo bar" and class="bar foo", which I've never had to do before.
Be careful with overly-specific selectors like .foo.bar.baz.quux.garply.corge. If you can distinguish that element with a simple .foo or a #baz .foo, just use that in most circumstances. Related: overusing browser-generated selectors and Convenient way to get input for puppeteer page.click().
Block images and extra resources to speed up your script once you get the basic functionality working.
I am a newbie in puppeteer and I can't understand why the following code cannot work. any explanation would be appreciated.
const puppeteer=require("puppeteer");
// Question script: tries to show an alert when the page's load event fires.
(async () => {
const browser = await puppeteer.launch({headless:false});
const page = await browser.newPage();
await page.goto('https://bet254.com');
// The listener is registered only after goto() has resolved; the function
// passed to evaluate() contains no await, so `async` has no effect here.
await page.evaluate(async ()=> {
window.addEventListener("load",(event)=>{
// NOTE(review): alert is a method of window, not of document.
document.alert("Loaded!");
})
});
})();
I was expecting an alert after loading. But nothing happened! How can I add a listener to show an alert on page load?
page.goto already waits for the page to load, so by the time your evaluate runs, the load event has already fired. A listener attached at that point will never be called.
Another problem is that document.alert isn't a function. You may be thinking of document.write or window.alert. In any case, neither function is particularly useful for debugging, so I suggest sticking to console.log unless you have a very compelling reason not to.
When working with Puppeteer, it's important to isolate problems by running your evaluate code by hand in the browser without Puppeteer, otherwise you might have no idea whether it's Puppeteer or the browser code that's failing.
Anything logged in evaluate won't be shown in your Node stdout or stderr, so you'll probably want to monitor that with a log listener. You'll need to look at both Node and the browser console for errors.
Depending on what you're trying to accomplish, page.evaluateOnNewDocument(pageFunction[, ...args]) will let you attach code to evaluate whenever you navigate, which might be what you're trying for here.
Here's an example of alerting headfully:
const puppeteer = require("puppeteer"); // ^19.6.3
let browser;

// Headful example: register a load listener that will run in every new
// document, then navigate so the listener shows an alert on load.
(async () => {
  try {
    browser = await puppeteer.launch({headless: false});
    const [page] = await browser.pages();

    // Must be registered before navigating; it is evaluated in the page
    // for each new document.
    await page.evaluateOnNewDocument(() => {
      window.addEventListener("load", () => {
        alert("Loaded!");
      });
    });

    await page.goto("https://www.example.com", {waitUntil: "load"});
  } catch (err) {
    console.error(err);
  } finally {
    await browser?.close();
  }
})();
Using console.log headlessly, with the log redirected to Node:
const puppeteer = require("puppeteer");
/**
 * Handler for Puppeteer's "console" page event: converts every argument of
 * the browser-side console message to a plain value and logs them in Node.
 *
 * @param {object} msg - console message exposing args(), whose items expose jsonValue()
 * @returns {Promise<void>} settles once the values have been logged
 */
const onPageConsole = msg => {
  const pendingValues = msg.args().map(handle => handle.jsonValue());
  return Promise.all(pendingValues).then(values => {
    console.log(...values);
  });
};
let browser;

// Headless example: forward browser console output to Node, register a load
// listener for every new document, then navigate so it logs "Loaded!".
(async () => {
  try {
    browser = await puppeteer.launch();
    const [page] = await browser.pages();

    // Attach the log listener first so no browser message is missed.
    page.on("console", onPageConsole);

    await page.evaluateOnNewDocument(() => {
      window.addEventListener("load", () => {
        console.log("Loaded!");
      });
    });

    await page.goto("https://www.example.com", {waitUntil: "load"});
  } catch (err) {
    console.error(err);
  } finally {
    await browser?.close();
  }
})();
If all you're trying to do is run some code in the browser after load, then you might not even need to attach a listener to the load event at all:
/**
 * Listener for the page "console" event. Resolves each argument of the
 * browser console message to its JSON value, then prints them with
 * console.log on the Node side.
 *
 * @param {object} msg - console message exposing args(), whose items expose jsonValue()
 * @returns {Promise<void>} settles after the values have been printed
 */
const onPageConsole = msg => {
  const jsonValues = msg.args().map(argHandle => argHandle.jsonValue());
  return Promise.all(jsonValues).then(resolved => {
    console.log(...resolved);
  });
};
let browser;

// No load listener needed: goto() resolves after the load event, so code
// evaluated afterwards already runs in a loaded page.
(async () => {
  try {
    browser = await puppeteer.launch();
    const [page] = await browser.pages();
    page.on("console", onPageConsole);

    await page.goto("https://www.example.com", {waitUntil: "load"});

    // Runs in the browser; the message reaches Node through the listener above.
    await page.evaluate(() => console.log("Loaded!"));
  } catch (err) {
    console.error(err);
  } finally {
    await browser?.close();
  }
})();
Or if the code you want to run is purely Node just use normal control flow:
let browser;

// Pure Node variant: wait for navigation to finish, then log from Node itself.
(async () => {
  try {
    browser = await puppeteer.launch();
    const pages = await browser.pages();
    const page = pages[0];
    await page.goto("https://www.example.com", {waitUntil: "load"});
    console.log("Loaded!");
  } catch (err) {
    console.error(err);
  } finally {
    await browser?.close();
  }
})();
By the way, there's no need to make a function async unless you have await in it somewhere.
See also Puppeteer wait until page is completely loaded.
I'm trying to use Puppeteer to extract some data from a website. The script needs to fill in a form, submit it, click a clickable row, then find the data inside another table. The first two steps work perfectly, but when trying to click the td with the class clickable-row (or the a nested inside it), I get an odd result. If I'm running Chromium with headless: false, the link is clicked but then all actions stop working on the page (in the code snippet I've tried filling in the form again, for example). I've tried taking a screenshot after clicking the td, and it doesn't take a shot of the third page but rather of the second (as if the link had not been clicked), and after a while I get the error
"{ TimeoutError: Navigation timeout of 30000 ms exceeded
at Promise.then (/IMM2/node_modules/puppeteer/lib/cjs/puppeteer/common/LifecycleWatcher.js:106:111) name: 'TimeoutError' }"
I've also tried using the await page.waitForNavigation({waitUntil: 'networkidle0',timeout:0}); but it didn't work either.
Here's the code:
const puppeteer = require('puppeteer');
// Question script: search listafirme.ro for a company id, open the matching
// result row, then try to use the search form again.
(async () => {
const browser = await puppeteer.launch({ headless : false });
const listaFirme = `https://www.listafirme.ro/`
const page = await browser.newPage();
await page.goto(listaFirme, {waitUntil: 'networkidle2'});
await page.type('input[name=searchfor]', '35629144');
// Submit the search and wait for the resulting navigation. Errors from this
// step (and the two below) are only logged, so the script continues anyway.
await Promise.all ([
page.click('.input-group-btn .btn'),
page.waitForNavigation(),
]).catch(e=> console.log(e))
// Click the link inside the result row and wait for `page` to navigate.
await Promise.all ([
page.click('.content table tbody tr:nth-child(even) .clickable-row a'),
page.waitForNavigation(),
]).catch(e=> console.log(e))
// Type into the search box again and wait for a navigation on `page`.
await Promise.all ([
page.type('input[name=searchfor]', '35629144'),
page.waitForNavigation(),
]).catch(e=> console.log(e))
await browser.close();
})();
Screenshots to clarify:
First page
Second page
I'm new to puppeteer so it's probably a silly mistake but I've been trying to solve it for a while now and it doesn't seem to have a solution that would work for me so far.
The issue is that the second click opens a new page, so you need to catch this page instead of waiting for navigation on the current one:
const puppeteer = require('puppeteer');
// Search listafirme.ro, open the result row (which opens a NEW tab), then
// run the search again inside that new tab.
(async () => {
  const browser = await puppeteer.launch({ headless: false });

  /**
   * Resolves with the Page of the next tab the browser opens.
   * Must be called BEFORE the action that opens the tab so the
   * 'targetcreated' event cannot be missed.
   */
  const getNewPage = () =>
    new Promise((resolve) => {
      const checkNewTarget = (target) => {
        // Ignore non-page targets (workers, background pages, ...).
        if (target.type() === 'page') {
          browser.off('targetcreated', checkNewTarget);
          resolve(target.page());
        }
      };
      browser.on('targetcreated', checkNewTarget);
    });

  try {
    const listaFirme = `https://www.listafirme.ro/`;
    const page = await browser.newPage();
    await page.goto(listaFirme, { waitUntil: 'networkidle2' });
    await page.type('input[name=searchfor]', '35629144');

    await Promise.all([
      page.click('.input-group-btn .btn'),
      page.waitForNavigation(),
    ]);

    // No .catch() here: swallowing a failure would make Promise.all resolve
    // to undefined and the destructuring below would throw an unrelated
    // TypeError that hides the real cause.
    const [newPage] = await Promise.all([
      getNewPage(),
      page.click('.content table tbody tr:nth-child(even) .clickable-row a'),
    ]);

    await newPage.waitForSelector('input[name=searchfor]');
    await newPage.type('input[name=searchfor]', '35629144');

    await Promise.all([
      newPage.click('.input-group-btn .btn'),
      newPage.waitForNavigation(),
    ]);

    console.log('Done');
  } finally {
    // Close the browser on failure too, so the process does not hang.
    await browser.close();
  }
})().catch((e) => {
  console.log(e);
  process.exitCode = 1;
});
I am trying to wait for a popup to load completely before proceeding, but I am not sure how to accomplish this. Currently I am using await page.waitFor(3000);. Is there a more elegant way to wait for the popup to fully load and then proceed?
below is my relevant part of the code.
// Click the export controls from inside the page context.
// NOTE(review): `$` is resolved in the page, not in Node; presumably the
// page's own jQuery-style global. Confirm.
await page.evaluate(async () => {
await $('#myDataExport').click();
await $('.export-btn a').click();
},);
// Fixed 3-second pause, then treat the last entry of browser.pages() as the popup.
await page.waitFor(3000);
const browserPages = await browser.pages();
const exportPopup = browserPages[browserPages.length - 1];
I have also tried to use the below
// NOTE(review): each element is awaited while the array literal is being
// built, so the click finishes before waitForNavigation() is even called and
// Promise.all only receives already-settled values; the two do not run
// concurrently.
await Promise.all([
await page.click('.export-btn a'),
await page.waitForNavigation({ waitUntil: 'networkidle2' }),
]);
But I get an error Error: Node is either not visible or not an HTMLElement
Any help in this would be really great, Thanks.
I tried to make a working example. You can just ignore the request interception code.
const puppeteer = require('puppeteer')
// Open a page, click a link that opens a popup, and work with the popup once
// it exists. The leading ";" guards against the previous statement having no
// terminating semicolon.
;(async () => {
  const browser = await puppeteer.launch({headless: false});

  try {
    const [page] = await browser.pages();

    // This network interception due to massive ads on the page
    // You can remove this if you like, as this is just an example
    // page.setRequestInterception(true)
    // page.on('request', request => {
    //   if (request.url().startsWith('https://www.w3schools.com/')) {
    //     request.continue()
    //   } else {
    //     request.abort()
    //   }
    // })

    await page.goto('https://www.w3schools.com/tags/att_a_target.asp', {waitUntil: 'domcontentloaded'});

    // THE LINES COMMENTED BELOW ARE JUST A W3SCHOOLS EXAMPLE
    // await page.waitForSelector('a[target="_blank"].w3-btn.w3-margin-bottom')
    // ...then click 'a[target="_blank"].w3-btn.w3-margin-bottom' below instead

    // YOUR CODE SHOULD LOOK LIKE THIS
    // Wait for the link BEFORE clicking it. Starting waitForSelector and
    // click together lets the click run while the element is still missing.
    await page.waitForSelector('.export-btn a');

    const [popup] = await Promise.all([
      // once(): the listener removes itself after the first popup.
      new Promise(resolve => page.once('popup', resolve)),
      page.click('.export-btn a'),
    ]);

    await popup.waitForSelector('#iframeResult');
    await popup.screenshot({path: 'targetpopup.png'});
    await popup.close();
  } finally {
    // Always release the browser, even when a step above fails.
    await browser.close();
  }
})().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
Have you tried: browser.once with targetcreated target domain event?
Calling target.page() connects Puppeteer to the tab and generates a Page object.
New tabs aren't opened immediately on click. A way to await events is to create a new promise. [source]
Example:
// Start listening BEFORE the click: the tab is not opened synchronously, and a
// listener attached afterwards could miss the 'targetcreated' event.
// target.page() attaches Puppeteer to the new tab and yields its Page object.
const newPagePromise = new Promise(resolve =>
  browser.once('targetcreated', target => resolve(target.page()))
);
await page.click('.export-btn a');
const newPage = await newPagePromise;
Hi, I would like to be able to click on "System Tools" and then on the "Firmware Upgrade" button, but when I use the ID or the selector (via right click -> Copy selector), it just says that it can't find it.
It's my first time using Puppeteer, can someone help please :) ?
Thanks
const puppeteer = require('puppeteer');
// Question script: log in to the device's web UI with the password field,
// then try to click a menu entry by id.
let scrape = async () => {
const browser = await puppeteer.launch({headless: false});
const page = await browser.newPage();
await page.setViewport({width: 1000, height: 500})
await page.goto('http://192.168.2.107:8080/', {waitUntil: 'networkidle2'});
await page.waitFor('input[id=pcPassword]');
// Sets the input's value property directly in the page (no key events are sent).
await page.$eval('input[id=pcPassword]', el => el.value = 'admin');
// NOTE(review): this promise is not awaited.
page.keyboard.press('Enter')
// Fixed 3-second pause after submitting the password.
await page.waitFor(3000);
// page.click() resolves the selector against `page` itself.
await page.click(
'[id="the Id im talking about "]'
);
//await page.waitFor(5000);
await browser.close();
};
OK, I just had to select the frame; I didn't know that.
Here is the code :
const puppeteer = require('puppeteer');
/**
 * Logs in to the device's web UI and opens System Tools -> Firmware Upgrade.
 * The menu lives inside the frame named "bottomLeftFrame", so its elements
 * must be looked up on that frame rather than on the page.
 *
 * The browser is intentionally left open, as in the original script.
 *
 * @returns {Promise<void>}
 * @throws {Error} if the menu frame cannot be found
 */
let scrape = async () => {
  // Version-independent pause; page.waitFor() is no longer available in
  // current Puppeteer releases.
  const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));

  const browser = await puppeteer.launch({args: ['--no-sandbox', '--disable-setuid-sandbox'], headless: false});
  const page = await browser.newPage();
  await page.setViewport({width: 1900, height: 700});
  await page.goto('http://192.168.2.105:8080', {waitUntil: 'networkidle2'});

  await page.waitForSelector('input[id=pcPassword]');
  await page.$eval('input[id=pcPassword]', el => el.value = 'admin');
  // Await the key press so a failure is not lost as a floating promise.
  await page.keyboard.press('Enter');

  // Give the post-login frameset time to load.
  await sleep(3000);

  // page.frames() is synchronous; no await needed.
  const frame = page.frames().find(f => f.name() === 'bottomLeftFrame');
  if (!frame) {
    throw new Error('Frame "bottomLeftFrame" not found');
  }

  // Wait for each menu entry to be visible instead of sleeping and hoping,
  // and await each click so errors surface here.
  const button = await frame.waitForSelector('#menu_tools', {visible: true});
  await button.click();

  const button2 = await frame.waitForSelector('#menu_softup', {visible: true});
  await button2.click();
};
// Run the script. scrape() resolves without a value, so `value` is undefined
// and the log line only signals that every step completed.
scrape()
  .then((value) => {
    console.log(value); // Success!
  })
  .catch((err) => {
    // Report failures instead of leaving an unhandled rejection.
    console.error(err);
    process.exitCode = 1;
  });
From your comments to this answer I think that we are dealing with frames in the page. That's the reason why puppeteer is unable to find the element #menu_tools despite it being visible to you in the page. To access the frames in the page look more at Puppeteer pageFrames.
Here's a demo of how your code may look.
/**
 * Demo: log in and click the tools menu by working on a child frame of the
 * page instead of on the page itself.
 *
 * @returns {Promise<void>}
 * @throws {Error} if the page has no child frame
 */
let scrape = async () => {
  const browser = await puppeteer.launch({headless: false});

  try {
    const page = await browser.newPage();
    await page.setViewport({width: 1000, height: 500});
    await page.goto('http://192.168.2.107:8080/', {waitUntil: 'networkidle2'});

    // Find out which frame holds your desired selector then edit the pageFrame below.
    // childFrames is a method and returns its array synchronously.
    const pageFrame = page.mainFrame().childFrames()[0];
    if (!pageFrame) {
      throw new Error('The page has no child frames');
    }

    await pageFrame.waitForSelector('input[id=pcPassword]');
    await pageFrame.$eval('input[id=pcPassword]', el => el.value = 'admin');

    // The keyboard belongs to the page; frames have no keyboard property.
    await page.keyboard.press('Enter');

    // Same 3-second pause as before, without the removed waitFor(ms) API.
    await new Promise(resolve => setTimeout(resolve, 3000));

    await pageFrame.waitForSelector('#menu_tools');
    await pageFrame.click('#menu_tools');
  } finally {
    // Close the browser even if a step above throws.
    await browser.close();
  }
};
You can find all the frames available in the page with await page.frames() then connect to that frame. You can then proceed to perform operations like you usually do so with a page by using the handle for that frame.