If I 'goto' a web page that contains async code, it is not executed and I don't understand why. Can anyone please help?
Node:
const puppeteer = require('puppeteer');
// Opens the local test page, waits for the 'networkidle0' load condition,
// saves a screenshot to example.png and closes the browser.
(async function captureExample() {
  const chromium = await puppeteer.launch();
  const tab = await chromium.newPage();
  const navigationOptions = { waitUntil: 'networkidle0' };
  await tab.goto('http://127.0.0.1:8080/?code=ABC123&id=1', navigationOptions);
  await tab.screenshot({ path: 'example.png' });
  await chromium.close();
})();
web page:
onmount = () => {
  // Pull the `code` query parameter out of the current URL.
  const query = new URLSearchParams(document.location.search);
  const code = query.get('code');
  // above lines are executed
  // the code below is never executed
  // search is an async function (not shown for brevity)
  search(code).then((result) => {
    // Show the title of the search result in the #title element.
    document.getElementById('title').textContent = result.title;
  });
};
You can use page.waitForFunction to wait for the expected result of the page's async function. That way, the next step (page.screenshot) runs only after the promise started in onmount has resolved in the app.
To do this, declare the value of the code= URL parameter at the top of the script, so that you can later pass it to waitForFunction as an argument and compare it with the element's text.
// Poll inside the page until #title exists and its text strictly equals codeValue.
// The null check keeps the predicate from throwing while the element is still missing.
await page.waitForFunction(
  (expected) => {
    const title = document.querySelector('#title');
    return title !== null && title.textContent === expected;
  },
  {},
  codeValue
);
Example:
const puppeteer = require('puppeteer');
// Loads the test page, waits until the page's async code has written the
// expected text into #title, then takes a screenshot.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    const codeValue = 'ABC123';
    await page.goto(`http://127.0.0.1:8080/?code=${codeValue}&id=1`, {
      waitUntil: 'networkidle0',
    });
    // The null check keeps the predicate from throwing if #title is not in the
    // DOM yet; strict equality avoids any type coercion in the comparison.
    await page.waitForFunction(
      (expected) => {
        const title = document.querySelector('#title');
        return title !== null && title.textContent === expected;
      },
      {},
      codeValue
    );
    await page.screenshot({ path: 'example.png' });
  } finally {
    // Always release the browser, even when navigation or the wait fails.
    await browser.close();
  }
})();
Related
I'm in the process of making an auto-checkout bot. I'm working on the part that checks whether the item is in stock, and I want to split everything into separate functions in separate code blocks. The problem is that I can't get it to run.
When I wrap the function in () only the first function runs while the second one does nothing.
Here is the code without the () around the functions, anyone know what I'm doing wrong?
const puppeteer = require ('puppeteer');
const puppeteerExtra = require('puppeteer-extra');
const pluginStealth = require('puppeteer-extra-plugin-stealth');
const rand_url = "https://www.walmart.com/ip/Cyberpunk-2077-Warner-Bros-PlayStation-4/786104378";
/**
 * Launches a non-headless browser, opens a page in a new incognito context
 * and navigates it to `rand_url`.
 *
 * @returns {Promise<object>} the page, once the navigation has been awaited
 */
async function initBrowser() {
  const launchOptions = { args: ["--incognito"], headless: false };
  const chrome = await puppeteer.launch(launchOptions);
  const incognito = await chrome.createIncognitoBrowserContext();
  const tab = await incognito.newPage();
  // Strip the `webdriver` property from the navigator prototype.
  const hideWebdriverFlag = () => {
    delete navigator.__proto__.webdriver;
  };
  await tab.evaluateOnNewDocument(hideWebdriverFlag);
  await tab.goto(rand_url);
  return tab;
}
// Reloads the page, reads the body HTML and inspects the href of every
// link[itemprop='availability'] element to decide whether the item is in stock.
// NOTE(review): neither `$` nor `browser` is defined anywhere in this snippet.
async function checkstock(page){
await page.reload();
// Serialized body markup, fetched from the page context.
let content = await page.evaluate(() => document.body.innerHTML)
$("link[itemprop ='availability']", content).each(function(){
// True when the availability href contains "outofstock" (case-insensitive).
let out_of_stock = $(this).attr('href').toLowerCase().includes("outofstock");
if(out_of_stock){
console.log("Out of Stock");
} else{
// `await` is used here inside a callback that is not declared `async`.
await browser.close();
console.log("In Stock")
//await page.waitForSelector("button[class='button spin-button prod-ProductCTA--primary button--primary']", {visible: true,}); //Waits for Add to Cart Button
//await page.$eval("button[class='button spin-button prod-ProductCTA--primary button--primary']", elem => elem.click()); //Clicks Add to cart button
}
});
};
To execute the code, call the functions as shown below; note, however, that you will then get ReferenceError: $ is not defined.
const puppeteer = require ('puppeteer');
const puppeteerExtra = require('puppeteer-extra');
const pluginStealth = require('puppeteer-extra-plugin-stealth');
const rand_url = "https://www.walmart.com/ip/Cyberpunk-2077-Warner-Bros-PlayStation-4/786104378";
/**
 * Launches a non-headless browser, opens a page in a new incognito context
 * and navigates it to `rand_url`.
 *
 * @returns {Promise<object>} the page, once the navigation has been awaited
 */
async function initBrowser() {
  const launchOptions = { args: ["--incognito"], headless: false };
  const chrome = await puppeteer.launch(launchOptions);
  const incognito = await chrome.createIncognitoBrowserContext();
  const tab = await incognito.newPage();
  // Strip the `webdriver` property from the navigator prototype.
  const hideWebdriverFlag = () => {
    delete navigator.__proto__.webdriver;
  };
  await tab.evaluateOnNewDocument(hideWebdriverFlag);
  await tab.goto(rand_url);
  return tab;
}
// Reloads the page, dumps the body HTML to stderr and inspects the href of every
// link[itemprop='availability'] element to decide whether the item is in stock.
// NOTE(review): `$` is never defined or required in this snippet, and `browser`
// is only a local variable of initBrowser, so it is not in scope here either.
async function checkstock(page){
await page.reload();
// Serialized body markup, fetched from the page context.
let content = await page.evaluate(() => document.body.innerHTML)
console.error(content);
// The callback is declared `async` so that `await` is legal inside it.
$("link[itemprop ='availability']", content).each(async function(){
// True when the availability href contains "outofstock" (case-insensitive).
let out_of_stock = $(this).attr('href').toLowerCase().includes("outofstock");
if(out_of_stock){
console.log("Out of Stock");
} else{
await browser.close();
}
});
};
// Entry point: open the product page, then run one stock check against it.
(async function run() {
  const productPage = await initBrowser();
  await checkstock(productPage);
})();
I debugged your code, and after adding the following to launch.json:
"outputCapture": "std"
I noticed that there is an error in the following line:
await browser.close();
^^^^^
SyntaxError: await is only valid in async function
You need to add async
$("link[itemprop ='availability']", content).each(async function(){
I am trying to wait for a popup to load completely before proceeding, but I am not sure how to accomplish this. Currently I am using await page.waitFor(3000);. Is there a more elegant way to wait for the popup to load fully and only then proceed?
below is my relevant part of the code.
// Click the two export controls from inside the page context.
// NOTE(review): `$` is presumably jQuery provided by the page itself — confirm.
await page.evaluate(async () => {
await $('#myDataExport').click();
await $('.export-btn a').click();
},);
// Fixed 3-second pause; this is the wait the question wants to replace.
await page.waitFor(3000);
// Treat the last entry returned by browser.pages() as the export popup.
const browserPages = await browser.pages();
const exportPopup = browserPages[browserPages.length - 1];
I have also tried to use the below
// NOTE(review): each element is awaited while the array literal is being built,
// so the click has finished before waitForNavigation is even called; Promise.all
// then receives two already-settled values instead of two pending promises.
await Promise.all([
await page.click('.export-btn a'),
await page.waitForNavigation({ waitUntil: 'networkidle2' }),
]);
But I get an error Error: Node is either not visible or not an HTMLElement
Any help in this would be really great, Thanks.
I tried to make a working example. You can just ignore the request interception code.
const puppeteer = require('puppeteer')
// Opens a page, clicks a link that opens a popup, waits for the popup's
// content and takes a screenshot of it.
;(async () => {
  const browser = await puppeteer.launch({headless: false})
  try {
    const [page] = await browser.pages()

    // This network interception is here because of the massive ads on the page.
    // You can remove it if you like, as this is just an example.
    // page.setRequestInterception(true)
    // page.on('request', request => {
    //   if (request.url().startsWith('https://www.w3schools.com/')) {
    //     request.continue()
    //   } else {
    //     request.abort()
    //   }
    // })

    await page.goto('https://www.w3schools.com/tags/att_a_target.asp', {waitUntil: 'domcontentloaded'})

    // THE COMMENTED LINES BELOW ARE JUST THE W3SCHOOLS EXAMPLE
    // await page.waitForSelector('a[target="_blank"].w3-btn.w3-margin-bottom')
    // ...and then click 'a[target="_blank"].w3-btn.w3-margin-bottom' below instead.

    // YOUR CODE SHOULD LOOK LIKE THIS
    // Wait for the link first: started concurrently, click() could run before
    // the element exists and reject.
    await page.waitForSelector('.export-btn a')

    const [popup] = await Promise.all([
      // Subscribe before clicking so the 'popup' event cannot be missed;
      // `once` removes the listener after it has fired.
      new Promise(resolve => page.once('popup', resolve)),
      page.click('.export-btn a'),
    ])

    await popup.waitForSelector('#iframeResult')
    await popup.screenshot({path: 'targetpopup.png'})
    await popup.close()
  } finally {
    // Always release the browser, even when one of the steps above fails.
    await browser.close()
  }
})()
Have you tried: browser.once with targetcreated target domain event?
Calling target.page() connects Puppeteer to the tab and generates a Page object.
New tabs aren't opened immediately on click. A way to await events is to create a new promise. [source]
Example:
// Start listening before the click so the 'targetcreated' event cannot be missed.
// (The original line was missing the parenthesis that closes `new Promise(`.)
const newPagePromise = new Promise(resolve => browser.once('targetcreated', target => resolve(target.page())));
await page.click('.export-btn a');
// Resolves with the Page object of the newly created tab.
const newPage = await newPagePromise;
I'm using adal-angular for my application, and as soon as it navigates to localhost, it redirects to another website to login. I received (node:38052) UnhandledPromiseRejectionWarning: Error: Navigation to http://localhost:8080/ was canceled by another one.
// Launches Chromium through Playwright, opens a page and tries to save a screenshot;
// any error is logged to the console.
// NOTE(review): `playwright` is not required in this snippet, and the new page is
// bound to `core` while the screenshot call uses an undeclared `page`.
(async () => {
try {
const browser = await playwright.chromium.launch();
const context = await browser.newContext();
// The target URL is passed straight to newPage() here.
const core = await context.newPage("http://localhost:8080/");
await page.screenshot({ path: `example.png` });
await browser.close();
} catch (e) {
console.log(e);
}
})();
This flow is regular AD Single Sign-On behavior, and I need to await the redirect to be over, and then insert user and password to obtain a token and call my API.
I tried using goto to no avail:
const playwright = require("playwright");
// Second attempt: open a blank page first, then navigate it with goto();
// any error is logged to the console.
(async () => {
try {
const browser = await playwright.chromium.launch();
const context = await browser.newContext();
const core = await context.newPage("about:blank");
// NOTE(review): `page` holds whatever goto() resolves to, not the `core` page object;
// in Playwright that is presumably the navigation response — confirm against the API docs.
const page = await core.goto("http://localhost:8080/");
await page.screenshot({ path: `example.png` });
await browser.close();
} catch (e) {
console.log(e);
}
})();
Here is a puppeteer example
const puppeteer = require("puppeteer");
// Opens http://localhost:8080, waits for the 'networkidle2' load condition,
// saves a screenshot to example.png and closes the browser.
(async function captureLocalhost() {
  const chromium = await puppeteer.launch();
  const tab = await chromium.newPage();
  const target = "http://localhost:8080";
  await tab.goto(target, { waitUntil: "networkidle2" });
  await tab.screenshot({ path: "example.png" });
  await chromium.close();
})();
At the end you get an image with the following:
I'm new to puppeteer and trying to figure out how to execute a javascript code, provided as a string value, in puppeteer.
For example, the value (which is retrieved from an input) can look like this: document.getElementById('selector').value='some_value';
I've implemented the following code
// Opens southwest.com and evaluates a script, given as a string, in the page.
const browser = await puppeteer.launch();
const page = await browser.newPage();
// Navigation resolves on 'domcontentloaded'.
await page.goto('https://www.southwest.com/', { waitUntil: 'domcontentloaded' });
// The string sets the origin-airport input to 'Dallas' and dispatches a bubbling 'input' event on it.
// Nothing here waits for that element to exist before the script runs.
const script = await page.evaluate("document.getElementById('LandingAirBookingSearchForm_originationAirportCode').value='Dallas'; document.getElementById('LandingAirBookingSearchForm_originationAirportCode').dispatchEvent(new Event('input',{bubbles:!0}));");
await browser.close();
But it returns the following error:
Evaluation failed: TypeError: Cannot set property 'value' of null
Evaluate your script on the page in a callback.
Wait for the element with the ID 'LandingAirBookingSearchForm_originationAirportCode' before you execute the script, to be sure the page has loaded.
const puppeteer = require('puppeteer');
// Opens southwest.com, waits for the origin-airport input to appear, fills it
// with 'Dallas' and notifies the page with a bubbling 'input' event.
(async function () {
  const browser = await puppeteer.launch(/*{headless: false}*/);
  try {
    const page = await browser.newPage();
    await page.goto('https://www.southwest.com/', { waitUntil: 'domcontentloaded' });

    const originSelector = '#LandingAirBookingSearchForm_originationAirportCode';
    // page.waitFor() is deprecated; waitForSelector states the intent explicitly.
    await page.waitForSelector(originSelector);

    // The selector is passed in as an argument because the callback runs in
    // the page context and cannot see Node-side variables.
    await page.evaluate((selector) => {
      const input = document.querySelector(selector);
      input.value = 'Dallas';
      input.dispatchEvent(new Event('input', { bubbles: true }));
    }, originSelector);
  } finally {
    // Always release the browser, even when a step above fails.
    await browser.close();
  }
})();
I finally figured out how to use Node.js and installed all the libraries/extensions, so puppeteer is working. However, just as previously with XMLHttpRequest, it gets only the template/body of the page, without the information I need. All scripts on the page kick in a few seconds after it has been opened in the browser (a web app?). I need to get the information inside certain tags after the whole page has loaded. Also, I would like to ask whether it is possible to do this in pure JavaScript, because I do not use jQuery-like code, so that doubles the difficulty for me...
Here what I have so far.
const puppeteer = require('puppeteer');
const $ = require('cheerio');
// Launches a browser, loads `url`, and logs the text of every <strong> element
// found by parsing the returned HTML with cheerio (`$`).
// These two outer variables are never assigned; the callbacks below declare
// parameters with the same names, which shadow them.
let browser;
let page;
const url = "really long link with latitude and attitude";
(async () => puppeteer
.launch()
// NOTE(review): here and below, `await` is applied to the function expression
// itself, not to its result, so it does not add any waiting.
.then(await function(browser) {
return browser.newPage();
})
.then(await function(page) {
// Resolves with page.content() once goto() has resolved; goto() is called with no options.
return page.goto(url).then(function() {
return page.content();
});
})
.then(await function(html) {
$('strong', html).each(function() {
console.log($(this).text());
});
})
// The handler body is empty, so any error in the chain is silently discarded.
.catch(function(err) {
//handle error
}))();
I get only template default body elements inside strong tag. But it should contain a lot more data than just 10 items.
If you want full html same as inspect? Here it is:
const puppeteer = require('puppeteer');
// Loads a page, waits for the 'networkidle0' load condition and prints the
// document's full HTML. Errors are logged to stderr.
(async function main() {
  try {
    const browser = await puppeteer.launch();
    try {
      const [page] = await browser.pages();
      await page.goto('https://example.org/', { waitUntil: 'networkidle0' });
      // The first element matched by '*' is the document's root element.
      const data = await page.evaluate(() => document.querySelector('*').outerHTML);
      console.log(data);
    } finally {
      // Close the browser even when navigation or evaluation fails, so that
      // no browser process is left running.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
// Serialize the whole document, starting at its root element, inside the page context.
let bodyHTML = await page.evaluate(() => document.documentElement.outerHTML);
This
Some notes:
You do not need cheerio with puppeteer, and you do not need to re-parse page.content(): you already have the full DOM with all scripts run, and you can evaluate any code in the window context, just like in a browser, using page.evaluate(), transferring serializable data between the web API context and the Node.js API context.
Try to use async/await only, this will simplify your code and flow.
If you need to wait till all the scripts and other dependencies are loaded, use waitUntil: 'networkidle0' in page.goto().
If you suspect that document scripts need some time till the needed state, use various test functions like page.waitForSelector() or fall back to page.waitFor(milliseconds).
Here is a simple script that outputs all tag names in a page.
'use strict';
const puppeteer = require('puppeteer');
// Loads a page, waits for the 'networkidle0' load condition and prints the
// tag name of every element in the document. Errors are logged to stderr.
(async function main() {
  try {
    const browser = await puppeteer.launch();
    try {
      const [page] = await browser.pages();

      await page.goto('https://example.org/', { waitUntil: 'networkidle0' });

      // Runs in the page context; only the array of tag-name strings is
      // sent back to Node.js.
      const data = await page.evaluate(
        () => Array.from(document.querySelectorAll('*'))
          .map(elem => elem.tagName)
      );

      console.log(data);
    } finally {
      // Close the browser even when navigation or evaluation fails, so that
      // no browser process is left running.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
You can specify your task in more details and we can try to write something more appropriate.
Script for www.bezrealitky.cz (task from a comment below):
'use strict';
const fs = require('fs');
const puppeteer = require('puppeteer');
// Opens a bezrealitky.cz search, keeps clicking the "load more" button until it
// disappears, then saves the full page, the results container and the plain
// text of all articles to disk. Errors are logged to stderr.
(async function main() {
  // The search URL, split at its query-string sections so the literal stays
  // valid JavaScript (a quoted string cannot span lines). The `center`
  // parameter had been corrupted to "¢er=" and is restored to "&center=".
  const searchUrl = [
    'https://www.bezrealitky.cz/vyhledat?offerType=pronajem&estateType=byt&disposition=&ownership=&construction=&equipped=&balcony=&order=timeOrder_desc',
    '&boundary=%5B%5B%7B%22lat%22%3A50.171436864513%2C%22lng%22%3A14.506905276796942%7D%2C%7B%22lat%22%3A50.154133576294%2C%22lng%22%3A14.599004629591036%7D%2C%7B%22lat%22%3A50.14524430128%2C%22lng%22%3A14.58773054712799%7D%2C%7B%22lat%22%3A50.129307131988%2C%22lng%22%3A14.60087568578706%7D%2C%7B%22lat%22%3A50.122604734575%2C%22lng%22%3A14.659116306376973%7D%2C%7B%22lat%22%3A50.106512499343%2C%22lng%22%3A14.657434650206028%7D%2C%7B%22lat%22%3A50.090685542974%2C%22lng%22%3A14.705099547441932%7D%2C%7B%22lat%22%3A50.072175921973%2C%22lng%22%3A14.700004206235008%7D%2C%7B%22lat%22%3A50.056898491904%2C%22lng%22%3A14.640206899053055%7D%2C%7B%22lat%22%3A50.038528576841%2C%22lng%22%3A14.666852728301023%7D%2C%7B%22lat%22%3A50.030955909657%2C%22lng%22%3A14.656128752460972%7D%2C%7B%22lat%22%3A50.013435368522%2C%22lng%22%3A14.66854956530301%7D%2C%7B%22lat%22%3A49.99444182116%2C%22lng%22%3A14.640153080292066%7D%2C%7B%22lat%22%3A50.010839032542%2C%22lng%22%3A14.527474219359988%7D%2C%7B%22lat%22%3A49.970771602447%2C%22lng%22%3A14.46224174052395%7D%2C%7B%22lat%22%3A49.970669964027%2C%22lng%22%3A14.400648545303966%7D%2C%7B%22lat%22%3A49.941901176098%2C%22lng%22%3A14.395563234671044%7D%2C%7B%22lat%22%3A49.948384148423%2C%22lng%22%3A14.337635637038034%7D%2C%7B%22lat%22%3A49.958376114735%2C%22lng%22%3A14.324977842107955%7D%2C%7B%22lat%22%3A49.9676286223%2C%22lng%22%3A14.34491711110104%7D%2C%7B%22lat%22%3A49.971859099005%2C%22lng%22%3A14.326815050839059%7D%2C%7B%22lat%22%3A49.990608728081%2C%22lng%22%3A14.342731259186962%7D%2C%7B%22lat%22%3A50.002211140429%2C%22lng%22%3A14.29483886971002%7D%2C%7B%22lat%22%3A50.023596577558%2C%22lng%22%3A14.315872285282012%7D%2C%7B%22lat%22%3A50.058309376419%2C%22lng%22%3A14.248086830069042%7D%2C%7B%22lat%22%3A50.073179111%2C%22lng%22%3A14.290193274400963%7D%2C%7B%22lat%22%3A50.102973823639%2C%22lng',
    '%22%3A14.224439442359994%7D%2C%7B%22lat%22%3A50.130060800171%2C%22lng%22%3A14.302396419107936%7D%2C%7B%22lat%22%3A50.116019827009%2C%22lng%22%3A14.360785349547996%7D%2C%7B%22lat%22%3A50.148005694843%2C%22lng%22%3A14.365662825877052%7D%2C%7B%22lat%22%3A50.14142969454%2C%22lng%22%3A14.394903042943952%7D%2C%7B%22lat%22%3A50.171436864513%2C%22lng%22%3A14.506905276796942%7D%2C%7B%22lat%22%3A50.171436864513%2C%22lng%22%3A14.506905276796942%7D%5D%5D',
    '&hasDrawnBoundary=1',
    '&mapBounds=%5B%5B%7B%22lat%22%3A50.289447077141126%2C%22lng%22%3A14.68724263943227%7D%2C%7B%22lat%22%3A50.289447077141126%2C%22lng%22%3A14.087801111111958%7D%2C%7B%22lat%22%3A50.039169221047985%2C%22lng%22%3A14.087801111111958%7D%2C%7B%22lat%22%3A50.039169221047985%2C%22lng%22%3A14.68724263943227%7D%2C%7B%22lat%22%3A50.289447077141126%2C%22lng%22%3A14.68724263943227%7D%5D%5D',
    '&center=%7B%22lat%22%3A50.16447196305031%2C%22lng%22%3A14.387521875272125%7D',
    '&zoom=11&locationInput=praha&limit=15',
  ].join('');

  try {
    const browser = await puppeteer.launch();
    try {
      const [page] = await browser.pages();
      // A timeout of 0 is set as the default for the waits below.
      page.setDefaultTimeout(0);

      await page.goto(searchUrl);

      await page.waitForSelector('#search-content button.btn-icon');

      // Keep loading more results for as long as the "load more" button exists.
      while (await page.$('#search-content button.btn-icon') !== null) {
        const articlesForNow = (await page.$$('#search-content article')).length;
        console.log(`Articles for now: ${articlesForNow}. Getting more...`);

        // Click and wait concurrently; the wait resolves once the number of
        // articles has grown past the count taken before the click.
        await Promise.all([
          page.evaluate(
            () => { document.querySelector('#search-content button.btn-icon').click(); }
          ),
          page.waitForFunction(
            old => document.querySelectorAll('#search-content article').length > old,
            {},
            articlesForNow
          ),
        ]);
      }

      const articlesAll = (await page.$$('#search-content article')).length;
      console.log(`All articles: ${articlesAll}.`);

      fs.writeFileSync('full.html', await page.content());

      fs.writeFileSync('articles.html', await page.evaluate(
        () => document.querySelector('#search-content div.b-filter__inner').outerHTML
      ));

      // One text block per article, separated by a line of 50 dashes.
      fs.writeFileSync('articles.txt', await page.evaluate(
        () => [...document.querySelectorAll('#search-content article')]
          .map(({ innerText }) => innerText)
          .join(`\n${'-'.repeat(50)}\n`)
      ));

      console.log('Saved.');
    } finally {
      // Close the browser even when a step above fails, so that no browser
      // process is left running.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
Just one line:
// The page's HTML, as returned by page.content().
const html = await page.content();
Details:
import puppeteer from 'puppeteer'
/**
 * Opens `url` in a visible browser, waits for the 'networkidle0' load
 * condition and logs the page's HTML.
 *
 * @param {string} url - address of the page to load
 * @returns {Promise<void>}
 */
const test = async (url) => {
  const browser = await puppeteer.launch({ headless: false })
  try {
    const page = await browser.newPage()
    await page.goto(url, { waitUntil: 'networkidle0' })
    const html = await page.content()
    console.log(html)
  } finally {
    // The original never closed the browser, which leaves the browser process
    // running after the function returns.
    await browser.close()
  }
}
await test('https://stackoverflow.com/')