So I'm new to async functions and promises. Imagine a promise like this (please ignore syntax errors):
await new Promise(async (resolve, reject ) => {
const page = await browser.newPage();
await page.goto('https://example.com').catch( ()=>reject('ERROR -> LINK 1 TIMEOUT '));
// INSERT USERNAME AND PASSWORD
await page.$eval('form', form => form.submit()).catch( ()=>reject('ERROR -> FORM SUBMIT ERROR '));
if( await page.$("#username"))
{
reject(" ERROR -> LOGIN FAILED !!!!!!!!!!!!!! ");
}
await page.waitForSelector('#xxx').catch( ()=>reject('ERROR -> WAITING FOR ELEMENT TIMEOUT '));
var scraped_data = // dop some page evaluate and scrap data;
resolve(scraped_data);
}).then(function(scraped_data){
await page.close();
console.log('all done');
insert_data_in_databas(scraped_data);
})
.catch(function(error){
console.log(' tab failed : ');
console.log(error);
});
I want to convert this to an async function. What is the proper way to do this? Should I just put all of it in a try/catch block, like this:
async function do_stuff(){
try {
const page = await browser.newPage();
await page.setViewport({ width: 1000, height: 1100});
await page.goto( 'https://example.com' );
// INSERT USERNAME AND PASSWORD
await page.$eval('form', form => form.submit());
await page.waitForSelector('#xxx');
var scraped_data = // dop some page evaluate and scrap data;
await page.close();
console.log('all done');
insert_data_in_databas(scraped_data);
}
catch (e) {
await page.close();
console.log('error');
console.log(e);
}
}
How can I reject when there is an error so that the rest of the code won't execute? Can I have custom error text in the catch block, like
ERROR -> FORM SUBMIT ERROR
And how should I do this
if( await page.$("#username"))
{
reject(" ERROR -> LOGIN FAILED !!!!!!!!!!!!!! ");
}
which is not an actual error (I mean it's not a code error), in try/catch?
------------------------------------------ edit --------------------
i tried
// Opens a new tab, loads the page, waits for `#xxx` and closes the tab.
// Any failure is logged by the catch block.
async function open_tab(){
try {
// NOTE(review): `page` is declared with `const` inside this try block, so it is
// not in scope in the catch block below.
const page = await browser.newPage();
await page.setViewport({ width: 1000, height: 1100});
await page.goto( 'https://google.com' );
// Replace whatever waitForSelector rejects with by a custom error message.
await page.waitForSelector('#xxx').catch(()=> { throw new Error('ERROR -> LOGIN FAILED')});
await page.close();
console.log('all done');
}
catch (e) {
console.log('error');
console.log(e);
// References `page`, which is declared inside the try block above.
await page.close();
}
}
its almost working but i cant close the tab in the catch block i get
UnhandledPromiseRejectionWarning: ReferenceError: page is not defined
and the tab remains open which is not ideal
There is no need for `new Promise` here, because the Puppeteer calls already return promises that can be awaited or chained.
To reject the promise returned by an async function, throw inside it:
if (await page.$("#username")) {
throw new Error('ERROR -> LOGIN FAILED');
}
`page` must be declared outside the `try` block so that it is also in scope in the `catch` block. It should be
let page;
try {
page = await browser.newPage();
instead of
try {
const page = await browser.newPage();
// Best solution for error handling in puppeteer
const puppeteer = require("puppeteer");
// Options object passed to puppeteer.launch() by start().
const param_puppeteer = {
// Command-line switches listed for the launched browser.
args: [
"--incognito",
"--ignore-certificate-errors",
"--no-sandbox",
"--disable-setuid-sandbox",
"--window-size=1920,1080",
"--disable-accelerated-2d-canvas",
"--disable-gpu",
// The switches below are commented out and therefore not passed.
// '--unlimited-storage',
// '--no-startup-window',
// '--disable-dev-shm-usage',
// '--disable-crash-reporter',
// '--disable-breakpad'
],
headless: false,
};
/**
 * Launches a browser, runs `task` on a fresh page and always closes the browser.
 *
 * Errors from launching, opening the page, running the task or closing the
 * browser are logged instead of propagated, so the returned promise does not
 * reject.
 *
 * @returns {Promise<*>} The value `task` resolved with, or `undefined` when
 *   something failed.
 */
async function start() {
  let browser;
  try {
    browser = await puppeteer.launch(param_puppeteer);
    const page = await browser.newPage();
    return await task(page);
  } catch (err) {
    console.log(err);
    return undefined;
  } finally {
    // Close in `finally` so the browser is released even when newPage() fails,
    // not only when task() settles.
    if (browser) {
      await browser.close().catch((err) => console.log(err));
    }
  }
}
/**
 * Loads the Google home page and verifies that the `#hplogo` element exists.
 *
 * @param {object} page - Page object providing `setViewport`, `goto`,
 *   `waitForSelector` and `$`.
 * @returns {Promise<string>} Resolves with "success" when `#hplogo` is found.
 * @throws {Error} With message "failed" when `page.$` finds no element; errors
 *   from the page calls themselves propagate unchanged.
 */
async function task(page) {
  await page.setViewport({ width: 1000, height: 1100 });
  await page.goto("https://google.com");
  await page.waitForSelector("#hplogo");
  const logo = await page.$("#hplogo");
  if (!logo) {
    // Throwing inside an async function rejects its promise; no `new Promise` needed.
    throw new Error("failed");
  }
  return "success";
}
start();
Related
I am automating the login flow via puppeteer.
Scenario - Login to a browser and get the access token from network console logs
I am able to login to browser successfully, but not getting how to fetch the network console logs, where I can see requests, request headers and responses
Here is my code which works fine for login workflow, but no network console logs are captured.
/**
* Tests for Authorization
*
* #group login
*/
const fetch = require("node-fetch");
const puppeteer = require('puppeteer');
const usernameSelector = '#idp-discovery-username';
const nextButtonSelector = '#idp-discovery-submit';
const PasswordSelector = '#okta-signin-password';
const LoginButtonSelector = '#okta-signin-submit';
const pxHeaderSelector = '[data-auto-id="HeaderCPLogoLink"]';
const cxHeaderSelector = '.header-logo';
/**
 * Drives the two-step login form at `url` in a non-headless browser, waits for
 * the portal header, then closes the browser. Nothing is returned.
 *
 * @param url - Address of the login page.
 * @param username - Text typed into the username field.
 * @param password - Text typed into the password field.
 * @param portal - 'PX' or 'CX'; selects which header element to wait for.
 */
const generationBrowserToken = async (url, username, password, portal) => {
let browser, page;
browser = await puppeteer.launch({
headless: false,
slowMo: 0,
args: ['--start-maximized'],
defaultViewport: null,
})
console.log('Getting JWT token from Browser')
// NOTE(review): `url` is passed to newPage() here and again to page.goto()
// below — confirm that newPage() accepts an argument at all.
page = await browser.newPage(url)
await page.evaluate(() => {
debugger;
});
console.log('URL is : ' + url)
await page.goto(url, {
waitUntil: 'load',
// presumably disables the navigation timeout — TODO confirm
timeout: 0
});
// Fixed pauses (3 s / 5 s) are inserted between the individual steps.
await new Promise((r) => setTimeout(r, 3000));
await page.waitForSelector(usernameSelector)
await page.type(usernameSelector, username);
await new Promise((r) => setTimeout(r, 3000));
await page.waitForSelector(nextButtonSelector)
await page.click(nextButtonSelector)
await new Promise((r) => setTimeout(r, 5000));
await page.waitForSelector(PasswordSelector)
await page.type(PasswordSelector, password);
await new Promise((r) => setTimeout(r, 3000));
await page.click(LoginButtonSelector);
await new Promise((r) => setTimeout(r, 3000));
// Only 'PX' and 'CX' wait for a header; any other portal value skips the wait.
if(portal === 'PX') {
await page.waitForSelector(pxHeaderSelector);
} else if(portal === 'CX') {
await page.waitForSelector(cxHeaderSelector);
}
// NOTE(review): this listener is registered after all login steps above and
// immediately before browser.close(), so responses from the login flow were
// presumably not observed by it — confirm.
page.on('response', response => {
if (response.url().endsWith("details"))
// NOTE(review): `response` is invoked as a function here although it is used
// as an object (`response.url()`) one line above — confirm.
console.log("response: ", response());
});
await browser.close()
};
module.exports = {
generationBrowserToken
}
Can someone please help me how to fetch network logs from this flow, where I can read API calls and get the access_token from one of the API headers.
I'm fetching data from a website but the process sometimes fails so I make the function to retry it. But I'm using a 15 seconds timeout, so when the timeout is triggered it will stop retrying the function and it will return some error message from the website.
This works fine in my local machine but when I deploy my code to Vercel (running on AWS Lambda as far as I know), the setTimeout is being completely ignored, so the fetchData function keeps running until it gets the correct response or until the server default 60 seconds timeout triggers.
Here's the code:
// GET /test: types a code into the page, clicks the check button and returns
// the JSON body of the matching API response, retrying while that body
// contains `errors` and the 15 s flag has not been set.
router.get('/test', async (req, res) => {
try {
const browser = await playwright.launchChromium({
headless: false });
const context = await browser.newContext();
const page = await context.newPage();
// Flag flipped to true by the timer below after 15 s; fetchData() only reads
// it between attempts.
let timeout = false;
let fetchDataTimeout = setTimeout(() => {
timeout = true;
}, 15000);
// Performs one attempt and calls itself again after a page reload when the
// payload has `errors` and the flag is still false.
const fetchData = async () => {
await page.type('#code', '6824498040');
let data = await (
// Start waiting for the response and click together; element [0] is the response.
await Promise.all([
page.waitForResponse(
(response) =>
response.url() === `${env.API_URL2}` && response.status() === 200,
{ timeout: 10000 },
),
page.click('#btn-check'),
])
)[0].json();
if (data.errors && !timeout) {
await page.reload();
return await fetchData();
} else {
clearTimeout(fetchDataTimeout);
return data;
}
};
let data = await fetchData();
// NOTE(review): the browser is closed only on this success path; the catch
// block below does not close it.
await browser.close();
res.json({
status: 200,
message: data,
});
} catch (error) {
console.error(error);
return res.status(500).send({ 'Server Error': `${error}` });
}
});
I read that you have to wrap the setTimeout into a Promise and return it as an async function. Like this:
// Promise-based wrapper around setTimeout: after `t` milliseconds the returned
// promise fulfils with a "Completed in <t>" message. It never rejects.
const timeOut = async (t) => {
  const scheduleCompletion = (done) => {
    setTimeout(() => done(`Completed in ${t}`), t);
  };
  return new Promise(scheduleCompletion);
};
await timeOut(15000).then((result) => console.log(result))
But this will trigger the 15 seconds wait always. I need to discard the waiting if I get the correct response from fetchData and trigger the timeout if I don't 15 seconds after I start trying.
Any ideas???
The solution was quite simple. Since AWS Lambda requires an async function to properly wait for the timeouts, I just had to wrap the timeout in a Promise. What did the trick was passing both promises, the timeout and fetchData, to Promise.race(). Promise.race() settles as soon as the first of them settles, so the handler stops waiting for the slower one (note that it does not cancel the other one, which keeps running in the background).
The code is now like this:
/**
 * GET /test: types a code into the page, clicks the check button and responds
 * with the JSON body of the matching API response.
 *
 * The lookup is retried (after a page reload) while the payload has an `error`
 * field, and the whole operation is raced against a 12 s timeout. On failure
 * or timeout a 500 response is sent. The browser is closed and the timer is
 * cleared on every path.
 */
router.get('/test', async (req, res) => {
  let browser;
  let timer;
  try {
    browser = await playwright.launchChromium({ headless: false });
    const context = await browser.newContext();
    const page = await context.newPage();

    // Rejects after 12 s. Promise.race() only stops *waiting* for the slower
    // promise; the retry loop itself ends when the browser is closed below.
    const timeout = () =>
      new Promise((_resolve, reject) => {
        timer = setTimeout(() => {
          reject('FUNCTION TIMEOUT');
        }, 12000);
      });

    const fetchData = async () => {
      await page.type('#code', '6824498040');
      // Start waiting for the response before clicking so it cannot be missed.
      const [response] = await Promise.all([
        page.waitForResponse(
          (r) => r.url() === `${env.API_URL2}` && r.status() === 200,
          { timeout: 10000 },
        ),
        page.click('#btn-check'),
      ]);
      // Keep the payload in a local: the outer `data` binding is still being
      // initialised while this function runs and must not be read here.
      const payload = await response.json();
      if (payload.error) {
        await page.reload();
        return fetchData();
      }
      return payload;
    };

    const data = await Promise.race([fetchData(), timeout()]);
    res.json({
      status: 200,
      message: data,
    });
  } catch (error) {
    console.error(error);
    res.status(500).send({ 'Server Error': `${error}` });
  } finally {
    // Release the timer and the browser whether the request succeeded or not.
    clearTimeout(timer);
    if (browser) {
      await browser.close().catch((closeError) => console.error(closeError));
    }
  }
});
I implemented a 12-second timeout instead of 15. I tested this on Vercel (AWS Lambda) and it works fine.
Thanks everybody, especially dandavis, this implementation was his idea.
Anyway, I hope it helps.
I'm trying to scrape the source of the first image with a specific class. On the page, there are multiple images with different additional classes but they share the class opwvks06. I have tried the following:
// Opens the photos page, reads the `src` of the first element matching
// `img[class="opwvks06"]` and logs it; the browser is closed in `finally`.
(async () => {
let browser, page;
let url = 'https://www.facebook.com/radiosalue/photos/?ref=page_internal';
try {
browser = await puppeteer.launch({ headless: true });
page = await browser.newPage();
await page.setViewport({ width: 1366, height: 500 });
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
// The callback runs inside the page. There is no wait for the image between
// goto() and this call.
const image = await page.evaluate(() => {
// NOTE(review): `[class="opwvks06"]` compares the whole class attribute, so an
// image carrying additional classes would presumably not match — confirm.
const getImage = document
.querySelector('img[class="opwvks06"]')
.getAttribute('src');
return getImage;
});
console.log(image);
} catch (error) {
console.log(error.message);
} finally {
// `browser` stays undefined when launch() itself failed.
if (browser) {
await browser.close();
console.log('closing browser');
}
}
})();
However, this returns null. Following is the html structure.
Following Mike 'Pomax' Kamermans's answer, all you had to do was add:
await page.waitForSelector("img.opwvks06:first-child");
You can also try using Stealth Puppeteer if the site is protected from bots, but in your case it is not necessary. Here is the final code:
// Opens the photos page, waits until the first `img.opwvks06` is present,
// logs its `src`, and closes the browser whether or not scraping succeeded.
(async () => {
  const url = "https://www.facebook.com/radiosalue/photos/?ref=page_internal";
  let browser;
  try {
    browser = await puppeteer.launch({ headless: true });
    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 500 });
    await page.goto(url, { waitUntil: "domcontentloaded", timeout: 60000 });
    // The image is not guaranteed to exist yet after `domcontentloaded`.
    await page.waitForSelector("img.opwvks06:first-child");
    // Runs inside the page and hands the attribute value back to Node.
    const image = await page.evaluate(() =>
      document.querySelector("img.opwvks06:first-child").getAttribute("src"),
    );
    console.log(image);
  } catch (error) {
    console.log(error.message);
  } finally {
    // `browser` is still undefined when launch() itself failed.
    if (browser) {
      await browser.close();
      console.log("closing browser");
    }
  }
})();
Output:
https://scontent.fiev13-1.fna.fbcdn.net/v/t39.30808-6/279856934_10159266106247585_585375152905621309_n.jpg?stp=dst-jpg_p206x206&_nc_cat=106&ccb=1-6&_nc_sid=8024bb&_nc_ohc=owbdAyQwP3wAX-8rdo5&_nc_ht=scontent.fiev13-1.fna&oh=00_AT8yJizEIWx8oEFLUBb90ZIIj-Q4WLmmiWtpd1aRVy-UkA&oe=627C10A5
closing browser
So I've been trying to work with puppeteer, and honestly it has been hassle but I am determined to get a better understanding of it.
So I am trying to use the Page.Type() function, and I am having trouble with it either finding the inputs or... what I think might be causing the issue is that my internet is too slow and it's not able to find the selectors...
But I had thought that the
await instagram.page.waitForNavigation({ waitUntil: 'networkidle2' });
Would've solved the issue, maybe I am doing something else wrong I am not sure.
Here are the files :
main.js -
const insta = require('./instagram');
// Entry point: calls insta.initialize() and then insta.login() with
// placeholder credentials.
(async () => {
await insta.initialize();
await insta.login('user', 'password');
// Breakpoint statement left in for interactive debugging.
debugger;
})()
instagram.js -
const puppeteer = require('puppeteer');
const BASE_URL = 'https://instagram.com/';
// Object holding the browser and page handles together with the actions
// (initialize, login) that operate on them.
const instagram = {
// Both handles stay null until initialize() has run.
browser: null,
page: null,
// Launches a non-headless browser and opens one page, storing both on `instagram`.
initialize: async () => {
instagram.browser = await puppeteer.launch({
headless: false
});
instagram.page = await instagram.browser.newPage();
},
// Opens BASE_URL, clicks the login button, then types the credentials.
login: async (username, password) => {
await instagram.page.goto(BASE_URL, { waitUntil: 'networkidle2' });
// NOTE(review): `#id` inside the XPath predicate looks like a garbled `@id` — confirm.
let loginButton = await instagram.page.$x('//*[#id="loginForm"]/div/div[3]/button')
// The button is clicked here, before the username and password are typed below.
await loginButton[0].click();
await instagram.page.waitForNavigation({ waitUntil: 'networkidle2' });
await instagram.page.waitFor(1000);
await instagram.page.type('input[name="username"]', username, { delay: 500 });
await instagram.page.type('input[name="password"]', password, { delay: 500 });
debugger;
}
}
module.exports = instagram;
Your login: function should look like this:
/**
 * Opens the Instagram start page, fills in the credentials and submits the
 * login form.
 *
 * @param {string} username - Text typed into the username input.
 * @param {string} password - Text typed into the password input.
 * @returns {Promise<void>} Settles once the navigation triggered by the click
 *   has finished.
 */
login: async (username, password) => {
  await instagram.page.goto(BASE_URL, { waitUntil: 'networkidle2' });
  // goto() has already completed the navigation, so wait for the form itself
  // rather than for another navigation that nothing triggers.
  await instagram.page.waitForSelector('input[name="username"]');
  await instagram.page.type('input[name="username"]', username, { delay: 500 });
  await instagram.page.type('input[name="password"]', password, { delay: 500 });
  const loginButton = await instagram.page.$x('//*[@id="loginForm"]/div/div[3]/button');
  // Start waiting before clicking so the navigation caused by the click
  // cannot be missed. `instagram.page` is used instead of `this.page` because
  // an arrow function has no `this` bound to the object.
  await Promise.all([
    instagram.page.waitForNavigation(),
    loginButton[0].click(),
  ]);
  debugger;
}
You were clicking the login button before you filled in the inputs. Also, the Promise.all block as I've shown is the recommended way to click a button that navigates to a new URL.
How can I pass data into an async function? I'm trying to get the Google search result and pass it into puppeteer to open the page.
// Runs a Google search and logs the first result's link.
google('example.com test', function (err, res){
if (err) console.error(err)
// NOTE(review): `link` is declared with `var` inside this callback, so it is
// local to the callback and not visible to the async function below.
var link = res.links[0].link.toString();
console.log(res.links[0].link)
});
// Opens a browser, navigates to `link`, clicks at a fixed position and appends
// the resulting URL to saved.txt. This function starts immediately; it does not
// wait for the google() callback above.
(async () => {
const browser = await puppeteer.launch({headless: false});
const page = await browser.newPage();
// Reads an identifier `link` that this snippet does not define in this scope.
await page.goto(link, {waitUntil: 'networkidle2'});
const m = page.mouse
await m.click(110,420)
// Logs '1' whenever the browser emits 'targetcreated'.
browser.on('targetcreated', pageOpened => {
console.log('1');
});
await page.waitForNavigation({waitUntil: 'load'});
let url = await page.url()
console.log(url)
fs.appendFile('saved.txt', url, function (err) {
if (err) throw err;
console.log('Saved!');
});
})();
Just promisify the google call:
// Promise for the first Google result's link (as a string).
// It rejects when the search fails or yields no usable result, so that
// `await link` surfaces the problem instead of waiting forever.
const link = new Promise((resolve, reject) => {
  google('example.com test', (err, res) => {
    if (err) {
      reject(err);
      return;
    }
    const first = res && res.links && res.links[0];
    if (!first || first.link == null) {
      reject(new Error('google search returned no links'));
      return;
    }
    resolve(first.link.toString());
  });
});
Then inside your async function just do
await link
to use the link somewhere.