This is the code I have given:
const puppeteer = require('puppeteer');
(async () => {
const baseUrl = 'example.com';
const browser = await puppeteer.launch({ headless: true});
const context = await browser.createIncognitoBrowserContext();
const page = await context.newPage();
function delay(time) {
return new Promise(function (resolve) {
setTimeout(resolve, time)
});
}
await page.goto(baseUrl);
await page.waitForSelector('[name="loginfmt"]');
await page.type('[name="loginfmt"]', 'abc#amazon.com');
await page.click('[type="submit"]');
delay(1000);
await page.authenticate({ username: `abc#amazon.com`, password: `abc#123` });
await page.click((`[data-row-id="111-222-333-444"]`));
await delay(1000);
await page.pdf(
{
path: "Z1.pdf",
printBackground: true,
width: '1300px',
height: '14200px'
}
)
browser.close();
})()
When inspected the page with document.querySelectorAll([data-row-id="111-222-333-444"]); it returned a NodeList array which consisted the details which I have been looking for.
However when tried the same via Puppeteer it terminates with an error saying "Error: No node found for selector: [data-row-id="111-222-333-444"]"
Tried using:
"const button= await page.$(([data-row-id="111-222-333-444"]);
button.click();"
I have been trying to fix this since a long time. It would be really helpful if I get a fix for this.
Try using this code:
const puppeteer = require('puppeteer');
(async () => {
const baseUrl = 'example.com';
const browser = await puppeteer.launch({ headless: true});
const context = await browser.createIncognitoBrowserContext();
const page = await context.newPage();
await page.goto(baseUrl);
await page.waitForSelector('[name="loginfmt"]');
await page.type('[name="loginfmt"]', 'abc#amazon.com');
await page.click('[type="submit"]');
await page.waitForTimeout(1000)
await page.authenticate({ username: `abc#amazon.com`, password: `abc#123` });
await page.waitForTimeout(5000)
await page.$(`[data-row-id="111-222-333-444"]`);
await page.click((`[data-row-id="111-222-333-444"]`));
await page.waitForTimeout(1000)
await page.pdf(
{
path: "Z1.pdf",
printBackground: true,
width: '1300px',
height: '14200px'
}
)
browser.close();
})()
Related
Im getting the error Error: net::ERR_TIMED_OUT at http://lumtest.com/myip.json
const puppeteer = require('puppeteer');
(async () => {
const browser = await puppeteer.launch({
headless: false,
args: ['--proxy-server=191.243.218.249:53281']
});
const page = await browser.newPage();
await page.authenticate();
await page.goto('http://lumtest.com/myip.json');
await page.screenshot({path: 'example.png'});
await browser.close();
})();
Try to use the prefered protocol before ip address and port.
const puppeteer = require('puppeteer');
(async () => {
const browser = await puppeteer.launch({
headless: false,
args: ['--proxy-server=https://191.243.218.249:53281']
});
const page = await browser.newPage();
await page.authenticate();
await page.goto('http://lumtest.com/myip.json');
await page.screenshot({path: 'example.png'});
await browser.close();
})();
The goal is to obtain all product links from all pages in the pagination. So far I have managed to print information to the console with console.log (links). However, since I am completely new to this field and completely inexperienced, I have a problem how to pass some value back with the return command. return links.
With console.log(links) I get a warning: getLinks is not iterable
const puppeteer = require('puppeteer')
async function getLinks(){
const browser = await puppeteer.launch({headless: false, defaultViewport: null});
const page = await browser.newPage();
const url = "https://example.com/product-category?p=1&nidx"
await page.goto(url)
while(await page.$('.change-country-buttons > button:nth-child(1)')){
await page.waitForTimeout(2000);
await page.keyboard.press('ArrowDown');
await page.waitForSelector('.change-country-buttons');
await page.waitForTimeout(2000);
await page.click('.change-country-buttons > button:nth-child(1)');
await page.waitForTimeout(2000);
}
while(await page.$(".pagination .pagination--next")){
await page.waitForTimeout(2000);
await page.evaluate(() => {
document.querySelector(".pagination .pagination--next").scrollIntoView();
});
await page.waitForTimeout(1000);
await page.waitForSelector(".pagination .pagination--next")
await page.waitForTimeout(500);
await page.click('.pagination .pagination--next')
const links = await page.$$eval('.item__info > .mtc-link:nth-child(2)', (allAs) => { return allAs.map((a) => a.href) });
await page.waitForTimeout(1500);
console.log(links)
}
}
return links // Is returning links only form the first page and then the loop stops
I tried something with Promise.all () but it wasn't entirely clear to me how to do it.
Please help and be gentle as I am just starting to learn the basics
You need to create an array and push all the helmet links from each page onto it.
This tested successfully for me.
const puppeteer = require('puppeteer')
async function getLinks(){
const browser = await puppeteer.launch({headless: false, defaultViewport: null});
const page = await browser.newPage();
const url = "https://www.motocard.com/en/motorcycle-road-gear/helmets/precio_150-3200/full-face?p=1&nidx"
var all_links = [];
await page.goto(url);
while(await page.$('.change-country-buttons > button:nth-child(1)')){
await page.waitForTimeout(2000);
await page.keyboard.press('ArrowDown');
await page.waitForSelector('.change-country-buttons');
await page.waitForTimeout(2000);
await page.click('.change-country-buttons > button:nth-child(1)');
await page.waitForTimeout(2000);
}
while(await page.$(".pagination .pagination--next")){
await page.waitForTimeout(2000);
await page.evaluate(() => {
document.querySelector(".pagination .pagination--next").scrollIntoView();
});
await page.waitForTimeout(1000);
await page.waitForSelector(".pagination .pagination--next")
await page.waitForTimeout(500);
await page.click('.pagination .pagination--next')
const links = await page.$$eval('.item__info > .mtc-link:nth-child(2)', (allAs) => { return allAs.map((a) => a.href) });
await page.waitForTimeout(1500);
//console.log(links)
all_links.push(...links);
}
return all_links;
}
(async ()=>{
var links = await getLinks();
console.log('done');
console.log(links);
})();
i want to store puppeteer page into session to use in another middleware in express
here is my code in first middleware:
(async () => {
let solve;
const browser = await puppeteer.launch({
headless: false
});
const page = await browser.newPage();
await page.setDefaultNavigationTimeout(0)
await page.goto(URL, { waitUntil: 'domcontentloaded' });
await page.click('button.imeiInquiry');
const images = await page.$eval(('.captcha-image[src]'),node => node.src);
res.send({
type: 'img',
url: images,
});
req.session.page = page;
})();
and here is the code in 2nd middleware:
(async () => {
const page = await req.session.page;
res.send(await req.session.page.title);
await page.type('#CaptchaInputText', req.body.capchaNumber);
await page.type('#ImeiNumber', req.body.imei);
await page.click('.swal2-confirm')
})
but 2nd middleware doesnt work
I'm having trouble signing in, when I open this page
https://www.nike.com.br/chuteira-nike-phantom-gt-elite-3d-unissex-153-169-171-316414?gridPosition=A1
It goes to another page, and I can't fill out the form, can someone help me fill out the login form?
(async () => {
try{
console.log("Started!")
const browser = await puppeteer.launch({
executablePathh:'/usr/bin/chromium', headless:false,
});
const page = await browser.newPage();
await page.setViewport({ width: 1920, height: 1080 });
await page.setRequestInterception(true);
const blockedResourceTypes = ["image", "bacon", "imageset", "font", "stylesheet", "texttrack", "csp_report"]
const blockedURLs = [
""
]
const allowedRequest = req => !blockedResourceTypes.includes(req.resourceType()) && !blockedURLs.includes(req.url())
page.on('request', (req) => {
if(allowedRequest(req)) {
req.continue();
}
else {
req.abort();
}
});
await page.goto('https://www.nike.com.br/chuteira-nike-phantom-gt-elite-3d-unissex-153-169-171-316414?gridPosition=A1', {
waitUntil: 'domcontentloaded', timeout:0});
await page.waitForXPath('//label[#for="tamanho__idM40F395"]', {
visible:true, timeout:0});
const tamanho = await page.$x('//label[#for="tamanho__idM40F395"]')
await tamanho[0].click('//label[#for="tamanho__idM40F395"]');
await page.waitForSelector('button#anchor-acessar-unite-oauth2') await page.click('button#anchor-acessar-unite-oauth2')
const iframe = wait.ForSelector('iframe#nike-unite-oauth2-iframe')
const frame = await iframe.contentFrame()
await frame.type('input[name="emailAddress"]', 'test#gmail.com')
await frame.type('input[name="password"]', 'pass')
await frame.click('input[value="ENTRAR"]')
They realized that if you run the script it will go to the login page and then I can't fill out the form
To login in a new page instead of an iframe, try to replace this:
await page.click('button#anchor-acessar-unite-oauth2')
const iframe = wait.ForSelector('iframe#nike-unite-oauth2-iframe')
const frame = await iframe.contentFrame()
await frame.type('input[name="emailAddress"]', 'test#gmail.com')
await frame.type('input[name="password"]', 'pass')
await frame.click('input[value="ENTRAR"]')
with this:
await Promise.all([
page.click('button#anchor-acessar-unite-oauth2'),
page.waitForNavigation(),
])
await page.waitForSelector('input[name="emailAddress"]')
await page.type('input[name="emailAddress"]', 'test#gmail.com')
await page.type('input[name="password"]', 'pass')
await page.click('input[value="ENTRAR"]')
I am trying to create UI in Electron for scraper in Puppeteer.
Every time I use page.evaluate() it returns an empty object [object Object],
here is an example:
const puppeteer = require('puppeteer');
const scrape = async () => {
const browser = await puppeteer.launch({
executablePath: '/usr/bin/google-chrome',
headless: true,
});
const page = await browser.newPage();
await page.goto("https://google.com/", {
waitUntil: 'networkidle2',
timeout: 90000
});
const length = await page.evaluate(`selector => {
return Array.from(document.querySelectorAll(selector)).length;
}`, 'div');
await page.close();
await browser.close();
return length;
}
document.querySelector("button").addEventListener("click", async function() {
const divs_len = await scrape();
const par = document.querySelector('#par');
par.innerText = divs_len;
});
// par shows [object Object]
EDIT
I have used the following resource to fix the sample code:
https://github.com/puppeteer/puppeteer/issues/4221#issuecomment-478780545
And here is the working version:
const puppeteer = require('puppeteer');
const scrape = async () => {
const browser = await puppeteer.launch({
executablePath: '/usr/bin/google-chrome',
headless: true,
});
const page = await browser.newPage();
await page.goto("https://google.com/", {
waitUntil: 'networkidle2',
timeout: 90000
});
const functionToBeEvaluated = selector => {
return Array.from(document.querySelectorAll(selector)).length;
}
const result = await page.evaluate('(' + functionToBeEvaluated.toString() + ')("div");');
await page.close();
await browser.close();
return result;
}
document.querySelector("button").addEventListener("click", async function() {
const divs_len = await scrape();
const par = document.querySelector('#par');
par.innerText = divs_len;
});
In page.evaluate() argument, you can use a function expression that will be called or a string with a direct code that will be executed. If you send a function expression as a string, page.evaluate() returns just a reference to this very function which becomes an empty object as functions are not serializable. Try this:
const length = await page.evaluate(selector => {
return Array.from(document.querySelectorAll(selector)).length;
}, 'div');