I have minimal coding knowledge and I'm trying to adapt some tutorials without success.
The JavaScript code I wish to adapt (script A) is pasted into the Chrome developer console and successfully pulls the data I need. This JavaScript snippet identifies the largest price graphic in an e-commerce site.
A second tutorial (script B) is run from the shell and calls the Puppeteer library. This script pulls some hotel booking data and runs successfully.
I wish to adapt script A to run from the shell using the Puppeteer library.
This is Script A -
let elements = [
...document.querySelectorAll(' body *')
function createRecordFromElement(element) {
const text = element.textContent.trim()
var record = {}
const bBox = element.getBoundingClientRect()
if(text.length <= 30 && !(bBox.x == 0 && bBox.y == 0)) {
record['fontSize'] = parseInt(getComputedStyle(element)['fontSize']) }
record['y'] = bBox.y
record['x'] = bBox.x
record['text'] = text
return record
let records = elements.map(createRecordFromElement)
function canBePrice(record) {
if( record['y'] > 600 ||
record['fontSize'] == undefined ||
!record['text'].match(/(^(US ){0,1}(rs\.|Rs\.|RS\.|\$|₹|INR|USD|CAD|C\$){0,1}(\s){0,1}[\d,]+(\.\d+){0,1}(\s){0,1}(AED){0,1}$)/)
return false
else return true
let possiblePriceRecords = records.filter(canBePrice)
let priceRecordsSortedByFontSize = possiblePriceRecords.sort(function(a, b) {
if (a['fontSize'] == b['fontSize']) return a['y'] > b['y']
return a['fontSize'] < b['fontSize']
This is Script B -
const puppeteer = require('puppeteer');
let bookingUrl = 'insert booking URL';
(async () => {
const browser = await puppeteer.launch({ headless: true });
const page = await browser.newPage();
await page.setViewport({ width: 1920, height: 926 });
await page.goto(bookingUrl);
// get hotel details
let hotelData = await page.evaluate(() => {
let hotels = [];
// get the hotel elements
let hotelsElms = document.querySelectorAll('div.sr_property_block[data-hotelid]');
// get the hotel data
hotelsElms.forEach((hotelelement) => {
let hotelJson = {};
try {
hotelJson.name = hotelelement.querySelector('span.sr-hotel__name').innerText;
hotelJson.reviews = hotelelement.querySelector('span.review-score-widget__subtext').innerText;
hotelJson.rating = hotelelement.querySelector('span.review-score-badge').innerText;
hotelJson.price = hotelelement.querySelector('strong.price').innerText;
catch (exception){
return hotels;
I've had various attempts at adapting Script A into the format of Script B. Various and many different errors have been thrown. Without coding knowledge, I'm not getting anywhere.
Here's one of many variations I've tried, called Script C -
const puppeteer = require('puppeteer-core');
let bookingUrl = 'https://shop.coles.com.au/a/dianella/product/moccona-coffee-capsules-espresso-7';
(async () => {
const browser = await puppeteer.launch({
executablePath: '/usr/bin/chromium-browser',
headless: true
const page = await browser.newPage();
await page.setViewport({ width: 1920, height: 926 });
await page.goto(bookingUrl);
// get hotel details
let hotelData = await page.evaluate(() => {
let hotels = [];
// get the hotel elements
let elements = [
...document.querySelectorAll(' body *')
function createRecordFromElement(element) {
const text = element.textContent.trim()
var record = {}
const bBox = element.getBoundingClientRect()
if(text.length <= 30 && !(bBox.x == 0 && bBox.y == 0)) {
record['fontSize'] = parseInt(getComputedStyle(element)['fontSize']) }
record['y'] = bBox.y
record['x'] = bBox.x
record['text'] = text
return record
let records = elements.map(createRecordFromElement)
function canBePrice(record) {
if( record['y'] > 600 ||
record['fontSize'] == undefined ||
!record['text'].match(/(^(US ){0,1}(rs\.|Rs\.|RS\.|\$|₹|INR|USD|CAD|C\$){0,1}(\s){0,1}[\d,]+(\.\d+){0,1}(\s){0,1}(AED){0,1}$)/)
return false
else return true
let possiblePriceRecords = records.filter(canBePrice)
let priceRecordsSortedByFontSize = possiblePriceRecords.sort(function(a, b) {
if (a['fontSize'] == b['fontSize']) return a['y'] > b['y']
return a['fontSize'] < b['fontSize']
Here's the links to the tutorials for info -
Is there anything obviously wrong in Script C?
After reading through script C, it appears that you have not made any mistakes, rather the website you are attempting to access has decided to block scraper bots.
A quick host lookup on the domain shows that they are using security service section.io to block scraper bots on their website. See:
shop.coles.com.au is an alias for shop.coles.com.au.c.section.io.
shop.coles.com.au.c.section.io is an alias for shop.coles.com.au.x.section.io
I am trying to improve my skills with async, await. So I am trying to make an app that collects the prices of different flights in different periods and then it decides in which period the plane ticket is cheapest for personal use.
const puppeteerExtra = require("puppeteer-extra");
const pluginStealth = require("puppeteer-extra-plugin-stealth");
const PCR = require("puppeteer-chromium-resolver");
const howLongStart = 7;
const howLongEnd = 8;
const fromDate = new Date("2023-07-15");
const toDate = new Date("2023-08-31");
const airport = "PDL";
let tickets = [];
for (let i = 0; i < howLongEnd - howLongStart; i++) {
let howLong = howLongStart + i;
let tempFromDate = new Date("2023-07-15");
let tempFromD = new Date("2023-07-15");
let tempToDate = addDays(tempFromD, howLong);
async function ticketFirstMethod() {
const ticketFirst = await searchFlight(airport, tempFromDate, tempToDate);
while (addDays(tempToDate, 1) <= toDate) {
tempFromDate = addDays(tempFromDate, 1);
tempToDate = addDays(tempToDate, 1);
async function ticketMethod() {
let ticket = await searchFlight(airport, tempFromDate, tempToDate);
let lowestTicket;
let lowest = Number.POSITIVE_INFINITY;
let highest = Number.NEGATIVE_INFINITY;
let tmp;
for (let i = tickets.length - 1; i >= 0; i--) {
tmp = tickets[i][0];
if (tmp < lowest) {
lowest = tmp;
lowestTicket = tickets[i];
if (tmp > highest) highest = tmp;
function addDays(date, days) {
date.setDate(date.getDate() + days);
return date;
async function searchFlight(airport, tempFromDate, tempToDate) {
const stats = await PCR();
const browser = await puppeteerExtra.launch({
executablePath: stats.executablePath,
headless: false,
const page = await browser.newPage();
await page.goto(
"https://www.pelikan.cz/cs/letenky/T:1,P:4000E_0_0,CDF:PRGMUCFRATXLVIE,CDT:C" +
airport +
",R:1,DD:" +
tempFromDate.getFullYear +
"_" +
tempFromDate.getMonth +
"_" +
tempFromDate.getDay +
",DR:" +
tempToDate.getFullYear +
"_" +
tempToDate.getMonth +
"_" +
tempToDate.getDay +
{ waitUntil: "networkidle2", timeout: 0 }
const cheapestPrice = await page.waitForSelector(
"#flight-10000 > div:nth-child(1) > flights-flight:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > div:nth-child(1) > div:nth-child(3)"
const price = await page.evaluate((el) => el.textContent, cheapestPrice);
const priceOnly = price.replace(/\D/g, "");
const ticket = [priceOnly, page.url()];
await browser.close();
return ticket;
I have tried to put here an example of the code.
Can anyone please help me?
Firstly I choose a period from when to when it should be searching for the ticket. Then I call searchFlight with this period of time to search for the ticket. The main thread will wait for the function to be processed and then the ticket is pushed to tickets.
The main thread will not wait and it continous so there is undefined ticket pushed to tickets.
I was trying to use the then method on the line where I am calling searchFlight function. In then method I put tickets.push(ticket). But that didn't work.
I was trying to search for fix but because I dont understand await, async that much I could not fix my code.
First off, remove the (async () => { .... }() wrapper. That's superfluous and getting in the way. The parent function is already async so the wrapper is not needed.
Then, searchFlight is async so you need to await its result where you are calling it. And, you'll need to make it's parent function async so you can use that await.
const ticket = await searchFlight(airport, tempFromDate, tempToDate);
Then, you have to actually return a result from inside of searchFlight. Right now, you have no return result at the top level of that function.
I would suggest you do that by not mixing await and .then(). Just use await like this:
async function searchFlight(airport, tempFromDate, tempToDate){
const stats = await PCR();
const browser = await puppeteerExtra.launch({
executablePath: stats.executablePath,
headless: false
const page = await browser.newPage()
await page.goto("...", {waitUntil: "networkidle2", timeout: 0})
const cheapestPrice = await page.waitForSelector('...');
const price = await page.evaluate(el => el.textContent, cheapestPrice);
const priceOnly = price.replace(/\D/g, "");
const ticket = [priceOnly, page.url()];
await browser.close()
return ticket;
And, please eliminate any use of var. One should only be using const or let in modern Javascript.
I want to scrape some data from login protected page using Node Js and Puppeteer. When I try to scrape data, the shows page is not reachable. I don't have any idea about why I am getting output like this. Here is my code.
I am new to Puppeteer and Node Js, its been 8 hours I am trying to modify that code, but not working.
async function EmailLookup(MlsNumber)
resp = new { Success = false };
(page = await singletonBrowser.Instance.newPage())
await page.setRequestInterception(true);
page.Request += (sender, e) =>
if (e.Request.resourceType === resourceType.document || e.Request.resourceType === resourceType.Script || e.Request.resourceType === resourceType.Xhr)
await page.async("https://example.com/idp/login");
if (!page.Url.Contains("layouts"))
await page.waitForSelector("#username");
await page.waitForSelector("#password");
await page.waitForSelector("#loginbtn");
await page.async("#username", "xxxxx");
await page.async("#password", "xxxxx");
await page.async("#password", "\r");
await page.waitForNavigation(new navigationOptions { Timeout = 0 });
await page.goto("https://example.com/launch?layoutid=61&appid=433");
await page.waitForSelector("#ctl02_m_ucSpeedBar_m_tbSpeedBar");
await page.async("#ctl02_m_ucSpeedBar_m_tbSpeedBar", MlsNumber);
await page.async("#ctl02_m_ucSpeedBar_m_tbSpeedBar", "\r");
await page.waitForNavigation(new navigationOptions { Timeout = 0 });
var MLSLink = await page.waitForXPath("//a[text()='" + MlsNumber + "']");
if (MLSLink != null)
await MLSLink.click();
await page.waitForNavigation(new navigationOptions{ Timeout = 0 });
var Content = await page.get();
htmldoc = new htmldoc();
var parcelNode = htmldoc.document.selectSingleNode("//a[contains(#href,'AssessorParcelDetail')]");
var emailNode = htmldoc.document.selectSingleNode("//a[contains(#href,'mailto:')]");
if (emailNode != null && parcelNode != null)
resp.Success = true;
resp.Email = emailNode.innerText;
resp.Parcel = parcelNode.innerText;
return json(resp);
I'm creating a script to take screenshots of Web pages with the puppeteer, I don't understand why on this site the image is saved with a width greater than that which I have set, 1920px.
If I have the fixed width of the browser, why does the screenshot come out with a greater width?
I would like to save the screenshot with a fixed width of 1920px and height based on the total content of the page.
The width of the saved image should be as wide as the width of the browser, why doesn't this happen?
const puppeteer = require('puppeteer');
const os = require('os');
const username = require('username');
//I identify the operating system and the architect of the CPU to run the Google Chrome Patch
var architetturaCPU = os.arch();
var sistemaOperativo = os.type();
console.log('System OS: '+sistemaOperativo+' '+architetturaCPU);
// Device width and height
const device_width = 1920;
const device_height = 1080;
//Patch di Chrome
var systemPath = '';
if (sistemaOperativo == 'Darwin'){
console.log('Chrome for MacOS');
var systemPath = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
} else
if(sistemaOperativo == 'Windows_NT' && architetturaCPU == 'x64'){
console.log('Chrome for Windows 64bit');
var systemPath = 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe';
if(sistemaOperativo == 'Windows_NT' && architetturaCPU == 'x32'){
console.log('Chrome for Windows 32bit');
var systemPath = 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';
if(sistemaOperativo == 'Windows_NT' && architetturaCPU == 'ia32'){
console.log('Chrome for Windows 32bit');
var systemPath = 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';
//I build an Array and insert all the buttons for the consent of the Cookies for the Network sites
const btncookie1 = 'button.cs-close-btn';
const btncookie2 = 'button.cs-accept-btn.cs-btn-primary';
var BtnCookie = [
(async function () {
//I read the url file
var fs = require('fs');
var urlArray = fs.readFileSync('url-list.js').toString().split("\n").filter(a => a);
//Launch Puppeteer
const browser = await puppeteer.launch({
headless: true,
executablePath: systemPath,
args: ['--disable-dev-shm-usage','--no-sandbox','--window-size=1920,1080'],
defaultViewport: null
//Loop through all the url-list.js URL
var contaUrl = 0;
for(var i = 0; i < urlArray.length; i++){
//Check if empty spaces are present in the url file list
if (urlArray[i].indexOf("http") != '-1'){
//I open the boswser, delete the cache and set the page size
const page = await browser.newPage();
const client = await page.target().createCDPSession();
await client.send('Network.clearBrowserCookies');
await client.send('Network.clearBrowserCache');
await page.setCacheEnabled(false);
await page.setViewport({width: device_width, height: device_height});
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36');
//Tell me which URL you are working on
console.log(' ');
console.log('\x1b[33m%s','Open URL > '+urlArray[i],'\x1b[0m');
console.log(' ');
await page.goto(urlArray[i],{waitUntil:'networkidle2'});
await page.waitFor(20000);
//Find the class / id of the button on the page to accept cookies
var contaNumeroValoriBtnCookie = BtnCookie.length;
for(var n = 0; n <= BtnCookie.length; n++){
if (await page.$(BtnCookie[n]) !== null ) {
const navigationPromise = page.waitForSelector(BtnCookie[n]);
await page.click(BtnCookie[n]);
await navigationPromise;
console.log('\x1b[32m%s', 'Bypass Cookie... OK!','\x1b[0m');
}else if (n == contaNumeroValoriBtnCookie) {
console.log('\x1b[31m%s', 'Cookie not found!','\x1b[0m');
}else {
//console.log('I'm looking for the cookie...');
} //end - Find the class / id of the button on the page to accept cookies
//Scroll the entire page to load the content
await autoScroll(page);
async function autoScroll(page){
await page.evaluate(async () => {
await new Promise((resolve, reject) => {
var totalHeight = 0;
var distance = 100;
var timer = setInterval(() => {
var scrollHeight = document.body.scrollHeight;
window.scrollBy(0, distance);
totalHeight += distance;
if(totalHeight >= scrollHeight){
}, 300);
//Go back to the top of the page
await page.evaluate(_ => {window.scrollTo(0, 0);});
await page.waitFor(10000);
//I clean up the URL before saving the file
var str = urlArray[i];
str = str.replace(/[^\w]+/ig,'-');
var convertiUrl = str;
//SAVE screenshot
await page.screenshot({path: './screenshot/'+convertiUrl+i+'.jpg', fullPage: true});
await page.waitFor(5000);
await page.close();
}//end if (urlArray[i].indexOf("http") != '-1'){
}//end loop
console.log(' ');
console.log('\x1b[32m%s', contaUrl+' all screenshot saved :)','\x1b[0m');
console.log(' ');
})(); //end script
Try to add these line to resize viewport after the page.goto method:
await page.goto(urlArray[i],{timeout: 0, waitUntil:'networkidle2'});
await page.waitFor(20000);
await page.setViewport({
width: 1920,
height: 1080,
//Find the class / id of the button on the page to accept cookies
var contaNumeroValoriBtnCookie = BtnCookie.length;
I want to build a simple bot with puppeteeer.
I used page.$$eval then I tried to fetch data from table(10 page) and mapped that data.
However I can fetch data very well on the other hand the code is working 10 times per page. I mean every row fetched 10 times.
Here is my code snippet:
const tablolariCek = async (url, sayfaSayisi) => {
const browser = await puppeteer.launch({ headless: false });
let page = await browser.newPage();
await page.goto(url);
await page.waitForSelector('#mydata_next');
let okulUni = [];
for (let index = 0; index <= sayfaSayisi; index++) {
let okullar = await page.$$eval(
'#mydata > tbody > [role="row"]',
(uniler) =>
uniler.map((okul) => {
//Here is working 10 times per page.
let uni = {};
uni.okulkodu = okul.querySelector('a').innerText.trim();
const fontVeriler = okul.querySelectorAll('font');
const strongVeriler = okul.querySelectorAll('strong');
for (let index = 0; index < strongVeriler.length; index++) {
if (index == 0) {
uni.uniadi = strongVeriler[index].innerText.trim();
} else if (index == 1) {
uni.bolumadi = strongVeriler[index].innerText.trim();
for (let index = 0; index < fontVeriler.length; index++) {
if (index == 1) {
uni.bolumadi += ' ' + fontVeriler[index].innerText.trim();
} else if (index == 10) {
uni.siralama2019 = fontVeriler[index].innerText.trim();
} else if (index == 14) {
uni.puan2019 = fontVeriler[index].innerText.trim();
return uni;
await page.click('#mydata_next');
okullar.forEach((okul) => {
return okulUni;
Here it is what am I trying to fetch
<table id="mydata">
<tr role="row" class="odd">//this line
I can't find solution.
I found a solution by changing this line.
const browser = await puppeteer.launch({ headless: false });
const browser = await puppeteer.launch({ headless: false,slowMo: 150 });
I think due to the speed The code can't fetch table that exactly right. Everything works fine now. Thank you for the answers.
I found a solution by changing this line.
const browser = await puppeteer.launch({ headless: false });
const browser = await puppeteer.launch({ headless: false,slowMo: 150 });
I think due to the speed The code can't fetch table that exactly right. Everything works fine now. Thank you for the answers.
I have this piece of code which loops through one page with 3 frames and collect data from them an put them together.The problem is that the code is display incomplete results for first 2 loops , after that everything is fine, or randomly i got an error like Execution context was destroyed, most likely because of a navigation.
Please excuse my bad code but I have only 2 months on coding in javaScript
const puppeteer = require('puppeteer');
const elementsToClickSelector = 'body > form > font > select option';
const allLineIds = 'body > form > font > select > option';
const timpSosiri = 'body > b > font';
function run () {
return new Promise(async (resolve, reject) => {
try {
const browser = await puppeteer.launch({
headless: false
const page = await browser.newPage();
await page.goto('');
const frame = page.frames().find(f => f.name() === 'stanga');
const cframe = page.frames().find(f => f.name() === 'centru');
const dframe = page.frames().find(f => f.name() === 'dreapta');
// get all station name to be clicked
let elementsToClick = await frame.$$(elementsToClickSelector);
console.log(`Elements to click: ${elementsToClick.length}`);
if (page.frames().find(f => f.name().includes('stanga'))) {
console.info('Frame was in the DOM and in the frames list')
} else {
console.error('Frame was in the DOM but not in the frames list')
let test =[];
for (let i =0, length = elementsToClick.length; i< length; i++){
const item = await frame.evaluateHandle((i) =>{
return document.querySelectorAll('option')[i];
await frame.waitFor(1000);
const statieNume = await (await elementsToClick[i].getProperty('innerText')).jsonValue();
await item.click();
// get all linie ids to be clicked
let idLine = await cframe.$$(allLineIds);
for(let j = 0, length1 = idLine.length; j<length1; j++){
const lineItem = await cframe.evaluateHandle((j) =>{
return document.querySelectorAll('option')[j];
}, j);
const linie = await (await idLine[j].getProperty('innerText')).jsonValue();
cframe.waitForSelector('body > form > font > select option');
let timp = await dframe.$$(timpSosiri);
for( let k = 0, lengthk = timp.length; k < lengthk; k++){
const sosiri = await dframe.evaluateHandle((k) =>{
return document.querySelectorAll('b')[k];
dframe.waitForSelector('body > b > font');
const timpLinie = await (await timp[k].getProperty('innerHTML')).jsonValue();
linie: linie,
timpi: timpLinie
return resolve(JSON.stringify(test));
} catch (e) {
return reject(e);
A Saguna
[1] S5
[0] E3
[0] S5
[1] E8
Sosire1: 17:31<br> Sosire2: 17:38
[0] 21
Sosire1: 17:31<br> Sosire2: 17:38
[0] S10
Sosire1: 17:26<br> Sosire2: 17:55
[1] Tv5
Sosire1: 17:26<br> Sosire2: 17:55
First Mures doesn't display lines and time and A Saguna dosen't disply time.