If I go to https://investor.vanguard.com/mutual-funds/profile/VMMXX and execute document.querySelector("[data-ng-if='productSummaryTitle']").innerText from console, I get what I am expecting: Product summary.
But when I try to do the same with puppeteer, I get UnhandledPromiseRejectionWarning: Error: Evaluation failed: TypeError: Cannot read property 'innerText' of null at __puppeteer_evaluation_script__:3:83. What am I missing?
const puppeteer = require('puppeteer');
(async () => {
const browser = await puppeteer.launch({ headless: false })
const page = await browser.newPage()
await page.goto('https://investor.vanguard.com/mutual-funds/profile/VMMXX')
const result = await page.evaluate(() => {
let myText = document.querySelector("[data-ng-if='productSummaryTitle']").innerText
return {
You could wait for that selector first
// Wait for the element to appear before reading it: it may not exist yet at the
// moment page.goto() resolves, which is what makes querySelector() return null.
// The selector is wrapped in double quotes so the single-quoted attribute value
// inside it does not terminate the string.
const element = await page.waitForSelector("[data-ng-if='productSummaryTitle']");
// Runs in the page context and returns the element's visible text.
const text = await element.evaluate((el) => el.innerText);
const puppeteer = require('puppeteer');
(async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto('https://investor.vanguard.com/mutual-funds/profile/VMMXX');
const element = await page.waitForSelector("[data-ng-if='productSummaryTitle']");
const text = await element.evaluate(el => el.innerText);
await browser.close();
I need to go to the UPS site, type in a tracking number, and then get the data about that tracking number. With headless set to false the timeouts disappear; with it set to true, the script times out or waits forever. I tried a number of different methods (every one I could find) to rework the promises, but nothing helped.
const puppeteer = require('puppeteer');
var fs = require("fs").promises;
var Page = {};
var Browser = {};
async function printPage(){
await fs.writeFile("pagecontent_afterClick.txt", await Page.content());
const count = await Page.$$eval('#st_App_PkgStsMonthNum', divs => divs.length);
const deliveredOn = await Page.$eval('#st_App_PkgStsMonthNum', el => el.textContent);
const deliveredToAddress = await Page.$eval('#stApp_txtAddress', el => el.textContent);
const deliveredToCountry = await Page.$eval('#stApp_txtCountry', el => el.textContent);
const deliveredBy = await Page.$eval('#stApp_valReceivedBy', el => el.textContent);
console.log("browser close");
await Browser.close();
async function start(){
const browser = await puppeteer.launch({
headless: false
const page = await browser.newPage();
Page = page
Browser = browser
const navigationPromise = page.waitForNavigation({waitUntil: 'load'})
await navigationPromise;
await fs.writeFile("pagecontent_B4.txt", await page.content());
await page.type("#stApp_trackingNumber", "1Z0F1R740320306827");
const searchValue = await page.$eval('#stApp_trackingNumber', el => el.value);
const count = await page.$$eval('button#stApp_btnTrack.ups-cta.ups-cta_primary', divs => divs.length);
try {
console.log("end waiting");
await Promise.all([
page.waitForNavigation({waitUntil: 'networkidle2'})
} catch (e) {
if (e instanceof puppeteer.errors.TimeoutError) {
I got the following error in a JavaScript program with puppeteer: UnhandledPromiseRejectionWarning: TypeError: Cannot read property 'getProperty' of undefined
I've checked several times but the element exists and can be found through the XPath. Here's my code
const puppeteer = require('puppeteer');
async function scraping(url, username, password) {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(url, { waitUntil: 'networkidle0' });
const [loginTitle] = await page.$x('/html/body/div[1]/div[2]/div/div/div[1]/h3');
const loginTitleTxt = await loginTitle.getProperty('textContent')
const loginTitleRawTxt = await loginTitleTxt.jsonValue();
const [usernameInputField] = await page.$x('//*[#id="form-username"]/input');
const [passwordInputField] = await page.$x('//*[#id="form-password"]/input');
await page.evaluate(() => {
document.evaluate('/html/body/div[1]/div[2]/div/div/div[2]/form/div[3]/div[1]/button', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.click();
// Here is the error
const [planTitle] = await page.$x('/html/body/center[1]/p[1]/table/tbody/tr[2]/td');
const planTitleTxt = await planTitle.getProperty('textContent')
const planTitleRawTxt = await planTitleTxt.jsonValue();
scraping('https://hereisttheurl.net/abc.html', 'aa.bb', '123');
I was trying to scrape a thumbnail image from YouTube with its XPath, but I am getting undefined for the src. I can't figure out what is causing this. I already tried using both the XPath and the full XPath, but that didn't help. Any help is appreciated. Thanks in advance.
const puppeteer = require('puppeteer');
async function scrapeChannel1(url) {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(url, {
timeout: 0
const [el2] = await page.$x('//*[#id="dismissible"]/ytd-thumbnail');
const src1 = await el2.getProperty('src');
const thumbnailURL1 = await src1.jsonValue();
return {
The <img> you are looking for sits a bit deeper in the DOM, at '//*[@id="dismissible"]/ytd-thumbnail/a/yt-img-shadow/img' (so you should append /a/yt-img-shadow/img to the end of your XPath expression).
Note, you have more powerful tools in puppeteer than .getProperty('src') to retrieve DOM element properties.
E.g. page.$eval:
// CSS path from the thumbnail element down to the nested element with id "img".
const selector = 'ytd-thumbnail > a > yt-img-shadow > #img';
// $eval runs the callback in the page on the first match and returns its result.
const imageSrc = await page.$eval(selector, ({ src }) => src);
// returns: https://i.ytimg.com/vi/{youtube_id}/hqdefault.jpg...
Or if you want all images use page.$$eval:
// $$eval hands the callback every element matching the selector; collect each one's src.
const imageSrcs = await page.$$eval(selector, (nodes) => nodes.map(({ src }) => src));
If you want to get image src values from YouTube, you need to scroll the video thumbnails into view, as in the code below (you can also check it in the online IDE):
const puppeteer = require("puppeteer-extra");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
const mainPageUrl = "https://www.youtube.com";
async function scrollPage(page, scrollElements) {
let currentElement = 0;
while (true) {
let elementsLength = await page.evaluate((scrollElements) => {
return document.querySelectorAll(scrollElements).length;
}, scrollElements);
for (; currentElement < elementsLength; currentElement++) {
await page.waitForTimeout(200);
await page.evaluate(
(currentElement, scrollElements) => {
await page.waitForTimeout(5000);
let newElementsLength = await page.evaluate((scrollElements) => {
return document.querySelectorAll(scrollElements).length;
}, scrollElements);
if (newElementsLength === elementsLength || currentElement > 100) break; // if you want to get all elements (or some other number of elements) change number to 'Infinity' (or some other number)
async function getThumbnails() {
const browser = await puppeteer.launch({
headless: false,
args: ["--no-sandbox", "--disable-setuid-sandbox"],
const page = await browser.newPage();
await page.setDefaultNavigationTimeout(60000);
await page.goto(mainPageUrl);
await page.waitForSelector("#contents");
const scrollElements = "a#thumbnail";
await scrollPage(page, scrollElements);
await page.waitForTimeout(10000);
const urls = await page.$$eval("a#thumbnail #img", (els) => els.map(el => el.getAttribute('src')).filter(el => el));
await browser.close();
return urls;
So, I am trying to scrape a couple of search engines with a couple of search phrases using Playwright.
Running the script with one query is working.
const { chromium } = require('playwright');
// Runs one hard-coded keyword through DuckDuckGo and then Yandex in a single tab,
// reading the text of one result element from each results page.
(async () => {
  const browser = await chromium.launch({ headless: false, slowMo: 250 });
  const context = await browser.newContext();
  const page = await context.newPage();
  const keyWord = 'Arsenal';

  // DuckDuckGo: fill the search box, submit with Enter, then wait for the result.
  await page.goto('https://duckduckgo.com/');
  // XPath attribute tests are written with '@' (e.g. [@name="q"]); '#' is not XPath syntax.
  await page.fill('//input[@name="q"]', keyWord);
  await page.keyboard.press('Enter');
  // The parenthesised form indexes across the whole document: the ninth matching <h2>.
  const getOne = ' (//h2[@class="result__title"])[9] ';
  await page.waitForSelector(getOne);
  const pushOne = await page.$(getOne);
  const One = await pushOne.evaluate((element) => element.innerText);

  // Yandex: same flow, targeting the URL text inside the <li> carrying data-first-snippet.
  await page.goto('https://yandex.com/');
  await page.fill('//input[@aria-label="Request"]', keyWord);
  await page.keyboard.press('Enter');
  const getTwo = ' //li[@data-first-snippet] //div[@class="organic__url-text"] ';
  await page.waitForSelector(getTwo);
  const pushTwo = await page.$(getTwo);
  const Two = await pushTwo.evaluate((element) => element.innerText);

  await browser.close();
})(); // close and invoke the async IIFE so the script actually runs
But when I use an array of phrases (keyWordlist), I fail to get the script running.
I have searched around for ways to use an array with 'for' and 'forEach' loops, but haven't been able to fix it.
I want to run the different keywords through the different search engines and list the results.
For 3 keywords in two search engines, that would give 6 results.
const { chromium } = require('playwright');
(async () => {
const browser = await chromium.launch({ headless: false, slowMo: 250 });
const context = await browser.newContext()
const page = await context.newPage();
let kewWordlist = ['Arsenal', 'Liverpool', 'Ajax']
for (var i=0; i<=kewWordlist.length; i++) {
// for (const i in kewWordlist){
async () => {
const keyWord = kewWordlist[i];
await page.goto('https://duckduckgo.com/');
await page.fill('//input[#name="q"]',keyWord);
// await page.fill('//input[#name="q"]',[i]);
// await page.fill('//input[#name="q"]',`${keyWord}`);
await page.keyboard.press('Enter');
const getOne = (' (//h2[#class="result__title"])[9] ');
await page.waitForSelector(getOne)
const pushOne = await page.$(getOne);
const One = await pushOne.evaluate(element => element.innerText);
// await page.goto('https://yandex.com/');
// await page.fill('//input[#aria-label="Request"]', keyWord);
// await page.keyboard.press('Enter');
// const getTwo = (' //li[#data-first-snippet] //div[#class="organic__url-text"] ');
// await page.waitForSelector(getTwo)
// const pushTwo = await page.$(getTwo);
// const Two = await pushTwo.evaluate(element => element.innerText);
// console.log(Two);
await browser.close()
If anyone has some pointers on how to solve this, much obliged.
Maybe the result selectors need some tweaking, but I think this is what you were looking for:
test.only('search search engines', async({page, context}) => {
const search = [
name: 'yandex',
url: 'https://yandex.com/',
elementFill: '//input[#aria-label="Request"]',
elementResult: '//li[#data-first-snippet] //div[#class="organic__url-text"]'
name: 'google',
url: 'https://www.google.nl',
elementFill: '//input[#name="q"]',
elementResult: '(//h2[#class="result__title"])[9]'
name: '',
url: 'https://duckduckgo.com/',
elementFill: '//input[#name="q"]',
elementResult: '(//h2[#class="result__title"])[9]'
const kewWordlist = ['Arsenal', 'Liverpool', 'Ajax']
for (let i = 0; i < search.length; i++) {
const searchName = search[i].name
const searchResult = search[i].elementResult
const searchFill = search[i].elementFill
const searchPage = await context.newPage()
await searchPage.waitForLoadState()
await searchPage.goto(`${search[i].url}`)
for (let i = 0; i < kewWordlist.length; i++) {
await searchPage.fill(searchFill,kewWordlist[i])
await searchPage.keyboard.press('Enter')
await searchPage.waitForSelector(searchResult)
const result = await page.$(searchResult)
console.log(`${searchName}: ${result} `)
The reason your loop isn't working is that you have an async function inside of it that you never call. There are a few ways you could go about this:
You could take your first version, have it accept a word to search, and run that over each element of the array:
const searchOneKeyword = async (keyWord) => {
const browser = await chromium.launch({ headless: false, slowMo: 250 });
const context = await browser.newContext()
const page = await context.newPage();
// rest of code
// Search phrases to run; spelled `keyWordList` to match the name the calls below use.
const keyWordList = ['Arsenal', 'Liverpool', 'Ajax'];
keyWordList.forEach((k) => {
Or if you'd like to keep the same browser instance, you can do it in a loop in the function:
const search = async (words) => {
const browser = await chromium.launch({ headless: false, slowMo: 250 });
const context = await browser.newContext()
const page = await context.newPage();
for (const keyWord of words) {
await page.goto('https://duckduckgo.com/');
await page.fill('//input[#name="q"]',keyWord);
await page.keyboard.press('Enter');
const getOne = (' (//h2[#class="result__title"])[9] ');
await page.waitForSelector(getOne)
const pushOne = await page.$(getOne);
const One = await pushOne.evaluate(element => element.innerText);
// etc.
await browser.close()
In both of those cases, you're logging, but never returning anything, so if you need that data in another function afterwards, you'd have to change that. Example:
const search = async (words) => {
const browser = await chromium.launch({ headless: false, slowMo: 250 });
const context = await browser.newContext()
const page = await context.newPage();
const results = await Promise.all(words.map((keyWord) => {
await page.goto('https://duckduckgo.com/');
await page.fill('//input[#name="q"]',keyWord);
await page.keyboard.press('Enter');
// etc.
return [ One, Two ]
await browser.close()
return results
search(keyWordList).then((results) => { console.log(results.flat()) })
I have spent a couple of hours trying to get the script working based on your suggestions, unfortunately without result. I get errors like 'await is only valid in async function' and 'Unreachable code detected'. I searched for other examples for inspiration but found none. If you or someone else has a suggestion, please share! This is the code I have now:
const { chromium } = require('playwright');
let keyWordList = ['Arsenal', 'Liverpool', 'Ajax']
const search = async function words() {
const browser = await chromium.launch({ headless: false, slowMo: 250 });
const context = await browser.newContext()
const page = await context.newPage();
const results = await Promise.all(words.map(keyWord))
await page.goto('https://duckduckgo.com/');
await page.fill('//input[#name="q"]',keyWord);
await page.keyboard.press('Enter');
const getOne = (' (//h2[#class="result__title"])[9] ');
await page.waitForSelector(getOne)
const pushOne = await page.$(getOne);
const One = await pushOne.evaluate(element => element.innerText);
await page.goto('https://yandex.com/');
await page.fill('//input[#aria-label="Request"]', keyWord);
await page.keyboard.press('Enter');
const getTwo = (' //li[#data-first-snippet] //div[#class="organic__url-text"] ');
await page.waitForSelector(getTwo)
const pushTwo = await page.$(getTwo);
const Two = await pushTwo.evaluate(element => element.innerText);
return [ One , Two ]
return results
search(keyWordList).then((results) => { console.log(results.flat())
await browser.close();
I am trying to create a UI in Electron for a scraper written with Puppeteer.
Every time I use page.evaluate() it returns an empty object [object Object],
here is an example:
const puppeteer = require('puppeteer');
const scrape = async () => {
const browser = await puppeteer.launch({
executablePath: '/usr/bin/google-chrome',
headless: true,
const page = await browser.newPage();
await page.goto("https://google.com/", {
waitUntil: 'networkidle2',
timeout: 90000
const length = await page.evaluate(`selector => {
return Array.from(document.querySelectorAll(selector)).length;
}`, 'div');
await page.close();
await browser.close();
return length;
document.querySelector("button").addEventListener("click", async function() {
const divs_len = await scrape();
const par = document.querySelector('#par');
par.innerText = divs_len;
// par shows [object Object]
I have used the following resource to fix the sample code:
And here is the working version:
const puppeteer = require('puppeteer');
const scrape = async () => {
const browser = await puppeteer.launch({
executablePath: '/usr/bin/google-chrome',
headless: true,
const page = await browser.newPage();
await page.goto("https://google.com/", {
waitUntil: 'networkidle2',
timeout: 90000
const functionToBeEvaluated = selector => {
return Array.from(document.querySelectorAll(selector)).length;
const result = await page.evaluate('(' + functionToBeEvaluated.toString() + ')("div");');
await page.close();
await browser.close();
return result;
document.querySelector("button").addEventListener("click", async function() {
const divs_len = await scrape();
const par = document.querySelector('#par');
par.innerText = divs_len;
As its first argument, page.evaluate() accepts either a function, which will be called in the page, or a string of code, which will be executed directly. If you pass a function expression as a string, evaluating that string just yields the function itself; since functions are not serializable, page.evaluate() returns an empty object. Try this:
// Pass a real function (not a string of source) so it is called in the page,
// with 'div' supplied as its `selector` argument; the match count comes back as a number.
const length = await page.evaluate(
  (selector) => document.querySelectorAll(selector).length,
  'div',
);