I am trying to run Puppeteer in a Firebase function. As I understand
https://github.com/GoogleChrome/puppeteer/blob/master/docs/troubleshooting.md#running-puppeteer-on-google-cloud-functions
it should work after adding
"engines": {
"node": "8"
},
to package.json. I get positive feedback when I run firebase deploy:
functions: updating Node.js 8 function helloWorld(us-central1)...
Sadly it crashes with:
Error: Error: Failed to launch chrome!
[0408/080847.149912:ERROR:zygote_host_impl_linux.cc(89)] Running as root without --no-sandbox is not supported. See https://crbug.com/638180.
TROUBLESHOOTING: https://github.com/GoogleChrome/puppeteer/blob/master/docs/troubleshooting.md
at startPuppeteer.then.then.then.catch.error (/srv/index.js:42:15)
at <anonymous>
at process._tickDomainCallback (internal/process/next_tick.js:229:7)
I tested the same code locally and it worked.
This is the Cloud Functions code which fails:
const functions = require('firebase-functions');
const puppeteer = require('puppeteer');

let browser = ""
let page = ""

const startPuppeteer = async () => {
  browser = await puppeteer.launch();
  page = await browser.newPage()
}

const usePageInPuppeteer = async (url) => {
  await page.goto(url);
  return await page.title()
}

const closePuppeteer = async () => {
  return await browser.close();
}

const runtimeOpts = {
  timeoutSeconds: 500,
  memory: '2GB'
}

exports.helloWorld = functions
  .runWith(runtimeOpts)
  .https.onRequest((request, response) => {
    //response.send()
    startPuppeteer()
      .then(() => {
        return usePageInPuppeteer('https://www.google.com')
      })
      .then(returnUse => {
        console.log(returnUse)
        return response.send(returnUse)
      })
      .then(() => {
        return closePuppeteer()
      })
      .catch(error => {
        throw new Error(error)
      });
  });
This is the local test, which works:
const puppeteer = require('puppeteer');

let browser = ""
let page = ""

const startPuppeteer = async () => {
  browser = await puppeteer.launch();
  page = await browser.newPage()
}

const usePageInPuppeteer = async (url) => {
  await page.goto(url);
  return await page.title()
}

const closePuppeteer = async () => {
  return await browser.close();
}

startPuppeteer()
  .then(() => {
    return usePageInPuppeteer('https://www.google.com')
  })
  .then(returnUse => {
    console.log(returnUse)
    return closePuppeteer()
  })
  .catch(error => {
    throw new Error(error)
  });
Changing startPuppeteer to

const startPuppeteer = async () => {
  browser = await puppeteer.launch({args: ['--no-sandbox', '--disable-setuid-sandbox']});
  page = await browser.newPage()
}

works great. Thanks to https://stackoverflow.com/users/2911633/igor-ilic for the hint:
You need to launch Chrome in no-sandbox mode: running Chrome as root is not supported directly, but you can pass the argument --no-sandbox to launch it as the root user.
The final code will look like this:
browser = await puppeteer.launch({args: ['--no-sandbox']});
I am automating the login flow via Puppeteer.
Scenario: log in through the browser and get the access token from the network logs.
I am able to log in successfully, but I can't figure out how to fetch the network logs, where I can see the requests, request headers and responses.
Here is my code, which works fine for the login workflow, but no network logs are captured:
/**
 * Tests for Authorization
 *
 * @group login
 */
const fetch = require("node-fetch");
const puppeteer = require('puppeteer');

const usernameSelector = '#idp-discovery-username';
const nextButtonSelector = '#idp-discovery-submit';
const PasswordSelector = '#okta-signin-password';
const LoginButtonSelector = '#okta-signin-submit';
const pxHeaderSelector = '[data-auto-id="HeaderCPLogoLink"]';
const cxHeaderSelector = '.header-logo';

const generationBrowserToken = async (url, username, password, portal) => {
  let browser, page;
  browser = await puppeteer.launch({
    headless: false,
    slowMo: 0,
    args: ['--start-maximized'],
    defaultViewport: null,
  })
  console.log('Getting JWT token from Browser')
  page = await browser.newPage()
  await page.evaluate(() => {
    debugger;
  });
  console.log('URL is : ' + url)
  await page.goto(url, {
    waitUntil: 'load',
    timeout: 0
  });
  await new Promise((r) => setTimeout(r, 3000));
  await page.waitForSelector(usernameSelector)
  await page.type(usernameSelector, username);
  await new Promise((r) => setTimeout(r, 3000));
  await page.waitForSelector(nextButtonSelector)
  await page.click(nextButtonSelector)
  await new Promise((r) => setTimeout(r, 5000));
  await page.waitForSelector(PasswordSelector)
  await page.type(PasswordSelector, password);
  await new Promise((r) => setTimeout(r, 3000));
  await page.click(LoginButtonSelector);
  await new Promise((r) => setTimeout(r, 3000));
  if (portal === 'PX') {
    await page.waitForSelector(pxHeaderSelector);
  } else if (portal === 'CX') {
    await page.waitForSelector(cxHeaderSelector);
  }
  page.on('response', response => {
    if (response.url().endsWith("details"))
      console.log("response: ", response());
  });
  await browser.close()
};

module.exports = {
  generationBrowserToken
}
Can someone please help me fetch the network logs from this flow, so that I can read the API calls and get the access_token from one of the API headers?
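Two things stand out in the code above: the response listener is registered after all the navigation has already happened (and right before browser.close, so it never fires), and response() is being called as if it were a function. A sketch of the listener wired up before page.goto (the "details" URL filter and the access_token header name are carried over from the question as assumptions, not verified against the real API):

// Register BEFORE page.goto so responses during the login flow are observed.
const tokens = [];
page.on('response', (response) => {
  if (response.url().endsWith('details')) {
    console.log('status :', response.status());
    console.log('headers:', response.headers()); // response headers as a plain object
    const token = response.headers()['access_token']; // assumed header name
    if (token) tokens.push(token);
  }
});

await page.goto(url, { waitUntil: 'load', timeout: 0 });
// ...login steps as before, then read `tokens` before browser.close()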
I'm (very) new to WebdriverIO, and I'm struggling with creating a before hook.
My setup uses the latest WebdriverIO with Mocha as my testing framework.
So, in essence, what I'd like to do is create a before hook for the following code (which would be used for each test case, which is why I thought a before hook would be suitable):
it('close cmp', async () => {
  await browser.pause(5000);
  const cmpDismissal = await $('~ACCEPT AND CLOSE');
  if (await cmpDismissal.isExisting()) {
    await cmpDismissal.click();
    await cmpDismissal.waitForExist({ reverse: true });
  }
});

it('allow notifications', async () => {
  const notificationsDismissal = await $('~Allow');
  if (await notificationsDismissal.isExisting()) {
    await notificationsDismissal.click();
    await notificationsDismissal.waitForExist({ reverse: true });
  }
});

it('click on skip button', async () => {
  const skipToContent = await $('~SKIP');
  await skipToContent.waitForExist({ timeout: 50000 });
  await skipToContent.isExisting();
  await skipToContent.click();
});

it('allow', async () => {
  const useData = await $('~Allow');
  if (await useData.isExisting()) {
    await useData.click();
    await useData.waitForExist({ reverse: true });
  }
});
So in my wdio.conf.js config file I've added:
before: function () {
  const cmpDismissal = await ('~ACCEPT AND CLOSE');
  if (await cmpDismissal.isExisting()) {
    await cmpDismissal.click();
    await cmpDismissal.waitForExist({ reverse: true });
  }
  //
  const notificationsDismissal = await $('~Allow');
  if (await notificationsDismissal.isExisting()) {
    await notificationsDismissal.click();
    await notificationsDismissal.waitForExist({ reverse: true });
  }
  //
  const skipToContent = await $('~SKIP');
  await skipToContent.waitForExist({ timeout: 50000 });
  await skipToContent.isExisting();
  await skipToContent.click();
  //
  const useData = await $('~Allow');
  if (await useData.isExisting()) {
    await useData.click();
    await useData.waitForExist({ reverse: true });
  }
}
But this doesn't work.
I'm presuming it's possible to create a before hook for this kind of functionality?
Or is there a better way of doing this?
Any help would be appreciated.
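It is possible, and the snippet fails for two visible reasons: the hook is not declared async, so the awaits inside it are syntax errors, and the first lookup is missing the $ before ('~ACCEPT AND CLOSE'). A sketch of a corrected hook (dismissIfPresent is a hypothetical helper name, not a WebdriverIO API):

// Sketch of a corrected wdio.conf.js. The shared dismissal logic is
// factored into a hypothetical helper to avoid repeating it four times.
const dismissIfPresent = async (selector) => {
  const el = await $(selector);
  if (await el.isExisting()) {
    await el.click();
    await el.waitForExist({ reverse: true });
  }
};

exports.config = {
  // ...the rest of the config...
  before: async function () {           // must be async to use await inside
    await dismissIfPresent('~ACCEPT AND CLOSE');
    await dismissIfPresent('~Allow');   // notifications prompt
    const skipToContent = await $('~SKIP');
    await skipToContent.waitForExist({ timeout: 50000 });
    await skipToContent.click();
    await dismissIfPresent('~Allow');   // data-usage prompt
  },
};

Note that before runs once per session; if these prompts reappear before every single test, the beforeEach Mocha hook inside the spec file may be the better fit.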
I am trying to use Puppeteer to scroll all the way to the bottom of the site, but the code I am using is not working. What I did was set up a while loop that checks whether the new height exceeds the previous height, and then waits on a promise, but for some reason it is not working. Where did I go wrong, and how can I fix it? Thanks in advance.
const puppeteer = require('puppeteer');

const scrapeInfiniteScrollItems = async (page) => {
  while (true) {
    const previousHeight = await page.evaluate('document.body.scrollHeight');
    await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
    await page.waitForFunction(`document.body.scrollHeight > ${previousHeight}`);
    await new Promise((resolve) => setTimeout(resolve, 1000));
  }
};

(async () => {
  const browser = await puppeteer.launch({
    headless: false
  });
  const page = await browser.newPage();
  await page.goto('https://www.youtube.com', {
    waitUntil: 'networkidle2',
  });
  await scrapeInfiniteScrollItems(page);
})();
In the case of YouTube the height of body is 0, which is why your function is not working. If we look in devtools on YouTube, the whole content is inside the ytd-app element.
So we should use document.querySelector('ytd-app').scrollHeight instead of document.body.scrollHeight to scroll down to the bottom.
Working code:
const scrapeInfiniteScrollItems = async (page: puppeteer.Page) => {
  while (true) {
    const previousHeight = await page.evaluate(
      "document.querySelector('ytd-app').scrollHeight"
    );
    await page.evaluate(() => {
      const youtubeScrollHeight =
        document.querySelector("ytd-app").scrollHeight;
      window.scrollTo(0, youtubeScrollHeight);
    });
    try {
      await page.waitForFunction(
        `document.querySelector('ytd-app')?.scrollHeight > ${previousHeight}`,
        { timeout: 5000 }
      );
    } catch {
      console.log("done");
      break;
    }
    await new Promise((resolve) => setTimeout(resolve, 1000));
  }
};
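The driver from the question can stay as it was; only the scroll helper changes. For reference (the browser.close() at the end is my addition):

(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto('https://www.youtube.com', { waitUntil: 'networkidle2' });
  await scrapeInfiniteScrollItems(page); // returns once no new content loads for 5s
  await browser.close();                 // my addition: clean up when done
})();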
I have sw.js, which stores data in Cache Storage.
And there is a DataGrid that displays a list of users.
I want to add users and immediately see the changes; without sw.js everything works fine.
When I use the GET API, I always get the cached response until I clear the cache and reload the page.
The cache is not updating.
How should I change this code to make it work correctly?
requests:
export const fetchUsers = createAsyncThunk(
  "users/fetchUsers", async () => {
    const response = await axiosInstance.get("api/users");
    return response.data;
  });

export const addNewUser = createAsyncThunk(
  'users/addNewUser', async (newUser) => {
    const response = await axiosInstance.post("api/users", newUser)
    return response.data
  })
sw.js
const staticCacheName = 'static-cache-v0';
const dynamicCacheName = 'dynamic-cache-v0';

const staticAssets = [
  './',
  './index.html',
  './images/icons/icon-128x128.png',
  './images/icons/icon-192x192.png',
  './offline.html',
  './css/main.css',
  './js/app.js',
  './js/main.js',
  './images/no-image.jpg'
];

self.addEventListener('install', async event => {
  const cache = await caches.open(staticCacheName);
  await cache.addAll(staticAssets);
  console.log('Service worker has been installed');
});

self.addEventListener('activate', async event => {
  const cachesKeys = await caches.keys();
  const checkKeys = cachesKeys.map(async key => {
    if (![staticCacheName, dynamicCacheName].includes(key)) {
      await caches.delete(key);
    }
  });
  await Promise.all(checkKeys);
  console.log('Service worker has been activated');
});

self.addEventListener('fetch', event => {
  console.log(`Trying to fetch ${event.request.url}`);
  event.respondWith(checkCache(event.request));
});

// Cache-first: any cached response wins, the network is only a fallback.
async function checkCache(req) {
  const cachedResponse = await caches.match(req);
  return cachedResponse || checkOnline(req);
}

async function checkOnline(req) {
  const cache = await caches.open(dynamicCacheName);
  try {
    const res = await fetch(req);
    await cache.put(req, res.clone());
    return res;
  } catch (error) {
    const cachedRes = await cache.match(req);
    if (cachedRes) {
      return cachedRes;
    } else if (req.url.indexOf('.html') !== -1) {
      return caches.match('./offline.html');
    } else {
      return caches.match('./images/no-image.jpg');
    }
  }
}
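The stale responses come from checkCache: it is cache-first for every request, so once api/users lands in the dynamic cache, every later GET is served from there and the network is never consulted. One possible fix (a sketch, assuming your API URLs contain 'api/'; adjust the check to your routes) is to send API calls through a network-first strategy and keep cache-first for static assets:

self.addEventListener('fetch', event => {
  const isApiRequest = event.request.url.includes('api/'); // assumed URL shape
  // Network-first for API calls, cache-first for everything else.
  event.respondWith(isApiRequest ? networkFirst(event.request) : checkCache(event.request));
});

// Hypothetical helper: try the network, refresh the dynamic cache,
// and only fall back to the cached copy when offline.
async function networkFirst(req) {
  const cache = await caches.open(dynamicCacheName);
  try {
    const res = await fetch(req);
    // The Cache API only stores GET requests; skip caching for POST etc.
    if (req.method === 'GET') await cache.put(req, res.clone());
    return res;
  } catch (error) {
    const cachedRes = await cache.match(req);
    return cachedRes || caches.match('./offline.html');
  }
}

With this split, fetching the users after addNewUser hits the server again and refreshes the cached copy instead of replaying the stale one.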
I need to create a scraping tool using Puppeteer; however, I have some issues with adding items to the queue.
Here's what I've got:
const PromisePool = require("@supercharge/promise-pool");
const puppeteer = require("puppeteer");

const domain = process.argv[2];
let list = [];
list[0] = domain;

const run = async () => {
  const { results, errors } = await PromisePool.for(list)
    .withConcurrency(2)
    .process(async (webpage) => {
      links = [];
      const getData = async () => {
        return await page.evaluate(async () => {
          return await new Promise((resolve) => {
            resolve(Array.from(document.querySelectorAll("a")).map((anchor) => [anchor.href]));
          });
        });
      };
      links = await getData();
      for (var link in links) {
        var new_url = String(links[link]);
        new_url = new_url.split("#")[0];
        console.log("new url: " + new_url);
        if (new_url.includes(domain)) {
          if (new_url in list) {
            console.log("Url already exists: " + new_url);
            continue;
          }
          list[new_url] = new_url;
        } else {
          console.log("Url is external: " + new_url);
        }
      }
      browser.close();
    });
};

const mainFunction = async () => {
  const result = await run();
  return result;
};

(async () => {
  console.log(await mainFunction());
  console.log(list);
})();
The problem is inside:
links = [];
const getData = async () => {
  return await page.evaluate(async () => {
    return await new Promise((resolve) => {
      resolve(Array.from(document.querySelectorAll("a")).map((anchor) => [anchor.href]));
    });
  });
};
links = await getData();
page.evaluate is async, and the code doesn't wait for its result, so links is never updated before the next PromisePool process runs.
I need a way to wait for the response to return and then continue with the rest of the script.
You could use page.$$eval to retrieve the same links with a single await.
page.$$eval(selector, pageFunction[, ...args])
It is basically what you are trying to achieve as the $$eval method "runs Array.from(document.querySelectorAll(selector)) within the page [context] and passes it as the first argument to pageFunction." (docs)
E.g.:
const links = await page.$$eval('a', anchors => anchors.map(el => el.href));
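Note that the snippet in the question also never creates browser or page inside the pool callback, so there is nothing for evaluate to run against. A sketch of the worker with the missing lifecycle filled in (the shared browser, the Set-based deduplication, and the list.push are my additions, not from the original code):

const run = async () => {
  // A Set avoids the `new_url in list` key-lookup pitfall from the question.
  const seen = new Set(list);
  const browser = await puppeteer.launch(); // assumption: one browser shared by all workers
  await PromisePool.for(list)
    .withConcurrency(2)
    .process(async (webpage) => {
      const page = await browser.newPage();
      await page.goto(webpage);
      // $$eval resolves only once the href array has been serialized back,
      // so `links` is fully populated before the loop below runs.
      const links = await page.$$eval('a', anchors => anchors.map(el => el.href));
      for (const link of links) {
        const newUrl = link.split('#')[0];
        if (!newUrl.includes(domain)) {
          console.log('Url is external: ' + newUrl);
        } else if (!seen.has(newUrl)) {
          seen.add(newUrl);
          // Whether the pool picks up items appended mid-run depends on the
          // PromisePool version; verify this against your setup.
          list.push(newUrl);
        }
      }
      await page.close();
    });
  await browser.close();
};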