I am trying to maximize the browser window using Playwright. I tried the code below, but the browser window is not maximized.
hooks.cjs class:
const playwright = require('playwright');
// Cucumber is published as the scoped package "@cucumber/cucumber"; a specifier
// starting with "#" is not a scoped package name.
const { BeforeAll, Before, After, AfterAll, Status } = require('@cucumber/cucumber');

// Launch options.
// NOTE(review): declared but never passed to launch()/newContext() below.
const options = {
  slowMo: 100,
  ignoreHTTPSErrors: true,
};

// Create a global browser for the test session.
BeforeAll(async () => {
  console.log('Launch Browser');
  // Configuration under question: a fixed --window-size is requested here.
  // Other attempts were: --start-maximized, and defaultViewport: null.
  global.browser = await playwright['chromium'].launch({
    headless: false,
    args: ['--window-size=1920,1040'],
  });
  //global.context = await playwright['chromium'].launch({ args: ['--start-maximized'] });
});

// Create a new browser context for each test.
Before(async () => {
  console.log('Create a new Context and Page');
  global.context = await global.browser.newContext();
  global.page = await global.context.newPage();
});

// Close the page and context after each test.
After(async () => {
  console.log('Close Context and Page');
  await global.page.close();
  await global.context.close();
});

// Take Screenshot if Scenario Fails.
// A regular function (not an arrow) is required because `this.attach` is used.
// NOTE(review): this hook needs the page to still be open; cucumber-js documents
// After hooks as running in reverse definition order — confirm for your version.
After(async function (scenario) {
  if (scenario.result.status === Status.FAILED) {
    const buffer = await global.page.screenshot({
      path: `reports/${scenario.pickle.name}.png`,
      fullPage: true,
    });
    this.attach(buffer, 'image/png');
  }
});

// Close Browser
AfterAll(async () => {
  console.log('close Browser');
  await global.browser.close();
});
I'm running with npm: npm run test -- --tags "@Begin"
I have tried in many ways, but I cannot launch browser maximized. How can I do this?
If you want to start your browser maximized, then use the --start-maximized flag when starting the browser, and disable fixed viewport when launching a context or page. Example
// Ask Chromium to open its window maximized.
const maximizedLaunchOptions = {
  headless: false,
  args: ['--start-maximized'],
};
global.browser = await playwright.chromium.launch(maximizedLaunchOptions);

// viewport: null turns off the fixed viewport for the context.
const contextOptions = { viewport: null };
global.context = await global.browser.newContext(contextOptions);
In Python, use the equivalent no_viewport argument and pass True when creating a context.
Related
I've been making incremental progress, but I'm fairly stumped at this point.
This is the site I'm trying to download from https://www.transtats.bts.gov/OT_Delay/OT_DelayCause1.asp
The reason I'm using Puppeteer is because I can't find a supported API to get this data (if there is one happy to try it)
The link is "Download Raw Data"
My script runs to the end, but doesn't seem to actually download any files. I tried installing puppeteer-extra and setting the downloads path:
// Excerpt from the question's script; each "..." line marks code the author left out.
const puppeteer = require("puppeteer-extra");
const { executablePath } = require('puppeteer')
...
// Directory the downloads are supposed to land in.
var dir = "/home/ubuntu/AirlineStatsFetcher/downloads";
console.log('dir to set for downloads', dir);
// Register the user-preferences plugin with download prefs pointing at `dir`.
puppeteer.use(require('puppeteer-extra-plugin-user-preferences')
(
{
userPrefs: {
download: {
prompt_for_download: false,
open_pdf_in_system_reader: true,
default_directory: dir,
},
plugins: {
always_open_pdf_externally: true
},
}
}));
// Launch headless, using the executable path reported by the puppeteer package.
const browser = await puppeteer.launch({
headless: true, slowMo: 100, executablePath: executablePath()
});
...
// Doesn't seem to work
// Wait for the download link, then click it; the same selector is used for both calls.
await page.waitForSelector('table > tbody > tr > .finePrint:nth-child(3) > a:nth-child(2)');
console.log('Clicking on link to download CSV');
await page.click('table > tbody > tr > .finePrint:nth-child(3) > a:nth-child(2)');
After a while I figured, why not try to build the full URL and then do a GET request? But then I ran into other problems (UNABLE_TO_VERIFY_LEAF_SIGNATURE). Before going further down this route (which feels a little hacky), I wanted to ask for advice here.
Is there something I'm missing in terms of configuration to get it to download?
Downloading files using Puppeteer seems to be a moving target and, by the way, is not well supported today. For now (puppeteer 19.2.2) I would go with https.get instead.
"use strict";
const fs = require("fs");
const https = require("https");
const { pipeline } = require("stream");
// Not sure why puppeteer-extra is used... maybe https://stackoverflow.com/a/73869616/1258111 solves the need in future.
const puppeteer = require("puppeteer-extra");
const { executablePath } = require("puppeteer");

const PAGE_URL = "https://www.transtats.bts.gov/OT_Delay/OT_DelayCause1.asp";
const BASE_URL = "https://www.transtats.bts.gov/OT_Delay/";
const DOWNLOAD_LINK_SELECTOR =
  "table > tbody > tr > .finePrint:nth-child(3) > a:nth-child(2)";

/**
 * Streams the response body of an HTTPS GET request into a local file.
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {string} destination - Path of the file to write.
 * @returns {Promise<void>} Resolves once the file is fully written; rejects on a
 *   request error, a non-200 status, or a stream/file error.
 */
function downloadFile(url, destination) {
  return new Promise((resolve, reject) => {
    // Don't use in production: certificate verification is turned off, but only
    // for this one request instead of for the whole process.
    const request = https.get(url, { rejectUnauthorized: false }, (res) => {
      if (res.statusCode !== 200) {
        res.resume(); // drain the body so the socket is released
        reject(new Error(`Download failed with HTTP status ${res.statusCode}`));
        return;
      }
      // pipeline() forwards errors from either stream and cleans both up.
      pipeline(res, fs.createWriteStream(destination), (err) => {
        if (err) {
          reject(err);
        } else {
          resolve();
        }
      });
    });
    request.on("error", reject);
  });
}

(async () => {
  puppeteer.use(
    require("puppeteer-extra-plugin-user-preferences")({
      userPrefs: {
        download: {
          prompt_for_download: false,
          open_pdf_in_system_reader: false,
        },
        plugins: {
          always_open_pdf_externally: false,
        },
      },
    })
  );
  const browser = await puppeteer.launch({
    headless: true,
    slowMo: 100,
    executablePath: executablePath(),
  });

  let zipUrl;
  try {
    const page = await browser.newPage();
    await page.goto(PAGE_URL, { waitUntil: "networkidle2" });

    const handle = await page.$(DOWNLOAD_LINK_SELECTOR);
    if (handle === null) {
      throw new Error(`Download link not found: ${DOWNLOAD_LINK_SELECTOR}`);
    }
    const relativeZipUrl = await page.evaluate(
      (anchor) => anchor.getAttribute("href"),
      handle
    );
    if (relativeZipUrl === null) {
      throw new Error("Download link has no href attribute");
    }
    // new URL() resolves the href against the base and percent-encodes it.
    zipUrl = new URL(relativeZipUrl, BASE_URL).href;
  } finally {
    // The browser is only needed to read the link; always release it.
    await browser.close();
  }

  await downloadFile(zipUrl, `${__dirname}/download.zip`);
  console.log("Download Completed");
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Unfortunately, Brave does not have a launch arg for certain settings I need to override. So I am trying to navigate to the settings page manually.
import { chromium } from "playwright-extra";
import stealth from "puppeteer-extra-plugin-stealth";
/**
 * Launches the browser at `braveExecutablePath` with the stealth plugin,
 * opens a page in a CSP-bypassing context and navigates it to
 * brave://settings/shields.
 *
 * @param braveExecutablePath - Path to the Brave executable to launch.
 * @returns The launched browser; the caller is responsible for closing it.
 * @throws Rethrows any error from context/page creation or navigation, after
 *   closing the browser so the process is not left running.
 */
export default async function createStealthBrowser(
  braveExecutablePath: string
) {
  chromium.use(stealth());
  const browser = await chromium.launch({
    headless: false,
    executablePath: braveExecutablePath,
  });
  try {
    const context = await browser.newContext({ bypassCSP: true });
    const page = await context.newPage();
    await page.goto("brave://settings/shields");
  } catch (error) {
    // Best-effort cleanup: a failure while closing must not mask the real error.
    await browser.close().catch(() => undefined);
    throw error;
  }
  return browser;
}
And here is my test:
// Playwright Test is published as the scoped package "@playwright/test".
import { test, expect } from "@playwright/test";
import createStealthBrowser from "../StealthBrowser";

// Launches Brave through createStealthBrowser and closes it again.
test("Create a stealth browser", async () => {
  const browser = await createStealthBrowser(
    "C:/Program Files/BraveSoftware/Brave-Browser/Application/brave.exe"
  );
  await browser.close();
});
The test fails with the following output:
page.goto: Navigation failed because page was closed!
=========================== logs ===========================
navigating to "brave://settings/shields", waiting until "load"
============================================================
So what I am trying to do is to open puppeteer window with my google profile, but what I want is to do it multiple times, what I mean is 2-4 windows but with the same profile - is that possible? I am getting this error when I do it:
(node:17460) UnhandledPromiseRejectionWarning: Error: Failed to launch the browser process!
[45844:13176:0410/181437.893:ERROR:cache_util_win.cc(20)] Unable to move the cache: Access is denied. (0x5)
// Question's snippet, kept as posted. It does not parse: the
// '--user-data-dir=...' string sits directly in the options object instead of
// inside an `args: [...]` array, and the object's closing brace is missing.
const puppeteer = require('puppeteer');
(async () => {
const browser = await puppeteer.launch({
headless:false,
'--user-data-dir=C:\\Users\\USER\\AppData\\Local\\Google\\Chrome\\User Data',
);
// Open a page, take a screenshot of example.com, then close the browser.
const page = await browser.newPage();
await page.goto('https://example.com');
await page.screenshot({ path: 'example.png' });
await browser.close();
})();
Note: It is already pointed in the comments but there is a syntax error in the example. The launch should look like this:
// Browser command-line switches belong in the `args` array of the launch options.
const launchOptions = {
  headless: false,
  args: ['--user-data-dir=C:\\Users\\USER\\AppData\\Local\\Google\\Chrome\\User Data'],
};
const browser = await puppeteer.launch(launchOptions);
The error is coming from the fact that you are launching multiple browser instances at the very same time hence the profile directory will be locked and cannot be moved to reuse by puppeteer.
You should avoid starting chromium instances with the very same user data dir at the same time.
Possible solutions
Make the opened windows sequential, can be useful if you have only a few. E.g.:
// Pseudocode: each "..." stands for the rest of that function's body.
const firstFn = async () => await puppeteer.launch() ...
const secondFn = async () => await puppeteer.launch() ...
// secondFn is only called after firstFn's promise has settled, so the two
// launches happen one after the other.
(async () => {
await firstFn()
await secondFn()
})();
Creating copies of the user-data-dir as User Data1, User Data2 User Data3 etc. to avoid conflict while puppeteer copies them. This could be done on the fly with Node's fs module or even manually (if you don't need a lot of instances).
Consider reusing Chromium instances (if your use case allows it), with browser.wsEndpoint and puppeteer.connect, this can be a solution if you would need to open thousands of pages with the same user data dir.
Note: this one is the best for performance as only one browser will be launched, then you can open as many pages in a for..of or regular for loop as you want (using forEach by itself can cause side effects), E.g.:
const puppeteer = require('puppeteer');

const urlArray = ['https://example.com', 'https://google.com'];

/**
 * Launches one browser with the shared user data dir, then visits every URL in
 * `urlArray` in turn, each time through a fresh connection to that same
 * browser, and saves a screenshot per URL.
 *
 * A failure for one URL is logged and does not stop the remaining URLs.
 *
 * @returns {Promise<void>} Resolves after the browser has been closed.
 */
async function fn() {
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--user-data-dir=C:\\Users\\USER\\AppData\\Local\\Google\\Chrome\\User Data'],
  });
  try {
    const browserWSEndpoint = browser.wsEndpoint();
    // for..of keeps the visits sequential; forEach would not wait for each one.
    for (const url of urlArray) {
      let browser2;
      let page;
      try {
        browser2 = await puppeteer.connect({ browserWSEndpoint });
        page = await browser2.newPage();
        await page.goto(url); // it can be wrapped in a retry function to handle flakiness
        // doing cool things with the DOM
        await page.screenshot({ path: `${url.replace('https://', '')}.png` });
        await page.goto('about:blank'); // because of you: https://github.com/puppeteer/puppeteer/issues/1490
      } catch (e) {
        console.error(e);
      } finally {
        // Release the page and the connection even when the visit failed.
        try {
          if (page) {
            await page.close();
          }
          if (browser2) {
            await browser2.disconnect();
          }
        } catch (cleanupError) {
          console.error(cleanupError);
        }
      }
    }
  } finally {
    await browser.close();
  }
}

fn().catch((e) => {
  console.error(e);
});
I have a list of urls that need to be scraped from a website that uses React, for this reason I am using Puppeteer.
I do not want to be blocked by anti-bot servers, for this reason I have added puppeteer-extra-plugin-stealth
I want to prevent ads from loading on the pages, so I am blocking ads by using puppeteer-extra-plugin-adblocker
I also want to prevent my IP address from being blacklisted, so I have used TOR nodes to have different IP addresses.
Below is a simplified version of my code and the setup works (TOR_port and webUrl are assigned dynamically though but for simplifying my question I have assigned it as a variable) .
There is a problem though:
const puppeteer = require('puppeteer-extra');
const _StealthPlugin = require('puppeteer-extra-plugin-stealth');
const _AdblockerPlugin = require('puppeteer-extra-plugin-adblocker');

puppeteer.use(_StealthPlugin());
puppeteer.use(_AdblockerPlugin());

const TOR_port = 13931;
const webUrl = 'https://www.zillow.com/homedetails/2861-Bass-Haven-Ln-Saint-Augustine-FL-32092/47739703_zpid/';

// CommonJS has no top-level await, so the flow runs inside an async function.
(async () => {
  const browser = await puppeteer.launch({
    dumpio: false,
    headless: false,
    args: [
      `--proxy-server=socks5://127.0.0.1:${TOR_port}`,
      `--no-sandbox`,
    ],
    ignoreHTTPSErrors: true,
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1280, height: 720 });
    await page.goto(webUrl, {
      waitUntil: 'load',
      timeout: 30000,
    });
    // Await the selector inside the try block so a failure reaches the catch
    // below instead of becoming an unhandled rejection.
    try {
      await page.waitForSelector('.price');
    } catch (selectorError) {
      // close this since it is clearly not a zillow website
      throw new Error('This is not the zillow website');
    }
    console.log('The price is available');
  } catch (e) {
    console.error(e);
  } finally {
    // Single exit point: the browser is closed on success and on failure.
    await browser.close();
  }
})();
The above setup works but is very unreliable and I recently learnt about Puppeteer-Cluster. I need it to help me manage crawling multiple pages, to track my scraping tasks.
So, my question is how do I implement Puppeteer-Cluster with the above set-up. I am aware of an example(https://github.com/thomasdondorf/puppeteer-cluster/blob/master/examples/different-puppeteer-library.js) offered by the library to show how you can implement plugins, but is so bare that I didn't quite understand it.
How do I implement Puppeteer-Cluster with the above TOR, AdBlocker, and Stealth configurations?
You can just hand over your puppeteer instance like the following:
const { Cluster } = require('puppeteer-cluster');
const puppeteer = require('puppeteer-extra');
const _StealthPlugin = require('puppeteer-extra-plugin-stealth');
const _AdblockerPlugin = require('puppeteer-extra-plugin-adblocker');

puppeteer.use(_StealthPlugin());
puppeteer.use(_AdblockerPlugin());

// Hand the plugin-enabled puppeteer-extra instance to the cluster through the
// `puppeteer` launch option, so the cluster launches browsers with it.
// NOTE(review): browser launch arguments (e.g. the proxy flag) are expected to
// go in the `puppeteerOptions` launch option — confirm against the linked docs.
const cluster = await Cluster.launch({
  puppeteer,
});
Src: https://github.com/thomasdondorf/puppeteer-cluster#clusterlaunchoptions
You can just add the plugins with puppeteer.use()
You have to use puppeteer-extra.
const { addExtra } = require("puppeteer-extra");
const vanillaPuppeteer = require("puppeteer");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
const RecaptchaPlugin = require("puppeteer-extra-plugin-recaptcha");
const { Cluster } = require("puppeteer-cluster");
// Wraps vanilla puppeteer with puppeteer-extra and registers both plugins.
async function main() {
  const puppeteer = addExtra(vanillaPuppeteer);
  // Same registration order as before: stealth first, then recaptcha.
  for (const createPlugin of [StealthPlugin, RecaptchaPlugin]) {
    puppeteer.use(createPlugin());
  }
  // Do stuff
}

main();
I'm using Jest Puppeteer and I have a situation where I'd like to run my login test (which sets cookie/localStorage for the authentication) first and run the others after, however, I know that Jest doesn't work this way - as it searches the local filesystem and runs tests based on the patterns in the filename, and the order in which they run is different.
I'm not entirely sure I'm going about this the correct way, as I'm relying on a test to set the authentication session for the other tests.
Is it possible to do the above, or do I need to rethink my approach?
This is not a timely answer, but I had a similar problem. I was able to run tests in order by nesting describe blocks as shown below. My tests are in separate files that I require here.
const puppeteer = require('puppeteer');
const login = require('./login');
const upload = require('./upload');
// Browser shared by the whole file; assigned in beforeAll.
let browser;

// Launch the browser once before any test runs.
beforeAll(async () => {
  browser = await puppeteer.launch({
    headless: false,
    devtools: true,
    slowMo: 50,
  });
});

// The nested describe blocks are listed in the order the tests should run:
// login first, then upload.
describe('test suite', () => {
  describe('login', () => {
    test('url is correct', async () => {
      const url = await login();
      expect(url).toBe('https://uat2.onplanapp.com/#/');
    }, 25000);
  });

  describe('upload', () => {
    test('file upload ok', async () => {
      // Declared locally; the bare assignment created an implicit global.
      const url = await upload();
      console.log('page.url');
      expect(url).toBe('https://uat2.onplanapp.com/#/moduleLibrary');
      //expect(url).toBe('https://uat2.onplanapp.com/#/uploadFile');
    }, 10000);
  });

  // An async hook signals completion through its returned promise, so it takes
  // no `done` callback; awaiting close() keeps Jest waiting until the browser
  // has actually shut down.
  afterAll(async () => {
    console.log('GOT TO AFTER ALL');
    await browser.close();
  });
});